@wix/evalforge-types 0.37.0 → 0.39.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -916,6 +916,7 @@ var index_exports = {};
916
916
  __export(index_exports, {
917
917
  AVAILABLE_MODEL_IDS: () => AVAILABLE_MODEL_IDS,
918
918
  AVAILABLE_RUN_COMMANDS: () => AVAILABLE_RUN_COMMANDS,
919
+ AVAILABLE_TOOL_NAMES: () => AVAILABLE_TOOL_NAMES,
919
920
  AgentRunCommand: () => AgentRunCommand,
920
921
  AgentRunCommandSchema: () => AgentRunCommandSchema,
921
922
  AgentSchema: () => AgentSchema,
@@ -944,6 +945,7 @@ __export(index_exports, {
944
945
  CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
945
946
  CreateMcpInputSchema: () => CreateMcpInputSchema,
946
947
  CreateProjectInputSchema: () => CreateProjectInputSchema,
948
+ CreateRuleInputSchema: () => CreateRuleInputSchema,
947
949
  CreateSkillInputSchema: () => CreateSkillInputSchema,
948
950
  CreateSkillVersionInputSchema: () => CreateSkillVersionInputSchema,
949
951
  CreateSkillsGroupInputSchema: () => CreateSkillsGroupInputSchema,
@@ -998,6 +1000,8 @@ __export(index_exports, {
998
1000
  ProjectSchema: () => ProjectSchema,
999
1001
  PromptResultSchema: () => PromptResultSchema,
1000
1002
  RUN_COMMAND_LABELS: () => RUN_COMMAND_LABELS,
1003
+ RuleSchema: () => RuleSchema,
1004
+ RuleTypeSchema: () => RuleTypeSchema,
1001
1005
  SEMVER_REGEX: () => SEMVER_REGEX,
1002
1006
  SKILL_FOLDER_NAME_REGEX: () => SKILL_FOLDER_NAME_REGEX,
1003
1007
  SYSTEM_ASSERTIONS: () => SYSTEM_ASSERTIONS,
@@ -1030,6 +1034,8 @@ __export(index_exports, {
1030
1034
  TimeAssertionSchema: () => TimeAssertionSchema,
1031
1035
  TimeConfigSchema: () => TimeConfigSchema,
1032
1036
  TokenUsageSchema: () => TokenUsageSchema,
1037
+ ToolCalledWithParamAssertionSchema: () => ToolCalledWithParamAssertionSchema,
1038
+ ToolCalledWithParamConfigSchema: () => ToolCalledWithParamConfigSchema,
1033
1039
  ToolTestSchema: () => ToolTestSchema,
1034
1040
  TriggerMetadataSchema: () => TriggerMetadataSchema,
1035
1041
  TriggerSchema: () => TriggerSchema,
@@ -1038,6 +1044,7 @@ __export(index_exports, {
1038
1044
  UpdateCustomAssertionInputSchema: () => UpdateCustomAssertionInputSchema,
1039
1045
  UpdateMcpInputSchema: () => UpdateMcpInputSchema,
1040
1046
  UpdateProjectInputSchema: () => UpdateProjectInputSchema,
1047
+ UpdateRuleInputSchema: () => UpdateRuleInputSchema,
1041
1048
  UpdateSkillInputSchema: () => UpdateSkillInputSchema,
1042
1049
  UpdateSkillsGroupInputSchema: () => UpdateSkillsGroupInputSchema,
1043
1050
  UpdateSubAgentInputSchema: () => UpdateSubAgentInputSchema,
@@ -1128,6 +1135,33 @@ var ModelConfigSchema = import_zod4.z.object({
1128
1135
  maxTokens: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().min(1).optional())
1129
1136
  });
1130
1137
 
1138
+ // src/common/rule.ts
1139
+ var import_zod5 = require("zod");
1140
+ var RuleTypeSchema = import_zod5.z.enum(["claude-md", "agents-md", "cursor-rule"]);
1141
+ var RuleSchema = TenantEntitySchema.extend({
1142
+ ruleType: RuleTypeSchema,
1143
+ content: import_zod5.z.string()
1144
+ });
1145
+ var RuleInputBaseSchema = RuleSchema.omit({
1146
+ id: true,
1147
+ createdAt: true,
1148
+ updatedAt: true,
1149
+ deleted: true
1150
+ });
1151
+ var CreateRuleInputSchema = RuleInputBaseSchema;
1152
+ var UpdateRuleInputSchema = RuleInputBaseSchema.partial();
1153
+
1154
+ // src/common/tool-names.ts
1155
+ var AVAILABLE_TOOL_NAMES = [
1156
+ "Bash",
1157
+ "Edit",
1158
+ "Glob",
1159
+ "Grep",
1160
+ "Read",
1161
+ "Skill",
1162
+ "Write"
1163
+ ];
1164
+
1131
1165
  // src/target/target.ts
1132
1166
  var TargetSchema = TenantEntitySchema.extend({
1133
1167
  // Base for all testable entities
@@ -1135,7 +1169,7 @@ var TargetSchema = TenantEntitySchema.extend({
1135
1169
  });
1136
1170
 
1137
1171
  // src/target/agent.ts
1138
- var import_zod5 = require("zod");
1172
+ var import_zod6 = require("zod");
1139
1173
  var AgentRunCommand = /* @__PURE__ */ ((AgentRunCommand2) => {
1140
1174
  AgentRunCommand2["CLAUDE"] = "claude";
1141
1175
  return AgentRunCommand2;
@@ -1144,7 +1178,7 @@ var AVAILABLE_RUN_COMMANDS = Object.values(AgentRunCommand);
1144
1178
  var RUN_COMMAND_LABELS = {
1145
1179
  ["claude" /* CLAUDE */]: "Claude Code"
1146
1180
  };
1147
- var AgentRunCommandSchema = import_zod5.z.nativeEnum(AgentRunCommand);
1181
+ var AgentRunCommandSchema = import_zod6.z.nativeEnum(AgentRunCommand);
1148
1182
  var AgentSchema = TargetSchema.extend({
1149
1183
  /** Command to run the agent */
1150
1184
  runCommand: AgentRunCommandSchema,
@@ -1162,51 +1196,51 @@ var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
1162
1196
  });
1163
1197
 
1164
1198
  // src/target/skill.ts
1165
- var import_zod6 = require("zod");
1199
+ var import_zod7 = require("zod");
1166
1200
  var SKILL_FOLDER_NAME_REGEX = /^[a-z0-9]+(-[a-z0-9]+)*$/;
1167
1201
  var SEMVER_REGEX = /^\d+\.\d+\.\d+$/;
1168
- var SkillVersionOriginSchema = import_zod6.z.enum(["manual", "pr", "master"]);
1202
+ var SkillVersionOriginSchema = import_zod7.z.enum(["manual", "pr", "master"]);
1169
1203
  function isValidSkillFolderName(name) {
1170
1204
  return typeof name === "string" && name.length > 0 && SKILL_FOLDER_NAME_REGEX.test(name.trim());
1171
1205
  }
1172
- var SkillMetadataSchema = import_zod6.z.object({
1173
- name: import_zod6.z.string(),
1174
- description: import_zod6.z.string(),
1175
- allowedTools: import_zod6.z.array(import_zod6.z.string()).optional(),
1176
- skills: import_zod6.z.array(import_zod6.z.string()).optional()
1206
+ var SkillMetadataSchema = import_zod7.z.object({
1207
+ name: import_zod7.z.string(),
1208
+ description: import_zod7.z.string(),
1209
+ allowedTools: import_zod7.z.array(import_zod7.z.string()).optional(),
1210
+ skills: import_zod7.z.array(import_zod7.z.string()).optional()
1177
1211
  });
1178
- var SkillFileSchema = import_zod6.z.object({
1212
+ var SkillFileSchema = import_zod7.z.object({
1179
1213
  /** Relative path within the skill directory, e.g. "SKILL.md" or "references/API_SPEC.md" */
1180
- path: import_zod6.z.string().min(1),
1214
+ path: import_zod7.z.string().min(1),
1181
1215
  /** File content (UTF-8 text) */
1182
- content: import_zod6.z.string()
1216
+ content: import_zod7.z.string()
1183
1217
  });
1184
- var SkillVersionSchema = import_zod6.z.object({
1185
- id: import_zod6.z.string(),
1186
- projectId: import_zod6.z.string(),
1187
- skillId: import_zod6.z.string(),
1218
+ var SkillVersionSchema = import_zod7.z.object({
1219
+ id: import_zod7.z.string(),
1220
+ projectId: import_zod7.z.string(),
1221
+ skillId: import_zod7.z.string(),
1188
1222
  /** Semver string (e.g. "1.2.0") or Falcon fingerprint */
1189
- version: import_zod6.z.string(),
1223
+ version: import_zod7.z.string(),
1190
1224
  /** How this version was created */
1191
1225
  origin: SkillVersionOriginSchema,
1192
1226
  /** Where this snapshot was taken from */
1193
1227
  source: GitHubSourceSchema.optional(),
1194
1228
  /** Frozen snapshot of all files in the skill directory */
1195
- files: import_zod6.z.array(SkillFileSchema).optional(),
1229
+ files: import_zod7.z.array(SkillFileSchema).optional(),
1196
1230
  /** Optional notes about this version (changelog, reason for change) */
1197
- notes: import_zod6.z.string().optional(),
1198
- createdAt: import_zod6.z.string()
1231
+ notes: import_zod7.z.string().optional(),
1232
+ createdAt: import_zod7.z.string()
1199
1233
  });
1200
- var CreateSkillVersionInputSchema = import_zod6.z.object({
1234
+ var CreateSkillVersionInputSchema = import_zod7.z.object({
1201
1235
  /** GitHub source to snapshot from. If not provided, uses the Skill's source. */
1202
1236
  source: GitHubSourceSchema.optional(),
1203
1237
  /** Version string for this snapshot (e.g. "1.0.0", "1.0.3"). */
1204
- version: import_zod6.z.string().min(1),
1205
- notes: import_zod6.z.string().optional(),
1238
+ version: import_zod7.z.string().min(1),
1239
+ notes: import_zod7.z.string().optional(),
1206
1240
  /** Origin of this version. Defaults to 'manual' in backend. */
1207
1241
  origin: SkillVersionOriginSchema.optional(),
1208
1242
  /** Pre-edited files to store directly (bypasses GitHub fetch when provided) */
1209
- files: import_zod6.z.array(SkillFileSchema).optional()
1243
+ files: import_zod7.z.array(SkillFileSchema).optional()
1210
1244
  });
1211
1245
  var SkillSchema = TargetSchema.extend({
1212
1246
  /** GitHub source reference for live content fetching */
@@ -1222,15 +1256,15 @@ var SkillInputBaseSchema = SkillSchema.omit({
1222
1256
  source: true
1223
1257
  }).extend({
1224
1258
  /** Optional - not stored on Skill; content description lives in SkillVersion */
1225
- description: import_zod6.z.string().optional(),
1259
+ description: import_zod7.z.string().optional(),
1226
1260
  /** GitHub source reference for live content fetching */
1227
1261
  source: GitHubSourceSchema.optional()
1228
1262
  });
1229
- var InitialVersionInputSchema = import_zod6.z.object({
1230
- files: import_zod6.z.array(SkillFileSchema).optional(),
1231
- notes: import_zod6.z.string().optional(),
1263
+ var InitialVersionInputSchema = import_zod7.z.object({
1264
+ files: import_zod7.z.array(SkillFileSchema).optional(),
1265
+ notes: import_zod7.z.string().optional(),
1232
1266
  source: GitHubSourceSchema.optional(),
1233
- version: import_zod6.z.string().optional(),
1267
+ version: import_zod7.z.string().optional(),
1234
1268
  origin: SkillVersionOriginSchema.optional()
1235
1269
  });
1236
1270
  var CreateSkillInputSchema = SkillInputBaseSchema.extend({
@@ -1248,10 +1282,10 @@ var SkillWithLatestVersionSchema = SkillSchema.extend({
1248
1282
  });
1249
1283
 
1250
1284
  // src/target/skills-group.ts
1251
- var import_zod7 = require("zod");
1285
+ var import_zod8 = require("zod");
1252
1286
  var SkillsGroupSchema = TenantEntitySchema.extend({
1253
1287
  /** IDs of skills in this group */
1254
- skillIds: import_zod7.z.array(import_zod7.z.string())
1288
+ skillIds: import_zod8.z.array(import_zod8.z.string())
1255
1289
  });
1256
1290
  var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
1257
1291
  id: true,
@@ -1262,10 +1296,10 @@ var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
1262
1296
  var UpdateSkillsGroupInputSchema = CreateSkillsGroupInputSchema.partial();
1263
1297
 
1264
1298
  // src/target/sub-agent.ts
1265
- var import_zod8 = require("zod");
1299
+ var import_zod9 = require("zod");
1266
1300
  var SubAgentSchema = TargetSchema.extend({
1267
1301
  /** The full sub-agent markdown content (YAML frontmatter + body) */
1268
- subAgentMd: import_zod8.z.string()
1302
+ subAgentMd: import_zod9.z.string()
1269
1303
  });
1270
1304
  var SubAgentInputBaseSchema = SubAgentSchema.omit({
1271
1305
  id: true,
@@ -1277,10 +1311,10 @@ var CreateSubAgentInputSchema = SubAgentInputBaseSchema;
1277
1311
  var UpdateSubAgentInputSchema = SubAgentInputBaseSchema.partial();
1278
1312
 
1279
1313
  // src/test/index.ts
1280
- var import_zod19 = require("zod");
1314
+ var import_zod20 = require("zod");
1281
1315
 
1282
1316
  // src/test/base.ts
1283
- var import_zod9 = require("zod");
1317
+ var import_zod10 = require("zod");
1284
1318
  var TestType = /* @__PURE__ */ ((TestType2) => {
1285
1319
  TestType2["LLM"] = "LLM";
1286
1320
  TestType2["TOOL"] = "TOOL";
@@ -1293,7 +1327,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
1293
1327
  TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
1294
1328
  return TestType2;
1295
1329
  })(TestType || {});
1296
- var TestTypeSchema = import_zod9.z.enum(TestType);
1330
+ var TestTypeSchema = import_zod10.z.enum(TestType);
1297
1331
  var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
1298
1332
  TestImportance2["LOW"] = "low";
1299
1333
  TestImportance2["MEDIUM"] = "medium";
@@ -1301,153 +1335,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
1301
1335
  TestImportance2["CRITICAL"] = "critical";
1302
1336
  return TestImportance2;
1303
1337
  })(TestImportance || {});
1304
- var TestImportanceSchema = import_zod9.z.enum(TestImportance);
1305
- var BaseTestSchema = import_zod9.z.object({
1306
- id: import_zod9.z.string(),
1338
+ var TestImportanceSchema = import_zod10.z.enum(TestImportance);
1339
+ var BaseTestSchema = import_zod10.z.object({
1340
+ id: import_zod10.z.string(),
1307
1341
  type: TestTypeSchema,
1308
- name: import_zod9.z.string().min(3),
1309
- description: import_zod9.z.string().optional(),
1342
+ name: import_zod10.z.string().min(3),
1343
+ description: import_zod10.z.string().optional(),
1310
1344
  importance: TestImportanceSchema.optional()
1311
1345
  });
1312
1346
 
1313
1347
  // src/test/llm.ts
1314
- var import_zod10 = require("zod");
1348
+ var import_zod11 = require("zod");
1315
1349
  var LLMTestSchema = BaseTestSchema.extend({
1316
- type: import_zod10.z.literal("LLM" /* LLM */),
1350
+ type: import_zod11.z.literal("LLM" /* LLM */),
1317
1351
  /** Maximum steps for the LLM to take */
1318
- maxSteps: import_zod10.z.number().min(1).max(100),
1352
+ maxSteps: import_zod11.z.number().min(1).max(100),
1319
1353
  /** Prompt to send to the evaluator */
1320
- prompt: import_zod10.z.string().min(1),
1354
+ prompt: import_zod11.z.string().min(1),
1321
1355
  /** ID of the evaluator agent to use */
1322
- evaluatorId: import_zod10.z.string()
1356
+ evaluatorId: import_zod11.z.string()
1323
1357
  });
1324
1358
 
1325
1359
  // src/test/tool.ts
1326
- var import_zod11 = require("zod");
1360
+ var import_zod12 = require("zod");
1327
1361
  var ToolTestSchema = BaseTestSchema.extend({
1328
- type: import_zod11.z.literal("TOOL" /* TOOL */),
1362
+ type: import_zod12.z.literal("TOOL" /* TOOL */),
1329
1363
  /** Name of the tool that should be called */
1330
- toolName: import_zod11.z.string().min(3),
1364
+ toolName: import_zod12.z.string().min(3),
1331
1365
  /** Expected arguments for the tool call */
1332
- args: import_zod11.z.record(import_zod11.z.string(), import_zod11.z.any()),
1366
+ args: import_zod12.z.record(import_zod12.z.string(), import_zod12.z.any()),
1333
1367
  /** Expected content in the tool results */
1334
- resultsContent: import_zod11.z.string()
1368
+ resultsContent: import_zod12.z.string()
1335
1369
  });
1336
1370
 
1337
1371
  // src/test/site-config.ts
1338
- var import_zod12 = require("zod");
1372
+ var import_zod13 = require("zod");
1339
1373
  var SiteConfigTestSchema = BaseTestSchema.extend({
1340
- type: import_zod12.z.literal("SITE_CONFIG" /* SITE_CONFIG */),
1374
+ type: import_zod13.z.literal("SITE_CONFIG" /* SITE_CONFIG */),
1341
1375
  /** URL to call */
1342
- url: import_zod12.z.string().url(),
1376
+ url: import_zod13.z.string().url(),
1343
1377
  /** HTTP method */
1344
- method: import_zod12.z.enum(["GET", "POST"]),
1378
+ method: import_zod13.z.enum(["GET", "POST"]),
1345
1379
  /** Request body (for POST) */
1346
- body: import_zod12.z.string().optional(),
1380
+ body: import_zod13.z.string().optional(),
1347
1381
  /** Expected HTTP status code */
1348
- expectedStatusCode: import_zod12.z.number().int().min(100).max(599),
1382
+ expectedStatusCode: import_zod13.z.number().int().min(100).max(599),
1349
1383
  /** Expected response content */
1350
- expectedResponse: import_zod12.z.string().optional(),
1384
+ expectedResponse: import_zod13.z.string().optional(),
1351
1385
  /** JMESPath expression to extract from response */
1352
- expectedResponseJMESPath: import_zod12.z.string().optional()
1386
+ expectedResponseJMESPath: import_zod13.z.string().optional()
1353
1387
  });
1354
1388
 
1355
1389
  // src/test/command-execution.ts
1356
- var import_zod13 = require("zod");
1390
+ var import_zod14 = require("zod");
1357
1391
  var AllowedCommands = [
1358
1392
  "yarn install --no-immutable && yarn build",
1359
1393
  "npm run build",
1360
1394
  "yarn typecheck"
1361
1395
  ];
1362
1396
  var CommandExecutionTestSchema = BaseTestSchema.extend({
1363
- type: import_zod13.z.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
1397
+ type: import_zod14.z.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
1364
1398
  /** Command to execute (must be in AllowedCommands) */
1365
- command: import_zod13.z.string().refine((value) => AllowedCommands.includes(value), {
1399
+ command: import_zod14.z.string().refine((value) => AllowedCommands.includes(value), {
1366
1400
  message: `Command must be one of: ${AllowedCommands.join(", ")}`
1367
1401
  }),
1368
1402
  /** Expected exit code (default: 0) */
1369
- expectedExitCode: import_zod13.z.number().default(0).optional()
1403
+ expectedExitCode: import_zod14.z.number().default(0).optional()
1370
1404
  });
1371
1405
 
1372
1406
  // src/test/file-presence.ts
1373
- var import_zod14 = require("zod");
1407
+ var import_zod15 = require("zod");
1374
1408
  var FilePresenceTestSchema = BaseTestSchema.extend({
1375
- type: import_zod14.z.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
1409
+ type: import_zod15.z.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
1376
1410
  /** Paths to check */
1377
- paths: import_zod14.z.array(import_zod14.z.string()),
1411
+ paths: import_zod15.z.array(import_zod15.z.string()),
1378
1412
  /** Whether files should exist (true) or not exist (false) */
1379
- shouldExist: import_zod14.z.boolean()
1413
+ shouldExist: import_zod15.z.boolean()
1380
1414
  });
1381
1415
 
1382
1416
  // src/test/file-content.ts
1383
- var import_zod15 = require("zod");
1384
- var FileContentCheckSchema = import_zod15.z.object({
1417
+ var import_zod16 = require("zod");
1418
+ var FileContentCheckSchema = import_zod16.z.object({
1385
1419
  /** Strings that must be present in the file */
1386
- contains: import_zod15.z.array(import_zod15.z.string()).optional(),
1420
+ contains: import_zod16.z.array(import_zod16.z.string()).optional(),
1387
1421
  /** Strings that must NOT be present in the file */
1388
- notContains: import_zod15.z.array(import_zod15.z.string()).optional(),
1422
+ notContains: import_zod16.z.array(import_zod16.z.string()).optional(),
1389
1423
  /** Regex pattern the content must match */
1390
- matches: import_zod15.z.string().optional(),
1424
+ matches: import_zod16.z.string().optional(),
1391
1425
  /** JSON path checks for structured content */
1392
- jsonPath: import_zod15.z.array(
1393
- import_zod15.z.object({
1394
- path: import_zod15.z.string(),
1395
- value: import_zod15.z.unknown()
1426
+ jsonPath: import_zod16.z.array(
1427
+ import_zod16.z.object({
1428
+ path: import_zod16.z.string(),
1429
+ value: import_zod16.z.unknown()
1396
1430
  })
1397
1431
  ).optional(),
1398
1432
  /** Lines that should be added (for diff checking) */
1399
- added: import_zod15.z.array(import_zod15.z.string()).optional(),
1433
+ added: import_zod16.z.array(import_zod16.z.string()).optional(),
1400
1434
  /** Lines that should be removed (for diff checking) */
1401
- removed: import_zod15.z.array(import_zod15.z.string()).optional()
1435
+ removed: import_zod16.z.array(import_zod16.z.string()).optional()
1402
1436
  });
1403
1437
  var FileContentTestSchema = BaseTestSchema.extend({
1404
- type: import_zod15.z.literal("FILE_CONTENT" /* FILE_CONTENT */),
1438
+ type: import_zod16.z.literal("FILE_CONTENT" /* FILE_CONTENT */),
1405
1439
  /** Path to the file to check */
1406
- path: import_zod15.z.string(),
1440
+ path: import_zod16.z.string(),
1407
1441
  /** Content checks to perform */
1408
1442
  checks: FileContentCheckSchema
1409
1443
  });
1410
1444
 
1411
1445
  // src/test/build-check.ts
1412
- var import_zod16 = require("zod");
1446
+ var import_zod17 = require("zod");
1413
1447
  var BuildCheckTestSchema = BaseTestSchema.extend({
1414
- type: import_zod16.z.literal("BUILD_CHECK" /* BUILD_CHECK */),
1448
+ type: import_zod17.z.literal("BUILD_CHECK" /* BUILD_CHECK */),
1415
1449
  /** Build command to execute */
1416
- command: import_zod16.z.string(),
1450
+ command: import_zod17.z.string(),
1417
1451
  /** Whether the build should succeed */
1418
- expectSuccess: import_zod16.z.boolean(),
1452
+ expectSuccess: import_zod17.z.boolean(),
1419
1453
  /** Maximum allowed warnings (optional) */
1420
- allowedWarnings: import_zod16.z.number().optional(),
1454
+ allowedWarnings: import_zod17.z.number().optional(),
1421
1455
  /** Timeout in milliseconds */
1422
- timeout: import_zod16.z.number().optional()
1456
+ timeout: import_zod17.z.number().optional()
1423
1457
  });
1424
1458
 
1425
1459
  // src/test/vitest.ts
1426
- var import_zod17 = require("zod");
1460
+ var import_zod18 = require("zod");
1427
1461
  var VitestTestSchema = BaseTestSchema.extend({
1428
- type: import_zod17.z.literal("VITEST" /* VITEST */),
1462
+ type: import_zod18.z.literal("VITEST" /* VITEST */),
1429
1463
  /** Test file content */
1430
- testFile: import_zod17.z.string(),
1464
+ testFile: import_zod18.z.string(),
1431
1465
  /** Name of the test file */
1432
- testFileName: import_zod17.z.string(),
1466
+ testFileName: import_zod18.z.string(),
1433
1467
  /** Minimum pass rate required (0-100) */
1434
- minPassRate: import_zod17.z.number().min(0).max(100)
1468
+ minPassRate: import_zod18.z.number().min(0).max(100)
1435
1469
  });
1436
1470
 
1437
1471
  // src/test/playwright-nl.ts
1438
- var import_zod18 = require("zod");
1472
+ var import_zod19 = require("zod");
1439
1473
  var PlaywrightNLTestSchema = BaseTestSchema.extend({
1440
- type: import_zod18.z.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
1474
+ type: import_zod19.z.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
1441
1475
  /** Natural language steps to execute */
1442
- steps: import_zod18.z.array(import_zod18.z.string()),
1476
+ steps: import_zod19.z.array(import_zod19.z.string()),
1443
1477
  /** Expected outcome description */
1444
- expectedOutcome: import_zod18.z.string(),
1478
+ expectedOutcome: import_zod19.z.string(),
1445
1479
  /** Timeout in milliseconds */
1446
- timeout: import_zod18.z.number().optional()
1480
+ timeout: import_zod19.z.number().optional()
1447
1481
  });
1448
1482
 
1449
1483
  // src/test/index.ts
1450
- var TestSchema = import_zod19.z.discriminatedUnion("type", [
1484
+ var TestSchema = import_zod20.z.discriminatedUnion("type", [
1451
1485
  LLMTestSchema,
1452
1486
  ToolTestSchema,
1453
1487
  SiteConfigTestSchema,
@@ -1460,44 +1494,52 @@ var TestSchema = import_zod19.z.discriminatedUnion("type", [
1460
1494
  ]);
1461
1495
 
1462
1496
  // src/scenario/assertions.ts
1463
- var import_zod20 = require("zod");
1464
- var SkillWasCalledAssertionSchema = import_zod20.z.object({
1465
- type: import_zod20.z.literal("skill_was_called"),
1497
+ var import_zod21 = require("zod");
1498
+ var SkillWasCalledAssertionSchema = import_zod21.z.object({
1499
+ type: import_zod21.z.literal("skill_was_called"),
1466
1500
  /** Names of the skills that must have been called (matched against trace Skill tool args) */
1467
- skillNames: import_zod20.z.array(import_zod20.z.string().min(1)).min(1)
1501
+ skillNames: import_zod21.z.array(import_zod21.z.string().min(1)).min(1)
1468
1502
  });
1469
- var BuildPassedAssertionSchema = import_zod20.z.object({
1470
- type: import_zod20.z.literal("build_passed"),
1503
+ var ToolCalledWithParamAssertionSchema = import_zod21.z.object({
1504
+ type: import_zod21.z.literal("tool_called_with_param"),
1505
+ /** Name of the tool that must have been called */
1506
+ toolName: import_zod21.z.string().min(1),
1507
+ /** JSON string of key-value pairs for expected parameters (substring match) */
1508
+ expectedParams: import_zod21.z.string().min(1)
1509
+ });
1510
+ var BuildPassedAssertionSchema = import_zod21.z.object({
1511
+ type: import_zod21.z.literal("build_passed"),
1471
1512
  /** Command to run (default: "yarn build") */
1472
- command: import_zod20.z.string().optional(),
1513
+ command: import_zod21.z.string().optional(),
1473
1514
  /** Expected exit code (default: 0) */
1474
- expectedExitCode: import_zod20.z.number().int().optional()
1515
+ expectedExitCode: import_zod21.z.number().int().optional()
1475
1516
  });
1476
- var CostAssertionSchema = import_zod20.z.object({
1477
- type: import_zod20.z.literal("cost"),
1517
+ var CostAssertionSchema = import_zod21.z.object({
1518
+ type: import_zod21.z.literal("cost"),
1478
1519
  /** Maximum allowed cost in USD */
1479
- maxCostUsd: import_zod20.z.number().positive()
1520
+ maxCostUsd: import_zod21.z.number().positive()
1480
1521
  });
1481
- var LlmJudgeAssertionSchema = import_zod20.z.object({
1482
- type: import_zod20.z.literal("llm_judge"),
1522
+ var LlmJudgeAssertionSchema = import_zod21.z.object({
1523
+ type: import_zod21.z.literal("llm_judge"),
1483
1524
  /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
1484
- prompt: import_zod20.z.string(),
1525
+ prompt: import_zod21.z.string(),
1485
1526
  /** Optional system prompt for the judge (default asks for JSON with score) */
1486
- systemPrompt: import_zod20.z.string().optional(),
1527
+ systemPrompt: import_zod21.z.string().optional(),
1487
1528
  /** Minimum score to pass (0-100, default 70) */
1488
- minScore: import_zod20.z.number().int().min(0).max(100).optional(),
1529
+ minScore: import_zod21.z.number().int().min(0).max(100).optional(),
1489
1530
  /** Model for the judge (e.g. claude-3-5-haiku) */
1490
- model: import_zod20.z.string().optional(),
1491
- maxTokens: import_zod20.z.number().int().optional(),
1492
- temperature: import_zod20.z.number().min(0).max(1).optional()
1531
+ model: import_zod21.z.string().optional(),
1532
+ maxTokens: import_zod21.z.number().int().optional(),
1533
+ temperature: import_zod21.z.number().min(0).max(1).optional()
1493
1534
  });
1494
- var TimeAssertionSchema = import_zod20.z.object({
1495
- type: import_zod20.z.literal("time_limit"),
1535
+ var TimeAssertionSchema = import_zod21.z.object({
1536
+ type: import_zod21.z.literal("time_limit"),
1496
1537
  /** Maximum allowed duration in milliseconds */
1497
- maxDurationMs: import_zod20.z.number().int().positive()
1538
+ maxDurationMs: import_zod21.z.number().int().positive()
1498
1539
  });
1499
- var AssertionSchema = import_zod20.z.union([
1540
+ var AssertionSchema = import_zod21.z.union([
1500
1541
  SkillWasCalledAssertionSchema,
1542
+ ToolCalledWithParamAssertionSchema,
1501
1543
  BuildPassedAssertionSchema,
1502
1544
  TimeAssertionSchema,
1503
1545
  CostAssertionSchema,
@@ -1505,33 +1547,33 @@ var AssertionSchema = import_zod20.z.union([
1505
1547
  ]);
1506
1548
 
1507
1549
  // src/scenario/environment.ts
1508
- var import_zod21 = require("zod");
1509
- var LocalProjectConfigSchema = import_zod21.z.object({
1550
+ var import_zod22 = require("zod");
1551
+ var LocalProjectConfigSchema = import_zod22.z.object({
1510
1552
  /** Template ID to use for the local project */
1511
- templateId: import_zod21.z.string().optional(),
1553
+ templateId: import_zod22.z.string().optional(),
1512
1554
  /** Files to create in the project */
1513
- files: import_zod21.z.array(
1514
- import_zod21.z.object({
1515
- path: import_zod21.z.string().min(1),
1516
- content: import_zod21.z.string().min(1)
1555
+ files: import_zod22.z.array(
1556
+ import_zod22.z.object({
1557
+ path: import_zod22.z.string().min(1),
1558
+ content: import_zod22.z.string().min(1)
1517
1559
  })
1518
1560
  ).optional()
1519
1561
  });
1520
- var MetaSiteConfigSchema = import_zod21.z.object({
1521
- configurations: import_zod21.z.array(
1522
- import_zod21.z.object({
1523
- name: import_zod21.z.string().min(1),
1524
- apiCalls: import_zod21.z.array(
1525
- import_zod21.z.object({
1526
- url: import_zod21.z.string().url(),
1527
- method: import_zod21.z.enum(["POST", "PUT"]),
1528
- body: import_zod21.z.string()
1562
+ var MetaSiteConfigSchema = import_zod22.z.object({
1563
+ configurations: import_zod22.z.array(
1564
+ import_zod22.z.object({
1565
+ name: import_zod22.z.string().min(1),
1566
+ apiCalls: import_zod22.z.array(
1567
+ import_zod22.z.object({
1568
+ url: import_zod22.z.string().url(),
1569
+ method: import_zod22.z.enum(["POST", "PUT"]),
1570
+ body: import_zod22.z.string()
1529
1571
  })
1530
1572
  )
1531
1573
  })
1532
1574
  ).optional()
1533
1575
  });
1534
- var EnvironmentSchema = import_zod21.z.object({
1576
+ var EnvironmentSchema = import_zod22.z.object({
1535
1577
  /** Local project configuration */
1536
1578
  localProject: LocalProjectConfigSchema.optional(),
1537
1579
  /** Meta site configuration */
@@ -1539,64 +1581,71 @@ var EnvironmentSchema = import_zod21.z.object({
1539
1581
  });
1540
1582
 
1541
1583
  // src/scenario/test-scenario.ts
1542
- var import_zod23 = require("zod");
1584
+ var import_zod24 = require("zod");
1543
1585
 
1544
1586
  // src/assertion/assertion.ts
1545
- var import_zod22 = require("zod");
1546
- var AssertionTypeSchema = import_zod22.z.enum([
1587
+ var import_zod23 = require("zod");
1588
+ var AssertionTypeSchema = import_zod23.z.enum([
1547
1589
  "skill_was_called",
1590
+ "tool_called_with_param",
1548
1591
  "build_passed",
1549
1592
  "time_limit",
1550
1593
  "cost",
1551
1594
  "llm_judge"
1552
1595
  ]);
1553
- var AssertionParameterTypeSchema = import_zod22.z.enum([
1596
+ var AssertionParameterTypeSchema = import_zod23.z.enum([
1554
1597
  "string",
1555
1598
  "number",
1556
1599
  "boolean"
1557
1600
  ]);
1558
- var AssertionParameterSchema = import_zod22.z.object({
1601
+ var AssertionParameterSchema = import_zod23.z.object({
1559
1602
  /** Parameter name (used as key in params object) */
1560
- name: import_zod22.z.string().min(1),
1603
+ name: import_zod23.z.string().min(1),
1561
1604
  /** Display label for the parameter */
1562
- label: import_zod22.z.string().min(1),
1605
+ label: import_zod23.z.string().min(1),
1563
1606
  /** Parameter type */
1564
1607
  type: AssertionParameterTypeSchema,
1565
1608
  /** Whether this parameter is required */
1566
- required: import_zod22.z.boolean(),
1609
+ required: import_zod23.z.boolean(),
1567
1610
  /** Default value (optional, used when not provided) */
1568
- defaultValue: import_zod22.z.union([import_zod22.z.string(), import_zod22.z.number(), import_zod22.z.boolean()]).optional(),
1611
+ defaultValue: import_zod23.z.union([import_zod23.z.string(), import_zod23.z.number(), import_zod23.z.boolean()]).optional(),
1569
1612
  /** If true, parameter is hidden by default behind "Show advanced options" */
1570
- advanced: import_zod22.z.boolean().optional()
1613
+ advanced: import_zod23.z.boolean().optional()
1571
1614
  });
1572
- var ScenarioAssertionLinkSchema = import_zod22.z.object({
1615
+ var ScenarioAssertionLinkSchema = import_zod23.z.object({
1573
1616
  /** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
1574
- assertionId: import_zod22.z.string(),
1617
+ assertionId: import_zod23.z.string(),
1575
1618
  /** Parameter values for this assertion in this scenario */
1576
- params: import_zod22.z.record(
1577
- import_zod22.z.string(),
1578
- import_zod22.z.union([import_zod22.z.string(), import_zod22.z.number(), import_zod22.z.boolean(), import_zod22.z.null()])
1619
+ params: import_zod23.z.record(
1620
+ import_zod23.z.string(),
1621
+ import_zod23.z.union([import_zod23.z.string(), import_zod23.z.number(), import_zod23.z.boolean(), import_zod23.z.null()])
1579
1622
  ).optional()
1580
1623
  });
1581
- var SkillWasCalledConfigSchema = import_zod22.z.object({
1624
+ var SkillWasCalledConfigSchema = import_zod23.z.object({
1582
1625
  /** Names of the skills that must have been called */
1583
- skillNames: import_zod22.z.array(import_zod22.z.string().min(1)).min(1)
1626
+ skillNames: import_zod23.z.array(import_zod23.z.string().min(1)).min(1)
1584
1627
  });
1585
- var CostConfigSchema = import_zod22.z.strictObject({
1628
+ var CostConfigSchema = import_zod23.z.strictObject({
1586
1629
  /** Maximum allowed cost in USD */
1587
- maxCostUsd: import_zod22.z.number().positive()
1630
+ maxCostUsd: import_zod23.z.number().positive()
1631
+ });
1632
+ var ToolCalledWithParamConfigSchema = import_zod23.z.strictObject({
1633
+ /** Name of the tool that must have been called */
1634
+ toolName: import_zod23.z.string().min(1),
1635
+ /** JSON string of key-value pairs for expected parameters (substring match) */
1636
+ expectedParams: import_zod23.z.string().min(1)
1588
1637
  });
1589
- var BuildPassedConfigSchema = import_zod22.z.strictObject({
1638
+ var BuildPassedConfigSchema = import_zod23.z.strictObject({
1590
1639
  /** Command to run (default: "yarn build") */
1591
- command: import_zod22.z.string().optional(),
1640
+ command: import_zod23.z.string().optional(),
1592
1641
  /** Expected exit code (default: 0) */
1593
- expectedExitCode: import_zod22.z.number().int().optional()
1642
+ expectedExitCode: import_zod23.z.number().int().optional()
1594
1643
  });
1595
- var TimeConfigSchema = import_zod22.z.strictObject({
1644
+ var TimeConfigSchema = import_zod23.z.strictObject({
1596
1645
  /** Maximum allowed duration in milliseconds */
1597
- maxDurationMs: import_zod22.z.number().int().positive()
1646
+ maxDurationMs: import_zod23.z.number().int().positive()
1598
1647
  });
1599
- var LlmJudgeConfigSchema = import_zod22.z.object({
1648
+ var LlmJudgeConfigSchema = import_zod23.z.object({
1600
1649
  /**
1601
1650
  * Prompt template with placeholders:
1602
1651
  * - {{output}}: agent's final output
@@ -1607,32 +1656,34 @@ var LlmJudgeConfigSchema = import_zod22.z.object({
1607
1656
  * - {{trace}}: step-by-step trace of tool calls
1608
1657
  * - Custom parameters defined in the parameters array
1609
1658
  */
1610
- prompt: import_zod22.z.string().min(1),
1659
+ prompt: import_zod23.z.string().min(1),
1611
1660
  /** Optional system prompt for the judge */
1612
- systemPrompt: import_zod22.z.string().optional(),
1661
+ systemPrompt: import_zod23.z.string().optional(),
1613
1662
  /** Minimum score to pass (0-100, default 70) */
1614
- minScore: import_zod22.z.number().int().min(0).max(100).optional(),
1663
+ minScore: import_zod23.z.number().int().min(0).max(100).optional(),
1615
1664
  /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
1616
- model: import_zod22.z.string().optional(),
1665
+ model: import_zod23.z.string().optional(),
1617
1666
  /** Max output tokens */
1618
- maxTokens: import_zod22.z.number().int().optional(),
1667
+ maxTokens: import_zod23.z.number().int().optional(),
1619
1668
  /** Temperature (0-1) */
1620
- temperature: import_zod22.z.number().min(0).max(1).optional(),
1669
+ temperature: import_zod23.z.number().min(0).max(1).optional(),
1621
1670
  /** User-defined parameters for this assertion */
1622
- parameters: import_zod22.z.array(AssertionParameterSchema).optional()
1671
+ parameters: import_zod23.z.array(AssertionParameterSchema).optional()
1623
1672
  });
1624
- var AssertionConfigSchema = import_zod22.z.union([
1673
+ var AssertionConfigSchema = import_zod23.z.union([
1625
1674
  LlmJudgeConfigSchema,
1626
1675
  // requires prompt - check first
1627
1676
  SkillWasCalledConfigSchema,
1628
1677
  // requires skillNames
1678
+ ToolCalledWithParamConfigSchema,
1679
+ // requires toolName + expectedParams, uses strictObject
1629
1680
  TimeConfigSchema,
1630
1681
  // requires maxDurationMs, uses strictObject
1631
1682
  CostConfigSchema,
1632
1683
  // requires maxCostUsd, uses strictObject
1633
1684
  BuildPassedConfigSchema,
1634
1685
  // all optional, uses strictObject to reject unknown keys
1635
- import_zod22.z.object({})
1686
+ import_zod23.z.object({})
1636
1687
  // fallback empty config
1637
1688
  ]);
1638
1689
  var CustomAssertionSchema = TenantEntitySchema.extend({
@@ -1654,6 +1705,8 @@ function validateAssertionConfig(type, config) {
1654
1705
  return SkillWasCalledConfigSchema.safeParse(config).success;
1655
1706
  case "cost":
1656
1707
  return CostConfigSchema.safeParse(config).success;
1708
+ case "tool_called_with_param":
1709
+ return ToolCalledWithParamConfigSchema.safeParse(config).success;
1657
1710
  case "build_passed":
1658
1711
  return BuildPassedConfigSchema.safeParse(config).success;
1659
1712
  case "time_limit":
@@ -1681,23 +1734,23 @@ function getLlmJudgeConfig(assertion) {
1681
1734
  }
1682
1735
 
1683
1736
  // src/scenario/test-scenario.ts
1684
- var ExpectedFileSchema = import_zod23.z.object({
1737
+ var ExpectedFileSchema = import_zod24.z.object({
1685
1738
  /** Relative path where the file should be created */
1686
- path: import_zod23.z.string(),
1739
+ path: import_zod24.z.string(),
1687
1740
  /** Optional expected content */
1688
- content: import_zod23.z.string().optional()
1741
+ content: import_zod24.z.string().optional()
1689
1742
  });
1690
1743
  var TestScenarioSchema = TenantEntitySchema.extend({
1691
1744
  /** The prompt sent to the agent to trigger the task */
1692
- triggerPrompt: import_zod23.z.string().min(10),
1745
+ triggerPrompt: import_zod24.z.string().min(10),
1693
1746
  /** ID of the template to use for this scenario (null = no template) */
1694
- templateId: import_zod23.z.string().nullish(),
1747
+ templateId: import_zod24.z.string().nullish(),
1695
1748
  /** Inline assertions to evaluate for this scenario (legacy) */
1696
- assertions: import_zod23.z.array(AssertionSchema).optional(),
1749
+ assertions: import_zod24.z.array(AssertionSchema).optional(),
1697
1750
  /** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
1698
- assertionIds: import_zod23.z.array(import_zod23.z.string()).optional(),
1751
+ assertionIds: import_zod24.z.array(import_zod24.z.string()).optional(),
1699
1752
  /** Linked assertions with per-scenario parameter values */
1700
- assertionLinks: import_zod23.z.array(ScenarioAssertionLinkSchema).optional()
1753
+ assertionLinks: import_zod24.z.array(ScenarioAssertionLinkSchema).optional()
1701
1754
  });
1702
1755
  var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
1703
1756
  id: true,
@@ -1708,10 +1761,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
1708
1761
  var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
1709
1762
 
1710
1763
  // src/suite/test-suite.ts
1711
- var import_zod24 = require("zod");
1764
+ var import_zod25 = require("zod");
1712
1765
  var TestSuiteSchema = TenantEntitySchema.extend({
1713
1766
  /** IDs of test scenarios in this suite */
1714
- scenarioIds: import_zod24.z.array(import_zod24.z.string())
1767
+ scenarioIds: import_zod25.z.array(import_zod25.z.string())
1715
1768
  });
1716
1769
  var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1717
1770
  id: true,
@@ -1722,21 +1775,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1722
1775
  var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
1723
1776
 
1724
1777
  // src/evaluation/metrics.ts
1725
- var import_zod25 = require("zod");
1726
- var TokenUsageSchema = import_zod25.z.object({
1727
- prompt: import_zod25.z.number(),
1728
- completion: import_zod25.z.number(),
1729
- total: import_zod25.z.number()
1730
- });
1731
- var EvalMetricsSchema = import_zod25.z.object({
1732
- totalAssertions: import_zod25.z.number(),
1733
- passed: import_zod25.z.number(),
1734
- failed: import_zod25.z.number(),
1735
- skipped: import_zod25.z.number(),
1736
- errors: import_zod25.z.number(),
1737
- passRate: import_zod25.z.number(),
1738
- avgDuration: import_zod25.z.number(),
1739
- totalDuration: import_zod25.z.number()
1778
+ var import_zod26 = require("zod");
1779
+ var TokenUsageSchema = import_zod26.z.object({
1780
+ prompt: import_zod26.z.number(),
1781
+ completion: import_zod26.z.number(),
1782
+ total: import_zod26.z.number()
1783
+ });
1784
+ var EvalMetricsSchema = import_zod26.z.object({
1785
+ totalAssertions: import_zod26.z.number(),
1786
+ passed: import_zod26.z.number(),
1787
+ failed: import_zod26.z.number(),
1788
+ skipped: import_zod26.z.number(),
1789
+ errors: import_zod26.z.number(),
1790
+ passRate: import_zod26.z.number(),
1791
+ avgDuration: import_zod26.z.number(),
1792
+ totalDuration: import_zod26.z.number()
1740
1793
  });
1741
1794
  var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1742
1795
  EvalStatus2["PENDING"] = "pending";
@@ -1746,7 +1799,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1746
1799
  EvalStatus2["CANCELLED"] = "cancelled";
1747
1800
  return EvalStatus2;
1748
1801
  })(EvalStatus || {});
1749
- var EvalStatusSchema = import_zod25.z.enum(EvalStatus);
1802
+ var EvalStatusSchema = import_zod26.z.enum(EvalStatus);
1750
1803
  var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1751
1804
  LLMStepType2["COMPLETION"] = "completion";
1752
1805
  LLMStepType2["TOOL_USE"] = "tool_use";
@@ -1754,52 +1807,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1754
1807
  LLMStepType2["THINKING"] = "thinking";
1755
1808
  return LLMStepType2;
1756
1809
  })(LLMStepType || {});
1757
- var LLMTraceStepSchema = import_zod25.z.object({
1758
- id: import_zod25.z.string(),
1759
- stepNumber: import_zod25.z.number(),
1760
- type: import_zod25.z.enum(LLMStepType),
1761
- model: import_zod25.z.string(),
1762
- provider: import_zod25.z.string(),
1763
- startedAt: import_zod25.z.string(),
1764
- durationMs: import_zod25.z.number(),
1810
+ var LLMTraceStepSchema = import_zod26.z.object({
1811
+ id: import_zod26.z.string(),
1812
+ stepNumber: import_zod26.z.number(),
1813
+ type: import_zod26.z.enum(LLMStepType),
1814
+ model: import_zod26.z.string(),
1815
+ provider: import_zod26.z.string(),
1816
+ startedAt: import_zod26.z.string(),
1817
+ durationMs: import_zod26.z.number(),
1765
1818
  tokenUsage: TokenUsageSchema,
1766
- costUsd: import_zod25.z.number(),
1767
- toolName: import_zod25.z.string().optional(),
1768
- toolArguments: import_zod25.z.string().optional(),
1769
- inputPreview: import_zod25.z.string().optional(),
1770
- outputPreview: import_zod25.z.string().optional(),
1771
- success: import_zod25.z.boolean(),
1772
- error: import_zod25.z.string().optional()
1773
- });
1774
- var LLMBreakdownStatsSchema = import_zod25.z.object({
1775
- count: import_zod25.z.number(),
1776
- durationMs: import_zod25.z.number(),
1777
- tokens: import_zod25.z.number(),
1778
- costUsd: import_zod25.z.number()
1779
- });
1780
- var LLMTraceSummarySchema = import_zod25.z.object({
1781
- totalSteps: import_zod25.z.number(),
1782
- totalDurationMs: import_zod25.z.number(),
1819
+ costUsd: import_zod26.z.number(),
1820
+ toolName: import_zod26.z.string().optional(),
1821
+ toolArguments: import_zod26.z.string().optional(),
1822
+ inputPreview: import_zod26.z.string().optional(),
1823
+ outputPreview: import_zod26.z.string().optional(),
1824
+ success: import_zod26.z.boolean(),
1825
+ error: import_zod26.z.string().optional()
1826
+ });
1827
+ var LLMBreakdownStatsSchema = import_zod26.z.object({
1828
+ count: import_zod26.z.number(),
1829
+ durationMs: import_zod26.z.number(),
1830
+ tokens: import_zod26.z.number(),
1831
+ costUsd: import_zod26.z.number()
1832
+ });
1833
+ var LLMTraceSummarySchema = import_zod26.z.object({
1834
+ totalSteps: import_zod26.z.number(),
1835
+ totalDurationMs: import_zod26.z.number(),
1783
1836
  totalTokens: TokenUsageSchema,
1784
- totalCostUsd: import_zod25.z.number(),
1785
- stepTypeBreakdown: import_zod25.z.record(import_zod25.z.string(), LLMBreakdownStatsSchema).optional(),
1786
- modelBreakdown: import_zod25.z.record(import_zod25.z.string(), LLMBreakdownStatsSchema),
1787
- modelsUsed: import_zod25.z.array(import_zod25.z.string())
1788
- });
1789
- var LLMTraceSchema = import_zod25.z.object({
1790
- id: import_zod25.z.string(),
1791
- steps: import_zod25.z.array(LLMTraceStepSchema),
1837
+ totalCostUsd: import_zod26.z.number(),
1838
+ stepTypeBreakdown: import_zod26.z.record(import_zod26.z.string(), LLMBreakdownStatsSchema).optional(),
1839
+ modelBreakdown: import_zod26.z.record(import_zod26.z.string(), LLMBreakdownStatsSchema),
1840
+ modelsUsed: import_zod26.z.array(import_zod26.z.string())
1841
+ });
1842
+ var LLMTraceSchema = import_zod26.z.object({
1843
+ id: import_zod26.z.string(),
1844
+ steps: import_zod26.z.array(LLMTraceStepSchema),
1792
1845
  summary: LLMTraceSummarySchema
1793
1846
  });
1794
1847
 
1795
1848
  // src/evaluation/eval-result.ts
1796
- var import_zod28 = require("zod");
1849
+ var import_zod29 = require("zod");
1797
1850
 
1798
1851
  // src/evaluation/eval-run.ts
1799
- var import_zod27 = require("zod");
1852
+ var import_zod28 = require("zod");
1800
1853
 
1801
1854
  // src/evaluation/live-trace.ts
1802
- var import_zod26 = require("zod");
1855
+ var import_zod27 = require("zod");
1803
1856
  var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1804
1857
  LiveTraceEventType2["THINKING"] = "thinking";
1805
1858
  LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -1813,37 +1866,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1813
1866
  LiveTraceEventType2["USER"] = "user";
1814
1867
  return LiveTraceEventType2;
1815
1868
  })(LiveTraceEventType || {});
1816
- var LiveTraceEventSchema = import_zod26.z.object({
1869
+ var LiveTraceEventSchema = import_zod27.z.object({
1817
1870
  /** The evaluation run ID */
1818
- evalRunId: import_zod26.z.string(),
1871
+ evalRunId: import_zod27.z.string(),
1819
1872
  /** The scenario ID being executed */
1820
- scenarioId: import_zod26.z.string(),
1873
+ scenarioId: import_zod27.z.string(),
1821
1874
  /** The scenario name for display */
1822
- scenarioName: import_zod26.z.string(),
1875
+ scenarioName: import_zod27.z.string(),
1823
1876
  /** The target ID (skill, agent, etc.) */
1824
- targetId: import_zod26.z.string(),
1877
+ targetId: import_zod27.z.string(),
1825
1878
  /** The target name for display */
1826
- targetName: import_zod26.z.string(),
1879
+ targetName: import_zod27.z.string(),
1827
1880
  /** Step number in the current scenario execution */
1828
- stepNumber: import_zod26.z.number(),
1881
+ stepNumber: import_zod27.z.number(),
1829
1882
  /** Type of trace event */
1830
- type: import_zod26.z.enum(LiveTraceEventType),
1883
+ type: import_zod27.z.enum(LiveTraceEventType),
1831
1884
  /** Tool name if this is a tool_use event */
1832
- toolName: import_zod26.z.string().optional(),
1885
+ toolName: import_zod27.z.string().optional(),
1833
1886
  /** Tool arguments preview (truncated JSON) */
1834
- toolArgs: import_zod26.z.string().optional(),
1887
+ toolArgs: import_zod27.z.string().optional(),
1835
1888
  /** Output preview (truncated text) */
1836
- outputPreview: import_zod26.z.string().optional(),
1889
+ outputPreview: import_zod27.z.string().optional(),
1837
1890
  /** File path for file operations */
1838
- filePath: import_zod26.z.string().optional(),
1891
+ filePath: import_zod27.z.string().optional(),
1839
1892
  /** Elapsed time in milliseconds for progress events */
1840
- elapsedMs: import_zod26.z.number().optional(),
1893
+ elapsedMs: import_zod27.z.number().optional(),
1841
1894
  /** Thinking/reasoning text from Claude */
1842
- thinking: import_zod26.z.string().optional(),
1895
+ thinking: import_zod27.z.string().optional(),
1843
1896
  /** Timestamp when this event occurred */
1844
- timestamp: import_zod26.z.string(),
1897
+ timestamp: import_zod27.z.string(),
1845
1898
  /** Whether this is the final event for this scenario */
1846
- isComplete: import_zod26.z.boolean()
1899
+ isComplete: import_zod27.z.boolean()
1847
1900
  });
1848
1901
  var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
1849
1902
  function parseTraceEventLine(line) {
@@ -1871,14 +1924,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
1871
1924
  TriggerType2["MANUAL"] = "MANUAL";
1872
1925
  return TriggerType2;
1873
1926
  })(TriggerType || {});
1874
- var TriggerMetadataSchema = import_zod27.z.object({
1875
- version: import_zod27.z.string().optional(),
1876
- resourceUpdated: import_zod27.z.array(import_zod27.z.string()).optional()
1927
+ var TriggerMetadataSchema = import_zod28.z.object({
1928
+ version: import_zod28.z.string().optional(),
1929
+ resourceUpdated: import_zod28.z.array(import_zod28.z.string()).optional()
1877
1930
  });
1878
- var TriggerSchema = import_zod27.z.object({
1879
- id: import_zod27.z.string(),
1931
+ var TriggerSchema = import_zod28.z.object({
1932
+ id: import_zod28.z.string(),
1880
1933
  metadata: TriggerMetadataSchema.optional(),
1881
- type: import_zod27.z.enum(TriggerType)
1934
+ type: import_zod28.z.enum(TriggerType)
1882
1935
  });
1883
1936
  var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
1884
1937
  FailureCategory2["MISSING_FILE"] = "missing_file";
@@ -1896,28 +1949,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
1896
1949
  FailureSeverity2["LOW"] = "low";
1897
1950
  return FailureSeverity2;
1898
1951
  })(FailureSeverity || {});
1899
- var DiffLineTypeSchema = import_zod27.z.enum(["added", "removed", "unchanged"]);
1900
- var DiffLineSchema = import_zod27.z.object({
1952
+ var DiffLineTypeSchema = import_zod28.z.enum(["added", "removed", "unchanged"]);
1953
+ var DiffLineSchema = import_zod28.z.object({
1901
1954
  type: DiffLineTypeSchema,
1902
- content: import_zod27.z.string(),
1903
- lineNumber: import_zod27.z.number()
1904
- });
1905
- var DiffContentSchema = import_zod27.z.object({
1906
- path: import_zod27.z.string(),
1907
- expected: import_zod27.z.string(),
1908
- actual: import_zod27.z.string(),
1909
- diffLines: import_zod27.z.array(DiffLineSchema),
1910
- renamedFrom: import_zod27.z.string().optional()
1911
- });
1912
- var CommandExecutionSchema = import_zod27.z.object({
1913
- command: import_zod27.z.string(),
1914
- exitCode: import_zod27.z.number(),
1915
- output: import_zod27.z.string().optional(),
1916
- duration: import_zod27.z.number()
1917
- });
1918
- var FileModificationSchema = import_zod27.z.object({
1919
- path: import_zod27.z.string(),
1920
- action: import_zod27.z.enum(["created", "modified", "deleted"])
1955
+ content: import_zod28.z.string(),
1956
+ lineNumber: import_zod28.z.number()
1957
+ });
1958
+ var DiffContentSchema = import_zod28.z.object({
1959
+ path: import_zod28.z.string(),
1960
+ expected: import_zod28.z.string(),
1961
+ actual: import_zod28.z.string(),
1962
+ diffLines: import_zod28.z.array(DiffLineSchema),
1963
+ renamedFrom: import_zod28.z.string().optional()
1964
+ });
1965
+ var CommandExecutionSchema = import_zod28.z.object({
1966
+ command: import_zod28.z.string(),
1967
+ exitCode: import_zod28.z.number(),
1968
+ output: import_zod28.z.string().optional(),
1969
+ duration: import_zod28.z.number()
1970
+ });
1971
+ var FileModificationSchema = import_zod28.z.object({
1972
+ path: import_zod28.z.string(),
1973
+ action: import_zod28.z.enum(["created", "modified", "deleted"])
1921
1974
  });
1922
1975
  var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1923
1976
  TemplateFileStatus2["NEW"] = "new";
@@ -1925,81 +1978,83 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1925
1978
  TemplateFileStatus2["UNCHANGED"] = "unchanged";
1926
1979
  return TemplateFileStatus2;
1927
1980
  })(TemplateFileStatus || {});
1928
- var TemplateFileSchema = import_zod27.z.object({
1981
+ var TemplateFileSchema = import_zod28.z.object({
1929
1982
  /** Relative path within the template */
1930
- path: import_zod27.z.string(),
1983
+ path: import_zod28.z.string(),
1931
1984
  /** Full file content after execution */
1932
- content: import_zod27.z.string(),
1985
+ content: import_zod28.z.string(),
1933
1986
  /** File status (new, modified, unchanged) */
1934
- status: import_zod27.z.enum(["new", "modified", "unchanged"])
1935
- });
1936
- var ApiCallSchema = import_zod27.z.object({
1937
- endpoint: import_zod27.z.string(),
1938
- tokensUsed: import_zod27.z.number(),
1939
- duration: import_zod27.z.number()
1940
- });
1941
- var ExecutionTraceSchema = import_zod27.z.object({
1942
- commands: import_zod27.z.array(CommandExecutionSchema),
1943
- filesModified: import_zod27.z.array(FileModificationSchema),
1944
- apiCalls: import_zod27.z.array(ApiCallSchema),
1945
- totalDuration: import_zod27.z.number()
1946
- });
1947
- var FailureAnalysisSchema = import_zod27.z.object({
1948
- category: import_zod27.z.enum(FailureCategory),
1949
- severity: import_zod27.z.enum(FailureSeverity),
1950
- summary: import_zod27.z.string(),
1951
- details: import_zod27.z.string(),
1952
- rootCause: import_zod27.z.string(),
1953
- suggestedFix: import_zod27.z.string(),
1954
- relatedAssertions: import_zod27.z.array(import_zod27.z.string()),
1955
- codeSnippet: import_zod27.z.string().optional(),
1956
- similarIssues: import_zod27.z.array(import_zod27.z.string()).optional(),
1957
- patternId: import_zod27.z.string().optional(),
1987
+ status: import_zod28.z.enum(["new", "modified", "unchanged"])
1988
+ });
1989
+ var ApiCallSchema = import_zod28.z.object({
1990
+ endpoint: import_zod28.z.string(),
1991
+ tokensUsed: import_zod28.z.number(),
1992
+ duration: import_zod28.z.number()
1993
+ });
1994
+ var ExecutionTraceSchema = import_zod28.z.object({
1995
+ commands: import_zod28.z.array(CommandExecutionSchema),
1996
+ filesModified: import_zod28.z.array(FileModificationSchema),
1997
+ apiCalls: import_zod28.z.array(ApiCallSchema),
1998
+ totalDuration: import_zod28.z.number()
1999
+ });
2000
+ var FailureAnalysisSchema = import_zod28.z.object({
2001
+ category: import_zod28.z.enum(FailureCategory),
2002
+ severity: import_zod28.z.enum(FailureSeverity),
2003
+ summary: import_zod28.z.string(),
2004
+ details: import_zod28.z.string(),
2005
+ rootCause: import_zod28.z.string(),
2006
+ suggestedFix: import_zod28.z.string(),
2007
+ relatedAssertions: import_zod28.z.array(import_zod28.z.string()),
2008
+ codeSnippet: import_zod28.z.string().optional(),
2009
+ similarIssues: import_zod28.z.array(import_zod28.z.string()).optional(),
2010
+ patternId: import_zod28.z.string().optional(),
1958
2011
  // Extended fields for detailed debugging
1959
2012
  diff: DiffContentSchema.optional(),
1960
2013
  executionTrace: ExecutionTraceSchema.optional()
1961
2014
  });
1962
2015
  var EvalRunSchema = TenantEntitySchema.extend({
1963
2016
  /** Agent ID for this run */
1964
- agentId: import_zod27.z.string().optional(),
2017
+ agentId: import_zod28.z.string().optional(),
1965
2018
  /** Skills group ID for this run */
1966
- skillsGroupId: import_zod27.z.string().optional(),
2019
+ skillsGroupId: import_zod28.z.string().optional(),
1967
2020
  /** Map of skillId to skillVersionId for this run */
1968
- skillVersions: import_zod27.z.record(import_zod27.z.string(), import_zod27.z.string()).optional(),
2021
+ skillVersions: import_zod28.z.record(import_zod28.z.string(), import_zod28.z.string()).optional(),
1969
2022
  /** Scenario IDs to run */
1970
- scenarioIds: import_zod27.z.array(import_zod27.z.string()),
2023
+ scenarioIds: import_zod28.z.array(import_zod28.z.string()),
1971
2024
  /** Current status */
1972
2025
  status: EvalStatusSchema,
1973
2026
  /** Progress percentage (0-100) */
1974
- progress: import_zod27.z.number(),
2027
+ progress: import_zod28.z.number(),
1975
2028
  /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
1976
- results: import_zod27.z.array(import_zod27.z.lazy(() => EvalRunResultSchema)),
2029
+ results: import_zod28.z.array(import_zod28.z.lazy(() => EvalRunResultSchema)),
1977
2030
  /** Aggregated metrics across all results */
1978
2031
  aggregateMetrics: EvalMetricsSchema,
1979
2032
  /** Failure analyses */
1980
- failureAnalyses: import_zod27.z.array(FailureAnalysisSchema).optional(),
2033
+ failureAnalyses: import_zod28.z.array(FailureAnalysisSchema).optional(),
1981
2034
  /** Aggregated LLM trace summary */
1982
2035
  llmTraceSummary: LLMTraceSummarySchema.optional(),
1983
2036
  /** What triggered this run */
1984
2037
  trigger: TriggerSchema.optional(),
1985
2038
  /** When the run started (set when evaluation is triggered) */
1986
- startedAt: import_zod27.z.string().optional(),
2039
+ startedAt: import_zod28.z.string().optional(),
1987
2040
  /** When the run completed */
1988
- completedAt: import_zod27.z.string().optional(),
2041
+ completedAt: import_zod28.z.string().optional(),
1989
2042
  /** Live trace events captured during execution (for playback on results page) */
1990
- liveTraceEvents: import_zod27.z.array(LiveTraceEventSchema).optional(),
2043
+ liveTraceEvents: import_zod28.z.array(LiveTraceEventSchema).optional(),
1991
2044
  /** Remote job ID for tracking execution in Dev Machines */
1992
- jobId: import_zod27.z.string().optional(),
2045
+ jobId: import_zod28.z.string().optional(),
1993
2046
  /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
1994
- jobStatus: import_zod27.z.string().optional(),
2047
+ jobStatus: import_zod28.z.string().optional(),
1995
2048
  /** Remote job error message if the job failed */
1996
- jobError: import_zod27.z.string().optional(),
2049
+ jobError: import_zod28.z.string().optional(),
1997
2050
  /** Timestamp of the last job status check */
1998
- jobStatusCheckedAt: import_zod27.z.string().optional(),
2051
+ jobStatusCheckedAt: import_zod28.z.string().optional(),
1999
2052
  /** MCP server IDs to enable for this run (optional) */
2000
- mcpIds: import_zod27.z.array(import_zod27.z.string()).optional(),
2053
+ mcpIds: import_zod28.z.array(import_zod28.z.string()).optional(),
2001
2054
  /** Sub-agent IDs to enable for this run (optional) */
2002
- subAgentIds: import_zod27.z.array(import_zod27.z.string()).optional()
2055
+ subAgentIds: import_zod28.z.array(import_zod28.z.string()).optional(),
2056
+ /** Rule IDs to enable for this run (optional) */
2057
+ ruleIds: import_zod28.z.array(import_zod28.z.string()).optional()
2003
2058
  });
2004
2059
  var CreateEvalRunInputSchema = EvalRunSchema.omit({
2005
2060
  id: true,
@@ -2012,28 +2067,28 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
2012
2067
  startedAt: true,
2013
2068
  completedAt: true
2014
2069
  });
2015
- var EvaluationProgressSchema = import_zod27.z.object({
2016
- runId: import_zod27.z.string(),
2017
- targetId: import_zod27.z.string(),
2018
- totalScenarios: import_zod27.z.number(),
2019
- completedScenarios: import_zod27.z.number(),
2020
- scenarioProgress: import_zod27.z.array(
2021
- import_zod27.z.object({
2022
- scenarioId: import_zod27.z.string(),
2023
- currentStep: import_zod27.z.string(),
2024
- error: import_zod27.z.string().optional()
2070
+ var EvaluationProgressSchema = import_zod28.z.object({
2071
+ runId: import_zod28.z.string(),
2072
+ targetId: import_zod28.z.string(),
2073
+ totalScenarios: import_zod28.z.number(),
2074
+ completedScenarios: import_zod28.z.number(),
2075
+ scenarioProgress: import_zod28.z.array(
2076
+ import_zod28.z.object({
2077
+ scenarioId: import_zod28.z.string(),
2078
+ currentStep: import_zod28.z.string(),
2079
+ error: import_zod28.z.string().optional()
2025
2080
  })
2026
2081
  ),
2027
- createdAt: import_zod27.z.number()
2082
+ createdAt: import_zod28.z.number()
2028
2083
  });
2029
- var EvaluationLogSchema = import_zod27.z.object({
2030
- runId: import_zod27.z.string(),
2031
- scenarioId: import_zod27.z.string(),
2032
- log: import_zod27.z.object({
2033
- level: import_zod27.z.enum(["info", "error", "debug"]),
2034
- message: import_zod27.z.string().optional(),
2035
- args: import_zod27.z.array(import_zod27.z.any()).optional(),
2036
- error: import_zod27.z.string().optional()
2084
+ var EvaluationLogSchema = import_zod28.z.object({
2085
+ runId: import_zod28.z.string(),
2086
+ scenarioId: import_zod28.z.string(),
2087
+ log: import_zod28.z.object({
2088
+ level: import_zod28.z.enum(["info", "error", "debug"]),
2089
+ message: import_zod28.z.string().optional(),
2090
+ args: import_zod28.z.array(import_zod28.z.any()).optional(),
2091
+ error: import_zod28.z.string().optional()
2037
2092
  })
2038
2093
  });
2039
2094
  var LLM_TIMEOUT = 12e4;
@@ -2046,95 +2101,95 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
2046
2101
  AssertionResultStatus2["ERROR"] = "error";
2047
2102
  return AssertionResultStatus2;
2048
2103
  })(AssertionResultStatus || {});
2049
- var AssertionResultSchema = import_zod28.z.object({
2050
- id: import_zod28.z.string(),
2051
- assertionId: import_zod28.z.string(),
2052
- assertionType: import_zod28.z.string(),
2053
- assertionName: import_zod28.z.string(),
2054
- status: import_zod28.z.enum(AssertionResultStatus),
2055
- message: import_zod28.z.string().optional(),
2056
- expected: import_zod28.z.string().optional(),
2057
- actual: import_zod28.z.string().optional(),
2058
- duration: import_zod28.z.number().optional(),
2059
- details: import_zod28.z.record(import_zod28.z.string(), import_zod28.z.unknown()).optional(),
2060
- llmTraceSteps: import_zod28.z.array(LLMTraceStepSchema).optional()
2061
- });
2062
- var EvalRunResultSchema = import_zod28.z.object({
2063
- id: import_zod28.z.string(),
2064
- targetId: import_zod28.z.string(),
2065
- targetName: import_zod28.z.string().optional(),
2104
+ var AssertionResultSchema = import_zod29.z.object({
2105
+ id: import_zod29.z.string(),
2106
+ assertionId: import_zod29.z.string(),
2107
+ assertionType: import_zod29.z.string(),
2108
+ assertionName: import_zod29.z.string(),
2109
+ status: import_zod29.z.enum(AssertionResultStatus),
2110
+ message: import_zod29.z.string().optional(),
2111
+ expected: import_zod29.z.string().optional(),
2112
+ actual: import_zod29.z.string().optional(),
2113
+ duration: import_zod29.z.number().optional(),
2114
+ details: import_zod29.z.record(import_zod29.z.string(), import_zod29.z.unknown()).optional(),
2115
+ llmTraceSteps: import_zod29.z.array(LLMTraceStepSchema).optional()
2116
+ });
2117
+ var EvalRunResultSchema = import_zod29.z.object({
2118
+ id: import_zod29.z.string(),
2119
+ targetId: import_zod29.z.string(),
2120
+ targetName: import_zod29.z.string().optional(),
2066
2121
  /** SkillVersion ID used for this evaluation (for version tracking) */
2067
- skillVersionId: import_zod28.z.string().optional(),
2122
+ skillVersionId: import_zod29.z.string().optional(),
2068
2123
  /** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
2069
- skillVersion: import_zod28.z.string().optional(),
2070
- scenarioId: import_zod28.z.string(),
2071
- scenarioName: import_zod28.z.string(),
2124
+ skillVersion: import_zod29.z.string().optional(),
2125
+ scenarioId: import_zod29.z.string(),
2126
+ scenarioName: import_zod29.z.string(),
2072
2127
  modelConfig: ModelConfigSchema.optional(),
2073
- assertionResults: import_zod28.z.array(AssertionResultSchema),
2128
+ assertionResults: import_zod29.z.array(AssertionResultSchema),
2074
2129
  metrics: EvalMetricsSchema.optional(),
2075
- passed: import_zod28.z.number(),
2076
- failed: import_zod28.z.number(),
2077
- passRate: import_zod28.z.number(),
2078
- duration: import_zod28.z.number(),
2079
- outputText: import_zod28.z.string().optional(),
2080
- files: import_zod28.z.array(ExpectedFileSchema).optional(),
2081
- fileDiffs: import_zod28.z.array(DiffContentSchema).optional(),
2130
+ passed: import_zod29.z.number(),
2131
+ failed: import_zod29.z.number(),
2132
+ passRate: import_zod29.z.number(),
2133
+ duration: import_zod29.z.number(),
2134
+ outputText: import_zod29.z.string().optional(),
2135
+ files: import_zod29.z.array(ExpectedFileSchema).optional(),
2136
+ fileDiffs: import_zod29.z.array(DiffContentSchema).optional(),
2082
2137
  /** Full template files after execution with status indicators */
2083
- templateFiles: import_zod28.z.array(TemplateFileSchema).optional(),
2084
- startedAt: import_zod28.z.string().optional(),
2085
- completedAt: import_zod28.z.string().optional(),
2138
+ templateFiles: import_zod29.z.array(TemplateFileSchema).optional(),
2139
+ startedAt: import_zod29.z.string().optional(),
2140
+ completedAt: import_zod29.z.string().optional(),
2086
2141
  llmTrace: LLMTraceSchema.optional()
2087
2142
  });
2088
- var PromptResultSchema = import_zod28.z.object({
2089
- text: import_zod28.z.string(),
2090
- files: import_zod28.z.array(import_zod28.z.unknown()).optional(),
2091
- finishReason: import_zod28.z.string().optional(),
2092
- reasoning: import_zod28.z.string().optional(),
2093
- reasoningDetails: import_zod28.z.unknown().optional(),
2094
- toolCalls: import_zod28.z.array(import_zod28.z.unknown()).optional(),
2095
- toolResults: import_zod28.z.array(import_zod28.z.unknown()).optional(),
2096
- warnings: import_zod28.z.array(import_zod28.z.unknown()).optional(),
2097
- sources: import_zod28.z.array(import_zod28.z.unknown()).optional(),
2098
- steps: import_zod28.z.array(import_zod28.z.unknown()),
2099
- generationTimeMs: import_zod28.z.number(),
2100
- prompt: import_zod28.z.string(),
2101
- systemPrompt: import_zod28.z.string(),
2102
- usage: import_zod28.z.object({
2103
- totalTokens: import_zod28.z.number().optional(),
2104
- totalMicrocentsSpent: import_zod28.z.number().optional()
2143
+ var PromptResultSchema = import_zod29.z.object({
2144
+ text: import_zod29.z.string(),
2145
+ files: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2146
+ finishReason: import_zod29.z.string().optional(),
2147
+ reasoning: import_zod29.z.string().optional(),
2148
+ reasoningDetails: import_zod29.z.unknown().optional(),
2149
+ toolCalls: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2150
+ toolResults: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2151
+ warnings: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2152
+ sources: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2153
+ steps: import_zod29.z.array(import_zod29.z.unknown()),
2154
+ generationTimeMs: import_zod29.z.number(),
2155
+ prompt: import_zod29.z.string(),
2156
+ systemPrompt: import_zod29.z.string(),
2157
+ usage: import_zod29.z.object({
2158
+ totalTokens: import_zod29.z.number().optional(),
2159
+ totalMicrocentsSpent: import_zod29.z.number().optional()
2105
2160
  })
2106
2161
  });
2107
- var EvaluationResultSchema = import_zod28.z.object({
2108
- id: import_zod28.z.string(),
2109
- runId: import_zod28.z.string(),
2110
- timestamp: import_zod28.z.number(),
2162
+ var EvaluationResultSchema = import_zod29.z.object({
2163
+ id: import_zod29.z.string(),
2164
+ runId: import_zod29.z.string(),
2165
+ timestamp: import_zod29.z.number(),
2111
2166
  promptResult: PromptResultSchema,
2112
- testResults: import_zod28.z.array(import_zod28.z.unknown()),
2113
- tags: import_zod28.z.array(import_zod28.z.string()).optional(),
2114
- feedback: import_zod28.z.string().optional(),
2115
- score: import_zod28.z.number(),
2116
- suiteId: import_zod28.z.string().optional()
2117
- });
2118
- var LeanEvaluationResultSchema = import_zod28.z.object({
2119
- id: import_zod28.z.string(),
2120
- runId: import_zod28.z.string(),
2121
- timestamp: import_zod28.z.number(),
2122
- tags: import_zod28.z.array(import_zod28.z.string()).optional(),
2123
- scenarioId: import_zod28.z.string(),
2124
- scenarioVersion: import_zod28.z.number().optional(),
2125
- targetId: import_zod28.z.string(),
2126
- targetVersion: import_zod28.z.number().optional(),
2127
- suiteId: import_zod28.z.string().optional(),
2128
- score: import_zod28.z.number(),
2129
- time: import_zod28.z.number().optional(),
2130
- microcentsSpent: import_zod28.z.number().optional()
2167
+ testResults: import_zod29.z.array(import_zod29.z.unknown()),
2168
+ tags: import_zod29.z.array(import_zod29.z.string()).optional(),
2169
+ feedback: import_zod29.z.string().optional(),
2170
+ score: import_zod29.z.number(),
2171
+ suiteId: import_zod29.z.string().optional()
2172
+ });
2173
+ var LeanEvaluationResultSchema = import_zod29.z.object({
2174
+ id: import_zod29.z.string(),
2175
+ runId: import_zod29.z.string(),
2176
+ timestamp: import_zod29.z.number(),
2177
+ tags: import_zod29.z.array(import_zod29.z.string()).optional(),
2178
+ scenarioId: import_zod29.z.string(),
2179
+ scenarioVersion: import_zod29.z.number().optional(),
2180
+ targetId: import_zod29.z.string(),
2181
+ targetVersion: import_zod29.z.number().optional(),
2182
+ suiteId: import_zod29.z.string().optional(),
2183
+ score: import_zod29.z.number(),
2184
+ time: import_zod29.z.number().optional(),
2185
+ microcentsSpent: import_zod29.z.number().optional()
2131
2186
  });
2132
2187
 
2133
2188
  // src/project/project.ts
2134
- var import_zod29 = require("zod");
2189
+ var import_zod30 = require("zod");
2135
2190
  var ProjectSchema = BaseEntitySchema.extend({
2136
- appId: import_zod29.z.string().optional().describe("The ID of the app in Dev Center"),
2137
- appSecret: import_zod29.z.string().optional().describe("The secret of the app in Dev Center")
2191
+ appId: import_zod30.z.string().optional().describe("The ID of the app in Dev Center"),
2192
+ appSecret: import_zod30.z.string().optional().describe("The secret of the app in Dev Center")
2138
2193
  });
2139
2194
  var CreateProjectInputSchema = ProjectSchema.omit({
2140
2195
  id: true,
@@ -2160,6 +2215,7 @@ var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
2160
2215
  // src/assertion/system-assertions.ts
2161
2216
  var SYSTEM_ASSERTION_IDS = {
2162
2217
  SKILL_WAS_CALLED: "system:skill_was_called",
2218
+ TOOL_CALLED_WITH_PARAM: "system:tool_called_with_param",
2163
2219
  BUILD_PASSED: "system:build_passed",
2164
2220
  TIME_LIMIT: "system:time_limit",
2165
2221
  COST: "system:cost",
@@ -2183,6 +2239,26 @@ var SYSTEM_ASSERTIONS = {
2183
2239
  }
2184
2240
  ]
2185
2241
  },
2242
+ [SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM]: {
2243
+ id: SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM,
2244
+ name: "Tool Called With Param",
2245
+ description: "Check that a tool was called with expected parameters",
2246
+ type: "tool_called_with_param",
2247
+ parameters: [
2248
+ {
2249
+ name: "toolName",
2250
+ label: "Tool Name",
2251
+ type: "string",
2252
+ required: true
2253
+ },
2254
+ {
2255
+ name: "expectedParams",
2256
+ label: "Expected Parameters (JSON, substring match)",
2257
+ type: "string",
2258
+ required: true
2259
+ }
2260
+ ]
2261
+ },
2186
2262
  [SYSTEM_ASSERTION_IDS.BUILD_PASSED]: {
2187
2263
  id: SYSTEM_ASSERTION_IDS.BUILD_PASSED,
2188
2264
  name: "Build Passed",
@@ -2301,6 +2377,7 @@ function getSystemAssertion(id) {
2301
2377
  0 && (module.exports = {
2302
2378
  AVAILABLE_MODEL_IDS,
2303
2379
  AVAILABLE_RUN_COMMANDS,
2380
+ AVAILABLE_TOOL_NAMES,
2304
2381
  AgentRunCommand,
2305
2382
  AgentRunCommandSchema,
2306
2383
  AgentSchema,
@@ -2329,6 +2406,7 @@ function getSystemAssertion(id) {
2329
2406
  CreateEvalRunInputSchema,
2330
2407
  CreateMcpInputSchema,
2331
2408
  CreateProjectInputSchema,
2409
+ CreateRuleInputSchema,
2332
2410
  CreateSkillInputSchema,
2333
2411
  CreateSkillVersionInputSchema,
2334
2412
  CreateSkillsGroupInputSchema,
@@ -2383,6 +2461,8 @@ function getSystemAssertion(id) {
2383
2461
  ProjectSchema,
2384
2462
  PromptResultSchema,
2385
2463
  RUN_COMMAND_LABELS,
2464
+ RuleSchema,
2465
+ RuleTypeSchema,
2386
2466
  SEMVER_REGEX,
2387
2467
  SKILL_FOLDER_NAME_REGEX,
2388
2468
  SYSTEM_ASSERTIONS,
@@ -2415,6 +2495,8 @@ function getSystemAssertion(id) {
2415
2495
  TimeAssertionSchema,
2416
2496
  TimeConfigSchema,
2417
2497
  TokenUsageSchema,
2498
+ ToolCalledWithParamAssertionSchema,
2499
+ ToolCalledWithParamConfigSchema,
2418
2500
  ToolTestSchema,
2419
2501
  TriggerMetadataSchema,
2420
2502
  TriggerSchema,
@@ -2423,6 +2505,7 @@ function getSystemAssertion(id) {
2423
2505
  UpdateCustomAssertionInputSchema,
2424
2506
  UpdateMcpInputSchema,
2425
2507
  UpdateProjectInputSchema,
2508
+ UpdateRuleInputSchema,
2426
2509
  UpdateSkillInputSchema,
2427
2510
  UpdateSkillsGroupInputSchema,
2428
2511
  UpdateSubAgentInputSchema,