@wix/evalforge-types 0.37.0 → 0.39.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -975,6 +975,33 @@ var ModelConfigSchema = z4.object({
975
975
  maxTokens: z4.preprocess(nullToUndefined, z4.number().min(1).optional())
976
976
  });
977
977
 
978
+ // src/common/rule.ts
979
+ import { z as z5 } from "zod";
980
+ var RuleTypeSchema = z5.enum(["claude-md", "agents-md", "cursor-rule"]);
981
+ var RuleSchema = TenantEntitySchema.extend({
982
+ ruleType: RuleTypeSchema,
983
+ content: z5.string()
984
+ });
985
+ var RuleInputBaseSchema = RuleSchema.omit({
986
+ id: true,
987
+ createdAt: true,
988
+ updatedAt: true,
989
+ deleted: true
990
+ });
991
+ var CreateRuleInputSchema = RuleInputBaseSchema;
992
+ var UpdateRuleInputSchema = RuleInputBaseSchema.partial();
993
+
994
+ // src/common/tool-names.ts
995
+ var AVAILABLE_TOOL_NAMES = [
996
+ "Bash",
997
+ "Edit",
998
+ "Glob",
999
+ "Grep",
1000
+ "Read",
1001
+ "Skill",
1002
+ "Write"
1003
+ ];
1004
+
978
1005
  // src/target/target.ts
979
1006
  var TargetSchema = TenantEntitySchema.extend({
980
1007
  // Base for all testable entities
@@ -982,7 +1009,7 @@ var TargetSchema = TenantEntitySchema.extend({
982
1009
  });
983
1010
 
984
1011
  // src/target/agent.ts
985
- import { z as z5 } from "zod";
1012
+ import { z as z6 } from "zod";
986
1013
  var AgentRunCommand = /* @__PURE__ */ ((AgentRunCommand2) => {
987
1014
  AgentRunCommand2["CLAUDE"] = "claude";
988
1015
  return AgentRunCommand2;
@@ -991,7 +1018,7 @@ var AVAILABLE_RUN_COMMANDS = Object.values(AgentRunCommand);
991
1018
  var RUN_COMMAND_LABELS = {
992
1019
  ["claude" /* CLAUDE */]: "Claude Code"
993
1020
  };
994
- var AgentRunCommandSchema = z5.nativeEnum(AgentRunCommand);
1021
+ var AgentRunCommandSchema = z6.nativeEnum(AgentRunCommand);
995
1022
  var AgentSchema = TargetSchema.extend({
996
1023
  /** Command to run the agent */
997
1024
  runCommand: AgentRunCommandSchema,
@@ -1009,51 +1036,51 @@ var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
1009
1036
  });
1010
1037
 
1011
1038
  // src/target/skill.ts
1012
- import { z as z6 } from "zod";
1039
+ import { z as z7 } from "zod";
1013
1040
  var SKILL_FOLDER_NAME_REGEX = /^[a-z0-9]+(-[a-z0-9]+)*$/;
1014
1041
  var SEMVER_REGEX = /^\d+\.\d+\.\d+$/;
1015
- var SkillVersionOriginSchema = z6.enum(["manual", "pr", "master"]);
1042
+ var SkillVersionOriginSchema = z7.enum(["manual", "pr", "master"]);
1016
1043
  function isValidSkillFolderName(name) {
1017
1044
  return typeof name === "string" && name.length > 0 && SKILL_FOLDER_NAME_REGEX.test(name.trim());
1018
1045
  }
1019
- var SkillMetadataSchema = z6.object({
1020
- name: z6.string(),
1021
- description: z6.string(),
1022
- allowedTools: z6.array(z6.string()).optional(),
1023
- skills: z6.array(z6.string()).optional()
1046
+ var SkillMetadataSchema = z7.object({
1047
+ name: z7.string(),
1048
+ description: z7.string(),
1049
+ allowedTools: z7.array(z7.string()).optional(),
1050
+ skills: z7.array(z7.string()).optional()
1024
1051
  });
1025
- var SkillFileSchema = z6.object({
1052
+ var SkillFileSchema = z7.object({
1026
1053
  /** Relative path within the skill directory, e.g. "SKILL.md" or "references/API_SPEC.md" */
1027
- path: z6.string().min(1),
1054
+ path: z7.string().min(1),
1028
1055
  /** File content (UTF-8 text) */
1029
- content: z6.string()
1056
+ content: z7.string()
1030
1057
  });
1031
- var SkillVersionSchema = z6.object({
1032
- id: z6.string(),
1033
- projectId: z6.string(),
1034
- skillId: z6.string(),
1058
+ var SkillVersionSchema = z7.object({
1059
+ id: z7.string(),
1060
+ projectId: z7.string(),
1061
+ skillId: z7.string(),
1035
1062
  /** Semver string (e.g. "1.2.0") or Falcon fingerprint */
1036
- version: z6.string(),
1063
+ version: z7.string(),
1037
1064
  /** How this version was created */
1038
1065
  origin: SkillVersionOriginSchema,
1039
1066
  /** Where this snapshot was taken from */
1040
1067
  source: GitHubSourceSchema.optional(),
1041
1068
  /** Frozen snapshot of all files in the skill directory */
1042
- files: z6.array(SkillFileSchema).optional(),
1069
+ files: z7.array(SkillFileSchema).optional(),
1043
1070
  /** Optional notes about this version (changelog, reason for change) */
1044
- notes: z6.string().optional(),
1045
- createdAt: z6.string()
1071
+ notes: z7.string().optional(),
1072
+ createdAt: z7.string()
1046
1073
  });
1047
- var CreateSkillVersionInputSchema = z6.object({
1074
+ var CreateSkillVersionInputSchema = z7.object({
1048
1075
  /** GitHub source to snapshot from. If not provided, uses the Skill's source. */
1049
1076
  source: GitHubSourceSchema.optional(),
1050
1077
  /** Version string for this snapshot (e.g. "1.0.0", "1.0.3"). */
1051
- version: z6.string().min(1),
1052
- notes: z6.string().optional(),
1078
+ version: z7.string().min(1),
1079
+ notes: z7.string().optional(),
1053
1080
  /** Origin of this version. Defaults to 'manual' in backend. */
1054
1081
  origin: SkillVersionOriginSchema.optional(),
1055
1082
  /** Pre-edited files to store directly (bypasses GitHub fetch when provided) */
1056
- files: z6.array(SkillFileSchema).optional()
1083
+ files: z7.array(SkillFileSchema).optional()
1057
1084
  });
1058
1085
  var SkillSchema = TargetSchema.extend({
1059
1086
  /** GitHub source reference for live content fetching */
@@ -1069,15 +1096,15 @@ var SkillInputBaseSchema = SkillSchema.omit({
1069
1096
  source: true
1070
1097
  }).extend({
1071
1098
  /** Optional - not stored on Skill; content description lives in SkillVersion */
1072
- description: z6.string().optional(),
1099
+ description: z7.string().optional(),
1073
1100
  /** GitHub source reference for live content fetching */
1074
1101
  source: GitHubSourceSchema.optional()
1075
1102
  });
1076
- var InitialVersionInputSchema = z6.object({
1077
- files: z6.array(SkillFileSchema).optional(),
1078
- notes: z6.string().optional(),
1103
+ var InitialVersionInputSchema = z7.object({
1104
+ files: z7.array(SkillFileSchema).optional(),
1105
+ notes: z7.string().optional(),
1079
1106
  source: GitHubSourceSchema.optional(),
1080
- version: z6.string().optional(),
1107
+ version: z7.string().optional(),
1081
1108
  origin: SkillVersionOriginSchema.optional()
1082
1109
  });
1083
1110
  var CreateSkillInputSchema = SkillInputBaseSchema.extend({
@@ -1095,10 +1122,10 @@ var SkillWithLatestVersionSchema = SkillSchema.extend({
1095
1122
  });
1096
1123
 
1097
1124
  // src/target/skills-group.ts
1098
- import { z as z7 } from "zod";
1125
+ import { z as z8 } from "zod";
1099
1126
  var SkillsGroupSchema = TenantEntitySchema.extend({
1100
1127
  /** IDs of skills in this group */
1101
- skillIds: z7.array(z7.string())
1128
+ skillIds: z8.array(z8.string())
1102
1129
  });
1103
1130
  var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
1104
1131
  id: true,
@@ -1109,10 +1136,10 @@ var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
1109
1136
  var UpdateSkillsGroupInputSchema = CreateSkillsGroupInputSchema.partial();
1110
1137
 
1111
1138
  // src/target/sub-agent.ts
1112
- import { z as z8 } from "zod";
1139
+ import { z as z9 } from "zod";
1113
1140
  var SubAgentSchema = TargetSchema.extend({
1114
1141
  /** The full sub-agent markdown content (YAML frontmatter + body) */
1115
- subAgentMd: z8.string()
1142
+ subAgentMd: z9.string()
1116
1143
  });
1117
1144
  var SubAgentInputBaseSchema = SubAgentSchema.omit({
1118
1145
  id: true,
@@ -1124,10 +1151,10 @@ var CreateSubAgentInputSchema = SubAgentInputBaseSchema;
1124
1151
  var UpdateSubAgentInputSchema = SubAgentInputBaseSchema.partial();
1125
1152
 
1126
1153
  // src/test/index.ts
1127
- import { z as z19 } from "zod";
1154
+ import { z as z20 } from "zod";
1128
1155
 
1129
1156
  // src/test/base.ts
1130
- import { z as z9 } from "zod";
1157
+ import { z as z10 } from "zod";
1131
1158
  var TestType = /* @__PURE__ */ ((TestType2) => {
1132
1159
  TestType2["LLM"] = "LLM";
1133
1160
  TestType2["TOOL"] = "TOOL";
@@ -1140,7 +1167,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
1140
1167
  TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
1141
1168
  return TestType2;
1142
1169
  })(TestType || {});
1143
- var TestTypeSchema = z9.enum(TestType);
1170
+ var TestTypeSchema = z10.enum(TestType);
1144
1171
  var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
1145
1172
  TestImportance2["LOW"] = "low";
1146
1173
  TestImportance2["MEDIUM"] = "medium";
@@ -1148,153 +1175,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
1148
1175
  TestImportance2["CRITICAL"] = "critical";
1149
1176
  return TestImportance2;
1150
1177
  })(TestImportance || {});
1151
- var TestImportanceSchema = z9.enum(TestImportance);
1152
- var BaseTestSchema = z9.object({
1153
- id: z9.string(),
1178
+ var TestImportanceSchema = z10.enum(TestImportance);
1179
+ var BaseTestSchema = z10.object({
1180
+ id: z10.string(),
1154
1181
  type: TestTypeSchema,
1155
- name: z9.string().min(3),
1156
- description: z9.string().optional(),
1182
+ name: z10.string().min(3),
1183
+ description: z10.string().optional(),
1157
1184
  importance: TestImportanceSchema.optional()
1158
1185
  });
1159
1186
 
1160
1187
  // src/test/llm.ts
1161
- import { z as z10 } from "zod";
1188
+ import { z as z11 } from "zod";
1162
1189
  var LLMTestSchema = BaseTestSchema.extend({
1163
- type: z10.literal("LLM" /* LLM */),
1190
+ type: z11.literal("LLM" /* LLM */),
1164
1191
  /** Maximum steps for the LLM to take */
1165
- maxSteps: z10.number().min(1).max(100),
1192
+ maxSteps: z11.number().min(1).max(100),
1166
1193
  /** Prompt to send to the evaluator */
1167
- prompt: z10.string().min(1),
1194
+ prompt: z11.string().min(1),
1168
1195
  /** ID of the evaluator agent to use */
1169
- evaluatorId: z10.string()
1196
+ evaluatorId: z11.string()
1170
1197
  });
1171
1198
 
1172
1199
  // src/test/tool.ts
1173
- import { z as z11 } from "zod";
1200
+ import { z as z12 } from "zod";
1174
1201
  var ToolTestSchema = BaseTestSchema.extend({
1175
- type: z11.literal("TOOL" /* TOOL */),
1202
+ type: z12.literal("TOOL" /* TOOL */),
1176
1203
  /** Name of the tool that should be called */
1177
- toolName: z11.string().min(3),
1204
+ toolName: z12.string().min(3),
1178
1205
  /** Expected arguments for the tool call */
1179
- args: z11.record(z11.string(), z11.any()),
1206
+ args: z12.record(z12.string(), z12.any()),
1180
1207
  /** Expected content in the tool results */
1181
- resultsContent: z11.string()
1208
+ resultsContent: z12.string()
1182
1209
  });
1183
1210
 
1184
1211
  // src/test/site-config.ts
1185
- import { z as z12 } from "zod";
1212
+ import { z as z13 } from "zod";
1186
1213
  var SiteConfigTestSchema = BaseTestSchema.extend({
1187
- type: z12.literal("SITE_CONFIG" /* SITE_CONFIG */),
1214
+ type: z13.literal("SITE_CONFIG" /* SITE_CONFIG */),
1188
1215
  /** URL to call */
1189
- url: z12.string().url(),
1216
+ url: z13.string().url(),
1190
1217
  /** HTTP method */
1191
- method: z12.enum(["GET", "POST"]),
1218
+ method: z13.enum(["GET", "POST"]),
1192
1219
  /** Request body (for POST) */
1193
- body: z12.string().optional(),
1220
+ body: z13.string().optional(),
1194
1221
  /** Expected HTTP status code */
1195
- expectedStatusCode: z12.number().int().min(100).max(599),
1222
+ expectedStatusCode: z13.number().int().min(100).max(599),
1196
1223
  /** Expected response content */
1197
- expectedResponse: z12.string().optional(),
1224
+ expectedResponse: z13.string().optional(),
1198
1225
  /** JMESPath expression to extract from response */
1199
- expectedResponseJMESPath: z12.string().optional()
1226
+ expectedResponseJMESPath: z13.string().optional()
1200
1227
  });
1201
1228
 
1202
1229
  // src/test/command-execution.ts
1203
- import { z as z13 } from "zod";
1230
+ import { z as z14 } from "zod";
1204
1231
  var AllowedCommands = [
1205
1232
  "yarn install --no-immutable && yarn build",
1206
1233
  "npm run build",
1207
1234
  "yarn typecheck"
1208
1235
  ];
1209
1236
  var CommandExecutionTestSchema = BaseTestSchema.extend({
1210
- type: z13.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
1237
+ type: z14.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
1211
1238
  /** Command to execute (must be in AllowedCommands) */
1212
- command: z13.string().refine((value) => AllowedCommands.includes(value), {
1239
+ command: z14.string().refine((value) => AllowedCommands.includes(value), {
1213
1240
  message: `Command must be one of: ${AllowedCommands.join(", ")}`
1214
1241
  }),
1215
1242
  /** Expected exit code (default: 0) */
1216
- expectedExitCode: z13.number().default(0).optional()
1243
+ expectedExitCode: z14.number().default(0).optional()
1217
1244
  });
1218
1245
 
1219
1246
  // src/test/file-presence.ts
1220
- import { z as z14 } from "zod";
1247
+ import { z as z15 } from "zod";
1221
1248
  var FilePresenceTestSchema = BaseTestSchema.extend({
1222
- type: z14.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
1249
+ type: z15.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
1223
1250
  /** Paths to check */
1224
- paths: z14.array(z14.string()),
1251
+ paths: z15.array(z15.string()),
1225
1252
  /** Whether files should exist (true) or not exist (false) */
1226
- shouldExist: z14.boolean()
1253
+ shouldExist: z15.boolean()
1227
1254
  });
1228
1255
 
1229
1256
  // src/test/file-content.ts
1230
- import { z as z15 } from "zod";
1231
- var FileContentCheckSchema = z15.object({
1257
+ import { z as z16 } from "zod";
1258
+ var FileContentCheckSchema = z16.object({
1232
1259
  /** Strings that must be present in the file */
1233
- contains: z15.array(z15.string()).optional(),
1260
+ contains: z16.array(z16.string()).optional(),
1234
1261
  /** Strings that must NOT be present in the file */
1235
- notContains: z15.array(z15.string()).optional(),
1262
+ notContains: z16.array(z16.string()).optional(),
1236
1263
  /** Regex pattern the content must match */
1237
- matches: z15.string().optional(),
1264
+ matches: z16.string().optional(),
1238
1265
  /** JSON path checks for structured content */
1239
- jsonPath: z15.array(
1240
- z15.object({
1241
- path: z15.string(),
1242
- value: z15.unknown()
1266
+ jsonPath: z16.array(
1267
+ z16.object({
1268
+ path: z16.string(),
1269
+ value: z16.unknown()
1243
1270
  })
1244
1271
  ).optional(),
1245
1272
  /** Lines that should be added (for diff checking) */
1246
- added: z15.array(z15.string()).optional(),
1273
+ added: z16.array(z16.string()).optional(),
1247
1274
  /** Lines that should be removed (for diff checking) */
1248
- removed: z15.array(z15.string()).optional()
1275
+ removed: z16.array(z16.string()).optional()
1249
1276
  });
1250
1277
  var FileContentTestSchema = BaseTestSchema.extend({
1251
- type: z15.literal("FILE_CONTENT" /* FILE_CONTENT */),
1278
+ type: z16.literal("FILE_CONTENT" /* FILE_CONTENT */),
1252
1279
  /** Path to the file to check */
1253
- path: z15.string(),
1280
+ path: z16.string(),
1254
1281
  /** Content checks to perform */
1255
1282
  checks: FileContentCheckSchema
1256
1283
  });
1257
1284
 
1258
1285
  // src/test/build-check.ts
1259
- import { z as z16 } from "zod";
1286
+ import { z as z17 } from "zod";
1260
1287
  var BuildCheckTestSchema = BaseTestSchema.extend({
1261
- type: z16.literal("BUILD_CHECK" /* BUILD_CHECK */),
1288
+ type: z17.literal("BUILD_CHECK" /* BUILD_CHECK */),
1262
1289
  /** Build command to execute */
1263
- command: z16.string(),
1290
+ command: z17.string(),
1264
1291
  /** Whether the build should succeed */
1265
- expectSuccess: z16.boolean(),
1292
+ expectSuccess: z17.boolean(),
1266
1293
  /** Maximum allowed warnings (optional) */
1267
- allowedWarnings: z16.number().optional(),
1294
+ allowedWarnings: z17.number().optional(),
1268
1295
  /** Timeout in milliseconds */
1269
- timeout: z16.number().optional()
1296
+ timeout: z17.number().optional()
1270
1297
  });
1271
1298
 
1272
1299
  // src/test/vitest.ts
1273
- import { z as z17 } from "zod";
1300
+ import { z as z18 } from "zod";
1274
1301
  var VitestTestSchema = BaseTestSchema.extend({
1275
- type: z17.literal("VITEST" /* VITEST */),
1302
+ type: z18.literal("VITEST" /* VITEST */),
1276
1303
  /** Test file content */
1277
- testFile: z17.string(),
1304
+ testFile: z18.string(),
1278
1305
  /** Name of the test file */
1279
- testFileName: z17.string(),
1306
+ testFileName: z18.string(),
1280
1307
  /** Minimum pass rate required (0-100) */
1281
- minPassRate: z17.number().min(0).max(100)
1308
+ minPassRate: z18.number().min(0).max(100)
1282
1309
  });
1283
1310
 
1284
1311
  // src/test/playwright-nl.ts
1285
- import { z as z18 } from "zod";
1312
+ import { z as z19 } from "zod";
1286
1313
  var PlaywrightNLTestSchema = BaseTestSchema.extend({
1287
- type: z18.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
1314
+ type: z19.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
1288
1315
  /** Natural language steps to execute */
1289
- steps: z18.array(z18.string()),
1316
+ steps: z19.array(z19.string()),
1290
1317
  /** Expected outcome description */
1291
- expectedOutcome: z18.string(),
1318
+ expectedOutcome: z19.string(),
1292
1319
  /** Timeout in milliseconds */
1293
- timeout: z18.number().optional()
1320
+ timeout: z19.number().optional()
1294
1321
  });
1295
1322
 
1296
1323
  // src/test/index.ts
1297
- var TestSchema = z19.discriminatedUnion("type", [
1324
+ var TestSchema = z20.discriminatedUnion("type", [
1298
1325
  LLMTestSchema,
1299
1326
  ToolTestSchema,
1300
1327
  SiteConfigTestSchema,
@@ -1307,44 +1334,52 @@ var TestSchema = z19.discriminatedUnion("type", [
1307
1334
  ]);
1308
1335
 
1309
1336
  // src/scenario/assertions.ts
1310
- import { z as z20 } from "zod";
1311
- var SkillWasCalledAssertionSchema = z20.object({
1312
- type: z20.literal("skill_was_called"),
1337
+ import { z as z21 } from "zod";
1338
+ var SkillWasCalledAssertionSchema = z21.object({
1339
+ type: z21.literal("skill_was_called"),
1313
1340
  /** Names of the skills that must have been called (matched against trace Skill tool args) */
1314
- skillNames: z20.array(z20.string().min(1)).min(1)
1341
+ skillNames: z21.array(z21.string().min(1)).min(1)
1315
1342
  });
1316
- var BuildPassedAssertionSchema = z20.object({
1317
- type: z20.literal("build_passed"),
1343
+ var ToolCalledWithParamAssertionSchema = z21.object({
1344
+ type: z21.literal("tool_called_with_param"),
1345
+ /** Name of the tool that must have been called */
1346
+ toolName: z21.string().min(1),
1347
+ /** JSON string of key-value pairs for expected parameters (substring match) */
1348
+ expectedParams: z21.string().min(1)
1349
+ });
1350
+ var BuildPassedAssertionSchema = z21.object({
1351
+ type: z21.literal("build_passed"),
1318
1352
  /** Command to run (default: "yarn build") */
1319
- command: z20.string().optional(),
1353
+ command: z21.string().optional(),
1320
1354
  /** Expected exit code (default: 0) */
1321
- expectedExitCode: z20.number().int().optional()
1355
+ expectedExitCode: z21.number().int().optional()
1322
1356
  });
1323
- var CostAssertionSchema = z20.object({
1324
- type: z20.literal("cost"),
1357
+ var CostAssertionSchema = z21.object({
1358
+ type: z21.literal("cost"),
1325
1359
  /** Maximum allowed cost in USD */
1326
- maxCostUsd: z20.number().positive()
1360
+ maxCostUsd: z21.number().positive()
1327
1361
  });
1328
- var LlmJudgeAssertionSchema = z20.object({
1329
- type: z20.literal("llm_judge"),
1362
+ var LlmJudgeAssertionSchema = z21.object({
1363
+ type: z21.literal("llm_judge"),
1330
1364
  /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
1331
- prompt: z20.string(),
1365
+ prompt: z21.string(),
1332
1366
  /** Optional system prompt for the judge (default asks for JSON with score) */
1333
- systemPrompt: z20.string().optional(),
1367
+ systemPrompt: z21.string().optional(),
1334
1368
  /** Minimum score to pass (0-100, default 70) */
1335
- minScore: z20.number().int().min(0).max(100).optional(),
1369
+ minScore: z21.number().int().min(0).max(100).optional(),
1336
1370
  /** Model for the judge (e.g. claude-3-5-haiku) */
1337
- model: z20.string().optional(),
1338
- maxTokens: z20.number().int().optional(),
1339
- temperature: z20.number().min(0).max(1).optional()
1371
+ model: z21.string().optional(),
1372
+ maxTokens: z21.number().int().optional(),
1373
+ temperature: z21.number().min(0).max(1).optional()
1340
1374
  });
1341
- var TimeAssertionSchema = z20.object({
1342
- type: z20.literal("time_limit"),
1375
+ var TimeAssertionSchema = z21.object({
1376
+ type: z21.literal("time_limit"),
1343
1377
  /** Maximum allowed duration in milliseconds */
1344
- maxDurationMs: z20.number().int().positive()
1378
+ maxDurationMs: z21.number().int().positive()
1345
1379
  });
1346
- var AssertionSchema = z20.union([
1380
+ var AssertionSchema = z21.union([
1347
1381
  SkillWasCalledAssertionSchema,
1382
+ ToolCalledWithParamAssertionSchema,
1348
1383
  BuildPassedAssertionSchema,
1349
1384
  TimeAssertionSchema,
1350
1385
  CostAssertionSchema,
@@ -1352,33 +1387,33 @@ var AssertionSchema = z20.union([
1352
1387
  ]);
1353
1388
 
1354
1389
  // src/scenario/environment.ts
1355
- import { z as z21 } from "zod";
1356
- var LocalProjectConfigSchema = z21.object({
1390
+ import { z as z22 } from "zod";
1391
+ var LocalProjectConfigSchema = z22.object({
1357
1392
  /** Template ID to use for the local project */
1358
- templateId: z21.string().optional(),
1393
+ templateId: z22.string().optional(),
1359
1394
  /** Files to create in the project */
1360
- files: z21.array(
1361
- z21.object({
1362
- path: z21.string().min(1),
1363
- content: z21.string().min(1)
1395
+ files: z22.array(
1396
+ z22.object({
1397
+ path: z22.string().min(1),
1398
+ content: z22.string().min(1)
1364
1399
  })
1365
1400
  ).optional()
1366
1401
  });
1367
- var MetaSiteConfigSchema = z21.object({
1368
- configurations: z21.array(
1369
- z21.object({
1370
- name: z21.string().min(1),
1371
- apiCalls: z21.array(
1372
- z21.object({
1373
- url: z21.string().url(),
1374
- method: z21.enum(["POST", "PUT"]),
1375
- body: z21.string()
1402
+ var MetaSiteConfigSchema = z22.object({
1403
+ configurations: z22.array(
1404
+ z22.object({
1405
+ name: z22.string().min(1),
1406
+ apiCalls: z22.array(
1407
+ z22.object({
1408
+ url: z22.string().url(),
1409
+ method: z22.enum(["POST", "PUT"]),
1410
+ body: z22.string()
1376
1411
  })
1377
1412
  )
1378
1413
  })
1379
1414
  ).optional()
1380
1415
  });
1381
- var EnvironmentSchema = z21.object({
1416
+ var EnvironmentSchema = z22.object({
1382
1417
  /** Local project configuration */
1383
1418
  localProject: LocalProjectConfigSchema.optional(),
1384
1419
  /** Meta site configuration */
@@ -1386,64 +1421,71 @@ var EnvironmentSchema = z21.object({
1386
1421
  });
1387
1422
 
1388
1423
  // src/scenario/test-scenario.ts
1389
- import { z as z23 } from "zod";
1424
+ import { z as z24 } from "zod";
1390
1425
 
1391
1426
  // src/assertion/assertion.ts
1392
- import { z as z22 } from "zod";
1393
- var AssertionTypeSchema = z22.enum([
1427
+ import { z as z23 } from "zod";
1428
+ var AssertionTypeSchema = z23.enum([
1394
1429
  "skill_was_called",
1430
+ "tool_called_with_param",
1395
1431
  "build_passed",
1396
1432
  "time_limit",
1397
1433
  "cost",
1398
1434
  "llm_judge"
1399
1435
  ]);
1400
- var AssertionParameterTypeSchema = z22.enum([
1436
+ var AssertionParameterTypeSchema = z23.enum([
1401
1437
  "string",
1402
1438
  "number",
1403
1439
  "boolean"
1404
1440
  ]);
1405
- var AssertionParameterSchema = z22.object({
1441
+ var AssertionParameterSchema = z23.object({
1406
1442
  /** Parameter name (used as key in params object) */
1407
- name: z22.string().min(1),
1443
+ name: z23.string().min(1),
1408
1444
  /** Display label for the parameter */
1409
- label: z22.string().min(1),
1445
+ label: z23.string().min(1),
1410
1446
  /** Parameter type */
1411
1447
  type: AssertionParameterTypeSchema,
1412
1448
  /** Whether this parameter is required */
1413
- required: z22.boolean(),
1449
+ required: z23.boolean(),
1414
1450
  /** Default value (optional, used when not provided) */
1415
- defaultValue: z22.union([z22.string(), z22.number(), z22.boolean()]).optional(),
1451
+ defaultValue: z23.union([z23.string(), z23.number(), z23.boolean()]).optional(),
1416
1452
  /** If true, parameter is hidden by default behind "Show advanced options" */
1417
- advanced: z22.boolean().optional()
1453
+ advanced: z23.boolean().optional()
1418
1454
  });
1419
- var ScenarioAssertionLinkSchema = z22.object({
1455
+ var ScenarioAssertionLinkSchema = z23.object({
1420
1456
  /** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
1421
- assertionId: z22.string(),
1457
+ assertionId: z23.string(),
1422
1458
  /** Parameter values for this assertion in this scenario */
1423
- params: z22.record(
1424
- z22.string(),
1425
- z22.union([z22.string(), z22.number(), z22.boolean(), z22.null()])
1459
+ params: z23.record(
1460
+ z23.string(),
1461
+ z23.union([z23.string(), z23.number(), z23.boolean(), z23.null()])
1426
1462
  ).optional()
1427
1463
  });
1428
- var SkillWasCalledConfigSchema = z22.object({
1464
+ var SkillWasCalledConfigSchema = z23.object({
1429
1465
  /** Names of the skills that must have been called */
1430
- skillNames: z22.array(z22.string().min(1)).min(1)
1466
+ skillNames: z23.array(z23.string().min(1)).min(1)
1431
1467
  });
1432
- var CostConfigSchema = z22.strictObject({
1468
+ var CostConfigSchema = z23.strictObject({
1433
1469
  /** Maximum allowed cost in USD */
1434
- maxCostUsd: z22.number().positive()
1470
+ maxCostUsd: z23.number().positive()
1471
+ });
1472
+ var ToolCalledWithParamConfigSchema = z23.strictObject({
1473
+ /** Name of the tool that must have been called */
1474
+ toolName: z23.string().min(1),
1475
+ /** JSON string of key-value pairs for expected parameters (substring match) */
1476
+ expectedParams: z23.string().min(1)
1435
1477
  });
1436
- var BuildPassedConfigSchema = z22.strictObject({
1478
+ var BuildPassedConfigSchema = z23.strictObject({
1437
1479
  /** Command to run (default: "yarn build") */
1438
- command: z22.string().optional(),
1480
+ command: z23.string().optional(),
1439
1481
  /** Expected exit code (default: 0) */
1440
- expectedExitCode: z22.number().int().optional()
1482
+ expectedExitCode: z23.number().int().optional()
1441
1483
  });
1442
- var TimeConfigSchema = z22.strictObject({
1484
+ var TimeConfigSchema = z23.strictObject({
1443
1485
  /** Maximum allowed duration in milliseconds */
1444
- maxDurationMs: z22.number().int().positive()
1486
+ maxDurationMs: z23.number().int().positive()
1445
1487
  });
1446
- var LlmJudgeConfigSchema = z22.object({
1488
+ var LlmJudgeConfigSchema = z23.object({
1447
1489
  /**
1448
1490
  * Prompt template with placeholders:
1449
1491
  * - {{output}}: agent's final output
@@ -1454,32 +1496,34 @@ var LlmJudgeConfigSchema = z22.object({
1454
1496
  * - {{trace}}: step-by-step trace of tool calls
1455
1497
  * - Custom parameters defined in the parameters array
1456
1498
  */
1457
- prompt: z22.string().min(1),
1499
+ prompt: z23.string().min(1),
1458
1500
  /** Optional system prompt for the judge */
1459
- systemPrompt: z22.string().optional(),
1501
+ systemPrompt: z23.string().optional(),
1460
1502
  /** Minimum score to pass (0-100, default 70) */
1461
- minScore: z22.number().int().min(0).max(100).optional(),
1503
+ minScore: z23.number().int().min(0).max(100).optional(),
1462
1504
  /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
1463
- model: z22.string().optional(),
1505
+ model: z23.string().optional(),
1464
1506
  /** Max output tokens */
1465
- maxTokens: z22.number().int().optional(),
1507
+ maxTokens: z23.number().int().optional(),
1466
1508
  /** Temperature (0-1) */
1467
- temperature: z22.number().min(0).max(1).optional(),
1509
+ temperature: z23.number().min(0).max(1).optional(),
1468
1510
  /** User-defined parameters for this assertion */
1469
- parameters: z22.array(AssertionParameterSchema).optional()
1511
+ parameters: z23.array(AssertionParameterSchema).optional()
1470
1512
  });
1471
- var AssertionConfigSchema = z22.union([
1513
+ var AssertionConfigSchema = z23.union([
1472
1514
  LlmJudgeConfigSchema,
1473
1515
  // requires prompt - check first
1474
1516
  SkillWasCalledConfigSchema,
1475
1517
  // requires skillNames
1518
+ ToolCalledWithParamConfigSchema,
1519
+ // requires toolName + expectedParams, uses strictObject
1476
1520
  TimeConfigSchema,
1477
1521
  // requires maxDurationMs, uses strictObject
1478
1522
  CostConfigSchema,
1479
1523
  // requires maxCostUsd, uses strictObject
1480
1524
  BuildPassedConfigSchema,
1481
1525
  // all optional, uses strictObject to reject unknown keys
1482
- z22.object({})
1526
+ z23.object({})
1483
1527
  // fallback empty config
1484
1528
  ]);
1485
1529
  var CustomAssertionSchema = TenantEntitySchema.extend({
@@ -1501,6 +1545,8 @@ function validateAssertionConfig(type, config) {
1501
1545
  return SkillWasCalledConfigSchema.safeParse(config).success;
1502
1546
  case "cost":
1503
1547
  return CostConfigSchema.safeParse(config).success;
1548
+ case "tool_called_with_param":
1549
+ return ToolCalledWithParamConfigSchema.safeParse(config).success;
1504
1550
  case "build_passed":
1505
1551
  return BuildPassedConfigSchema.safeParse(config).success;
1506
1552
  case "time_limit":
@@ -1528,23 +1574,23 @@ function getLlmJudgeConfig(assertion) {
1528
1574
  }
1529
1575
 
1530
1576
  // src/scenario/test-scenario.ts
1531
- var ExpectedFileSchema = z23.object({
1577
+ var ExpectedFileSchema = z24.object({
1532
1578
  /** Relative path where the file should be created */
1533
- path: z23.string(),
1579
+ path: z24.string(),
1534
1580
  /** Optional expected content */
1535
- content: z23.string().optional()
1581
+ content: z24.string().optional()
1536
1582
  });
1537
1583
  var TestScenarioSchema = TenantEntitySchema.extend({
1538
1584
  /** The prompt sent to the agent to trigger the task */
1539
- triggerPrompt: z23.string().min(10),
1585
+ triggerPrompt: z24.string().min(10),
1540
1586
  /** ID of the template to use for this scenario (null = no template) */
1541
- templateId: z23.string().nullish(),
1587
+ templateId: z24.string().nullish(),
1542
1588
  /** Inline assertions to evaluate for this scenario (legacy) */
1543
- assertions: z23.array(AssertionSchema).optional(),
1589
+ assertions: z24.array(AssertionSchema).optional(),
1544
1590
  /** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
1545
- assertionIds: z23.array(z23.string()).optional(),
1591
+ assertionIds: z24.array(z24.string()).optional(),
1546
1592
  /** Linked assertions with per-scenario parameter values */
1547
- assertionLinks: z23.array(ScenarioAssertionLinkSchema).optional()
1593
+ assertionLinks: z24.array(ScenarioAssertionLinkSchema).optional()
1548
1594
  });
1549
1595
  var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
1550
1596
  id: true,
@@ -1555,10 +1601,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
1555
1601
  var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
1556
1602
 
1557
1603
  // src/suite/test-suite.ts
1558
- import { z as z24 } from "zod";
1604
+ import { z as z25 } from "zod";
1559
1605
  var TestSuiteSchema = TenantEntitySchema.extend({
1560
1606
  /** IDs of test scenarios in this suite */
1561
- scenarioIds: z24.array(z24.string())
1607
+ scenarioIds: z25.array(z25.string())
1562
1608
  });
1563
1609
  var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1564
1610
  id: true,
@@ -1569,21 +1615,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1569
1615
  var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
1570
1616
 
1571
1617
  // src/evaluation/metrics.ts
1572
- import { z as z25 } from "zod";
1573
- var TokenUsageSchema = z25.object({
1574
- prompt: z25.number(),
1575
- completion: z25.number(),
1576
- total: z25.number()
1577
- });
1578
- var EvalMetricsSchema = z25.object({
1579
- totalAssertions: z25.number(),
1580
- passed: z25.number(),
1581
- failed: z25.number(),
1582
- skipped: z25.number(),
1583
- errors: z25.number(),
1584
- passRate: z25.number(),
1585
- avgDuration: z25.number(),
1586
- totalDuration: z25.number()
1618
+ import { z as z26 } from "zod";
1619
+ var TokenUsageSchema = z26.object({
1620
+ prompt: z26.number(),
1621
+ completion: z26.number(),
1622
+ total: z26.number()
1623
+ });
1624
+ var EvalMetricsSchema = z26.object({
1625
+ totalAssertions: z26.number(),
1626
+ passed: z26.number(),
1627
+ failed: z26.number(),
1628
+ skipped: z26.number(),
1629
+ errors: z26.number(),
1630
+ passRate: z26.number(),
1631
+ avgDuration: z26.number(),
1632
+ totalDuration: z26.number()
1587
1633
  });
1588
1634
  var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1589
1635
  EvalStatus2["PENDING"] = "pending";
@@ -1593,7 +1639,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1593
1639
  EvalStatus2["CANCELLED"] = "cancelled";
1594
1640
  return EvalStatus2;
1595
1641
  })(EvalStatus || {});
1596
- var EvalStatusSchema = z25.enum(EvalStatus);
1642
+ var EvalStatusSchema = z26.enum(EvalStatus);
1597
1643
  var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1598
1644
  LLMStepType2["COMPLETION"] = "completion";
1599
1645
  LLMStepType2["TOOL_USE"] = "tool_use";
@@ -1601,52 +1647,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1601
1647
  LLMStepType2["THINKING"] = "thinking";
1602
1648
  return LLMStepType2;
1603
1649
  })(LLMStepType || {});
1604
- var LLMTraceStepSchema = z25.object({
1605
- id: z25.string(),
1606
- stepNumber: z25.number(),
1607
- type: z25.enum(LLMStepType),
1608
- model: z25.string(),
1609
- provider: z25.string(),
1610
- startedAt: z25.string(),
1611
- durationMs: z25.number(),
1650
+ var LLMTraceStepSchema = z26.object({
1651
+ id: z26.string(),
1652
+ stepNumber: z26.number(),
1653
+ type: z26.enum(LLMStepType),
1654
+ model: z26.string(),
1655
+ provider: z26.string(),
1656
+ startedAt: z26.string(),
1657
+ durationMs: z26.number(),
1612
1658
  tokenUsage: TokenUsageSchema,
1613
- costUsd: z25.number(),
1614
- toolName: z25.string().optional(),
1615
- toolArguments: z25.string().optional(),
1616
- inputPreview: z25.string().optional(),
1617
- outputPreview: z25.string().optional(),
1618
- success: z25.boolean(),
1619
- error: z25.string().optional()
1620
- });
1621
- var LLMBreakdownStatsSchema = z25.object({
1622
- count: z25.number(),
1623
- durationMs: z25.number(),
1624
- tokens: z25.number(),
1625
- costUsd: z25.number()
1626
- });
1627
- var LLMTraceSummarySchema = z25.object({
1628
- totalSteps: z25.number(),
1629
- totalDurationMs: z25.number(),
1659
+ costUsd: z26.number(),
1660
+ toolName: z26.string().optional(),
1661
+ toolArguments: z26.string().optional(),
1662
+ inputPreview: z26.string().optional(),
1663
+ outputPreview: z26.string().optional(),
1664
+ success: z26.boolean(),
1665
+ error: z26.string().optional()
1666
+ });
1667
+ var LLMBreakdownStatsSchema = z26.object({
1668
+ count: z26.number(),
1669
+ durationMs: z26.number(),
1670
+ tokens: z26.number(),
1671
+ costUsd: z26.number()
1672
+ });
1673
+ var LLMTraceSummarySchema = z26.object({
1674
+ totalSteps: z26.number(),
1675
+ totalDurationMs: z26.number(),
1630
1676
  totalTokens: TokenUsageSchema,
1631
- totalCostUsd: z25.number(),
1632
- stepTypeBreakdown: z25.record(z25.string(), LLMBreakdownStatsSchema).optional(),
1633
- modelBreakdown: z25.record(z25.string(), LLMBreakdownStatsSchema),
1634
- modelsUsed: z25.array(z25.string())
1635
- });
1636
- var LLMTraceSchema = z25.object({
1637
- id: z25.string(),
1638
- steps: z25.array(LLMTraceStepSchema),
1677
+ totalCostUsd: z26.number(),
1678
+ stepTypeBreakdown: z26.record(z26.string(), LLMBreakdownStatsSchema).optional(),
1679
+ modelBreakdown: z26.record(z26.string(), LLMBreakdownStatsSchema),
1680
+ modelsUsed: z26.array(z26.string())
1681
+ });
1682
+ var LLMTraceSchema = z26.object({
1683
+ id: z26.string(),
1684
+ steps: z26.array(LLMTraceStepSchema),
1639
1685
  summary: LLMTraceSummarySchema
1640
1686
  });
1641
1687
 
1642
1688
  // src/evaluation/eval-result.ts
1643
- import { z as z28 } from "zod";
1689
+ import { z as z29 } from "zod";
1644
1690
 
1645
1691
  // src/evaluation/eval-run.ts
1646
- import { z as z27 } from "zod";
1692
+ import { z as z28 } from "zod";
1647
1693
 
1648
1694
  // src/evaluation/live-trace.ts
1649
- import { z as z26 } from "zod";
1695
+ import { z as z27 } from "zod";
1650
1696
  var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1651
1697
  LiveTraceEventType2["THINKING"] = "thinking";
1652
1698
  LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -1660,37 +1706,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1660
1706
  LiveTraceEventType2["USER"] = "user";
1661
1707
  return LiveTraceEventType2;
1662
1708
  })(LiveTraceEventType || {});
1663
- var LiveTraceEventSchema = z26.object({
1709
+ var LiveTraceEventSchema = z27.object({
1664
1710
  /** The evaluation run ID */
1665
- evalRunId: z26.string(),
1711
+ evalRunId: z27.string(),
1666
1712
  /** The scenario ID being executed */
1667
- scenarioId: z26.string(),
1713
+ scenarioId: z27.string(),
1668
1714
  /** The scenario name for display */
1669
- scenarioName: z26.string(),
1715
+ scenarioName: z27.string(),
1670
1716
  /** The target ID (skill, agent, etc.) */
1671
- targetId: z26.string(),
1717
+ targetId: z27.string(),
1672
1718
  /** The target name for display */
1673
- targetName: z26.string(),
1719
+ targetName: z27.string(),
1674
1720
  /** Step number in the current scenario execution */
1675
- stepNumber: z26.number(),
1721
+ stepNumber: z27.number(),
1676
1722
  /** Type of trace event */
1677
- type: z26.enum(LiveTraceEventType),
1723
+ type: z27.enum(LiveTraceEventType),
1678
1724
  /** Tool name if this is a tool_use event */
1679
- toolName: z26.string().optional(),
1725
+ toolName: z27.string().optional(),
1680
1726
  /** Tool arguments preview (truncated JSON) */
1681
- toolArgs: z26.string().optional(),
1727
+ toolArgs: z27.string().optional(),
1682
1728
  /** Output preview (truncated text) */
1683
- outputPreview: z26.string().optional(),
1729
+ outputPreview: z27.string().optional(),
1684
1730
  /** File path for file operations */
1685
- filePath: z26.string().optional(),
1731
+ filePath: z27.string().optional(),
1686
1732
  /** Elapsed time in milliseconds for progress events */
1687
- elapsedMs: z26.number().optional(),
1733
+ elapsedMs: z27.number().optional(),
1688
1734
  /** Thinking/reasoning text from Claude */
1689
- thinking: z26.string().optional(),
1735
+ thinking: z27.string().optional(),
1690
1736
  /** Timestamp when this event occurred */
1691
- timestamp: z26.string(),
1737
+ timestamp: z27.string(),
1692
1738
  /** Whether this is the final event for this scenario */
1693
- isComplete: z26.boolean()
1739
+ isComplete: z27.boolean()
1694
1740
  });
1695
1741
  var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
1696
1742
  function parseTraceEventLine(line) {
@@ -1718,14 +1764,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
1718
1764
  TriggerType2["MANUAL"] = "MANUAL";
1719
1765
  return TriggerType2;
1720
1766
  })(TriggerType || {});
1721
- var TriggerMetadataSchema = z27.object({
1722
- version: z27.string().optional(),
1723
- resourceUpdated: z27.array(z27.string()).optional()
1767
+ var TriggerMetadataSchema = z28.object({
1768
+ version: z28.string().optional(),
1769
+ resourceUpdated: z28.array(z28.string()).optional()
1724
1770
  });
1725
- var TriggerSchema = z27.object({
1726
- id: z27.string(),
1771
+ var TriggerSchema = z28.object({
1772
+ id: z28.string(),
1727
1773
  metadata: TriggerMetadataSchema.optional(),
1728
- type: z27.enum(TriggerType)
1774
+ type: z28.enum(TriggerType)
1729
1775
  });
1730
1776
  var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
1731
1777
  FailureCategory2["MISSING_FILE"] = "missing_file";
@@ -1743,28 +1789,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
1743
1789
  FailureSeverity2["LOW"] = "low";
1744
1790
  return FailureSeverity2;
1745
1791
  })(FailureSeverity || {});
1746
- var DiffLineTypeSchema = z27.enum(["added", "removed", "unchanged"]);
1747
- var DiffLineSchema = z27.object({
1792
+ var DiffLineTypeSchema = z28.enum(["added", "removed", "unchanged"]);
1793
+ var DiffLineSchema = z28.object({
1748
1794
  type: DiffLineTypeSchema,
1749
- content: z27.string(),
1750
- lineNumber: z27.number()
1751
- });
1752
- var DiffContentSchema = z27.object({
1753
- path: z27.string(),
1754
- expected: z27.string(),
1755
- actual: z27.string(),
1756
- diffLines: z27.array(DiffLineSchema),
1757
- renamedFrom: z27.string().optional()
1758
- });
1759
- var CommandExecutionSchema = z27.object({
1760
- command: z27.string(),
1761
- exitCode: z27.number(),
1762
- output: z27.string().optional(),
1763
- duration: z27.number()
1764
- });
1765
- var FileModificationSchema = z27.object({
1766
- path: z27.string(),
1767
- action: z27.enum(["created", "modified", "deleted"])
1795
+ content: z28.string(),
1796
+ lineNumber: z28.number()
1797
+ });
1798
+ var DiffContentSchema = z28.object({
1799
+ path: z28.string(),
1800
+ expected: z28.string(),
1801
+ actual: z28.string(),
1802
+ diffLines: z28.array(DiffLineSchema),
1803
+ renamedFrom: z28.string().optional()
1804
+ });
1805
+ var CommandExecutionSchema = z28.object({
1806
+ command: z28.string(),
1807
+ exitCode: z28.number(),
1808
+ output: z28.string().optional(),
1809
+ duration: z28.number()
1810
+ });
1811
+ var FileModificationSchema = z28.object({
1812
+ path: z28.string(),
1813
+ action: z28.enum(["created", "modified", "deleted"])
1768
1814
  });
1769
1815
  var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1770
1816
  TemplateFileStatus2["NEW"] = "new";
@@ -1772,81 +1818,83 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1772
1818
  TemplateFileStatus2["UNCHANGED"] = "unchanged";
1773
1819
  return TemplateFileStatus2;
1774
1820
  })(TemplateFileStatus || {});
1775
- var TemplateFileSchema = z27.object({
1821
+ var TemplateFileSchema = z28.object({
1776
1822
  /** Relative path within the template */
1777
- path: z27.string(),
1823
+ path: z28.string(),
1778
1824
  /** Full file content after execution */
1779
- content: z27.string(),
1825
+ content: z28.string(),
1780
1826
  /** File status (new, modified, unchanged) */
1781
- status: z27.enum(["new", "modified", "unchanged"])
1782
- });
1783
- var ApiCallSchema = z27.object({
1784
- endpoint: z27.string(),
1785
- tokensUsed: z27.number(),
1786
- duration: z27.number()
1787
- });
1788
- var ExecutionTraceSchema = z27.object({
1789
- commands: z27.array(CommandExecutionSchema),
1790
- filesModified: z27.array(FileModificationSchema),
1791
- apiCalls: z27.array(ApiCallSchema),
1792
- totalDuration: z27.number()
1793
- });
1794
- var FailureAnalysisSchema = z27.object({
1795
- category: z27.enum(FailureCategory),
1796
- severity: z27.enum(FailureSeverity),
1797
- summary: z27.string(),
1798
- details: z27.string(),
1799
- rootCause: z27.string(),
1800
- suggestedFix: z27.string(),
1801
- relatedAssertions: z27.array(z27.string()),
1802
- codeSnippet: z27.string().optional(),
1803
- similarIssues: z27.array(z27.string()).optional(),
1804
- patternId: z27.string().optional(),
1827
+ status: z28.enum(["new", "modified", "unchanged"])
1828
+ });
1829
+ var ApiCallSchema = z28.object({
1830
+ endpoint: z28.string(),
1831
+ tokensUsed: z28.number(),
1832
+ duration: z28.number()
1833
+ });
1834
+ var ExecutionTraceSchema = z28.object({
1835
+ commands: z28.array(CommandExecutionSchema),
1836
+ filesModified: z28.array(FileModificationSchema),
1837
+ apiCalls: z28.array(ApiCallSchema),
1838
+ totalDuration: z28.number()
1839
+ });
1840
+ var FailureAnalysisSchema = z28.object({
1841
+ category: z28.enum(FailureCategory),
1842
+ severity: z28.enum(FailureSeverity),
1843
+ summary: z28.string(),
1844
+ details: z28.string(),
1845
+ rootCause: z28.string(),
1846
+ suggestedFix: z28.string(),
1847
+ relatedAssertions: z28.array(z28.string()),
1848
+ codeSnippet: z28.string().optional(),
1849
+ similarIssues: z28.array(z28.string()).optional(),
1850
+ patternId: z28.string().optional(),
1805
1851
  // Extended fields for detailed debugging
1806
1852
  diff: DiffContentSchema.optional(),
1807
1853
  executionTrace: ExecutionTraceSchema.optional()
1808
1854
  });
1809
1855
  var EvalRunSchema = TenantEntitySchema.extend({
1810
1856
  /** Agent ID for this run */
1811
- agentId: z27.string().optional(),
1857
+ agentId: z28.string().optional(),
1812
1858
  /** Skills group ID for this run */
1813
- skillsGroupId: z27.string().optional(),
1859
+ skillsGroupId: z28.string().optional(),
1814
1860
  /** Map of skillId to skillVersionId for this run */
1815
- skillVersions: z27.record(z27.string(), z27.string()).optional(),
1861
+ skillVersions: z28.record(z28.string(), z28.string()).optional(),
1816
1862
  /** Scenario IDs to run */
1817
- scenarioIds: z27.array(z27.string()),
1863
+ scenarioIds: z28.array(z28.string()),
1818
1864
  /** Current status */
1819
1865
  status: EvalStatusSchema,
1820
1866
  /** Progress percentage (0-100) */
1821
- progress: z27.number(),
1867
+ progress: z28.number(),
1822
1868
  /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
1823
- results: z27.array(z27.lazy(() => EvalRunResultSchema)),
1869
+ results: z28.array(z28.lazy(() => EvalRunResultSchema)),
1824
1870
  /** Aggregated metrics across all results */
1825
1871
  aggregateMetrics: EvalMetricsSchema,
1826
1872
  /** Failure analyses */
1827
- failureAnalyses: z27.array(FailureAnalysisSchema).optional(),
1873
+ failureAnalyses: z28.array(FailureAnalysisSchema).optional(),
1828
1874
  /** Aggregated LLM trace summary */
1829
1875
  llmTraceSummary: LLMTraceSummarySchema.optional(),
1830
1876
  /** What triggered this run */
1831
1877
  trigger: TriggerSchema.optional(),
1832
1878
  /** When the run started (set when evaluation is triggered) */
1833
- startedAt: z27.string().optional(),
1879
+ startedAt: z28.string().optional(),
1834
1880
  /** When the run completed */
1835
- completedAt: z27.string().optional(),
1881
+ completedAt: z28.string().optional(),
1836
1882
  /** Live trace events captured during execution (for playback on results page) */
1837
- liveTraceEvents: z27.array(LiveTraceEventSchema).optional(),
1883
+ liveTraceEvents: z28.array(LiveTraceEventSchema).optional(),
1838
1884
  /** Remote job ID for tracking execution in Dev Machines */
1839
- jobId: z27.string().optional(),
1885
+ jobId: z28.string().optional(),
1840
1886
  /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
1841
- jobStatus: z27.string().optional(),
1887
+ jobStatus: z28.string().optional(),
1842
1888
  /** Remote job error message if the job failed */
1843
- jobError: z27.string().optional(),
1889
+ jobError: z28.string().optional(),
1844
1890
  /** Timestamp of the last job status check */
1845
- jobStatusCheckedAt: z27.string().optional(),
1891
+ jobStatusCheckedAt: z28.string().optional(),
1846
1892
  /** MCP server IDs to enable for this run (optional) */
1847
- mcpIds: z27.array(z27.string()).optional(),
1893
+ mcpIds: z28.array(z28.string()).optional(),
1848
1894
  /** Sub-agent IDs to enable for this run (optional) */
1849
- subAgentIds: z27.array(z27.string()).optional()
1895
+ subAgentIds: z28.array(z28.string()).optional(),
1896
+ /** Rule IDs to enable for this run (optional) */
1897
+ ruleIds: z28.array(z28.string()).optional()
1850
1898
  });
1851
1899
  var CreateEvalRunInputSchema = EvalRunSchema.omit({
1852
1900
  id: true,
@@ -1859,28 +1907,28 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
1859
1907
  startedAt: true,
1860
1908
  completedAt: true
1861
1909
  });
1862
- var EvaluationProgressSchema = z27.object({
1863
- runId: z27.string(),
1864
- targetId: z27.string(),
1865
- totalScenarios: z27.number(),
1866
- completedScenarios: z27.number(),
1867
- scenarioProgress: z27.array(
1868
- z27.object({
1869
- scenarioId: z27.string(),
1870
- currentStep: z27.string(),
1871
- error: z27.string().optional()
1910
+ var EvaluationProgressSchema = z28.object({
1911
+ runId: z28.string(),
1912
+ targetId: z28.string(),
1913
+ totalScenarios: z28.number(),
1914
+ completedScenarios: z28.number(),
1915
+ scenarioProgress: z28.array(
1916
+ z28.object({
1917
+ scenarioId: z28.string(),
1918
+ currentStep: z28.string(),
1919
+ error: z28.string().optional()
1872
1920
  })
1873
1921
  ),
1874
- createdAt: z27.number()
1922
+ createdAt: z28.number()
1875
1923
  });
1876
- var EvaluationLogSchema = z27.object({
1877
- runId: z27.string(),
1878
- scenarioId: z27.string(),
1879
- log: z27.object({
1880
- level: z27.enum(["info", "error", "debug"]),
1881
- message: z27.string().optional(),
1882
- args: z27.array(z27.any()).optional(),
1883
- error: z27.string().optional()
1924
+ var EvaluationLogSchema = z28.object({
1925
+ runId: z28.string(),
1926
+ scenarioId: z28.string(),
1927
+ log: z28.object({
1928
+ level: z28.enum(["info", "error", "debug"]),
1929
+ message: z28.string().optional(),
1930
+ args: z28.array(z28.any()).optional(),
1931
+ error: z28.string().optional()
1884
1932
  })
1885
1933
  });
1886
1934
  var LLM_TIMEOUT = 12e4;
@@ -1893,95 +1941,95 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
1893
1941
  AssertionResultStatus2["ERROR"] = "error";
1894
1942
  return AssertionResultStatus2;
1895
1943
  })(AssertionResultStatus || {});
1896
- var AssertionResultSchema = z28.object({
1897
- id: z28.string(),
1898
- assertionId: z28.string(),
1899
- assertionType: z28.string(),
1900
- assertionName: z28.string(),
1901
- status: z28.enum(AssertionResultStatus),
1902
- message: z28.string().optional(),
1903
- expected: z28.string().optional(),
1904
- actual: z28.string().optional(),
1905
- duration: z28.number().optional(),
1906
- details: z28.record(z28.string(), z28.unknown()).optional(),
1907
- llmTraceSteps: z28.array(LLMTraceStepSchema).optional()
1908
- });
1909
- var EvalRunResultSchema = z28.object({
1910
- id: z28.string(),
1911
- targetId: z28.string(),
1912
- targetName: z28.string().optional(),
1944
+ var AssertionResultSchema = z29.object({
1945
+ id: z29.string(),
1946
+ assertionId: z29.string(),
1947
+ assertionType: z29.string(),
1948
+ assertionName: z29.string(),
1949
+ status: z29.enum(AssertionResultStatus),
1950
+ message: z29.string().optional(),
1951
+ expected: z29.string().optional(),
1952
+ actual: z29.string().optional(),
1953
+ duration: z29.number().optional(),
1954
+ details: z29.record(z29.string(), z29.unknown()).optional(),
1955
+ llmTraceSteps: z29.array(LLMTraceStepSchema).optional()
1956
+ });
1957
+ var EvalRunResultSchema = z29.object({
1958
+ id: z29.string(),
1959
+ targetId: z29.string(),
1960
+ targetName: z29.string().optional(),
1913
1961
  /** SkillVersion ID used for this evaluation (for version tracking) */
1914
- skillVersionId: z28.string().optional(),
1962
+ skillVersionId: z29.string().optional(),
1915
1963
  /** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
1916
- skillVersion: z28.string().optional(),
1917
- scenarioId: z28.string(),
1918
- scenarioName: z28.string(),
1964
+ skillVersion: z29.string().optional(),
1965
+ scenarioId: z29.string(),
1966
+ scenarioName: z29.string(),
1919
1967
  modelConfig: ModelConfigSchema.optional(),
1920
- assertionResults: z28.array(AssertionResultSchema),
1968
+ assertionResults: z29.array(AssertionResultSchema),
1921
1969
  metrics: EvalMetricsSchema.optional(),
1922
- passed: z28.number(),
1923
- failed: z28.number(),
1924
- passRate: z28.number(),
1925
- duration: z28.number(),
1926
- outputText: z28.string().optional(),
1927
- files: z28.array(ExpectedFileSchema).optional(),
1928
- fileDiffs: z28.array(DiffContentSchema).optional(),
1970
+ passed: z29.number(),
1971
+ failed: z29.number(),
1972
+ passRate: z29.number(),
1973
+ duration: z29.number(),
1974
+ outputText: z29.string().optional(),
1975
+ files: z29.array(ExpectedFileSchema).optional(),
1976
+ fileDiffs: z29.array(DiffContentSchema).optional(),
1929
1977
  /** Full template files after execution with status indicators */
1930
- templateFiles: z28.array(TemplateFileSchema).optional(),
1931
- startedAt: z28.string().optional(),
1932
- completedAt: z28.string().optional(),
1978
+ templateFiles: z29.array(TemplateFileSchema).optional(),
1979
+ startedAt: z29.string().optional(),
1980
+ completedAt: z29.string().optional(),
1933
1981
  llmTrace: LLMTraceSchema.optional()
1934
1982
  });
1935
- var PromptResultSchema = z28.object({
1936
- text: z28.string(),
1937
- files: z28.array(z28.unknown()).optional(),
1938
- finishReason: z28.string().optional(),
1939
- reasoning: z28.string().optional(),
1940
- reasoningDetails: z28.unknown().optional(),
1941
- toolCalls: z28.array(z28.unknown()).optional(),
1942
- toolResults: z28.array(z28.unknown()).optional(),
1943
- warnings: z28.array(z28.unknown()).optional(),
1944
- sources: z28.array(z28.unknown()).optional(),
1945
- steps: z28.array(z28.unknown()),
1946
- generationTimeMs: z28.number(),
1947
- prompt: z28.string(),
1948
- systemPrompt: z28.string(),
1949
- usage: z28.object({
1950
- totalTokens: z28.number().optional(),
1951
- totalMicrocentsSpent: z28.number().optional()
1983
+ var PromptResultSchema = z29.object({
1984
+ text: z29.string(),
1985
+ files: z29.array(z29.unknown()).optional(),
1986
+ finishReason: z29.string().optional(),
1987
+ reasoning: z29.string().optional(),
1988
+ reasoningDetails: z29.unknown().optional(),
1989
+ toolCalls: z29.array(z29.unknown()).optional(),
1990
+ toolResults: z29.array(z29.unknown()).optional(),
1991
+ warnings: z29.array(z29.unknown()).optional(),
1992
+ sources: z29.array(z29.unknown()).optional(),
1993
+ steps: z29.array(z29.unknown()),
1994
+ generationTimeMs: z29.number(),
1995
+ prompt: z29.string(),
1996
+ systemPrompt: z29.string(),
1997
+ usage: z29.object({
1998
+ totalTokens: z29.number().optional(),
1999
+ totalMicrocentsSpent: z29.number().optional()
1952
2000
  })
1953
2001
  });
1954
- var EvaluationResultSchema = z28.object({
1955
- id: z28.string(),
1956
- runId: z28.string(),
1957
- timestamp: z28.number(),
2002
+ var EvaluationResultSchema = z29.object({
2003
+ id: z29.string(),
2004
+ runId: z29.string(),
2005
+ timestamp: z29.number(),
1958
2006
  promptResult: PromptResultSchema,
1959
- testResults: z28.array(z28.unknown()),
1960
- tags: z28.array(z28.string()).optional(),
1961
- feedback: z28.string().optional(),
1962
- score: z28.number(),
1963
- suiteId: z28.string().optional()
1964
- });
1965
- var LeanEvaluationResultSchema = z28.object({
1966
- id: z28.string(),
1967
- runId: z28.string(),
1968
- timestamp: z28.number(),
1969
- tags: z28.array(z28.string()).optional(),
1970
- scenarioId: z28.string(),
1971
- scenarioVersion: z28.number().optional(),
1972
- targetId: z28.string(),
1973
- targetVersion: z28.number().optional(),
1974
- suiteId: z28.string().optional(),
1975
- score: z28.number(),
1976
- time: z28.number().optional(),
1977
- microcentsSpent: z28.number().optional()
2007
+ testResults: z29.array(z29.unknown()),
2008
+ tags: z29.array(z29.string()).optional(),
2009
+ feedback: z29.string().optional(),
2010
+ score: z29.number(),
2011
+ suiteId: z29.string().optional()
2012
+ });
2013
+ var LeanEvaluationResultSchema = z29.object({
2014
+ id: z29.string(),
2015
+ runId: z29.string(),
2016
+ timestamp: z29.number(),
2017
+ tags: z29.array(z29.string()).optional(),
2018
+ scenarioId: z29.string(),
2019
+ scenarioVersion: z29.number().optional(),
2020
+ targetId: z29.string(),
2021
+ targetVersion: z29.number().optional(),
2022
+ suiteId: z29.string().optional(),
2023
+ score: z29.number(),
2024
+ time: z29.number().optional(),
2025
+ microcentsSpent: z29.number().optional()
1978
2026
  });
1979
2027
 
1980
2028
  // src/project/project.ts
1981
- import { z as z29 } from "zod";
2029
+ import { z as z30 } from "zod";
1982
2030
  var ProjectSchema = BaseEntitySchema.extend({
1983
- appId: z29.string().optional().describe("The ID of the app in Dev Center"),
1984
- appSecret: z29.string().optional().describe("The secret of the app in Dev Center")
2031
+ appId: z30.string().optional().describe("The ID of the app in Dev Center"),
2032
+ appSecret: z30.string().optional().describe("The secret of the app in Dev Center")
1985
2033
  });
1986
2034
  var CreateProjectInputSchema = ProjectSchema.omit({
1987
2035
  id: true,
@@ -2007,6 +2055,7 @@ var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
2007
2055
  // src/assertion/system-assertions.ts
2008
2056
  var SYSTEM_ASSERTION_IDS = {
2009
2057
  SKILL_WAS_CALLED: "system:skill_was_called",
2058
+ TOOL_CALLED_WITH_PARAM: "system:tool_called_with_param",
2010
2059
  BUILD_PASSED: "system:build_passed",
2011
2060
  TIME_LIMIT: "system:time_limit",
2012
2061
  COST: "system:cost",
@@ -2030,6 +2079,26 @@ var SYSTEM_ASSERTIONS = {
2030
2079
  }
2031
2080
  ]
2032
2081
  },
2082
+ [SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM]: {
2083
+ id: SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM,
2084
+ name: "Tool Called With Param",
2085
+ description: "Check that a tool was called with expected parameters",
2086
+ type: "tool_called_with_param",
2087
+ parameters: [
2088
+ {
2089
+ name: "toolName",
2090
+ label: "Tool Name",
2091
+ type: "string",
2092
+ required: true
2093
+ },
2094
+ {
2095
+ name: "expectedParams",
2096
+ label: "Expected Parameters (JSON, substring match)",
2097
+ type: "string",
2098
+ required: true
2099
+ }
2100
+ ]
2101
+ },
2033
2102
  [SYSTEM_ASSERTION_IDS.BUILD_PASSED]: {
2034
2103
  id: SYSTEM_ASSERTION_IDS.BUILD_PASSED,
2035
2104
  name: "Build Passed",
@@ -2148,6 +2217,7 @@ var export_ClaudeModel = import_types.ClaudeModel;
2148
2217
  export {
2149
2218
  AVAILABLE_MODEL_IDS,
2150
2219
  AVAILABLE_RUN_COMMANDS,
2220
+ AVAILABLE_TOOL_NAMES,
2151
2221
  AgentRunCommand,
2152
2222
  AgentRunCommandSchema,
2153
2223
  AgentSchema,
@@ -2176,6 +2246,7 @@ export {
2176
2246
  CreateEvalRunInputSchema,
2177
2247
  CreateMcpInputSchema,
2178
2248
  CreateProjectInputSchema,
2249
+ CreateRuleInputSchema,
2179
2250
  CreateSkillInputSchema,
2180
2251
  CreateSkillVersionInputSchema,
2181
2252
  CreateSkillsGroupInputSchema,
@@ -2230,6 +2301,8 @@ export {
2230
2301
  ProjectSchema,
2231
2302
  PromptResultSchema,
2232
2303
  RUN_COMMAND_LABELS,
2304
+ RuleSchema,
2305
+ RuleTypeSchema,
2233
2306
  SEMVER_REGEX,
2234
2307
  SKILL_FOLDER_NAME_REGEX,
2235
2308
  SYSTEM_ASSERTIONS,
@@ -2262,6 +2335,8 @@ export {
2262
2335
  TimeAssertionSchema,
2263
2336
  TimeConfigSchema,
2264
2337
  TokenUsageSchema,
2338
+ ToolCalledWithParamAssertionSchema,
2339
+ ToolCalledWithParamConfigSchema,
2265
2340
  ToolTestSchema,
2266
2341
  TriggerMetadataSchema,
2267
2342
  TriggerSchema,
@@ -2270,6 +2345,7 @@ export {
2270
2345
  UpdateCustomAssertionInputSchema,
2271
2346
  UpdateMcpInputSchema,
2272
2347
  UpdateProjectInputSchema,
2348
+ UpdateRuleInputSchema,
2273
2349
  UpdateSkillInputSchema,
2274
2350
  UpdateSkillsGroupInputSchema,
2275
2351
  UpdateSubAgentInputSchema,