@wix/evalforge-types 0.65.0 → 0.67.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -43,8 +43,15 @@ __export(index_exports, {
43
43
  AssertionResultStatus: () => AssertionResultStatus,
44
44
  AssertionSchema: () => AssertionSchema,
45
45
  AssertionTypeSchema: () => AssertionTypeSchema,
46
+ BATCH_IMPORT_LIMITS: () => BATCH_IMPORT_LIMITS,
46
47
  BaseEntitySchema: () => BaseEntitySchema,
47
48
  BaseTestSchema: () => BaseTestSchema,
49
+ BatchAssertionLinkSchema: () => BatchAssertionLinkSchema,
50
+ BatchImportPayloadSchema: () => BatchImportPayloadSchema,
51
+ BatchImportResponseSchema: () => BatchImportResponseSchema,
52
+ BatchResultItemSchema: () => BatchResultItemSchema,
53
+ BatchScenarioEntrySchema: () => BatchScenarioEntrySchema,
54
+ BatchSummarySchema: () => BatchSummarySchema,
48
55
  BuildCheckTestSchema: () => BuildCheckTestSchema,
49
56
  BuildPassedAssertionSchema: () => BuildPassedAssertionSchema,
50
57
  BuildPassedConfigSchema: () => BuildPassedConfigSchema,
@@ -187,11 +194,13 @@ __export(index_exports, {
187
194
  UpdateTestScenarioInputSchema: () => UpdateTestScenarioInputSchema,
188
195
  UpdateTestSuiteInputSchema: () => UpdateTestSuiteInputSchema,
189
196
  VitestTestSchema: () => VitestTestSchema,
197
+ classifyAssertionRef: () => classifyAssertionRef,
190
198
  formatTraceEventLine: () => formatTraceEventLine,
191
199
  getSystemAssertion: () => getSystemAssertion,
192
200
  getSystemAssertions: () => getSystemAssertions,
193
201
  isSystemAssertionId: () => isSystemAssertionId,
194
202
  isValidSkillFolderName: () => isValidSkillFolderName,
203
+ normalizeBatchAssertionLink: () => normalizeBatchAssertionLink,
195
204
  normalizeModelId: () => normalizeModelId,
196
205
  parseTraceEventLine: () => parseTraceEventLine,
197
206
  validateAssertionConfig: () => validateAssertionConfig
@@ -997,11 +1006,67 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
997
1006
  });
998
1007
  var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
999
1008
 
1000
- // src/suite/test-suite.ts
1009
+ // src/scenario/batch-import.ts
1001
1010
  var import_zod24 = require("zod");
1011
+ var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
1012
+ var BatchAssertionLinkSchema = import_zod24.z.union([
1013
+ import_zod24.z.string().min(1),
1014
+ ScenarioAssertionLinkSchema
1015
+ ]);
1016
+ var BatchScenarioEntrySchema = import_zod24.z.object({
1017
+ name: import_zod24.z.string().min(1, "name: Required"),
1018
+ description: import_zod24.z.string().optional().default(""),
1019
+ triggerPrompt: import_zod24.z.string().min(10, "triggerPrompt: Must be at least 10 characters"),
1020
+ templateId: import_zod24.z.string().nullish(),
1021
+ tags: import_zod24.z.array(import_zod24.z.string()).optional(),
1022
+ assertionLinks: import_zod24.z.array(BatchAssertionLinkSchema).optional()
1023
+ });
1024
+ var BatchImportPayloadSchema = import_zod24.z.object({
1025
+ scenarios: import_zod24.z.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
1026
+ });
1027
+ var BATCH_IMPORT_LIMITS = {
1028
+ MAX_SCENARIOS: 100,
1029
+ MAX_PAYLOAD_BYTES: 1048576
1030
+ // 1 MB
1031
+ };
1032
+ function classifyAssertionRef(ref) {
1033
+ if (ref.startsWith("system:")) {
1034
+ return { type: "system", value: ref };
1035
+ }
1036
+ if (UUID_REGEX.test(ref)) {
1037
+ return { type: "uuid", value: ref };
1038
+ }
1039
+ return { type: "name", value: ref };
1040
+ }
1041
+ function normalizeBatchAssertionLink(link) {
1042
+ if (typeof link === "string") {
1043
+ return { assertionId: link };
1044
+ }
1045
+ return link;
1046
+ }
1047
+ var BatchResultItemSchema = import_zod24.z.object({
1048
+ index: import_zod24.z.number(),
1049
+ name: import_zod24.z.string(),
1050
+ status: import_zod24.z.enum(["valid", "invalid"]),
1051
+ id: import_zod24.z.string().nullable().optional(),
1052
+ errors: import_zod24.z.array(import_zod24.z.string()).optional()
1053
+ });
1054
+ var BatchSummarySchema = import_zod24.z.object({
1055
+ total: import_zod24.z.number(),
1056
+ valid: import_zod24.z.number(),
1057
+ invalid: import_zod24.z.number(),
1058
+ created: import_zod24.z.number()
1059
+ });
1060
+ var BatchImportResponseSchema = import_zod24.z.object({
1061
+ summary: BatchSummarySchema,
1062
+ results: import_zod24.z.array(BatchResultItemSchema)
1063
+ });
1064
+
1065
+ // src/suite/test-suite.ts
1066
+ var import_zod25 = require("zod");
1002
1067
  var TestSuiteSchema = TenantEntitySchema.extend({
1003
1068
  /** IDs of test scenarios in this suite */
1004
- scenarioIds: import_zod24.z.array(import_zod24.z.string())
1069
+ scenarioIds: import_zod25.z.array(import_zod25.z.string())
1005
1070
  });
1006
1071
  var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1007
1072
  id: true,
@@ -1012,21 +1077,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1012
1077
  var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
1013
1078
 
1014
1079
  // src/evaluation/metrics.ts
1015
- var import_zod25 = require("zod");
1016
- var TokenUsageSchema = import_zod25.z.object({
1017
- prompt: import_zod25.z.number(),
1018
- completion: import_zod25.z.number(),
1019
- total: import_zod25.z.number()
1020
- });
1021
- var EvalMetricsSchema = import_zod25.z.object({
1022
- totalAssertions: import_zod25.z.number(),
1023
- passed: import_zod25.z.number(),
1024
- failed: import_zod25.z.number(),
1025
- skipped: import_zod25.z.number(),
1026
- errors: import_zod25.z.number(),
1027
- passRate: import_zod25.z.number(),
1028
- avgDuration: import_zod25.z.number(),
1029
- totalDuration: import_zod25.z.number()
1080
+ var import_zod26 = require("zod");
1081
+ var TokenUsageSchema = import_zod26.z.object({
1082
+ prompt: import_zod26.z.number(),
1083
+ completion: import_zod26.z.number(),
1084
+ total: import_zod26.z.number()
1085
+ });
1086
+ var EvalMetricsSchema = import_zod26.z.object({
1087
+ totalAssertions: import_zod26.z.number(),
1088
+ passed: import_zod26.z.number(),
1089
+ failed: import_zod26.z.number(),
1090
+ skipped: import_zod26.z.number(),
1091
+ errors: import_zod26.z.number(),
1092
+ passRate: import_zod26.z.number(),
1093
+ avgDuration: import_zod26.z.number(),
1094
+ totalDuration: import_zod26.z.number()
1030
1095
  });
1031
1096
  var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1032
1097
  EvalStatus2["PENDING"] = "pending";
@@ -1036,7 +1101,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1036
1101
  EvalStatus2["CANCELLED"] = "cancelled";
1037
1102
  return EvalStatus2;
1038
1103
  })(EvalStatus || {});
1039
- var EvalStatusSchema = import_zod25.z.enum(EvalStatus);
1104
+ var EvalStatusSchema = import_zod26.z.enum(EvalStatus);
1040
1105
  var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1041
1106
  LLMStepType2["COMPLETION"] = "completion";
1042
1107
  LLMStepType2["TOOL_USE"] = "tool_use";
@@ -1044,54 +1109,54 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1044
1109
  LLMStepType2["THINKING"] = "thinking";
1045
1110
  return LLMStepType2;
1046
1111
  })(LLMStepType || {});
1047
- var LLMTraceStepSchema = import_zod25.z.object({
1048
- id: import_zod25.z.string(),
1049
- stepNumber: import_zod25.z.number(),
1050
- type: import_zod25.z.enum(LLMStepType),
1051
- model: import_zod25.z.string(),
1052
- provider: import_zod25.z.string(),
1053
- startedAt: import_zod25.z.string(),
1054
- durationMs: import_zod25.z.number(),
1112
+ var LLMTraceStepSchema = import_zod26.z.object({
1113
+ id: import_zod26.z.string(),
1114
+ stepNumber: import_zod26.z.number(),
1115
+ type: import_zod26.z.enum(LLMStepType),
1116
+ model: import_zod26.z.string(),
1117
+ provider: import_zod26.z.string(),
1118
+ startedAt: import_zod26.z.string(),
1119
+ durationMs: import_zod26.z.number(),
1055
1120
  tokenUsage: TokenUsageSchema,
1056
- costUsd: import_zod25.z.number(),
1057
- toolName: import_zod25.z.string().optional(),
1058
- toolArguments: import_zod25.z.string().optional(),
1059
- inputPreview: import_zod25.z.string().optional(),
1060
- outputPreview: import_zod25.z.string().optional(),
1061
- success: import_zod25.z.boolean(),
1062
- error: import_zod25.z.string().optional(),
1063
- turnIndex: import_zod25.z.number().optional()
1064
- });
1065
- var LLMBreakdownStatsSchema = import_zod25.z.object({
1066
- count: import_zod25.z.number(),
1067
- durationMs: import_zod25.z.number(),
1068
- tokens: import_zod25.z.number(),
1069
- costUsd: import_zod25.z.number()
1070
- });
1071
- var LLMTraceSummarySchema = import_zod25.z.object({
1072
- totalSteps: import_zod25.z.number(),
1073
- totalTurns: import_zod25.z.number().optional(),
1074
- totalDurationMs: import_zod25.z.number(),
1121
+ costUsd: import_zod26.z.number(),
1122
+ toolName: import_zod26.z.string().optional(),
1123
+ toolArguments: import_zod26.z.string().optional(),
1124
+ inputPreview: import_zod26.z.string().optional(),
1125
+ outputPreview: import_zod26.z.string().optional(),
1126
+ success: import_zod26.z.boolean(),
1127
+ error: import_zod26.z.string().optional(),
1128
+ turnIndex: import_zod26.z.number().optional()
1129
+ });
1130
+ var LLMBreakdownStatsSchema = import_zod26.z.object({
1131
+ count: import_zod26.z.number(),
1132
+ durationMs: import_zod26.z.number(),
1133
+ tokens: import_zod26.z.number(),
1134
+ costUsd: import_zod26.z.number()
1135
+ });
1136
+ var LLMTraceSummarySchema = import_zod26.z.object({
1137
+ totalSteps: import_zod26.z.number(),
1138
+ totalTurns: import_zod26.z.number().optional(),
1139
+ totalDurationMs: import_zod26.z.number(),
1075
1140
  totalTokens: TokenUsageSchema,
1076
- totalCostUsd: import_zod25.z.number(),
1077
- stepTypeBreakdown: import_zod25.z.record(import_zod25.z.string(), LLMBreakdownStatsSchema).optional(),
1078
- modelBreakdown: import_zod25.z.record(import_zod25.z.string(), LLMBreakdownStatsSchema),
1079
- modelsUsed: import_zod25.z.array(import_zod25.z.string())
1080
- });
1081
- var LLMTraceSchema = import_zod25.z.object({
1082
- id: import_zod25.z.string(),
1083
- steps: import_zod25.z.array(LLMTraceStepSchema),
1141
+ totalCostUsd: import_zod26.z.number(),
1142
+ stepTypeBreakdown: import_zod26.z.record(import_zod26.z.string(), LLMBreakdownStatsSchema).optional(),
1143
+ modelBreakdown: import_zod26.z.record(import_zod26.z.string(), LLMBreakdownStatsSchema),
1144
+ modelsUsed: import_zod26.z.array(import_zod26.z.string())
1145
+ });
1146
+ var LLMTraceSchema = import_zod26.z.object({
1147
+ id: import_zod26.z.string(),
1148
+ steps: import_zod26.z.array(LLMTraceStepSchema),
1084
1149
  summary: LLMTraceSummarySchema
1085
1150
  });
1086
1151
 
1087
1152
  // src/evaluation/eval-result.ts
1088
- var import_zod29 = require("zod");
1153
+ var import_zod30 = require("zod");
1089
1154
 
1090
1155
  // src/evaluation/eval-run.ts
1091
- var import_zod27 = require("zod");
1156
+ var import_zod28 = require("zod");
1092
1157
 
1093
1158
  // src/evaluation/live-trace.ts
1094
- var import_zod26 = require("zod");
1159
+ var import_zod27 = require("zod");
1095
1160
  var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1096
1161
  LiveTraceEventType2["THINKING"] = "thinking";
1097
1162
  LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -1105,37 +1170,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1105
1170
  LiveTraceEventType2["USER"] = "user";
1106
1171
  return LiveTraceEventType2;
1107
1172
  })(LiveTraceEventType || {});
1108
- var LiveTraceEventSchema = import_zod26.z.object({
1173
+ var LiveTraceEventSchema = import_zod27.z.object({
1109
1174
  /** The evaluation run ID */
1110
- evalRunId: import_zod26.z.string(),
1175
+ evalRunId: import_zod27.z.string(),
1111
1176
  /** The scenario ID being executed */
1112
- scenarioId: import_zod26.z.string(),
1177
+ scenarioId: import_zod27.z.string(),
1113
1178
  /** The scenario name for display */
1114
- scenarioName: import_zod26.z.string(),
1179
+ scenarioName: import_zod27.z.string(),
1115
1180
  /** The target ID (skill, agent, etc.) */
1116
- targetId: import_zod26.z.string(),
1181
+ targetId: import_zod27.z.string(),
1117
1182
  /** The target name for display */
1118
- targetName: import_zod26.z.string(),
1183
+ targetName: import_zod27.z.string(),
1119
1184
  /** Step number in the current scenario execution */
1120
- stepNumber: import_zod26.z.number(),
1185
+ stepNumber: import_zod27.z.number(),
1121
1186
  /** Type of trace event */
1122
- type: import_zod26.z.enum(LiveTraceEventType),
1187
+ type: import_zod27.z.enum(LiveTraceEventType),
1123
1188
  /** Tool name if this is a tool_use event */
1124
- toolName: import_zod26.z.string().optional(),
1189
+ toolName: import_zod27.z.string().optional(),
1125
1190
  /** Tool arguments preview (truncated JSON) */
1126
- toolArgs: import_zod26.z.string().optional(),
1191
+ toolArgs: import_zod27.z.string().optional(),
1127
1192
  /** Output preview (truncated text) */
1128
- outputPreview: import_zod26.z.string().optional(),
1193
+ outputPreview: import_zod27.z.string().optional(),
1129
1194
  /** File path for file operations */
1130
- filePath: import_zod26.z.string().optional(),
1195
+ filePath: import_zod27.z.string().optional(),
1131
1196
  /** Elapsed time in milliseconds for progress events */
1132
- elapsedMs: import_zod26.z.number().optional(),
1197
+ elapsedMs: import_zod27.z.number().optional(),
1133
1198
  /** Thinking/reasoning text from Claude */
1134
- thinking: import_zod26.z.string().optional(),
1199
+ thinking: import_zod27.z.string().optional(),
1135
1200
  /** Timestamp when this event occurred */
1136
- timestamp: import_zod26.z.string(),
1201
+ timestamp: import_zod27.z.string(),
1137
1202
  /** Whether this is the final event for this scenario */
1138
- isComplete: import_zod26.z.boolean()
1203
+ isComplete: import_zod27.z.boolean()
1139
1204
  });
1140
1205
  var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
1141
1206
  function parseTraceEventLine(line) {
@@ -1164,15 +1229,15 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
1164
1229
  TriggerType2["SCHEDULED"] = "SCHEDULED";
1165
1230
  return TriggerType2;
1166
1231
  })(TriggerType || {});
1167
- var TriggerMetadataSchema = import_zod27.z.object({
1168
- version: import_zod27.z.string().optional(),
1169
- resourceUpdated: import_zod27.z.array(import_zod27.z.string()).optional(),
1170
- scheduleId: import_zod27.z.string().optional()
1232
+ var TriggerMetadataSchema = import_zod28.z.object({
1233
+ version: import_zod28.z.string().optional(),
1234
+ resourceUpdated: import_zod28.z.array(import_zod28.z.string()).optional(),
1235
+ scheduleId: import_zod28.z.string().optional()
1171
1236
  });
1172
- var TriggerSchema = import_zod27.z.object({
1173
- id: import_zod27.z.string(),
1237
+ var TriggerSchema = import_zod28.z.object({
1238
+ id: import_zod28.z.string(),
1174
1239
  metadata: TriggerMetadataSchema.optional(),
1175
- type: import_zod27.z.nativeEnum(TriggerType)
1240
+ type: import_zod28.z.nativeEnum(TriggerType)
1176
1241
  });
1177
1242
  var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
1178
1243
  FailureCategory2["MISSING_FILE"] = "missing_file";
@@ -1190,30 +1255,30 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
1190
1255
  FailureSeverity2["LOW"] = "low";
1191
1256
  return FailureSeverity2;
1192
1257
  })(FailureSeverity || {});
1193
- var DiffLineTypeSchema = import_zod27.z.enum(["added", "removed", "unchanged"]);
1194
- var DiffLineSchema = import_zod27.z.object({
1258
+ var DiffLineTypeSchema = import_zod28.z.enum(["added", "removed", "unchanged"]);
1259
+ var DiffLineSchema = import_zod28.z.object({
1195
1260
  type: DiffLineTypeSchema,
1196
- content: import_zod27.z.string(),
1197
- lineNumber: import_zod27.z.number()
1198
- });
1199
- var DiffContentSchema = import_zod27.z.object({
1200
- path: import_zod27.z.string(),
1201
- expected: import_zod27.z.string(),
1202
- actual: import_zod27.z.string(),
1203
- diffLines: import_zod27.z.array(DiffLineSchema),
1204
- renamedFrom: import_zod27.z.string().optional(),
1261
+ content: import_zod28.z.string(),
1262
+ lineNumber: import_zod28.z.number()
1263
+ });
1264
+ var DiffContentSchema = import_zod28.z.object({
1265
+ path: import_zod28.z.string(),
1266
+ expected: import_zod28.z.string(),
1267
+ actual: import_zod28.z.string(),
1268
+ diffLines: import_zod28.z.array(DiffLineSchema),
1269
+ renamedFrom: import_zod28.z.string().optional(),
1205
1270
  /** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
1206
- isInfrastructure: import_zod27.z.boolean().optional()
1271
+ isInfrastructure: import_zod28.z.boolean().optional()
1207
1272
  });
1208
- var CommandExecutionSchema = import_zod27.z.object({
1209
- command: import_zod27.z.string(),
1210
- exitCode: import_zod27.z.number(),
1211
- output: import_zod27.z.string().optional(),
1212
- duration: import_zod27.z.number()
1273
+ var CommandExecutionSchema = import_zod28.z.object({
1274
+ command: import_zod28.z.string(),
1275
+ exitCode: import_zod28.z.number(),
1276
+ output: import_zod28.z.string().optional(),
1277
+ duration: import_zod28.z.number()
1213
1278
  });
1214
- var FileModificationSchema = import_zod27.z.object({
1215
- path: import_zod27.z.string(),
1216
- action: import_zod27.z.enum(["created", "modified", "deleted"])
1279
+ var FileModificationSchema = import_zod28.z.object({
1280
+ path: import_zod28.z.string(),
1281
+ action: import_zod28.z.enum(["created", "modified", "deleted"])
1217
1282
  });
1218
1283
  var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1219
1284
  TemplateFileStatus2["NEW"] = "new";
@@ -1221,89 +1286,91 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1221
1286
  TemplateFileStatus2["UNCHANGED"] = "unchanged";
1222
1287
  return TemplateFileStatus2;
1223
1288
  })(TemplateFileStatus || {});
1224
- var TemplateFileSchema = import_zod27.z.object({
1289
+ var TemplateFileSchema = import_zod28.z.object({
1225
1290
  /** Relative path within the template */
1226
- path: import_zod27.z.string(),
1291
+ path: import_zod28.z.string(),
1227
1292
  /** Full file content after execution */
1228
- content: import_zod27.z.string(),
1293
+ content: import_zod28.z.string(),
1229
1294
  /** File status (new, modified, unchanged) */
1230
- status: import_zod27.z.enum(["new", "modified", "unchanged"]),
1295
+ status: import_zod28.z.enum(["new", "modified", "unchanged"]),
1231
1296
  /** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
1232
- isInfrastructure: import_zod27.z.boolean().optional()
1233
- });
1234
- var ApiCallSchema = import_zod27.z.object({
1235
- endpoint: import_zod27.z.string(),
1236
- tokensUsed: import_zod27.z.number(),
1237
- duration: import_zod27.z.number()
1238
- });
1239
- var ExecutionTraceSchema = import_zod27.z.object({
1240
- commands: import_zod27.z.array(CommandExecutionSchema),
1241
- filesModified: import_zod27.z.array(FileModificationSchema),
1242
- apiCalls: import_zod27.z.array(ApiCallSchema),
1243
- totalDuration: import_zod27.z.number()
1244
- });
1245
- var FailureAnalysisSchema = import_zod27.z.object({
1246
- category: import_zod27.z.enum(FailureCategory),
1247
- severity: import_zod27.z.enum(FailureSeverity),
1248
- summary: import_zod27.z.string(),
1249
- details: import_zod27.z.string(),
1250
- rootCause: import_zod27.z.string(),
1251
- suggestedFix: import_zod27.z.string(),
1252
- relatedAssertions: import_zod27.z.array(import_zod27.z.string()),
1253
- codeSnippet: import_zod27.z.string().optional(),
1254
- similarIssues: import_zod27.z.array(import_zod27.z.string()).optional(),
1255
- patternId: import_zod27.z.string().optional(),
1297
+ isInfrastructure: import_zod28.z.boolean().optional()
1298
+ });
1299
+ var ApiCallSchema = import_zod28.z.object({
1300
+ endpoint: import_zod28.z.string(),
1301
+ tokensUsed: import_zod28.z.number(),
1302
+ duration: import_zod28.z.number()
1303
+ });
1304
+ var ExecutionTraceSchema = import_zod28.z.object({
1305
+ commands: import_zod28.z.array(CommandExecutionSchema),
1306
+ filesModified: import_zod28.z.array(FileModificationSchema),
1307
+ apiCalls: import_zod28.z.array(ApiCallSchema),
1308
+ totalDuration: import_zod28.z.number()
1309
+ });
1310
+ var FailureAnalysisSchema = import_zod28.z.object({
1311
+ category: import_zod28.z.enum(FailureCategory),
1312
+ severity: import_zod28.z.enum(FailureSeverity),
1313
+ summary: import_zod28.z.string(),
1314
+ details: import_zod28.z.string(),
1315
+ rootCause: import_zod28.z.string(),
1316
+ suggestedFix: import_zod28.z.string(),
1317
+ relatedAssertions: import_zod28.z.array(import_zod28.z.string()),
1318
+ codeSnippet: import_zod28.z.string().optional(),
1319
+ similarIssues: import_zod28.z.array(import_zod28.z.string()).optional(),
1320
+ patternId: import_zod28.z.string().optional(),
1256
1321
  // Extended fields for detailed debugging
1257
1322
  diff: DiffContentSchema.optional(),
1258
1323
  executionTrace: ExecutionTraceSchema.optional()
1259
1324
  });
1260
1325
  var EvalRunSchema = TenantEntitySchema.extend({
1261
1326
  /** Agent ID for this run */
1262
- agentId: import_zod27.z.string().optional(),
1327
+ agentId: import_zod28.z.string().optional(),
1263
1328
  /** Preset ID that originated this run (optional) */
1264
- presetId: import_zod27.z.string().optional(),
1329
+ presetId: import_zod28.z.string().optional(),
1265
1330
  /** Skill IDs for this run */
1266
- skillIds: import_zod27.z.array(import_zod27.z.string()).optional(),
1331
+ skillIds: import_zod28.z.array(import_zod28.z.string()).optional(),
1267
1332
  /** Map of skillId to skillVersionId for this run */
1268
- skillVersions: import_zod27.z.record(import_zod27.z.string(), import_zod27.z.string()).optional(),
1333
+ skillVersions: import_zod28.z.record(import_zod28.z.string(), import_zod28.z.string()).optional(),
1269
1334
  /** Scenario IDs to run (always present — resolved server-side from tags when needed) */
1270
- scenarioIds: import_zod27.z.array(import_zod27.z.string()),
1335
+ scenarioIds: import_zod28.z.array(import_zod28.z.string()),
1271
1336
  /** Current status */
1272
1337
  status: EvalStatusSchema,
1273
1338
  /** Progress percentage (0-100) */
1274
- progress: import_zod27.z.number(),
1339
+ progress: import_zod28.z.number(),
1275
1340
  /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
1276
- results: import_zod27.z.array(import_zod27.z.lazy(() => EvalRunResultSchema)),
1341
+ results: import_zod28.z.array(import_zod28.z.lazy(() => EvalRunResultSchema)),
1277
1342
  /** Aggregated metrics across all results */
1278
1343
  aggregateMetrics: EvalMetricsSchema,
1279
1344
  /** Failure analyses */
1280
- failureAnalyses: import_zod27.z.array(FailureAnalysisSchema).optional(),
1345
+ failureAnalyses: import_zod28.z.array(FailureAnalysisSchema).optional(),
1281
1346
  /** Aggregated LLM trace summary */
1282
1347
  llmTraceSummary: LLMTraceSummarySchema.optional(),
1283
1348
  /** What triggered this run */
1284
1349
  trigger: TriggerSchema.optional(),
1285
1350
  /** When the run started (set when evaluation is triggered) */
1286
- startedAt: import_zod27.z.string().optional(),
1351
+ startedAt: import_zod28.z.string().optional(),
1287
1352
  /** When the run completed */
1288
- completedAt: import_zod27.z.string().optional(),
1353
+ completedAt: import_zod28.z.string().optional(),
1289
1354
  /** Live trace events captured during execution (for playback on results page) */
1290
- liveTraceEvents: import_zod27.z.array(LiveTraceEventSchema).optional(),
1355
+ liveTraceEvents: import_zod28.z.array(LiveTraceEventSchema).optional(),
1291
1356
  /** Remote job ID for tracking execution in Dev Machines */
1292
- jobId: import_zod27.z.string().optional(),
1357
+ jobId: import_zod28.z.string().optional(),
1293
1358
  /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
1294
- jobStatus: import_zod27.z.string().optional(),
1359
+ jobStatus: import_zod28.z.string().optional(),
1295
1360
  /** Remote job error message if the job failed */
1296
- jobError: import_zod27.z.string().optional(),
1361
+ jobError: import_zod28.z.string().optional(),
1297
1362
  /** Timestamp of the last job status check */
1298
- jobStatusCheckedAt: import_zod27.z.string().optional(),
1363
+ jobStatusCheckedAt: import_zod28.z.string().optional(),
1299
1364
  /** MCP server IDs to enable for this run (optional) */
1300
- mcpIds: import_zod27.z.array(import_zod27.z.string()).optional(),
1365
+ mcpIds: import_zod28.z.array(import_zod28.z.string()).optional(),
1301
1366
  /** Sub-agent IDs to enable for this run (optional) */
1302
- subAgentIds: import_zod27.z.array(import_zod27.z.string()).optional(),
1367
+ subAgentIds: import_zod28.z.array(import_zod28.z.string()).optional(),
1303
1368
  /** Rule IDs to enable for this run (optional) */
1304
- ruleIds: import_zod27.z.array(import_zod27.z.string()).optional(),
1369
+ ruleIds: import_zod28.z.array(import_zod28.z.string()).optional(),
1305
1370
  /** Tags used to select scenarios for this run (for traceability) */
1306
- tags: import_zod27.z.array(import_zod27.z.string()).optional()
1371
+ tags: import_zod28.z.array(import_zod28.z.string()).optional(),
1372
+ /** How many times each scenario is executed within this eval run. Default: 1. Max: 20. */
1373
+ runsPerScenario: import_zod28.z.number().int().min(1).max(20).optional()
1307
1374
  });
1308
1375
  var CreateEvalRunInputSchema = EvalRunSchema.omit({
1309
1376
  id: true,
@@ -1318,60 +1385,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
1318
1385
  scenarioIds: true
1319
1386
  }).extend({
1320
1387
  /** Optional on input — backend resolves from tags when not provided */
1321
- scenarioIds: import_zod27.z.array(import_zod27.z.string()).optional()
1388
+ scenarioIds: import_zod28.z.array(import_zod28.z.string()).optional()
1322
1389
  }).refine(
1323
1390
  (data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
1324
1391
  { message: "Either scenarioIds or tags must be provided" }
1325
1392
  );
1326
- var EvaluationProgressSchema = import_zod27.z.object({
1327
- runId: import_zod27.z.string(),
1328
- targetId: import_zod27.z.string(),
1329
- totalScenarios: import_zod27.z.number(),
1330
- completedScenarios: import_zod27.z.number(),
1331
- scenarioProgress: import_zod27.z.array(
1332
- import_zod27.z.object({
1333
- scenarioId: import_zod27.z.string(),
1334
- currentStep: import_zod27.z.string(),
1335
- error: import_zod27.z.string().optional()
1393
+ var EvaluationProgressSchema = import_zod28.z.object({
1394
+ runId: import_zod28.z.string(),
1395
+ targetId: import_zod28.z.string(),
1396
+ totalScenarios: import_zod28.z.number(),
1397
+ completedScenarios: import_zod28.z.number(),
1398
+ scenarioProgress: import_zod28.z.array(
1399
+ import_zod28.z.object({
1400
+ scenarioId: import_zod28.z.string(),
1401
+ currentStep: import_zod28.z.string(),
1402
+ error: import_zod28.z.string().optional()
1336
1403
  })
1337
1404
  ),
1338
- createdAt: import_zod27.z.number()
1339
- });
1340
- var EvaluationLogSchema = import_zod27.z.object({
1341
- runId: import_zod27.z.string(),
1342
- scenarioId: import_zod27.z.string(),
1343
- log: import_zod27.z.object({
1344
- level: import_zod27.z.enum(["info", "error", "debug"]),
1345
- message: import_zod27.z.string().optional(),
1346
- args: import_zod27.z.array(import_zod27.z.any()).optional(),
1347
- error: import_zod27.z.string().optional()
1405
+ createdAt: import_zod28.z.number()
1406
+ });
1407
+ var EvaluationLogSchema = import_zod28.z.object({
1408
+ runId: import_zod28.z.string(),
1409
+ scenarioId: import_zod28.z.string(),
1410
+ log: import_zod28.z.object({
1411
+ level: import_zod28.z.enum(["info", "error", "debug"]),
1412
+ message: import_zod28.z.string().optional(),
1413
+ args: import_zod28.z.array(import_zod28.z.any()).optional(),
1414
+ error: import_zod28.z.string().optional()
1348
1415
  })
1349
1416
  });
1350
1417
  var LLM_TIMEOUT = 12e4;
1351
1418
 
1352
1419
  // src/evaluation/conversation.ts
1353
- var import_zod28 = require("zod");
1354
- var TextBlockSchema = import_zod28.z.object({
1355
- type: import_zod28.z.literal("text"),
1356
- text: import_zod28.z.string()
1357
- });
1358
- var ThinkingBlockSchema = import_zod28.z.object({
1359
- type: import_zod28.z.literal("thinking"),
1360
- thinking: import_zod28.z.string()
1361
- });
1362
- var ToolUseBlockSchema = import_zod28.z.object({
1363
- type: import_zod28.z.literal("tool_use"),
1364
- toolName: import_zod28.z.string(),
1365
- toolId: import_zod28.z.string(),
1366
- input: import_zod28.z.unknown()
1367
- });
1368
- var ToolResultBlockSchema = import_zod28.z.object({
1369
- type: import_zod28.z.literal("tool_result"),
1370
- toolUseId: import_zod28.z.string(),
1371
- content: import_zod28.z.string(),
1372
- isError: import_zod28.z.boolean().optional()
1373
- });
1374
- var ConversationBlockSchema = import_zod28.z.discriminatedUnion("type", [
1420
+ var import_zod29 = require("zod");
1421
+ var TextBlockSchema = import_zod29.z.object({
1422
+ type: import_zod29.z.literal("text"),
1423
+ text: import_zod29.z.string()
1424
+ });
1425
+ var ThinkingBlockSchema = import_zod29.z.object({
1426
+ type: import_zod29.z.literal("thinking"),
1427
+ thinking: import_zod29.z.string()
1428
+ });
1429
+ var ToolUseBlockSchema = import_zod29.z.object({
1430
+ type: import_zod29.z.literal("tool_use"),
1431
+ toolName: import_zod29.z.string(),
1432
+ toolId: import_zod29.z.string(),
1433
+ input: import_zod29.z.unknown()
1434
+ });
1435
+ var ToolResultBlockSchema = import_zod29.z.object({
1436
+ type: import_zod29.z.literal("tool_result"),
1437
+ toolUseId: import_zod29.z.string(),
1438
+ content: import_zod29.z.string(),
1439
+ isError: import_zod29.z.boolean().optional()
1440
+ });
1441
+ var ConversationBlockSchema = import_zod29.z.discriminatedUnion("type", [
1375
1442
  TextBlockSchema,
1376
1443
  ThinkingBlockSchema,
1377
1444
  ToolUseBlockSchema,
@@ -1382,18 +1449,18 @@ var ConversationMessageRoles = [
1382
1449
  "user",
1383
1450
  "system"
1384
1451
  ];
1385
- var ConversationMessageSchema = import_zod28.z.object({
1386
- role: import_zod28.z.enum(ConversationMessageRoles),
1387
- content: import_zod28.z.array(ConversationBlockSchema),
1388
- timestamp: import_zod28.z.string()
1452
+ var ConversationMessageSchema = import_zod29.z.object({
1453
+ role: import_zod29.z.enum(ConversationMessageRoles),
1454
+ content: import_zod29.z.array(ConversationBlockSchema),
1455
+ timestamp: import_zod29.z.string()
1389
1456
  });
1390
- var ScenarioConversationSchema = import_zod28.z.object({
1391
- id: import_zod28.z.string(),
1392
- projectId: import_zod28.z.string(),
1393
- evalRunId: import_zod28.z.string(),
1394
- resultId: import_zod28.z.string(),
1395
- messages: import_zod28.z.array(ConversationMessageSchema),
1396
- createdAt: import_zod28.z.string()
1457
+ var ScenarioConversationSchema = import_zod29.z.object({
1458
+ id: import_zod29.z.string(),
1459
+ projectId: import_zod29.z.string(),
1460
+ evalRunId: import_zod29.z.string(),
1461
+ resultId: import_zod29.z.string(),
1462
+ messages: import_zod29.z.array(ConversationMessageSchema),
1463
+ createdAt: import_zod29.z.string()
1397
1464
  });
1398
1465
 
1399
1466
  // src/evaluation/eval-result.ts
@@ -1404,94 +1471,96 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
1404
1471
  AssertionResultStatus2["ERROR"] = "error";
1405
1472
  return AssertionResultStatus2;
1406
1473
  })(AssertionResultStatus || {});
/**
 * AssertionResultSchema: outcome of a single assertion.
 *   - id, assertionId, assertionType, assertionName: required strings
 *   - status: validated against the AssertionResultStatus enum object
 *   - message, expected, actual: optional strings
 *   - duration: optional number (unit is not stated in this schema)
 *   - details: optional record with string keys and unknown values
 *   - llmTraceSteps: optional array of LLMTraceStepSchema items
 * The removed (-) and added (+) versions list the same fields; the bundler
 * alias changes from import_zod29 to import_zod30. The removed lines that
 * follow the old definition are the opening lines of the previous
 * EvalRunResultSchema, which the diff interleaves here.
 * NOTE(review): `status` passes an enum object to z.enum, whereas
 * EvalScheduleSchema uses z.nativeEnum for FrequencyType — confirm the
 * installed zod version accepts enum objects in z.enum.
 */
1407
- var AssertionResultSchema = import_zod29.z.object({
1408
- id: import_zod29.z.string(),
1409
- assertionId: import_zod29.z.string(),
1410
- assertionType: import_zod29.z.string(),
1411
- assertionName: import_zod29.z.string(),
1412
- status: import_zod29.z.enum(AssertionResultStatus),
1413
- message: import_zod29.z.string().optional(),
1414
- expected: import_zod29.z.string().optional(),
1415
- actual: import_zod29.z.string().optional(),
1416
- duration: import_zod29.z.number().optional(),
1417
- details: import_zod29.z.record(import_zod29.z.string(), import_zod29.z.unknown()).optional(),
1418
- llmTraceSteps: import_zod29.z.array(LLMTraceStepSchema).optional()
1419
- });
1420
- var EvalRunResultSchema = import_zod29.z.object({
1421
- id: import_zod29.z.string(),
1422
- targetId: import_zod29.z.string(),
1423
- targetName: import_zod29.z.string().optional(),
1474
+ var AssertionResultSchema = import_zod30.z.object({
1475
+ id: import_zod30.z.string(),
1476
+ assertionId: import_zod30.z.string(),
1477
+ assertionType: import_zod30.z.string(),
1478
+ assertionName: import_zod30.z.string(),
1479
+ status: import_zod30.z.enum(AssertionResultStatus),
1480
+ message: import_zod30.z.string().optional(),
1481
+ expected: import_zod30.z.string().optional(),
1482
+ actual: import_zod30.z.string().optional(),
1483
+ duration: import_zod30.z.number().optional(),
1484
+ details: import_zod30.z.record(import_zod30.z.string(), import_zod30.z.unknown()).optional(),
1485
+ llmTraceSteps: import_zod30.z.array(LLMTraceStepSchema).optional()
1486
+ });
/**
 * EvalRunResultSchema: result of running one scenario against one target.
 * Required: id, targetId, scenarioId, scenarioName (strings),
 * assertionResults (array of AssertionResultSchema), and the numbers
 * passed, failed, passRate and duration. Every other field is optional.
 * Compared with the removed (-) version, the added (+) version:
 *   - switches the bundler alias from import_zod29 to import_zod30, and
 *   - appends `iterationIndex`, an optional integer >= 0.
 * The removed lines interleaved in this span also contain the previous
 * PromptResultSchema definition (the diff places old lines before new ones).
 */
1487
+ var EvalRunResultSchema = import_zod30.z.object({
1488
+ id: import_zod30.z.string(),
1489
+ targetId: import_zod30.z.string(),
1490
+ targetName: import_zod30.z.string().optional(),
1424
1491
  /** SkillVersion ID used for this evaluation (for version tracking) */
1425
- skillVersionId: import_zod29.z.string().optional(),
1492
+ skillVersionId: import_zod30.z.string().optional(),
1426
1493
  /** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
1427
- skillVersion: import_zod29.z.string().optional(),
1428
- scenarioId: import_zod29.z.string(),
1429
- scenarioName: import_zod29.z.string(),
1494
+ skillVersion: import_zod30.z.string().optional(),
1495
+ scenarioId: import_zod30.z.string(),
1496
+ scenarioName: import_zod30.z.string(),
1430
1497
  modelConfig: ModelConfigSchema.optional(),
1431
- assertionResults: import_zod29.z.array(AssertionResultSchema),
1498
+ assertionResults: import_zod30.z.array(AssertionResultSchema),
1432
1499
  metrics: EvalMetricsSchema.optional(),
1433
- passed: import_zod29.z.number(),
1434
- failed: import_zod29.z.number(),
1435
- passRate: import_zod29.z.number(),
1436
- duration: import_zod29.z.number(),
1437
- outputText: import_zod29.z.string().optional(),
1438
- files: import_zod29.z.array(ExpectedFileSchema).optional(),
1439
- fileDiffs: import_zod29.z.array(DiffContentSchema).optional(),
1500
+ passed: import_zod30.z.number(),
1501
+ failed: import_zod30.z.number(),
1502
+ passRate: import_zod30.z.number(),
1503
+ duration: import_zod30.z.number(),
1504
+ outputText: import_zod30.z.string().optional(),
1505
+ files: import_zod30.z.array(ExpectedFileSchema).optional(),
1506
+ fileDiffs: import_zod30.z.array(DiffContentSchema).optional(),
1440
1507
  /** Full template files after execution with status indicators */
1441
- templateFiles: import_zod29.z.array(TemplateFileSchema).optional(),
1442
- startedAt: import_zod29.z.string().optional(),
1443
- completedAt: import_zod29.z.string().optional(),
1508
+ templateFiles: import_zod30.z.array(TemplateFileSchema).optional(),
1509
+ startedAt: import_zod30.z.string().optional(),
1510
+ completedAt: import_zod30.z.string().optional(),
1444
1511
  llmTrace: LLMTraceSchema.optional(),
1445
1512
  /** Full conversation messages (only present in transit; stripped before DB storage) */
1446
- conversation: import_zod29.z.array(ConversationMessageSchema).optional()
1447
- });
1448
- var PromptResultSchema = import_zod29.z.object({
1449
- text: import_zod29.z.string(),
1450
- files: import_zod29.z.array(import_zod29.z.unknown()).optional(),
1451
- finishReason: import_zod29.z.string().optional(),
1452
- reasoning: import_zod29.z.string().optional(),
1453
- reasoningDetails: import_zod29.z.unknown().optional(),
1454
- toolCalls: import_zod29.z.array(import_zod29.z.unknown()).optional(),
1455
- toolResults: import_zod29.z.array(import_zod29.z.unknown()).optional(),
1456
- warnings: import_zod29.z.array(import_zod29.z.unknown()).optional(),
1457
- sources: import_zod29.z.array(import_zod29.z.unknown()).optional(),
1458
- steps: import_zod29.z.array(import_zod29.z.unknown()),
1459
- generationTimeMs: import_zod29.z.number(),
1460
- prompt: import_zod29.z.string(),
1461
- systemPrompt: import_zod29.z.string(),
1462
- usage: import_zod29.z.object({
1463
- totalTokens: import_zod29.z.number().optional(),
1464
- totalMicrocentsSpent: import_zod29.z.number().optional()
1513
+ conversation: import_zod30.z.array(ConversationMessageSchema).optional(),
1514
+ /** 0-based iteration index when a scenario is run multiple times within a single eval run */
1515
+ iterationIndex: import_zod30.z.number().int().min(0).optional()
1516
+ });
/**
 * PromptResultSchema: raw result of one prompt generation.
 * Required: text, prompt, systemPrompt (strings), steps (array of unknown),
 * generationTimeMs (number) and the `usage` object. Inside `usage`, both
 * totalTokens and totalMicrocentsSpent are optional numbers.
 * files, toolCalls, toolResults, warnings and sources are optional arrays of
 * z.unknown(), and reasoningDetails is an optional z.unknown(), so their
 * contents are not validated here.
 * These are the added (+) lines using the import_zod30 alias; the trailing
 * `})` / `});` context lines close `usage` and the schema respectively.
 */
1517
+ var PromptResultSchema = import_zod30.z.object({
1518
+ text: import_zod30.z.string(),
1519
+ files: import_zod30.z.array(import_zod30.z.unknown()).optional(),
1520
+ finishReason: import_zod30.z.string().optional(),
1521
+ reasoning: import_zod30.z.string().optional(),
1522
+ reasoningDetails: import_zod30.z.unknown().optional(),
1523
+ toolCalls: import_zod30.z.array(import_zod30.z.unknown()).optional(),
1524
+ toolResults: import_zod30.z.array(import_zod30.z.unknown()).optional(),
1525
+ warnings: import_zod30.z.array(import_zod30.z.unknown()).optional(),
1526
+ sources: import_zod30.z.array(import_zod30.z.unknown()).optional(),
1527
+ steps: import_zod30.z.array(import_zod30.z.unknown()),
1528
+ generationTimeMs: import_zod30.z.number(),
1529
+ prompt: import_zod30.z.string(),
1530
+ systemPrompt: import_zod30.z.string(),
1531
+ usage: import_zod30.z.object({
1532
+ totalTokens: import_zod30.z.number().optional(),
1533
+ totalMicrocentsSpent: import_zod30.z.number().optional()
1465
1534
  })
1466
1535
  });
/**
 * EvaluationResultSchema: full evaluation record.
 * Required: id, runId (strings), timestamp (number), promptResult
 * (PromptResultSchema), testResults (array of unknown) and score (number).
 * Optional: tags (string array), feedback and suiteId (strings).
 * The removed (-) and added (+) versions list the same fields; only the
 * bundler alias changes (import_zod29 -> import_zod30). The removed lines in
 * this span also contain the previous LeanEvaluationResultSchema definition.
 */
1467
- var EvaluationResultSchema = import_zod29.z.object({
1468
- id: import_zod29.z.string(),
1469
- runId: import_zod29.z.string(),
1470
- timestamp: import_zod29.z.number(),
1536
+ var EvaluationResultSchema = import_zod30.z.object({
1537
+ id: import_zod30.z.string(),
1538
+ runId: import_zod30.z.string(),
1539
+ timestamp: import_zod30.z.number(),
1471
1540
  promptResult: PromptResultSchema,
1472
- testResults: import_zod29.z.array(import_zod29.z.unknown()),
1473
- tags: import_zod29.z.array(import_zod29.z.string()).optional(),
1474
- feedback: import_zod29.z.string().optional(),
1475
- score: import_zod29.z.number(),
1476
- suiteId: import_zod29.z.string().optional()
1477
- });
1478
- var LeanEvaluationResultSchema = import_zod29.z.object({
1479
- id: import_zod29.z.string(),
1480
- runId: import_zod29.z.string(),
1481
- timestamp: import_zod29.z.number(),
1482
- tags: import_zod29.z.array(import_zod29.z.string()).optional(),
1483
- scenarioId: import_zod29.z.string(),
1484
- scenarioVersion: import_zod29.z.number().optional(),
1485
- targetId: import_zod29.z.string(),
1486
- targetVersion: import_zod29.z.number().optional(),
1487
- suiteId: import_zod29.z.string().optional(),
1488
- score: import_zod29.z.number(),
1489
- time: import_zod29.z.number().optional(),
1490
- microcentsSpent: import_zod29.z.number().optional()
1541
+ testResults: import_zod30.z.array(import_zod30.z.unknown()),
1542
+ tags: import_zod30.z.array(import_zod30.z.string()).optional(),
1543
+ feedback: import_zod30.z.string().optional(),
1544
+ score: import_zod30.z.number(),
1545
+ suiteId: import_zod30.z.string().optional()
1546
+ });
/**
 * LeanEvaluationResultSchema: evaluation record without the promptResult /
 * testResults / feedback fields that EvaluationResultSchema carries.
 * Required: id, runId, scenarioId, targetId (strings), timestamp and score
 * (numbers). Optional: tags, scenarioVersion, targetVersion, suiteId, time
 * and microcentsSpent.
 * NOTE(review): the unit of `time` is not stated in this schema — confirm.
 * These are the added (+) lines using the import_zod30 alias; the final
 * `});` context line is shared with the removed version.
 */
1547
+ var LeanEvaluationResultSchema = import_zod30.z.object({
1548
+ id: import_zod30.z.string(),
1549
+ runId: import_zod30.z.string(),
1550
+ timestamp: import_zod30.z.number(),
1551
+ tags: import_zod30.z.array(import_zod30.z.string()).optional(),
1552
+ scenarioId: import_zod30.z.string(),
1553
+ scenarioVersion: import_zod30.z.number().optional(),
1554
+ targetId: import_zod30.z.string(),
1555
+ targetVersion: import_zod30.z.number().optional(),
1556
+ suiteId: import_zod30.z.string().optional(),
1557
+ score: import_zod30.z.number(),
1558
+ time: import_zod30.z.number().optional(),
1559
+ microcentsSpent: import_zod30.z.number().optional()
1491
1560
  });
1492
1561
 
1493
1562
  // src/evaluation/eval-run-folder.ts
1494
- var import_zod30 = require("zod");
1563
+ var import_zod31 = require("zod");
/**
 * EvalRunFolderSchema: TenantEntitySchema extended with an empty shape, so
 * no fields beyond those of TenantEntitySchema are declared here.
 */
1495
1564
  var EvalRunFolderSchema = TenantEntitySchema.extend({});
1496
1565
  var CreateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
1497
1566
  id: true,
@@ -1505,26 +1574,26 @@ var UpdateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
1505
1574
  updatedAt: true,
1506
1575
  deleted: true
1507
1576
  }).partial();
/**
 * EvalRunFolderMembershipSchema: links an eval run to a folder.
 * folderId, evalRunId, projectId and createdAt are all required strings
 * (no format check is applied to createdAt here).
 * The removed (-) and added (+) versions differ only in the bundler alias
 * (import_zod30 -> import_zod31).
 */
1508
- var EvalRunFolderMembershipSchema = import_zod30.z.object({
1509
- folderId: import_zod30.z.string(),
1510
- evalRunId: import_zod30.z.string(),
1511
- projectId: import_zod30.z.string(),
1512
- createdAt: import_zod30.z.string()
1577
+ var EvalRunFolderMembershipSchema = import_zod31.z.object({
1578
+ folderId: import_zod31.z.string(),
1579
+ evalRunId: import_zod31.z.string(),
1580
+ projectId: import_zod31.z.string(),
1581
+ createdAt: import_zod31.z.string()
1513
1582
  });
1514
1583
 
1515
1584
  // src/project/project.ts
1516
- var import_zod31 = require("zod");
1585
+ var import_zod32 = require("zod");
/**
 * ProjectSchema: BaseEntitySchema extended with project-specific fields.
 * Every field added here is optional. `wixAuthToken` and `base44AuthFile`
 * are additionally nullable; per the existing field comments, null means
 * "clear" the stored value. `wixAuthEmail` and `base44AuthEmail` are
 * optional strings described as resolved at runtime.
 * The removed (-) and added (+) lines carry identical field definitions and
 * descriptions; only the bundler alias changes (import_zod31 -> import_zod32).
 */
1517
1586
  var ProjectSchema = BaseEntitySchema.extend({
1518
- appId: import_zod31.z.string().optional().describe("The ID of the app in Dev Center"),
1519
- scenarioTags: import_zod31.z.array(import_zod31.z.string()).optional().describe("Project-level tag vocabulary for scenarios"),
1587
+ appId: import_zod32.z.string().optional().describe("The ID of the app in Dev Center"),
1588
+ scenarioTags: import_zod32.z.array(import_zod32.z.string()).optional().describe("Project-level tag vocabulary for scenarios"),
1520
1589
  /** Per-project Wix auth token (write-only — never returned in GET responses). null = clear. */
1521
- wixAuthToken: import_zod31.z.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
1590
+ wixAuthToken: import_zod32.z.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
1522
1591
  /** Per-project Base44 auth file content (write-only — never returned in GET responses). null = clear. */
1523
- base44AuthFile: import_zod31.z.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
1592
+ base44AuthFile: import_zod32.z.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
1524
1593
  /** Resolved at runtime from the encrypted Wix auth token */
1525
- wixAuthEmail: import_zod31.z.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
1594
+ wixAuthEmail: import_zod32.z.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
1526
1595
  /** Resolved at runtime from the encrypted Base44 auth file */
1527
- base44AuthEmail: import_zod31.z.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
1596
+ base44AuthEmail: import_zod32.z.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
1528
1597
  });
1529
1598
  var CreateProjectInputSchema = ProjectSchema.omit({
1530
1599
  id: true,
@@ -1550,7 +1619,7 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
/**
 * UpdateTemplateInputSchema: CreateTemplateInputSchema with `.partial()`
 * applied, i.e. the same fields with each one made optional.
 */
1550
1619
  var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
1551
1620
 
1552
1621
  // src/schedule/eval-schedule.ts
1553
- var import_zod32 = require("zod");
1622
+ var import_zod33 = require("zod");
1554
1623
  var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1555
1624
  FrequencyType2["DAILY"] = "daily";
1556
1625
  FrequencyType2["WEEKDAY"] = "weekday";
@@ -1560,29 +1629,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1560
1629
  })(FrequencyType || {});
/**
 * EvalScheduleSchema: TenantEntitySchema extended with scheduling fields.
 * Required: enabled, suiteId, presetId, frequencyType (FrequencyType enum),
 * timeOfDay (string matching HH:MM, 00:00-23:59) and timezone (string).
 * Optional: dayOfWeek (0-6), dayOfMonth (1-31), lastRunId, lastRunStatus,
 * lastRunAt and nextRunAt.
 * The removed (-) and added (+) lines carry identical field definitions;
 * only the bundler alias changes (import_zod32 -> import_zod33).
 * This object does not itself require dayOfWeek for weekly schedules or
 * dayOfMonth for monthly ones, and `timezone` is only checked to be a
 * string. Those rules appear in validateScheduleFields further down; how
 * that function is attached to this schema is not visible in this span.
 * NOTE(review): dayOfWeek and dayOfMonth use min/max bounds without
 * `.int()`, so a non-integer value inside the range is not rejected by
 * this schema — confirm whether that is intended.
 */
1561
1630
  var EvalScheduleSchema = TenantEntitySchema.extend({
1562
1631
  /** Whether the schedule is active */
1563
- enabled: import_zod32.z.boolean(),
1632
+ enabled: import_zod33.z.boolean(),
1564
1633
  /** Test suite to run */
1565
- suiteId: import_zod32.z.string(),
1634
+ suiteId: import_zod33.z.string(),
1566
1635
  /** Preset that provides agent + entities for this schedule */
1567
- presetId: import_zod32.z.string(),
1636
+ presetId: import_zod33.z.string(),
1568
1637
  /** How often to run */
1569
- frequencyType: import_zod32.z.nativeEnum(FrequencyType),
1638
+ frequencyType: import_zod33.z.nativeEnum(FrequencyType),
1570
1639
  /** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
1571
- timeOfDay: import_zod32.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1640
+ timeOfDay: import_zod33.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1572
1641
  /** Day of week (0=Sun, 6=Sat) for weekly schedules */
1573
- dayOfWeek: import_zod32.z.number().min(0).max(6).optional(),
1642
+ dayOfWeek: import_zod33.z.number().min(0).max(6).optional(),
1574
1643
  /** Day of month (1-31) for monthly schedules */
1575
- dayOfMonth: import_zod32.z.number().min(1).max(31).optional(),
1644
+ dayOfMonth: import_zod33.z.number().min(1).max(31).optional(),
1576
1645
  /** IANA timezone (e.g., 'America/New_York') */
1577
- timezone: import_zod32.z.string(),
1646
+ timezone: import_zod33.z.string(),
1578
1647
  /** ID of the last eval run created by this schedule */
1579
- lastRunId: import_zod32.z.string().optional(),
1648
+ lastRunId: import_zod33.z.string().optional(),
1580
1649
  /** Denormalized status of the last run */
1581
- lastRunStatus: import_zod32.z.string().optional(),
1650
+ lastRunStatus: import_zod33.z.string().optional(),
1582
1651
  /** ISO timestamp of the last run */
1583
- lastRunAt: import_zod32.z.string().optional(),
1652
+ lastRunAt: import_zod33.z.string().optional(),
1584
1653
  /** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
1585
- nextRunAt: import_zod32.z.string().optional()
1654
+ nextRunAt: import_zod33.z.string().optional()
1586
1655
  });
1587
1656
  function isValidTimezone(tz) {
1588
1657
  try {
@@ -1595,14 +1664,14 @@ function isValidTimezone(tz) {
1595
1664
  function validateScheduleFields(data, ctx, options) {
1596
1665
  if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
1597
1666
  ctx.addIssue({
1598
- code: import_zod32.z.ZodIssueCode.custom,
1667
+ code: import_zod33.z.ZodIssueCode.custom,
1599
1668
  message: "dayOfWeek is required for weekly schedules",
1600
1669
  path: ["dayOfWeek"]
1601
1670
  });
1602
1671
  }
1603
1672
  if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
1604
1673
  ctx.addIssue({
1605
- code: import_zod32.z.ZodIssueCode.custom,
1674
+ code: import_zod33.z.ZodIssueCode.custom,
1606
1675
  message: "dayOfMonth is required for monthly schedules",
1607
1676
  path: ["dayOfMonth"]
1608
1677
  });
@@ -1610,7 +1679,7 @@ function validateScheduleFields(data, ctx, options) {
1610
1679
  const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
1611
1680
  if (shouldValidateTz && !isValidTimezone(data.timezone)) {
1612
1681
  ctx.addIssue({
1613
- code: import_zod32.z.ZodIssueCode.custom,
1682
+ code: import_zod33.z.ZodIssueCode.custom,
1614
1683
  message: "Invalid IANA timezone",
1615
1684
  path: ["timezone"]
1616
1685
  });
@@ -1878,8 +1947,15 @@ function getSystemAssertion(id) {
1878
1947
  AssertionResultStatus,
1879
1948
  AssertionSchema,
1880
1949
  AssertionTypeSchema,
1950
+ BATCH_IMPORT_LIMITS,
1881
1951
  BaseEntitySchema,
1882
1952
  BaseTestSchema,
1953
+ BatchAssertionLinkSchema,
1954
+ BatchImportPayloadSchema,
1955
+ BatchImportResponseSchema,
1956
+ BatchResultItemSchema,
1957
+ BatchScenarioEntrySchema,
1958
+ BatchSummarySchema,
1883
1959
  BuildCheckTestSchema,
1884
1960
  BuildPassedAssertionSchema,
1885
1961
  BuildPassedConfigSchema,
@@ -2022,11 +2098,13 @@ function getSystemAssertion(id) {
2022
2098
  UpdateTestScenarioInputSchema,
2023
2099
  UpdateTestSuiteInputSchema,
2024
2100
  VitestTestSchema,
2101
+ classifyAssertionRef,
2025
2102
  formatTraceEventLine,
2026
2103
  getSystemAssertion,
2027
2104
  getSystemAssertions,
2028
2105
  isSystemAssertionId,
2029
2106
  isValidSkillFolderName,
2107
+ normalizeBatchAssertionLink,
2030
2108
  normalizeModelId,
2031
2109
  parseTraceEventLine,
2032
2110
  validateAssertionConfig