@wix/evalforge-types 0.65.0 → 0.66.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +377 -303
- package/build/index.js.map +4 -4
- package/build/index.mjs +368 -303
- package/build/index.mjs.map +4 -4
- package/build/types/scenario/batch-import.d.ts +106 -0
- package/build/types/scenario/index.d.ts +1 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -43,8 +43,15 @@ __export(index_exports, {
|
|
|
43
43
|
AssertionResultStatus: () => AssertionResultStatus,
|
|
44
44
|
AssertionSchema: () => AssertionSchema,
|
|
45
45
|
AssertionTypeSchema: () => AssertionTypeSchema,
|
|
46
|
+
BATCH_IMPORT_LIMITS: () => BATCH_IMPORT_LIMITS,
|
|
46
47
|
BaseEntitySchema: () => BaseEntitySchema,
|
|
47
48
|
BaseTestSchema: () => BaseTestSchema,
|
|
49
|
+
BatchAssertionLinkSchema: () => BatchAssertionLinkSchema,
|
|
50
|
+
BatchImportPayloadSchema: () => BatchImportPayloadSchema,
|
|
51
|
+
BatchImportResponseSchema: () => BatchImportResponseSchema,
|
|
52
|
+
BatchResultItemSchema: () => BatchResultItemSchema,
|
|
53
|
+
BatchScenarioEntrySchema: () => BatchScenarioEntrySchema,
|
|
54
|
+
BatchSummarySchema: () => BatchSummarySchema,
|
|
48
55
|
BuildCheckTestSchema: () => BuildCheckTestSchema,
|
|
49
56
|
BuildPassedAssertionSchema: () => BuildPassedAssertionSchema,
|
|
50
57
|
BuildPassedConfigSchema: () => BuildPassedConfigSchema,
|
|
@@ -187,11 +194,13 @@ __export(index_exports, {
|
|
|
187
194
|
UpdateTestScenarioInputSchema: () => UpdateTestScenarioInputSchema,
|
|
188
195
|
UpdateTestSuiteInputSchema: () => UpdateTestSuiteInputSchema,
|
|
189
196
|
VitestTestSchema: () => VitestTestSchema,
|
|
197
|
+
classifyAssertionRef: () => classifyAssertionRef,
|
|
190
198
|
formatTraceEventLine: () => formatTraceEventLine,
|
|
191
199
|
getSystemAssertion: () => getSystemAssertion,
|
|
192
200
|
getSystemAssertions: () => getSystemAssertions,
|
|
193
201
|
isSystemAssertionId: () => isSystemAssertionId,
|
|
194
202
|
isValidSkillFolderName: () => isValidSkillFolderName,
|
|
203
|
+
normalizeBatchAssertionLink: () => normalizeBatchAssertionLink,
|
|
195
204
|
normalizeModelId: () => normalizeModelId,
|
|
196
205
|
parseTraceEventLine: () => parseTraceEventLine,
|
|
197
206
|
validateAssertionConfig: () => validateAssertionConfig
|
|
@@ -997,11 +1006,67 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
|
|
|
997
1006
|
});
|
|
998
1007
|
var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
|
|
999
1008
|
|
|
1000
|
-
// src/
|
|
1009
|
+
// src/scenario/batch-import.ts
|
|
1001
1010
|
var import_zod24 = require("zod");
|
|
1011
|
+
var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
1012
|
+
var BatchAssertionLinkSchema = import_zod24.z.union([
|
|
1013
|
+
import_zod24.z.string().min(1),
|
|
1014
|
+
ScenarioAssertionLinkSchema
|
|
1015
|
+
]);
|
|
1016
|
+
var BatchScenarioEntrySchema = import_zod24.z.object({
|
|
1017
|
+
name: import_zod24.z.string().min(1, "name: Required"),
|
|
1018
|
+
description: import_zod24.z.string().optional().default(""),
|
|
1019
|
+
triggerPrompt: import_zod24.z.string().min(10, "triggerPrompt: Must be at least 10 characters"),
|
|
1020
|
+
templateId: import_zod24.z.string().nullish(),
|
|
1021
|
+
tags: import_zod24.z.array(import_zod24.z.string()).optional(),
|
|
1022
|
+
assertionLinks: import_zod24.z.array(BatchAssertionLinkSchema).optional()
|
|
1023
|
+
});
|
|
1024
|
+
var BatchImportPayloadSchema = import_zod24.z.object({
|
|
1025
|
+
scenarios: import_zod24.z.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
|
|
1026
|
+
});
|
|
1027
|
+
var BATCH_IMPORT_LIMITS = {
|
|
1028
|
+
MAX_SCENARIOS: 100,
|
|
1029
|
+
MAX_PAYLOAD_BYTES: 1048576
|
|
1030
|
+
// 1 MB
|
|
1031
|
+
};
|
|
1032
|
+
function classifyAssertionRef(ref) {
|
|
1033
|
+
if (ref.startsWith("system:")) {
|
|
1034
|
+
return { type: "system", value: ref };
|
|
1035
|
+
}
|
|
1036
|
+
if (UUID_REGEX.test(ref)) {
|
|
1037
|
+
return { type: "uuid", value: ref };
|
|
1038
|
+
}
|
|
1039
|
+
return { type: "name", value: ref };
|
|
1040
|
+
}
|
|
1041
|
+
function normalizeBatchAssertionLink(link) {
|
|
1042
|
+
if (typeof link === "string") {
|
|
1043
|
+
return { assertionId: link };
|
|
1044
|
+
}
|
|
1045
|
+
return link;
|
|
1046
|
+
}
|
|
1047
|
+
var BatchResultItemSchema = import_zod24.z.object({
|
|
1048
|
+
index: import_zod24.z.number(),
|
|
1049
|
+
name: import_zod24.z.string(),
|
|
1050
|
+
status: import_zod24.z.enum(["valid", "invalid"]),
|
|
1051
|
+
id: import_zod24.z.string().nullable().optional(),
|
|
1052
|
+
errors: import_zod24.z.array(import_zod24.z.string()).optional()
|
|
1053
|
+
});
|
|
1054
|
+
var BatchSummarySchema = import_zod24.z.object({
|
|
1055
|
+
total: import_zod24.z.number(),
|
|
1056
|
+
valid: import_zod24.z.number(),
|
|
1057
|
+
invalid: import_zod24.z.number(),
|
|
1058
|
+
created: import_zod24.z.number()
|
|
1059
|
+
});
|
|
1060
|
+
var BatchImportResponseSchema = import_zod24.z.object({
|
|
1061
|
+
summary: BatchSummarySchema,
|
|
1062
|
+
results: import_zod24.z.array(BatchResultItemSchema)
|
|
1063
|
+
});
|
|
1064
|
+
|
|
1065
|
+
// src/suite/test-suite.ts
|
|
1066
|
+
var import_zod25 = require("zod");
|
|
1002
1067
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
1003
1068
|
/** IDs of test scenarios in this suite */
|
|
1004
|
-
scenarioIds:
|
|
1069
|
+
scenarioIds: import_zod25.z.array(import_zod25.z.string())
|
|
1005
1070
|
});
|
|
1006
1071
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
1007
1072
|
id: true,
|
|
@@ -1012,21 +1077,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
1012
1077
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
1013
1078
|
|
|
1014
1079
|
// src/evaluation/metrics.ts
|
|
1015
|
-
var
|
|
1016
|
-
var TokenUsageSchema =
|
|
1017
|
-
prompt:
|
|
1018
|
-
completion:
|
|
1019
|
-
total:
|
|
1020
|
-
});
|
|
1021
|
-
var EvalMetricsSchema =
|
|
1022
|
-
totalAssertions:
|
|
1023
|
-
passed:
|
|
1024
|
-
failed:
|
|
1025
|
-
skipped:
|
|
1026
|
-
errors:
|
|
1027
|
-
passRate:
|
|
1028
|
-
avgDuration:
|
|
1029
|
-
totalDuration:
|
|
1080
|
+
var import_zod26 = require("zod");
|
|
1081
|
+
var TokenUsageSchema = import_zod26.z.object({
|
|
1082
|
+
prompt: import_zod26.z.number(),
|
|
1083
|
+
completion: import_zod26.z.number(),
|
|
1084
|
+
total: import_zod26.z.number()
|
|
1085
|
+
});
|
|
1086
|
+
var EvalMetricsSchema = import_zod26.z.object({
|
|
1087
|
+
totalAssertions: import_zod26.z.number(),
|
|
1088
|
+
passed: import_zod26.z.number(),
|
|
1089
|
+
failed: import_zod26.z.number(),
|
|
1090
|
+
skipped: import_zod26.z.number(),
|
|
1091
|
+
errors: import_zod26.z.number(),
|
|
1092
|
+
passRate: import_zod26.z.number(),
|
|
1093
|
+
avgDuration: import_zod26.z.number(),
|
|
1094
|
+
totalDuration: import_zod26.z.number()
|
|
1030
1095
|
});
|
|
1031
1096
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
1032
1097
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -1036,7 +1101,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
1036
1101
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
1037
1102
|
return EvalStatus2;
|
|
1038
1103
|
})(EvalStatus || {});
|
|
1039
|
-
var EvalStatusSchema =
|
|
1104
|
+
var EvalStatusSchema = import_zod26.z.enum(EvalStatus);
|
|
1040
1105
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
1041
1106
|
LLMStepType2["COMPLETION"] = "completion";
|
|
1042
1107
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -1044,54 +1109,54 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
1044
1109
|
LLMStepType2["THINKING"] = "thinking";
|
|
1045
1110
|
return LLMStepType2;
|
|
1046
1111
|
})(LLMStepType || {});
|
|
1047
|
-
var LLMTraceStepSchema =
|
|
1048
|
-
id:
|
|
1049
|
-
stepNumber:
|
|
1050
|
-
type:
|
|
1051
|
-
model:
|
|
1052
|
-
provider:
|
|
1053
|
-
startedAt:
|
|
1054
|
-
durationMs:
|
|
1112
|
+
var LLMTraceStepSchema = import_zod26.z.object({
|
|
1113
|
+
id: import_zod26.z.string(),
|
|
1114
|
+
stepNumber: import_zod26.z.number(),
|
|
1115
|
+
type: import_zod26.z.enum(LLMStepType),
|
|
1116
|
+
model: import_zod26.z.string(),
|
|
1117
|
+
provider: import_zod26.z.string(),
|
|
1118
|
+
startedAt: import_zod26.z.string(),
|
|
1119
|
+
durationMs: import_zod26.z.number(),
|
|
1055
1120
|
tokenUsage: TokenUsageSchema,
|
|
1056
|
-
costUsd:
|
|
1057
|
-
toolName:
|
|
1058
|
-
toolArguments:
|
|
1059
|
-
inputPreview:
|
|
1060
|
-
outputPreview:
|
|
1061
|
-
success:
|
|
1062
|
-
error:
|
|
1063
|
-
turnIndex:
|
|
1064
|
-
});
|
|
1065
|
-
var LLMBreakdownStatsSchema =
|
|
1066
|
-
count:
|
|
1067
|
-
durationMs:
|
|
1068
|
-
tokens:
|
|
1069
|
-
costUsd:
|
|
1070
|
-
});
|
|
1071
|
-
var LLMTraceSummarySchema =
|
|
1072
|
-
totalSteps:
|
|
1073
|
-
totalTurns:
|
|
1074
|
-
totalDurationMs:
|
|
1121
|
+
costUsd: import_zod26.z.number(),
|
|
1122
|
+
toolName: import_zod26.z.string().optional(),
|
|
1123
|
+
toolArguments: import_zod26.z.string().optional(),
|
|
1124
|
+
inputPreview: import_zod26.z.string().optional(),
|
|
1125
|
+
outputPreview: import_zod26.z.string().optional(),
|
|
1126
|
+
success: import_zod26.z.boolean(),
|
|
1127
|
+
error: import_zod26.z.string().optional(),
|
|
1128
|
+
turnIndex: import_zod26.z.number().optional()
|
|
1129
|
+
});
|
|
1130
|
+
var LLMBreakdownStatsSchema = import_zod26.z.object({
|
|
1131
|
+
count: import_zod26.z.number(),
|
|
1132
|
+
durationMs: import_zod26.z.number(),
|
|
1133
|
+
tokens: import_zod26.z.number(),
|
|
1134
|
+
costUsd: import_zod26.z.number()
|
|
1135
|
+
});
|
|
1136
|
+
var LLMTraceSummarySchema = import_zod26.z.object({
|
|
1137
|
+
totalSteps: import_zod26.z.number(),
|
|
1138
|
+
totalTurns: import_zod26.z.number().optional(),
|
|
1139
|
+
totalDurationMs: import_zod26.z.number(),
|
|
1075
1140
|
totalTokens: TokenUsageSchema,
|
|
1076
|
-
totalCostUsd:
|
|
1077
|
-
stepTypeBreakdown:
|
|
1078
|
-
modelBreakdown:
|
|
1079
|
-
modelsUsed:
|
|
1080
|
-
});
|
|
1081
|
-
var LLMTraceSchema =
|
|
1082
|
-
id:
|
|
1083
|
-
steps:
|
|
1141
|
+
totalCostUsd: import_zod26.z.number(),
|
|
1142
|
+
stepTypeBreakdown: import_zod26.z.record(import_zod26.z.string(), LLMBreakdownStatsSchema).optional(),
|
|
1143
|
+
modelBreakdown: import_zod26.z.record(import_zod26.z.string(), LLMBreakdownStatsSchema),
|
|
1144
|
+
modelsUsed: import_zod26.z.array(import_zod26.z.string())
|
|
1145
|
+
});
|
|
1146
|
+
var LLMTraceSchema = import_zod26.z.object({
|
|
1147
|
+
id: import_zod26.z.string(),
|
|
1148
|
+
steps: import_zod26.z.array(LLMTraceStepSchema),
|
|
1084
1149
|
summary: LLMTraceSummarySchema
|
|
1085
1150
|
});
|
|
1086
1151
|
|
|
1087
1152
|
// src/evaluation/eval-result.ts
|
|
1088
|
-
var
|
|
1153
|
+
var import_zod30 = require("zod");
|
|
1089
1154
|
|
|
1090
1155
|
// src/evaluation/eval-run.ts
|
|
1091
|
-
var
|
|
1156
|
+
var import_zod28 = require("zod");
|
|
1092
1157
|
|
|
1093
1158
|
// src/evaluation/live-trace.ts
|
|
1094
|
-
var
|
|
1159
|
+
var import_zod27 = require("zod");
|
|
1095
1160
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
1096
1161
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
1097
1162
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -1105,37 +1170,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
1105
1170
|
LiveTraceEventType2["USER"] = "user";
|
|
1106
1171
|
return LiveTraceEventType2;
|
|
1107
1172
|
})(LiveTraceEventType || {});
|
|
1108
|
-
var LiveTraceEventSchema =
|
|
1173
|
+
var LiveTraceEventSchema = import_zod27.z.object({
|
|
1109
1174
|
/** The evaluation run ID */
|
|
1110
|
-
evalRunId:
|
|
1175
|
+
evalRunId: import_zod27.z.string(),
|
|
1111
1176
|
/** The scenario ID being executed */
|
|
1112
|
-
scenarioId:
|
|
1177
|
+
scenarioId: import_zod27.z.string(),
|
|
1113
1178
|
/** The scenario name for display */
|
|
1114
|
-
scenarioName:
|
|
1179
|
+
scenarioName: import_zod27.z.string(),
|
|
1115
1180
|
/** The target ID (skill, agent, etc.) */
|
|
1116
|
-
targetId:
|
|
1181
|
+
targetId: import_zod27.z.string(),
|
|
1117
1182
|
/** The target name for display */
|
|
1118
|
-
targetName:
|
|
1183
|
+
targetName: import_zod27.z.string(),
|
|
1119
1184
|
/** Step number in the current scenario execution */
|
|
1120
|
-
stepNumber:
|
|
1185
|
+
stepNumber: import_zod27.z.number(),
|
|
1121
1186
|
/** Type of trace event */
|
|
1122
|
-
type:
|
|
1187
|
+
type: import_zod27.z.enum(LiveTraceEventType),
|
|
1123
1188
|
/** Tool name if this is a tool_use event */
|
|
1124
|
-
toolName:
|
|
1189
|
+
toolName: import_zod27.z.string().optional(),
|
|
1125
1190
|
/** Tool arguments preview (truncated JSON) */
|
|
1126
|
-
toolArgs:
|
|
1191
|
+
toolArgs: import_zod27.z.string().optional(),
|
|
1127
1192
|
/** Output preview (truncated text) */
|
|
1128
|
-
outputPreview:
|
|
1193
|
+
outputPreview: import_zod27.z.string().optional(),
|
|
1129
1194
|
/** File path for file operations */
|
|
1130
|
-
filePath:
|
|
1195
|
+
filePath: import_zod27.z.string().optional(),
|
|
1131
1196
|
/** Elapsed time in milliseconds for progress events */
|
|
1132
|
-
elapsedMs:
|
|
1197
|
+
elapsedMs: import_zod27.z.number().optional(),
|
|
1133
1198
|
/** Thinking/reasoning text from Claude */
|
|
1134
|
-
thinking:
|
|
1199
|
+
thinking: import_zod27.z.string().optional(),
|
|
1135
1200
|
/** Timestamp when this event occurred */
|
|
1136
|
-
timestamp:
|
|
1201
|
+
timestamp: import_zod27.z.string(),
|
|
1137
1202
|
/** Whether this is the final event for this scenario */
|
|
1138
|
-
isComplete:
|
|
1203
|
+
isComplete: import_zod27.z.boolean()
|
|
1139
1204
|
});
|
|
1140
1205
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
1141
1206
|
function parseTraceEventLine(line) {
|
|
@@ -1164,15 +1229,15 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
1164
1229
|
TriggerType2["SCHEDULED"] = "SCHEDULED";
|
|
1165
1230
|
return TriggerType2;
|
|
1166
1231
|
})(TriggerType || {});
|
|
1167
|
-
var TriggerMetadataSchema =
|
|
1168
|
-
version:
|
|
1169
|
-
resourceUpdated:
|
|
1170
|
-
scheduleId:
|
|
1232
|
+
var TriggerMetadataSchema = import_zod28.z.object({
|
|
1233
|
+
version: import_zod28.z.string().optional(),
|
|
1234
|
+
resourceUpdated: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1235
|
+
scheduleId: import_zod28.z.string().optional()
|
|
1171
1236
|
});
|
|
1172
|
-
var TriggerSchema =
|
|
1173
|
-
id:
|
|
1237
|
+
var TriggerSchema = import_zod28.z.object({
|
|
1238
|
+
id: import_zod28.z.string(),
|
|
1174
1239
|
metadata: TriggerMetadataSchema.optional(),
|
|
1175
|
-
type:
|
|
1240
|
+
type: import_zod28.z.nativeEnum(TriggerType)
|
|
1176
1241
|
});
|
|
1177
1242
|
var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
|
|
1178
1243
|
FailureCategory2["MISSING_FILE"] = "missing_file";
|
|
@@ -1190,30 +1255,30 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
|
|
|
1190
1255
|
FailureSeverity2["LOW"] = "low";
|
|
1191
1256
|
return FailureSeverity2;
|
|
1192
1257
|
})(FailureSeverity || {});
|
|
1193
|
-
var DiffLineTypeSchema =
|
|
1194
|
-
var DiffLineSchema =
|
|
1258
|
+
var DiffLineTypeSchema = import_zod28.z.enum(["added", "removed", "unchanged"]);
|
|
1259
|
+
var DiffLineSchema = import_zod28.z.object({
|
|
1195
1260
|
type: DiffLineTypeSchema,
|
|
1196
|
-
content:
|
|
1197
|
-
lineNumber:
|
|
1198
|
-
});
|
|
1199
|
-
var DiffContentSchema =
|
|
1200
|
-
path:
|
|
1201
|
-
expected:
|
|
1202
|
-
actual:
|
|
1203
|
-
diffLines:
|
|
1204
|
-
renamedFrom:
|
|
1261
|
+
content: import_zod28.z.string(),
|
|
1262
|
+
lineNumber: import_zod28.z.number()
|
|
1263
|
+
});
|
|
1264
|
+
var DiffContentSchema = import_zod28.z.object({
|
|
1265
|
+
path: import_zod28.z.string(),
|
|
1266
|
+
expected: import_zod28.z.string(),
|
|
1267
|
+
actual: import_zod28.z.string(),
|
|
1268
|
+
diffLines: import_zod28.z.array(DiffLineSchema),
|
|
1269
|
+
renamedFrom: import_zod28.z.string().optional(),
|
|
1205
1270
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1206
|
-
isInfrastructure:
|
|
1271
|
+
isInfrastructure: import_zod28.z.boolean().optional()
|
|
1207
1272
|
});
|
|
1208
|
-
var CommandExecutionSchema =
|
|
1209
|
-
command:
|
|
1210
|
-
exitCode:
|
|
1211
|
-
output:
|
|
1212
|
-
duration:
|
|
1273
|
+
var CommandExecutionSchema = import_zod28.z.object({
|
|
1274
|
+
command: import_zod28.z.string(),
|
|
1275
|
+
exitCode: import_zod28.z.number(),
|
|
1276
|
+
output: import_zod28.z.string().optional(),
|
|
1277
|
+
duration: import_zod28.z.number()
|
|
1213
1278
|
});
|
|
1214
|
-
var FileModificationSchema =
|
|
1215
|
-
path:
|
|
1216
|
-
action:
|
|
1279
|
+
var FileModificationSchema = import_zod28.z.object({
|
|
1280
|
+
path: import_zod28.z.string(),
|
|
1281
|
+
action: import_zod28.z.enum(["created", "modified", "deleted"])
|
|
1217
1282
|
});
|
|
1218
1283
|
var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
1219
1284
|
TemplateFileStatus2["NEW"] = "new";
|
|
@@ -1221,89 +1286,89 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
|
1221
1286
|
TemplateFileStatus2["UNCHANGED"] = "unchanged";
|
|
1222
1287
|
return TemplateFileStatus2;
|
|
1223
1288
|
})(TemplateFileStatus || {});
|
|
1224
|
-
var TemplateFileSchema =
|
|
1289
|
+
var TemplateFileSchema = import_zod28.z.object({
|
|
1225
1290
|
/** Relative path within the template */
|
|
1226
|
-
path:
|
|
1291
|
+
path: import_zod28.z.string(),
|
|
1227
1292
|
/** Full file content after execution */
|
|
1228
|
-
content:
|
|
1293
|
+
content: import_zod28.z.string(),
|
|
1229
1294
|
/** File status (new, modified, unchanged) */
|
|
1230
|
-
status:
|
|
1295
|
+
status: import_zod28.z.enum(["new", "modified", "unchanged"]),
|
|
1231
1296
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1232
|
-
isInfrastructure:
|
|
1233
|
-
});
|
|
1234
|
-
var ApiCallSchema =
|
|
1235
|
-
endpoint:
|
|
1236
|
-
tokensUsed:
|
|
1237
|
-
duration:
|
|
1238
|
-
});
|
|
1239
|
-
var ExecutionTraceSchema =
|
|
1240
|
-
commands:
|
|
1241
|
-
filesModified:
|
|
1242
|
-
apiCalls:
|
|
1243
|
-
totalDuration:
|
|
1244
|
-
});
|
|
1245
|
-
var FailureAnalysisSchema =
|
|
1246
|
-
category:
|
|
1247
|
-
severity:
|
|
1248
|
-
summary:
|
|
1249
|
-
details:
|
|
1250
|
-
rootCause:
|
|
1251
|
-
suggestedFix:
|
|
1252
|
-
relatedAssertions:
|
|
1253
|
-
codeSnippet:
|
|
1254
|
-
similarIssues:
|
|
1255
|
-
patternId:
|
|
1297
|
+
isInfrastructure: import_zod28.z.boolean().optional()
|
|
1298
|
+
});
|
|
1299
|
+
var ApiCallSchema = import_zod28.z.object({
|
|
1300
|
+
endpoint: import_zod28.z.string(),
|
|
1301
|
+
tokensUsed: import_zod28.z.number(),
|
|
1302
|
+
duration: import_zod28.z.number()
|
|
1303
|
+
});
|
|
1304
|
+
var ExecutionTraceSchema = import_zod28.z.object({
|
|
1305
|
+
commands: import_zod28.z.array(CommandExecutionSchema),
|
|
1306
|
+
filesModified: import_zod28.z.array(FileModificationSchema),
|
|
1307
|
+
apiCalls: import_zod28.z.array(ApiCallSchema),
|
|
1308
|
+
totalDuration: import_zod28.z.number()
|
|
1309
|
+
});
|
|
1310
|
+
var FailureAnalysisSchema = import_zod28.z.object({
|
|
1311
|
+
category: import_zod28.z.enum(FailureCategory),
|
|
1312
|
+
severity: import_zod28.z.enum(FailureSeverity),
|
|
1313
|
+
summary: import_zod28.z.string(),
|
|
1314
|
+
details: import_zod28.z.string(),
|
|
1315
|
+
rootCause: import_zod28.z.string(),
|
|
1316
|
+
suggestedFix: import_zod28.z.string(),
|
|
1317
|
+
relatedAssertions: import_zod28.z.array(import_zod28.z.string()),
|
|
1318
|
+
codeSnippet: import_zod28.z.string().optional(),
|
|
1319
|
+
similarIssues: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1320
|
+
patternId: import_zod28.z.string().optional(),
|
|
1256
1321
|
// Extended fields for detailed debugging
|
|
1257
1322
|
diff: DiffContentSchema.optional(),
|
|
1258
1323
|
executionTrace: ExecutionTraceSchema.optional()
|
|
1259
1324
|
});
|
|
1260
1325
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1261
1326
|
/** Agent ID for this run */
|
|
1262
|
-
agentId:
|
|
1327
|
+
agentId: import_zod28.z.string().optional(),
|
|
1263
1328
|
/** Preset ID that originated this run (optional) */
|
|
1264
|
-
presetId:
|
|
1329
|
+
presetId: import_zod28.z.string().optional(),
|
|
1265
1330
|
/** Skill IDs for this run */
|
|
1266
|
-
skillIds:
|
|
1331
|
+
skillIds: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1267
1332
|
/** Map of skillId to skillVersionId for this run */
|
|
1268
|
-
skillVersions:
|
|
1333
|
+
skillVersions: import_zod28.z.record(import_zod28.z.string(), import_zod28.z.string()).optional(),
|
|
1269
1334
|
/** Scenario IDs to run (always present — resolved server-side from tags when needed) */
|
|
1270
|
-
scenarioIds:
|
|
1335
|
+
scenarioIds: import_zod28.z.array(import_zod28.z.string()),
|
|
1271
1336
|
/** Current status */
|
|
1272
1337
|
status: EvalStatusSchema,
|
|
1273
1338
|
/** Progress percentage (0-100) */
|
|
1274
|
-
progress:
|
|
1339
|
+
progress: import_zod28.z.number(),
|
|
1275
1340
|
/** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
|
|
1276
|
-
results:
|
|
1341
|
+
results: import_zod28.z.array(import_zod28.z.lazy(() => EvalRunResultSchema)),
|
|
1277
1342
|
/** Aggregated metrics across all results */
|
|
1278
1343
|
aggregateMetrics: EvalMetricsSchema,
|
|
1279
1344
|
/** Failure analyses */
|
|
1280
|
-
failureAnalyses:
|
|
1345
|
+
failureAnalyses: import_zod28.z.array(FailureAnalysisSchema).optional(),
|
|
1281
1346
|
/** Aggregated LLM trace summary */
|
|
1282
1347
|
llmTraceSummary: LLMTraceSummarySchema.optional(),
|
|
1283
1348
|
/** What triggered this run */
|
|
1284
1349
|
trigger: TriggerSchema.optional(),
|
|
1285
1350
|
/** When the run started (set when evaluation is triggered) */
|
|
1286
|
-
startedAt:
|
|
1351
|
+
startedAt: import_zod28.z.string().optional(),
|
|
1287
1352
|
/** When the run completed */
|
|
1288
|
-
completedAt:
|
|
1353
|
+
completedAt: import_zod28.z.string().optional(),
|
|
1289
1354
|
/** Live trace events captured during execution (for playback on results page) */
|
|
1290
|
-
liveTraceEvents:
|
|
1355
|
+
liveTraceEvents: import_zod28.z.array(LiveTraceEventSchema).optional(),
|
|
1291
1356
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
1292
|
-
jobId:
|
|
1357
|
+
jobId: import_zod28.z.string().optional(),
|
|
1293
1358
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
1294
|
-
jobStatus:
|
|
1359
|
+
jobStatus: import_zod28.z.string().optional(),
|
|
1295
1360
|
/** Remote job error message if the job failed */
|
|
1296
|
-
jobError:
|
|
1361
|
+
jobError: import_zod28.z.string().optional(),
|
|
1297
1362
|
/** Timestamp of the last job status check */
|
|
1298
|
-
jobStatusCheckedAt:
|
|
1363
|
+
jobStatusCheckedAt: import_zod28.z.string().optional(),
|
|
1299
1364
|
/** MCP server IDs to enable for this run (optional) */
|
|
1300
|
-
mcpIds:
|
|
1365
|
+
mcpIds: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1301
1366
|
/** Sub-agent IDs to enable for this run (optional) */
|
|
1302
|
-
subAgentIds:
|
|
1367
|
+
subAgentIds: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1303
1368
|
/** Rule IDs to enable for this run (optional) */
|
|
1304
|
-
ruleIds:
|
|
1369
|
+
ruleIds: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1305
1370
|
/** Tags used to select scenarios for this run (for traceability) */
|
|
1306
|
-
tags:
|
|
1371
|
+
tags: import_zod28.z.array(import_zod28.z.string()).optional()
|
|
1307
1372
|
});
|
|
1308
1373
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
1309
1374
|
id: true,
|
|
@@ -1318,60 +1383,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
1318
1383
|
scenarioIds: true
|
|
1319
1384
|
}).extend({
|
|
1320
1385
|
/** Optional on input — backend resolves from tags when not provided */
|
|
1321
|
-
scenarioIds:
|
|
1386
|
+
scenarioIds: import_zod28.z.array(import_zod28.z.string()).optional()
|
|
1322
1387
|
}).refine(
|
|
1323
1388
|
(data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
|
|
1324
1389
|
{ message: "Either scenarioIds or tags must be provided" }
|
|
1325
1390
|
);
|
|
1326
|
-
var EvaluationProgressSchema =
|
|
1327
|
-
runId:
|
|
1328
|
-
targetId:
|
|
1329
|
-
totalScenarios:
|
|
1330
|
-
completedScenarios:
|
|
1331
|
-
scenarioProgress:
|
|
1332
|
-
|
|
1333
|
-
scenarioId:
|
|
1334
|
-
currentStep:
|
|
1335
|
-
error:
|
|
1391
|
+
var EvaluationProgressSchema = import_zod28.z.object({
|
|
1392
|
+
runId: import_zod28.z.string(),
|
|
1393
|
+
targetId: import_zod28.z.string(),
|
|
1394
|
+
totalScenarios: import_zod28.z.number(),
|
|
1395
|
+
completedScenarios: import_zod28.z.number(),
|
|
1396
|
+
scenarioProgress: import_zod28.z.array(
|
|
1397
|
+
import_zod28.z.object({
|
|
1398
|
+
scenarioId: import_zod28.z.string(),
|
|
1399
|
+
currentStep: import_zod28.z.string(),
|
|
1400
|
+
error: import_zod28.z.string().optional()
|
|
1336
1401
|
})
|
|
1337
1402
|
),
|
|
1338
|
-
createdAt:
|
|
1339
|
-
});
|
|
1340
|
-
var EvaluationLogSchema =
|
|
1341
|
-
runId:
|
|
1342
|
-
scenarioId:
|
|
1343
|
-
log:
|
|
1344
|
-
level:
|
|
1345
|
-
message:
|
|
1346
|
-
args:
|
|
1347
|
-
error:
|
|
1403
|
+
createdAt: import_zod28.z.number()
|
|
1404
|
+
});
|
|
1405
|
+
var EvaluationLogSchema = import_zod28.z.object({
|
|
1406
|
+
runId: import_zod28.z.string(),
|
|
1407
|
+
scenarioId: import_zod28.z.string(),
|
|
1408
|
+
log: import_zod28.z.object({
|
|
1409
|
+
level: import_zod28.z.enum(["info", "error", "debug"]),
|
|
1410
|
+
message: import_zod28.z.string().optional(),
|
|
1411
|
+
args: import_zod28.z.array(import_zod28.z.any()).optional(),
|
|
1412
|
+
error: import_zod28.z.string().optional()
|
|
1348
1413
|
})
|
|
1349
1414
|
});
|
|
1350
1415
|
var LLM_TIMEOUT = 12e4;
|
|
1351
1416
|
|
|
1352
1417
|
// src/evaluation/conversation.ts
|
|
1353
|
-
var
|
|
1354
|
-
var TextBlockSchema =
|
|
1355
|
-
type:
|
|
1356
|
-
text:
|
|
1357
|
-
});
|
|
1358
|
-
var ThinkingBlockSchema =
|
|
1359
|
-
type:
|
|
1360
|
-
thinking:
|
|
1361
|
-
});
|
|
1362
|
-
var ToolUseBlockSchema =
|
|
1363
|
-
type:
|
|
1364
|
-
toolName:
|
|
1365
|
-
toolId:
|
|
1366
|
-
input:
|
|
1367
|
-
});
|
|
1368
|
-
var ToolResultBlockSchema =
|
|
1369
|
-
type:
|
|
1370
|
-
toolUseId:
|
|
1371
|
-
content:
|
|
1372
|
-
isError:
|
|
1373
|
-
});
|
|
1374
|
-
var ConversationBlockSchema =
|
|
1418
|
+
var import_zod29 = require("zod");
|
|
1419
|
+
var TextBlockSchema = import_zod29.z.object({
|
|
1420
|
+
type: import_zod29.z.literal("text"),
|
|
1421
|
+
text: import_zod29.z.string()
|
|
1422
|
+
});
|
|
1423
|
+
var ThinkingBlockSchema = import_zod29.z.object({
|
|
1424
|
+
type: import_zod29.z.literal("thinking"),
|
|
1425
|
+
thinking: import_zod29.z.string()
|
|
1426
|
+
});
|
|
1427
|
+
var ToolUseBlockSchema = import_zod29.z.object({
|
|
1428
|
+
type: import_zod29.z.literal("tool_use"),
|
|
1429
|
+
toolName: import_zod29.z.string(),
|
|
1430
|
+
toolId: import_zod29.z.string(),
|
|
1431
|
+
input: import_zod29.z.unknown()
|
|
1432
|
+
});
|
|
1433
|
+
var ToolResultBlockSchema = import_zod29.z.object({
|
|
1434
|
+
type: import_zod29.z.literal("tool_result"),
|
|
1435
|
+
toolUseId: import_zod29.z.string(),
|
|
1436
|
+
content: import_zod29.z.string(),
|
|
1437
|
+
isError: import_zod29.z.boolean().optional()
|
|
1438
|
+
});
|
|
1439
|
+
var ConversationBlockSchema = import_zod29.z.discriminatedUnion("type", [
|
|
1375
1440
|
TextBlockSchema,
|
|
1376
1441
|
ThinkingBlockSchema,
|
|
1377
1442
|
ToolUseBlockSchema,
|
|
@@ -1382,18 +1447,18 @@ var ConversationMessageRoles = [
|
|
|
1382
1447
|
"user",
|
|
1383
1448
|
"system"
|
|
1384
1449
|
];
|
|
1385
|
-
var ConversationMessageSchema =
|
|
1386
|
-
role:
|
|
1387
|
-
content:
|
|
1388
|
-
timestamp:
|
|
1450
|
+
var ConversationMessageSchema = import_zod29.z.object({
|
|
1451
|
+
role: import_zod29.z.enum(ConversationMessageRoles),
|
|
1452
|
+
content: import_zod29.z.array(ConversationBlockSchema),
|
|
1453
|
+
timestamp: import_zod29.z.string()
|
|
1389
1454
|
});
|
|
1390
|
-
var ScenarioConversationSchema =
|
|
1391
|
-
id:
|
|
1392
|
-
projectId:
|
|
1393
|
-
evalRunId:
|
|
1394
|
-
resultId:
|
|
1395
|
-
messages:
|
|
1396
|
-
createdAt:
|
|
1455
|
+
var ScenarioConversationSchema = import_zod29.z.object({
|
|
1456
|
+
id: import_zod29.z.string(),
|
|
1457
|
+
projectId: import_zod29.z.string(),
|
|
1458
|
+
evalRunId: import_zod29.z.string(),
|
|
1459
|
+
resultId: import_zod29.z.string(),
|
|
1460
|
+
messages: import_zod29.z.array(ConversationMessageSchema),
|
|
1461
|
+
createdAt: import_zod29.z.string()
|
|
1397
1462
|
});
|
|
1398
1463
|
|
|
1399
1464
|
// src/evaluation/eval-result.ts
|
|
@@ -1404,94 +1469,94 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
1404
1469
|
AssertionResultStatus2["ERROR"] = "error";
|
|
1405
1470
|
return AssertionResultStatus2;
|
|
1406
1471
|
})(AssertionResultStatus || {});
|
|
1407
|
-
var AssertionResultSchema =
|
|
1408
|
-
id:
|
|
1409
|
-
assertionId:
|
|
1410
|
-
assertionType:
|
|
1411
|
-
assertionName:
|
|
1412
|
-
status:
|
|
1413
|
-
message:
|
|
1414
|
-
expected:
|
|
1415
|
-
actual:
|
|
1416
|
-
duration:
|
|
1417
|
-
details:
|
|
1418
|
-
llmTraceSteps:
|
|
1419
|
-
});
|
|
1420
|
-
var EvalRunResultSchema =
|
|
1421
|
-
id:
|
|
1422
|
-
targetId:
|
|
1423
|
-
targetName:
|
|
1472
|
+
var AssertionResultSchema = import_zod30.z.object({
|
|
1473
|
+
id: import_zod30.z.string(),
|
|
1474
|
+
assertionId: import_zod30.z.string(),
|
|
1475
|
+
assertionType: import_zod30.z.string(),
|
|
1476
|
+
assertionName: import_zod30.z.string(),
|
|
1477
|
+
status: import_zod30.z.enum(AssertionResultStatus),
|
|
1478
|
+
message: import_zod30.z.string().optional(),
|
|
1479
|
+
expected: import_zod30.z.string().optional(),
|
|
1480
|
+
actual: import_zod30.z.string().optional(),
|
|
1481
|
+
duration: import_zod30.z.number().optional(),
|
|
1482
|
+
details: import_zod30.z.record(import_zod30.z.string(), import_zod30.z.unknown()).optional(),
|
|
1483
|
+
llmTraceSteps: import_zod30.z.array(LLMTraceStepSchema).optional()
|
|
1484
|
+
});
|
|
1485
|
+
var EvalRunResultSchema = import_zod30.z.object({
|
|
1486
|
+
id: import_zod30.z.string(),
|
|
1487
|
+
targetId: import_zod30.z.string(),
|
|
1488
|
+
targetName: import_zod30.z.string().optional(),
|
|
1424
1489
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
1425
|
-
skillVersionId:
|
|
1490
|
+
skillVersionId: import_zod30.z.string().optional(),
|
|
1426
1491
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
1427
|
-
skillVersion:
|
|
1428
|
-
scenarioId:
|
|
1429
|
-
scenarioName:
|
|
1492
|
+
skillVersion: import_zod30.z.string().optional(),
|
|
1493
|
+
scenarioId: import_zod30.z.string(),
|
|
1494
|
+
scenarioName: import_zod30.z.string(),
|
|
1430
1495
|
modelConfig: ModelConfigSchema.optional(),
|
|
1431
|
-
assertionResults:
|
|
1496
|
+
assertionResults: import_zod30.z.array(AssertionResultSchema),
|
|
1432
1497
|
metrics: EvalMetricsSchema.optional(),
|
|
1433
|
-
passed:
|
|
1434
|
-
failed:
|
|
1435
|
-
passRate:
|
|
1436
|
-
duration:
|
|
1437
|
-
outputText:
|
|
1438
|
-
files:
|
|
1439
|
-
fileDiffs:
|
|
1498
|
+
passed: import_zod30.z.number(),
|
|
1499
|
+
failed: import_zod30.z.number(),
|
|
1500
|
+
passRate: import_zod30.z.number(),
|
|
1501
|
+
duration: import_zod30.z.number(),
|
|
1502
|
+
outputText: import_zod30.z.string().optional(),
|
|
1503
|
+
files: import_zod30.z.array(ExpectedFileSchema).optional(),
|
|
1504
|
+
fileDiffs: import_zod30.z.array(DiffContentSchema).optional(),
|
|
1440
1505
|
/** Full template files after execution with status indicators */
|
|
1441
|
-
templateFiles:
|
|
1442
|
-
startedAt:
|
|
1443
|
-
completedAt:
|
|
1506
|
+
templateFiles: import_zod30.z.array(TemplateFileSchema).optional(),
|
|
1507
|
+
startedAt: import_zod30.z.string().optional(),
|
|
1508
|
+
completedAt: import_zod30.z.string().optional(),
|
|
1444
1509
|
llmTrace: LLMTraceSchema.optional(),
|
|
1445
1510
|
/** Full conversation messages (only present in transit; stripped before DB storage) */
|
|
1446
|
-
conversation:
|
|
1447
|
-
});
|
|
1448
|
-
var PromptResultSchema =
|
|
1449
|
-
text:
|
|
1450
|
-
files:
|
|
1451
|
-
finishReason:
|
|
1452
|
-
reasoning:
|
|
1453
|
-
reasoningDetails:
|
|
1454
|
-
toolCalls:
|
|
1455
|
-
toolResults:
|
|
1456
|
-
warnings:
|
|
1457
|
-
sources:
|
|
1458
|
-
steps:
|
|
1459
|
-
generationTimeMs:
|
|
1460
|
-
prompt:
|
|
1461
|
-
systemPrompt:
|
|
1462
|
-
usage:
|
|
1463
|
-
totalTokens:
|
|
1464
|
-
totalMicrocentsSpent:
|
|
1511
|
+
conversation: import_zod30.z.array(ConversationMessageSchema).optional()
|
|
1512
|
+
});
|
|
1513
|
+
var PromptResultSchema = import_zod30.z.object({
|
|
1514
|
+
text: import_zod30.z.string(),
|
|
1515
|
+
files: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
1516
|
+
finishReason: import_zod30.z.string().optional(),
|
|
1517
|
+
reasoning: import_zod30.z.string().optional(),
|
|
1518
|
+
reasoningDetails: import_zod30.z.unknown().optional(),
|
|
1519
|
+
toolCalls: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
1520
|
+
toolResults: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
1521
|
+
warnings: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
1522
|
+
sources: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
1523
|
+
steps: import_zod30.z.array(import_zod30.z.unknown()),
|
|
1524
|
+
generationTimeMs: import_zod30.z.number(),
|
|
1525
|
+
prompt: import_zod30.z.string(),
|
|
1526
|
+
systemPrompt: import_zod30.z.string(),
|
|
1527
|
+
usage: import_zod30.z.object({
|
|
1528
|
+
totalTokens: import_zod30.z.number().optional(),
|
|
1529
|
+
totalMicrocentsSpent: import_zod30.z.number().optional()
|
|
1465
1530
|
})
|
|
1466
1531
|
});
|
|
1467
|
-
var EvaluationResultSchema =
|
|
1468
|
-
id:
|
|
1469
|
-
runId:
|
|
1470
|
-
timestamp:
|
|
1532
|
+
var EvaluationResultSchema = import_zod30.z.object({
|
|
1533
|
+
id: import_zod30.z.string(),
|
|
1534
|
+
runId: import_zod30.z.string(),
|
|
1535
|
+
timestamp: import_zod30.z.number(),
|
|
1471
1536
|
promptResult: PromptResultSchema,
|
|
1472
|
-
testResults:
|
|
1473
|
-
tags:
|
|
1474
|
-
feedback:
|
|
1475
|
-
score:
|
|
1476
|
-
suiteId:
|
|
1477
|
-
});
|
|
1478
|
-
var LeanEvaluationResultSchema =
|
|
1479
|
-
id:
|
|
1480
|
-
runId:
|
|
1481
|
-
timestamp:
|
|
1482
|
-
tags:
|
|
1483
|
-
scenarioId:
|
|
1484
|
-
scenarioVersion:
|
|
1485
|
-
targetId:
|
|
1486
|
-
targetVersion:
|
|
1487
|
-
suiteId:
|
|
1488
|
-
score:
|
|
1489
|
-
time:
|
|
1490
|
-
microcentsSpent:
|
|
1537
|
+
testResults: import_zod30.z.array(import_zod30.z.unknown()),
|
|
1538
|
+
tags: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1539
|
+
feedback: import_zod30.z.string().optional(),
|
|
1540
|
+
score: import_zod30.z.number(),
|
|
1541
|
+
suiteId: import_zod30.z.string().optional()
|
|
1542
|
+
});
|
|
1543
|
+
var LeanEvaluationResultSchema = import_zod30.z.object({
|
|
1544
|
+
id: import_zod30.z.string(),
|
|
1545
|
+
runId: import_zod30.z.string(),
|
|
1546
|
+
timestamp: import_zod30.z.number(),
|
|
1547
|
+
tags: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1548
|
+
scenarioId: import_zod30.z.string(),
|
|
1549
|
+
scenarioVersion: import_zod30.z.number().optional(),
|
|
1550
|
+
targetId: import_zod30.z.string(),
|
|
1551
|
+
targetVersion: import_zod30.z.number().optional(),
|
|
1552
|
+
suiteId: import_zod30.z.string().optional(),
|
|
1553
|
+
score: import_zod30.z.number(),
|
|
1554
|
+
time: import_zod30.z.number().optional(),
|
|
1555
|
+
microcentsSpent: import_zod30.z.number().optional()
|
|
1491
1556
|
});
|
|
1492
1557
|
|
|
1493
1558
|
// src/evaluation/eval-run-folder.ts
|
|
1494
|
-
var
|
|
1559
|
+
var import_zod31 = require("zod");
|
|
1495
1560
|
var EvalRunFolderSchema = TenantEntitySchema.extend({});
|
|
1496
1561
|
var CreateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
1497
1562
|
id: true,
|
|
@@ -1505,26 +1570,26 @@ var UpdateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
|
1505
1570
|
updatedAt: true,
|
|
1506
1571
|
deleted: true
|
|
1507
1572
|
}).partial();
|
|
1508
|
-
var EvalRunFolderMembershipSchema =
|
|
1509
|
-
folderId:
|
|
1510
|
-
evalRunId:
|
|
1511
|
-
projectId:
|
|
1512
|
-
createdAt:
|
|
1573
|
+
var EvalRunFolderMembershipSchema = import_zod31.z.object({
|
|
1574
|
+
folderId: import_zod31.z.string(),
|
|
1575
|
+
evalRunId: import_zod31.z.string(),
|
|
1576
|
+
projectId: import_zod31.z.string(),
|
|
1577
|
+
createdAt: import_zod31.z.string()
|
|
1513
1578
|
});
|
|
1514
1579
|
|
|
1515
1580
|
// src/project/project.ts
|
|
1516
|
-
var
|
|
1581
|
+
var import_zod32 = require("zod");
|
|
1517
1582
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
1518
|
-
appId:
|
|
1519
|
-
scenarioTags:
|
|
1583
|
+
appId: import_zod32.z.string().optional().describe("The ID of the app in Dev Center"),
|
|
1584
|
+
scenarioTags: import_zod32.z.array(import_zod32.z.string()).optional().describe("Project-level tag vocabulary for scenarios"),
|
|
1520
1585
|
/** Per-project Wix auth token (write-only — never returned in GET responses). null = clear. */
|
|
1521
|
-
wixAuthToken:
|
|
1586
|
+
wixAuthToken: import_zod32.z.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
|
|
1522
1587
|
/** Per-project Base44 auth file content (write-only — never returned in GET responses). null = clear. */
|
|
1523
|
-
base44AuthFile:
|
|
1588
|
+
base44AuthFile: import_zod32.z.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
|
|
1524
1589
|
/** Resolved at runtime from the encrypted Wix auth token */
|
|
1525
|
-
wixAuthEmail:
|
|
1590
|
+
wixAuthEmail: import_zod32.z.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
|
|
1526
1591
|
/** Resolved at runtime from the encrypted Base44 auth file */
|
|
1527
|
-
base44AuthEmail:
|
|
1592
|
+
base44AuthEmail: import_zod32.z.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
|
|
1528
1593
|
});
|
|
1529
1594
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
1530
1595
|
id: true,
|
|
@@ -1550,7 +1615,7 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
|
1550
1615
|
var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
1551
1616
|
|
|
1552
1617
|
// src/schedule/eval-schedule.ts
|
|
1553
|
-
var
|
|
1618
|
+
var import_zod33 = require("zod");
|
|
1554
1619
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
1555
1620
|
FrequencyType2["DAILY"] = "daily";
|
|
1556
1621
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -1560,29 +1625,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
1560
1625
|
})(FrequencyType || {});
|
|
1561
1626
|
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
1562
1627
|
/** Whether the schedule is active */
|
|
1563
|
-
enabled:
|
|
1628
|
+
enabled: import_zod33.z.boolean(),
|
|
1564
1629
|
/** Test suite to run */
|
|
1565
|
-
suiteId:
|
|
1630
|
+
suiteId: import_zod33.z.string(),
|
|
1566
1631
|
/** Preset that provides agent + entities for this schedule */
|
|
1567
|
-
presetId:
|
|
1632
|
+
presetId: import_zod33.z.string(),
|
|
1568
1633
|
/** How often to run */
|
|
1569
|
-
frequencyType:
|
|
1634
|
+
frequencyType: import_zod33.z.nativeEnum(FrequencyType),
|
|
1570
1635
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
|
|
1571
|
-
timeOfDay:
|
|
1636
|
+
timeOfDay: import_zod33.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
1572
1637
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules */
|
|
1573
|
-
dayOfWeek:
|
|
1638
|
+
dayOfWeek: import_zod33.z.number().min(0).max(6).optional(),
|
|
1574
1639
|
/** Day of month (1-31) for monthly schedules */
|
|
1575
|
-
dayOfMonth:
|
|
1640
|
+
dayOfMonth: import_zod33.z.number().min(1).max(31).optional(),
|
|
1576
1641
|
/** IANA timezone (e.g., 'America/New_York') */
|
|
1577
|
-
timezone:
|
|
1642
|
+
timezone: import_zod33.z.string(),
|
|
1578
1643
|
/** ID of the last eval run created by this schedule */
|
|
1579
|
-
lastRunId:
|
|
1644
|
+
lastRunId: import_zod33.z.string().optional(),
|
|
1580
1645
|
/** Denormalized status of the last run */
|
|
1581
|
-
lastRunStatus:
|
|
1646
|
+
lastRunStatus: import_zod33.z.string().optional(),
|
|
1582
1647
|
/** ISO timestamp of the last run */
|
|
1583
|
-
lastRunAt:
|
|
1648
|
+
lastRunAt: import_zod33.z.string().optional(),
|
|
1584
1649
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
|
|
1585
|
-
nextRunAt:
|
|
1650
|
+
nextRunAt: import_zod33.z.string().optional()
|
|
1586
1651
|
});
|
|
1587
1652
|
function isValidTimezone(tz) {
|
|
1588
1653
|
try {
|
|
@@ -1595,14 +1660,14 @@ function isValidTimezone(tz) {
|
|
|
1595
1660
|
function validateScheduleFields(data, ctx, options) {
|
|
1596
1661
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
1597
1662
|
ctx.addIssue({
|
|
1598
|
-
code:
|
|
1663
|
+
code: import_zod33.z.ZodIssueCode.custom,
|
|
1599
1664
|
message: "dayOfWeek is required for weekly schedules",
|
|
1600
1665
|
path: ["dayOfWeek"]
|
|
1601
1666
|
});
|
|
1602
1667
|
}
|
|
1603
1668
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
1604
1669
|
ctx.addIssue({
|
|
1605
|
-
code:
|
|
1670
|
+
code: import_zod33.z.ZodIssueCode.custom,
|
|
1606
1671
|
message: "dayOfMonth is required for monthly schedules",
|
|
1607
1672
|
path: ["dayOfMonth"]
|
|
1608
1673
|
});
|
|
@@ -1610,7 +1675,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
1610
1675
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
1611
1676
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
1612
1677
|
ctx.addIssue({
|
|
1613
|
-
code:
|
|
1678
|
+
code: import_zod33.z.ZodIssueCode.custom,
|
|
1614
1679
|
message: "Invalid IANA timezone",
|
|
1615
1680
|
path: ["timezone"]
|
|
1616
1681
|
});
|
|
@@ -1878,8 +1943,15 @@ function getSystemAssertion(id) {
|
|
|
1878
1943
|
AssertionResultStatus,
|
|
1879
1944
|
AssertionSchema,
|
|
1880
1945
|
AssertionTypeSchema,
|
|
1946
|
+
BATCH_IMPORT_LIMITS,
|
|
1881
1947
|
BaseEntitySchema,
|
|
1882
1948
|
BaseTestSchema,
|
|
1949
|
+
BatchAssertionLinkSchema,
|
|
1950
|
+
BatchImportPayloadSchema,
|
|
1951
|
+
BatchImportResponseSchema,
|
|
1952
|
+
BatchResultItemSchema,
|
|
1953
|
+
BatchScenarioEntrySchema,
|
|
1954
|
+
BatchSummarySchema,
|
|
1883
1955
|
BuildCheckTestSchema,
|
|
1884
1956
|
BuildPassedAssertionSchema,
|
|
1885
1957
|
BuildPassedConfigSchema,
|
|
@@ -2022,11 +2094,13 @@ function getSystemAssertion(id) {
|
|
|
2022
2094
|
UpdateTestScenarioInputSchema,
|
|
2023
2095
|
UpdateTestSuiteInputSchema,
|
|
2024
2096
|
VitestTestSchema,
|
|
2097
|
+
classifyAssertionRef,
|
|
2025
2098
|
formatTraceEventLine,
|
|
2026
2099
|
getSystemAssertion,
|
|
2027
2100
|
getSystemAssertions,
|
|
2028
2101
|
isSystemAssertionId,
|
|
2029
2102
|
isValidSkillFolderName,
|
|
2103
|
+
normalizeBatchAssertionLink,
|
|
2030
2104
|
normalizeModelId,
|
|
2031
2105
|
parseTraceEventLine,
|
|
2032
2106
|
validateAssertionConfig
|