@wix/evalforge-types 0.65.0 → 0.67.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +381 -303
- package/build/index.js.map +4 -4
- package/build/index.mjs +372 -303
- package/build/index.mjs.map +4 -4
- package/build/types/evaluation/eval-result.d.ts +1 -0
- package/build/types/evaluation/eval-run.d.ts +3 -0
- package/build/types/scenario/batch-import.d.ts +106 -0
- package/build/types/scenario/index.d.ts +1 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -43,8 +43,15 @@ __export(index_exports, {
|
|
|
43
43
|
AssertionResultStatus: () => AssertionResultStatus,
|
|
44
44
|
AssertionSchema: () => AssertionSchema,
|
|
45
45
|
AssertionTypeSchema: () => AssertionTypeSchema,
|
|
46
|
+
BATCH_IMPORT_LIMITS: () => BATCH_IMPORT_LIMITS,
|
|
46
47
|
BaseEntitySchema: () => BaseEntitySchema,
|
|
47
48
|
BaseTestSchema: () => BaseTestSchema,
|
|
49
|
+
BatchAssertionLinkSchema: () => BatchAssertionLinkSchema,
|
|
50
|
+
BatchImportPayloadSchema: () => BatchImportPayloadSchema,
|
|
51
|
+
BatchImportResponseSchema: () => BatchImportResponseSchema,
|
|
52
|
+
BatchResultItemSchema: () => BatchResultItemSchema,
|
|
53
|
+
BatchScenarioEntrySchema: () => BatchScenarioEntrySchema,
|
|
54
|
+
BatchSummarySchema: () => BatchSummarySchema,
|
|
48
55
|
BuildCheckTestSchema: () => BuildCheckTestSchema,
|
|
49
56
|
BuildPassedAssertionSchema: () => BuildPassedAssertionSchema,
|
|
50
57
|
BuildPassedConfigSchema: () => BuildPassedConfigSchema,
|
|
@@ -187,11 +194,13 @@ __export(index_exports, {
|
|
|
187
194
|
UpdateTestScenarioInputSchema: () => UpdateTestScenarioInputSchema,
|
|
188
195
|
UpdateTestSuiteInputSchema: () => UpdateTestSuiteInputSchema,
|
|
189
196
|
VitestTestSchema: () => VitestTestSchema,
|
|
197
|
+
classifyAssertionRef: () => classifyAssertionRef,
|
|
190
198
|
formatTraceEventLine: () => formatTraceEventLine,
|
|
191
199
|
getSystemAssertion: () => getSystemAssertion,
|
|
192
200
|
getSystemAssertions: () => getSystemAssertions,
|
|
193
201
|
isSystemAssertionId: () => isSystemAssertionId,
|
|
194
202
|
isValidSkillFolderName: () => isValidSkillFolderName,
|
|
203
|
+
normalizeBatchAssertionLink: () => normalizeBatchAssertionLink,
|
|
195
204
|
normalizeModelId: () => normalizeModelId,
|
|
196
205
|
parseTraceEventLine: () => parseTraceEventLine,
|
|
197
206
|
validateAssertionConfig: () => validateAssertionConfig
|
|
@@ -997,11 +1006,67 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
|
|
|
997
1006
|
});
|
|
998
1007
|
var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
|
|
999
1008
|
|
|
1000
|
-
// src/
|
|
1009
|
+
// src/scenario/batch-import.ts
|
|
1001
1010
|
var import_zod24 = require("zod");
|
|
1011
|
+
var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
1012
|
+
var BatchAssertionLinkSchema = import_zod24.z.union([
|
|
1013
|
+
import_zod24.z.string().min(1),
|
|
1014
|
+
ScenarioAssertionLinkSchema
|
|
1015
|
+
]);
|
|
1016
|
+
var BatchScenarioEntrySchema = import_zod24.z.object({
|
|
1017
|
+
name: import_zod24.z.string().min(1, "name: Required"),
|
|
1018
|
+
description: import_zod24.z.string().optional().default(""),
|
|
1019
|
+
triggerPrompt: import_zod24.z.string().min(10, "triggerPrompt: Must be at least 10 characters"),
|
|
1020
|
+
templateId: import_zod24.z.string().nullish(),
|
|
1021
|
+
tags: import_zod24.z.array(import_zod24.z.string()).optional(),
|
|
1022
|
+
assertionLinks: import_zod24.z.array(BatchAssertionLinkSchema).optional()
|
|
1023
|
+
});
|
|
1024
|
+
var BatchImportPayloadSchema = import_zod24.z.object({
|
|
1025
|
+
scenarios: import_zod24.z.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
|
|
1026
|
+
});
|
|
1027
|
+
var BATCH_IMPORT_LIMITS = {
|
|
1028
|
+
MAX_SCENARIOS: 100,
|
|
1029
|
+
MAX_PAYLOAD_BYTES: 1048576
|
|
1030
|
+
// 1 MB
|
|
1031
|
+
};
|
|
1032
|
+
function classifyAssertionRef(ref) {
|
|
1033
|
+
if (ref.startsWith("system:")) {
|
|
1034
|
+
return { type: "system", value: ref };
|
|
1035
|
+
}
|
|
1036
|
+
if (UUID_REGEX.test(ref)) {
|
|
1037
|
+
return { type: "uuid", value: ref };
|
|
1038
|
+
}
|
|
1039
|
+
return { type: "name", value: ref };
|
|
1040
|
+
}
|
|
1041
|
+
function normalizeBatchAssertionLink(link) {
|
|
1042
|
+
if (typeof link === "string") {
|
|
1043
|
+
return { assertionId: link };
|
|
1044
|
+
}
|
|
1045
|
+
return link;
|
|
1046
|
+
}
|
|
1047
|
+
var BatchResultItemSchema = import_zod24.z.object({
|
|
1048
|
+
index: import_zod24.z.number(),
|
|
1049
|
+
name: import_zod24.z.string(),
|
|
1050
|
+
status: import_zod24.z.enum(["valid", "invalid"]),
|
|
1051
|
+
id: import_zod24.z.string().nullable().optional(),
|
|
1052
|
+
errors: import_zod24.z.array(import_zod24.z.string()).optional()
|
|
1053
|
+
});
|
|
1054
|
+
var BatchSummarySchema = import_zod24.z.object({
|
|
1055
|
+
total: import_zod24.z.number(),
|
|
1056
|
+
valid: import_zod24.z.number(),
|
|
1057
|
+
invalid: import_zod24.z.number(),
|
|
1058
|
+
created: import_zod24.z.number()
|
|
1059
|
+
});
|
|
1060
|
+
var BatchImportResponseSchema = import_zod24.z.object({
|
|
1061
|
+
summary: BatchSummarySchema,
|
|
1062
|
+
results: import_zod24.z.array(BatchResultItemSchema)
|
|
1063
|
+
});
|
|
1064
|
+
|
|
1065
|
+
// src/suite/test-suite.ts
|
|
1066
|
+
var import_zod25 = require("zod");
|
|
1002
1067
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
1003
1068
|
/** IDs of test scenarios in this suite */
|
|
1004
|
-
scenarioIds:
|
|
1069
|
+
scenarioIds: import_zod25.z.array(import_zod25.z.string())
|
|
1005
1070
|
});
|
|
1006
1071
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
1007
1072
|
id: true,
|
|
@@ -1012,21 +1077,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
1012
1077
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
1013
1078
|
|
|
1014
1079
|
// src/evaluation/metrics.ts
|
|
1015
|
-
var
|
|
1016
|
-
var TokenUsageSchema =
|
|
1017
|
-
prompt:
|
|
1018
|
-
completion:
|
|
1019
|
-
total:
|
|
1020
|
-
});
|
|
1021
|
-
var EvalMetricsSchema =
|
|
1022
|
-
totalAssertions:
|
|
1023
|
-
passed:
|
|
1024
|
-
failed:
|
|
1025
|
-
skipped:
|
|
1026
|
-
errors:
|
|
1027
|
-
passRate:
|
|
1028
|
-
avgDuration:
|
|
1029
|
-
totalDuration:
|
|
1080
|
+
var import_zod26 = require("zod");
|
|
1081
|
+
var TokenUsageSchema = import_zod26.z.object({
|
|
1082
|
+
prompt: import_zod26.z.number(),
|
|
1083
|
+
completion: import_zod26.z.number(),
|
|
1084
|
+
total: import_zod26.z.number()
|
|
1085
|
+
});
|
|
1086
|
+
var EvalMetricsSchema = import_zod26.z.object({
|
|
1087
|
+
totalAssertions: import_zod26.z.number(),
|
|
1088
|
+
passed: import_zod26.z.number(),
|
|
1089
|
+
failed: import_zod26.z.number(),
|
|
1090
|
+
skipped: import_zod26.z.number(),
|
|
1091
|
+
errors: import_zod26.z.number(),
|
|
1092
|
+
passRate: import_zod26.z.number(),
|
|
1093
|
+
avgDuration: import_zod26.z.number(),
|
|
1094
|
+
totalDuration: import_zod26.z.number()
|
|
1030
1095
|
});
|
|
1031
1096
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
1032
1097
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -1036,7 +1101,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
1036
1101
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
1037
1102
|
return EvalStatus2;
|
|
1038
1103
|
})(EvalStatus || {});
|
|
1039
|
-
var EvalStatusSchema =
|
|
1104
|
+
var EvalStatusSchema = import_zod26.z.enum(EvalStatus);
|
|
1040
1105
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
1041
1106
|
LLMStepType2["COMPLETION"] = "completion";
|
|
1042
1107
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -1044,54 +1109,54 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
1044
1109
|
LLMStepType2["THINKING"] = "thinking";
|
|
1045
1110
|
return LLMStepType2;
|
|
1046
1111
|
})(LLMStepType || {});
|
|
1047
|
-
var LLMTraceStepSchema =
|
|
1048
|
-
id:
|
|
1049
|
-
stepNumber:
|
|
1050
|
-
type:
|
|
1051
|
-
model:
|
|
1052
|
-
provider:
|
|
1053
|
-
startedAt:
|
|
1054
|
-
durationMs:
|
|
1112
|
+
var LLMTraceStepSchema = import_zod26.z.object({
|
|
1113
|
+
id: import_zod26.z.string(),
|
|
1114
|
+
stepNumber: import_zod26.z.number(),
|
|
1115
|
+
type: import_zod26.z.enum(LLMStepType),
|
|
1116
|
+
model: import_zod26.z.string(),
|
|
1117
|
+
provider: import_zod26.z.string(),
|
|
1118
|
+
startedAt: import_zod26.z.string(),
|
|
1119
|
+
durationMs: import_zod26.z.number(),
|
|
1055
1120
|
tokenUsage: TokenUsageSchema,
|
|
1056
|
-
costUsd:
|
|
1057
|
-
toolName:
|
|
1058
|
-
toolArguments:
|
|
1059
|
-
inputPreview:
|
|
1060
|
-
outputPreview:
|
|
1061
|
-
success:
|
|
1062
|
-
error:
|
|
1063
|
-
turnIndex:
|
|
1064
|
-
});
|
|
1065
|
-
var LLMBreakdownStatsSchema =
|
|
1066
|
-
count:
|
|
1067
|
-
durationMs:
|
|
1068
|
-
tokens:
|
|
1069
|
-
costUsd:
|
|
1070
|
-
});
|
|
1071
|
-
var LLMTraceSummarySchema =
|
|
1072
|
-
totalSteps:
|
|
1073
|
-
totalTurns:
|
|
1074
|
-
totalDurationMs:
|
|
1121
|
+
costUsd: import_zod26.z.number(),
|
|
1122
|
+
toolName: import_zod26.z.string().optional(),
|
|
1123
|
+
toolArguments: import_zod26.z.string().optional(),
|
|
1124
|
+
inputPreview: import_zod26.z.string().optional(),
|
|
1125
|
+
outputPreview: import_zod26.z.string().optional(),
|
|
1126
|
+
success: import_zod26.z.boolean(),
|
|
1127
|
+
error: import_zod26.z.string().optional(),
|
|
1128
|
+
turnIndex: import_zod26.z.number().optional()
|
|
1129
|
+
});
|
|
1130
|
+
var LLMBreakdownStatsSchema = import_zod26.z.object({
|
|
1131
|
+
count: import_zod26.z.number(),
|
|
1132
|
+
durationMs: import_zod26.z.number(),
|
|
1133
|
+
tokens: import_zod26.z.number(),
|
|
1134
|
+
costUsd: import_zod26.z.number()
|
|
1135
|
+
});
|
|
1136
|
+
var LLMTraceSummarySchema = import_zod26.z.object({
|
|
1137
|
+
totalSteps: import_zod26.z.number(),
|
|
1138
|
+
totalTurns: import_zod26.z.number().optional(),
|
|
1139
|
+
totalDurationMs: import_zod26.z.number(),
|
|
1075
1140
|
totalTokens: TokenUsageSchema,
|
|
1076
|
-
totalCostUsd:
|
|
1077
|
-
stepTypeBreakdown:
|
|
1078
|
-
modelBreakdown:
|
|
1079
|
-
modelsUsed:
|
|
1080
|
-
});
|
|
1081
|
-
var LLMTraceSchema =
|
|
1082
|
-
id:
|
|
1083
|
-
steps:
|
|
1141
|
+
totalCostUsd: import_zod26.z.number(),
|
|
1142
|
+
stepTypeBreakdown: import_zod26.z.record(import_zod26.z.string(), LLMBreakdownStatsSchema).optional(),
|
|
1143
|
+
modelBreakdown: import_zod26.z.record(import_zod26.z.string(), LLMBreakdownStatsSchema),
|
|
1144
|
+
modelsUsed: import_zod26.z.array(import_zod26.z.string())
|
|
1145
|
+
});
|
|
1146
|
+
var LLMTraceSchema = import_zod26.z.object({
|
|
1147
|
+
id: import_zod26.z.string(),
|
|
1148
|
+
steps: import_zod26.z.array(LLMTraceStepSchema),
|
|
1084
1149
|
summary: LLMTraceSummarySchema
|
|
1085
1150
|
});
|
|
1086
1151
|
|
|
1087
1152
|
// src/evaluation/eval-result.ts
|
|
1088
|
-
var
|
|
1153
|
+
var import_zod30 = require("zod");
|
|
1089
1154
|
|
|
1090
1155
|
// src/evaluation/eval-run.ts
|
|
1091
|
-
var
|
|
1156
|
+
var import_zod28 = require("zod");
|
|
1092
1157
|
|
|
1093
1158
|
// src/evaluation/live-trace.ts
|
|
1094
|
-
var
|
|
1159
|
+
var import_zod27 = require("zod");
|
|
1095
1160
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
1096
1161
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
1097
1162
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -1105,37 +1170,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
1105
1170
|
LiveTraceEventType2["USER"] = "user";
|
|
1106
1171
|
return LiveTraceEventType2;
|
|
1107
1172
|
})(LiveTraceEventType || {});
|
|
1108
|
-
var LiveTraceEventSchema =
|
|
1173
|
+
var LiveTraceEventSchema = import_zod27.z.object({
|
|
1109
1174
|
/** The evaluation run ID */
|
|
1110
|
-
evalRunId:
|
|
1175
|
+
evalRunId: import_zod27.z.string(),
|
|
1111
1176
|
/** The scenario ID being executed */
|
|
1112
|
-
scenarioId:
|
|
1177
|
+
scenarioId: import_zod27.z.string(),
|
|
1113
1178
|
/** The scenario name for display */
|
|
1114
|
-
scenarioName:
|
|
1179
|
+
scenarioName: import_zod27.z.string(),
|
|
1115
1180
|
/** The target ID (skill, agent, etc.) */
|
|
1116
|
-
targetId:
|
|
1181
|
+
targetId: import_zod27.z.string(),
|
|
1117
1182
|
/** The target name for display */
|
|
1118
|
-
targetName:
|
|
1183
|
+
targetName: import_zod27.z.string(),
|
|
1119
1184
|
/** Step number in the current scenario execution */
|
|
1120
|
-
stepNumber:
|
|
1185
|
+
stepNumber: import_zod27.z.number(),
|
|
1121
1186
|
/** Type of trace event */
|
|
1122
|
-
type:
|
|
1187
|
+
type: import_zod27.z.enum(LiveTraceEventType),
|
|
1123
1188
|
/** Tool name if this is a tool_use event */
|
|
1124
|
-
toolName:
|
|
1189
|
+
toolName: import_zod27.z.string().optional(),
|
|
1125
1190
|
/** Tool arguments preview (truncated JSON) */
|
|
1126
|
-
toolArgs:
|
|
1191
|
+
toolArgs: import_zod27.z.string().optional(),
|
|
1127
1192
|
/** Output preview (truncated text) */
|
|
1128
|
-
outputPreview:
|
|
1193
|
+
outputPreview: import_zod27.z.string().optional(),
|
|
1129
1194
|
/** File path for file operations */
|
|
1130
|
-
filePath:
|
|
1195
|
+
filePath: import_zod27.z.string().optional(),
|
|
1131
1196
|
/** Elapsed time in milliseconds for progress events */
|
|
1132
|
-
elapsedMs:
|
|
1197
|
+
elapsedMs: import_zod27.z.number().optional(),
|
|
1133
1198
|
/** Thinking/reasoning text from Claude */
|
|
1134
|
-
thinking:
|
|
1199
|
+
thinking: import_zod27.z.string().optional(),
|
|
1135
1200
|
/** Timestamp when this event occurred */
|
|
1136
|
-
timestamp:
|
|
1201
|
+
timestamp: import_zod27.z.string(),
|
|
1137
1202
|
/** Whether this is the final event for this scenario */
|
|
1138
|
-
isComplete:
|
|
1203
|
+
isComplete: import_zod27.z.boolean()
|
|
1139
1204
|
});
|
|
1140
1205
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
1141
1206
|
function parseTraceEventLine(line) {
|
|
@@ -1164,15 +1229,15 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
1164
1229
|
TriggerType2["SCHEDULED"] = "SCHEDULED";
|
|
1165
1230
|
return TriggerType2;
|
|
1166
1231
|
})(TriggerType || {});
|
|
1167
|
-
var TriggerMetadataSchema =
|
|
1168
|
-
version:
|
|
1169
|
-
resourceUpdated:
|
|
1170
|
-
scheduleId:
|
|
1232
|
+
var TriggerMetadataSchema = import_zod28.z.object({
|
|
1233
|
+
version: import_zod28.z.string().optional(),
|
|
1234
|
+
resourceUpdated: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1235
|
+
scheduleId: import_zod28.z.string().optional()
|
|
1171
1236
|
});
|
|
1172
|
-
var TriggerSchema =
|
|
1173
|
-
id:
|
|
1237
|
+
var TriggerSchema = import_zod28.z.object({
|
|
1238
|
+
id: import_zod28.z.string(),
|
|
1174
1239
|
metadata: TriggerMetadataSchema.optional(),
|
|
1175
|
-
type:
|
|
1240
|
+
type: import_zod28.z.nativeEnum(TriggerType)
|
|
1176
1241
|
});
|
|
1177
1242
|
var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
|
|
1178
1243
|
FailureCategory2["MISSING_FILE"] = "missing_file";
|
|
@@ -1190,30 +1255,30 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
|
|
|
1190
1255
|
FailureSeverity2["LOW"] = "low";
|
|
1191
1256
|
return FailureSeverity2;
|
|
1192
1257
|
})(FailureSeverity || {});
|
|
1193
|
-
var DiffLineTypeSchema =
|
|
1194
|
-
var DiffLineSchema =
|
|
1258
|
+
var DiffLineTypeSchema = import_zod28.z.enum(["added", "removed", "unchanged"]);
|
|
1259
|
+
var DiffLineSchema = import_zod28.z.object({
|
|
1195
1260
|
type: DiffLineTypeSchema,
|
|
1196
|
-
content:
|
|
1197
|
-
lineNumber:
|
|
1198
|
-
});
|
|
1199
|
-
var DiffContentSchema =
|
|
1200
|
-
path:
|
|
1201
|
-
expected:
|
|
1202
|
-
actual:
|
|
1203
|
-
diffLines:
|
|
1204
|
-
renamedFrom:
|
|
1261
|
+
content: import_zod28.z.string(),
|
|
1262
|
+
lineNumber: import_zod28.z.number()
|
|
1263
|
+
});
|
|
1264
|
+
var DiffContentSchema = import_zod28.z.object({
|
|
1265
|
+
path: import_zod28.z.string(),
|
|
1266
|
+
expected: import_zod28.z.string(),
|
|
1267
|
+
actual: import_zod28.z.string(),
|
|
1268
|
+
diffLines: import_zod28.z.array(DiffLineSchema),
|
|
1269
|
+
renamedFrom: import_zod28.z.string().optional(),
|
|
1205
1270
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1206
|
-
isInfrastructure:
|
|
1271
|
+
isInfrastructure: import_zod28.z.boolean().optional()
|
|
1207
1272
|
});
|
|
1208
|
-
var CommandExecutionSchema =
|
|
1209
|
-
command:
|
|
1210
|
-
exitCode:
|
|
1211
|
-
output:
|
|
1212
|
-
duration:
|
|
1273
|
+
var CommandExecutionSchema = import_zod28.z.object({
|
|
1274
|
+
command: import_zod28.z.string(),
|
|
1275
|
+
exitCode: import_zod28.z.number(),
|
|
1276
|
+
output: import_zod28.z.string().optional(),
|
|
1277
|
+
duration: import_zod28.z.number()
|
|
1213
1278
|
});
|
|
1214
|
-
var FileModificationSchema =
|
|
1215
|
-
path:
|
|
1216
|
-
action:
|
|
1279
|
+
var FileModificationSchema = import_zod28.z.object({
|
|
1280
|
+
path: import_zod28.z.string(),
|
|
1281
|
+
action: import_zod28.z.enum(["created", "modified", "deleted"])
|
|
1217
1282
|
});
|
|
1218
1283
|
var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
1219
1284
|
TemplateFileStatus2["NEW"] = "new";
|
|
@@ -1221,89 +1286,91 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
|
1221
1286
|
TemplateFileStatus2["UNCHANGED"] = "unchanged";
|
|
1222
1287
|
return TemplateFileStatus2;
|
|
1223
1288
|
})(TemplateFileStatus || {});
|
|
1224
|
-
var TemplateFileSchema =
|
|
1289
|
+
var TemplateFileSchema = import_zod28.z.object({
|
|
1225
1290
|
/** Relative path within the template */
|
|
1226
|
-
path:
|
|
1291
|
+
path: import_zod28.z.string(),
|
|
1227
1292
|
/** Full file content after execution */
|
|
1228
|
-
content:
|
|
1293
|
+
content: import_zod28.z.string(),
|
|
1229
1294
|
/** File status (new, modified, unchanged) */
|
|
1230
|
-
status:
|
|
1295
|
+
status: import_zod28.z.enum(["new", "modified", "unchanged"]),
|
|
1231
1296
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1232
|
-
isInfrastructure:
|
|
1233
|
-
});
|
|
1234
|
-
var ApiCallSchema =
|
|
1235
|
-
endpoint:
|
|
1236
|
-
tokensUsed:
|
|
1237
|
-
duration:
|
|
1238
|
-
});
|
|
1239
|
-
var ExecutionTraceSchema =
|
|
1240
|
-
commands:
|
|
1241
|
-
filesModified:
|
|
1242
|
-
apiCalls:
|
|
1243
|
-
totalDuration:
|
|
1244
|
-
});
|
|
1245
|
-
var FailureAnalysisSchema =
|
|
1246
|
-
category:
|
|
1247
|
-
severity:
|
|
1248
|
-
summary:
|
|
1249
|
-
details:
|
|
1250
|
-
rootCause:
|
|
1251
|
-
suggestedFix:
|
|
1252
|
-
relatedAssertions:
|
|
1253
|
-
codeSnippet:
|
|
1254
|
-
similarIssues:
|
|
1255
|
-
patternId:
|
|
1297
|
+
isInfrastructure: import_zod28.z.boolean().optional()
|
|
1298
|
+
});
|
|
1299
|
+
var ApiCallSchema = import_zod28.z.object({
|
|
1300
|
+
endpoint: import_zod28.z.string(),
|
|
1301
|
+
tokensUsed: import_zod28.z.number(),
|
|
1302
|
+
duration: import_zod28.z.number()
|
|
1303
|
+
});
|
|
1304
|
+
var ExecutionTraceSchema = import_zod28.z.object({
|
|
1305
|
+
commands: import_zod28.z.array(CommandExecutionSchema),
|
|
1306
|
+
filesModified: import_zod28.z.array(FileModificationSchema),
|
|
1307
|
+
apiCalls: import_zod28.z.array(ApiCallSchema),
|
|
1308
|
+
totalDuration: import_zod28.z.number()
|
|
1309
|
+
});
|
|
1310
|
+
var FailureAnalysisSchema = import_zod28.z.object({
|
|
1311
|
+
category: import_zod28.z.enum(FailureCategory),
|
|
1312
|
+
severity: import_zod28.z.enum(FailureSeverity),
|
|
1313
|
+
summary: import_zod28.z.string(),
|
|
1314
|
+
details: import_zod28.z.string(),
|
|
1315
|
+
rootCause: import_zod28.z.string(),
|
|
1316
|
+
suggestedFix: import_zod28.z.string(),
|
|
1317
|
+
relatedAssertions: import_zod28.z.array(import_zod28.z.string()),
|
|
1318
|
+
codeSnippet: import_zod28.z.string().optional(),
|
|
1319
|
+
similarIssues: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1320
|
+
patternId: import_zod28.z.string().optional(),
|
|
1256
1321
|
// Extended fields for detailed debugging
|
|
1257
1322
|
diff: DiffContentSchema.optional(),
|
|
1258
1323
|
executionTrace: ExecutionTraceSchema.optional()
|
|
1259
1324
|
});
|
|
1260
1325
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1261
1326
|
/** Agent ID for this run */
|
|
1262
|
-
agentId:
|
|
1327
|
+
agentId: import_zod28.z.string().optional(),
|
|
1263
1328
|
/** Preset ID that originated this run (optional) */
|
|
1264
|
-
presetId:
|
|
1329
|
+
presetId: import_zod28.z.string().optional(),
|
|
1265
1330
|
/** Skill IDs for this run */
|
|
1266
|
-
skillIds:
|
|
1331
|
+
skillIds: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1267
1332
|
/** Map of skillId to skillVersionId for this run */
|
|
1268
|
-
skillVersions:
|
|
1333
|
+
skillVersions: import_zod28.z.record(import_zod28.z.string(), import_zod28.z.string()).optional(),
|
|
1269
1334
|
/** Scenario IDs to run (always present — resolved server-side from tags when needed) */
|
|
1270
|
-
scenarioIds:
|
|
1335
|
+
scenarioIds: import_zod28.z.array(import_zod28.z.string()),
|
|
1271
1336
|
/** Current status */
|
|
1272
1337
|
status: EvalStatusSchema,
|
|
1273
1338
|
/** Progress percentage (0-100) */
|
|
1274
|
-
progress:
|
|
1339
|
+
progress: import_zod28.z.number(),
|
|
1275
1340
|
/** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
|
|
1276
|
-
results:
|
|
1341
|
+
results: import_zod28.z.array(import_zod28.z.lazy(() => EvalRunResultSchema)),
|
|
1277
1342
|
/** Aggregated metrics across all results */
|
|
1278
1343
|
aggregateMetrics: EvalMetricsSchema,
|
|
1279
1344
|
/** Failure analyses */
|
|
1280
|
-
failureAnalyses:
|
|
1345
|
+
failureAnalyses: import_zod28.z.array(FailureAnalysisSchema).optional(),
|
|
1281
1346
|
/** Aggregated LLM trace summary */
|
|
1282
1347
|
llmTraceSummary: LLMTraceSummarySchema.optional(),
|
|
1283
1348
|
/** What triggered this run */
|
|
1284
1349
|
trigger: TriggerSchema.optional(),
|
|
1285
1350
|
/** When the run started (set when evaluation is triggered) */
|
|
1286
|
-
startedAt:
|
|
1351
|
+
startedAt: import_zod28.z.string().optional(),
|
|
1287
1352
|
/** When the run completed */
|
|
1288
|
-
completedAt:
|
|
1353
|
+
completedAt: import_zod28.z.string().optional(),
|
|
1289
1354
|
/** Live trace events captured during execution (for playback on results page) */
|
|
1290
|
-
liveTraceEvents:
|
|
1355
|
+
liveTraceEvents: import_zod28.z.array(LiveTraceEventSchema).optional(),
|
|
1291
1356
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
1292
|
-
jobId:
|
|
1357
|
+
jobId: import_zod28.z.string().optional(),
|
|
1293
1358
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
1294
|
-
jobStatus:
|
|
1359
|
+
jobStatus: import_zod28.z.string().optional(),
|
|
1295
1360
|
/** Remote job error message if the job failed */
|
|
1296
|
-
jobError:
|
|
1361
|
+
jobError: import_zod28.z.string().optional(),
|
|
1297
1362
|
/** Timestamp of the last job status check */
|
|
1298
|
-
jobStatusCheckedAt:
|
|
1363
|
+
jobStatusCheckedAt: import_zod28.z.string().optional(),
|
|
1299
1364
|
/** MCP server IDs to enable for this run (optional) */
|
|
1300
|
-
mcpIds:
|
|
1365
|
+
mcpIds: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1301
1366
|
/** Sub-agent IDs to enable for this run (optional) */
|
|
1302
|
-
subAgentIds:
|
|
1367
|
+
subAgentIds: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1303
1368
|
/** Rule IDs to enable for this run (optional) */
|
|
1304
|
-
ruleIds:
|
|
1369
|
+
ruleIds: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1305
1370
|
/** Tags used to select scenarios for this run (for traceability) */
|
|
1306
|
-
tags:
|
|
1371
|
+
tags: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1372
|
+
/** How many times each scenario is executed within this eval run. Default: 1. Max: 20. */
|
|
1373
|
+
runsPerScenario: import_zod28.z.number().int().min(1).max(20).optional()
|
|
1307
1374
|
});
|
|
1308
1375
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
1309
1376
|
id: true,
|
|
@@ -1318,60 +1385,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
1318
1385
|
scenarioIds: true
|
|
1319
1386
|
}).extend({
|
|
1320
1387
|
/** Optional on input — backend resolves from tags when not provided */
|
|
1321
|
-
scenarioIds:
|
|
1388
|
+
scenarioIds: import_zod28.z.array(import_zod28.z.string()).optional()
|
|
1322
1389
|
}).refine(
|
|
1323
1390
|
(data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
|
|
1324
1391
|
{ message: "Either scenarioIds or tags must be provided" }
|
|
1325
1392
|
);
|
|
1326
|
-
var EvaluationProgressSchema =
|
|
1327
|
-
runId:
|
|
1328
|
-
targetId:
|
|
1329
|
-
totalScenarios:
|
|
1330
|
-
completedScenarios:
|
|
1331
|
-
scenarioProgress:
|
|
1332
|
-
|
|
1333
|
-
scenarioId:
|
|
1334
|
-
currentStep:
|
|
1335
|
-
error:
|
|
1393
|
+
var EvaluationProgressSchema = import_zod28.z.object({
|
|
1394
|
+
runId: import_zod28.z.string(),
|
|
1395
|
+
targetId: import_zod28.z.string(),
|
|
1396
|
+
totalScenarios: import_zod28.z.number(),
|
|
1397
|
+
completedScenarios: import_zod28.z.number(),
|
|
1398
|
+
scenarioProgress: import_zod28.z.array(
|
|
1399
|
+
import_zod28.z.object({
|
|
1400
|
+
scenarioId: import_zod28.z.string(),
|
|
1401
|
+
currentStep: import_zod28.z.string(),
|
|
1402
|
+
error: import_zod28.z.string().optional()
|
|
1336
1403
|
})
|
|
1337
1404
|
),
|
|
1338
|
-
createdAt:
|
|
1339
|
-
});
|
|
1340
|
-
var EvaluationLogSchema =
|
|
1341
|
-
runId:
|
|
1342
|
-
scenarioId:
|
|
1343
|
-
log:
|
|
1344
|
-
level:
|
|
1345
|
-
message:
|
|
1346
|
-
args:
|
|
1347
|
-
error:
|
|
1405
|
+
createdAt: import_zod28.z.number()
|
|
1406
|
+
});
|
|
1407
|
+
var EvaluationLogSchema = import_zod28.z.object({
|
|
1408
|
+
runId: import_zod28.z.string(),
|
|
1409
|
+
scenarioId: import_zod28.z.string(),
|
|
1410
|
+
log: import_zod28.z.object({
|
|
1411
|
+
level: import_zod28.z.enum(["info", "error", "debug"]),
|
|
1412
|
+
message: import_zod28.z.string().optional(),
|
|
1413
|
+
args: import_zod28.z.array(import_zod28.z.any()).optional(),
|
|
1414
|
+
error: import_zod28.z.string().optional()
|
|
1348
1415
|
})
|
|
1349
1416
|
});
|
|
1350
1417
|
var LLM_TIMEOUT = 12e4;
|
|
1351
1418
|
|
|
1352
1419
|
// src/evaluation/conversation.ts
|
|
1353
|
-
var
|
|
1354
|
-
var TextBlockSchema =
|
|
1355
|
-
type:
|
|
1356
|
-
text:
|
|
1357
|
-
});
|
|
1358
|
-
var ThinkingBlockSchema =
|
|
1359
|
-
type:
|
|
1360
|
-
thinking:
|
|
1361
|
-
});
|
|
1362
|
-
var ToolUseBlockSchema =
|
|
1363
|
-
type:
|
|
1364
|
-
toolName:
|
|
1365
|
-
toolId:
|
|
1366
|
-
input:
|
|
1367
|
-
});
|
|
1368
|
-
var ToolResultBlockSchema =
|
|
1369
|
-
type:
|
|
1370
|
-
toolUseId:
|
|
1371
|
-
content:
|
|
1372
|
-
isError:
|
|
1373
|
-
});
|
|
1374
|
-
var ConversationBlockSchema =
|
|
1420
|
+
var import_zod29 = require("zod");
|
|
1421
|
+
var TextBlockSchema = import_zod29.z.object({
|
|
1422
|
+
type: import_zod29.z.literal("text"),
|
|
1423
|
+
text: import_zod29.z.string()
|
|
1424
|
+
});
|
|
1425
|
+
var ThinkingBlockSchema = import_zod29.z.object({
|
|
1426
|
+
type: import_zod29.z.literal("thinking"),
|
|
1427
|
+
thinking: import_zod29.z.string()
|
|
1428
|
+
});
|
|
1429
|
+
var ToolUseBlockSchema = import_zod29.z.object({
|
|
1430
|
+
type: import_zod29.z.literal("tool_use"),
|
|
1431
|
+
toolName: import_zod29.z.string(),
|
|
1432
|
+
toolId: import_zod29.z.string(),
|
|
1433
|
+
input: import_zod29.z.unknown()
|
|
1434
|
+
});
|
|
1435
|
+
var ToolResultBlockSchema = import_zod29.z.object({
|
|
1436
|
+
type: import_zod29.z.literal("tool_result"),
|
|
1437
|
+
toolUseId: import_zod29.z.string(),
|
|
1438
|
+
content: import_zod29.z.string(),
|
|
1439
|
+
isError: import_zod29.z.boolean().optional()
|
|
1440
|
+
});
|
|
1441
|
+
var ConversationBlockSchema = import_zod29.z.discriminatedUnion("type", [
|
|
1375
1442
|
TextBlockSchema,
|
|
1376
1443
|
ThinkingBlockSchema,
|
|
1377
1444
|
ToolUseBlockSchema,
|
|
@@ -1382,18 +1449,18 @@ var ConversationMessageRoles = [
|
|
|
1382
1449
|
"user",
|
|
1383
1450
|
"system"
|
|
1384
1451
|
];
|
|
1385
|
-
var ConversationMessageSchema =
|
|
1386
|
-
role:
|
|
1387
|
-
content:
|
|
1388
|
-
timestamp:
|
|
1452
|
+
var ConversationMessageSchema = import_zod29.z.object({
|
|
1453
|
+
role: import_zod29.z.enum(ConversationMessageRoles),
|
|
1454
|
+
content: import_zod29.z.array(ConversationBlockSchema),
|
|
1455
|
+
timestamp: import_zod29.z.string()
|
|
1389
1456
|
});
|
|
1390
|
-
var ScenarioConversationSchema =
|
|
1391
|
-
id:
|
|
1392
|
-
projectId:
|
|
1393
|
-
evalRunId:
|
|
1394
|
-
resultId:
|
|
1395
|
-
messages:
|
|
1396
|
-
createdAt:
|
|
1457
|
+
var ScenarioConversationSchema = import_zod29.z.object({
|
|
1458
|
+
id: import_zod29.z.string(),
|
|
1459
|
+
projectId: import_zod29.z.string(),
|
|
1460
|
+
evalRunId: import_zod29.z.string(),
|
|
1461
|
+
resultId: import_zod29.z.string(),
|
|
1462
|
+
messages: import_zod29.z.array(ConversationMessageSchema),
|
|
1463
|
+
createdAt: import_zod29.z.string()
|
|
1397
1464
|
});
|
|
1398
1465
|
|
|
1399
1466
|
// src/evaluation/eval-result.ts
|
|
@@ -1404,94 +1471,96 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
1404
1471
|
AssertionResultStatus2["ERROR"] = "error";
|
|
1405
1472
|
return AssertionResultStatus2;
|
|
1406
1473
|
})(AssertionResultStatus || {});
|
|
1407
|
-
var AssertionResultSchema =
|
|
1408
|
-
id:
|
|
1409
|
-
assertionId:
|
|
1410
|
-
assertionType:
|
|
1411
|
-
assertionName:
|
|
1412
|
-
status:
|
|
1413
|
-
message:
|
|
1414
|
-
expected:
|
|
1415
|
-
actual:
|
|
1416
|
-
duration:
|
|
1417
|
-
details:
|
|
1418
|
-
llmTraceSteps:
|
|
1419
|
-
});
|
|
1420
|
-
var EvalRunResultSchema =
|
|
1421
|
-
id:
|
|
1422
|
-
targetId:
|
|
1423
|
-
targetName:
|
|
1474
|
+
var AssertionResultSchema = import_zod30.z.object({
|
|
1475
|
+
id: import_zod30.z.string(),
|
|
1476
|
+
assertionId: import_zod30.z.string(),
|
|
1477
|
+
assertionType: import_zod30.z.string(),
|
|
1478
|
+
assertionName: import_zod30.z.string(),
|
|
1479
|
+
status: import_zod30.z.enum(AssertionResultStatus),
|
|
1480
|
+
message: import_zod30.z.string().optional(),
|
|
1481
|
+
expected: import_zod30.z.string().optional(),
|
|
1482
|
+
actual: import_zod30.z.string().optional(),
|
|
1483
|
+
duration: import_zod30.z.number().optional(),
|
|
1484
|
+
details: import_zod30.z.record(import_zod30.z.string(), import_zod30.z.unknown()).optional(),
|
|
1485
|
+
llmTraceSteps: import_zod30.z.array(LLMTraceStepSchema).optional()
|
|
1486
|
+
});
|
|
1487
|
+
var EvalRunResultSchema = import_zod30.z.object({
|
|
1488
|
+
id: import_zod30.z.string(),
|
|
1489
|
+
targetId: import_zod30.z.string(),
|
|
1490
|
+
targetName: import_zod30.z.string().optional(),
|
|
1424
1491
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
1425
|
-
skillVersionId:
|
|
1492
|
+
skillVersionId: import_zod30.z.string().optional(),
|
|
1426
1493
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
1427
|
-
skillVersion:
|
|
1428
|
-
scenarioId:
|
|
1429
|
-
scenarioName:
|
|
1494
|
+
skillVersion: import_zod30.z.string().optional(),
|
|
1495
|
+
scenarioId: import_zod30.z.string(),
|
|
1496
|
+
scenarioName: import_zod30.z.string(),
|
|
1430
1497
|
modelConfig: ModelConfigSchema.optional(),
|
|
1431
|
-
assertionResults:
|
|
1498
|
+
assertionResults: import_zod30.z.array(AssertionResultSchema),
|
|
1432
1499
|
metrics: EvalMetricsSchema.optional(),
|
|
1433
|
-
passed:
|
|
1434
|
-
failed:
|
|
1435
|
-
passRate:
|
|
1436
|
-
duration:
|
|
1437
|
-
outputText:
|
|
1438
|
-
files:
|
|
1439
|
-
fileDiffs:
|
|
1500
|
+
passed: import_zod30.z.number(),
|
|
1501
|
+
failed: import_zod30.z.number(),
|
|
1502
|
+
passRate: import_zod30.z.number(),
|
|
1503
|
+
duration: import_zod30.z.number(),
|
|
1504
|
+
outputText: import_zod30.z.string().optional(),
|
|
1505
|
+
files: import_zod30.z.array(ExpectedFileSchema).optional(),
|
|
1506
|
+
fileDiffs: import_zod30.z.array(DiffContentSchema).optional(),
|
|
1440
1507
|
/** Full template files after execution with status indicators */
|
|
1441
|
-
templateFiles:
|
|
1442
|
-
startedAt:
|
|
1443
|
-
completedAt:
|
|
1508
|
+
templateFiles: import_zod30.z.array(TemplateFileSchema).optional(),
|
|
1509
|
+
startedAt: import_zod30.z.string().optional(),
|
|
1510
|
+
completedAt: import_zod30.z.string().optional(),
|
|
1444
1511
|
llmTrace: LLMTraceSchema.optional(),
|
|
1445
1512
|
/** Full conversation messages (only present in transit; stripped before DB storage) */
|
|
1446
|
-
conversation:
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1513
|
+
conversation: import_zod30.z.array(ConversationMessageSchema).optional(),
|
|
1514
|
+
/** 0-based iteration index when a scenario is run multiple times within a single eval run */
|
|
1515
|
+
iterationIndex: import_zod30.z.number().int().min(0).optional()
|
|
1516
|
+
});
|
|
1517
|
+
var PromptResultSchema = import_zod30.z.object({
|
|
1518
|
+
text: import_zod30.z.string(),
|
|
1519
|
+
files: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
1520
|
+
finishReason: import_zod30.z.string().optional(),
|
|
1521
|
+
reasoning: import_zod30.z.string().optional(),
|
|
1522
|
+
reasoningDetails: import_zod30.z.unknown().optional(),
|
|
1523
|
+
toolCalls: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
1524
|
+
toolResults: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
1525
|
+
warnings: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
1526
|
+
sources: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
1527
|
+
steps: import_zod30.z.array(import_zod30.z.unknown()),
|
|
1528
|
+
generationTimeMs: import_zod30.z.number(),
|
|
1529
|
+
prompt: import_zod30.z.string(),
|
|
1530
|
+
systemPrompt: import_zod30.z.string(),
|
|
1531
|
+
usage: import_zod30.z.object({
|
|
1532
|
+
totalTokens: import_zod30.z.number().optional(),
|
|
1533
|
+
totalMicrocentsSpent: import_zod30.z.number().optional()
|
|
1465
1534
|
})
|
|
1466
1535
|
});
|
|
1467
|
-
var EvaluationResultSchema =
|
|
1468
|
-
id:
|
|
1469
|
-
runId:
|
|
1470
|
-
timestamp:
|
|
1536
|
+
var EvaluationResultSchema = import_zod30.z.object({
|
|
1537
|
+
id: import_zod30.z.string(),
|
|
1538
|
+
runId: import_zod30.z.string(),
|
|
1539
|
+
timestamp: import_zod30.z.number(),
|
|
1471
1540
|
promptResult: PromptResultSchema,
|
|
1472
|
-
testResults:
|
|
1473
|
-
tags:
|
|
1474
|
-
feedback:
|
|
1475
|
-
score:
|
|
1476
|
-
suiteId:
|
|
1477
|
-
});
|
|
1478
|
-
var LeanEvaluationResultSchema =
|
|
1479
|
-
id:
|
|
1480
|
-
runId:
|
|
1481
|
-
timestamp:
|
|
1482
|
-
tags:
|
|
1483
|
-
scenarioId:
|
|
1484
|
-
scenarioVersion:
|
|
1485
|
-
targetId:
|
|
1486
|
-
targetVersion:
|
|
1487
|
-
suiteId:
|
|
1488
|
-
score:
|
|
1489
|
-
time:
|
|
1490
|
-
microcentsSpent:
|
|
1541
|
+
testResults: import_zod30.z.array(import_zod30.z.unknown()),
|
|
1542
|
+
tags: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1543
|
+
feedback: import_zod30.z.string().optional(),
|
|
1544
|
+
score: import_zod30.z.number(),
|
|
1545
|
+
suiteId: import_zod30.z.string().optional()
|
|
1546
|
+
});
|
|
1547
|
+
var LeanEvaluationResultSchema = import_zod30.z.object({
|
|
1548
|
+
id: import_zod30.z.string(),
|
|
1549
|
+
runId: import_zod30.z.string(),
|
|
1550
|
+
timestamp: import_zod30.z.number(),
|
|
1551
|
+
tags: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1552
|
+
scenarioId: import_zod30.z.string(),
|
|
1553
|
+
scenarioVersion: import_zod30.z.number().optional(),
|
|
1554
|
+
targetId: import_zod30.z.string(),
|
|
1555
|
+
targetVersion: import_zod30.z.number().optional(),
|
|
1556
|
+
suiteId: import_zod30.z.string().optional(),
|
|
1557
|
+
score: import_zod30.z.number(),
|
|
1558
|
+
time: import_zod30.z.number().optional(),
|
|
1559
|
+
microcentsSpent: import_zod30.z.number().optional()
|
|
1491
1560
|
});
|
|
1492
1561
|
|
|
1493
1562
|
// src/evaluation/eval-run-folder.ts
|
|
1494
|
-
var
|
|
1563
|
+
var import_zod31 = require("zod");
|
|
1495
1564
|
var EvalRunFolderSchema = TenantEntitySchema.extend({});
|
|
1496
1565
|
var CreateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
1497
1566
|
id: true,
|
|
@@ -1505,26 +1574,26 @@ var UpdateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
|
1505
1574
|
updatedAt: true,
|
|
1506
1575
|
deleted: true
|
|
1507
1576
|
}).partial();
|
|
1508
|
-
var EvalRunFolderMembershipSchema =
|
|
1509
|
-
folderId:
|
|
1510
|
-
evalRunId:
|
|
1511
|
-
projectId:
|
|
1512
|
-
createdAt:
|
|
1577
|
+
var EvalRunFolderMembershipSchema = import_zod31.z.object({
|
|
1578
|
+
folderId: import_zod31.z.string(),
|
|
1579
|
+
evalRunId: import_zod31.z.string(),
|
|
1580
|
+
projectId: import_zod31.z.string(),
|
|
1581
|
+
createdAt: import_zod31.z.string()
|
|
1513
1582
|
});
|
|
1514
1583
|
|
|
1515
1584
|
// src/project/project.ts
|
|
1516
|
-
var
|
|
1585
|
+
var import_zod32 = require("zod");
|
|
1517
1586
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
1518
|
-
appId:
|
|
1519
|
-
scenarioTags:
|
|
1587
|
+
appId: import_zod32.z.string().optional().describe("The ID of the app in Dev Center"),
|
|
1588
|
+
scenarioTags: import_zod32.z.array(import_zod32.z.string()).optional().describe("Project-level tag vocabulary for scenarios"),
|
|
1520
1589
|
/** Per-project Wix auth token (write-only — never returned in GET responses). null = clear. */
|
|
1521
|
-
wixAuthToken:
|
|
1590
|
+
wixAuthToken: import_zod32.z.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
|
|
1522
1591
|
/** Per-project Base44 auth file content (write-only — never returned in GET responses). null = clear. */
|
|
1523
|
-
base44AuthFile:
|
|
1592
|
+
base44AuthFile: import_zod32.z.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
|
|
1524
1593
|
/** Resolved at runtime from the encrypted Wix auth token */
|
|
1525
|
-
wixAuthEmail:
|
|
1594
|
+
wixAuthEmail: import_zod32.z.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
|
|
1526
1595
|
/** Resolved at runtime from the encrypted Base44 auth file */
|
|
1527
|
-
base44AuthEmail:
|
|
1596
|
+
base44AuthEmail: import_zod32.z.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
|
|
1528
1597
|
});
|
|
1529
1598
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
1530
1599
|
id: true,
|
|
@@ -1550,7 +1619,7 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
|
1550
1619
|
var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
1551
1620
|
|
|
1552
1621
|
// src/schedule/eval-schedule.ts
|
|
1553
|
-
var
|
|
1622
|
+
var import_zod33 = require("zod");
|
|
1554
1623
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
1555
1624
|
FrequencyType2["DAILY"] = "daily";
|
|
1556
1625
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -1560,29 +1629,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
1560
1629
|
})(FrequencyType || {});
|
|
1561
1630
|
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
1562
1631
|
/** Whether the schedule is active */
|
|
1563
|
-
enabled:
|
|
1632
|
+
enabled: import_zod33.z.boolean(),
|
|
1564
1633
|
/** Test suite to run */
|
|
1565
|
-
suiteId:
|
|
1634
|
+
suiteId: import_zod33.z.string(),
|
|
1566
1635
|
/** Preset that provides agent + entities for this schedule */
|
|
1567
|
-
presetId:
|
|
1636
|
+
presetId: import_zod33.z.string(),
|
|
1568
1637
|
/** How often to run */
|
|
1569
|
-
frequencyType:
|
|
1638
|
+
frequencyType: import_zod33.z.nativeEnum(FrequencyType),
|
|
1570
1639
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
|
|
1571
|
-
timeOfDay:
|
|
1640
|
+
timeOfDay: import_zod33.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
1572
1641
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules */
|
|
1573
|
-
dayOfWeek:
|
|
1642
|
+
dayOfWeek: import_zod33.z.number().min(0).max(6).optional(),
|
|
1574
1643
|
/** Day of month (1-31) for monthly schedules */
|
|
1575
|
-
dayOfMonth:
|
|
1644
|
+
dayOfMonth: import_zod33.z.number().min(1).max(31).optional(),
|
|
1576
1645
|
/** IANA timezone (e.g., 'America/New_York') */
|
|
1577
|
-
timezone:
|
|
1646
|
+
timezone: import_zod33.z.string(),
|
|
1578
1647
|
/** ID of the last eval run created by this schedule */
|
|
1579
|
-
lastRunId:
|
|
1648
|
+
lastRunId: import_zod33.z.string().optional(),
|
|
1580
1649
|
/** Denormalized status of the last run */
|
|
1581
|
-
lastRunStatus:
|
|
1650
|
+
lastRunStatus: import_zod33.z.string().optional(),
|
|
1582
1651
|
/** ISO timestamp of the last run */
|
|
1583
|
-
lastRunAt:
|
|
1652
|
+
lastRunAt: import_zod33.z.string().optional(),
|
|
1584
1653
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
|
|
1585
|
-
nextRunAt:
|
|
1654
|
+
nextRunAt: import_zod33.z.string().optional()
|
|
1586
1655
|
});
|
|
1587
1656
|
function isValidTimezone(tz) {
|
|
1588
1657
|
try {
|
|
@@ -1595,14 +1664,14 @@ function isValidTimezone(tz) {
|
|
|
1595
1664
|
function validateScheduleFields(data, ctx, options) {
|
|
1596
1665
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
1597
1666
|
ctx.addIssue({
|
|
1598
|
-
code:
|
|
1667
|
+
code: import_zod33.z.ZodIssueCode.custom,
|
|
1599
1668
|
message: "dayOfWeek is required for weekly schedules",
|
|
1600
1669
|
path: ["dayOfWeek"]
|
|
1601
1670
|
});
|
|
1602
1671
|
}
|
|
1603
1672
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
1604
1673
|
ctx.addIssue({
|
|
1605
|
-
code:
|
|
1674
|
+
code: import_zod33.z.ZodIssueCode.custom,
|
|
1606
1675
|
message: "dayOfMonth is required for monthly schedules",
|
|
1607
1676
|
path: ["dayOfMonth"]
|
|
1608
1677
|
});
|
|
@@ -1610,7 +1679,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
1610
1679
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
1611
1680
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
1612
1681
|
ctx.addIssue({
|
|
1613
|
-
code:
|
|
1682
|
+
code: import_zod33.z.ZodIssueCode.custom,
|
|
1614
1683
|
message: "Invalid IANA timezone",
|
|
1615
1684
|
path: ["timezone"]
|
|
1616
1685
|
});
|
|
@@ -1878,8 +1947,15 @@ function getSystemAssertion(id) {
|
|
|
1878
1947
|
AssertionResultStatus,
|
|
1879
1948
|
AssertionSchema,
|
|
1880
1949
|
AssertionTypeSchema,
|
|
1950
|
+
BATCH_IMPORT_LIMITS,
|
|
1881
1951
|
BaseEntitySchema,
|
|
1882
1952
|
BaseTestSchema,
|
|
1953
|
+
BatchAssertionLinkSchema,
|
|
1954
|
+
BatchImportPayloadSchema,
|
|
1955
|
+
BatchImportResponseSchema,
|
|
1956
|
+
BatchResultItemSchema,
|
|
1957
|
+
BatchScenarioEntrySchema,
|
|
1958
|
+
BatchSummarySchema,
|
|
1883
1959
|
BuildCheckTestSchema,
|
|
1884
1960
|
BuildPassedAssertionSchema,
|
|
1885
1961
|
BuildPassedConfigSchema,
|
|
@@ -2022,11 +2098,13 @@ function getSystemAssertion(id) {
|
|
|
2022
2098
|
UpdateTestScenarioInputSchema,
|
|
2023
2099
|
UpdateTestSuiteInputSchema,
|
|
2024
2100
|
VitestTestSchema,
|
|
2101
|
+
classifyAssertionRef,
|
|
2025
2102
|
formatTraceEventLine,
|
|
2026
2103
|
getSystemAssertion,
|
|
2027
2104
|
getSystemAssertions,
|
|
2028
2105
|
isSystemAssertionId,
|
|
2029
2106
|
isValidSkillFolderName,
|
|
2107
|
+
normalizeBatchAssertionLink,
|
|
2030
2108
|
normalizeModelId,
|
|
2031
2109
|
parseTraceEventLine,
|
|
2032
2110
|
validateAssertionConfig
|