@wix/evalforge-types 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +1047 -0
- package/build/index.js.map +7 -0
- package/build/index.mjs +928 -0
- package/build/index.mjs.map +7 -0
- package/build/types/common/base-entity.d.ts +26 -0
- package/build/types/common/index.d.ts +3 -0
- package/build/types/common/mcp.d.ts +17 -0
- package/build/types/common/models.d.ts +55 -0
- package/build/types/evaluation/eval-result.d.ts +239 -0
- package/build/types/evaluation/eval-run.d.ts +581 -0
- package/build/types/evaluation/index.d.ts +4 -0
- package/build/types/evaluation/live-trace.d.ts +47 -0
- package/build/types/evaluation/metrics.d.ts +157 -0
- package/build/types/improvement/improvement.d.ts +140 -0
- package/build/types/improvement/index.d.ts +1 -0
- package/build/types/index.d.ts +24 -0
- package/build/types/project/index.d.ts +1 -0
- package/build/types/project/project.d.ts +41 -0
- package/build/types/scenario/environment.d.ts +58 -0
- package/build/types/scenario/index.d.ts +2 -0
- package/build/types/scenario/test-scenario.d.ts +50 -0
- package/build/types/suite/index.d.ts +1 -0
- package/build/types/suite/test-suite.d.ts +37 -0
- package/build/types/target/agent.d.ts +53 -0
- package/build/types/target/index.d.ts +4 -0
- package/build/types/target/skill.d.ts +78 -0
- package/build/types/target/skills-group.d.ts +37 -0
- package/build/types/target/target.d.ts +17 -0
- package/build/types/template/index.d.ts +1 -0
- package/build/types/template/template.d.ts +38 -0
- package/build/types/test/base.d.ts +43 -0
- package/build/types/test/build-check.d.ts +29 -0
- package/build/types/test/command-execution.d.ts +31 -0
- package/build/types/test/file-content.d.ts +52 -0
- package/build/types/test/file-presence.d.ts +24 -0
- package/build/types/test/index.d.ts +124 -0
- package/build/types/test/llm.d.ts +36 -0
- package/build/types/test/playwright-nl.d.ts +28 -0
- package/build/types/test/site-config.d.ts +32 -0
- package/build/types/test/tool.d.ts +26 -0
- package/build/types/test/vitest.d.ts +30 -0
- package/package.json +50 -0
package/build/index.js
ADDED
|
@@ -0,0 +1,1047 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Bundler-generated CommonJS interop helpers. Short aliases for the
// Object reflection primitives used below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Installs every key of `all` on `target` as an enumerable accessor whose
 * getter is the function stored under that key.
 */
var __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, { enumerable: true, get: all[exportName] });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as live getters.
 * Keys already present on `to`, and the key equal to `except`, are skipped.
 * Enumerability follows the source descriptor. Returns `to`.
 */
var __copyProps = (to, from, except, desc) => {
  const isObjectLike = from !== null && typeof from === "object";
  const isCallable = typeof from === "function";
  if (!isObjectLike && !isCallable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/**
 * Builds a fresh object flagged with a non-enumerable `__esModule: true`
 * and copies the properties of `mod` onto it.
 */
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
// Public export surface of the package. Each entry is a lazy getter, so the
// bindings defined further down the bundle are resolved on property access
// rather than when this map is built.
var index_exports = {};
__export(index_exports, {
  AVAILABLE_MODELS: () => AVAILABLE_MODELS,
  AVAILABLE_MODELS_MAP: () => AVAILABLE_MODELS_MAP,
  AgentSchema: () => AgentSchema,
  AllowedCommands: () => AllowedCommands,
  ApiCallSchema: () => ApiCallSchema,
  AssertionResultSchema: () => AssertionResultSchema,
  AssertionResultStatus: () => AssertionResultStatus,
  BaseEntitySchema: () => BaseEntitySchema,
  BaseTestSchema: () => BaseTestSchema,
  BuildCheckTestSchema: () => BuildCheckTestSchema,
  CommandExecutionSchema: () => CommandExecutionSchema,
  CommandExecutionTestSchema: () => CommandExecutionTestSchema,
  CreateAgentInputSchema: () => CreateAgentInputSchema,
  CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
  CreateProjectInputSchema: () => CreateProjectInputSchema,
  CreateSkillInputSchema: () => CreateSkillInputSchema,
  CreateSkillsGroupInputSchema: () => CreateSkillsGroupInputSchema,
  CreateTemplateInputSchema: () => CreateTemplateInputSchema,
  CreateTestScenarioInputSchema: () => CreateTestScenarioInputSchema,
  CreateTestSuiteInputSchema: () => CreateTestSuiteInputSchema,
  DiffContentSchema: () => DiffContentSchema,
  DiffLineSchema: () => DiffLineSchema,
  DiffLineTypeSchema: () => DiffLineTypeSchema,
  EnvironmentSchema: () => EnvironmentSchema,
  EvalMetricsSchema: () => EvalMetricsSchema,
  EvalRunResultSchema: () => EvalRunResultSchema,
  EvalRunSchema: () => EvalRunSchema,
  EvalStatus: () => EvalStatus,
  EvalStatusSchema: () => EvalStatusSchema,
  EvaluationLogSchema: () => EvaluationLogSchema,
  EvaluationProgressSchema: () => EvaluationProgressSchema,
  EvaluationResultSchema: () => EvaluationResultSchema,
  ExecutionTraceSchema: () => ExecutionTraceSchema,
  ExpectedFileSchema: () => ExpectedFileSchema,
  FailureAnalysisSchema: () => FailureAnalysisSchema,
  FailureCategory: () => FailureCategory,
  FailureSeverity: () => FailureSeverity,
  FileContentCheckSchema: () => FileContentCheckSchema,
  FileContentTestSchema: () => FileContentTestSchema,
  FileModificationSchema: () => FileModificationSchema,
  FilePresenceTestSchema: () => FilePresenceTestSchema,
  LLMBreakdownStatsSchema: () => LLMBreakdownStatsSchema,
  LLMStepType: () => LLMStepType,
  LLMTestSchema: () => LLMTestSchema,
  LLMTraceSchema: () => LLMTraceSchema,
  LLMTraceStepSchema: () => LLMTraceStepSchema,
  LLMTraceSummarySchema: () => LLMTraceSummarySchema,
  LLM_TIMEOUT: () => LLM_TIMEOUT,
  LeanEvaluationResultSchema: () => LeanEvaluationResultSchema,
  LiveTraceEventSchema: () => LiveTraceEventSchema,
  LiveTraceEventType: () => LiveTraceEventType,
  LocalProjectConfigSchema: () => LocalProjectConfigSchema,
  MCPServerConfigSchema: () => MCPServerConfigSchema,
  MetaSiteConfigSchema: () => MetaSiteConfigSchema,
  ModelConfigSchema: () => ModelConfigSchema,
  ModelIds: () => ModelIds,
  ModelIdsSchema: () => ModelIdsSchema,
  ModelPricingSchema: () => ModelPricingSchema,
  ModelSchema: () => ModelSchema,
  PlaywrightNLTestSchema: () => PlaywrightNLTestSchema,
  ProjectSchema: () => ProjectSchema,
  PromptResultSchema: () => PromptResultSchema,
  SiteConfigTestSchema: () => SiteConfigTestSchema,
  SkillMetadataSchema: () => SkillMetadataSchema,
  SkillSchema: () => SkillSchema,
  SkillVersionSchema: () => SkillVersionSchema,
  SkillsGroupSchema: () => SkillsGroupSchema,
  TRACE_EVENT_PREFIX: () => TRACE_EVENT_PREFIX,
  TargetSchema: () => TargetSchema,
  TemplateSchema: () => TemplateSchema,
  TenantEntitySchema: () => TenantEntitySchema,
  TestImportance: () => TestImportance,
  TestImportanceSchema: () => TestImportanceSchema,
  TestScenarioSchema: () => TestScenarioSchema,
  TestSchema: () => TestSchema,
  TestSuiteSchema: () => TestSuiteSchema,
  TestType: () => TestType,
  TestTypeSchema: () => TestTypeSchema,
  TokenUsageSchema: () => TokenUsageSchema,
  ToolTestSchema: () => ToolTestSchema,
  TriggerMetadataSchema: () => TriggerMetadataSchema,
  TriggerSchema: () => TriggerSchema,
  TriggerType: () => TriggerType,
  UpdateAgentInputSchema: () => UpdateAgentInputSchema,
  UpdateProjectInputSchema: () => UpdateProjectInputSchema,
  UpdateSkillInputSchema: () => UpdateSkillInputSchema,
  UpdateSkillsGroupInputSchema: () => UpdateSkillsGroupInputSchema,
  UpdateTemplateInputSchema: () => UpdateTemplateInputSchema,
  UpdateTestScenarioInputSchema: () => UpdateTestScenarioInputSchema,
  UpdateTestSuiteInputSchema: () => UpdateTestSuiteInputSchema,
  VitestTestSchema: () => VitestTestSchema,
  formatTraceEventLine: () => formatTraceEventLine,
  parseTraceEventLine: () => parseTraceEventLine
});
// Expose the getter map as the CommonJS module, flagged with __esModule.
module.exports = __toCommonJS(index_exports);
|
|
118
|
+
|
|
119
|
+
// src/common/base-entity.ts
|
|
120
|
+
var import_zod = require("zod");
|
|
121
|
+
/**
 * Fields shared by every persisted entity.
 * `name` must be a non-empty string; `createdAt`/`updatedAt` are validated
 * only as strings (no date format is enforced here); `deleted` is optional.
 */
var BaseEntitySchema = import_zod.z.object({
  id: import_zod.z.string(),
  name: import_zod.z.string().min(1),
  description: import_zod.z.string(),
  createdAt: import_zod.z.string(),
  updatedAt: import_zod.z.string(),
  deleted: import_zod.z.boolean().optional()
});
|
|
129
|
+
/** BaseEntitySchema plus the required `projectId` the entity belongs to. */
var TenantEntitySchema = BaseEntitySchema.extend({
  projectId: import_zod.z.string()
});
|
|
132
|
+
|
|
133
|
+
// src/common/mcp.ts
|
|
134
|
+
var import_zod2 = require("zod");
|
|
135
|
+
/**
 * Configuration for launching one MCP server.
 * Only `name` and `command` are required; the remaining fields are optional.
 */
var MCPServerConfigSchema = import_zod2.z.object({
  /** Unique name for this MCP server */
  name: import_zod2.z.string(),
  /** Command to start the MCP server */
  command: import_zod2.z.string(),
  /** Command line arguments */
  args: import_zod2.z.array(import_zod2.z.string()).optional(),
  /** Environment variables for the server process */
  envVars: import_zod2.z.record(import_zod2.z.string(), import_zod2.z.string()).optional(),
  /** Tools to disable for this MCP server */
  disabledTools: import_zod2.z.array(import_zod2.z.string()).optional()
});
|
|
147
|
+
|
|
148
|
+
// src/common/models.ts
|
|
149
|
+
var import_zod3 = require("zod");
|
|
150
|
+
/**
 * String enum of supported model identifiers. Every member's value is
 * identical to its key. Must stay a `var`: the initializer reads the hoisted
 * (still undefined) binding via `ModelIds || {}`.
 */
var ModelIds = /* @__PURE__ */ ((registry) => {
  const identifiers = [
    "CLAUDE_3_HAIKU_1_0",
    "CLAUDE_3_OPUS_1_0",
    "CLAUDE_3_SONNET_1_0",
    "CLAUDE_3_5_SONNET_1_0",
    "CLAUDE_3_5_SONNET_2_0",
    "CLAUDE_3_7_SONNET_1_0",
    "CLAUDE_4_OPUS_1_0",
    "CLAUDE_4_SONNET_1_0"
  ];
  for (const identifier of identifiers) {
    registry[identifier] = identifier;
  }
  return registry;
})(ModelIds || {});
|
|
161
|
+
/** Zod enum built from the ModelIds enum object. */
var ModelIdsSchema = import_zod3.z.enum(ModelIds);
|
|
162
|
+
/**
 * Model selection plus optional sampling settings.
 * `temperature`, when given, must lie in [0, 1]; `maxTokens`, when given,
 * must be >= 1 (not restricted to integers by this schema).
 */
var ModelConfigSchema = import_zod3.z.object({
  model: ModelIdsSchema,
  temperature: import_zod3.z.number().min(0).max(1).optional(),
  maxTokens: import_zod3.z.number().min(1).optional()
});
|
|
167
|
+
/** Input and output prices per 1M tokens; both are required numbers. */
var ModelPricingSchema = import_zod3.z.object({
  inputPer1M: import_zod3.z.number(),
  outputPer1M: import_zod3.z.number()
});
|
|
171
|
+
/** Descriptor of one available model; every field is required. */
var ModelSchema = import_zod3.z.object({
  /** AI Gateway model ID */
  id: ModelIdsSchema,
  /** Display name */
  name: import_zod3.z.string(),
  /** Provider (always 'anthropic') */
  provider: import_zod3.z.literal("anthropic"),
  /** Provider's model identifier (e.g., "claude-3-5-sonnet-20241022") */
  providerModelId: import_zod3.z.string(),
  /** Pricing per 1M tokens */
  pricing: ModelPricingSchema
});
|
|
183
|
+
/**
 * Catalogue of selectable models. Every entry has provider "anthropic".
 * Each row below is expanded into a record with the fields
 * `id`, `name`, `provider`, `providerModelId`, `pricing`.
 */
var AVAILABLE_MODELS = [
  // [id, display name, provider model id, input price per 1M, output price per 1M]
  ["CLAUDE_4_SONNET_1_0", "Claude 4 Sonnet", "claude-4-sonnet", 3, 15],
  ["CLAUDE_4_OPUS_1_0", "Claude 4 Opus", "claude-4-opus", 15, 75],
  ["CLAUDE_3_7_SONNET_1_0", "Claude 3.7 Sonnet", "claude-3-7-sonnet", 3, 15],
  ["CLAUDE_3_5_SONNET_2_0", "Claude 3.5 Sonnet v2", "claude-3-5-sonnet-20241022", 3, 15],
  ["CLAUDE_3_5_SONNET_1_0", "Claude 3.5 Sonnet", "claude-3-5-sonnet-20240620", 3, 15],
  ["CLAUDE_3_OPUS_1_0", "Claude 3 Opus", "claude-3-opus-20240229", 15, 75],
  ["CLAUDE_3_SONNET_1_0", "Claude 3 Sonnet", "claude-3-sonnet-20240229", 3, 15],
  ["CLAUDE_3_HAIKU_1_0", "Claude 3 Haiku", "claude-3-haiku-20240307", 0.25, 1.25]
].map(([id, name, providerModelId, inputPer1M, outputPer1M]) => ({
  id,
  name,
  provider: "anthropic",
  providerModelId,
  pricing: { inputPer1M, outputPer1M }
}));
|
|
241
|
+
/** Lookup table from model id to its AVAILABLE_MODELS entry (same object references). */
var AVAILABLE_MODELS_MAP = Object.fromEntries(
  Array.from(AVAILABLE_MODELS, (entry) => [entry.id, entry])
);
|
|
244
|
+
|
|
245
|
+
// src/target/target.ts
|
|
246
|
+
/** Common schema for evaluation targets; currently adds no fields to TenantEntitySchema. */
var TargetSchema = TenantEntitySchema.extend({
  // Base for all testable entities
  // Specific targets add their own fields
});
|
|
250
|
+
|
|
251
|
+
// src/target/agent.ts
|
|
252
|
+
var import_zod4 = require("zod");
|
|
253
|
+
/** Agent target: TargetSchema plus a required run command and optional model settings. */
var AgentSchema = TargetSchema.extend({
  /** Command to run the agent */
  runCommand: import_zod4.z.string(),
  /** Optional model configuration override */
  modelConfig: ModelConfigSchema.optional()
});
|
|
259
|
+
/** Creation input: AgentSchema without `id`, `createdAt`, `updatedAt` and `deleted`. */
var CreateAgentInputSchema = AgentSchema.omit({
  id: true,
  createdAt: true,
  updatedAt: true,
  deleted: true
});
|
|
265
|
+
/** Update input: every field of CreateAgentInputSchema made optional. */
var UpdateAgentInputSchema = CreateAgentInputSchema.partial();
|
|
266
|
+
|
|
267
|
+
// src/target/skill.ts
|
|
268
|
+
var import_zod5 = require("zod");
|
|
269
|
+
/**
 * Metadata attached to a skill version: required `name` and `description`,
 * optional string lists `allowedTools` and `skills`.
 */
var SkillMetadataSchema = import_zod5.z.object({
  name: import_zod5.z.string(),
  description: import_zod5.z.string(),
  allowedTools: import_zod5.z.array(import_zod5.z.string()).optional(),
  skills: import_zod5.z.array(import_zod5.z.string()).optional()
});
|
|
275
|
+
/**
 * One stored version of a skill. `model`, `systemPrompt` and `notes` are
 * optional; `version` is a number and `createdAt` a plain string.
 */
var SkillVersionSchema = import_zod5.z.object({
  id: import_zod5.z.string(),
  skillId: import_zod5.z.string(),
  skillMd: import_zod5.z.string(),
  metadata: SkillMetadataSchema,
  model: ModelConfigSchema.optional(),
  systemPrompt: import_zod5.z.string().optional(),
  version: import_zod5.z.number(),
  createdAt: import_zod5.z.string(),
  notes: import_zod5.z.string().optional()
});
|
|
286
|
+
/** Skill target: TargetSchema plus the required SKILL.md text. */
var SkillSchema = TargetSchema.extend({
  /** The current SKILL.md content */
  skillMd: import_zod5.z.string()
});
|
|
290
|
+
/** Creation input: SkillSchema without `id`, `createdAt`, `updatedAt` and `deleted`. */
var CreateSkillInputSchema = SkillSchema.omit({
  id: true,
  createdAt: true,
  updatedAt: true,
  deleted: true
});
|
|
296
|
+
/** Update input: every field of CreateSkillInputSchema made optional. */
var UpdateSkillInputSchema = CreateSkillInputSchema.partial();
|
|
297
|
+
|
|
298
|
+
// src/target/skills-group.ts
|
|
299
|
+
var import_zod6 = require("zod");
|
|
300
|
+
/** Named group of skills: TenantEntitySchema plus a required list of skill ids. */
var SkillsGroupSchema = TenantEntitySchema.extend({
  /** IDs of skills in this group */
  skillIds: import_zod6.z.array(import_zod6.z.string())
});
|
|
304
|
+
/** Creation input: SkillsGroupSchema without `id`, `createdAt`, `updatedAt` and `deleted`. */
var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
  id: true,
  createdAt: true,
  updatedAt: true,
  deleted: true
});
|
|
310
|
+
/** Update input: every field of CreateSkillsGroupInputSchema made optional. */
var UpdateSkillsGroupInputSchema = CreateSkillsGroupInputSchema.partial();
|
|
311
|
+
|
|
312
|
+
// src/test/index.ts
|
|
313
|
+
var import_zod17 = require("zod");
|
|
314
|
+
|
|
315
|
+
// src/test/base.ts
|
|
316
|
+
var import_zod7 = require("zod");
|
|
317
|
+
/**
 * String enum of test kinds; each member's value equals its key.
 * Declared with `var` because the initializer reads the hoisted binding.
 */
var TestType = /* @__PURE__ */ ((kinds) => {
  const names = [
    "LLM",
    "TOOL",
    "SITE_CONFIG",
    "COMMAND_EXECUTION",
    "FILE_PRESENCE",
    "FILE_CONTENT",
    "BUILD_CHECK",
    "VITEST",
    "PLAYWRIGHT_NL"
  ];
  names.forEach((kindName) => {
    kinds[kindName] = kindName;
  });
  return kinds;
})(TestType || {});
|
|
329
|
+
/** Zod enum built from the TestType enum object. */
var TestTypeSchema = import_zod7.z.enum(TestType);
|
|
330
|
+
/**
 * String enum of importance levels; keys are upper-case, values lower-case.
 * Declared with `var` because the initializer reads the hoisted binding.
 */
var TestImportance = /* @__PURE__ */ ((levels) =>
  Object.assign(levels, {
    LOW: "low",
    MEDIUM: "medium",
    HIGH: "high",
    CRITICAL: "critical"
  })
)(TestImportance || {});
|
|
337
|
+
/** Zod enum built from the TestImportance enum object. */
var TestImportanceSchema = import_zod7.z.enum(TestImportance);
|
|
338
|
+
/**
 * Fields common to every test definition. `name` needs at least 3
 * characters; `description` and `importance` are optional. Concrete test
 * schemas extend this and narrow `type` to a single literal.
 */
var BaseTestSchema = import_zod7.z.object({
  id: import_zod7.z.string(),
  type: TestTypeSchema,
  name: import_zod7.z.string().min(3),
  description: import_zod7.z.string().optional(),
  importance: TestImportanceSchema.optional()
});
|
|
345
|
+
|
|
346
|
+
// src/test/llm.ts
|
|
347
|
+
var import_zod8 = require("zod");
|
|
348
|
+
/** Test of type "LLM"; all three added fields are required. */
var LLMTestSchema = BaseTestSchema.extend({
  type: import_zod8.z.literal("LLM" /* LLM */),
  /** Maximum steps for the LLM to take */
  // Bounded to [1, 100]; NOTE(review): not restricted to integers — confirm intent.
  maxSteps: import_zod8.z.number().min(1).max(100),
  /** Prompt to send to the evaluator */
  prompt: import_zod8.z.string().min(1),
  /** ID of the evaluator agent to use */
  evaluatorId: import_zod8.z.string()
});
|
|
357
|
+
|
|
358
|
+
// src/test/tool.ts
|
|
359
|
+
var import_zod9 = require("zod");
|
|
360
|
+
/** Test of type "TOOL"; `toolName` needs at least 3 characters and `args` values are unconstrained. */
var ToolTestSchema = BaseTestSchema.extend({
  type: import_zod9.z.literal("TOOL" /* TOOL */),
  /** Name of the tool that should be called */
  toolName: import_zod9.z.string().min(3),
  /** Expected arguments for the tool call */
  args: import_zod9.z.record(import_zod9.z.string(), import_zod9.z.any()),
  /** Expected content in the tool results */
  resultsContent: import_zod9.z.string()
});
|
|
369
|
+
|
|
370
|
+
// src/test/site-config.ts
|
|
371
|
+
var import_zod10 = require("zod");
|
|
372
|
+
/**
 * Test of type "SITE_CONFIG". `url`, `method` and `expectedStatusCode` are
 * required; the body and both response expectations are optional.
 */
var SiteConfigTestSchema = BaseTestSchema.extend({
  type: import_zod10.z.literal("SITE_CONFIG" /* SITE_CONFIG */),
  /** URL to call */
  url: import_zod10.z.string().url(),
  /** HTTP method */
  method: import_zod10.z.enum(["GET", "POST"]),
  /** Request body (for POST) */
  body: import_zod10.z.string().optional(),
  /** Expected HTTP status code */
  // Integer in the range 100-599.
  expectedStatusCode: import_zod10.z.number().int().min(100).max(599),
  /** Expected response content */
  expectedResponse: import_zod10.z.string().optional(),
  /** JMESPath expression to extract from response */
  expectedResponseJMESPath: import_zod10.z.string().optional()
});
|
|
387
|
+
|
|
388
|
+
// src/test/command-execution.ts
|
|
389
|
+
var import_zod11 = require("zod");
|
|
390
|
+
// Exact command strings accepted by CommandExecutionTestSchema.command.
// Matching is by whole-string equality, so whitespace and flags must match.
var AllowedCommands = [
  "yarn install --no-immutable && yarn build",
  "npm run build",
  "yarn typecheck"
];
|
|
395
|
+
/** Test of type "COMMAND_EXECUTION"; `command` is checked against AllowedCommands. */
var CommandExecutionTestSchema = BaseTestSchema.extend({
  type: import_zod11.z.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
  /** Command to execute (must be in AllowedCommands) */
  command: import_zod11.z.string().refine((value) => AllowedCommands.includes(value), {
    // The error message lists every permitted command.
    message: `Command must be one of: ${AllowedCommands.join(", ")}`
  }),
  /** Expected exit code (default: 0) */
  // NOTE(review): `.default(0)` is wrapped by `.optional()`; whether the
  // default is applied to a missing value depends on the zod version — confirm.
  expectedExitCode: import_zod11.z.number().default(0).optional()
});
|
|
404
|
+
|
|
405
|
+
// src/test/file-presence.ts
|
|
406
|
+
var import_zod12 = require("zod");
|
|
407
|
+
/** Test of type "FILE_PRESENCE"; both `paths` and `shouldExist` are required. */
var FilePresenceTestSchema = BaseTestSchema.extend({
  type: import_zod12.z.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
  /** Paths to check */
  paths: import_zod12.z.array(import_zod12.z.string()),
  /** Whether files should exist (true) or not exist (false) */
  shouldExist: import_zod12.z.boolean()
});
|
|
414
|
+
|
|
415
|
+
// src/test/file-content.ts
|
|
416
|
+
var import_zod13 = require("zod");
|
|
417
|
+
/**
 * Set of content checks for a single file. Every field is optional, so an
 * empty object passes validation.
 */
var FileContentCheckSchema = import_zod13.z.object({
  /** Strings that must be present in the file */
  contains: import_zod13.z.array(import_zod13.z.string()).optional(),
  /** Strings that must NOT be present in the file */
  notContains: import_zod13.z.array(import_zod13.z.string()).optional(),
  /** Regex pattern the content must match */
  // Validated only as a string; the pattern is not compiled by this schema.
  matches: import_zod13.z.string().optional(),
  /** JSON path checks for structured content */
  jsonPath: import_zod13.z.array(
    import_zod13.z.object({
      path: import_zod13.z.string(),
      value: import_zod13.z.unknown()
    })
  ).optional(),
  /** Lines that should be added (for diff checking) */
  added: import_zod13.z.array(import_zod13.z.string()).optional(),
  /** Lines that should be removed (for diff checking) */
  removed: import_zod13.z.array(import_zod13.z.string()).optional()
});
|
|
436
|
+
/** Test of type "FILE_CONTENT": a file path plus the checks to run on it. */
var FileContentTestSchema = BaseTestSchema.extend({
  type: import_zod13.z.literal("FILE_CONTENT" /* FILE_CONTENT */),
  /** Path to the file to check */
  path: import_zod13.z.string(),
  /** Content checks to perform */
  checks: FileContentCheckSchema
});
|
|
443
|
+
|
|
444
|
+
// src/test/build-check.ts
|
|
445
|
+
var import_zod14 = require("zod");
|
|
446
|
+
/**
 * Test of type "BUILD_CHECK". `command` accepts any string here (it is not
 * checked against AllowedCommands); `allowedWarnings` and `timeout` are optional.
 */
var BuildCheckTestSchema = BaseTestSchema.extend({
  type: import_zod14.z.literal("BUILD_CHECK" /* BUILD_CHECK */),
  /** Build command to execute */
  command: import_zod14.z.string(),
  /** Whether the build should succeed */
  expectSuccess: import_zod14.z.boolean(),
  /** Maximum allowed warnings (optional) */
  allowedWarnings: import_zod14.z.number().optional(),
  /** Timeout in milliseconds */
  timeout: import_zod14.z.number().optional()
});
|
|
457
|
+
|
|
458
|
+
// src/test/vitest.ts
|
|
459
|
+
var import_zod15 = require("zod");
|
|
460
|
+
/** Test of type "VITEST"; all three added fields are required. */
var VitestTestSchema = BaseTestSchema.extend({
  type: import_zod15.z.literal("VITEST" /* VITEST */),
  /** Test file content */
  testFile: import_zod15.z.string(),
  /** Name of the test file */
  testFileName: import_zod15.z.string(),
  /** Minimum pass rate required (0-100) */
  minPassRate: import_zod15.z.number().min(0).max(100)
});
|
|
469
|
+
|
|
470
|
+
// src/test/playwright-nl.ts
|
|
471
|
+
var import_zod16 = require("zod");
|
|
472
|
+
/** Test of type "PLAYWRIGHT_NL"; `steps` and `expectedOutcome` are required, `timeout` is optional. */
var PlaywrightNLTestSchema = BaseTestSchema.extend({
  type: import_zod16.z.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
  /** Natural language steps to execute */
  steps: import_zod16.z.array(import_zod16.z.string()),
  /** Expected outcome description */
  expectedOutcome: import_zod16.z.string(),
  /** Timeout in milliseconds */
  timeout: import_zod16.z.number().optional()
});
|
|
481
|
+
|
|
482
|
+
// src/test/index.ts
|
|
483
|
+
/**
 * Union of all concrete test schemas, discriminated on the `type` field.
 * Covers one member per TestType value.
 */
var TestSchema = import_zod17.z.discriminatedUnion("type", [
  LLMTestSchema,
  ToolTestSchema,
  SiteConfigTestSchema,
  CommandExecutionTestSchema,
  FilePresenceTestSchema,
  FileContentTestSchema,
  BuildCheckTestSchema,
  VitestTestSchema,
  PlaywrightNLTestSchema
]);
|
|
494
|
+
|
|
495
|
+
// src/scenario/environment.ts
|
|
496
|
+
var import_zod18 = require("zod");
|
|
497
|
+
/**
 * Local project setup. Both fields are optional; when `files` is given,
 * each entry needs a non-empty `path` and non-empty `content`.
 */
var LocalProjectConfigSchema = import_zod18.z.object({
  /** Template ID to use for the local project */
  templateId: import_zod18.z.string().optional(),
  /** Files to create in the project */
  files: import_zod18.z.array(
    import_zod18.z.object({
      path: import_zod18.z.string().min(1),
      content: import_zod18.z.string().min(1)
    })
  ).optional()
});
|
|
508
|
+
/**
 * Meta site setup: an optional list of named configurations, each holding
 * a required list of API calls (URL, POST or PUT method, string body).
 */
var MetaSiteConfigSchema = import_zod18.z.object({
  configurations: import_zod18.z.array(
    import_zod18.z.object({
      name: import_zod18.z.string().min(1),
      apiCalls: import_zod18.z.array(
        import_zod18.z.object({
          url: import_zod18.z.string().url(),
          method: import_zod18.z.enum(["POST", "PUT"]),
          body: import_zod18.z.string()
        })
      )
    })
  ).optional()
});
|
|
522
|
+
/** Scenario environment; both parts are optional. */
var EnvironmentSchema = import_zod18.z.object({
  /** Local project configuration */
  localProject: LocalProjectConfigSchema.optional(),
  /** Meta site configuration */
  metaSite: MetaSiteConfigSchema.optional()
});
|
|
528
|
+
|
|
529
|
+
// src/scenario/test-scenario.ts
|
|
530
|
+
var import_zod19 = require("zod");
|
|
531
|
+
/** A file expected to exist: required `path`, optional `content`. */
var ExpectedFileSchema = import_zod19.z.object({
  /** Relative path where the file should be created */
  path: import_zod19.z.string(),
  /** Optional expected content */
  content: import_zod19.z.string().optional()
});
|
|
537
|
+
/** Test scenario: TenantEntitySchema plus a trigger prompt (min. 10 characters) and an optional template id. */
var TestScenarioSchema = TenantEntitySchema.extend({
  /** The prompt sent to the agent to trigger the task */
  triggerPrompt: import_zod19.z.string().min(10),
  /** ID of the template to use for this scenario */
  templateId: import_zod19.z.string().optional()
});
|
|
543
|
+
/** Creation input: TestScenarioSchema without `id`, `createdAt`, `updatedAt` and `deleted`. */
var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
  id: true,
  createdAt: true,
  updatedAt: true,
  deleted: true
});
|
|
549
|
+
/** Update input: every field of CreateTestScenarioInputSchema made optional. */
var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
|
|
550
|
+
|
|
551
|
+
// src/suite/test-suite.ts
|
|
552
|
+
var import_zod20 = require("zod");
|
|
553
|
+
/** Test suite: TenantEntitySchema plus a required list of scenario ids. */
var TestSuiteSchema = TenantEntitySchema.extend({
  /** IDs of test scenarios in this suite */
  scenarioIds: import_zod20.z.array(import_zod20.z.string())
});
|
|
557
|
+
/** Creation input: TestSuiteSchema without `id`, `createdAt`, `updatedAt` and `deleted`. */
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
  id: true,
  createdAt: true,
  updatedAt: true,
  deleted: true
});
|
|
563
|
+
/** Update input: every field of CreateTestSuiteInputSchema made optional. */
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
564
|
+
|
|
565
|
+
// src/evaluation/metrics.ts
|
|
566
|
+
var import_zod21 = require("zod");
|
|
567
|
+
/**
 * Token counts for prompt, completion and total; all required numbers.
 * The schema does not check that `total` equals `prompt + completion`.
 */
var TokenUsageSchema = import_zod21.z.object({
  prompt: import_zod21.z.number(),
  completion: import_zod21.z.number(),
  total: import_zod21.z.number()
});
|
|
572
|
+
/**
 * Aggregate metrics for an evaluation; every field is a required number.
 * No ranges or units are enforced by this schema.
 */
var EvalMetricsSchema = import_zod21.z.object({
  totalAssertions: import_zod21.z.number(),
  passed: import_zod21.z.number(),
  failed: import_zod21.z.number(),
  skipped: import_zod21.z.number(),
  errors: import_zod21.z.number(),
  passRate: import_zod21.z.number(),
  avgDuration: import_zod21.z.number(),
  totalDuration: import_zod21.z.number()
});
|
|
582
|
+
/**
 * String enum of evaluation run states; keys are upper-case, values lower-case.
 * Declared with `var` because the initializer reads the hoisted binding.
 */
var EvalStatus = /* @__PURE__ */ ((states) => {
  const members = [
    ["PENDING", "pending"],
    ["RUNNING", "running"],
    ["COMPLETED", "completed"],
    ["FAILED", "failed"],
    ["CANCELLED", "cancelled"]
  ];
  for (const [member, value] of members) {
    states[member] = value;
  }
  return states;
})(EvalStatus || {});
|
|
590
|
+
/** Zod enum built from the EvalStatus enum object. */
var EvalStatusSchema = import_zod21.z.enum(EvalStatus);
|
|
591
|
+
/**
 * String enum of LLM trace step kinds; keys are upper-case, values lower-case.
 * Declared with `var` because the initializer reads the hoisted binding.
 */
var LLMStepType = /* @__PURE__ */ ((stepKinds) =>
  Object.assign(stepKinds, {
    COMPLETION: "completion",
    TOOL_USE: "tool_use",
    TOOL_RESULT: "tool_result",
    THINKING: "thinking"
  })
)(LLMStepType || {});
|
|
598
|
+
/**
 * One step of an LLM trace. Tool fields, previews and `error` are optional;
 * everything else is required. `startedAt` is validated only as a string.
 */
var LLMTraceStepSchema = import_zod21.z.object({
  id: import_zod21.z.string(),
  stepNumber: import_zod21.z.number(),
  type: import_zod21.z.enum(LLMStepType),
  model: import_zod21.z.string(),
  provider: import_zod21.z.string(),
  startedAt: import_zod21.z.string(),
  durationMs: import_zod21.z.number(),
  tokenUsage: TokenUsageSchema,
  costUsd: import_zod21.z.number(),
  toolName: import_zod21.z.string().optional(),
  // Tool arguments are carried as a string, not a structured object.
  toolArguments: import_zod21.z.string().optional(),
  inputPreview: import_zod21.z.string().optional(),
  outputPreview: import_zod21.z.string().optional(),
  success: import_zod21.z.boolean(),
  error: import_zod21.z.string().optional()
});
|
|
615
|
+
/** Per-bucket totals used in trace summaries; all four fields are required numbers. */
var LLMBreakdownStatsSchema = import_zod21.z.object({
  count: import_zod21.z.number(),
  durationMs: import_zod21.z.number(),
  tokens: import_zod21.z.number(),
  costUsd: import_zod21.z.number()
});
|
|
621
|
+
/**
 * Summary of a whole LLM trace. `stepTypeBreakdown` is optional while
 * `modelBreakdown` is required; both map string keys to LLMBreakdownStatsSchema.
 */
var LLMTraceSummarySchema = import_zod21.z.object({
  totalSteps: import_zod21.z.number(),
  totalDurationMs: import_zod21.z.number(),
  totalTokens: TokenUsageSchema,
  totalCostUsd: import_zod21.z.number(),
  stepTypeBreakdown: import_zod21.z.record(import_zod21.z.string(), LLMBreakdownStatsSchema).optional(),
  modelBreakdown: import_zod21.z.record(import_zod21.z.string(), LLMBreakdownStatsSchema),
  modelsUsed: import_zod21.z.array(import_zod21.z.string())
});
|
|
630
|
+
/**
 * Zod schema for a complete LLM trace: an id, the array of steps
 * (LLMTraceStepSchema) and their summary (LLMTraceSummarySchema).
 */
var LLMTraceSchema = (() => {
  const { z } = import_zod21;
  return z.object({
    id: z.string(),
    steps: z.array(LLMTraceStepSchema),
    summary: LLMTraceSummarySchema
  });
})();
|
|
635
|
+
|
|
636
|
+
// src/evaluation/eval-result.ts
|
|
637
|
+
var import_zod22 = require("zod");
|
|
638
|
+
/**
 * Enum-like map of the possible outcomes of a single assertion.
 * Keys are the upper-case member names; values are the lower-case wire strings.
 */
var AssertionResultStatus = /* @__PURE__ */ Object.assign(AssertionResultStatus || {}, {
  PASSED: "passed",
  FAILED: "failed",
  SKIPPED: "skipped",
  ERROR: "error"
});
|
|
645
|
+
/**
 * Zod schema for the result of one assertion.
 *
 * Required: id, assertionId, assertionType, assertionName, status (one of the
 * AssertionResultStatus values).
 * Optional: message, expected, actual, duration, details (string-keyed record
 * of unknown values), llmTraceSteps (array of LLMTraceStepSchema).
 */
var AssertionResultSchema = (() => {
  const { z } = import_zod22;
  const optionalText = () => z.string().optional();
  return z.object({
    id: z.string(),
    assertionId: z.string(),
    assertionType: z.string(),
    assertionName: z.string(),
    status: z.enum(AssertionResultStatus),
    message: optionalText(),
    expected: optionalText(),
    actual: optionalText(),
    duration: z.number().optional(),
    details: z.record(z.string(), z.unknown()).optional(),
    llmTraceSteps: z.array(LLMTraceStepSchema).optional()
  });
})();
|
|
658
|
+
/**
 * Zod schema for the result of running one scenario against one target.
 *
 * Required: id, targetId, scenarioId, scenarioName, assertionResults,
 * passed, failed, passRate, duration.
 * Optional: targetName, modelConfig, metrics, outputText, files,
 * startedAt, completedAt, llmTrace.
 */
var EvalRunResultSchema = (() => {
  const { z } = import_zod22;
  const optionalText = () => z.string().optional();
  return z.object({
    id: z.string(),
    targetId: z.string(),
    targetName: optionalText(),
    scenarioId: z.string(),
    scenarioName: z.string(),
    modelConfig: ModelConfigSchema.optional(),
    assertionResults: z.array(AssertionResultSchema),
    metrics: EvalMetricsSchema.optional(),
    passed: z.number(),
    failed: z.number(),
    passRate: z.number(),
    duration: z.number(),
    outputText: optionalText(),
    files: z.array(ExpectedFileSchema).optional(),
    startedAt: optionalText(),
    completedAt: optionalText(),
    llmTrace: LLMTraceSchema.optional()
  });
})();
|
|
677
|
+
/**
 * Zod schema for the raw result of a prompt execution.
 *
 * Required: text, steps (array of unknown), generationTimeMs, prompt,
 * systemPrompt, usage (object whose totalTokens and totalMicrocentsSpent
 * are both optional numbers).
 * Optional: files, finishReason, reasoning, reasoningDetails, toolCalls,
 * toolResults, warnings, sources. The list-valued ones accept arrays of
 * unknown items.
 */
var PromptResultSchema = (() => {
  const { z } = import_zod22;
  const optionalText = () => z.string().optional();
  const optionalNumber = () => z.number().optional();
  const optionalUnknownList = () => z.array(z.unknown()).optional();
  return z.object({
    text: z.string(),
    files: optionalUnknownList(),
    finishReason: optionalText(),
    reasoning: optionalText(),
    reasoningDetails: z.unknown().optional(),
    toolCalls: optionalUnknownList(),
    toolResults: optionalUnknownList(),
    warnings: optionalUnknownList(),
    sources: optionalUnknownList(),
    steps: z.array(z.unknown()),
    generationTimeMs: z.number(),
    prompt: z.string(),
    systemPrompt: z.string(),
    usage: z.object({
      totalTokens: optionalNumber(),
      totalMicrocentsSpent: optionalNumber()
    })
  });
})();
|
|
696
|
+
/**
 * Zod schema for a full evaluation result.
 *
 * Required: id, runId, timestamp (number), promptResult (PromptResultSchema),
 * testResults (array of unknown), score.
 * Optional: tags, feedback, suiteId.
 */
var EvaluationResultSchema = (() => {
  const { z } = import_zod22;
  const optionalText = () => z.string().optional();
  return z.object({
    id: z.string(),
    runId: z.string(),
    timestamp: z.number(),
    promptResult: PromptResultSchema,
    testResults: z.array(z.unknown()),
    tags: z.array(z.string()).optional(),
    feedback: optionalText(),
    score: z.number(),
    suiteId: optionalText()
  });
})();
|
|
707
|
+
/**
 * Zod schema for the lean form of an evaluation result: identifiers,
 * versions, score and cost, with no prompt result or test results.
 *
 * Required: id, runId, timestamp, scenarioId, targetId, score.
 * Optional: tags, scenarioVersion, targetVersion, suiteId, time,
 * microcentsSpent.
 */
var LeanEvaluationResultSchema = (() => {
  const { z } = import_zod22;
  const optionalNumber = () => z.number().optional();
  return z.object({
    id: z.string(),
    runId: z.string(),
    timestamp: z.number(),
    tags: z.array(z.string()).optional(),
    scenarioId: z.string(),
    scenarioVersion: optionalNumber(),
    targetId: z.string(),
    targetVersion: optionalNumber(),
    suiteId: z.string().optional(),
    score: z.number(),
    time: optionalNumber(),
    microcentsSpent: optionalNumber()
  });
})();
|
|
721
|
+
|
|
722
|
+
// src/evaluation/eval-run.ts
|
|
723
|
+
var import_zod24 = require("zod");
|
|
724
|
+
|
|
725
|
+
// src/evaluation/live-trace.ts
|
|
726
|
+
var import_zod23 = require("zod");
|
|
727
|
+
/**
 * Enum-like map of the kinds of live trace event.
 * Keys are the upper-case member names; values are the lower-case wire strings.
 */
var LiveTraceEventType = /* @__PURE__ */ Object.assign(LiveTraceEventType || {}, {
  THINKING: "thinking",
  TOOL_USE: "tool_use",
  COMPLETION: "completion",
  TOOL_RESULT: "tool_result"
});
|
|
734
|
+
/**
 * Zod schema for one live trace event emitted while a scenario executes.
 * All fields are required except toolName, toolArgs and outputPreview.
 */
var LiveTraceEventSchema = (() => {
  const { z } = import_zod23;
  const optionalText = () => z.string().optional();
  return z.object({
    /** ID of the evaluation run */
    evalRunId: z.string(),
    /** ID of the scenario being executed */
    scenarioId: z.string(),
    /** Scenario name, for display */
    scenarioName: z.string(),
    /** ID of the target (skill, agent, etc.) */
    targetId: z.string(),
    /** Target name, for display */
    targetName: z.string(),
    /** Step number within the current scenario execution */
    stepNumber: z.number(),
    /** Kind of trace event (one of the LiveTraceEventType values) */
    type: z.enum(LiveTraceEventType),
    /** Tool name, when this is a tool_use event */
    toolName: optionalText(),
    /** Preview of the tool arguments (truncated JSON) */
    toolArgs: optionalText(),
    /** Preview of the output (truncated text) */
    outputPreview: optionalText(),
    /** Timestamp at which the event occurred */
    timestamp: z.string(),
    /** True when this is the final event for the scenario */
    isComplete: z.boolean()
  });
})();
|
|
760
|
+
/** Marker that a line must start with to be treated as a serialized trace event. */
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";

/**
 * Try to read a live trace event out of a single line of text.
 *
 * The line must start with TRACE_EVENT_PREFIX; everything after the prefix is
 * parsed as JSON and validated against LiveTraceEventSchema.
 *
 * @param {string} line - One line of output to inspect.
 * @returns {object|null} The validated event data, or null when the input is
 *   not a string, lacks the prefix, is not valid JSON, or fails validation.
 */
function parseTraceEventLine(line) {
  // Non-string input (null, undefined, numbers, Buffers, ...) can never be a
  // trace line. Answer null instead of letting `line.startsWith` throw.
  if (typeof line !== "string" || !line.startsWith(TRACE_EVENT_PREFIX)) {
    return null;
  }
  try {
    const jsonStr = line.slice(TRACE_EVENT_PREFIX.length);
    const parsed = JSON.parse(jsonStr);
    const result = LiveTraceEventSchema.safeParse(parsed);
    return result.success ? result.data : null;
  } catch {
    // Deliberately best-effort: a malformed payload is reported as "no event".
    return null;
  }
}
|
|
774
|
+
/**
 * Serialize a trace event into a single line: TRACE_EVENT_PREFIX followed by
 * the event's JSON representation.
 *
 * @param {object} event - The event to serialize with JSON.stringify.
 * @returns {string} The prefixed line.
 */
function formatTraceEventLine(event) {
  const payload = JSON.stringify(event);
  return `${TRACE_EVENT_PREFIX}${payload}`;
}
|
|
777
|
+
|
|
778
|
+
// src/evaluation/eval-run.ts
|
|
779
|
+
/**
 * Enum-like map of what can trigger an evaluation run.
 * Each key maps to a string value identical to the key.
 */
var TriggerType = /* @__PURE__ */ Object.assign(TriggerType || {}, {
  RESOURCES_UPDATED: "RESOURCES_UPDATED",
  MCP_VERSION_RELEASE: "MCP_VERSION_RELEASE",
  MCP_PREVIEW_CREATED: "MCP_PREVIEW_CREATED",
  MANUAL: "MANUAL"
});
|
|
786
|
+
/**
 * Zod schema for optional details attached to a trigger: a version string
 * and a list of updated resource names, both optional.
 */
var TriggerMetadataSchema = (() => {
  const { z } = import_zod24;
  return z.object({
    version: z.string().optional(),
    resourceUpdated: z.array(z.string()).optional()
  });
})();
|
|
790
|
+
/**
 * Zod schema for a run trigger: a required id and type (one of the
 * TriggerType values), plus optional metadata (TriggerMetadataSchema).
 */
var TriggerSchema = (() => {
  const { z } = import_zod24;
  return z.object({
    id: z.string(),
    metadata: TriggerMetadataSchema.optional(),
    type: z.enum(TriggerType)
  });
})();
|
|
795
|
+
/**
 * Enum-like map of the categories a failure can be classified into.
 * Keys are the upper-case member names; values are the lower-case wire strings.
 */
var FailureCategory = /* @__PURE__ */ Object.assign(FailureCategory || {}, {
  MISSING_FILE: "missing_file",
  WRONG_CONTENT: "wrong_content",
  BUILD_ERROR: "build_error",
  TEST_FAILURE: "test_failure",
  RUNTIME_ERROR: "runtime_error",
  PERFORMANCE: "performance"
});
|
|
804
|
+
/**
 * Enum-like map of failure severity levels.
 * Keys are the upper-case member names; values are the lower-case wire strings.
 */
var FailureSeverity = /* @__PURE__ */ Object.assign(FailureSeverity || {}, {
  CRITICAL: "critical",
  HIGH: "high",
  MEDIUM: "medium",
  LOW: "low"
});
|
|
811
|
+
/** Zod enum of the three kinds of diff line: "added", "removed", "unchanged". */
var DiffLineTypeSchema = (() => {
  const { z } = import_zod24;
  return z.enum(["added", "removed", "unchanged"]);
})();

/**
 * Zod schema for one line of a diff: its type (DiffLineTypeSchema),
 * its text content and its line number.
 */
var DiffLineSchema = (() => {
  const { z } = import_zod24;
  return z.object({
    type: DiffLineTypeSchema,
    content: z.string(),
    lineNumber: z.number()
  });
})();
|
|
817
|
+
/**
 * Zod schema for a diff of one path: the path, the expected and actual
 * text, and the diff lines (array of DiffLineSchema). All fields required.
 */
var DiffContentSchema = (() => {
  const { z } = import_zod24;
  return z.object({
    path: z.string(),
    expected: z.string(),
    actual: z.string(),
    diffLines: z.array(DiffLineSchema)
  });
})();
|
|
823
|
+
/**
 * Zod schema for one executed command: the command string, its exit code
 * and duration (both numbers), and optionally its output.
 */
var CommandExecutionSchema = (() => {
  const { z } = import_zod24;
  return z.object({
    command: z.string(),
    exitCode: z.number(),
    output: z.string().optional(),
    duration: z.number()
  });
})();
|
|
829
|
+
/**
 * Zod schema for one file modification: the path and the action taken,
 * which must be "created", "modified" or "deleted".
 */
var FileModificationSchema = (() => {
  const { z } = import_zod24;
  return z.object({
    path: z.string(),
    action: z.enum(["created", "modified", "deleted"])
  });
})();
|
|
833
|
+
/**
 * Zod schema for one API call: the endpoint string plus the tokens used
 * and the duration, both numbers.
 */
var ApiCallSchema = (() => {
  const { z } = import_zod24;
  return z.object({
    endpoint: z.string(),
    tokensUsed: z.number(),
    duration: z.number()
  });
})();
|
|
838
|
+
/**
 * Zod schema for an execution trace: the commands run, the files modified,
 * the API calls made, and the total duration. All fields required.
 */
var ExecutionTraceSchema = (() => {
  const { z } = import_zod24;
  return z.object({
    commands: z.array(CommandExecutionSchema),
    filesModified: z.array(FileModificationSchema),
    apiCalls: z.array(ApiCallSchema),
    totalDuration: z.number()
  });
})();
|
|
844
|
+
/**
 * Zod schema for the analysis of one failure.
 *
 * Required: category (a FailureCategory value), severity (a FailureSeverity
 * value), summary, details, rootCause, suggestedFix, relatedAssertions.
 * Optional: codeSnippet, similarIssues, patternId, diff, executionTrace.
 */
var FailureAnalysisSchema = (() => {
  const { z } = import_zod24;
  const optionalText = () => z.string().optional();
  return z.object({
    category: z.enum(FailureCategory),
    severity: z.enum(FailureSeverity),
    summary: z.string(),
    details: z.string(),
    rootCause: z.string(),
    suggestedFix: z.string(),
    relatedAssertions: z.array(z.string()),
    codeSnippet: optionalText(),
    similarIssues: z.array(z.string()).optional(),
    patternId: optionalText(),
    // Extended fields for detailed debugging
    diff: DiffContentSchema.optional(),
    executionTrace: ExecutionTraceSchema.optional()
  });
})();
|
|
859
|
+
/**
 * Zod schema for an evaluation run: TenantEntitySchema extended with the
 * run's targets, status, results and timing fields.
 */
var EvalRunSchema = (() => {
  const { z } = import_zod24;
  const optionalText = () => z.string().optional();
  const runFields = {
    /** ID of the agent for this run */
    agentId: optionalText(),
    /** ID of the skills group for this run */
    skillsGroupId: optionalText(),
    /** IDs of the scenarios to run */
    scenarioIds: z.array(z.string()),
    /** Current status */
    status: EvalStatusSchema,
    /** Progress percentage (0-100) */
    progress: z.number(),
    /** One result per scenario/target combination */
    results: z.array(EvalRunResultSchema),
    /** Metrics aggregated across all results */
    aggregateMetrics: EvalMetricsSchema,
    /** Failure analyses */
    failureAnalyses: z.array(FailureAnalysisSchema).optional(),
    /** Aggregated LLM trace summary */
    llmTraceSummary: LLMTraceSummarySchema.optional(),
    /** What triggered this run */
    trigger: TriggerSchema.optional(),
    /** When the run started (set when evaluation is triggered) */
    startedAt: optionalText(),
    /** When the run completed */
    completedAt: optionalText(),
    /** Live trace events captured during execution (for playback on results page) */
    liveTraceEvents: z.array(LiveTraceEventSchema).optional()
  };
  return TenantEntitySchema.extend(runFields);
})();
|
|
887
|
+
/**
 * Zod schema for the input that creates an evaluation run: EvalRunSchema
 * with the fields listed below removed.
 */
var CreateEvalRunInputSchema = EvalRunSchema.omit(
  // Build the { field: true } mask that `.omit` expects from a plain list.
  Object.fromEntries(
    [
      "id",
      "createdAt",
      "updatedAt",
      "status",
      "progress",
      "results",
      "aggregateMetrics",
      "startedAt",
      "completedAt"
    ].map((field) => [field, true])
  )
);
|
|
898
|
+
/**
 * Zod schema for a progress report on an evaluation run.
 *
 * Carries the run and target IDs, the total and completed scenario counts,
 * one progress entry per scenario (scenarioId, currentStep, optional error)
 * and a numeric createdAt.
 */
var EvaluationProgressSchema = (() => {
  const { z } = import_zod24;
  const scenarioEntry = z.object({
    scenarioId: z.string(),
    currentStep: z.string(),
    error: z.string().optional()
  });
  return z.object({
    runId: z.string(),
    targetId: z.string(),
    totalScenarios: z.number(),
    completedScenarios: z.number(),
    scenarioProgress: z.array(scenarioEntry),
    createdAt: z.number()
  });
})();
|
|
912
|
+
/**
 * Zod schema for one log entry of an evaluation run.
 *
 * Carries the run and scenario IDs plus a log object whose level must be
 * "info", "error" or "debug"; message, args (array of any) and error are
 * optional.
 */
var EvaluationLogSchema = (() => {
  const { z } = import_zod24;
  const logEntry = z.object({
    level: z.enum(["info", "error", "debug"]),
    message: z.string().optional(),
    args: z.array(z.any()).optional(),
    error: z.string().optional()
  });
  return z.object({
    runId: z.string(),
    scenarioId: z.string(),
    log: logEntry
  });
})();
|
|
922
|
+
// Timeout applied to LLM calls. NOTE(review): presumably milliseconds (i.e. two minutes) — confirm against callers.
var LLM_TIMEOUT = 120000;
|
|
923
|
+
|
|
924
|
+
// src/project/project.ts
|
|
925
|
+
var import_zod25 = require("zod");
|
|
926
|
+
/**
 * Zod schema for a project: BaseEntitySchema extended with two optional,
 * described string fields for the Dev Center app's ID and secret.
 */
var ProjectSchema = (() => {
  const { z } = import_zod25;
  const describedOptionalText = (description) => z.string().optional().describe(description);
  return BaseEntitySchema.extend({
    appId: describedOptionalText("The ID of the app in Dev Center"),
    appSecret: describedOptionalText("The secret of the app in Dev Center")
  });
})();
|
|
930
|
+
/**
 * Zod schema for the input that creates a project: ProjectSchema without
 * the id, createdAt, updatedAt and deleted fields.
 */
var CreateProjectInputSchema = ProjectSchema.omit(
  // Build the { field: true } mask that `.omit` expects from a plain list.
  Object.fromEntries(
    ["id", "createdAt", "updatedAt", "deleted"].map((field) => [field, true])
  )
);

/** Zod schema for a project update: every create-input field made optional. */
var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
|
|
937
|
+
|
|
938
|
+
// src/template/template.ts
|
|
939
|
+
var import_zod26 = require("zod");
|
|
940
|
+
/**
 * Zod schema for a template: TenantEntitySchema extended with a required
 * download URL.
 */
var TemplateSchema = (() => {
  const { z } = import_zod26;
  return TenantEntitySchema.extend({
    /** URL the template is downloaded from */
    downloadUrl: z.url()
  });
})();
|
|
944
|
+
/**
 * Zod schema for the input that creates a template: TemplateSchema without
 * the id, createdAt, updatedAt and deleted fields.
 */
var CreateTemplateInputSchema = TemplateSchema.omit(
  // Build the { field: true } mask that `.omit` expects from a plain list.
  Object.fromEntries(
    ["id", "createdAt", "updatedAt", "deleted"].map((field) => [field, true])
  )
);

/** Zod schema for a template update: every create-input field made optional. */
var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
951
|
+
// Annotate the CommonJS export names for ESM import in node (the `0 &&` guard means this assignment never executes):
|
|
952
|
+
0 && (module.exports = {
|
|
953
|
+
AVAILABLE_MODELS,
|
|
954
|
+
AVAILABLE_MODELS_MAP,
|
|
955
|
+
AgentSchema,
|
|
956
|
+
AllowedCommands,
|
|
957
|
+
ApiCallSchema,
|
|
958
|
+
AssertionResultSchema,
|
|
959
|
+
AssertionResultStatus,
|
|
960
|
+
BaseEntitySchema,
|
|
961
|
+
BaseTestSchema,
|
|
962
|
+
BuildCheckTestSchema,
|
|
963
|
+
CommandExecutionSchema,
|
|
964
|
+
CommandExecutionTestSchema,
|
|
965
|
+
CreateAgentInputSchema,
|
|
966
|
+
CreateEvalRunInputSchema,
|
|
967
|
+
CreateProjectInputSchema,
|
|
968
|
+
CreateSkillInputSchema,
|
|
969
|
+
CreateSkillsGroupInputSchema,
|
|
970
|
+
CreateTemplateInputSchema,
|
|
971
|
+
CreateTestScenarioInputSchema,
|
|
972
|
+
CreateTestSuiteInputSchema,
|
|
973
|
+
DiffContentSchema,
|
|
974
|
+
DiffLineSchema,
|
|
975
|
+
DiffLineTypeSchema,
|
|
976
|
+
EnvironmentSchema,
|
|
977
|
+
EvalMetricsSchema,
|
|
978
|
+
EvalRunResultSchema,
|
|
979
|
+
EvalRunSchema,
|
|
980
|
+
EvalStatus,
|
|
981
|
+
EvalStatusSchema,
|
|
982
|
+
EvaluationLogSchema,
|
|
983
|
+
EvaluationProgressSchema,
|
|
984
|
+
EvaluationResultSchema,
|
|
985
|
+
ExecutionTraceSchema,
|
|
986
|
+
ExpectedFileSchema,
|
|
987
|
+
FailureAnalysisSchema,
|
|
988
|
+
FailureCategory,
|
|
989
|
+
FailureSeverity,
|
|
990
|
+
FileContentCheckSchema,
|
|
991
|
+
FileContentTestSchema,
|
|
992
|
+
FileModificationSchema,
|
|
993
|
+
FilePresenceTestSchema,
|
|
994
|
+
LLMBreakdownStatsSchema,
|
|
995
|
+
LLMStepType,
|
|
996
|
+
LLMTestSchema,
|
|
997
|
+
LLMTraceSchema,
|
|
998
|
+
LLMTraceStepSchema,
|
|
999
|
+
LLMTraceSummarySchema,
|
|
1000
|
+
LLM_TIMEOUT,
|
|
1001
|
+
LeanEvaluationResultSchema,
|
|
1002
|
+
LiveTraceEventSchema,
|
|
1003
|
+
LiveTraceEventType,
|
|
1004
|
+
LocalProjectConfigSchema,
|
|
1005
|
+
MCPServerConfigSchema,
|
|
1006
|
+
MetaSiteConfigSchema,
|
|
1007
|
+
ModelConfigSchema,
|
|
1008
|
+
ModelIds,
|
|
1009
|
+
ModelIdsSchema,
|
|
1010
|
+
ModelPricingSchema,
|
|
1011
|
+
ModelSchema,
|
|
1012
|
+
PlaywrightNLTestSchema,
|
|
1013
|
+
ProjectSchema,
|
|
1014
|
+
PromptResultSchema,
|
|
1015
|
+
SiteConfigTestSchema,
|
|
1016
|
+
SkillMetadataSchema,
|
|
1017
|
+
SkillSchema,
|
|
1018
|
+
SkillVersionSchema,
|
|
1019
|
+
SkillsGroupSchema,
|
|
1020
|
+
TRACE_EVENT_PREFIX,
|
|
1021
|
+
TargetSchema,
|
|
1022
|
+
TemplateSchema,
|
|
1023
|
+
TenantEntitySchema,
|
|
1024
|
+
TestImportance,
|
|
1025
|
+
TestImportanceSchema,
|
|
1026
|
+
TestScenarioSchema,
|
|
1027
|
+
TestSchema,
|
|
1028
|
+
TestSuiteSchema,
|
|
1029
|
+
TestType,
|
|
1030
|
+
TestTypeSchema,
|
|
1031
|
+
TokenUsageSchema,
|
|
1032
|
+
ToolTestSchema,
|
|
1033
|
+
TriggerMetadataSchema,
|
|
1034
|
+
TriggerSchema,
|
|
1035
|
+
TriggerType,
|
|
1036
|
+
UpdateAgentInputSchema,
|
|
1037
|
+
UpdateProjectInputSchema,
|
|
1038
|
+
UpdateSkillInputSchema,
|
|
1039
|
+
UpdateSkillsGroupInputSchema,
|
|
1040
|
+
UpdateTemplateInputSchema,
|
|
1041
|
+
UpdateTestScenarioInputSchema,
|
|
1042
|
+
UpdateTestSuiteInputSchema,
|
|
1043
|
+
VitestTestSchema,
|
|
1044
|
+
formatTraceEventLine,
|
|
1045
|
+
parseTraceEventLine
|
|
1046
|
+
});
|
|
1047
|
+
//# sourceMappingURL=index.js.map
|