@langwatch/mcp-server 0.3.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +97 -25
- package/dist/archive-scenario-GAE4XVFM.js +19 -0
- package/dist/archive-scenario-GAE4XVFM.js.map +1 -0
- package/dist/chunk-AAQNA53E.js +28 -0
- package/dist/chunk-AAQNA53E.js.map +1 -0
- package/dist/chunk-JVWDWL3J.js +91 -0
- package/dist/chunk-JVWDWL3J.js.map +1 -0
- package/dist/chunk-K2YFPOSD.js +40 -0
- package/dist/chunk-K2YFPOSD.js.map +1 -0
- package/dist/chunk-ZXKLPC2E.js +27 -0
- package/dist/chunk-ZXKLPC2E.js.map +1 -0
- package/dist/config-FIQWQRUB.js +11 -0
- package/dist/config-FIQWQRUB.js.map +1 -0
- package/dist/create-prompt-P35POKBW.js +22 -0
- package/dist/create-prompt-P35POKBW.js.map +1 -0
- package/dist/create-scenario-3YRZVDYF.js +26 -0
- package/dist/create-scenario-3YRZVDYF.js.map +1 -0
- package/dist/discover-scenario-schema-MEEEVND7.js +65 -0
- package/dist/discover-scenario-schema-MEEEVND7.js.map +1 -0
- package/dist/discover-schema-3T52ORPB.js +446 -0
- package/dist/discover-schema-3T52ORPB.js.map +1 -0
- package/dist/get-analytics-BAVXTAPB.js +55 -0
- package/dist/get-analytics-BAVXTAPB.js.map +1 -0
- package/dist/get-prompt-LKCPT26O.js +48 -0
- package/dist/get-prompt-LKCPT26O.js.map +1 -0
- package/dist/get-scenario-3SCDW4Z6.js +33 -0
- package/dist/get-scenario-3SCDW4Z6.js.map +1 -0
- package/dist/get-trace-QFDWJ5D4.js +50 -0
- package/dist/get-trace-QFDWJ5D4.js.map +1 -0
- package/dist/index.js +22114 -8786
- package/dist/index.js.map +1 -1
- package/dist/list-prompts-UQPBCUYA.js +33 -0
- package/dist/list-prompts-UQPBCUYA.js.map +1 -0
- package/dist/list-scenarios-573YOUKC.js +40 -0
- package/dist/list-scenarios-573YOUKC.js.map +1 -0
- package/dist/search-traces-RSMYCAN7.js +72 -0
- package/dist/search-traces-RSMYCAN7.js.map +1 -0
- package/dist/update-prompt-G2Y5EBQY.js +31 -0
- package/dist/update-prompt-G2Y5EBQY.js.map +1 -0
- package/dist/update-scenario-SSGVOBJO.js +27 -0
- package/dist/update-scenario-SSGVOBJO.js.map +1 -0
- package/package.json +3 -3
- package/src/__tests__/config.unit.test.ts +89 -0
- package/src/__tests__/date-parsing.unit.test.ts +78 -0
- package/src/__tests__/discover-schema.unit.test.ts +118 -0
- package/src/__tests__/integration.integration.test.ts +313 -0
- package/src/__tests__/langwatch-api.unit.test.ts +309 -0
- package/src/__tests__/scenario-tools.integration.test.ts +286 -0
- package/src/__tests__/scenario-tools.unit.test.ts +185 -0
- package/src/__tests__/schemas.unit.test.ts +85 -0
- package/src/__tests__/tools.unit.test.ts +729 -0
- package/src/config.ts +31 -0
- package/src/index.ts +383 -0
- package/src/langwatch-api-scenarios.ts +67 -0
- package/src/langwatch-api.ts +266 -0
- package/src/schemas/analytics-groups.ts +78 -0
- package/src/schemas/analytics-metrics.ts +179 -0
- package/src/schemas/filter-fields.ts +119 -0
- package/src/schemas/index.ts +3 -0
- package/src/tools/archive-scenario.ts +19 -0
- package/src/tools/create-prompt.ts +29 -0
- package/src/tools/create-scenario.ts +30 -0
- package/src/tools/discover-scenario-schema.ts +71 -0
- package/src/tools/discover-schema.ts +106 -0
- package/src/tools/get-analytics.ts +71 -0
- package/src/tools/get-prompt.ts +56 -0
- package/src/tools/get-scenario.ts +36 -0
- package/src/tools/get-trace.ts +61 -0
- package/src/tools/list-prompts.ts +35 -0
- package/src/tools/list-scenarios.ts +47 -0
- package/src/tools/search-traces.ts +91 -0
- package/src/tools/update-prompt.ts +44 -0
- package/src/tools/update-scenario.ts +32 -0
- package/src/utils/date-parsing.ts +31 -0
- package/tests/evaluations.ipynb +634 -634
- package/tests/scenario-openai.test.ts +3 -1
- package/uv.lock +1788 -1322
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import { createServer, type Server } from "http";
|
|
2
|
+
import { afterAll, beforeAll, describe, expect, it } from "vitest";
|
|
3
|
+
import { initConfig } from "../config.js";
|
|
4
|
+
|
|
5
|
+
// --- Canned responses for scenario API endpoints ---
|
|
6
|
+
|
|
7
|
+
const CANNED_SCENARIOS_LIST = [
|
|
8
|
+
{
|
|
9
|
+
id: "scen_abc123",
|
|
10
|
+
name: "Login Flow Happy Path",
|
|
11
|
+
situation: "User attempts to log in with valid credentials",
|
|
12
|
+
criteria: ["Responds with a welcome message", "Includes user name in greeting"],
|
|
13
|
+
labels: ["auth", "happy-path"],
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
id: "scen_def456",
|
|
17
|
+
name: "Password Reset",
|
|
18
|
+
situation: "User requests a password reset link",
|
|
19
|
+
criteria: ["Sends reset email"],
|
|
20
|
+
labels: ["auth"],
|
|
21
|
+
},
|
|
22
|
+
];
|
|
23
|
+
|
|
24
|
+
const CANNED_SCENARIO_DETAIL = {
|
|
25
|
+
id: "scen_abc123",
|
|
26
|
+
name: "Login Flow Happy Path",
|
|
27
|
+
situation: "User attempts to log in with valid credentials",
|
|
28
|
+
criteria: ["Responds with a welcome message", "Includes user name in greeting"],
|
|
29
|
+
labels: ["auth", "happy-path"],
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
const CANNED_SCENARIO_CREATED = {
|
|
33
|
+
id: "scen_new789",
|
|
34
|
+
name: "Login Flow Happy Path",
|
|
35
|
+
situation: "User attempts to log in with valid creds",
|
|
36
|
+
criteria: ["Responds with a welcome message", "Includes user name in greeting"],
|
|
37
|
+
labels: ["auth", "happy-path"],
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
const CANNED_SCENARIO_UPDATED = {
|
|
41
|
+
id: "scen_abc123",
|
|
42
|
+
name: "Login Flow - Valid Credentials",
|
|
43
|
+
situation: "User logs in with correct email and pass",
|
|
44
|
+
criteria: [
|
|
45
|
+
"Responds with welcome message",
|
|
46
|
+
"Sets session cookie",
|
|
47
|
+
"Redirects to dashboard",
|
|
48
|
+
],
|
|
49
|
+
labels: ["auth", "happy-path"],
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
const CANNED_SCENARIO_ARCHIVED = {
|
|
53
|
+
id: "scen_abc123",
|
|
54
|
+
archived: true,
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
// --- Mock HTTP Server ---
|
|
58
|
+
|
|
59
|
+
/**
 * Builds an in-process HTTP server that imitates the scenario REST API.
 *
 * Every request must carry `x-auth-token: test-integration-key`; anything
 * else is answered with 401. Routing is done on the path component only, so
 * a query string never changes which canned response is selected:
 *
 *   GET    /api/scenarios                  -> 200 CANNED_SCENARIOS_LIST
 *   POST   /api/scenarios                  -> 201 CANNED_SCENARIO_CREATED,
 *                                             400 when the body is not valid
 *                                             JSON or has no truthy `name`
 *   GET    /api/scenarios/scen_abc123      -> 200 CANNED_SCENARIO_DETAIL
 *   PUT    /api/scenarios/scen_abc123      -> 200 CANNED_SCENARIO_UPDATED
 *   DELETE /api/scenarios/scen_abc123      -> 200 CANNED_SCENARIO_ARCHIVED
 *   GET|PUT /api/scenarios/scen_nonexistent -> 404 "Scenario not found"
 *   anything else                          -> 404 "Not found: <method> <url>"
 *
 * The server is returned un-started; the caller is responsible for
 * `listen()` and `close()`.
 *
 * @returns {import("http").Server} the configured, not-yet-listening server
 */
function createMockServer() {
  return createServer((req, res) => {
    if (req.headers["x-auth-token"] !== "test-integration-key") {
      res.writeHead(401, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ message: "Invalid auth token." }));
      return;
    }

    // Decode as UTF-8 at the stream level so a multi-byte character split
    // across two chunks is reassembled correctly before concatenation.
    req.setEncoding("utf8");
    let body = "";
    req.on("data", (chunk) => {
      body += chunk;
    });

    req.on("end", () => {
      const url = req.url ?? "";
      const method = req.method;
      const path = url.split("?")[0];

      // Chooses the status code and JSON payload for this request.
      const pickResponse = () => {
        if (path === "/api/scenarios") {
          if (method === "GET") {
            return { status: 200, payload: CANNED_SCENARIOS_LIST };
          }
          if (method === "POST") {
            let parsed;
            try {
              parsed = JSON.parse(body);
            } catch {
              // A throw here would escape the "end" listener and take the
              // whole test process down; report it like a real API would.
              return {
                status: 400,
                payload: { message: "Validation error: invalid JSON body" },
              };
            }
            if (!parsed?.name) {
              return {
                status: 400,
                payload: { message: "Validation error: name is required" },
              };
            }
            return { status: 201, payload: CANNED_SCENARIO_CREATED };
          }
        }

        if (path === "/api/scenarios/scen_abc123") {
          if (method === "GET") {
            return { status: 200, payload: CANNED_SCENARIO_DETAIL };
          }
          if (method === "PUT") {
            return { status: 200, payload: CANNED_SCENARIO_UPDATED };
          }
          if (method === "DELETE") {
            return { status: 200, payload: CANNED_SCENARIO_ARCHIVED };
          }
        }

        if (
          path === "/api/scenarios/scen_nonexistent" &&
          (method === "GET" || method === "PUT")
        ) {
          return { status: 404, payload: { message: "Scenario not found" } };
        }

        return {
          status: 404,
          payload: { message: `Not found: ${method} ${url}` },
        };
      };

      const { status, payload } = pickResponse();
      res.setHeader("Content-Type", "application/json");
      res.writeHead(status);
      res.end(JSON.stringify(payload));
    });
  });
}
|
|
139
|
+
|
|
140
|
+
// --- Integration Tests ---
|
|
141
|
+
// These verify that MCP tool handlers correctly communicate with the REST API
|
|
142
|
+
// (auth, HTTP methods, status codes, error propagation).
|
|
143
|
+
// Formatting/digest logic is tested in scenario-tools.unit.test.ts.
|
|
144
|
+
|
|
145
|
+
describe("MCP scenario tools integration", () => {
|
|
146
|
+
let server: Server;
|
|
147
|
+
let port: number;
|
|
148
|
+
|
|
149
|
+
// Starts the mock API on an OS-assigned port and points the client config at
// it, so every tool handler under test talks to the mock server.
beforeAll(async () => {
  server = createMockServer();

  await new Promise<void>((resolve, reject) => {
    // Surface bind failures immediately instead of letting the hook hang
    // until the test runner's timeout fires.
    server.once("error", reject);
    server.listen(0, () => {
      server.off("error", reject);
      resolve();
    });
  });

  // address() is an object only for a listening TCP server; refuse to go on
  // with a made-up port, which would only produce confusing connection errors.
  const addr = server.address();
  if (addr === null || typeof addr === "string") {
    throw new Error("Mock server did not report a TCP port");
  }
  port = addr.port;

  initConfig({
    apiKey: "test-integration-key",
    endpoint: `http://localhost:${port}`,
  });
});
|
|
163
|
+
|
|
164
|
+
// Shuts the mock API down once the suite has finished.
afterAll(async () => {
  // If setup failed, the server may never have been created or bound;
  // there is nothing to close in that case.
  if (!server?.listening) return;

  await new Promise<void>((resolve, reject) => {
    server.close((err) => (err ? reject(err) : resolve()));
    // close() only stops accepting new connections; drop keep-alive sockets
    // left open by the HTTP client so the callback is not delayed.
    // Optional call because the method is absent on older Node releases.
    server.closeAllConnections?.();
  });
});
|
|
167
|
+
|
|
168
|
+
describe("list_scenarios", () => {
|
|
169
|
+
describe("when the API returns scenarios", () => {
|
|
170
|
+
it("returns a non-empty result", async () => {
|
|
171
|
+
const { handleListScenarios } = await import(
|
|
172
|
+
"../tools/list-scenarios.js"
|
|
173
|
+
);
|
|
174
|
+
const result = await handleListScenarios({});
|
|
175
|
+
expect(result.length).toBeGreaterThan(0);
|
|
176
|
+
});
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
describe("when format is json", () => {
|
|
180
|
+
it("returns parseable JSON matching the API response", async () => {
|
|
181
|
+
const { handleListScenarios } = await import(
|
|
182
|
+
"../tools/list-scenarios.js"
|
|
183
|
+
);
|
|
184
|
+
const result = await handleListScenarios({ format: "json" });
|
|
185
|
+
expect(JSON.parse(result)).toEqual(CANNED_SCENARIOS_LIST);
|
|
186
|
+
});
|
|
187
|
+
});
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
describe("get_scenario", () => {
|
|
191
|
+
describe("when the scenario exists", () => {
|
|
192
|
+
it("returns a non-empty result", async () => {
|
|
193
|
+
const { handleGetScenario } = await import(
|
|
194
|
+
"../tools/get-scenario.js"
|
|
195
|
+
);
|
|
196
|
+
const result = await handleGetScenario({ scenarioId: "scen_abc123" });
|
|
197
|
+
expect(result.length).toBeGreaterThan(0);
|
|
198
|
+
});
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
describe("when the scenario does not exist", () => {
|
|
202
|
+
it("propagates the 404 error", async () => {
|
|
203
|
+
const { handleGetScenario } = await import(
|
|
204
|
+
"../tools/get-scenario.js"
|
|
205
|
+
);
|
|
206
|
+
await expect(
|
|
207
|
+
handleGetScenario({ scenarioId: "scen_nonexistent" })
|
|
208
|
+
).rejects.toThrow("404");
|
|
209
|
+
});
|
|
210
|
+
});
|
|
211
|
+
});
|
|
212
|
+
|
|
213
|
+
describe("create_scenario", () => {
|
|
214
|
+
describe("when valid data is provided", () => {
|
|
215
|
+
it("returns confirmation with new scenario ID", async () => {
|
|
216
|
+
const { handleCreateScenario } = await import(
|
|
217
|
+
"../tools/create-scenario.js"
|
|
218
|
+
);
|
|
219
|
+
const result = await handleCreateScenario({
|
|
220
|
+
name: "Login Flow Happy Path",
|
|
221
|
+
situation: "User attempts to log in with valid creds",
|
|
222
|
+
criteria: ["Responds with a welcome message", "Includes user name in greeting"],
|
|
223
|
+
labels: ["auth", "happy-path"],
|
|
224
|
+
});
|
|
225
|
+
expect(result).toContain("scen_new789");
|
|
226
|
+
});
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
describe("when name is empty", () => {
|
|
230
|
+
it("propagates the validation error", async () => {
|
|
231
|
+
const { handleCreateScenario } = await import(
|
|
232
|
+
"../tools/create-scenario.js"
|
|
233
|
+
);
|
|
234
|
+
await expect(
|
|
235
|
+
handleCreateScenario({
|
|
236
|
+
name: "",
|
|
237
|
+
situation: "Some situation",
|
|
238
|
+
})
|
|
239
|
+
).rejects.toThrow();
|
|
240
|
+
});
|
|
241
|
+
});
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
describe("update_scenario", () => {
|
|
245
|
+
describe("when the scenario exists", () => {
|
|
246
|
+
it("returns a non-empty result", async () => {
|
|
247
|
+
const { handleUpdateScenario } = await import(
|
|
248
|
+
"../tools/update-scenario.js"
|
|
249
|
+
);
|
|
250
|
+
const result = await handleUpdateScenario({
|
|
251
|
+
scenarioId: "scen_abc123",
|
|
252
|
+
name: "Login Flow - Valid Credentials",
|
|
253
|
+
});
|
|
254
|
+
expect(result.length).toBeGreaterThan(0);
|
|
255
|
+
});
|
|
256
|
+
});
|
|
257
|
+
|
|
258
|
+
describe("when the scenario does not exist", () => {
|
|
259
|
+
it("propagates the 404 error", async () => {
|
|
260
|
+
const { handleUpdateScenario } = await import(
|
|
261
|
+
"../tools/update-scenario.js"
|
|
262
|
+
);
|
|
263
|
+
await expect(
|
|
264
|
+
handleUpdateScenario({
|
|
265
|
+
scenarioId: "scen_nonexistent",
|
|
266
|
+
name: "Updated Name",
|
|
267
|
+
})
|
|
268
|
+
).rejects.toThrow("404");
|
|
269
|
+
});
|
|
270
|
+
});
|
|
271
|
+
});
|
|
272
|
+
|
|
273
|
+
describe("archive_scenario", () => {
|
|
274
|
+
describe("when the scenario exists", () => {
|
|
275
|
+
it("returns confirmation that scenario was archived", async () => {
|
|
276
|
+
const { handleArchiveScenario } = await import(
|
|
277
|
+
"../tools/archive-scenario.js"
|
|
278
|
+
);
|
|
279
|
+
const result = await handleArchiveScenario({
|
|
280
|
+
scenarioId: "scen_abc123",
|
|
281
|
+
});
|
|
282
|
+
expect(result).toContain("archived");
|
|
283
|
+
});
|
|
284
|
+
});
|
|
285
|
+
});
|
|
286
|
+
});
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
+
|
|
3
|
+
vi.mock("../langwatch-api-scenarios.js", () => ({
|
|
4
|
+
listScenarios: vi.fn(),
|
|
5
|
+
getScenario: vi.fn(),
|
|
6
|
+
}));
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
listScenarios,
|
|
10
|
+
getScenario,
|
|
11
|
+
} from "../langwatch-api-scenarios.js";
|
|
12
|
+
|
|
13
|
+
import { handleListScenarios } from "../tools/list-scenarios.js";
|
|
14
|
+
import { handleGetScenario } from "../tools/get-scenario.js";
|
|
15
|
+
import { formatScenarioSchema } from "../tools/discover-scenario-schema.js";
|
|
16
|
+
|
|
17
|
+
const mockListScenarios = vi.mocked(listScenarios);
|
|
18
|
+
const mockGetScenario = vi.mocked(getScenario);
|
|
19
|
+
|
|
20
|
+
beforeEach(() => {
|
|
21
|
+
vi.clearAllMocks();
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
describe("handleListScenarios()", () => {
|
|
25
|
+
const sampleScenarios = [
|
|
26
|
+
{
|
|
27
|
+
id: "scen_abc123",
|
|
28
|
+
name: "Login Flow Happy Path",
|
|
29
|
+
situation:
|
|
30
|
+
"User attempts to log in with valid credentials and expects a welcome message back from the system",
|
|
31
|
+
criteria: [
|
|
32
|
+
"Responds with a welcome message",
|
|
33
|
+
"Includes user name in greeting",
|
|
34
|
+
"Sets session cookie",
|
|
35
|
+
],
|
|
36
|
+
labels: ["auth", "happy-path"],
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
id: "scen_def456",
|
|
40
|
+
name: "Error Handling",
|
|
41
|
+
situation: "User sends malformed input",
|
|
42
|
+
criteria: ["Returns 400 status"],
|
|
43
|
+
labels: ["error"],
|
|
44
|
+
},
|
|
45
|
+
];
|
|
46
|
+
|
|
47
|
+
describe("when scenarios exist (digest mode)", () => {
|
|
48
|
+
let result: string;
|
|
49
|
+
|
|
50
|
+
beforeEach(async () => {
|
|
51
|
+
mockListScenarios.mockResolvedValue(sampleScenarios);
|
|
52
|
+
result = await handleListScenarios({});
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
it("includes scenario id", () => {
|
|
56
|
+
expect(result).toContain("scen_abc123");
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
it("includes scenario name", () => {
|
|
60
|
+
expect(result).toContain("Login Flow Happy Path");
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
it("includes truncated situation preview", () => {
|
|
64
|
+
expect(result).toContain("User attempts to log in");
|
|
65
|
+
expect(result).not.toContain(
|
|
66
|
+
"User attempts to log in with valid credentials and expects a welcome message back from the system"
|
|
67
|
+
);
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it("shows criteria count per scenario", () => {
|
|
71
|
+
expect(result).toContain("3 criteria");
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
it("includes labels", () => {
|
|
75
|
+
expect(result).toContain("auth");
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
it("includes all scenarios in the list", () => {
|
|
79
|
+
expect(result).toContain("scen_def456");
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
it("includes the total count header", () => {
|
|
83
|
+
expect(result).toContain("# Scenarios (2 total)");
|
|
84
|
+
});
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
describe("when no scenarios exist", () => {
|
|
88
|
+
let result: string;
|
|
89
|
+
|
|
90
|
+
beforeEach(async () => {
|
|
91
|
+
mockListScenarios.mockResolvedValue([]);
|
|
92
|
+
result = await handleListScenarios({});
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
it("returns a no-scenarios message", () => {
|
|
96
|
+
expect(result).toContain("No scenarios found");
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
it("includes a tip to use create_scenario", () => {
|
|
100
|
+
expect(result).toContain("create_scenario");
|
|
101
|
+
});
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
describe("when format is json", () => {
|
|
105
|
+
it("returns valid parseable JSON matching the scenario structure", async () => {
|
|
106
|
+
mockListScenarios.mockResolvedValue(sampleScenarios);
|
|
107
|
+
const result = await handleListScenarios({ format: "json" });
|
|
108
|
+
expect(JSON.parse(result)).toEqual(sampleScenarios);
|
|
109
|
+
});
|
|
110
|
+
});
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
describe("handleGetScenario()", () => {
|
|
114
|
+
const sampleScenario = {
|
|
115
|
+
id: "scen_abc123",
|
|
116
|
+
name: "Login Flow Happy Path",
|
|
117
|
+
situation: "User attempts to log in with valid credentials",
|
|
118
|
+
criteria: [
|
|
119
|
+
"Responds with a welcome message",
|
|
120
|
+
"Includes user name in greeting",
|
|
121
|
+
],
|
|
122
|
+
labels: ["auth", "happy-path"],
|
|
123
|
+
};
|
|
124
|
+
|
|
125
|
+
describe("when format is digest", () => {
|
|
126
|
+
let result: string;
|
|
127
|
+
|
|
128
|
+
beforeEach(async () => {
|
|
129
|
+
mockGetScenario.mockResolvedValue(sampleScenario);
|
|
130
|
+
result = await handleGetScenario({ scenarioId: "scen_abc123" });
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
it("includes the scenario name in the heading", () => {
|
|
134
|
+
expect(result).toContain("# Scenario: Login Flow Happy Path");
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
it("includes the situation", () => {
|
|
138
|
+
expect(result).toContain("User attempts to log in with valid credentials");
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
it("includes each criteria item", () => {
|
|
142
|
+
expect(result).toContain("- Responds with a welcome message");
|
|
143
|
+
expect(result).toContain("- Includes user name in greeting");
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
it("includes labels", () => {
|
|
147
|
+
expect(result).toContain("auth, happy-path");
|
|
148
|
+
});
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
describe("when format is json", () => {
|
|
152
|
+
it("returns valid parseable JSON matching the scenario structure", async () => {
|
|
153
|
+
mockGetScenario.mockResolvedValue(sampleScenario);
|
|
154
|
+
const result = await handleGetScenario({
|
|
155
|
+
scenarioId: "scen_abc123",
|
|
156
|
+
format: "json",
|
|
157
|
+
});
|
|
158
|
+
expect(JSON.parse(result)).toEqual(sampleScenario);
|
|
159
|
+
});
|
|
160
|
+
});
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
describe("formatScenarioSchema()", () => {
|
|
164
|
+
it("includes field descriptions with required/optional annotations", () => {
|
|
165
|
+
const result = formatScenarioSchema();
|
|
166
|
+
expect(result).toContain("**name** (required)");
|
|
167
|
+
expect(result).toContain("**situation** (required)");
|
|
168
|
+
expect(result).toContain("**criteria** (array of strings)");
|
|
169
|
+
expect(result).toContain("**labels** (array of strings)");
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
it("includes all target types with descriptions", () => {
|
|
173
|
+
const result = formatScenarioSchema();
|
|
174
|
+
expect(result).toContain("**prompt**: Test a prompt template");
|
|
175
|
+
expect(result).toContain("**http**: Test an HTTP endpoint");
|
|
176
|
+
expect(result).toContain("**code**: Test a code function");
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
it("includes authoring guidance for situations and criteria", () => {
|
|
180
|
+
const result = formatScenarioSchema();
|
|
181
|
+
expect(result).toContain("## Writing a Good Situation");
|
|
182
|
+
expect(result).toContain("## Writing Good Criteria");
|
|
183
|
+
expect(result).toContain("Specific and testable");
|
|
184
|
+
});
|
|
185
|
+
});
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { filterFields } from "../schemas/filter-fields.js";
|
|
3
|
+
import { analyticsMetrics } from "../schemas/analytics-metrics.js";
|
|
4
|
+
import { analyticsGroups } from "../schemas/analytics-groups.js";
|
|
5
|
+
|
|
6
|
+
describe("schemas", () => {
|
|
7
|
+
describe("filterFields", () => {
|
|
8
|
+
it("has at least 22 entries", () => {
|
|
9
|
+
expect(filterFields.length).toBeGreaterThanOrEqual(22);
|
|
10
|
+
});
|
|
11
|
+
|
|
12
|
+
it("has non-empty field and description for every entry", () => {
|
|
13
|
+
for (const entry of filterFields) {
|
|
14
|
+
expect(entry.field).toBeTruthy();
|
|
15
|
+
expect(entry.description).toBeTruthy();
|
|
16
|
+
}
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
it("contains expected filter fields", () => {
|
|
20
|
+
const fieldNames = filterFields.map((f) => f.field);
|
|
21
|
+
expect(fieldNames).toContain("topics.topics");
|
|
22
|
+
expect(fieldNames).toContain("metadata.user_id");
|
|
23
|
+
expect(fieldNames).toContain("spans.model");
|
|
24
|
+
expect(fieldNames).toContain("evaluations.passed");
|
|
25
|
+
expect(fieldNames).toContain("events.event_type");
|
|
26
|
+
expect(fieldNames).toContain("annotations.hasAnnotation");
|
|
27
|
+
expect(fieldNames).toContain("sentiment.input_sentiment");
|
|
28
|
+
});
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
describe("analyticsMetrics", () => {
|
|
32
|
+
it("covers expected categories", () => {
|
|
33
|
+
const categories = new Set(analyticsMetrics.map((m) => m.category));
|
|
34
|
+
expect(categories).toContain("metadata");
|
|
35
|
+
expect(categories).toContain("performance");
|
|
36
|
+
expect(categories).toContain("evaluations");
|
|
37
|
+
expect(categories).toContain("sentiment");
|
|
38
|
+
expect(categories).toContain("events");
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
it("has non-empty allowedAggregations for every metric", () => {
|
|
42
|
+
for (const metric of analyticsMetrics) {
|
|
43
|
+
expect(metric.allowedAggregations.length).toBeGreaterThan(0);
|
|
44
|
+
}
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
it("has non-empty name, label, and description for every metric", () => {
|
|
48
|
+
for (const metric of analyticsMetrics) {
|
|
49
|
+
expect(metric.name).toBeTruthy();
|
|
50
|
+
expect(metric.label).toBeTruthy();
|
|
51
|
+
expect(metric.description).toBeTruthy();
|
|
52
|
+
}
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
it("contains expected metrics", () => {
|
|
56
|
+
const names = analyticsMetrics.map((m) => `${m.category}.${m.name}`);
|
|
57
|
+
expect(names).toContain("metadata.trace_id");
|
|
58
|
+
expect(names).toContain("performance.completion_time");
|
|
59
|
+
expect(names).toContain("performance.total_cost");
|
|
60
|
+
expect(names).toContain("evaluations.evaluation_score");
|
|
61
|
+
});
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
describe("analyticsGroups", () => {
|
|
65
|
+
it("has at least 10 entries", () => {
|
|
66
|
+
expect(analyticsGroups.length).toBeGreaterThanOrEqual(10);
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
it("has non-empty name, label, and description for every group", () => {
|
|
70
|
+
for (const group of analyticsGroups) {
|
|
71
|
+
expect(group.name).toBeTruthy();
|
|
72
|
+
expect(group.label).toBeTruthy();
|
|
73
|
+
expect(group.description).toBeTruthy();
|
|
74
|
+
}
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
it("contains expected group-by options", () => {
|
|
78
|
+
const names = analyticsGroups.map((g) => g.name);
|
|
79
|
+
expect(names).toContain("topics.topics");
|
|
80
|
+
expect(names).toContain("metadata.user_id");
|
|
81
|
+
expect(names).toContain("metadata.model");
|
|
82
|
+
expect(names).toContain("events.event_type");
|
|
83
|
+
});
|
|
84
|
+
});
|
|
85
|
+
});
|