@langwatch/mcp-server 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +19 -0
  2. package/dist/archive-scenario-GAE4XVFM.js +19 -0
  3. package/dist/archive-scenario-GAE4XVFM.js.map +1 -0
  4. package/dist/{chunk-HOPTUDCZ.js → chunk-JVWDWL3J.js} +3 -2
  5. package/dist/chunk-JVWDWL3J.js.map +1 -0
  6. package/dist/chunk-K2YFPOSD.js +40 -0
  7. package/dist/chunk-K2YFPOSD.js.map +1 -0
  8. package/dist/{create-prompt-UBC537BJ.js → create-prompt-P35POKBW.js} +2 -2
  9. package/dist/create-scenario-3YRZVDYF.js +26 -0
  10. package/dist/create-scenario-3YRZVDYF.js.map +1 -0
  11. package/dist/discover-scenario-schema-MEEEVND7.js +65 -0
  12. package/dist/discover-scenario-schema-MEEEVND7.js.map +1 -0
  13. package/dist/{get-analytics-3IFTN6MY.js → get-analytics-BAVXTAPB.js} +2 -2
  14. package/dist/{get-prompt-2ZB5B3QC.js → get-prompt-LKCPT26O.js} +2 -2
  15. package/dist/get-scenario-3SCDW4Z6.js +33 -0
  16. package/dist/get-scenario-3SCDW4Z6.js.map +1 -0
  17. package/dist/{get-trace-7IXKKCJJ.js → get-trace-QFDWJ5D4.js} +2 -2
  18. package/dist/index.js +16040 -8719
  19. package/dist/index.js.map +1 -1
  20. package/dist/{list-prompts-J72LTP7Z.js → list-prompts-UQPBCUYA.js} +2 -2
  21. package/dist/list-scenarios-573YOUKC.js +40 -0
  22. package/dist/list-scenarios-573YOUKC.js.map +1 -0
  23. package/dist/{search-traces-RW2NDHN5.js → search-traces-RSMYCAN7.js} +2 -2
  24. package/dist/{update-prompt-G6HHZSUM.js → update-prompt-G2Y5EBQY.js} +2 -2
  25. package/dist/update-scenario-SSGVOBJO.js +27 -0
  26. package/dist/update-scenario-SSGVOBJO.js.map +1 -0
  27. package/package.json +3 -3
  28. package/src/__tests__/scenario-tools.integration.test.ts +286 -0
  29. package/src/__tests__/scenario-tools.unit.test.ts +185 -0
  30. package/src/index.ts +132 -3
  31. package/src/langwatch-api-scenarios.ts +67 -0
  32. package/src/langwatch-api.ts +4 -3
  33. package/src/tools/archive-scenario.ts +19 -0
  34. package/src/tools/create-scenario.ts +30 -0
  35. package/src/tools/discover-scenario-schema.ts +71 -0
  36. package/src/tools/get-scenario.ts +36 -0
  37. package/src/tools/list-scenarios.ts +47 -0
  38. package/src/tools/update-scenario.ts +32 -0
  39. package/uv.lock +1788 -1322
  40. package/dist/chunk-HOPTUDCZ.js.map +0 -1
  41. /package/dist/{create-prompt-UBC537BJ.js.map → create-prompt-P35POKBW.js.map} +0 -0
  42. /package/dist/{get-analytics-3IFTN6MY.js.map → get-analytics-BAVXTAPB.js.map} +0 -0
  43. /package/dist/{get-prompt-2ZB5B3QC.js.map → get-prompt-LKCPT26O.js.map} +0 -0
  44. /package/dist/{get-trace-7IXKKCJJ.js.map → get-trace-QFDWJ5D4.js.map} +0 -0
  45. /package/dist/{list-prompts-J72LTP7Z.js.map → list-prompts-UQPBCUYA.js.map} +0 -0
  46. /package/dist/{search-traces-RW2NDHN5.js.map → search-traces-RSMYCAN7.js.map} +0 -0
  47. /package/dist/{update-prompt-G6HHZSUM.js.map → update-prompt-G2Y5EBQY.js.map} +0 -0
@@ -0,0 +1,185 @@
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
+
3
+ vi.mock("../langwatch-api-scenarios.js", () => ({
4
+ listScenarios: vi.fn(),
5
+ getScenario: vi.fn(),
6
+ }));
7
+
8
+ import {
9
+ listScenarios,
10
+ getScenario,
11
+ } from "../langwatch-api-scenarios.js";
12
+
13
+ import { handleListScenarios } from "../tools/list-scenarios.js";
14
+ import { handleGetScenario } from "../tools/get-scenario.js";
15
+ import { formatScenarioSchema } from "../tools/discover-scenario-schema.js";
16
+
17
+ const mockListScenarios = vi.mocked(listScenarios);
18
+ const mockGetScenario = vi.mocked(getScenario);
19
+
20
+ beforeEach(() => {
21
+ vi.clearAllMocks();
22
+ });
23
+
24
// Exercises the list_scenarios handler against a mocked API client in
// digest mode, for an empty project, and in raw JSON mode.
describe("handleListScenarios()", () => {
  // Deliberately long so the digest has to shorten it into a preview.
  const longSituation =
    "User attempts to log in with valid credentials and expects a welcome message back from the system";

  const loginScenario = {
    id: "scen_abc123",
    name: "Login Flow Happy Path",
    situation: longSituation,
    criteria: [
      "Responds with a welcome message",
      "Includes user name in greeting",
      "Sets session cookie",
    ],
    labels: ["auth", "happy-path"],
  };

  const errorScenario = {
    id: "scen_def456",
    name: "Error Handling",
    situation: "User sends malformed input",
    criteria: ["Returns 400 status"],
    labels: ["error"],
  };

  const fixtures = [loginScenario, errorScenario];

  describe("when scenarios exist (digest mode)", () => {
    let digest: string;

    // Each test gets a freshly rendered digest of both fixtures.
    beforeEach(async () => {
      mockListScenarios.mockResolvedValue(fixtures);
      digest = await handleListScenarios({});
    });

    it("includes scenario id", () => {
      expect(digest).toContain("scen_abc123");
    });

    it("includes scenario name", () => {
      expect(digest).toContain("Login Flow Happy Path");
    });

    it("includes truncated situation preview", () => {
      // The opening words survive, the full sentence does not.
      expect(digest).toContain("User attempts to log in");
      expect(digest).not.toContain(longSituation);
    });

    it("shows criteria count per scenario", () => {
      expect(digest).toContain("3 criteria");
    });

    it("includes labels", () => {
      expect(digest).toContain("auth");
    });

    it("includes all scenarios in the list", () => {
      expect(digest).toContain("scen_def456");
    });

    it("includes the total count header", () => {
      expect(digest).toContain("# Scenarios (2 total)");
    });
  });

  describe("when no scenarios exist", () => {
    let message: string;

    beforeEach(async () => {
      mockListScenarios.mockResolvedValue([]);
      message = await handleListScenarios({});
    });

    it("returns a no-scenarios message", () => {
      expect(message).toContain("No scenarios found");
    });

    it("includes a tip to use create_scenario", () => {
      expect(message).toContain("create_scenario");
    });
  });

  describe("when format is json", () => {
    it("returns valid parseable JSON matching the scenario structure", async () => {
      mockListScenarios.mockResolvedValue(fixtures);
      const raw = await handleListScenarios({ format: "json" });
      const parsed: unknown = JSON.parse(raw);
      expect(parsed).toEqual(fixtures);
    });
  });
});
112
+
113
// Exercises the get_scenario handler against a mocked API client in
// digest (markdown) mode and in raw JSON mode.
describe("handleGetScenario()", () => {
  const fixture = {
    id: "scen_abc123",
    name: "Login Flow Happy Path",
    situation: "User attempts to log in with valid credentials",
    criteria: [
      "Responds with a welcome message",
      "Includes user name in greeting",
    ],
    labels: ["auth", "happy-path"],
  };

  describe("when format is digest", () => {
    let digest: string;

    // No explicit format is passed, so the handler falls back to the digest.
    beforeEach(async () => {
      mockGetScenario.mockResolvedValue(fixture);
      digest = await handleGetScenario({ scenarioId: "scen_abc123" });
    });

    it("includes the scenario name in the heading", () => {
      expect(digest).toContain("# Scenario: Login Flow Happy Path");
    });

    it("includes the situation", () => {
      expect(digest).toContain("User attempts to log in with valid credentials");
    });

    it("includes each criteria item", () => {
      expect(digest).toContain("- Responds with a welcome message");
      expect(digest).toContain("- Includes user name in greeting");
    });

    it("includes labels", () => {
      expect(digest).toContain("auth, happy-path");
    });
  });

  describe("when format is json", () => {
    it("returns valid parseable JSON matching the scenario structure", async () => {
      mockGetScenario.mockResolvedValue(fixture);
      const raw = await handleGetScenario({
        scenarioId: "scen_abc123",
        format: "json",
      });
      const parsed: unknown = JSON.parse(raw);
      expect(parsed).toEqual(fixture);
    });
  });
});
162
+
163
// Checks that the static scenario schema text contains the field list,
// the target types, and the authoring guidance sections.
describe("formatScenarioSchema()", () => {
  it("includes field descriptions with required/optional annotations", () => {
    const schema = formatScenarioSchema();
    const expectedFields = [
      "**name** (required)",
      "**situation** (required)",
      "**criteria** (array of strings)",
      "**labels** (array of strings)",
    ];
    for (const snippet of expectedFields) {
      expect(schema).toContain(snippet);
    }
  });

  it("includes all target types with descriptions", () => {
    const schema = formatScenarioSchema();
    const expectedTargets = [
      "**prompt**: Test a prompt template",
      "**http**: Test an HTTP endpoint",
      "**code**: Test a code function",
    ];
    for (const snippet of expectedTargets) {
      expect(schema).toContain(snippet);
    }
  });

  it("includes authoring guidance for situations and criteria", () => {
    const schema = formatScenarioSchema();
    const expectedGuidance = [
      "## Writing a Good Situation",
      "## Writing Good Criteria",
      "Specific and testable",
    ];
    for (const snippet of expectedGuidance) {
      expect(schema).toContain(snippet);
    }
  });
});
package/src/index.ts CHANGED
@@ -94,15 +94,24 @@ server.tool(
94
94
 
95
95
  server.tool(
96
96
  "discover_schema",
97
- "Discover available filter fields, metrics, aggregation types, and group-by options for LangWatch queries. Call this before using search_traces or get_analytics to understand available options.",
97
+ "Discover available filter fields, metrics, aggregation types, group-by options, and scenario schema for LangWatch queries. Call this before using search_traces, get_analytics, or scenario tools to understand available options.",
98
98
  {
99
99
  category: z
100
- .enum(["filters", "metrics", "aggregations", "groups", "all"])
100
+ .enum(["filters", "metrics", "aggregations", "groups", "scenarios", "all"])
101
101
  .describe("Which schema category to discover"),
102
102
  },
103
103
  async ({ category }) => {
104
+ if (category === "scenarios") {
105
+ const { formatScenarioSchema } = await import("./tools/discover-scenario-schema.js");
106
+ return { content: [{ type: "text", text: formatScenarioSchema() }] };
107
+ }
104
108
  const { formatSchema } = await import("./tools/discover-schema.js");
105
- return { content: [{ type: "text", text: formatSchema(category) }] };
109
+ let text = formatSchema(category);
110
+ if (category === "all") {
111
+ const { formatScenarioSchema } = await import("./tools/discover-scenario-schema.js");
112
+ text += "\n\n" + formatScenarioSchema();
113
+ }
114
+ return { content: [{ type: "text", text }] };
106
115
  }
107
116
  );
108
117
 
@@ -324,4 +333,124 @@ server.tool(
324
333
  }
325
334
  );
326
335
 
336
+ // --- Scenario Tools (require API key) ---
337
+
338
// Registers the `list_scenarios` tool. The API key check runs before the
// handler module is loaded, so a failing check stops the call early.
server.tool(
  "list_scenarios",
  "List all scenarios in the LangWatch project. Returns AI-readable digest by default.",
  {
    format: z
      .enum(["digest", "json"])
      .optional()
      .describe("Output format: 'digest' (default, AI-readable) or 'json' (full raw data)"),
  },
  async (args) => {
    const config = await import("./config.js");
    config.requireApiKey();

    const tool = await import("./tools/list-scenarios.js");
    const text = await tool.handleListScenarios(args);
    return { content: [{ type: "text", text }] };
  }
);
358
+
359
// Registers the `get_scenario` tool. The API key check runs before the
// handler module is loaded, so a failing check stops the call early.
server.tool(
  "get_scenario",
  "Get full details of a scenario by ID, including situation, criteria, and labels.",
  {
    scenarioId: z.string().describe("The scenario ID to retrieve"),
    format: z
      .enum(["digest", "json"])
      .optional()
      .describe("Output format: 'digest' (default, AI-readable) or 'json' (full raw data)"),
  },
  async (args) => {
    const config = await import("./config.js");
    config.requireApiKey();

    const tool = await import("./tools/get-scenario.js");
    const text = await tool.handleGetScenario(args);
    return { content: [{ type: "text", text }] };
  }
);
380
+
381
// Registers the `create_scenario` tool. Only `name` and `situation` are
// mandatory; `criteria` and `labels` may be omitted.
server.tool(
  "create_scenario",
  "Create a new scenario in the LangWatch project. Call discover_schema({ category: 'scenarios' }) first to learn how to write effective situations and criteria.",
  {
    name: z.string().describe("Scenario name"),
    situation: z
      .string()
      .describe("The context or setup describing what the user/agent is doing"),
    criteria: z
      .array(z.string())
      .optional()
      .describe("Pass/fail conditions the agent's response must satisfy"),
    labels: z
      .array(z.string())
      .optional()
      .describe("Tags for organizing and filtering scenarios"),
  },
  async (args) => {
    // Check the API key before the handler module is loaded.
    const config = await import("./config.js");
    config.requireApiKey();

    const tool = await import("./tools/create-scenario.js");
    const text = await tool.handleCreateScenario(args);
    return { content: [{ type: "text", text }] };
  }
);
409
+
410
// Registers the `update_scenario` tool. Every field except `scenarioId`
// is optional, so callers send only what they want to change.
server.tool(
  "update_scenario",
  "Update an existing scenario.",
  {
    scenarioId: z.string().describe("The scenario ID to update"),
    name: z.string().optional().describe("Updated scenario name"),
    situation: z.string().optional().describe("Updated situation"),
    criteria: z.array(z.string()).optional().describe("Updated criteria"),
    labels: z.array(z.string()).optional().describe("Updated labels"),
  },
  async (args) => {
    // Check the API key before the handler module is loaded.
    const config = await import("./config.js");
    config.requireApiKey();

    const tool = await import("./tools/update-scenario.js");
    const text = await tool.handleUpdateScenario(args);
    return { content: [{ type: "text", text }] };
  }
);
437
+
438
// Registers the `archive_scenario` tool, which takes only the scenario ID.
server.tool(
  "archive_scenario",
  "Archive (soft-delete) a scenario.",
  {
    scenarioId: z.string().describe("The scenario ID to archive"),
  },
  async (args) => {
    // Check the API key before the handler module is loaded.
    const config = await import("./config.js");
    config.requireApiKey();

    const tool = await import("./tools/archive-scenario.js");
    const text = await tool.handleArchiveScenario(args);
    return { content: [{ type: "text", text }] };
  }
);
455
+
327
456
  await server.connect(transport);
@@ -0,0 +1,67 @@
1
+ import { makeRequest } from "./langwatch-api.js";
2
+
3
+ // --- Scenario types ---
4
+
5
/** A scenario as returned by the scenario API functions in this module. */
export interface ScenarioSummary {
  /** Scenario identifier, used in the `/api/scenarios/:id` routes. */
  id: string;
  /** Scenario name. */
  name: string;
  /** The context or setup the scenario describes. */
  situation: string;
  /** Pass/fail conditions attached to the scenario. */
  criteria: string[];
  /** Tags attached to the scenario. */
  labels: string[];
}
12
+
13
/** Response body of the archive (`DELETE /api/scenarios/:id`) call. */
export interface ScenarioArchiveResponse {
  /** Identifier of the scenario the request targeted. */
  id: string;
  /** Archive flag reported by the server. */
  archived: boolean;
}
17
+
18
+ // --- Scenario API functions ---
19
+
20
/**
 * Fetches the project's scenarios via `GET /api/scenarios`.
 *
 * @returns The response body, typed as a scenario list without runtime validation.
 */
export async function listScenarios(): Promise<ScenarioSummary[]> {
  const payload = await makeRequest("GET", "/api/scenarios");
  return payload as ScenarioSummary[];
}
24
+
25
/**
 * Fetches one scenario via `GET /api/scenarios/:id`.
 *
 * @param id - Scenario ID; it is URI-encoded before being placed in the path.
 * @returns The response body, typed as a scenario without runtime validation.
 */
export async function getScenario(id: string): Promise<ScenarioSummary> {
  const path = `/api/scenarios/${encodeURIComponent(id)}`;
  const payload = await makeRequest("GET", path);
  return payload as ScenarioSummary;
}
32
+
33
/**
 * Creates a scenario via `POST /api/scenarios`.
 *
 * @param data - Fields of the new scenario; the object is sent as the request body.
 * @returns The response body, typed as a scenario without runtime validation.
 */
export async function createScenario(data: {
  name: string;
  situation: string;
  criteria?: string[];
  labels?: string[];
}): Promise<ScenarioSummary> {
  const payload = await makeRequest("POST", "/api/scenarios", data);
  return payload as ScenarioSummary;
}
42
+
43
/**
 * Updates a scenario via `PUT /api/scenarios/:id`.
 *
 * @param params - `id` selects the scenario and goes into the URL only;
 *   every other property is sent as the request body.
 * @returns The response body, typed as a scenario without runtime validation.
 */
export async function updateScenario(params: {
  id: string;
  name?: string;
  situation?: string;
  criteria?: string[];
  labels?: string[];
}): Promise<ScenarioSummary> {
  // Split the route parameter from the fields that make up the body.
  const { id, ...fields } = params;
  const path = `/api/scenarios/${encodeURIComponent(id)}`;
  const payload = await makeRequest("PUT", path, fields);
  return payload as ScenarioSummary;
}
58
+
59
/**
 * Archives (soft-deletes) a scenario via `DELETE /api/scenarios/:id`.
 *
 * @param id - Scenario ID; it is URI-encoded before being placed in the path.
 * @returns The response body, typed as an archive response without runtime validation.
 */
export async function archiveScenario(
  id: string
): Promise<ScenarioArchiveResponse> {
  const path = `/api/scenarios/${encodeURIComponent(id)}`;
  const payload = await makeRequest("DELETE", path);
  return payload as ScenarioArchiveResponse;
}
@@ -116,8 +116,8 @@ export interface PromptMutationResponse {
116
116
  *
117
117
  * @throws Error with status code and response body when the response is not OK
118
118
  */
119
- async function makeRequest(
120
- method: "GET" | "POST",
119
+ export async function makeRequest(
120
+ method: "GET" | "POST" | "PUT" | "PATCH" | "DELETE",
121
121
  path: string,
122
122
  body?: unknown
123
123
  ): Promise<unknown> {
@@ -126,7 +126,7 @@ async function makeRequest(
126
126
  "X-Auth-Token": requireApiKey(),
127
127
  };
128
128
 
129
- if (method === "POST") {
129
+ if (body !== undefined) {
130
130
  headers["Content-Type"] = "application/json";
131
131
  }
132
132
 
@@ -263,3 +263,4 @@ export async function createPromptVersion(
263
263
  data
264
264
  ) as Promise<PromptMutationResponse>;
265
265
  }
266
+
@@ -0,0 +1,19 @@
1
+ import { archiveScenario as apiArchiveScenario } from "../langwatch-api-scenarios.js";
2
+
3
/**
 * Handler behind the `archive_scenario` MCP tool.
 *
 * Calls the archive API for the given scenario and renders a short
 * markdown confirmation with the ID and status from the API response.
 *
 * @param params - Tool arguments; `scenarioId` identifies the scenario to archive.
 * @returns The confirmation text.
 */
export async function handleArchiveScenario(params: {
  scenarioId: string;
}): Promise<string> {
  const { id, archived } = await apiArchiveScenario(params.scenarioId);
  const status = archived ? "archived" : "active";

  return [
    "Scenario archived successfully!\n",
    `**ID**: ${id}`,
    `**Status**: ${status}`,
  ].join("\n");
}
@@ -0,0 +1,30 @@
1
+ import { createScenario as apiCreateScenario } from "../langwatch-api-scenarios.js";
2
+
3
/**
 * Handler behind the `create_scenario` MCP tool.
 *
 * Passes the tool arguments to the create API and renders a markdown
 * confirmation from the scenario the API returns.
 *
 * @param params - Fields of the scenario to create.
 * @returns The confirmation text; criteria and labels lines appear only
 *   when the created scenario has at least one of each.
 */
export async function handleCreateScenario(params: {
  name: string;
  situation: string;
  criteria?: string[];
  labels?: string[];
}): Promise<string> {
  const created = await apiCreateScenario(params);
  const { criteria, labels } = created;

  const output = [
    "Scenario created successfully!\n",
    `**ID**: ${created.id}`,
    `**Name**: ${created.name}`,
    `**Situation**: ${created.situation}`,
  ];

  // Criteria are summarised as a count, not listed one by one.
  if (Array.isArray(criteria) && criteria.length > 0) {
    output.push(`**Criteria**: ${criteria.length} criteria`);
  }

  if (Array.isArray(labels) && labels.length > 0) {
    output.push(`**Labels**: ${labels.join(", ")}`);
  }

  return output.join("\n");
}
@@ -0,0 +1,71 @@
1
/**
 * Builds the static markdown reference for scenarios: the field list,
 * guidance on writing situations and criteria, the target types, and tips.
 *
 * @returns The full document as a single newline-joined string.
 */
export function formatScenarioSchema(): string {
  // Each section is a list of output lines; headings carry their own
  // surrounding blank lines through embedded "\n" characters.
  const header = ["# Scenario Schema\n"];

  const fields = [
    "## Fields\n",
    '- **name** (required): A short, descriptive name (e.g., "billing dispute resolution", "password reset with 2FA unavailable")',
    "- **situation** (required): The context that guides the user simulator — who the user is, what they want, and any constraints (see Writing a Good Situation below)",
    "- **criteria** (array of strings): Pass/fail conditions a judge evaluates the agent against (see Writing Good Criteria below)",
    '- **labels** (array of strings): Tags for organizing scenarios (e.g., "auth", "happy-path", "edge-case")',
  ];

  const situationGuide = [
    "\n## Writing a Good Situation\n",
    "The situation drives the user simulator. Include these elements:",
    "- **Persona**: Who is the user? (e.g., a stressed small business owner, a confused teenager)",
    "- **Emotional state**: How are they feeling? (e.g., frustrated, anxious, impatient)",
    "- **Background/Context**: What happened before this conversation?",
    "- **Intent**: What do they want to accomplish?",
    "- **Constraints**: What limitations do they have? (e.g., no phone for 2FA, unfamiliar with technical terms)",
    "\nExample:",
    "```",
    "User is a small business owner stressed about tax deadline.",
    "They need help categorizing expenses but aren't familiar with",
    "accounting terms. They appreciate patient explanations and examples.",
    "They have a spreadsheet of transactions but aren't sure which",
    "categories apply to their consulting business.",
    "```",
  ];

  const criteriaGuide = [
    "\n## Writing Good Criteria\n",
    "Criteria are what the judge uses to pass or fail the agent. Each criterion should be:",
    "- **Specific and testable** — not vague like \"responds helpfully\"",
    "- **Behavioral** — describes what the agent should *do*, not how it works internally",
    "- **Independent** — each criterion checks one thing",
    "\nGood criteria patterns:",
    "- **Information gathering**: \"Agent asks for the user's account number before proceeding\"",
    "- **Safety/guardrails**: \"Agent does not reveal internal system details or error stack traces\"",
    "- **Clarification**: \"Agent asks clarifying questions before taking irreversible action\"",
    "- **Tone**: \"Agent maintains a professional and empathetic tone throughout\"",
    "- **Completeness**: \"Agent confirms the user understands the solution before ending\"",
    "- **Domain-specific**: \"Agent recommends releasing a wild frog rather than keeping it as a pet\"",
    "\nAvoid vague criteria like:",
    '- "Responds correctly" — correct how?',
    '- "Is helpful" — helpful in what way?',
    '- "Works well" — not testable',
  ];

  const targetTypes = [
    "\n## Target Types\n",
    "Scenarios can target different execution backends:",
    "- **prompt**: Test a prompt template with variable substitution",
    "- **http**: Test an HTTP endpoint (e.g., a deployed agent API)",
    "- **code**: Test a code function directly",
  ];

  const tips = [
    "\n## Tips\n",
    "- Start simple, then layer complexity (add constraints, edge cases)",
    "- Test edge cases: user changes their mind, gives ambiguous input, makes mistakes",
    "- Use `fetch_scenario_docs` for the full authoring guide and advanced patterns",
  ];

  return [
    ...header,
    ...fields,
    ...situationGuide,
    ...criteriaGuide,
    ...targetTypes,
    ...tips,
  ].join("\n");
}
@@ -0,0 +1,36 @@
1
+ import { getScenario as apiGetScenario } from "../langwatch-api-scenarios.js";
2
+
3
/**
 * Handler behind the `get_scenario` MCP tool.
 *
 * Fetches one scenario by ID and renders it either as pretty-printed
 * JSON or, by default, as a markdown digest.
 *
 * @param params - `scenarioId` selects the scenario; `format: "json"`
 *   switches to raw JSON output, anything else yields the digest.
 * @returns The rendered scenario text.
 */
export async function handleGetScenario(params: {
  scenarioId: string;
  format?: "digest" | "json";
}): Promise<string> {
  const scenario = await apiGetScenario(params.scenarioId);

  if (params.format === "json") {
    return JSON.stringify(scenario, null, 2);
  }

  const { criteria, labels } = scenario;
  const output = [
    `# Scenario: ${scenario.name}\n`,
    `**ID**: ${scenario.id}`,
    `**Situation**: ${scenario.situation}`,
  ];

  // The criteria section is left out entirely when there is nothing to list.
  if (Array.isArray(criteria) && criteria.length > 0) {
    output.push("\n## Criteria");
    criteria.forEach((item) => {
      output.push(`- ${item}`);
    });
  }

  if (Array.isArray(labels) && labels.length > 0) {
    output.push(`\n**Labels**: ${labels.join(", ")}`);
  }

  return output.join("\n");
}
@@ -0,0 +1,47 @@
1
+ import { listScenarios as apiListScenarios } from "../langwatch-api-scenarios.js";
2
+
3
/** Maximum number of UTF-16 code units of a situation shown in the digest. */
const SITUATION_PREVIEW_LIMIT = 60;

/**
 * Shortens `text` to at most `limit` UTF-16 code units and appends "..."
 * when anything was cut. Text that already fits is returned unchanged.
 */
function previewSituation(text: string, limit: number): string {
  if (text.length <= limit) {
    return text;
  }

  let end = limit;
  const lastKept = text.charCodeAt(end - 1);
  const firstDropped = text.charCodeAt(end);
  const splitsSurrogatePair =
    lastKept >= 0xd800 &&
    lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 &&
    firstDropped <= 0xdfff;
  // Cutting between the two halves of a surrogate pair would leave a lone
  // surrogate (an invalid character) in the output, so drop the first half too.
  if (splitsSurrogatePair) {
    end -= 1;
  }

  return text.slice(0, end) + "...";
}

/**
 * Handles the list_scenarios MCP tool invocation.
 *
 * Lists all scenarios in the LangWatch project, formatted as an
 * AI-readable digest or raw JSON.
 *
 * @param params - `format: "json"` returns the API response pretty-printed;
 *   anything else returns the markdown digest.
 * @returns The JSON text, the digest, or a hint to use `create_scenario`
 *   when the API returns no scenarios (or a non-array value).
 */
export async function handleListScenarios(params: {
  format?: "digest" | "json";
}): Promise<string> {
  const scenarios = await apiListScenarios();

  if (params.format === "json") {
    return JSON.stringify(scenarios, null, 2);
  }

  if (!Array.isArray(scenarios) || scenarios.length === 0) {
    return "No scenarios found in this project.\n\n> Tip: Use `create_scenario` to create your first scenario.";
  }

  const lines: string[] = [];
  lines.push(`# Scenarios (${scenarios.length} total)\n`);

  for (const s of scenarios) {
    lines.push(`## ${s.name}`);
    lines.push(`**ID**: ${s.id}`);
    // The response is not validated at runtime, so treat a missing or
    // non-string situation as empty instead of printing "undefined".
    const preview =
      typeof s.situation === "string"
        ? previewSituation(s.situation, SITUATION_PREVIEW_LIMIT)
        : "";
    lines.push(`**Situation**: ${preview}`);
    lines.push(
      `**Criteria**: ${Array.isArray(s.criteria) ? s.criteria.length : 0} criteria`,
    );
    if (Array.isArray(s.labels) && s.labels.length > 0) {
      lines.push(`**Labels**: ${s.labels.join(", ")}`);
    }
    lines.push("");
  }

  lines.push(
    "> Use `get_scenario` with the ID to see full scenario details.",
  );

  return lines.join("\n");
}
@@ -0,0 +1,32 @@
1
+ import { updateScenario as apiUpdateScenario } from "../langwatch-api-scenarios.js";
2
+
3
/**
 * Handler behind the `update_scenario` MCP tool.
 *
 * Forwards the tool arguments to the update API, with `scenarioId` passed
 * on as `id`, and renders a markdown confirmation from the scenario the
 * API returns.
 *
 * @param params - `scenarioId` selects the scenario; the remaining
 *   properties are the fields to update.
 * @returns The confirmation text; situation, criteria and labels lines
 *   appear only when the returned scenario has a non-empty value for them.
 */
export async function handleUpdateScenario(params: {
  scenarioId: string;
  name?: string;
  situation?: string;
  criteria?: string[];
  labels?: string[];
}): Promise<string> {
  const { scenarioId: id, ...changes } = params;
  const updated = await apiUpdateScenario({ id, ...changes });
  const { situation, criteria, labels } = updated;

  const output = [
    "Scenario updated successfully!\n",
    `**ID**: ${updated.id}`,
    `**Name**: ${updated.name}`,
  ];

  if (situation) {
    output.push(`**Situation**: ${situation}`);
  }

  // Criteria are summarised as a count, not listed one by one.
  if (Array.isArray(criteria) && criteria.length > 0) {
    output.push(`**Criteria**: ${criteria.length} criteria`);
  }

  if (Array.isArray(labels) && labels.length > 0) {
    output.push(`**Labels**: ${labels.join(", ")}`);
  }

  return output.join("\n");
}