@langwatch/mcp-server 0.3.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/CHANGELOG.md +32 -0
  2. package/README.md +97 -25
  3. package/dist/archive-scenario-GAE4XVFM.js +19 -0
  4. package/dist/archive-scenario-GAE4XVFM.js.map +1 -0
  5. package/dist/chunk-AAQNA53E.js +28 -0
  6. package/dist/chunk-AAQNA53E.js.map +1 -0
  7. package/dist/chunk-JVWDWL3J.js +91 -0
  8. package/dist/chunk-JVWDWL3J.js.map +1 -0
  9. package/dist/chunk-K2YFPOSD.js +40 -0
  10. package/dist/chunk-K2YFPOSD.js.map +1 -0
  11. package/dist/chunk-ZXKLPC2E.js +27 -0
  12. package/dist/chunk-ZXKLPC2E.js.map +1 -0
  13. package/dist/config-FIQWQRUB.js +11 -0
  14. package/dist/config-FIQWQRUB.js.map +1 -0
  15. package/dist/create-prompt-P35POKBW.js +22 -0
  16. package/dist/create-prompt-P35POKBW.js.map +1 -0
  17. package/dist/create-scenario-3YRZVDYF.js +26 -0
  18. package/dist/create-scenario-3YRZVDYF.js.map +1 -0
  19. package/dist/discover-scenario-schema-MEEEVND7.js +65 -0
  20. package/dist/discover-scenario-schema-MEEEVND7.js.map +1 -0
  21. package/dist/discover-schema-3T52ORPB.js +446 -0
  22. package/dist/discover-schema-3T52ORPB.js.map +1 -0
  23. package/dist/get-analytics-BAVXTAPB.js +55 -0
  24. package/dist/get-analytics-BAVXTAPB.js.map +1 -0
  25. package/dist/get-prompt-LKCPT26O.js +48 -0
  26. package/dist/get-prompt-LKCPT26O.js.map +1 -0
  27. package/dist/get-scenario-3SCDW4Z6.js +33 -0
  28. package/dist/get-scenario-3SCDW4Z6.js.map +1 -0
  29. package/dist/get-trace-QFDWJ5D4.js +50 -0
  30. package/dist/get-trace-QFDWJ5D4.js.map +1 -0
  31. package/dist/index.js +22114 -8786
  32. package/dist/index.js.map +1 -1
  33. package/dist/list-prompts-UQPBCUYA.js +33 -0
  34. package/dist/list-prompts-UQPBCUYA.js.map +1 -0
  35. package/dist/list-scenarios-573YOUKC.js +40 -0
  36. package/dist/list-scenarios-573YOUKC.js.map +1 -0
  37. package/dist/search-traces-RSMYCAN7.js +72 -0
  38. package/dist/search-traces-RSMYCAN7.js.map +1 -0
  39. package/dist/update-prompt-G2Y5EBQY.js +31 -0
  40. package/dist/update-prompt-G2Y5EBQY.js.map +1 -0
  41. package/dist/update-scenario-SSGVOBJO.js +27 -0
  42. package/dist/update-scenario-SSGVOBJO.js.map +1 -0
  43. package/package.json +3 -3
  44. package/src/__tests__/config.unit.test.ts +89 -0
  45. package/src/__tests__/date-parsing.unit.test.ts +78 -0
  46. package/src/__tests__/discover-schema.unit.test.ts +118 -0
  47. package/src/__tests__/integration.integration.test.ts +313 -0
  48. package/src/__tests__/langwatch-api.unit.test.ts +309 -0
  49. package/src/__tests__/scenario-tools.integration.test.ts +286 -0
  50. package/src/__tests__/scenario-tools.unit.test.ts +185 -0
  51. package/src/__tests__/schemas.unit.test.ts +85 -0
  52. package/src/__tests__/tools.unit.test.ts +729 -0
  53. package/src/config.ts +31 -0
  54. package/src/index.ts +383 -0
  55. package/src/langwatch-api-scenarios.ts +67 -0
  56. package/src/langwatch-api.ts +266 -0
  57. package/src/schemas/analytics-groups.ts +78 -0
  58. package/src/schemas/analytics-metrics.ts +179 -0
  59. package/src/schemas/filter-fields.ts +119 -0
  60. package/src/schemas/index.ts +3 -0
  61. package/src/tools/archive-scenario.ts +19 -0
  62. package/src/tools/create-prompt.ts +29 -0
  63. package/src/tools/create-scenario.ts +30 -0
  64. package/src/tools/discover-scenario-schema.ts +71 -0
  65. package/src/tools/discover-schema.ts +106 -0
  66. package/src/tools/get-analytics.ts +71 -0
  67. package/src/tools/get-prompt.ts +56 -0
  68. package/src/tools/get-scenario.ts +36 -0
  69. package/src/tools/get-trace.ts +61 -0
  70. package/src/tools/list-prompts.ts +35 -0
  71. package/src/tools/list-scenarios.ts +47 -0
  72. package/src/tools/search-traces.ts +91 -0
  73. package/src/tools/update-prompt.ts +44 -0
  74. package/src/tools/update-scenario.ts +32 -0
  75. package/src/utils/date-parsing.ts +31 -0
  76. package/tests/evaluations.ipynb +634 -634
  77. package/tests/scenario-openai.test.ts +3 -1
  78. package/uv.lock +1788 -1322
package/src/config.ts ADDED
@@ -0,0 +1,31 @@
1
+ export interface McpConfig {
2
+ apiKey: string | undefined;
3
+ endpoint: string;
4
+ }
5
+
6
+ let config: McpConfig | undefined;
7
+
8
+ export function initConfig(args: { apiKey?: string; endpoint?: string }): void {
9
+ config = {
10
+ apiKey: args.apiKey || process.env.LANGWATCH_API_KEY,
11
+ endpoint:
12
+ args.endpoint ||
13
+ process.env.LANGWATCH_ENDPOINT ||
14
+ "https://app.langwatch.ai",
15
+ };
16
+ }
17
+
18
+ export function getConfig(): McpConfig {
19
+ if (!config) throw new Error("Config not initialized");
20
+ return config;
21
+ }
22
+
23
+ export function requireApiKey(): string {
24
+ const { apiKey } = getConfig();
25
+ if (!apiKey) {
26
+ throw new Error(
27
+ "LANGWATCH_API_KEY is required. Set it via --apiKey flag or LANGWATCH_API_KEY environment variable."
28
+ );
29
+ }
30
+ return apiKey;
31
+ }
package/src/index.ts CHANGED
@@ -1,8 +1,28 @@
1
1
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
2
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
+ import yargs from "yargs";
4
+ import { hideBin } from "yargs/helpers";
3
5
  import { z } from "zod";
4
6
 
5
7
  import packageJson from "../package.json" assert { type: "json" };
8
+ import { initConfig } from "./config.js";
9
+
10
+ const argv = await yargs(hideBin(process.argv))
11
+ .option("apiKey", {
12
+ type: "string",
13
+ description: "LangWatch API key",
14
+ })
15
+ .option("endpoint", {
16
+ type: "string",
17
+ description: "LangWatch API endpoint",
18
+ })
19
+ .help()
20
+ .parseAsync();
21
+
22
+ initConfig({
23
+ apiKey: argv.apiKey,
24
+ endpoint: argv.endpoint,
25
+ });
6
26
 
7
27
  const transport = new StdioServerTransport();
8
28
  const server = new McpServer({
@@ -70,4 +90,367 @@ server.tool(
70
90
  }
71
91
  );
72
92
 
93
+ // --- Observability Tools (require API key) ---
94
+
95
+ server.tool(
96
+ "discover_schema",
97
+ "Discover available filter fields, metrics, aggregation types, group-by options, and scenario schema for LangWatch queries. Call this before using search_traces, get_analytics, or scenario tools to understand available options.",
98
+ {
99
+ category: z
100
+ .enum(["filters", "metrics", "aggregations", "groups", "scenarios", "all"])
101
+ .describe("Which schema category to discover"),
102
+ },
103
+ async ({ category }) => {
104
+ if (category === "scenarios") {
105
+ const { formatScenarioSchema } = await import("./tools/discover-scenario-schema.js");
106
+ return { content: [{ type: "text", text: formatScenarioSchema() }] };
107
+ }
108
+ const { formatSchema } = await import("./tools/discover-schema.js");
109
+ let text = formatSchema(category);
110
+ if (category === "all") {
111
+ const { formatScenarioSchema } = await import("./tools/discover-scenario-schema.js");
112
+ text += "\n\n" + formatScenarioSchema();
113
+ }
114
+ return { content: [{ type: "text", text }] };
115
+ }
116
+ );
117
+
118
+ server.tool(
119
+ "search_traces",
120
+ "Search LangWatch traces with filters, text query, and date range. Returns AI-readable trace digests by default. Use format: 'json' for full raw data.",
121
+ {
122
+ query: z.string().optional().describe("Text search query"),
123
+ filters: z
124
+ .record(z.string(), z.array(z.string()))
125
+ .optional()
126
+ .describe(
127
+ 'Filter traces. Format: {"field": ["value"]}. Use discover_schema for field names.'
128
+ ),
129
+ startDate: z
130
+ .string()
131
+ .optional()
132
+ .describe(
133
+ 'Start date: ISO string or relative like "24h", "7d", "30d". Default: 24h ago'
134
+ ),
135
+ endDate: z
136
+ .string()
137
+ .optional()
138
+ .describe("End date: ISO string or relative. Default: now"),
139
+ pageSize: z
140
+ .number()
141
+ .optional()
142
+ .describe("Results per page (default: 25, max: 1000)"),
143
+ scrollId: z
144
+ .string()
145
+ .optional()
146
+ .describe("Pagination token from previous search"),
147
+ format: z
148
+ .enum(["digest", "json"])
149
+ .optional()
150
+ .describe(
151
+ "Output format: 'digest' (default, AI-readable) or 'json' (full raw data)"
152
+ ),
153
+ },
154
+ async (params) => {
155
+ const { requireApiKey } = await import("./config.js");
156
+ requireApiKey();
157
+ const { handleSearchTraces } = await import("./tools/search-traces.js");
158
+ return {
159
+ content: [{ type: "text", text: await handleSearchTraces(params) }],
160
+ };
161
+ }
162
+ );
163
+
164
+ server.tool(
165
+ "get_trace",
166
+ "Get full details of a single trace by ID. Returns AI-readable trace digest by default. Use format: 'json' for full raw data including all spans.",
167
+ {
168
+ traceId: z.string().describe("The trace ID to retrieve"),
169
+ format: z
170
+ .enum(["digest", "json"])
171
+ .optional()
172
+ .describe(
173
+ "Output format: 'digest' (default, AI-readable) or 'json' (full raw data)"
174
+ ),
175
+ },
176
+ async (params) => {
177
+ const { requireApiKey } = await import("./config.js");
178
+ requireApiKey();
179
+ const { handleGetTrace } = await import("./tools/get-trace.js");
180
+ return {
181
+ content: [{ type: "text", text: await handleGetTrace(params) }],
182
+ };
183
+ }
184
+ );
185
+
186
+ server.tool(
187
+ "get_analytics",
188
+ 'Query analytics timeseries from LangWatch. Metrics use "category.name" format (e.g., "performance.completion_time"). Use discover_schema to see available metrics.',
189
+ {
190
+ metric: z
191
+ .string()
192
+ .describe(
193
+ 'Metric in "category.name" format, e.g., "metadata.trace_id", "performance.total_cost"'
194
+ ),
195
+ aggregation: z
196
+ .string()
197
+ .optional()
198
+ .describe(
199
+ "Aggregation type: avg, sum, min, max, median, p90, p95, p99, cardinality, terms. Default: avg"
200
+ ),
201
+ startDate: z
202
+ .string()
203
+ .optional()
204
+ .describe('Start date: ISO or relative ("7d", "30d"). Default: 7 days ago'),
205
+ endDate: z.string().optional().describe("End date. Default: now"),
206
+ timeZone: z.string().optional().describe("Timezone. Default: UTC"),
207
+ groupBy: z
208
+ .string()
209
+ .optional()
210
+ .describe(
211
+ "Group results by field. Use discover_schema for options."
212
+ ),
213
+ filters: z
214
+ .record(z.string(), z.array(z.string()))
215
+ .optional()
216
+ .describe("Filters to apply"),
217
+ },
218
+ async (params) => {
219
+ const { requireApiKey } = await import("./config.js");
220
+ requireApiKey();
221
+ const { handleGetAnalytics } = await import("./tools/get-analytics.js");
222
+ return {
223
+ content: [{ type: "text", text: await handleGetAnalytics(params) }],
224
+ };
225
+ }
226
+ );
227
+
228
+ server.tool(
229
+ "list_prompts",
230
+ "List all prompts configured in the LangWatch project.",
231
+ {},
232
+ async () => {
233
+ const { requireApiKey } = await import("./config.js");
234
+ requireApiKey();
235
+ const { handleListPrompts } = await import("./tools/list-prompts.js");
236
+ return {
237
+ content: [{ type: "text", text: await handleListPrompts() }],
238
+ };
239
+ }
240
+ );
241
+
242
+ server.tool(
243
+ "get_prompt",
244
+ "Get a specific prompt by ID or handle, including messages, model config, and version history.",
245
+ {
246
+ idOrHandle: z.string().describe("Prompt ID or handle"),
247
+ version: z
248
+ .number()
249
+ .optional()
250
+ .describe("Specific version number (default: latest)"),
251
+ },
252
+ async (params) => {
253
+ const { requireApiKey } = await import("./config.js");
254
+ requireApiKey();
255
+ const { handleGetPrompt } = await import("./tools/get-prompt.js");
256
+ return {
257
+ content: [{ type: "text", text: await handleGetPrompt(params) }],
258
+ };
259
+ }
260
+ );
261
+
262
+ server.tool(
263
+ "create_prompt",
264
+ "Create a new prompt in the LangWatch project.",
265
+ {
266
+ name: z.string().describe("Prompt name"),
267
+ handle: z
268
+ .string()
269
+ .optional()
270
+ .describe("URL-friendly handle (auto-generated if omitted)"),
271
+ messages: z
272
+ .array(
273
+ z.object({
274
+ role: z
275
+ .enum(["system", "user", "assistant"])
276
+ .describe("Message role"),
277
+ content: z.string().describe("Message content"),
278
+ })
279
+ )
280
+ .describe("Prompt messages"),
281
+ model: z
282
+ .string()
283
+ .describe('Model name, e.g., "gpt-4o", "claude-sonnet-4-5-20250929"'),
284
+ modelProvider: z
285
+ .string()
286
+ .describe('Provider name, e.g., "openai", "anthropic"'),
287
+ description: z.string().optional().describe("Prompt description"),
288
+ },
289
+ async (params) => {
290
+ const { requireApiKey } = await import("./config.js");
291
+ requireApiKey();
292
+ const { handleCreatePrompt } = await import("./tools/create-prompt.js");
293
+ return {
294
+ content: [{ type: "text", text: await handleCreatePrompt(params) }],
295
+ };
296
+ }
297
+ );
298
+
299
+ server.tool(
300
+ "update_prompt",
301
+ "Update an existing prompt or create a new version.",
302
+ {
303
+ idOrHandle: z.string().describe("Prompt ID or handle to update"),
304
+ messages: z
305
+ .array(
306
+ z.object({
307
+ role: z.enum(["system", "user", "assistant"]),
308
+ content: z.string(),
309
+ })
310
+ )
311
+ .optional()
312
+ .describe("Updated messages"),
313
+ model: z.string().optional().describe("Updated model name"),
314
+ modelProvider: z.string().optional().describe("Updated provider"),
315
+ commitMessage: z
316
+ .string()
317
+ .optional()
318
+ .describe("Commit message for the change"),
319
+ createVersion: z
320
+ .boolean()
321
+ .optional()
322
+ .describe(
323
+ "If true, creates a new version instead of updating in place"
324
+ ),
325
+ },
326
+ async (params) => {
327
+ const { requireApiKey } = await import("./config.js");
328
+ requireApiKey();
329
+ const { handleUpdatePrompt } = await import("./tools/update-prompt.js");
330
+ return {
331
+ content: [{ type: "text", text: await handleUpdatePrompt(params) }],
332
+ };
333
+ }
334
+ );
335
+
336
+ // --- Scenario Tools (require API key) ---
337
+
338
+ server.tool(
339
+ "list_scenarios",
340
+ "List all scenarios in the LangWatch project. Returns AI-readable digest by default.",
341
+ {
342
+ format: z
343
+ .enum(["digest", "json"])
344
+ .optional()
345
+ .describe(
346
+ "Output format: 'digest' (default, AI-readable) or 'json' (full raw data)"
347
+ ),
348
+ },
349
+ async (params) => {
350
+ const { requireApiKey } = await import("./config.js");
351
+ requireApiKey();
352
+ const { handleListScenarios } = await import("./tools/list-scenarios.js");
353
+ return {
354
+ content: [{ type: "text", text: await handleListScenarios(params) }],
355
+ };
356
+ }
357
+ );
358
+
359
+ server.tool(
360
+ "get_scenario",
361
+ "Get full details of a scenario by ID, including situation, criteria, and labels.",
362
+ {
363
+ scenarioId: z.string().describe("The scenario ID to retrieve"),
364
+ format: z
365
+ .enum(["digest", "json"])
366
+ .optional()
367
+ .describe(
368
+ "Output format: 'digest' (default, AI-readable) or 'json' (full raw data)"
369
+ ),
370
+ },
371
+ async (params) => {
372
+ const { requireApiKey } = await import("./config.js");
373
+ requireApiKey();
374
+ const { handleGetScenario } = await import("./tools/get-scenario.js");
375
+ return {
376
+ content: [{ type: "text", text: await handleGetScenario(params) }],
377
+ };
378
+ }
379
+ );
380
+
381
+ server.tool(
382
+ "create_scenario",
383
+ "Create a new scenario in the LangWatch project. Call discover_schema({ category: 'scenarios' }) first to learn how to write effective situations and criteria.",
384
+ {
385
+ name: z.string().describe("Scenario name"),
386
+ situation: z
387
+ .string()
388
+ .describe("The context or setup describing what the user/agent is doing"),
389
+ criteria: z
390
+ .array(z.string())
391
+ .optional()
392
+ .describe("Pass/fail conditions the agent's response must satisfy"),
393
+ labels: z
394
+ .array(z.string())
395
+ .optional()
396
+ .describe("Tags for organizing and filtering scenarios"),
397
+ },
398
+ async (params) => {
399
+ const { requireApiKey } = await import("./config.js");
400
+ requireApiKey();
401
+ const { handleCreateScenario } = await import(
402
+ "./tools/create-scenario.js"
403
+ );
404
+ return {
405
+ content: [{ type: "text", text: await handleCreateScenario(params) }],
406
+ };
407
+ }
408
+ );
409
+
410
+ server.tool(
411
+ "update_scenario",
412
+ "Update an existing scenario.",
413
+ {
414
+ scenarioId: z.string().describe("The scenario ID to update"),
415
+ name: z.string().optional().describe("Updated scenario name"),
416
+ situation: z.string().optional().describe("Updated situation"),
417
+ criteria: z
418
+ .array(z.string())
419
+ .optional()
420
+ .describe("Updated criteria"),
421
+ labels: z
422
+ .array(z.string())
423
+ .optional()
424
+ .describe("Updated labels"),
425
+ },
426
+ async (params) => {
427
+ const { requireApiKey } = await import("./config.js");
428
+ requireApiKey();
429
+ const { handleUpdateScenario } = await import(
430
+ "./tools/update-scenario.js"
431
+ );
432
+ return {
433
+ content: [{ type: "text", text: await handleUpdateScenario(params) }],
434
+ };
435
+ }
436
+ );
437
+
438
+ server.tool(
439
+ "archive_scenario",
440
+ "Archive (soft-delete) a scenario.",
441
+ {
442
+ scenarioId: z.string().describe("The scenario ID to archive"),
443
+ },
444
+ async (params) => {
445
+ const { requireApiKey } = await import("./config.js");
446
+ requireApiKey();
447
+ const { handleArchiveScenario } = await import(
448
+ "./tools/archive-scenario.js"
449
+ );
450
+ return {
451
+ content: [{ type: "text", text: await handleArchiveScenario(params) }],
452
+ };
453
+ }
454
+ );
455
+
73
456
  await server.connect(transport);
package/src/langwatch-api-scenarios.ts ADDED
@@ -0,0 +1,67 @@
1
+ import { makeRequest } from "./langwatch-api.js";
2
+
3
+ // --- Scenario types ---
4
+
5
+ export interface ScenarioSummary {
6
+ id: string;
7
+ name: string;
8
+ situation: string;
9
+ criteria: string[];
10
+ labels: string[];
11
+ }
12
+
13
+ export interface ScenarioArchiveResponse {
14
+ id: string;
15
+ archived: boolean;
16
+ }
17
+
18
+ // --- Scenario API functions ---
19
+
20
+ /** Lists all scenarios in the project. */
21
+ export async function listScenarios(): Promise<ScenarioSummary[]> {
22
+ return makeRequest("GET", "/api/scenarios") as Promise<ScenarioSummary[]>;
23
+ }
24
+
25
+ /** Retrieves a single scenario by ID. */
26
+ export async function getScenario(id: string): Promise<ScenarioSummary> {
27
+ return makeRequest(
28
+ "GET",
29
+ `/api/scenarios/${encodeURIComponent(id)}`
30
+ ) as Promise<ScenarioSummary>;
31
+ }
32
+
33
+ /** Creates a new scenario. */
34
+ export async function createScenario(data: {
35
+ name: string;
36
+ situation: string;
37
+ criteria?: string[];
38
+ labels?: string[];
39
+ }): Promise<ScenarioSummary> {
40
+ return makeRequest("POST", "/api/scenarios", data) as Promise<ScenarioSummary>;
41
+ }
42
+
43
+ /** Updates an existing scenario. */
44
+ export async function updateScenario(params: {
45
+ id: string;
46
+ name?: string;
47
+ situation?: string;
48
+ criteria?: string[];
49
+ labels?: string[];
50
+ }): Promise<ScenarioSummary> {
51
+ const { id, ...data } = params;
52
+ return makeRequest(
53
+ "PUT",
54
+ `/api/scenarios/${encodeURIComponent(id)}`,
55
+ data
56
+ ) as Promise<ScenarioSummary>;
57
+ }
58
+
59
+ /** Archives (soft-deletes) a scenario. */
60
+ export async function archiveScenario(
61
+ id: string
62
+ ): Promise<ScenarioArchiveResponse> {
63
+ return makeRequest(
64
+ "DELETE",
65
+ `/api/scenarios/${encodeURIComponent(id)}`
66
+ ) as Promise<ScenarioArchiveResponse>;
67
+ }