@langwatch/mcp-server 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/dist/{archive-scenario-GAE4XVFM.js → archive-scenario-YFD5THOR.js} +3 -3
- package/dist/archive-scenario-YFD5THOR.js.map +1 -0
- package/dist/chunk-5UOPNRXW.js +37 -0
- package/dist/chunk-5UOPNRXW.js.map +1 -0
- package/dist/{chunk-K2YFPOSD.js → chunk-6U4TCGFC.js} +2 -2
- package/dist/chunk-IX6QJKAD.js +22 -0
- package/dist/chunk-IX6QJKAD.js.map +1 -0
- package/dist/{chunk-JVWDWL3J.js → chunk-LLRQIF52.js} +3 -11
- package/dist/chunk-LLRQIF52.js.map +1 -0
- package/dist/create-evaluator-E5X5ZP3B.js +27 -0
- package/dist/create-evaluator-E5X5ZP3B.js.map +1 -0
- package/dist/create-prompt-7Z35MIL6.js +36 -0
- package/dist/create-prompt-7Z35MIL6.js.map +1 -0
- package/dist/{create-scenario-3YRZVDYF.js → create-scenario-DIMPJRPY.js} +3 -3
- package/dist/create-scenario-DIMPJRPY.js.map +1 -0
- package/dist/discover-evaluator-schema-H23XCLNE.js +1402 -0
- package/dist/discover-evaluator-schema-H23XCLNE.js.map +1 -0
- package/dist/{get-analytics-BAVXTAPB.js → get-analytics-4YJW4S5L.js} +2 -2
- package/dist/get-evaluator-WDEH2F7M.js +47 -0
- package/dist/get-evaluator-WDEH2F7M.js.map +1 -0
- package/dist/{get-prompt-LKCPT26O.js → get-prompt-F6PDVC76.js} +2 -5
- package/dist/get-prompt-F6PDVC76.js.map +1 -0
- package/dist/{get-scenario-3SCDW4Z6.js → get-scenario-H24ZYNT5.js} +3 -3
- package/dist/{get-trace-QFDWJ5D4.js → get-trace-27USKGO7.js} +2 -2
- package/dist/index.js +13310 -2410
- package/dist/index.js.map +1 -1
- package/dist/list-evaluators-KRGI72EH.js +34 -0
- package/dist/list-evaluators-KRGI72EH.js.map +1 -0
- package/dist/list-model-providers-A5YCFTPI.js +35 -0
- package/dist/list-model-providers-A5YCFTPI.js.map +1 -0
- package/dist/{list-prompts-UQPBCUYA.js → list-prompts-LKJSE7XN.js} +6 -7
- package/dist/list-prompts-LKJSE7XN.js.map +1 -0
- package/dist/{list-scenarios-573YOUKC.js → list-scenarios-ZK5CMGC4.js} +5 -5
- package/dist/list-scenarios-ZK5CMGC4.js.map +1 -0
- package/dist/{search-traces-RSMYCAN7.js → search-traces-SOKAAMAR.js} +2 -2
- package/dist/set-model-provider-7MGULZDH.js +33 -0
- package/dist/set-model-provider-7MGULZDH.js.map +1 -0
- package/dist/update-evaluator-A3XINFLJ.js +24 -0
- package/dist/update-evaluator-A3XINFLJ.js.map +1 -0
- package/dist/update-prompt-IW7X2UQM.js +22 -0
- package/dist/update-prompt-IW7X2UQM.js.map +1 -0
- package/dist/{update-scenario-SSGVOBJO.js → update-scenario-ZT7TOBFR.js} +3 -3
- package/dist/update-scenario-ZT7TOBFR.js.map +1 -0
- package/package.json +10 -10
- package/src/__tests__/all-tools.integration.test.ts +1337 -0
- package/src/__tests__/discover-evaluator-schema.unit.test.ts +89 -0
- package/src/__tests__/evaluator-tools.unit.test.ts +262 -0
- package/src/__tests__/integration.integration.test.ts +9 -34
- package/src/__tests__/langwatch-api.unit.test.ts +4 -32
- package/src/__tests__/model-provider-tools.unit.test.ts +190 -0
- package/src/__tests__/scenario-tools.integration.test.ts +5 -5
- package/src/__tests__/scenario-tools.unit.test.ts +2 -2
- package/src/__tests__/tools.unit.test.ts +59 -65
- package/src/index.ts +249 -88
- package/src/langwatch-api-evaluators.ts +70 -0
- package/src/langwatch-api-model-providers.ts +41 -0
- package/src/langwatch-api.ts +3 -28
- package/src/tools/archive-scenario.ts +1 -1
- package/src/tools/create-evaluator.ts +33 -0
- package/src/tools/create-prompt.ts +30 -5
- package/src/tools/create-scenario.ts +1 -1
- package/src/tools/discover-evaluator-schema.ts +143 -0
- package/src/tools/get-evaluator.ts +53 -0
- package/src/tools/get-prompt.ts +1 -4
- package/src/tools/list-evaluators.ts +37 -0
- package/src/tools/list-model-providers.ts +40 -0
- package/src/tools/list-prompts.ts +5 -6
- package/src/tools/list-scenarios.ts +3 -3
- package/src/tools/set-model-provider.ts +46 -0
- package/src/tools/update-evaluator.ts +30 -0
- package/src/tools/update-prompt.ts +9 -25
- package/src/tools/update-scenario.ts +1 -1
- package/dist/archive-scenario-GAE4XVFM.js.map +0 -1
- package/dist/chunk-JVWDWL3J.js.map +0 -1
- package/dist/create-prompt-P35POKBW.js +0 -22
- package/dist/create-prompt-P35POKBW.js.map +0 -1
- package/dist/create-scenario-3YRZVDYF.js.map +0 -1
- package/dist/get-prompt-LKCPT26O.js.map +0 -1
- package/dist/list-prompts-UQPBCUYA.js.map +0 -1
- package/dist/list-scenarios-573YOUKC.js.map +0 -1
- package/dist/update-prompt-G2Y5EBQY.js +0 -31
- package/dist/update-prompt-G2Y5EBQY.js.map +0 -1
- package/dist/update-scenario-SSGVOBJO.js.map +0 -1
- /package/dist/{chunk-K2YFPOSD.js.map → chunk-6U4TCGFC.js.map} +0 -0
- /package/dist/{get-analytics-BAVXTAPB.js.map → get-analytics-4YJW4S5L.js.map} +0 -0
- /package/dist/{get-scenario-3SCDW4Z6.js.map → get-scenario-H24ZYNT5.js.map} +0 -0
- /package/dist/{get-trace-QFDWJ5D4.js.map → get-trace-27USKGO7.js.map} +0 -0
- /package/dist/{search-traces-RSMYCAN7.js.map → search-traces-SOKAAMAR.js.map} +0 -0
package/src/index.ts
CHANGED
|
@@ -94,22 +94,32 @@ server.tool(
|
|
|
94
94
|
|
|
95
95
|
server.tool(
|
|
96
96
|
"discover_schema",
|
|
97
|
-
"Discover available filter fields, metrics, aggregation types, group-by options,
|
|
97
|
+
"Discover available filter fields, metrics, aggregation types, group-by options, scenario schema, and evaluator types for LangWatch queries. Call this before using search_traces, get_analytics, scenario tools, or evaluator tools to understand available options.",
|
|
98
98
|
{
|
|
99
99
|
category: z
|
|
100
|
-
.enum(["filters", "metrics", "aggregations", "groups", "scenarios", "all"])
|
|
100
|
+
.enum(["filters", "metrics", "aggregations", "groups", "scenarios", "evaluators", "all"])
|
|
101
101
|
.describe("Which schema category to discover"),
|
|
102
|
+
evaluatorType: z
|
|
103
|
+
.string()
|
|
104
|
+
.optional()
|
|
105
|
+
.describe("When category is 'evaluators', provide a specific evaluator type (e.g. 'langevals/llm_judge') to get its full schema details"),
|
|
102
106
|
},
|
|
103
|
-
async ({ category }) => {
|
|
107
|
+
async ({ category, evaluatorType }) => {
|
|
104
108
|
if (category === "scenarios") {
|
|
105
109
|
const { formatScenarioSchema } = await import("./tools/discover-scenario-schema.js");
|
|
106
110
|
return { content: [{ type: "text", text: formatScenarioSchema() }] };
|
|
107
111
|
}
|
|
112
|
+
if (category === "evaluators") {
|
|
113
|
+
const { formatEvaluatorSchema } = await import("./tools/discover-evaluator-schema.js");
|
|
114
|
+
return { content: [{ type: "text", text: formatEvaluatorSchema(evaluatorType) }] };
|
|
115
|
+
}
|
|
108
116
|
const { formatSchema } = await import("./tools/discover-schema.js");
|
|
109
117
|
let text = formatSchema(category);
|
|
110
118
|
if (category === "all") {
|
|
111
119
|
const { formatScenarioSchema } = await import("./tools/discover-scenario-schema.js");
|
|
112
120
|
text += "\n\n" + formatScenarioSchema();
|
|
121
|
+
const { formatEvaluatorSchema } = await import("./tools/discover-evaluator-schema.js");
|
|
122
|
+
text += "\n\n" + formatEvaluatorSchema();
|
|
113
123
|
}
|
|
114
124
|
return { content: [{ type: "text", text }] };
|
|
115
125
|
}
|
|
@@ -225,9 +235,59 @@ server.tool(
|
|
|
225
235
|
}
|
|
226
236
|
);
|
|
227
237
|
|
|
238
|
+
// --- Platform Prompt Tools (require API key) ---
|
|
239
|
+
// These tools manage prompts on the LangWatch platform via API.
|
|
240
|
+
// For code-based prompt management, see `fetch_langwatch_docs` for the CLI/SDK approach.
|
|
241
|
+
|
|
242
|
+
const modelSchema = z
|
|
243
|
+
.string()
|
|
244
|
+
.describe(
|
|
245
|
+
'Model in "provider/model-name" format, e.g., "openai/gpt-4o", "anthropic/claude-sonnet-4-5-20250929"'
|
|
246
|
+
);
|
|
247
|
+
|
|
248
|
+
server.tool(
|
|
249
|
+
"platform_create_prompt",
|
|
250
|
+
`Create a new prompt on the LangWatch platform.
|
|
251
|
+
|
|
252
|
+
NOTE: Prompts can be managed two ways. Determine which approach the user needs:
|
|
253
|
+
|
|
254
|
+
1. Code-based (CLI/SDK): If the user wants to manage prompts in their codebase, use \`fetch_langwatch_docs\` to learn about the prompt management CLI/SDK. This lets them version-control prompts and pull them into code.
|
|
255
|
+
|
|
256
|
+
2. Platform-based (LangWatch UI): If the user wants to manage prompts directly on the LangWatch platform, use the \`platform_\` MCP tools (\`platform_create_prompt\`, \`platform_update_prompt\`, etc.).
|
|
257
|
+
`,
|
|
258
|
+
{
|
|
259
|
+
name: z.string().describe("Prompt display name"),
|
|
260
|
+
handle: z
|
|
261
|
+
.string()
|
|
262
|
+
.optional()
|
|
263
|
+
.describe(
|
|
264
|
+
"URL-friendly handle (auto-generated from name if omitted)"
|
|
265
|
+
),
|
|
266
|
+
messages: z
|
|
267
|
+
.array(
|
|
268
|
+
z.object({
|
|
269
|
+
role: z
|
|
270
|
+
.enum(["system", "user", "assistant"])
|
|
271
|
+
.describe("Message role"),
|
|
272
|
+
content: z.string().describe("Message content"),
|
|
273
|
+
})
|
|
274
|
+
)
|
|
275
|
+
.describe("Prompt messages"),
|
|
276
|
+
model: modelSchema,
|
|
277
|
+
},
|
|
278
|
+
async (params) => {
|
|
279
|
+
const { requireApiKey } = await import("./config.js");
|
|
280
|
+
requireApiKey();
|
|
281
|
+
const { handleCreatePrompt } = await import("./tools/create-prompt.js");
|
|
282
|
+
return {
|
|
283
|
+
content: [{ type: "text", text: await handleCreatePrompt(params) }],
|
|
284
|
+
};
|
|
285
|
+
}
|
|
286
|
+
);
|
|
287
|
+
|
|
228
288
|
server.tool(
|
|
229
|
-
"
|
|
230
|
-
"List all prompts configured
|
|
289
|
+
"platform_list_prompts",
|
|
290
|
+
"List all prompts configured on the LangWatch platform.",
|
|
231
291
|
{},
|
|
232
292
|
async () => {
|
|
233
293
|
const { requireApiKey } = await import("./config.js");
|
|
@@ -240,8 +300,8 @@ server.tool(
|
|
|
240
300
|
);
|
|
241
301
|
|
|
242
302
|
server.tool(
|
|
243
|
-
"
|
|
244
|
-
"Get a specific prompt by ID or handle, including messages, model config, and version history.",
|
|
303
|
+
"platform_get_prompt",
|
|
304
|
+
"Get a specific prompt from the LangWatch platform by ID or handle, including messages, model config, and version history.",
|
|
245
305
|
{
|
|
246
306
|
idOrHandle: z.string().describe("Prompt ID or handle"),
|
|
247
307
|
version: z
|
|
@@ -260,84 +320,77 @@ server.tool(
|
|
|
260
320
|
);
|
|
261
321
|
|
|
262
322
|
server.tool(
|
|
263
|
-
"
|
|
264
|
-
"
|
|
323
|
+
"platform_update_prompt",
|
|
324
|
+
"Update an existing prompt on the LangWatch platform. Every update creates a new version.",
|
|
265
325
|
{
|
|
266
|
-
|
|
267
|
-
handle: z
|
|
268
|
-
.string()
|
|
269
|
-
.optional()
|
|
270
|
-
.describe("URL-friendly handle (auto-generated if omitted)"),
|
|
326
|
+
idOrHandle: z.string().describe("Prompt ID or handle to update"),
|
|
271
327
|
messages: z
|
|
272
328
|
.array(
|
|
273
329
|
z.object({
|
|
274
|
-
role: z
|
|
275
|
-
|
|
276
|
-
.describe("Message role"),
|
|
277
|
-
content: z.string().describe("Message content"),
|
|
330
|
+
role: z.enum(["system", "user", "assistant"]),
|
|
331
|
+
content: z.string(),
|
|
278
332
|
})
|
|
279
333
|
)
|
|
280
|
-
.
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
modelProvider: z
|
|
334
|
+
.optional()
|
|
335
|
+
.describe("Updated messages"),
|
|
336
|
+
model: modelSchema.optional(),
|
|
337
|
+
commitMessage: z
|
|
285
338
|
.string()
|
|
286
|
-
.describe(
|
|
287
|
-
description: z.string().optional().describe("Prompt description"),
|
|
339
|
+
.describe("Commit message describing the change"),
|
|
288
340
|
},
|
|
289
341
|
async (params) => {
|
|
290
342
|
const { requireApiKey } = await import("./config.js");
|
|
291
343
|
requireApiKey();
|
|
292
|
-
const {
|
|
344
|
+
const { handleUpdatePrompt } = await import("./tools/update-prompt.js");
|
|
293
345
|
return {
|
|
294
|
-
content: [{ type: "text", text: await
|
|
346
|
+
content: [{ type: "text", text: await handleUpdatePrompt(params) }],
|
|
295
347
|
};
|
|
296
348
|
}
|
|
297
349
|
);
|
|
298
350
|
|
|
351
|
+
// --- Platform Scenario Tools (require API key) ---
|
|
352
|
+
// These tools manage scenarios on the LangWatch platform via API.
|
|
353
|
+
// For code-based scenario testing, see `fetch_scenario_docs` for the SDK approach.
|
|
354
|
+
|
|
299
355
|
server.tool(
|
|
300
|
-
"
|
|
301
|
-
|
|
356
|
+
"platform_create_scenario",
|
|
357
|
+
`Create a new scenario on the LangWatch platform. Call discover_schema({ category: 'scenarios' }) first to learn how to write effective situations and criteria.
|
|
358
|
+
|
|
359
|
+
NOTE: Scenarios can be created two ways. Determine which approach the user needs:
|
|
360
|
+
|
|
361
|
+
1. Code-based (local testing): If the user has a codebase with an AI agent they want to test, use \`fetch_scenario_docs\` to learn about the Scenario Python/TypeScript SDK. This lets them run tests locally and iterate in code.
|
|
362
|
+
|
|
363
|
+
2. Platform-based (LangWatch UI): If the user wants to manage scenarios directly on the LangWatch platform, use the \`platform_\` MCP tools (\`platform_create_scenario\`, \`platform_update_scenario\`, etc.).
|
|
364
|
+
`,
|
|
302
365
|
{
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
.array(
|
|
306
|
-
z.object({
|
|
307
|
-
role: z.enum(["system", "user", "assistant"]),
|
|
308
|
-
content: z.string(),
|
|
309
|
-
})
|
|
310
|
-
)
|
|
311
|
-
.optional()
|
|
312
|
-
.describe("Updated messages"),
|
|
313
|
-
model: z.string().optional().describe("Updated model name"),
|
|
314
|
-
modelProvider: z.string().optional().describe("Updated provider"),
|
|
315
|
-
commitMessage: z
|
|
366
|
+
name: z.string().describe("Scenario name"),
|
|
367
|
+
situation: z
|
|
316
368
|
.string()
|
|
369
|
+
.describe("The context or setup describing what the user/agent is doing"),
|
|
370
|
+
criteria: z
|
|
371
|
+
.array(z.string())
|
|
317
372
|
.optional()
|
|
318
|
-
.describe("
|
|
319
|
-
|
|
320
|
-
.
|
|
373
|
+
.describe("Pass/fail conditions the agent's response must satisfy"),
|
|
374
|
+
labels: z
|
|
375
|
+
.array(z.string())
|
|
321
376
|
.optional()
|
|
322
|
-
.describe(
|
|
323
|
-
"If true, creates a new version instead of updating in place"
|
|
324
|
-
),
|
|
377
|
+
.describe("Tags for organizing and filtering scenarios"),
|
|
325
378
|
},
|
|
326
379
|
async (params) => {
|
|
327
380
|
const { requireApiKey } = await import("./config.js");
|
|
328
381
|
requireApiKey();
|
|
329
|
-
const {
|
|
382
|
+
const { handleCreateScenario } = await import(
|
|
383
|
+
"./tools/create-scenario.js"
|
|
384
|
+
);
|
|
330
385
|
return {
|
|
331
|
-
content: [{ type: "text", text: await
|
|
386
|
+
content: [{ type: "text", text: await handleCreateScenario(params) }],
|
|
332
387
|
};
|
|
333
388
|
}
|
|
334
389
|
);
|
|
335
390
|
|
|
336
|
-
// --- Scenario Tools (require API key) ---
|
|
337
|
-
|
|
338
391
|
server.tool(
|
|
339
|
-
"
|
|
340
|
-
"List all scenarios
|
|
392
|
+
"platform_list_scenarios",
|
|
393
|
+
"List all scenarios on the LangWatch platform. Returns AI-readable digest by default.",
|
|
341
394
|
{
|
|
342
395
|
format: z
|
|
343
396
|
.enum(["digest", "json"])
|
|
@@ -357,8 +410,8 @@ server.tool(
|
|
|
357
410
|
);
|
|
358
411
|
|
|
359
412
|
server.tool(
|
|
360
|
-
"
|
|
361
|
-
"Get full details of a scenario by ID, including situation, criteria, and labels.",
|
|
413
|
+
"platform_get_scenario",
|
|
414
|
+
"Get full details of a scenario on the LangWatch platform by ID, including situation, criteria, and labels.",
|
|
362
415
|
{
|
|
363
416
|
scenarioId: z.string().describe("The scenario ID to retrieve"),
|
|
364
417
|
format: z
|
|
@@ -379,76 +432,184 @@ server.tool(
|
|
|
379
432
|
);
|
|
380
433
|
|
|
381
434
|
server.tool(
|
|
382
|
-
"
|
|
383
|
-
"
|
|
435
|
+
"platform_update_scenario",
|
|
436
|
+
"Update an existing scenario on the LangWatch platform.",
|
|
384
437
|
{
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
.describe("The context or setup describing what the user/agent is doing"),
|
|
438
|
+
scenarioId: z.string().describe("The scenario ID to update"),
|
|
439
|
+
name: z.string().optional().describe("Updated scenario name"),
|
|
440
|
+
situation: z.string().optional().describe("Updated situation"),
|
|
389
441
|
criteria: z
|
|
390
442
|
.array(z.string())
|
|
391
443
|
.optional()
|
|
392
|
-
.describe("
|
|
444
|
+
.describe("Updated criteria"),
|
|
393
445
|
labels: z
|
|
394
446
|
.array(z.string())
|
|
395
447
|
.optional()
|
|
396
|
-
.describe("
|
|
448
|
+
.describe("Updated labels"),
|
|
397
449
|
},
|
|
398
450
|
async (params) => {
|
|
399
451
|
const { requireApiKey } = await import("./config.js");
|
|
400
452
|
requireApiKey();
|
|
401
|
-
const {
|
|
402
|
-
"./tools/
|
|
453
|
+
const { handleUpdateScenario } = await import(
|
|
454
|
+
"./tools/update-scenario.js"
|
|
403
455
|
);
|
|
404
456
|
return {
|
|
405
|
-
content: [{ type: "text", text: await
|
|
457
|
+
content: [{ type: "text", text: await handleUpdateScenario(params) }],
|
|
406
458
|
};
|
|
407
459
|
}
|
|
408
460
|
);
|
|
409
461
|
|
|
410
462
|
server.tool(
|
|
411
|
-
"
|
|
412
|
-
"
|
|
463
|
+
"platform_archive_scenario",
|
|
464
|
+
"Archive (soft-delete) a scenario on the LangWatch platform.",
|
|
413
465
|
{
|
|
414
|
-
scenarioId: z.string().describe("The scenario ID to
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
.
|
|
421
|
-
|
|
422
|
-
|
|
466
|
+
scenarioId: z.string().describe("The scenario ID to archive"),
|
|
467
|
+
},
|
|
468
|
+
async (params) => {
|
|
469
|
+
const { requireApiKey } = await import("./config.js");
|
|
470
|
+
requireApiKey();
|
|
471
|
+
const { handleArchiveScenario } = await import(
|
|
472
|
+
"./tools/archive-scenario.js"
|
|
473
|
+
);
|
|
474
|
+
return {
|
|
475
|
+
content: [{ type: "text", text: await handleArchiveScenario(params) }],
|
|
476
|
+
};
|
|
477
|
+
}
|
|
478
|
+
);
|
|
479
|
+
|
|
480
|
+
// --- Platform Evaluator Tools (require API key) ---
|
|
481
|
+
// These tools manage evaluators on the LangWatch platform via API.
|
|
482
|
+
|
|
483
|
+
server.tool(
|
|
484
|
+
"platform_create_evaluator",
|
|
485
|
+
`Create an evaluator on the LangWatch platform. Useful for setting up LLM-as-judge and other evaluators to use in evaluation notebooks. Call discover_schema({ category: 'evaluators' }) first to see available evaluator types and their settings.`,
|
|
486
|
+
{
|
|
487
|
+
name: z.string().describe("Evaluator name"),
|
|
488
|
+
config: z
|
|
489
|
+
.record(z.string(), z.unknown())
|
|
490
|
+
.describe(
|
|
491
|
+
'Evaluator config object. Must include "evaluatorType" (e.g. "langevals/llm_boolean") and optional "settings" overrides.'
|
|
492
|
+
),
|
|
493
|
+
},
|
|
494
|
+
async (params) => {
|
|
495
|
+
const { requireApiKey } = await import("./config.js");
|
|
496
|
+
requireApiKey();
|
|
497
|
+
const { handleCreateEvaluator } = await import(
|
|
498
|
+
"./tools/create-evaluator.js"
|
|
499
|
+
);
|
|
500
|
+
return {
|
|
501
|
+
content: [{ type: "text", text: await handleCreateEvaluator(params) }],
|
|
502
|
+
};
|
|
503
|
+
}
|
|
504
|
+
);
|
|
505
|
+
|
|
506
|
+
server.tool(
|
|
507
|
+
"platform_list_evaluators",
|
|
508
|
+
"List all evaluators configured on the LangWatch platform.",
|
|
509
|
+
{},
|
|
510
|
+
async () => {
|
|
511
|
+
const { requireApiKey } = await import("./config.js");
|
|
512
|
+
requireApiKey();
|
|
513
|
+
const { handleListEvaluators } = await import(
|
|
514
|
+
"./tools/list-evaluators.js"
|
|
515
|
+
);
|
|
516
|
+
return {
|
|
517
|
+
content: [{ type: "text", text: await handleListEvaluators() }],
|
|
518
|
+
};
|
|
519
|
+
}
|
|
520
|
+
);
|
|
521
|
+
|
|
522
|
+
server.tool(
|
|
523
|
+
"platform_get_evaluator",
|
|
524
|
+
"Get full details of an evaluator on the LangWatch platform by ID or slug, including config, input fields, and output fields.",
|
|
525
|
+
{
|
|
526
|
+
idOrSlug: z.string().describe("The evaluator ID or slug to retrieve"),
|
|
527
|
+
},
|
|
528
|
+
async (params) => {
|
|
529
|
+
const { requireApiKey } = await import("./config.js");
|
|
530
|
+
requireApiKey();
|
|
531
|
+
const { handleGetEvaluator } = await import("./tools/get-evaluator.js");
|
|
532
|
+
return {
|
|
533
|
+
content: [{ type: "text", text: await handleGetEvaluator(params) }],
|
|
534
|
+
};
|
|
535
|
+
}
|
|
536
|
+
);
|
|
537
|
+
|
|
538
|
+
server.tool(
|
|
539
|
+
"platform_update_evaluator",
|
|
540
|
+
"Update an existing evaluator on the LangWatch platform. The evaluatorType in config cannot be changed after creation.",
|
|
541
|
+
{
|
|
542
|
+
evaluatorId: z.string().describe("The evaluator ID to update"),
|
|
543
|
+
name: z.string().optional().describe("Updated evaluator name"),
|
|
544
|
+
config: z
|
|
545
|
+
.record(z.string(), z.unknown())
|
|
423
546
|
.optional()
|
|
424
|
-
.describe(
|
|
547
|
+
.describe(
|
|
548
|
+
"Updated config settings. Note: evaluatorType cannot be changed after creation."
|
|
549
|
+
),
|
|
425
550
|
},
|
|
426
551
|
async (params) => {
|
|
427
552
|
const { requireApiKey } = await import("./config.js");
|
|
428
553
|
requireApiKey();
|
|
429
|
-
const {
|
|
430
|
-
"./tools/update-
|
|
554
|
+
const { handleUpdateEvaluator } = await import(
|
|
555
|
+
"./tools/update-evaluator.js"
|
|
431
556
|
);
|
|
432
557
|
return {
|
|
433
|
-
content: [{ type: "text", text: await
|
|
558
|
+
content: [{ type: "text", text: await handleUpdateEvaluator(params) }],
|
|
434
559
|
};
|
|
435
560
|
}
|
|
436
561
|
);
|
|
437
562
|
|
|
563
|
+
// --- Platform Model Provider Tools (require API key) ---
|
|
564
|
+
// These tools manage model provider API keys on the LangWatch platform.
|
|
565
|
+
|
|
438
566
|
server.tool(
|
|
439
|
-
"
|
|
440
|
-
|
|
567
|
+
"platform_set_model_provider",
|
|
568
|
+
`Set or update a model provider on the LangWatch platform. Use this to configure API keys (e.g. OPENAI_API_KEY) needed to run evaluators. The API key is stored securely and never returned in responses. Omit customKeys to update other settings without changing existing keys.`,
|
|
441
569
|
{
|
|
442
|
-
|
|
570
|
+
provider: z
|
|
571
|
+
.string()
|
|
572
|
+
.describe(
|
|
573
|
+
'Provider name, e.g., "openai", "anthropic", "azure", "custom"'
|
|
574
|
+
),
|
|
575
|
+
enabled: z.boolean().describe("Whether the provider is enabled"),
|
|
576
|
+
customKeys: z
|
|
577
|
+
.record(z.string(), z.unknown())
|
|
578
|
+
.optional()
|
|
579
|
+
.describe(
|
|
580
|
+
'API key configuration, e.g. { "OPENAI_API_KEY": "sk-..." }. Omit to keep existing keys.'
|
|
581
|
+
),
|
|
582
|
+
defaultModel: z
|
|
583
|
+
.string()
|
|
584
|
+
.optional()
|
|
585
|
+
.describe("Set as project default model"),
|
|
443
586
|
},
|
|
444
587
|
async (params) => {
|
|
445
588
|
const { requireApiKey } = await import("./config.js");
|
|
446
589
|
requireApiKey();
|
|
447
|
-
const {
|
|
448
|
-
"./tools/
|
|
590
|
+
const { handleSetModelProvider } = await import(
|
|
591
|
+
"./tools/set-model-provider.js"
|
|
449
592
|
);
|
|
450
593
|
return {
|
|
451
|
-
content: [
|
|
594
|
+
content: [
|
|
595
|
+
{ type: "text", text: await handleSetModelProvider(params) },
|
|
596
|
+
],
|
|
597
|
+
};
|
|
598
|
+
}
|
|
599
|
+
);
|
|
600
|
+
|
|
601
|
+
server.tool(
|
|
602
|
+
"platform_list_model_providers",
|
|
603
|
+
"List all model providers configured on the LangWatch platform. API keys are masked in the response.",
|
|
604
|
+
{},
|
|
605
|
+
async () => {
|
|
606
|
+
const { requireApiKey } = await import("./config.js");
|
|
607
|
+
requireApiKey();
|
|
608
|
+
const { handleListModelProviders } = await import(
|
|
609
|
+
"./tools/list-model-providers.js"
|
|
610
|
+
);
|
|
611
|
+
return {
|
|
612
|
+
content: [{ type: "text", text: await handleListModelProviders() }],
|
|
452
613
|
};
|
|
453
614
|
}
|
|
454
615
|
);
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { makeRequest } from "./langwatch-api.js";
|
|
2
|
+
|
|
3
|
+
// --- Evaluator types ---
|
|
4
|
+
|
|
5
|
+
export interface EvaluatorSummary {
|
|
6
|
+
id: string;
|
|
7
|
+
projectId: string;
|
|
8
|
+
name: string;
|
|
9
|
+
slug: string | null;
|
|
10
|
+
type: string;
|
|
11
|
+
config: Record<string, unknown> | null;
|
|
12
|
+
workflowId: string | null;
|
|
13
|
+
copiedFromEvaluatorId: string | null;
|
|
14
|
+
createdAt: string;
|
|
15
|
+
updatedAt: string;
|
|
16
|
+
fields: Array<{ identifier: string; type: string; optional?: boolean }>;
|
|
17
|
+
outputFields: Array<{ identifier: string; type: string; optional?: boolean }>;
|
|
18
|
+
workflowName?: string;
|
|
19
|
+
workflowIcon?: string;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// --- Helpers ---
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Extracts the evaluatorType from an evaluator's config.
|
|
26
|
+
* Centralises the cast so callers don't repeat it.
|
|
27
|
+
*/
|
|
28
|
+
export function getEvaluatorType(
|
|
29
|
+
evaluator: Pick<EvaluatorSummary, "config">,
|
|
30
|
+
): string | undefined {
|
|
31
|
+
return (evaluator.config as Record<string, unknown> | null)
|
|
32
|
+
?.evaluatorType as string | undefined;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// --- Evaluator API functions ---
|
|
36
|
+
|
|
37
|
+
/** Lists all evaluators in the project. */
|
|
38
|
+
export async function listEvaluators(): Promise<EvaluatorSummary[]> {
|
|
39
|
+
return makeRequest("GET", "/api/evaluators") as Promise<EvaluatorSummary[]>;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/** Retrieves a single evaluator by ID or slug. */
|
|
43
|
+
export async function getEvaluator(idOrSlug: string): Promise<EvaluatorSummary> {
|
|
44
|
+
return makeRequest(
|
|
45
|
+
"GET",
|
|
46
|
+
`/api/evaluators/${encodeURIComponent(idOrSlug)}`,
|
|
47
|
+
) as Promise<EvaluatorSummary>;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/** Creates a new evaluator. */
|
|
51
|
+
export async function createEvaluator(data: {
|
|
52
|
+
name: string;
|
|
53
|
+
config: Record<string, unknown>;
|
|
54
|
+
}): Promise<EvaluatorSummary> {
|
|
55
|
+
return makeRequest("POST", "/api/evaluators", data) as Promise<EvaluatorSummary>;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/** Updates an existing evaluator. */
|
|
59
|
+
export async function updateEvaluator(params: {
|
|
60
|
+
id: string;
|
|
61
|
+
name?: string;
|
|
62
|
+
config?: Record<string, unknown>;
|
|
63
|
+
}): Promise<EvaluatorSummary> {
|
|
64
|
+
const { id, ...data } = params;
|
|
65
|
+
return makeRequest(
|
|
66
|
+
"PUT",
|
|
67
|
+
`/api/evaluators/${encodeURIComponent(id)}`,
|
|
68
|
+
data,
|
|
69
|
+
) as Promise<EvaluatorSummary>;
|
|
70
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { makeRequest } from "./langwatch-api.js";
|
|
2
|
+
|
|
3
|
+
// --- Model Provider types ---
|
|
4
|
+
|
|
5
|
+
export interface ModelProviderEntry {
|
|
6
|
+
id?: string;
|
|
7
|
+
provider: string;
|
|
8
|
+
enabled: boolean;
|
|
9
|
+
customKeys: Record<string, unknown> | null;
|
|
10
|
+
models?: string[] | null;
|
|
11
|
+
embeddingsModels?: string[] | null;
|
|
12
|
+
customModels?: unknown[] | null;
|
|
13
|
+
customEmbeddingsModels?: unknown[] | null;
|
|
14
|
+
disabledByDefault?: boolean;
|
|
15
|
+
deploymentMapping?: unknown;
|
|
16
|
+
extraHeaders?: Array<{ key: string; value: string }> | null;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
// --- Model Provider API functions ---
|
|
20
|
+
|
|
21
|
+
/** Lists all model providers for the project, with masked API keys. */
|
|
22
|
+
export async function listModelProviders(): Promise<Record<string, ModelProviderEntry>> {
|
|
23
|
+
return makeRequest("GET", "/api/model-providers") as Promise<
|
|
24
|
+
Record<string, ModelProviderEntry>
|
|
25
|
+
>;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/** Creates or updates a model provider. */
|
|
29
|
+
export async function setModelProvider(params: {
|
|
30
|
+
provider: string;
|
|
31
|
+
enabled: boolean;
|
|
32
|
+
customKeys?: Record<string, unknown>;
|
|
33
|
+
defaultModel?: string;
|
|
34
|
+
}): Promise<Record<string, ModelProviderEntry>> {
|
|
35
|
+
const { provider, ...data } = params;
|
|
36
|
+
return makeRequest(
|
|
37
|
+
"PUT",
|
|
38
|
+
`/api/model-providers/${encodeURIComponent(provider)}`,
|
|
39
|
+
data,
|
|
40
|
+
) as Promise<Record<string, ModelProviderEntry>>;
|
|
41
|
+
}
|
package/src/langwatch-api.ts
CHANGED
|
@@ -78,7 +78,6 @@ export interface PromptSummary {
|
|
|
78
78
|
id?: string;
|
|
79
79
|
handle?: string;
|
|
80
80
|
name?: string;
|
|
81
|
-
description?: string | null;
|
|
82
81
|
latestVersionNumber?: number;
|
|
83
82
|
version?: number;
|
|
84
83
|
}
|
|
@@ -87,14 +86,12 @@ export interface PromptVersion {
|
|
|
87
86
|
version?: number;
|
|
88
87
|
commitMessage?: string;
|
|
89
88
|
model?: string;
|
|
90
|
-
modelProvider?: string;
|
|
91
89
|
messages?: Array<{ role: string; content: string }>;
|
|
92
90
|
}
|
|
93
91
|
|
|
94
92
|
export interface PromptDetailResponse extends PromptSummary {
|
|
95
93
|
versions?: PromptVersion[];
|
|
96
94
|
model?: string;
|
|
97
|
-
modelProvider?: string;
|
|
98
95
|
messages?: Array<{ role: string; content: string }>;
|
|
99
96
|
prompt?: Array<{ role: string; content: string }>;
|
|
100
97
|
}
|
|
@@ -216,12 +213,9 @@ export async function getPrompt(
|
|
|
216
213
|
|
|
217
214
|
/** Creates a new prompt. */
|
|
218
215
|
export async function createPrompt(data: {
|
|
219
|
-
|
|
220
|
-
handle?: string;
|
|
216
|
+
handle: string;
|
|
221
217
|
messages: Array<{ role: string; content: string }>;
|
|
222
218
|
model: string;
|
|
223
|
-
modelProvider: string;
|
|
224
|
-
description?: string;
|
|
225
219
|
}): Promise<PromptMutationResponse> {
|
|
226
220
|
return makeRequest(
|
|
227
221
|
"POST",
|
|
@@ -236,31 +230,12 @@ export async function updatePrompt(
|
|
|
236
230
|
data: {
|
|
237
231
|
messages?: Array<{ role: string; content: string }>;
|
|
238
232
|
model?: string;
|
|
239
|
-
|
|
240
|
-
commitMessage?: string;
|
|
233
|
+
commitMessage: string;
|
|
241
234
|
}
|
|
242
235
|
): Promise<PromptMutationResponse> {
|
|
243
236
|
return makeRequest(
|
|
244
|
-
"
|
|
237
|
+
"PUT",
|
|
245
238
|
`/api/prompts/${encodeURIComponent(idOrHandle)}`,
|
|
246
239
|
data
|
|
247
240
|
) as Promise<PromptMutationResponse>;
|
|
248
241
|
}
|
|
249
|
-
|
|
250
|
-
/** Creates a new version of an existing prompt. */
|
|
251
|
-
export async function createPromptVersion(
|
|
252
|
-
idOrHandle: string,
|
|
253
|
-
data: {
|
|
254
|
-
messages?: Array<{ role: string; content: string }>;
|
|
255
|
-
model?: string;
|
|
256
|
-
modelProvider?: string;
|
|
257
|
-
commitMessage?: string;
|
|
258
|
-
}
|
|
259
|
-
): Promise<PromptMutationResponse> {
|
|
260
|
-
return makeRequest(
|
|
261
|
-
"POST",
|
|
262
|
-
`/api/prompts/${encodeURIComponent(idOrHandle)}/versions`,
|
|
263
|
-
data
|
|
264
|
-
) as Promise<PromptMutationResponse>;
|
|
265
|
-
}
|
|
266
|
-
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { archiveScenario as apiArchiveScenario } from "../langwatch-api-scenarios.js";
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
* Handles the
|
|
4
|
+
* Handles the platform_archive_scenario MCP tool invocation.
|
|
5
5
|
*
|
|
6
6
|
* Archives (soft-deletes) a scenario and returns confirmation.
|
|
7
7
|
*/
|