@langwatch/mcp-server 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -0
- package/dist/archive-scenario-YFD5THOR.js +19 -0
- package/dist/archive-scenario-YFD5THOR.js.map +1 -0
- package/dist/chunk-5UOPNRXW.js +37 -0
- package/dist/chunk-5UOPNRXW.js.map +1 -0
- package/dist/chunk-6U4TCGFC.js +40 -0
- package/dist/chunk-6U4TCGFC.js.map +1 -0
- package/dist/chunk-IX6QJKAD.js +22 -0
- package/dist/chunk-IX6QJKAD.js.map +1 -0
- package/dist/{chunk-HOPTUDCZ.js → chunk-LLRQIF52.js} +5 -12
- package/dist/chunk-LLRQIF52.js.map +1 -0
- package/dist/create-evaluator-E5X5ZP3B.js +27 -0
- package/dist/create-evaluator-E5X5ZP3B.js.map +1 -0
- package/dist/create-prompt-7Z35MIL6.js +36 -0
- package/dist/create-prompt-7Z35MIL6.js.map +1 -0
- package/dist/create-scenario-DIMPJRPY.js +26 -0
- package/dist/create-scenario-DIMPJRPY.js.map +1 -0
- package/dist/discover-evaluator-schema-H23XCLNE.js +1402 -0
- package/dist/discover-evaluator-schema-H23XCLNE.js.map +1 -0
- package/dist/discover-scenario-schema-MEEEVND7.js +65 -0
- package/dist/discover-scenario-schema-MEEEVND7.js.map +1 -0
- package/dist/{get-analytics-3IFTN6MY.js → get-analytics-4YJW4S5L.js} +2 -2
- package/dist/get-evaluator-WDEH2F7M.js +47 -0
- package/dist/get-evaluator-WDEH2F7M.js.map +1 -0
- package/dist/{get-prompt-2ZB5B3QC.js → get-prompt-F6PDVC76.js} +2 -5
- package/dist/get-prompt-F6PDVC76.js.map +1 -0
- package/dist/get-scenario-H24ZYNT5.js +33 -0
- package/dist/get-scenario-H24ZYNT5.js.map +1 -0
- package/dist/{get-trace-7IXKKCJJ.js → get-trace-27USKGO7.js} +2 -2
- package/dist/index.js +27066 -8845
- package/dist/index.js.map +1 -1
- package/dist/list-evaluators-KRGI72EH.js +34 -0
- package/dist/list-evaluators-KRGI72EH.js.map +1 -0
- package/dist/list-model-providers-A5YCFTPI.js +35 -0
- package/dist/list-model-providers-A5YCFTPI.js.map +1 -0
- package/dist/{list-prompts-J72LTP7Z.js → list-prompts-LKJSE7XN.js} +6 -7
- package/dist/list-prompts-LKJSE7XN.js.map +1 -0
- package/dist/list-scenarios-ZK5CMGC4.js +40 -0
- package/dist/list-scenarios-ZK5CMGC4.js.map +1 -0
- package/dist/{search-traces-RW2NDHN5.js → search-traces-SOKAAMAR.js} +2 -2
- package/dist/set-model-provider-7MGULZDH.js +33 -0
- package/dist/set-model-provider-7MGULZDH.js.map +1 -0
- package/dist/update-evaluator-A3XINFLJ.js +24 -0
- package/dist/update-evaluator-A3XINFLJ.js.map +1 -0
- package/dist/update-prompt-IW7X2UQM.js +22 -0
- package/dist/update-prompt-IW7X2UQM.js.map +1 -0
- package/dist/update-scenario-ZT7TOBFR.js +27 -0
- package/dist/update-scenario-ZT7TOBFR.js.map +1 -0
- package/package.json +11 -11
- package/src/__tests__/all-tools.integration.test.ts +1337 -0
- package/src/__tests__/discover-evaluator-schema.unit.test.ts +89 -0
- package/src/__tests__/evaluator-tools.unit.test.ts +262 -0
- package/src/__tests__/integration.integration.test.ts +9 -34
- package/src/__tests__/langwatch-api.unit.test.ts +4 -32
- package/src/__tests__/model-provider-tools.unit.test.ts +190 -0
- package/src/__tests__/scenario-tools.integration.test.ts +286 -0
- package/src/__tests__/scenario-tools.unit.test.ts +185 -0
- package/src/__tests__/tools.unit.test.ts +59 -65
- package/src/index.ts +338 -48
- package/src/langwatch-api-evaluators.ts +70 -0
- package/src/langwatch-api-model-providers.ts +41 -0
- package/src/langwatch-api-scenarios.ts +67 -0
- package/src/langwatch-api.ts +6 -30
- package/src/tools/archive-scenario.ts +19 -0
- package/src/tools/create-evaluator.ts +33 -0
- package/src/tools/create-prompt.ts +30 -5
- package/src/tools/create-scenario.ts +30 -0
- package/src/tools/discover-evaluator-schema.ts +143 -0
- package/src/tools/discover-scenario-schema.ts +71 -0
- package/src/tools/get-evaluator.ts +53 -0
- package/src/tools/get-prompt.ts +1 -4
- package/src/tools/get-scenario.ts +36 -0
- package/src/tools/list-evaluators.ts +37 -0
- package/src/tools/list-model-providers.ts +40 -0
- package/src/tools/list-prompts.ts +5 -6
- package/src/tools/list-scenarios.ts +47 -0
- package/src/tools/set-model-provider.ts +46 -0
- package/src/tools/update-evaluator.ts +30 -0
- package/src/tools/update-prompt.ts +9 -25
- package/src/tools/update-scenario.ts +32 -0
- package/uv.lock +1788 -1322
- package/dist/chunk-HOPTUDCZ.js.map +0 -1
- package/dist/create-prompt-UBC537BJ.js +0 -22
- package/dist/create-prompt-UBC537BJ.js.map +0 -1
- package/dist/get-prompt-2ZB5B3QC.js.map +0 -1
- package/dist/list-prompts-J72LTP7Z.js.map +0 -1
- package/dist/update-prompt-G6HHZSUM.js +0 -31
- package/dist/update-prompt-G6HHZSUM.js.map +0 -1
- /package/dist/{get-analytics-3IFTN6MY.js.map → get-analytics-4YJW4S5L.js.map} +0 -0
- /package/dist/{get-trace-7IXKKCJJ.js.map → get-trace-27USKGO7.js.map} +0 -0
- /package/dist/{search-traces-RW2NDHN5.js.map → search-traces-SOKAAMAR.js.map} +0 -0
package/src/index.ts
CHANGED
|
@@ -94,15 +94,34 @@ server.tool(
|
|
|
94
94
|
|
|
95
95
|
server.tool(
|
|
96
96
|
"discover_schema",
|
|
97
|
-
"Discover available filter fields, metrics, aggregation types,
|
|
97
|
+
"Discover available filter fields, metrics, aggregation types, group-by options, scenario schema, and evaluator types for LangWatch queries. Call this before using search_traces, get_analytics, scenario tools, or evaluator tools to understand available options.",
|
|
98
98
|
{
|
|
99
99
|
category: z
|
|
100
|
-
.enum(["filters", "metrics", "aggregations", "groups", "all"])
|
|
100
|
+
.enum(["filters", "metrics", "aggregations", "groups", "scenarios", "evaluators", "all"])
|
|
101
101
|
.describe("Which schema category to discover"),
|
|
102
|
+
evaluatorType: z
|
|
103
|
+
.string()
|
|
104
|
+
.optional()
|
|
105
|
+
.describe("When category is 'evaluators', provide a specific evaluator type (e.g. 'langevals/llm_judge') to get its full schema details"),
|
|
102
106
|
},
|
|
103
|
-
async ({ category }) => {
|
|
107
|
+
async ({ category, evaluatorType }) => {
|
|
108
|
+
if (category === "scenarios") {
|
|
109
|
+
const { formatScenarioSchema } = await import("./tools/discover-scenario-schema.js");
|
|
110
|
+
return { content: [{ type: "text", text: formatScenarioSchema() }] };
|
|
111
|
+
}
|
|
112
|
+
if (category === "evaluators") {
|
|
113
|
+
const { formatEvaluatorSchema } = await import("./tools/discover-evaluator-schema.js");
|
|
114
|
+
return { content: [{ type: "text", text: formatEvaluatorSchema(evaluatorType) }] };
|
|
115
|
+
}
|
|
104
116
|
const { formatSchema } = await import("./tools/discover-schema.js");
|
|
105
|
-
|
|
117
|
+
let text = formatSchema(category);
|
|
118
|
+
if (category === "all") {
|
|
119
|
+
const { formatScenarioSchema } = await import("./tools/discover-scenario-schema.js");
|
|
120
|
+
text += "\n\n" + formatScenarioSchema();
|
|
121
|
+
const { formatEvaluatorSchema } = await import("./tools/discover-evaluator-schema.js");
|
|
122
|
+
text += "\n\n" + formatEvaluatorSchema();
|
|
123
|
+
}
|
|
124
|
+
return { content: [{ type: "text", text }] };
|
|
106
125
|
}
|
|
107
126
|
);
|
|
108
127
|
|
|
@@ -216,9 +235,59 @@ server.tool(
|
|
|
216
235
|
}
|
|
217
236
|
);
|
|
218
237
|
|
|
238
|
+
// --- Platform Prompt Tools (require API key) ---
|
|
239
|
+
// These tools manage prompts on the LangWatch platform via API.
|
|
240
|
+
// For code-based prompt management, see `fetch_langwatch_docs` for the CLI/SDK approach.
|
|
241
|
+
|
|
242
|
+
const modelSchema = z
|
|
243
|
+
.string()
|
|
244
|
+
.describe(
|
|
245
|
+
'Model in "provider/model-name" format, e.g., "openai/gpt-4o", "anthropic/claude-sonnet-4-5-20250929"'
|
|
246
|
+
);
|
|
247
|
+
|
|
248
|
+
server.tool(
|
|
249
|
+
"platform_create_prompt",
|
|
250
|
+
`Create a new prompt on the LangWatch platform.
|
|
251
|
+
|
|
252
|
+
NOTE: Prompts can be managed two ways. Determine which approach the user needs:
|
|
253
|
+
|
|
254
|
+
1. Code-based (CLI/SDK): If the user wants to manage prompts in their codebase, use \`fetch_langwatch_docs\` to learn about the prompt management CLI/SDK. This lets them version-control prompts and pull them into code.
|
|
255
|
+
|
|
256
|
+
2. Platform-based (LangWatch UI): If the user wants to manage prompts directly on the LangWatch platform, use the \`platform_\` MCP tools (\`platform_create_prompt\`, \`platform_update_prompt\`, etc.).
|
|
257
|
+
`,
|
|
258
|
+
{
|
|
259
|
+
name: z.string().describe("Prompt display name"),
|
|
260
|
+
handle: z
|
|
261
|
+
.string()
|
|
262
|
+
.optional()
|
|
263
|
+
.describe(
|
|
264
|
+
"URL-friendly handle (auto-generated from name if omitted)"
|
|
265
|
+
),
|
|
266
|
+
messages: z
|
|
267
|
+
.array(
|
|
268
|
+
z.object({
|
|
269
|
+
role: z
|
|
270
|
+
.enum(["system", "user", "assistant"])
|
|
271
|
+
.describe("Message role"),
|
|
272
|
+
content: z.string().describe("Message content"),
|
|
273
|
+
})
|
|
274
|
+
)
|
|
275
|
+
.describe("Prompt messages"),
|
|
276
|
+
model: modelSchema,
|
|
277
|
+
},
|
|
278
|
+
async (params) => {
|
|
279
|
+
const { requireApiKey } = await import("./config.js");
|
|
280
|
+
requireApiKey();
|
|
281
|
+
const { handleCreatePrompt } = await import("./tools/create-prompt.js");
|
|
282
|
+
return {
|
|
283
|
+
content: [{ type: "text", text: await handleCreatePrompt(params) }],
|
|
284
|
+
};
|
|
285
|
+
}
|
|
286
|
+
);
|
|
287
|
+
|
|
219
288
|
server.tool(
|
|
220
|
-
"
|
|
221
|
-
"List all prompts configured
|
|
289
|
+
"platform_list_prompts",
|
|
290
|
+
"List all prompts configured on the LangWatch platform.",
|
|
222
291
|
{},
|
|
223
292
|
async () => {
|
|
224
293
|
const { requireApiKey } = await import("./config.js");
|
|
@@ -231,8 +300,8 @@ server.tool(
|
|
|
231
300
|
);
|
|
232
301
|
|
|
233
302
|
server.tool(
|
|
234
|
-
"
|
|
235
|
-
"Get a specific prompt by ID or handle, including messages, model config, and version history.",
|
|
303
|
+
"platform_get_prompt",
|
|
304
|
+
"Get a specific prompt from the LangWatch platform by ID or handle, including messages, model config, and version history.",
|
|
236
305
|
{
|
|
237
306
|
idOrHandle: z.string().describe("Prompt ID or handle"),
|
|
238
307
|
version: z
|
|
@@ -251,75 +320,296 @@ server.tool(
|
|
|
251
320
|
);
|
|
252
321
|
|
|
253
322
|
server.tool(
|
|
254
|
-
"
|
|
255
|
-
"
|
|
323
|
+
"platform_update_prompt",
|
|
324
|
+
"Update an existing prompt on the LangWatch platform. Every update creates a new version.",
|
|
256
325
|
{
|
|
257
|
-
|
|
258
|
-
handle: z
|
|
259
|
-
.string()
|
|
260
|
-
.optional()
|
|
261
|
-
.describe("URL-friendly handle (auto-generated if omitted)"),
|
|
326
|
+
idOrHandle: z.string().describe("Prompt ID or handle to update"),
|
|
262
327
|
messages: z
|
|
263
328
|
.array(
|
|
264
329
|
z.object({
|
|
265
|
-
role: z
|
|
266
|
-
|
|
267
|
-
.describe("Message role"),
|
|
268
|
-
content: z.string().describe("Message content"),
|
|
330
|
+
role: z.enum(["system", "user", "assistant"]),
|
|
331
|
+
content: z.string(),
|
|
269
332
|
})
|
|
270
333
|
)
|
|
271
|
-
.
|
|
272
|
-
|
|
334
|
+
.optional()
|
|
335
|
+
.describe("Updated messages"),
|
|
336
|
+
model: modelSchema.optional(),
|
|
337
|
+
commitMessage: z
|
|
273
338
|
.string()
|
|
274
|
-
.describe(
|
|
275
|
-
|
|
339
|
+
.describe("Commit message describing the change"),
|
|
340
|
+
},
|
|
341
|
+
async (params) => {
|
|
342
|
+
const { requireApiKey } = await import("./config.js");
|
|
343
|
+
requireApiKey();
|
|
344
|
+
const { handleUpdatePrompt } = await import("./tools/update-prompt.js");
|
|
345
|
+
return {
|
|
346
|
+
content: [{ type: "text", text: await handleUpdatePrompt(params) }],
|
|
347
|
+
};
|
|
348
|
+
}
|
|
349
|
+
);
|
|
350
|
+
|
|
351
|
+
// --- Platform Scenario Tools (require API key) ---
|
|
352
|
+
// These tools manage scenarios on the LangWatch platform via API.
|
|
353
|
+
// For code-based scenario testing, see `fetch_scenario_docs` for the SDK approach.
|
|
354
|
+
|
|
355
|
+
server.tool(
|
|
356
|
+
"platform_create_scenario",
|
|
357
|
+
`Create a new scenario on the LangWatch platform. Call discover_schema({ category: 'scenarios' }) first to learn how to write effective situations and criteria.
|
|
358
|
+
|
|
359
|
+
NOTE: Scenarios can be created two ways. Determine which approach the user needs:
|
|
360
|
+
|
|
361
|
+
1. Code-based (local testing): If the user has a codebase with an AI agent they want to test, use \`fetch_scenario_docs\` to learn about the Scenario Python/TypeScript SDK. This lets them run tests locally and iterate in code.
|
|
362
|
+
|
|
363
|
+
2. Platform-based (LangWatch UI): If the user wants to manage scenarios directly on the LangWatch platform, use the \`platform_\` MCP tools (\`platform_create_scenario\`, \`platform_update_scenario\`, etc.).
|
|
364
|
+
`,
|
|
365
|
+
{
|
|
366
|
+
name: z.string().describe("Scenario name"),
|
|
367
|
+
situation: z
|
|
276
368
|
.string()
|
|
277
|
-
.describe(
|
|
278
|
-
|
|
369
|
+
.describe("The context or setup describing what the user/agent is doing"),
|
|
370
|
+
criteria: z
|
|
371
|
+
.array(z.string())
|
|
372
|
+
.optional()
|
|
373
|
+
.describe("Pass/fail conditions the agent's response must satisfy"),
|
|
374
|
+
labels: z
|
|
375
|
+
.array(z.string())
|
|
376
|
+
.optional()
|
|
377
|
+
.describe("Tags for organizing and filtering scenarios"),
|
|
279
378
|
},
|
|
280
379
|
async (params) => {
|
|
281
380
|
const { requireApiKey } = await import("./config.js");
|
|
282
381
|
requireApiKey();
|
|
283
|
-
const {
|
|
382
|
+
const { handleCreateScenario } = await import(
|
|
383
|
+
"./tools/create-scenario.js"
|
|
384
|
+
);
|
|
284
385
|
return {
|
|
285
|
-
content: [{ type: "text", text: await
|
|
386
|
+
content: [{ type: "text", text: await handleCreateScenario(params) }],
|
|
286
387
|
};
|
|
287
388
|
}
|
|
288
389
|
);
|
|
289
390
|
|
|
290
391
|
server.tool(
|
|
291
|
-
"
|
|
292
|
-
"
|
|
392
|
+
"platform_list_scenarios",
|
|
393
|
+
"List all scenarios on the LangWatch platform. Returns AI-readable digest by default.",
|
|
293
394
|
{
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
.array(
|
|
297
|
-
z.object({
|
|
298
|
-
role: z.enum(["system", "user", "assistant"]),
|
|
299
|
-
content: z.string(),
|
|
300
|
-
})
|
|
301
|
-
)
|
|
395
|
+
format: z
|
|
396
|
+
.enum(["digest", "json"])
|
|
302
397
|
.optional()
|
|
303
|
-
.describe(
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
398
|
+
.describe(
|
|
399
|
+
"Output format: 'digest' (default, AI-readable) or 'json' (full raw data)"
|
|
400
|
+
),
|
|
401
|
+
},
|
|
402
|
+
async (params) => {
|
|
403
|
+
const { requireApiKey } = await import("./config.js");
|
|
404
|
+
requireApiKey();
|
|
405
|
+
const { handleListScenarios } = await import("./tools/list-scenarios.js");
|
|
406
|
+
return {
|
|
407
|
+
content: [{ type: "text", text: await handleListScenarios(params) }],
|
|
408
|
+
};
|
|
409
|
+
}
|
|
410
|
+
);
|
|
411
|
+
|
|
412
|
+
server.tool(
|
|
413
|
+
"platform_get_scenario",
|
|
414
|
+
"Get full details of a scenario on the LangWatch platform by ID, including situation, criteria, and labels.",
|
|
415
|
+
{
|
|
416
|
+
scenarioId: z.string().describe("The scenario ID to retrieve"),
|
|
417
|
+
format: z
|
|
418
|
+
.enum(["digest", "json"])
|
|
308
419
|
.optional()
|
|
309
|
-
.describe(
|
|
310
|
-
|
|
311
|
-
|
|
420
|
+
.describe(
|
|
421
|
+
"Output format: 'digest' (default, AI-readable) or 'json' (full raw data)"
|
|
422
|
+
),
|
|
423
|
+
},
|
|
424
|
+
async (params) => {
|
|
425
|
+
const { requireApiKey } = await import("./config.js");
|
|
426
|
+
requireApiKey();
|
|
427
|
+
const { handleGetScenario } = await import("./tools/get-scenario.js");
|
|
428
|
+
return {
|
|
429
|
+
content: [{ type: "text", text: await handleGetScenario(params) }],
|
|
430
|
+
};
|
|
431
|
+
}
|
|
432
|
+
);
|
|
433
|
+
|
|
434
|
+
server.tool(
|
|
435
|
+
"platform_update_scenario",
|
|
436
|
+
"Update an existing scenario on the LangWatch platform.",
|
|
437
|
+
{
|
|
438
|
+
scenarioId: z.string().describe("The scenario ID to update"),
|
|
439
|
+
name: z.string().optional().describe("Updated scenario name"),
|
|
440
|
+
situation: z.string().optional().describe("Updated situation"),
|
|
441
|
+
criteria: z
|
|
442
|
+
.array(z.string())
|
|
443
|
+
.optional()
|
|
444
|
+
.describe("Updated criteria"),
|
|
445
|
+
labels: z
|
|
446
|
+
.array(z.string())
|
|
312
447
|
.optional()
|
|
448
|
+
.describe("Updated labels"),
|
|
449
|
+
},
|
|
450
|
+
async (params) => {
|
|
451
|
+
const { requireApiKey } = await import("./config.js");
|
|
452
|
+
requireApiKey();
|
|
453
|
+
const { handleUpdateScenario } = await import(
|
|
454
|
+
"./tools/update-scenario.js"
|
|
455
|
+
);
|
|
456
|
+
return {
|
|
457
|
+
content: [{ type: "text", text: await handleUpdateScenario(params) }],
|
|
458
|
+
};
|
|
459
|
+
}
|
|
460
|
+
);
|
|
461
|
+
|
|
462
|
+
server.tool(
|
|
463
|
+
"platform_archive_scenario",
|
|
464
|
+
"Archive (soft-delete) a scenario on the LangWatch platform.",
|
|
465
|
+
{
|
|
466
|
+
scenarioId: z.string().describe("The scenario ID to archive"),
|
|
467
|
+
},
|
|
468
|
+
async (params) => {
|
|
469
|
+
const { requireApiKey } = await import("./config.js");
|
|
470
|
+
requireApiKey();
|
|
471
|
+
const { handleArchiveScenario } = await import(
|
|
472
|
+
"./tools/archive-scenario.js"
|
|
473
|
+
);
|
|
474
|
+
return {
|
|
475
|
+
content: [{ type: "text", text: await handleArchiveScenario(params) }],
|
|
476
|
+
};
|
|
477
|
+
}
|
|
478
|
+
);
|
|
479
|
+
|
|
480
|
+
// --- Platform Evaluator Tools (require API key) ---
|
|
481
|
+
// These tools manage evaluators on the LangWatch platform via API.
|
|
482
|
+
|
|
483
|
+
server.tool(
|
|
484
|
+
"platform_create_evaluator",
|
|
485
|
+
`Create an evaluator on the LangWatch platform. Useful for setting up LLM-as-judge and other evaluators to use in evaluation notebooks. Call discover_schema({ category: 'evaluators' }) first to see available evaluator types and their settings.`,
|
|
486
|
+
{
|
|
487
|
+
name: z.string().describe("Evaluator name"),
|
|
488
|
+
config: z
|
|
489
|
+
.record(z.string(), z.unknown())
|
|
313
490
|
.describe(
|
|
314
|
-
|
|
491
|
+
'Evaluator config object. Must include "evaluatorType" (e.g. "langevals/llm_boolean") and optional "settings" overrides.'
|
|
315
492
|
),
|
|
316
493
|
},
|
|
317
494
|
async (params) => {
|
|
318
495
|
const { requireApiKey } = await import("./config.js");
|
|
319
496
|
requireApiKey();
|
|
320
|
-
const {
|
|
497
|
+
const { handleCreateEvaluator } = await import(
|
|
498
|
+
"./tools/create-evaluator.js"
|
|
499
|
+
);
|
|
321
500
|
return {
|
|
322
|
-
content: [{ type: "text", text: await
|
|
501
|
+
content: [{ type: "text", text: await handleCreateEvaluator(params) }],
|
|
502
|
+
};
|
|
503
|
+
}
|
|
504
|
+
);
|
|
505
|
+
|
|
506
|
+
server.tool(
|
|
507
|
+
"platform_list_evaluators",
|
|
508
|
+
"List all evaluators configured on the LangWatch platform.",
|
|
509
|
+
{},
|
|
510
|
+
async () => {
|
|
511
|
+
const { requireApiKey } = await import("./config.js");
|
|
512
|
+
requireApiKey();
|
|
513
|
+
const { handleListEvaluators } = await import(
|
|
514
|
+
"./tools/list-evaluators.js"
|
|
515
|
+
);
|
|
516
|
+
return {
|
|
517
|
+
content: [{ type: "text", text: await handleListEvaluators() }],
|
|
518
|
+
};
|
|
519
|
+
}
|
|
520
|
+
);
|
|
521
|
+
|
|
522
|
+
server.tool(
|
|
523
|
+
"platform_get_evaluator",
|
|
524
|
+
"Get full details of an evaluator on the LangWatch platform by ID or slug, including config, input fields, and output fields.",
|
|
525
|
+
{
|
|
526
|
+
idOrSlug: z.string().describe("The evaluator ID or slug to retrieve"),
|
|
527
|
+
},
|
|
528
|
+
async (params) => {
|
|
529
|
+
const { requireApiKey } = await import("./config.js");
|
|
530
|
+
requireApiKey();
|
|
531
|
+
const { handleGetEvaluator } = await import("./tools/get-evaluator.js");
|
|
532
|
+
return {
|
|
533
|
+
content: [{ type: "text", text: await handleGetEvaluator(params) }],
|
|
534
|
+
};
|
|
535
|
+
}
|
|
536
|
+
);
|
|
537
|
+
|
|
538
|
+
server.tool(
|
|
539
|
+
"platform_update_evaluator",
|
|
540
|
+
"Update an existing evaluator on the LangWatch platform. The evaluatorType in config cannot be changed after creation.",
|
|
541
|
+
{
|
|
542
|
+
evaluatorId: z.string().describe("The evaluator ID to update"),
|
|
543
|
+
name: z.string().optional().describe("Updated evaluator name"),
|
|
544
|
+
config: z
|
|
545
|
+
.record(z.string(), z.unknown())
|
|
546
|
+
.optional()
|
|
547
|
+
.describe(
|
|
548
|
+
"Updated config settings. Note: evaluatorType cannot be changed after creation."
|
|
549
|
+
),
|
|
550
|
+
},
|
|
551
|
+
async (params) => {
|
|
552
|
+
const { requireApiKey } = await import("./config.js");
|
|
553
|
+
requireApiKey();
|
|
554
|
+
const { handleUpdateEvaluator } = await import(
|
|
555
|
+
"./tools/update-evaluator.js"
|
|
556
|
+
);
|
|
557
|
+
return {
|
|
558
|
+
content: [{ type: "text", text: await handleUpdateEvaluator(params) }],
|
|
559
|
+
};
|
|
560
|
+
}
|
|
561
|
+
);
|
|
562
|
+
|
|
563
|
+
// --- Platform Model Provider Tools (require API key) ---
|
|
564
|
+
// These tools manage model provider API keys on the LangWatch platform.
|
|
565
|
+
|
|
566
|
+
server.tool(
|
|
567
|
+
"platform_set_model_provider",
|
|
568
|
+
`Set or update a model provider on the LangWatch platform. Use this to configure API keys (e.g. OPENAI_API_KEY) needed to run evaluators. The API key is stored securely and never returned in responses. Omit customKeys to update other settings without changing existing keys.`,
|
|
569
|
+
{
|
|
570
|
+
provider: z
|
|
571
|
+
.string()
|
|
572
|
+
.describe(
|
|
573
|
+
'Provider name, e.g., "openai", "anthropic", "azure", "custom"'
|
|
574
|
+
),
|
|
575
|
+
enabled: z.boolean().describe("Whether the provider is enabled"),
|
|
576
|
+
customKeys: z
|
|
577
|
+
.record(z.string(), z.unknown())
|
|
578
|
+
.optional()
|
|
579
|
+
.describe(
|
|
580
|
+
'API key configuration, e.g. { "OPENAI_API_KEY": "sk-..." }. Omit to keep existing keys.'
|
|
581
|
+
),
|
|
582
|
+
defaultModel: z
|
|
583
|
+
.string()
|
|
584
|
+
.optional()
|
|
585
|
+
.describe("Set as project default model"),
|
|
586
|
+
},
|
|
587
|
+
async (params) => {
|
|
588
|
+
const { requireApiKey } = await import("./config.js");
|
|
589
|
+
requireApiKey();
|
|
590
|
+
const { handleSetModelProvider } = await import(
|
|
591
|
+
"./tools/set-model-provider.js"
|
|
592
|
+
);
|
|
593
|
+
return {
|
|
594
|
+
content: [
|
|
595
|
+
{ type: "text", text: await handleSetModelProvider(params) },
|
|
596
|
+
],
|
|
597
|
+
};
|
|
598
|
+
}
|
|
599
|
+
);
|
|
600
|
+
|
|
601
|
+
server.tool(
|
|
602
|
+
"platform_list_model_providers",
|
|
603
|
+
"List all model providers configured on the LangWatch platform. API keys are masked in the response.",
|
|
604
|
+
{},
|
|
605
|
+
async () => {
|
|
606
|
+
const { requireApiKey } = await import("./config.js");
|
|
607
|
+
requireApiKey();
|
|
608
|
+
const { handleListModelProviders } = await import(
|
|
609
|
+
"./tools/list-model-providers.js"
|
|
610
|
+
);
|
|
611
|
+
return {
|
|
612
|
+
content: [{ type: "text", text: await handleListModelProviders() }],
|
|
323
613
|
};
|
|
324
614
|
}
|
|
325
615
|
);
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { makeRequest } from "./langwatch-api.js";
|
|
2
|
+
|
|
3
|
+
// --- Evaluator types ---
|
|
4
|
+
|
|
5
|
+
export interface EvaluatorSummary {
|
|
6
|
+
id: string;
|
|
7
|
+
projectId: string;
|
|
8
|
+
name: string;
|
|
9
|
+
slug: string | null;
|
|
10
|
+
type: string;
|
|
11
|
+
config: Record<string, unknown> | null;
|
|
12
|
+
workflowId: string | null;
|
|
13
|
+
copiedFromEvaluatorId: string | null;
|
|
14
|
+
createdAt: string;
|
|
15
|
+
updatedAt: string;
|
|
16
|
+
fields: Array<{ identifier: string; type: string; optional?: boolean }>;
|
|
17
|
+
outputFields: Array<{ identifier: string; type: string; optional?: boolean }>;
|
|
18
|
+
workflowName?: string;
|
|
19
|
+
workflowIcon?: string;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// --- Helpers ---
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Extracts the evaluatorType from an evaluator's config.
|
|
26
|
+
* Centralises the cast so callers don't repeat it.
|
|
27
|
+
*/
|
|
28
|
+
export function getEvaluatorType(
|
|
29
|
+
evaluator: Pick<EvaluatorSummary, "config">,
|
|
30
|
+
): string | undefined {
|
|
31
|
+
return (evaluator.config as Record<string, unknown> | null)
|
|
32
|
+
?.evaluatorType as string | undefined;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// --- Evaluator API functions ---
|
|
36
|
+
|
|
37
|
+
/** Lists all evaluators in the project. */
|
|
38
|
+
export async function listEvaluators(): Promise<EvaluatorSummary[]> {
|
|
39
|
+
return makeRequest("GET", "/api/evaluators") as Promise<EvaluatorSummary[]>;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/** Retrieves a single evaluator by ID or slug. */
|
|
43
|
+
export async function getEvaluator(idOrSlug: string): Promise<EvaluatorSummary> {
|
|
44
|
+
return makeRequest(
|
|
45
|
+
"GET",
|
|
46
|
+
`/api/evaluators/${encodeURIComponent(idOrSlug)}`,
|
|
47
|
+
) as Promise<EvaluatorSummary>;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/** Creates a new evaluator. */
|
|
51
|
+
export async function createEvaluator(data: {
|
|
52
|
+
name: string;
|
|
53
|
+
config: Record<string, unknown>;
|
|
54
|
+
}): Promise<EvaluatorSummary> {
|
|
55
|
+
return makeRequest("POST", "/api/evaluators", data) as Promise<EvaluatorSummary>;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/** Updates an existing evaluator. */
|
|
59
|
+
export async function updateEvaluator(params: {
|
|
60
|
+
id: string;
|
|
61
|
+
name?: string;
|
|
62
|
+
config?: Record<string, unknown>;
|
|
63
|
+
}): Promise<EvaluatorSummary> {
|
|
64
|
+
const { id, ...data } = params;
|
|
65
|
+
return makeRequest(
|
|
66
|
+
"PUT",
|
|
67
|
+
`/api/evaluators/${encodeURIComponent(id)}`,
|
|
68
|
+
data,
|
|
69
|
+
) as Promise<EvaluatorSummary>;
|
|
70
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { makeRequest } from "./langwatch-api.js";
|
|
2
|
+
|
|
3
|
+
// --- Model Provider types ---
|
|
4
|
+
|
|
5
|
+
export interface ModelProviderEntry {
|
|
6
|
+
id?: string;
|
|
7
|
+
provider: string;
|
|
8
|
+
enabled: boolean;
|
|
9
|
+
customKeys: Record<string, unknown> | null;
|
|
10
|
+
models?: string[] | null;
|
|
11
|
+
embeddingsModels?: string[] | null;
|
|
12
|
+
customModels?: unknown[] | null;
|
|
13
|
+
customEmbeddingsModels?: unknown[] | null;
|
|
14
|
+
disabledByDefault?: boolean;
|
|
15
|
+
deploymentMapping?: unknown;
|
|
16
|
+
extraHeaders?: Array<{ key: string; value: string }> | null;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
// --- Model Provider API functions ---
|
|
20
|
+
|
|
21
|
+
/** Lists all model providers for the project, with masked API keys. */
|
|
22
|
+
export async function listModelProviders(): Promise<Record<string, ModelProviderEntry>> {
|
|
23
|
+
return makeRequest("GET", "/api/model-providers") as Promise<
|
|
24
|
+
Record<string, ModelProviderEntry>
|
|
25
|
+
>;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/** Creates or updates a model provider. */
|
|
29
|
+
export async function setModelProvider(params: {
|
|
30
|
+
provider: string;
|
|
31
|
+
enabled: boolean;
|
|
32
|
+
customKeys?: Record<string, unknown>;
|
|
33
|
+
defaultModel?: string;
|
|
34
|
+
}): Promise<Record<string, ModelProviderEntry>> {
|
|
35
|
+
const { provider, ...data } = params;
|
|
36
|
+
return makeRequest(
|
|
37
|
+
"PUT",
|
|
38
|
+
`/api/model-providers/${encodeURIComponent(provider)}`,
|
|
39
|
+
data,
|
|
40
|
+
) as Promise<Record<string, ModelProviderEntry>>;
|
|
41
|
+
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { makeRequest } from "./langwatch-api.js";
|
|
2
|
+
|
|
3
|
+
// --- Scenario types ---
|
|
4
|
+
|
|
5
|
+
export interface ScenarioSummary {
|
|
6
|
+
id: string;
|
|
7
|
+
name: string;
|
|
8
|
+
situation: string;
|
|
9
|
+
criteria: string[];
|
|
10
|
+
labels: string[];
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export interface ScenarioArchiveResponse {
|
|
14
|
+
id: string;
|
|
15
|
+
archived: boolean;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
// --- Scenario API functions ---
|
|
19
|
+
|
|
20
|
+
/** Lists all scenarios in the project. */
|
|
21
|
+
export async function listScenarios(): Promise<ScenarioSummary[]> {
|
|
22
|
+
return makeRequest("GET", "/api/scenarios") as Promise<ScenarioSummary[]>;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/** Retrieves a single scenario by ID. */
|
|
26
|
+
export async function getScenario(id: string): Promise<ScenarioSummary> {
|
|
27
|
+
return makeRequest(
|
|
28
|
+
"GET",
|
|
29
|
+
`/api/scenarios/${encodeURIComponent(id)}`
|
|
30
|
+
) as Promise<ScenarioSummary>;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/** Creates a new scenario. */
|
|
34
|
+
export async function createScenario(data: {
|
|
35
|
+
name: string;
|
|
36
|
+
situation: string;
|
|
37
|
+
criteria?: string[];
|
|
38
|
+
labels?: string[];
|
|
39
|
+
}): Promise<ScenarioSummary> {
|
|
40
|
+
return makeRequest("POST", "/api/scenarios", data) as Promise<ScenarioSummary>;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/** Updates an existing scenario. */
|
|
44
|
+
export async function updateScenario(params: {
|
|
45
|
+
id: string;
|
|
46
|
+
name?: string;
|
|
47
|
+
situation?: string;
|
|
48
|
+
criteria?: string[];
|
|
49
|
+
labels?: string[];
|
|
50
|
+
}): Promise<ScenarioSummary> {
|
|
51
|
+
const { id, ...data } = params;
|
|
52
|
+
return makeRequest(
|
|
53
|
+
"PUT",
|
|
54
|
+
`/api/scenarios/${encodeURIComponent(id)}`,
|
|
55
|
+
data
|
|
56
|
+
) as Promise<ScenarioSummary>;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/** Archives (soft-deletes) a scenario. */
|
|
60
|
+
export async function archiveScenario(
|
|
61
|
+
id: string
|
|
62
|
+
): Promise<ScenarioArchiveResponse> {
|
|
63
|
+
return makeRequest(
|
|
64
|
+
"DELETE",
|
|
65
|
+
`/api/scenarios/${encodeURIComponent(id)}`
|
|
66
|
+
) as Promise<ScenarioArchiveResponse>;
|
|
67
|
+
}
|