@langwatch/mcp-server 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -0
- package/dist/archive-scenario-YFD5THOR.js +19 -0
- package/dist/archive-scenario-YFD5THOR.js.map +1 -0
- package/dist/chunk-5UOPNRXW.js +37 -0
- package/dist/chunk-5UOPNRXW.js.map +1 -0
- package/dist/chunk-6U4TCGFC.js +40 -0
- package/dist/chunk-6U4TCGFC.js.map +1 -0
- package/dist/chunk-IX6QJKAD.js +22 -0
- package/dist/chunk-IX6QJKAD.js.map +1 -0
- package/dist/{chunk-HOPTUDCZ.js → chunk-LLRQIF52.js} +5 -12
- package/dist/chunk-LLRQIF52.js.map +1 -0
- package/dist/create-evaluator-E5X5ZP3B.js +27 -0
- package/dist/create-evaluator-E5X5ZP3B.js.map +1 -0
- package/dist/create-prompt-7Z35MIL6.js +36 -0
- package/dist/create-prompt-7Z35MIL6.js.map +1 -0
- package/dist/create-scenario-DIMPJRPY.js +26 -0
- package/dist/create-scenario-DIMPJRPY.js.map +1 -0
- package/dist/discover-evaluator-schema-H23XCLNE.js +1402 -0
- package/dist/discover-evaluator-schema-H23XCLNE.js.map +1 -0
- package/dist/discover-scenario-schema-MEEEVND7.js +65 -0
- package/dist/discover-scenario-schema-MEEEVND7.js.map +1 -0
- package/dist/{get-analytics-3IFTN6MY.js → get-analytics-4YJW4S5L.js} +2 -2
- package/dist/get-evaluator-WDEH2F7M.js +47 -0
- package/dist/get-evaluator-WDEH2F7M.js.map +1 -0
- package/dist/{get-prompt-2ZB5B3QC.js → get-prompt-F6PDVC76.js} +2 -5
- package/dist/get-prompt-F6PDVC76.js.map +1 -0
- package/dist/get-scenario-H24ZYNT5.js +33 -0
- package/dist/get-scenario-H24ZYNT5.js.map +1 -0
- package/dist/{get-trace-7IXKKCJJ.js → get-trace-27USKGO7.js} +2 -2
- package/dist/index.js +27066 -8845
- package/dist/index.js.map +1 -1
- package/dist/list-evaluators-KRGI72EH.js +34 -0
- package/dist/list-evaluators-KRGI72EH.js.map +1 -0
- package/dist/list-model-providers-A5YCFTPI.js +35 -0
- package/dist/list-model-providers-A5YCFTPI.js.map +1 -0
- package/dist/{list-prompts-J72LTP7Z.js → list-prompts-LKJSE7XN.js} +6 -7
- package/dist/list-prompts-LKJSE7XN.js.map +1 -0
- package/dist/list-scenarios-ZK5CMGC4.js +40 -0
- package/dist/list-scenarios-ZK5CMGC4.js.map +1 -0
- package/dist/{search-traces-RW2NDHN5.js → search-traces-SOKAAMAR.js} +2 -2
- package/dist/set-model-provider-7MGULZDH.js +33 -0
- package/dist/set-model-provider-7MGULZDH.js.map +1 -0
- package/dist/update-evaluator-A3XINFLJ.js +24 -0
- package/dist/update-evaluator-A3XINFLJ.js.map +1 -0
- package/dist/update-prompt-IW7X2UQM.js +22 -0
- package/dist/update-prompt-IW7X2UQM.js.map +1 -0
- package/dist/update-scenario-ZT7TOBFR.js +27 -0
- package/dist/update-scenario-ZT7TOBFR.js.map +1 -0
- package/package.json +11 -11
- package/src/__tests__/all-tools.integration.test.ts +1337 -0
- package/src/__tests__/discover-evaluator-schema.unit.test.ts +89 -0
- package/src/__tests__/evaluator-tools.unit.test.ts +262 -0
- package/src/__tests__/integration.integration.test.ts +9 -34
- package/src/__tests__/langwatch-api.unit.test.ts +4 -32
- package/src/__tests__/model-provider-tools.unit.test.ts +190 -0
- package/src/__tests__/scenario-tools.integration.test.ts +286 -0
- package/src/__tests__/scenario-tools.unit.test.ts +185 -0
- package/src/__tests__/tools.unit.test.ts +59 -65
- package/src/index.ts +338 -48
- package/src/langwatch-api-evaluators.ts +70 -0
- package/src/langwatch-api-model-providers.ts +41 -0
- package/src/langwatch-api-scenarios.ts +67 -0
- package/src/langwatch-api.ts +6 -30
- package/src/tools/archive-scenario.ts +19 -0
- package/src/tools/create-evaluator.ts +33 -0
- package/src/tools/create-prompt.ts +30 -5
- package/src/tools/create-scenario.ts +30 -0
- package/src/tools/discover-evaluator-schema.ts +143 -0
- package/src/tools/discover-scenario-schema.ts +71 -0
- package/src/tools/get-evaluator.ts +53 -0
- package/src/tools/get-prompt.ts +1 -4
- package/src/tools/get-scenario.ts +36 -0
- package/src/tools/list-evaluators.ts +37 -0
- package/src/tools/list-model-providers.ts +40 -0
- package/src/tools/list-prompts.ts +5 -6
- package/src/tools/list-scenarios.ts +47 -0
- package/src/tools/set-model-provider.ts +46 -0
- package/src/tools/update-evaluator.ts +30 -0
- package/src/tools/update-prompt.ts +9 -25
- package/src/tools/update-scenario.ts +32 -0
- package/uv.lock +1788 -1322
- package/dist/chunk-HOPTUDCZ.js.map +0 -1
- package/dist/create-prompt-UBC537BJ.js +0 -22
- package/dist/create-prompt-UBC537BJ.js.map +0 -1
- package/dist/get-prompt-2ZB5B3QC.js.map +0 -1
- package/dist/list-prompts-J72LTP7Z.js.map +0 -1
- package/dist/update-prompt-G6HHZSUM.js +0 -31
- package/dist/update-prompt-G6HHZSUM.js.map +0 -1
- /package/dist/{get-analytics-3IFTN6MY.js.map → get-analytics-4YJW4S5L.js.map} +0 -0
- /package/dist/{get-trace-7IXKKCJJ.js.map → get-trace-27USKGO7.js.map} +0 -0
- /package/dist/{search-traces-RW2NDHN5.js.map → search-traces-SOKAAMAR.js.map} +0 -0
package/src/langwatch-api.ts
CHANGED
|
@@ -78,7 +78,6 @@ export interface PromptSummary {
|
|
|
78
78
|
id?: string;
|
|
79
79
|
handle?: string;
|
|
80
80
|
name?: string;
|
|
81
|
-
description?: string | null;
|
|
82
81
|
latestVersionNumber?: number;
|
|
83
82
|
version?: number;
|
|
84
83
|
}
|
|
@@ -87,14 +86,12 @@ export interface PromptVersion {
|
|
|
87
86
|
version?: number;
|
|
88
87
|
commitMessage?: string;
|
|
89
88
|
model?: string;
|
|
90
|
-
modelProvider?: string;
|
|
91
89
|
messages?: Array<{ role: string; content: string }>;
|
|
92
90
|
}
|
|
93
91
|
|
|
94
92
|
export interface PromptDetailResponse extends PromptSummary {
|
|
95
93
|
versions?: PromptVersion[];
|
|
96
94
|
model?: string;
|
|
97
|
-
modelProvider?: string;
|
|
98
95
|
messages?: Array<{ role: string; content: string }>;
|
|
99
96
|
prompt?: Array<{ role: string; content: string }>;
|
|
100
97
|
}
|
|
@@ -116,8 +113,8 @@ export interface PromptMutationResponse {
|
|
|
116
113
|
*
|
|
117
114
|
* @throws Error with status code and response body when the response is not OK
|
|
118
115
|
*/
|
|
119
|
-
async function makeRequest(
|
|
120
|
-
method: "GET" | "POST",
|
|
116
|
+
export async function makeRequest(
|
|
117
|
+
method: "GET" | "POST" | "PUT" | "PATCH" | "DELETE",
|
|
121
118
|
path: string,
|
|
122
119
|
body?: unknown
|
|
123
120
|
): Promise<unknown> {
|
|
@@ -126,7 +123,7 @@ async function makeRequest(
|
|
|
126
123
|
"X-Auth-Token": requireApiKey(),
|
|
127
124
|
};
|
|
128
125
|
|
|
129
|
-
if (
|
|
126
|
+
if (body !== undefined) {
|
|
130
127
|
headers["Content-Type"] = "application/json";
|
|
131
128
|
}
|
|
132
129
|
|
|
@@ -216,12 +213,9 @@ export async function getPrompt(
|
|
|
216
213
|
|
|
217
214
|
/** Creates a new prompt. */
|
|
218
215
|
export async function createPrompt(data: {
|
|
219
|
-
|
|
220
|
-
handle?: string;
|
|
216
|
+
handle: string;
|
|
221
217
|
messages: Array<{ role: string; content: string }>;
|
|
222
218
|
model: string;
|
|
223
|
-
modelProvider: string;
|
|
224
|
-
description?: string;
|
|
225
219
|
}): Promise<PromptMutationResponse> {
|
|
226
220
|
return makeRequest(
|
|
227
221
|
"POST",
|
|
@@ -236,30 +230,12 @@ export async function updatePrompt(
|
|
|
236
230
|
data: {
|
|
237
231
|
messages?: Array<{ role: string; content: string }>;
|
|
238
232
|
model?: string;
|
|
239
|
-
|
|
240
|
-
commitMessage?: string;
|
|
233
|
+
commitMessage: string;
|
|
241
234
|
}
|
|
242
235
|
): Promise<PromptMutationResponse> {
|
|
243
236
|
return makeRequest(
|
|
244
|
-
"
|
|
237
|
+
"PUT",
|
|
245
238
|
`/api/prompts/${encodeURIComponent(idOrHandle)}`,
|
|
246
239
|
data
|
|
247
240
|
) as Promise<PromptMutationResponse>;
|
|
248
241
|
}
|
|
249
|
-
|
|
250
|
-
/** Creates a new version of an existing prompt. */
|
|
251
|
-
export async function createPromptVersion(
|
|
252
|
-
idOrHandle: string,
|
|
253
|
-
data: {
|
|
254
|
-
messages?: Array<{ role: string; content: string }>;
|
|
255
|
-
model?: string;
|
|
256
|
-
modelProvider?: string;
|
|
257
|
-
commitMessage?: string;
|
|
258
|
-
}
|
|
259
|
-
): Promise<PromptMutationResponse> {
|
|
260
|
-
return makeRequest(
|
|
261
|
-
"POST",
|
|
262
|
-
`/api/prompts/${encodeURIComponent(idOrHandle)}/versions`,
|
|
263
|
-
data
|
|
264
|
-
) as Promise<PromptMutationResponse>;
|
|
265
|
-
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { archiveScenario as apiArchiveScenario } from "../langwatch-api-scenarios.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Handles the platform_archive_scenario MCP tool invocation.
 *
 * Archives (soft-deletes) a scenario through the scenarios API client and
 * renders the outcome as a short markdown confirmation.
 *
 * @param params - Tool arguments.
 * @param params.scenarioId - ID of the scenario to archive.
 * @returns Markdown text containing the scenario ID and its resulting status
 *   ("archived" when the API reports `archived` as truthy, otherwise "active").
 */
export async function handleArchiveScenario(params: {
  scenarioId: string;
}): Promise<string> {
  const result = await apiArchiveScenario(params.scenarioId);

  // The status line reflects what the API returned rather than assuming success.
  const status = result.archived ? "archived" : "active";

  return [
    "Scenario archived successfully!\n",
    `**ID**: ${result.id}`,
    `**Status**: ${status}`,
  ].join("\n");
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import {
|
|
2
|
+
createEvaluator as apiCreateEvaluator,
|
|
3
|
+
getEvaluatorType,
|
|
4
|
+
} from "../langwatch-api-evaluators.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Handles the platform_create_evaluator MCP tool invocation.
 *
 * Creates a new evaluator in the LangWatch project and returns a markdown
 * confirmation describing the created evaluator.
 *
 * @param params - Tool arguments, forwarded unchanged to the API client.
 * @param params.name - Display name of the evaluator.
 * @param params.config - Evaluator configuration object.
 * @returns Markdown text with the evaluator's ID, optional slug, name,
 *   optional evaluator type, kind, and (when present) its input field identifiers.
 */
export async function handleCreateEvaluator(params: {
  name: string;
  config: Record<string, unknown>;
}): Promise<string> {
  const result = await apiCreateEvaluator(params);
  const evaluatorType = getEvaluatorType(result);

  const lines: string[] = ["Evaluator created successfully!\n"];
  lines.push(`**ID**: ${result.id}`);
  if (result.slug) {
    lines.push(`**Slug**: ${result.slug}`);
  }
  lines.push(`**Name**: ${result.name}`);
  if (evaluatorType) {
    lines.push(`**Evaluator Type**: ${evaluatorType}`);
  }
  lines.push(`**Kind**: ${result.type}`);

  // Only list input fields when the API actually returned a non-empty array.
  if (Array.isArray(result.fields) && result.fields.length > 0) {
    const identifiers = result.fields.map((f) => f.identifier).join(", ");
    lines.push(`**Input Fields**: ${identifiers}`);
  }

  return lines.join("\n");
}
|
|
@@ -1,7 +1,23 @@
|
|
|
1
1
|
import { createPrompt as apiCreatePrompt } from "../langwatch-api.js";
|
|
2
2
|
|
|
3
|
+
// Accepted handle shape: `segment` or `segment/segment`, where each segment is
// one or more lowercase letters, digits, underscores, or hyphens.
const HANDLE_PATTERN = /^[a-z0-9_-]+(?:\/[a-z0-9_-]+)?$/;
|
|
4
|
+
|
|
3
5
|
/**
 * Converts a human-readable name into a URL-friendly handle.
 *
 * Lowercases the input, replaces every run of characters outside `a-z0-9`
 * with a single hyphen, and strips one leading and one trailing hyphen
 * (after collapsing, at most one can exist on each side).
 *
 * @param name - Free-form display name.
 * @returns The derived handle. May be an empty string when the input has no
 *   ASCII alphanumeric characters — callers must validate.
 */
function toHandle(name: string): string {
  const lowered = name.toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9]+/g, "-");
  return hyphenated.replace(/^-|-$/g, "");
}
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Handles the platform_create_prompt MCP tool invocation.
|
|
5
21
|
*
|
|
6
22
|
* Creates a new prompt in the LangWatch project and returns a
|
|
7
23
|
* confirmation with the created prompt's details.
|
|
@@ -11,17 +27,26 @@ export async function handleCreatePrompt(params: {
|
|
|
11
27
|
handle?: string;
|
|
12
28
|
messages: Array<{ role: string; content: string }>;
|
|
13
29
|
model: string;
|
|
14
|
-
modelProvider: string;
|
|
15
|
-
description?: string;
|
|
16
30
|
}): Promise<string> {
|
|
17
|
-
const
|
|
31
|
+
const handle = params.handle?.trim() || toHandle(params.name);
|
|
32
|
+
if (!handle || !HANDLE_PATTERN.test(handle)) {
|
|
33
|
+
throw new Error(
|
|
34
|
+
`Invalid prompt handle "${handle || ""}". Handle must match ${HANDLE_PATTERN}. Provide a valid \`handle\` explicitly.`
|
|
35
|
+
);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
const result = await apiCreatePrompt({
|
|
39
|
+
handle,
|
|
40
|
+
messages: params.messages,
|
|
41
|
+
model: params.model,
|
|
42
|
+
});
|
|
18
43
|
|
|
19
44
|
const lines: string[] = [];
|
|
20
45
|
lines.push("Prompt created successfully!\n");
|
|
21
46
|
if (result.id) lines.push(`**ID**: ${result.id}`);
|
|
22
47
|
if (result.handle) lines.push(`**Handle**: ${result.handle}`);
|
|
23
48
|
lines.push(`**Name**: ${result.name || params.name}`);
|
|
24
|
-
lines.push(`**Model**: ${params.model}
|
|
49
|
+
lines.push(`**Model**: ${params.model}`);
|
|
25
50
|
if (result.latestVersionNumber != null)
|
|
26
51
|
lines.push(`**Version**: v${result.latestVersionNumber}`);
|
|
27
52
|
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { createScenario as apiCreateScenario } from "../langwatch-api-scenarios.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Handles the platform_create_scenario MCP tool invocation.
 *
 * Creates a new scenario in the LangWatch project and returns a markdown
 * confirmation with the created scenario's details.
 *
 * @param params - Tool arguments, forwarded unchanged to the API client.
 * @param params.name - Scenario name.
 * @param params.situation - Situation text for the scenario.
 * @param params.criteria - Optional list of criteria.
 * @param params.labels - Optional list of labels.
 * @returns Markdown text with the scenario's ID, name, situation, and — when
 *   non-empty — a criteria count and the comma-separated labels.
 */
export async function handleCreateScenario(params: {
  name: string;
  situation: string;
  criteria?: string[];
  labels?: string[];
}): Promise<string> {
  const result = await apiCreateScenario(params);

  const lines: string[] = [
    "Scenario created successfully!\n",
    `**ID**: ${result.id}`,
    `**Name**: ${result.name}`,
    `**Situation**: ${result.situation}`,
  ];

  // Criteria are summarised as a count; labels are listed in full.
  if (Array.isArray(result.criteria) && result.criteria.length > 0) {
    lines.push(`**Criteria**: ${result.criteria.length} criteria`);
  }
  if (Array.isArray(result.labels) && result.labels.length > 0) {
    lines.push(`**Labels**: ${result.labels.join(", ")}`);
  }

  return lines.join("\n");
}
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import { AVAILABLE_EVALUATORS } from "../../../langevals/ts-integration/evaluators.generated.js";
|
|
2
|
+
import type { EvaluatorDefinition, EvaluatorTypes } from "../../../langevals/ts-integration/evaluators.generated.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Formats evaluator schema information for the discover_schema tool.
 *
 * Two levels of detail:
 * - Overview (no evaluatorType): compact list of all evaluator types
 * - Detail (with evaluatorType): full schema for one evaluator type
 *
 * @param evaluatorType - Optional evaluator type key. A missing or empty
 *   value selects the overview.
 * @returns Markdown text for the selected level of detail.
 */
export function formatEvaluatorSchema(evaluatorType?: string): string {
  return evaluatorType
    ? formatEvaluatorDetail(evaluatorType)
    : formatEvaluatorOverview();
}
|
|
17
|
+
|
|
18
|
+
/**
 * Returns a compact overview of all available evaluator types.
 *
 * Entries are grouped by category (in the order categories are first
 * encountered) and each shows the type key, display name, and a one-line
 * description.
 *
 * @returns Markdown text with one section per category and a closing hint on
 *   how to request details for a single evaluator type.
 */
function formatEvaluatorOverview(): string {
  const lines: string[] = [];
  lines.push("# Available Evaluator Types\n");

  // Map preserves insertion order, so categories appear in first-seen order.
  const byCategory = new Map<string, { type: string; name: string; description: string }[]>();

  for (const [type, def] of Object.entries(AVAILABLE_EVALUATORS)) {
    const evalDef = def as EvaluatorDefinition<EvaluatorTypes>;
    const entry = {
      type,
      name: evalDef.name,
      description: extractFirstLine(evalDef.description),
    };

    const bucket = byCategory.get(evalDef.category) ?? [];
    bucket.push(entry);
    byCategory.set(evalDef.category, bucket);
  }

  for (const [category, entries] of byCategory) {
    lines.push(`## ${category}\n`);
    for (const entry of entries) {
      lines.push(`- **${entry.type}** (${entry.name}): ${entry.description}`);
    }
    lines.push("");
  }

  lines.push(
    "> Use `discover_schema({ category: 'evaluators', evaluatorType: '<type>' })` for full details on a specific evaluator type.",
  );

  return lines.join("\n");
}
|
|
52
|
+
|
|
53
|
+
/**
 * Returns the full schema for a specific evaluator type.
 *
 * Includes settings with descriptions and defaults, required/optional fields,
 * env vars, result fields, and a JSON usage example built from the defaults.
 *
 * @param evaluatorType - Key of the evaluator in `AVAILABLE_EVALUATORS`.
 * @returns Markdown text describing the evaluator, or an "Unknown evaluator
 *   type" message when the key is not an own property of the registry.
 */
function formatEvaluatorDetail(evaluatorType: string): string {
  // Guard with an own-property check: a plain index lookup would also resolve
  // inherited keys such as "constructor" or "toString", producing a truthy
  // non-definition value that crashes further down.
  const isKnownType = Object.prototype.hasOwnProperty.call(
    AVAILABLE_EVALUATORS,
    evaluatorType,
  );
  const def = isKnownType
    ? (AVAILABLE_EVALUATORS[evaluatorType as EvaluatorTypes] as
        | EvaluatorDefinition<EvaluatorTypes>
        | undefined)
    : undefined;

  if (!def) {
    return `Unknown evaluator type: "${evaluatorType}". Use \`discover_schema({ category: 'evaluators' })\` to see all available types.`;
  }

  const lines: string[] = [];
  lines.push(`# ${def.name} (\`${evaluatorType}\`)\n`);
  lines.push(`**Category**: ${def.category}`);
  lines.push(`**Is Guardrail**: ${def.isGuardrail ? "Yes" : "No"}`);
  if (def.docsUrl) {
    lines.push(`**Docs**: ${def.docsUrl}`);
  }
  lines.push("");
  lines.push(`## Description\n`);
  lines.push(def.description.trim());

  // Required and optional fields
  lines.push("\n## Fields\n");
  if (def.requiredFields.length > 0) {
    lines.push(`**Required**: ${def.requiredFields.join(", ")}`);
  } else {
    lines.push("**Required**: none");
  }
  if (def.optionalFields.length > 0) {
    lines.push(`**Optional**: ${def.optionalFields.join(", ")}`);
  }

  // Settings
  const settingsEntries = Object.entries(def.settings);
  if (settingsEntries.length > 0) {
    lines.push("\n## Settings\n");
    for (const [key, setting] of settingsEntries) {
      const s = setting as { description?: string; default: unknown };
      const defaultStr = JSON.stringify(s.default);
      const desc = s.description ? ` - ${s.description}` : "";
      lines.push(`- **${key}**${desc}`);
      lines.push(`  Default: \`${defaultStr}\``);
    }
  }

  // Env vars
  if (def.envVars.length > 0) {
    lines.push("\n## Required Environment Variables\n");
    for (const envVar of def.envVars) {
      lines.push(`- \`${envVar}\``);
    }
  }

  // Result fields
  const resultEntries = Object.entries(def.result);
  if (resultEntries.length > 0) {
    lines.push("\n## Result Fields\n");
    for (const [key, value] of resultEntries) {
      const v = value as { description: string };
      lines.push(`- **${key}**: ${v.description}`);
    }
  }

  // Usage example: the evaluator type plus every setting at its default value.
  lines.push("\n## Usage Example\n");
  lines.push("```json");
  lines.push(JSON.stringify({
    evaluatorType: evaluatorType,
    settings: Object.fromEntries(
      settingsEntries.map(([key, setting]) => [key, (setting as { default: unknown }).default]),
    ),
  }, null, 2));
  lines.push("```");

  return lines.join("\n");
}
|
|
131
|
+
|
|
132
|
+
/**
 * Extracts the first line from a (possibly multi-line) description.
 *
 * The description is trimmed, split on `\n`, and the first line is trimmed
 * again. Lines longer than 120 characters are cut to 117 characters and
 * suffixed with "..." so the result never exceeds 120 characters.
 *
 * @param description - Raw description text.
 * @returns The first line, truncated when necessary; an empty string for
 *   blank input.
 */
function extractFirstLine(description: string): string {
  const MAX_LENGTH = 120;
  const ELLIPSIS = "...";

  const trimmed = description.trim();
  const firstLine = trimmed.split("\n")[0]?.trim() ?? trimmed;

  // Limit to a reasonable length
  if (firstLine.length <= MAX_LENGTH) {
    return firstLine;
  }
  return firstLine.slice(0, MAX_LENGTH - ELLIPSIS.length) + ELLIPSIS;
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
 * Returns a human-readable description of the scenario schema,
 * including field descriptions, authoring guidance, and examples.
 *
 * The text is static: it takes no input and performs no I/O.
 *
 * @returns Markdown text with the sections "Fields", "Writing a Good
 *   Situation", "Writing Good Criteria", "Target Types", and "Tips".
 */
export function formatScenarioSchema(): string {
  const lines: string[] = [];

  lines.push("# Scenario Schema\n");

  lines.push("## Fields\n");
  lines.push(
    '- **name** (required): A short, descriptive name (e.g., "billing dispute resolution", "password reset with 2FA unavailable")',
  );
  lines.push(
    "- **situation** (required): The context that guides the user simulator — who the user is, what they want, and any constraints (see Writing a Good Situation below)",
  );
  lines.push(
    "- **criteria** (array of strings): Pass/fail conditions a judge evaluates the agent against (see Writing Good Criteria below)",
  );
  lines.push(
    '- **labels** (array of strings): Tags for organizing scenarios (e.g., "auth", "happy-path", "edge-case")',
  );

  lines.push("\n## Writing a Good Situation\n");
  lines.push(
    "The situation drives the user simulator. Include these elements:",
  );
  lines.push("- **Persona**: Who is the user? (e.g., a stressed small business owner, a confused teenager)");
  lines.push("- **Emotional state**: How are they feeling? (e.g., frustrated, anxious, impatient)");
  lines.push("- **Background/Context**: What happened before this conversation?");
  lines.push("- **Intent**: What do they want to accomplish?");
  lines.push("- **Constraints**: What limitations do they have? (e.g., no phone for 2FA, unfamiliar with technical terms)");
  lines.push("\nExample:");
  lines.push("```");
  lines.push("User is a small business owner stressed about tax deadline.");
  lines.push("They need help categorizing expenses but aren't familiar with");
  lines.push("accounting terms. They appreciate patient explanations and examples.");
  lines.push("They have a spreadsheet of transactions but aren't sure which");
  lines.push("categories apply to their consulting business.");
  lines.push("```");

  lines.push("\n## Writing Good Criteria\n");
  lines.push("Criteria are what the judge uses to pass or fail the agent. Each criterion should be:");
  lines.push("- **Specific and testable** — not vague like \"responds helpfully\"");
  lines.push("- **Behavioral** — describes what the agent should *do*, not how it works internally");
  lines.push("- **Independent** — each criterion checks one thing");
  lines.push("\nGood criteria patterns:");
  lines.push("- **Information gathering**: \"Agent asks for the user's account number before proceeding\"");
  lines.push("- **Safety/guardrails**: \"Agent does not reveal internal system details or error stack traces\"");
  lines.push("- **Clarification**: \"Agent asks clarifying questions before taking irreversible action\"");
  lines.push("- **Tone**: \"Agent maintains a professional and empathetic tone throughout\"");
  lines.push("- **Completeness**: \"Agent confirms the user understands the solution before ending\"");
  lines.push("- **Domain-specific**: \"Agent recommends releasing a wild frog rather than keeping it as a pet\"");
  lines.push("\nAvoid vague criteria like:");
  lines.push('- "Responds correctly" — correct how?');
  lines.push('- "Is helpful" — helpful in what way?');
  lines.push('- "Works well" — not testable');

  lines.push("\n## Target Types\n");
  lines.push("Scenarios can target different execution backends:");
  lines.push("- **prompt**: Test a prompt template with variable substitution");
  lines.push("- **http**: Test an HTTP endpoint (e.g., a deployed agent API)");
  lines.push("- **code**: Test a code function directly");

  lines.push("\n## Tips\n");
  lines.push("- Start simple, then layer complexity (add constraints, edge cases)");
  lines.push("- Test edge cases: user changes their mind, gives ambiguous input, makes mistakes");
  lines.push("- Use `fetch_scenario_docs` for the full authoring guide and advanced patterns");

  return lines.join("\n");
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import {
|
|
2
|
+
getEvaluator as apiGetEvaluator,
|
|
3
|
+
getEvaluatorType,
|
|
4
|
+
} from "../langwatch-api-evaluators.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Handles the platform_get_evaluator MCP tool invocation.
 *
 * Retrieves a specific evaluator by ID or slug and formats it as
 * AI-readable markdown.
 *
 * @param params - Tool arguments.
 * @param params.idOrSlug - Evaluator ID or slug, passed to the API client.
 * @returns Markdown text with the evaluator's identity, and — when present —
 *   its config (as pretty-printed JSON), input fields, output fields, and
 *   workflow name.
 */
export async function handleGetEvaluator(params: {
  idOrSlug: string;
}): Promise<string> {
  const evaluator = await apiGetEvaluator(params.idOrSlug);
  const evaluatorType = getEvaluatorType(evaluator);

  const lines: string[] = [];
  lines.push(`# Evaluator: ${evaluator.name}\n`);
  lines.push(`**ID**: ${evaluator.id}`);
  if (evaluator.slug) {
    lines.push(`**Slug**: ${evaluator.slug}`);
  }
  lines.push(`**Kind**: ${evaluator.type}`);
  if (evaluatorType) {
    lines.push(`**Evaluator Type**: ${evaluatorType}`);
  }

  if (evaluator.config) {
    lines.push("\n## Config");
    lines.push("```json");
    lines.push(JSON.stringify(evaluator.config, null, 2));
    lines.push("```");
  }

  // Input fields may be flagged optional; output fields carry no such flag.
  if (Array.isArray(evaluator.fields) && evaluator.fields.length > 0) {
    lines.push("\n## Input Fields");
    for (const field of evaluator.fields) {
      const optionalSuffix = field.optional ? " (optional)" : "";
      lines.push(`- **${field.identifier}** (${field.type})${optionalSuffix}`);
    }
  }

  if (Array.isArray(evaluator.outputFields) && evaluator.outputFields.length > 0) {
    lines.push("\n## Output Fields");
    for (const field of evaluator.outputFields) {
      lines.push(`- **${field.identifier}** (${field.type})`);
    }
  }

  if (evaluator.workflowName) {
    lines.push(`\n**Workflow**: ${evaluator.workflowName}`);
  }

  return lines.join("\n");
}
|
package/src/tools/get-prompt.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { getPrompt as apiGetPrompt } from "../langwatch-api.js";
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
* Handles the
|
|
4
|
+
* Handles the platform_get_prompt MCP tool invocation.
|
|
5
5
|
*
|
|
6
6
|
* Retrieves a specific prompt by ID or handle and formats it as
|
|
7
7
|
* AI-readable markdown, including messages, model config, and version history.
|
|
@@ -19,15 +19,12 @@ export async function handleGetPrompt(params: {
|
|
|
19
19
|
|
|
20
20
|
if (prompt.handle) lines.push(`**Handle**: ${prompt.handle}`);
|
|
21
21
|
if (prompt.id) lines.push(`**ID**: ${prompt.id}`);
|
|
22
|
-
if (prompt.description) lines.push(`**Description**: ${prompt.description}`);
|
|
23
22
|
if (prompt.latestVersionNumber != null)
|
|
24
23
|
lines.push(`**Latest Version**: v${prompt.latestVersionNumber}`);
|
|
25
24
|
|
|
26
25
|
// Show model config
|
|
27
26
|
const version = prompt.versions?.[0] ?? prompt;
|
|
28
27
|
if (version.model) lines.push(`**Model**: ${version.model}`);
|
|
29
|
-
if (version.modelProvider)
|
|
30
|
-
lines.push(`**Provider**: ${version.modelProvider}`);
|
|
31
28
|
|
|
32
29
|
// Show messages
|
|
33
30
|
const messages = version.messages || prompt.prompt || [];
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { getScenario as apiGetScenario } from "../langwatch-api-scenarios.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Handles the get_scenario MCP tool invocation.
 *
 * Retrieves a specific scenario by ID and formats it as
 * AI-readable markdown or raw JSON.
 *
 * NOTE(review): other scenario handlers in this package name their tools with
 * a `platform_` prefix — confirm the registered tool name for this handler.
 *
 * @param params - Tool arguments.
 * @param params.scenarioId - ID of the scenario to fetch.
 * @param params.format - `"json"` returns the scenario pretty-printed as JSON;
 *   any other value (including omitted) returns the markdown digest.
 * @returns The scenario rendered in the requested format.
 */
export async function handleGetScenario(params: {
  scenarioId: string;
  format?: "digest" | "json";
}): Promise<string> {
  const scenario = await apiGetScenario(params.scenarioId);

  if (params.format === "json") {
    return JSON.stringify(scenario, null, 2);
  }

  const lines: string[] = [
    `# Scenario: ${scenario.name}\n`,
    `**ID**: ${scenario.id}`,
    `**Situation**: ${scenario.situation}`,
  ];

  if (Array.isArray(scenario.criteria) && scenario.criteria.length > 0) {
    lines.push("\n## Criteria");
    for (const criterion of scenario.criteria) {
      lines.push(`- ${criterion}`);
    }
  }

  if (Array.isArray(scenario.labels) && scenario.labels.length > 0) {
    lines.push(`\n**Labels**: ${scenario.labels.join(", ")}`);
  }

  return lines.join("\n");
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import {
|
|
2
|
+
listEvaluators as apiListEvaluators,
|
|
3
|
+
getEvaluatorType,
|
|
4
|
+
} from "../langwatch-api-evaluators.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Handles the platform_list_evaluators MCP tool invocation.
 *
 * Lists all evaluators in the LangWatch project, formatted as an
 * AI-readable digest.
 *
 * @returns Markdown text with one section per evaluator (ID, optional slug,
 *   optional type, kind), or a tip message when the API returns no evaluators
 *   or a non-array value.
 */
export async function handleListEvaluators(): Promise<string> {
  const evaluators = await apiListEvaluators();

  // Treat a non-array response the same as an empty project.
  if (!Array.isArray(evaluators) || evaluators.length === 0) {
    return "No evaluators found in this project.\n\n> Tip: Use `platform_create_evaluator` to create your first evaluator. Call `discover_schema({ category: 'evaluators' })` to see available evaluator types.";
  }

  const lines: string[] = [`# Evaluators (${evaluators.length} total)\n`];

  for (const e of evaluators) {
    const evaluatorType = getEvaluatorType(e);
    lines.push(`## ${e.name}`);
    lines.push(`**ID**: ${e.id}`);
    if (e.slug) {
      lines.push(`**Slug**: ${e.slug}`);
    }
    if (evaluatorType) {
      lines.push(`**Type**: ${evaluatorType}`);
    }
    lines.push(`**Kind**: ${e.type}`);
    lines.push("");
  }

  lines.push(
    "> Use `platform_get_evaluator` with the ID or slug to see full evaluator details.",
  );

  return lines.join("\n");
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { listModelProviders as apiListModelProviders } from "../langwatch-api-model-providers.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Handles the platform_list_model_providers MCP tool invocation.
 *
 * Lists all model providers for the project, showing provider name,
 * enabled status, and which key fields are set (masked).
 *
 * Key values are never printed: each custom key is reported only as
 * "set" or "not set" based on its truthiness.
 *
 * @returns Markdown text with one section per provider, or a tip message when
 *   no providers are configured.
 */
export async function handleListModelProviders(): Promise<string> {
  const providers = await apiListModelProviders();

  const entries = Object.entries(providers);
  if (entries.length === 0) {
    return "No model providers configured for this project.\n\n> Tip: Use `platform_set_model_provider` to configure an API key for a model provider.";
  }

  const lines: string[] = [`# Model Providers (${entries.length} total)\n`];

  for (const [key, provider] of entries) {
    lines.push(`## ${key}`);
    lines.push(`**Status**: ${provider.enabled ? "enabled" : "disabled"}`);

    if (provider.customKeys) {
      const keyFields = Object.entries(provider.customKeys)
        .map(([k, v]) => `${k}: ${v ? "set" : "not set"}`)
        .join(", ");
      lines.push(`**Keys**: ${keyFields}`);
    }

    // Models are summarised as a count rather than listed individually.
    if (provider.models && provider.models.length > 0) {
      lines.push(`**Models**: ${provider.models.length} available`);
    }

    lines.push("");
  }

  return lines.join("\n");
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { listPrompts as apiListPrompts } from "../langwatch-api.js";
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
* Handles the
|
|
4
|
+
* Handles the platform_list_prompts MCP tool invocation.
|
|
5
5
|
*
|
|
6
6
|
* Lists all prompts in the LangWatch project, formatted as an
|
|
7
7
|
* AI-readable markdown table.
|
|
@@ -15,20 +15,19 @@ export async function handleListPrompts(): Promise<string> {
|
|
|
15
15
|
|
|
16
16
|
const lines: string[] = [];
|
|
17
17
|
lines.push(`# Prompts (${prompts.length} total)\n`);
|
|
18
|
-
lines.push("| Handle | Name | Latest Version |
|
|
19
|
-
lines.push("
|
|
18
|
+
lines.push("| Handle | Name | Latest Version |");
|
|
19
|
+
lines.push("|--------|------|----------------|");
|
|
20
20
|
|
|
21
21
|
for (const p of prompts) {
|
|
22
22
|
const handle = p.handle || p.id || "N/A";
|
|
23
23
|
const name = p.name || "Untitled";
|
|
24
24
|
const versionNum = p.latestVersionNumber ?? p.version;
|
|
25
25
|
const version = versionNum != null ? `v${versionNum}` : "N/A";
|
|
26
|
-
|
|
27
|
-
lines.push(`| ${handle} | ${name} | ${version} | ${desc} |`);
|
|
26
|
+
lines.push(`| ${handle} | ${name} | ${version} |`);
|
|
28
27
|
}
|
|
29
28
|
|
|
30
29
|
lines.push(
|
|
31
|
-
"\n> Use `
|
|
30
|
+
"\n> Use `platform_get_prompt` with the handle or ID to see full prompt details."
|
|
32
31
|
);
|
|
33
32
|
|
|
34
33
|
return lines.join("\n");
|