@langwatch/mcp-server 0.3.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/CHANGELOG.md +32 -0
  2. package/README.md +97 -25
  3. package/dist/archive-scenario-GAE4XVFM.js +19 -0
  4. package/dist/archive-scenario-GAE4XVFM.js.map +1 -0
  5. package/dist/chunk-AAQNA53E.js +28 -0
  6. package/dist/chunk-AAQNA53E.js.map +1 -0
  7. package/dist/chunk-JVWDWL3J.js +91 -0
  8. package/dist/chunk-JVWDWL3J.js.map +1 -0
  9. package/dist/chunk-K2YFPOSD.js +40 -0
  10. package/dist/chunk-K2YFPOSD.js.map +1 -0
  11. package/dist/chunk-ZXKLPC2E.js +27 -0
  12. package/dist/chunk-ZXKLPC2E.js.map +1 -0
  13. package/dist/config-FIQWQRUB.js +11 -0
  14. package/dist/config-FIQWQRUB.js.map +1 -0
  15. package/dist/create-prompt-P35POKBW.js +22 -0
  16. package/dist/create-prompt-P35POKBW.js.map +1 -0
  17. package/dist/create-scenario-3YRZVDYF.js +26 -0
  18. package/dist/create-scenario-3YRZVDYF.js.map +1 -0
  19. package/dist/discover-scenario-schema-MEEEVND7.js +65 -0
  20. package/dist/discover-scenario-schema-MEEEVND7.js.map +1 -0
  21. package/dist/discover-schema-3T52ORPB.js +446 -0
  22. package/dist/discover-schema-3T52ORPB.js.map +1 -0
  23. package/dist/get-analytics-BAVXTAPB.js +55 -0
  24. package/dist/get-analytics-BAVXTAPB.js.map +1 -0
  25. package/dist/get-prompt-LKCPT26O.js +48 -0
  26. package/dist/get-prompt-LKCPT26O.js.map +1 -0
  27. package/dist/get-scenario-3SCDW4Z6.js +33 -0
  28. package/dist/get-scenario-3SCDW4Z6.js.map +1 -0
  29. package/dist/get-trace-QFDWJ5D4.js +50 -0
  30. package/dist/get-trace-QFDWJ5D4.js.map +1 -0
  31. package/dist/index.js +22114 -8786
  32. package/dist/index.js.map +1 -1
  33. package/dist/list-prompts-UQPBCUYA.js +33 -0
  34. package/dist/list-prompts-UQPBCUYA.js.map +1 -0
  35. package/dist/list-scenarios-573YOUKC.js +40 -0
  36. package/dist/list-scenarios-573YOUKC.js.map +1 -0
  37. package/dist/search-traces-RSMYCAN7.js +72 -0
  38. package/dist/search-traces-RSMYCAN7.js.map +1 -0
  39. package/dist/update-prompt-G2Y5EBQY.js +31 -0
  40. package/dist/update-prompt-G2Y5EBQY.js.map +1 -0
  41. package/dist/update-scenario-SSGVOBJO.js +27 -0
  42. package/dist/update-scenario-SSGVOBJO.js.map +1 -0
  43. package/package.json +3 -3
  44. package/src/__tests__/config.unit.test.ts +89 -0
  45. package/src/__tests__/date-parsing.unit.test.ts +78 -0
  46. package/src/__tests__/discover-schema.unit.test.ts +118 -0
  47. package/src/__tests__/integration.integration.test.ts +313 -0
  48. package/src/__tests__/langwatch-api.unit.test.ts +309 -0
  49. package/src/__tests__/scenario-tools.integration.test.ts +286 -0
  50. package/src/__tests__/scenario-tools.unit.test.ts +185 -0
  51. package/src/__tests__/schemas.unit.test.ts +85 -0
  52. package/src/__tests__/tools.unit.test.ts +729 -0
  53. package/src/config.ts +31 -0
  54. package/src/index.ts +383 -0
  55. package/src/langwatch-api-scenarios.ts +67 -0
  56. package/src/langwatch-api.ts +266 -0
  57. package/src/schemas/analytics-groups.ts +78 -0
  58. package/src/schemas/analytics-metrics.ts +179 -0
  59. package/src/schemas/filter-fields.ts +119 -0
  60. package/src/schemas/index.ts +3 -0
  61. package/src/tools/archive-scenario.ts +19 -0
  62. package/src/tools/create-prompt.ts +29 -0
  63. package/src/tools/create-scenario.ts +30 -0
  64. package/src/tools/discover-scenario-schema.ts +71 -0
  65. package/src/tools/discover-schema.ts +106 -0
  66. package/src/tools/get-analytics.ts +71 -0
  67. package/src/tools/get-prompt.ts +56 -0
  68. package/src/tools/get-scenario.ts +36 -0
  69. package/src/tools/get-trace.ts +61 -0
  70. package/src/tools/list-prompts.ts +35 -0
  71. package/src/tools/list-scenarios.ts +47 -0
  72. package/src/tools/search-traces.ts +91 -0
  73. package/src/tools/update-prompt.ts +44 -0
  74. package/src/tools/update-scenario.ts +32 -0
  75. package/src/utils/date-parsing.ts +31 -0
  76. package/tests/evaluations.ipynb +634 -634
  77. package/tests/scenario-openai.test.ts +3 -1
  78. package/uv.lock +1788 -1322
@@ -0,0 +1,30 @@
1
+ import { createScenario as apiCreateScenario } from "../langwatch-api-scenarios.js";
2
+
3
/**
 * MCP handler for the `create_scenario` tool.
 *
 * Passes `params` unchanged to `createScenario` from the scenarios API module
 * and renders the object that call resolves to as a markdown confirmation.
 *
 * @param params - Scenario fields supplied by the tool caller.
 * @returns A success line followed by ID, name and situation, plus a criteria
 *   count and a comma-separated label list when those arrays are non-empty.
 */
export async function handleCreateScenario(params: {
  name: string;
  situation: string;
  criteria?: string[];
  labels?: string[];
}): Promise<string> {
  const created = await apiCreateScenario(params);

  const output: string[] = [
    "Scenario created successfully!\n",
    `**ID**: ${created.id}`,
    `**Name**: ${created.name}`,
    `**Situation**: ${created.situation}`,
  ];

  // Optional sections are only emitted when the API returned a non-empty array.
  const hasCriteria =
    Array.isArray(created.criteria) && created.criteria.length > 0;
  if (hasCriteria) {
    output.push(`**Criteria**: ${created.criteria.length} criteria`);
  }

  const hasLabels = Array.isArray(created.labels) && created.labels.length > 0;
  if (hasLabels) {
    output.push(`**Labels**: ${created.labels.join(", ")}`);
  }

  return output.join("\n");
}
@@ -0,0 +1,71 @@
1
/**
 * Builds the static, human-readable description of the scenario schema.
 *
 * The text is assembled from fixed sections (fields, situation guidance,
 * criteria guidance, target types, tips) and joined with newlines. Entries
 * that begin or end with "\n" produce the blank lines around headings.
 *
 * @returns The complete markdown document as a single string.
 */
export function formatScenarioSchema(): string {
  const fieldsSection: string[] = [
    "# Scenario Schema\n",
    "## Fields\n",
    '- **name** (required): A short, descriptive name (e.g., "billing dispute resolution", "password reset with 2FA unavailable")',
    "- **situation** (required): The context that guides the user simulator — who the user is, what they want, and any constraints (see Writing a Good Situation below)",
    "- **criteria** (array of strings): Pass/fail conditions a judge evaluates the agent against (see Writing Good Criteria below)",
    '- **labels** (array of strings): Tags for organizing scenarios (e.g., "auth", "happy-path", "edge-case")',
  ];

  const situationSection: string[] = [
    "\n## Writing a Good Situation\n",
    "The situation drives the user simulator. Include these elements:",
    "- **Persona**: Who is the user? (e.g., a stressed small business owner, a confused teenager)",
    "- **Emotional state**: How are they feeling? (e.g., frustrated, anxious, impatient)",
    "- **Background/Context**: What happened before this conversation?",
    "- **Intent**: What do they want to accomplish?",
    "- **Constraints**: What limitations do they have? (e.g., no phone for 2FA, unfamiliar with technical terms)",
    "\nExample:",
    "```",
    "User is a small business owner stressed about tax deadline.",
    "They need help categorizing expenses but aren't familiar with",
    "accounting terms. They appreciate patient explanations and examples.",
    "They have a spreadsheet of transactions but aren't sure which",
    "categories apply to their consulting business.",
    "```",
  ];

  const criteriaSection: string[] = [
    "\n## Writing Good Criteria\n",
    "Criteria are what the judge uses to pass or fail the agent. Each criterion should be:",
    '- **Specific and testable** — not vague like "responds helpfully"',
    "- **Behavioral** — describes what the agent should *do*, not how it works internally",
    "- **Independent** — each criterion checks one thing",
    "\nGood criteria patterns:",
    "- **Information gathering**: \"Agent asks for the user's account number before proceeding\"",
    '- **Safety/guardrails**: "Agent does not reveal internal system details or error stack traces"',
    '- **Clarification**: "Agent asks clarifying questions before taking irreversible action"',
    '- **Tone**: "Agent maintains a professional and empathetic tone throughout"',
    '- **Completeness**: "Agent confirms the user understands the solution before ending"',
    '- **Domain-specific**: "Agent recommends releasing a wild frog rather than keeping it as a pet"',
    "\nAvoid vague criteria like:",
    '- "Responds correctly" — correct how?',
    '- "Is helpful" — helpful in what way?',
    '- "Works well" — not testable',
  ];

  const targetsSection: string[] = [
    "\n## Target Types\n",
    "Scenarios can target different execution backends:",
    "- **prompt**: Test a prompt template with variable substitution",
    "- **http**: Test an HTTP endpoint (e.g., a deployed agent API)",
    "- **code**: Test a code function directly",
  ];

  const tipsSection: string[] = [
    "\n## Tips\n",
    "- Start simple, then layer complexity (add constraints, edge cases)",
    "- Test edge cases: user changes their mind, gives ambiguous input, makes mistakes",
    "- Use `fetch_scenario_docs` for the full authoring guide and advanced patterns",
  ];

  return [
    ...fieldsSection,
    ...situationSection,
    ...criteriaSection,
    ...targetsSection,
    ...tipsSection,
  ].join("\n");
}
@@ -0,0 +1,106 @@
1
+ import { filterFields } from "../schemas/filter-fields.js";
2
+ import { analyticsMetrics } from "../schemas/analytics-metrics.js";
3
+ import { analyticsGroups } from "../schemas/analytics-groups.js";
4
+
5
/**
 * Selector for which part of the analytics schema `formatSchema` renders.
 * `"all"` selects every section.
 */
export type Category = "filters" | "metrics" | "aggregations" | "groups" | "all";
11
+
12
/**
 * Renders the LangWatch analytics schema as markdown.
 *
 * Each section (filters, metrics, aggregations, groups) is produced by its own
 * formatter. A section is included when `category` names it or is `"all"`.
 * Sections always appear in that fixed order, separated by a blank line.
 *
 * @param category - Which section to render, or `"all"` for every section.
 * @returns The selected sections joined by a blank line; an empty string if
 *   nothing matches.
 */
export function formatSchema(category: Category): string {
  // Ordered dispatch table; the thunks keep each formatter lazy so only the
  // requested sections are actually built.
  const sectionBuilders: Array<[Category, () => string]> = [
    ["filters", () => formatFilters()],
    ["metrics", () => formatMetrics()],
    ["aggregations", () => formatAggregations()],
    ["groups", () => formatGroups()],
  ];

  return sectionBuilders
    .filter(([key]) => category === key || category === "all")
    .map(([, build]) => build())
    .join("\n\n");
}
36
+
37
/**
 * Renders the "Available Filter Fields" section: a short usage note followed
 * by one bullet per entry of `filterFields`, with the entry's example appended
 * in parentheses when it has one.
 */
function formatFilters(): string {
  const preamble: string[] = [
    "## Available Filter Fields",
    "",
    "Use these in the `filters` parameter of `search_traces` and `get_analytics`.",
    'Format: `{ "field_name": ["value1", "value2"] }`',
    "",
  ];

  const bullets: string[] = [];
  for (const entry of filterFields) {
    let bullet = `- **${entry.field}**: ${entry.description}`;
    if (entry.example) {
      bullet += ` (e.g., \`${entry.example}\`)`;
    }
    bullets.push(bullet);
  }

  return [...preamble, ...bullets].join("\n");
}
51
+
52
/**
 * Renders the "Available Metrics" section.
 *
 * Entries of `analyticsMetrics` are grouped by their `category`, keeping
 * first-seen order. Each group becomes a `###` heading with one bullet per
 * metric and a line listing that metric's allowed aggregations.
 */
function formatMetrics(): string {
  // Map iteration follows insertion order, so categories appear in the order
  // they are first encountered in `analyticsMetrics`.
  const grouped = new Map<string, typeof analyticsMetrics>();
  for (const metric of analyticsMetrics) {
    const members = grouped.get(metric.category) || [];
    members.push(metric);
    grouped.set(metric.category, members);
  }

  const out: string[] = [
    "## Available Metrics",
    "",
    "Use these in `get_analytics` as `metric` parameter in `category.name` format.",
    "",
  ];

  grouped.forEach((members, categoryName) => {
    out.push(`### ${categoryName}`);
    for (const metric of members) {
      out.push(
        `- **${categoryName}.${metric.name}** (${metric.label}): ${metric.description}`,
      );
      out.push(` Aggregations: ${metric.allowedAggregations.join(", ")}`);
    }
    // Blank line after every category group.
    out.push("");
  });

  return out.join("\n");
}
76
+
77
/**
 * Renders the static "Available Aggregation Types" section: one bullet per
 * aggregation name, followed by a note that metrics restrict which
 * aggregations they accept.
 */
function formatAggregations(): string {
  const aggregationTypes: ReadonlyArray<readonly [string, string]> = [
    ["cardinality", "Count unique values"],
    ["terms", "Distribution/breakdown of values"],
    ["avg", "Average"],
    ["sum", "Sum total"],
    ["min", "Minimum"],
    ["max", "Maximum"],
    ["median", "50th percentile"],
    ["p90", "90th percentile"],
    ["p95", "95th percentile"],
    ["p99", "99th percentile"],
  ];

  const bullets = aggregationTypes.map(
    ([name, meaning]) => `- **${name}**: ${meaning}`,
  );

  return [
    "## Available Aggregation Types",
    "",
    ...bullets,
    "",
    "Note: Not all aggregations are available for all metrics. Check the metric's allowed aggregations.",
  ].join("\n");
}
95
+
96
/**
 * Renders the "Available Group-By Options" section: a usage note followed by
 * one bullet per entry of `analyticsGroups`.
 */
function formatGroups(): string {
  const out: string[] = [
    "## Available Group-By Options",
    "",
    "Use these in the `groupBy` parameter of `get_analytics`.",
    "",
  ];

  for (const group of analyticsGroups) {
    const bullet = `- **${group.name}** (${group.label}): ${group.description}`;
    out.push(bullet);
  }

  return out.join("\n");
}
@@ -0,0 +1,71 @@
1
+ import { getAnalyticsTimeseries as apiGetAnalytics } from "../langwatch-api.js";
2
+ import { parseRelativeDate } from "../utils/date-parsing.js";
3
+
4
/**
 * Handles the get_analytics MCP tool invocation.
 *
 * Calls `getAnalyticsTimeseries` for a single metric series and formats the
 * `currentPeriod` buckets of the result as a markdown table.
 *
 * @param params.metric - Metric in `category.name` form. A value without a dot
 *   is treated as a `metadata` metric. A dotted value is passed through
 *   unchanged, including any further dot-separated segments.
 * @param params.aggregation - Aggregation to apply; defaults to `"avg"`.
 * @param params.startDate - Start of the range, parsed by `parseRelativeDate`;
 *   defaults to seven days before now.
 * @param params.endDate - End of the range, parsed by `parseRelativeDate`;
 *   defaults to now.
 * @param params.timeZone - Time zone forwarded to the API; defaults to `"UTC"`.
 * @param params.groupBy - Optional group-by key forwarded to the API.
 * @param params.filters - Optional filters forwarded to the API.
 * @returns Markdown with a title, the queried period, an optional group-by
 *   line, and either a Date/Value table or a "no data" message.
 */
export async function handleGetAnalytics(params: {
  metric: string;
  aggregation?: string;
  startDate?: string;
  endDate?: string;
  timeZone?: string;
  groupBy?: string;
  filters?: Record<string, string[]>;
}): Promise<string> {
  const msPerDay = 86400000;
  const now = Date.now();
  // NOTE(review): parseRelativeDate is assumed to return epoch milliseconds,
  // matching the numeric defaults used here — confirm against its definition.
  const startDate = params.startDate
    ? parseRelativeDate(params.startDate)
    : now - 7 * msPerDay;
  const endDate = params.endDate ? parseRelativeDate(params.endDate) : now;

  // A dotted metric is used verbatim. Re-assembling it from `split(".", 2)`
  // would silently discard everything after the second dot ("a.b.c" -> "a.b"),
  // so the API would be queried for a different key than the caller asked for.
  const metricKey = params.metric.includes(".")
    ? params.metric
    : `metadata.${params.metric}`;
  const aggregation = params.aggregation ?? "avg";

  const result = await apiGetAnalytics({
    series: [{ metric: metricKey, aggregation }],
    startDate,
    endDate,
    timeZone: params.timeZone ?? "UTC",
    groupBy: params.groupBy,
    filters: params.filters,
  });

  const toIsoDay = (timestamp: number): string =>
    new Date(timestamp).toISOString().split("T")[0];

  const lines: string[] = [];
  lines.push(`# Analytics: ${metricKey} (${aggregation})\n`);
  lines.push(`Period: ${toIsoDay(startDate)} to ${toIsoDay(endDate)}`);
  if (params.groupBy) lines.push(`Grouped by: ${params.groupBy}`);
  lines.push("");

  const currentPeriod = result.currentPeriod ?? [];
  if (currentPeriod.length === 0) {
    lines.push("No data available for this period.");
  } else {
    lines.push("| Date | Value |");
    lines.push("|------|-------|");
    for (const bucket of currentPeriod) {
      // The series value sits under a key other than "date"; take the first
      // numeric entry. NOTE(review): buckets whose values are not plain
      // numbers (possibly the grouped case) render as "N/A" — confirm the
      // response shape when `groupBy` is set.
      const value =
        Object.entries(bucket).find(
          ([key, entry]) => key !== "date" && typeof entry === "number"
        )?.[1] ?? "N/A";
      lines.push(`| ${bucket.date} | ${value} |`);
    }
  }

  lines.push(
    "\n> Tip: Use `discover_schema` to see all available metrics and aggregation types."
  );

  return lines.join("\n");
}
@@ -0,0 +1,56 @@
1
+ import { getPrompt as apiGetPrompt } from "../langwatch-api.js";
2
+
3
/**
 * MCP handler for the `get_prompt` tool.
 *
 * Fetches a prompt through `getPrompt` (by ID or handle, optionally at a
 * specific version) and renders it as markdown: identifying fields, model
 * configuration, messages, and up to ten entries of version history.
 *
 * @param params.idOrHandle - Prompt ID or handle passed to the API.
 * @param params.version - Optional version number passed to the API.
 * @returns The markdown description of the prompt.
 */
export async function handleGetPrompt(params: {
  idOrHandle: string;
  version?: number;
}): Promise<string> {
  const maxHistoryEntries = 10;
  const prompt = await apiGetPrompt(params.idOrHandle, params.version);

  const out: string[] = [
    `# Prompt: ${prompt.name || prompt.handle || prompt.id}\n`,
  ];

  if (prompt.handle) {
    out.push(`**Handle**: ${prompt.handle}`);
  }
  if (prompt.id) {
    out.push(`**ID**: ${prompt.id}`);
  }
  if (prompt.description) {
    out.push(`**Description**: ${prompt.description}`);
  }
  if (prompt.latestVersionNumber != null) {
    out.push(`**Latest Version**: v${prompt.latestVersionNumber}`);
  }

  // Model configuration comes from the first listed version when there is
  // one, otherwise from the prompt object itself.
  const configSource = prompt.versions?.[0] ?? prompt;
  if (configSource.model) {
    out.push(`**Model**: ${configSource.model}`);
  }
  if (configSource.modelProvider) {
    out.push(`**Provider**: ${configSource.modelProvider}`);
  }

  // Messages: prefer the version's messages, then `prompt.prompt`.
  const messageList = configSource.messages || prompt.prompt || [];
  if (Array.isArray(messageList) && messageList.length > 0) {
    out.push("\n## Messages");
    for (const message of messageList) {
      out.push(`\n### ${message.role}`);
      out.push(message.content);
    }
  }

  // Version history, capped at the first ten entries with a trailing count.
  const history = prompt.versions;
  if (history && history.length > 0) {
    out.push("\n## Version History");
    for (const entry of history.slice(0, maxHistoryEntries)) {
      const label = entry.version ?? "?";
      const note = entry.commitMessage || "No message";
      out.push(`- **v${label}**: ${note}`);
    }
    if (history.length > maxHistoryEntries) {
      out.push(`... and ${history.length - maxHistoryEntries} more versions`);
    }
  }

  return out.join("\n");
}
@@ -0,0 +1,36 @@
1
+ import { getScenario as apiGetScenario } from "../langwatch-api-scenarios.js";
2
+
3
/**
 * MCP handler for the `get_scenario` tool.
 *
 * Fetches one scenario through `getScenario` and returns it either as
 * pretty-printed JSON (`format: "json"`) or as a markdown digest.
 *
 * @param params.scenarioId - ID of the scenario to fetch.
 * @param params.format - `"json"` for the raw object; anything else yields the
 *   digest.
 * @returns The serialised scenario or its markdown digest.
 */
export async function handleGetScenario(params: {
  scenarioId: string;
  format?: "digest" | "json";
}): Promise<string> {
  const scenario = await apiGetScenario(params.scenarioId);

  if (params.format === "json") {
    return JSON.stringify(scenario, null, 2);
  }

  const out: string[] = [
    `# Scenario: ${scenario.name}\n`,
    `**ID**: ${scenario.id}`,
    `**Situation**: ${scenario.situation}`,
  ];

  const hasCriteria =
    Array.isArray(scenario.criteria) && scenario.criteria.length > 0;
  if (hasCriteria) {
    out.push("\n## Criteria");
    for (const item of scenario.criteria) {
      out.push(`- ${item}`);
    }
  }

  const hasLabels =
    Array.isArray(scenario.labels) && scenario.labels.length > 0;
  if (hasLabels) {
    out.push(`\n**Labels**: ${scenario.labels.join(", ")}`);
  }

  return out.join("\n");
}
@@ -0,0 +1,61 @@
1
+ import { getTraceById as apiGetTraceById } from "../langwatch-api.js";
2
+
3
/**
 * MCP handler for the `get_trace` tool.
 *
 * Fetches one trace through `getTraceById`, forwarding the requested format
 * (default `"digest"`). In `"json"` mode the result is returned pretty-printed.
 * Otherwise a markdown digest is built from the timestamps, metadata,
 * evaluations and `formatted_trace` fields that are present.
 *
 * @param params.traceId - ID of the trace to fetch.
 * @param params.format - `"digest"` (default) or `"json"`.
 * @returns The serialised trace or its markdown digest.
 */
export async function handleGetTrace(params: {
  traceId: string;
  format?: "digest" | "json";
}): Promise<string> {
  const format = params.format ?? "digest";
  const trace = await apiGetTraceById(params.traceId, format);

  if (format === "json") {
    return JSON.stringify(trace, null, 2);
  }

  const out: string[] = [`# Trace: ${params.traceId}\n`];

  if (trace.timestamps) {
    out.push(`**Started**: ${trace.timestamps.started_at}`);
    if (trace.timestamps.updated_at) {
      out.push(`**Updated**: ${trace.timestamps.updated_at}`);
    }
  }

  if (trace.metadata) {
    const metadata = trace.metadata;
    if (metadata.user_id) {
      out.push(`**User**: ${metadata.user_id}`);
    }
    if (metadata.thread_id) {
      out.push(`**Thread**: ${metadata.thread_id}`);
    }
    if (metadata.customer_id) {
      out.push(`**Customer**: ${metadata.customer_id}`);
    }
    if (metadata.labels?.length) {
      out.push(`**Labels**: ${metadata.labels.join(", ")}`);
    }
  }

  if (trace.evaluations && trace.evaluations.length > 0) {
    out.push("\n## Evaluations");
    for (const evaluation of trace.evaluations) {
      // `passed` is tri-state: only a strict boolean maps to PASSED/FAILED.
      let verdict = "N/A";
      if (evaluation.passed === true) {
        verdict = "PASSED";
      } else if (evaluation.passed === false) {
        verdict = "FAILED";
      }

      const title = evaluation.name || evaluation.evaluator_id;
      const scoreSuffix =
        evaluation.score != null ? ` (score: ${evaluation.score})` : "";
      const labelSuffix = evaluation.label ? ` [${evaluation.label}]` : "";
      out.push(`- **${title}**: ${verdict}${scoreSuffix}${labelSuffix}`);
    }
  }

  if (trace.formatted_trace) {
    out.push(`\n## Trace Details\n${trace.formatted_trace}`);
  }

  out.push(
    '\n> Tip: Use `get_trace` with `format: "json"` to get the full raw trace data.'
  );

  return out.join("\n");
}
@@ -0,0 +1,35 @@
1
+ import { listPrompts as apiListPrompts } from "../langwatch-api.js";
2
+
3
/**
 * Handles the list_prompts MCP tool invocation.
 *
 * Fetches the prompts through `listPrompts` and formats them as a markdown
 * table with one row per prompt.
 *
 * Cell text is sanitised before it is placed in the table: line breaks are
 * flattened to a space and `|` is escaped. Without this, a handle, name or
 * description containing either character would split or corrupt its row.
 *
 * @returns A "no prompts" message when the API result is not a non-empty
 *   array; otherwise the markdown table followed by a usage hint.
 */
export async function handleListPrompts(): Promise<string> {
  const prompts = await apiListPrompts();

  if (!Array.isArray(prompts) || prompts.length === 0) {
    return "No prompts found in this project.";
  }

  const maxDescriptionLength = 60;

  // Flatten line breaks so a cell can never span rows.
  const flatten = (text: string): string =>
    String(text).replace(/\s*[\r\n]+\s*/g, " ");
  // Escape pipes last so truncation can never cut an escape sequence in half.
  const escapePipes = (text: string): string => text.replace(/\|/g, "\\|");
  const toCell = (text: string): string => escapePipes(flatten(text));

  const lines: string[] = [];
  lines.push(`# Prompts (${prompts.length} total)\n`);
  lines.push("| Handle | Name | Latest Version | Description |");
  lines.push("|--------|------|----------------|-------------|");

  for (const p of prompts) {
    const handle = toCell(p.handle || p.id || "N/A");
    const name = toCell(p.name || "Untitled");
    const versionNum = p.latestVersionNumber ?? p.version;
    const version = versionNum != null ? `v${versionNum}` : "N/A";
    const desc = escapePipes(
      flatten(p.description || "").slice(0, maxDescriptionLength)
    );
    lines.push(`| ${handle} | ${name} | ${version} | ${desc} |`);
  }

  lines.push(
    "\n> Use `get_prompt` with the handle or ID to see full prompt details."
  );

  return lines.join("\n");
}
@@ -0,0 +1,47 @@
1
+ import { listScenarios as apiListScenarios } from "../langwatch-api-scenarios.js";
2
+
3
/**
 * MCP handler for the `list_scenarios` tool.
 *
 * Fetches the scenarios through `listScenarios` and returns them either as
 * pretty-printed JSON (`format: "json"`) or as a markdown digest with one
 * section per scenario.
 *
 * @param params.format - `"json"` for the raw result; anything else yields the
 *   digest.
 * @returns The serialised result, a "no scenarios" message with a tip when the
 *   result is not a non-empty array, or the markdown digest.
 */
export async function handleListScenarios(params: {
  format?: "digest" | "json";
}): Promise<string> {
  const previewLength = 60;
  const scenarios = await apiListScenarios();

  if (params.format === "json") {
    return JSON.stringify(scenarios, null, 2);
  }

  const isEmpty = !Array.isArray(scenarios) || scenarios.length === 0;
  if (isEmpty) {
    return "No scenarios found in this project.\n\n> Tip: Use `create_scenario` to create your first scenario.";
  }

  const out: string[] = [`# Scenarios (${scenarios.length} total)\n`];

  for (const scenario of scenarios) {
    // Long situations are cut to a fixed-size preview with an ellipsis.
    let situationPreview = scenario.situation;
    if (scenario.situation && scenario.situation.length > previewLength) {
      situationPreview = scenario.situation.slice(0, previewLength) + "...";
    }

    const criteriaCount = Array.isArray(scenario.criteria)
      ? scenario.criteria.length
      : 0;

    out.push(
      `## ${scenario.name}`,
      `**ID**: ${scenario.id}`,
      `**Situation**: ${situationPreview}`,
      `**Criteria**: ${criteriaCount} criteria`,
    );
    if (Array.isArray(scenario.labels) && scenario.labels.length > 0) {
      out.push(`**Labels**: ${scenario.labels.join(", ")}`);
    }
    out.push("");
  }

  out.push("> Use `get_scenario` with the ID to see full scenario details.");

  return out.join("\n");
}
@@ -0,0 +1,91 @@
1
+ import { searchTraces as apiSearchTraces } from "../langwatch-api.js";
2
+ import { parseRelativeDate } from "../utils/date-parsing.js";
3
+
4
/**
 * Handles the search_traces MCP tool invocation.
 *
 * Searches traces through `searchTraces` with an optional text query, filters
 * and date range. In digest mode (default) the result is a markdown listing
 * with one section per trace. In json mode the raw API result is returned
 * pretty-printed — including when it contains no traces, so a caller that
 * asked for JSON always receives parseable JSON.
 *
 * @param params.query - Optional text query forwarded to the API.
 * @param params.filters - Optional filters forwarded to the API.
 * @param params.startDate - Start of the range, parsed by `parseRelativeDate`;
 *   defaults to 24 hours before now.
 * @param params.endDate - End of the range, parsed by `parseRelativeDate`;
 *   defaults to now.
 * @param params.pageSize - Page size forwarded to the API; defaults to 25.
 * @param params.scrollId - Pagination cursor from a previous call.
 * @param params.format - `"digest"` (default) or `"json"`.
 * @returns The serialised result (json mode), a "no traces" message, or the
 *   markdown digest with pagination and usage hints.
 */
export async function handleSearchTraces(params: {
  query?: string;
  filters?: Record<string, string[]>;
  startDate?: string;
  endDate?: string;
  pageSize?: number;
  scrollId?: string;
  format?: "digest" | "json";
}): Promise<string> {
  const msPerDay = 86400000;
  const previewLength = 100;

  const now = Date.now();
  const startDate = params.startDate
    ? parseRelativeDate(params.startDate)
    : now - msPerDay;
  const endDate = params.endDate ? parseRelativeDate(params.endDate) : now;
  const format = params.format ?? "digest";

  const result = await apiSearchTraces({
    query: params.query,
    filters: params.filters,
    startDate,
    endDate,
    pageSize: params.pageSize ?? 25,
    scrollId: params.scrollId,
    format,
  });

  // JSON mode is decided before any digest-only shortcut, so an empty result
  // is still returned as JSON rather than as a prose sentence.
  if (format === "json") {
    return JSON.stringify(result, null, 2);
  }

  const traces = result.traces ?? [];
  if (traces.length === 0) {
    return "No traces found matching your query.";
  }

  // Cuts long input/output text to a fixed-size preview with an ellipsis.
  const clip = (text: string): string =>
    text.length > previewLength ? `${text.slice(0, previewLength)}...` : text;

  const lines: string[] = [];
  lines.push(
    `Found ${result.pagination?.totalHits ?? traces.length} traces:\n`
  );

  for (const trace of traces) {
    lines.push(`### Trace: ${trace.trace_id}`);

    if (trace.formatted_trace) {
      lines.push(trace.formatted_trace);
    } else {
      // No pre-formatted digest: fall back to short input/output previews.
      const inputStr = trace.input?.value ? String(trace.input.value) : "N/A";
      const outputStr = trace.output?.value
        ? String(trace.output.value)
        : "N/A";
      lines.push(`- **Input**: ${clip(inputStr)}`);
      lines.push(`- **Output**: ${clip(outputStr)}`);
    }

    if (trace.timestamps) {
      lines.push(`- **Time**: ${trace.timestamps.started_at || "N/A"}`);
    }
    if (trace.error) {
      lines.push(`- **Error**: ${JSON.stringify(trace.error)}`);
    }
    lines.push("");
  }

  if (result.pagination?.scrollId) {
    lines.push(
      `\n**More results available.** Use scrollId: "${result.pagination.scrollId}" to get next page.`
    );
  }

  lines.push(
    '\n> Tip: Use `get_trace` with a trace_id for full details. Use `search_traces` with `format: "json"` for raw data. Use `discover_schema` to see available filter fields.'
  );

  return lines.join("\n");
}
@@ -0,0 +1,44 @@
1
+ import {
2
+ updatePrompt as apiUpdatePrompt,
3
+ createPromptVersion as apiCreateVersion,
4
+ } from "../langwatch-api.js";
5
+ import type { PromptMutationResponse } from "../langwatch-api.js";
6
+
7
/**
 * MCP handler for the `update_prompt` tool.
 *
 * When `createVersion` is truthy the remaining fields are sent to
 * `createPromptVersion`; otherwise they are sent to `updatePrompt`. The
 * response is rendered as a short markdown confirmation.
 *
 * @param params.idOrHandle - Prompt ID or handle identifying the prompt.
 * @param params.createVersion - Selects the create-version call instead of the
 *   in-place update.
 * @returns A success line followed by whichever of ID, handle, version and
 *   commit message are available.
 */
export async function handleUpdatePrompt(params: {
  idOrHandle: string;
  messages?: Array<{ role: string; content: string }>;
  model?: string;
  modelProvider?: string;
  commitMessage?: string;
  createVersion?: boolean;
}): Promise<string> {
  // Everything except the routing fields is forwarded as the mutation payload.
  const { idOrHandle, createVersion, ...payload } = params;

  const response: PromptMutationResponse = createVersion
    ? await apiCreateVersion(idOrHandle, payload)
    : await apiUpdatePrompt(idOrHandle, payload);

  const headline = createVersion
    ? "New version created successfully!\n"
    : "Prompt updated successfully!\n";
  const out: string[] = [headline];

  if (response.id) {
    out.push(`**ID**: ${response.id}`);
  }
  if (response.handle) {
    out.push(`**Handle**: ${response.handle}`);
  }
  if (response.latestVersionNumber != null) {
    out.push(`**Version**: v${response.latestVersionNumber}`);
  }
  // The commit message is echoed from the request, not read from the response.
  if (params.commitMessage) {
    out.push(`**Commit**: ${params.commitMessage}`);
  }

  return out.join("\n");
}
@@ -0,0 +1,32 @@
1
+ import { updateScenario as apiUpdateScenario } from "../langwatch-api-scenarios.js";
2
+
3
/**
 * MCP handler for the `update_scenario` tool.
 *
 * Renames `scenarioId` to `id`, forwards it together with the remaining
 * fields to `updateScenario`, and renders the returned scenario as a markdown
 * confirmation.
 *
 * @param params.scenarioId - ID of the scenario to update.
 * @returns A success line followed by ID and name, plus situation, criteria
 *   count and labels when the response carries them.
 */
export async function handleUpdateScenario(params: {
  scenarioId: string;
  name?: string;
  situation?: string;
  criteria?: string[];
  labels?: string[];
}): Promise<string> {
  const { scenarioId, ...changes } = params;
  const updated = await apiUpdateScenario({ id: scenarioId, ...changes });

  const out: string[] = [
    "Scenario updated successfully!\n",
    `**ID**: ${updated.id}`,
    `**Name**: ${updated.name}`,
  ];

  if (updated.situation) {
    out.push(`**Situation**: ${updated.situation}`);
  }

  const hasCriteria =
    Array.isArray(updated.criteria) && updated.criteria.length > 0;
  if (hasCriteria) {
    out.push(`**Criteria**: ${updated.criteria.length} criteria`);
  }

  const hasLabels = Array.isArray(updated.labels) && updated.labels.length > 0;
  if (hasLabels) {
    out.push(`**Labels**: ${updated.labels.join(", ")}`);
  }

  return out.join("\n");
}