@langwatch/mcp-server 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/CHANGELOG.md +20 -0
  2. package/README.md +97 -25
  3. package/dist/chunk-AAQNA53E.js +28 -0
  4. package/dist/chunk-AAQNA53E.js.map +1 -0
  5. package/dist/chunk-HOPTUDCZ.js +90 -0
  6. package/dist/chunk-HOPTUDCZ.js.map +1 -0
  7. package/dist/chunk-ZXKLPC2E.js +27 -0
  8. package/dist/chunk-ZXKLPC2E.js.map +1 -0
  9. package/dist/config-FIQWQRUB.js +11 -0
  10. package/dist/config-FIQWQRUB.js.map +1 -0
  11. package/dist/create-prompt-UBC537BJ.js +22 -0
  12. package/dist/create-prompt-UBC537BJ.js.map +1 -0
  13. package/dist/discover-schema-3T52ORPB.js +446 -0
  14. package/dist/discover-schema-3T52ORPB.js.map +1 -0
  15. package/dist/get-analytics-3IFTN6MY.js +55 -0
  16. package/dist/get-analytics-3IFTN6MY.js.map +1 -0
  17. package/dist/get-prompt-2ZB5B3QC.js +48 -0
  18. package/dist/get-prompt-2ZB5B3QC.js.map +1 -0
  19. package/dist/get-trace-7IXKKCJJ.js +50 -0
  20. package/dist/get-trace-7IXKKCJJ.js.map +1 -0
  21. package/dist/index.d.ts +2 -0
  22. package/dist/index.js +20003 -0
  23. package/dist/index.js.map +1 -0
  24. package/dist/list-prompts-J72LTP7Z.js +33 -0
  25. package/dist/list-prompts-J72LTP7Z.js.map +1 -0
  26. package/dist/search-traces-RW2NDHN5.js +72 -0
  27. package/dist/search-traces-RW2NDHN5.js.map +1 -0
  28. package/dist/update-prompt-G6HHZSUM.js +31 -0
  29. package/dist/update-prompt-G6HHZSUM.js.map +1 -0
  30. package/package.json +8 -8
  31. package/src/__tests__/config.unit.test.ts +89 -0
  32. package/src/__tests__/date-parsing.unit.test.ts +78 -0
  33. package/src/__tests__/discover-schema.unit.test.ts +118 -0
  34. package/src/__tests__/integration.integration.test.ts +313 -0
  35. package/src/__tests__/langwatch-api.unit.test.ts +309 -0
  36. package/src/__tests__/schemas.unit.test.ts +85 -0
  37. package/src/__tests__/tools.unit.test.ts +729 -0
  38. package/src/config.ts +31 -0
  39. package/src/index.ts +254 -0
  40. package/src/langwatch-api.ts +265 -0
  41. package/src/schemas/analytics-groups.ts +78 -0
  42. package/src/schemas/analytics-metrics.ts +179 -0
  43. package/src/schemas/filter-fields.ts +119 -0
  44. package/src/schemas/index.ts +3 -0
  45. package/src/tools/create-prompt.ts +29 -0
  46. package/src/tools/discover-schema.ts +106 -0
  47. package/src/tools/get-analytics.ts +71 -0
  48. package/src/tools/get-prompt.ts +56 -0
  49. package/src/tools/get-trace.ts +61 -0
  50. package/src/tools/list-prompts.ts +35 -0
  51. package/src/tools/search-traces.ts +91 -0
  52. package/src/tools/update-prompt.ts +44 -0
  53. package/src/utils/date-parsing.ts +31 -0
  54. package/tests/evaluations.ipynb +634 -634
  55. package/tests/scenario-openai.test.ts +3 -1
@@ -0,0 +1,179 @@
/**
 * Describes a single metric that can be passed to `get_analytics`.
 */
export interface MetricInfo {
  /** Namespace the metric belongs to (first half of `category.name`). */
  category: string;
  /** Metric identifier inside its category (second half of `category.name`). */
  name: string;
  /** Human-readable display name. */
  label: string;
  /** Aggregation types that may be combined with this metric. */
  allowedAggregations: string[];
  /** Short explanation of what the metric measures. */
  description: string;
}

/** Aggregations shared by every purely numeric metric. */
const NUMERIC_AGGREGATIONS: readonly string[] = [
  "avg",
  "sum",
  "min",
  "max",
  "median",
  "p99",
  "p95",
  "p90",
];

/** Aggregation list for metrics that are only ever counted. */
const COUNT_ONLY: readonly string[] = ["cardinality"];

/**
 * Builds one catalogue entry. The aggregation list is copied so that no two
 * entries ever share the same array instance.
 */
function metric(
  category: string,
  name: string,
  label: string,
  allowedAggregations: readonly string[],
  description: string
): MetricInfo {
  return {
    category,
    name,
    label,
    allowedAggregations: [...allowedAggregations],
    description,
  };
}

/**
 * Catalogue of every analytics metric, listed category by category.
 */
export const analyticsMetrics: MetricInfo[] = [
  // metadata
  metric("metadata", "trace_id", "Traces", COUNT_ONLY, "Count of unique traces"),
  metric("metadata", "user_id", "Users", COUNT_ONLY, "Count of unique users"),
  metric(
    "metadata",
    "thread_id",
    "Threads",
    COUNT_ONLY,
    "Count of unique conversation threads"
  ),
  metric(
    "metadata",
    "span_type",
    "Span Type",
    COUNT_ONLY,
    "Count of spans, optionally filtered by span type"
  ),

  // sentiment
  metric(
    "sentiment",
    "input_sentiment",
    "Input Sentiment Score",
    NUMERIC_AGGREGATIONS,
    "Sentiment analysis score of inputs"
  ),
  metric(
    "sentiment",
    "thumbs_up_down",
    "Thumbs Up/Down Score",
    ["terms", "cardinality", ...NUMERIC_AGGREGATIONS],
    "User feedback score (-1 to 1)"
  ),

  // performance
  metric(
    "performance",
    "completion_time",
    "Completion Time",
    NUMERIC_AGGREGATIONS,
    "Time to complete the trace (ms)"
  ),
  metric(
    "performance",
    "first_token",
    "Time to First Token",
    NUMERIC_AGGREGATIONS,
    "Time to first token (ms)"
  ),
  metric(
    "performance",
    "total_cost",
    "Total Cost",
    NUMERIC_AGGREGATIONS,
    "Cost per trace in USD"
  ),
  metric(
    "performance",
    "prompt_tokens",
    "Prompt Tokens",
    NUMERIC_AGGREGATIONS,
    "Input token count"
  ),
  metric(
    "performance",
    "completion_tokens",
    "Completion Tokens",
    NUMERIC_AGGREGATIONS,
    "Output token count"
  ),
  metric(
    "performance",
    "total_tokens",
    "Total Tokens",
    NUMERIC_AGGREGATIONS,
    "Total token count (input + output)"
  ),
  metric(
    "performance",
    "tokens_per_second",
    "Tokens per Second",
    NUMERIC_AGGREGATIONS,
    "Token generation speed"
  ),

  // events
  metric(
    "events",
    "event_type",
    "Event Type",
    COUNT_ONLY,
    "Count of events, optionally filtered by event type"
  ),
  metric(
    "events",
    "event_score",
    "Event Score",
    ["terms", ...NUMERIC_AGGREGATIONS],
    "Numeric score from events (requires event_type key and metrics key)"
  ),
  metric(
    "events",
    "event_details",
    "Event Details",
    COUNT_ONLY,
    "Event detail key/value distribution (requires event_type key and details key)"
  ),

  // evaluations
  metric(
    "evaluations",
    "evaluation_score",
    "Evaluation Score",
    NUMERIC_AGGREGATIONS,
    "Numeric evaluation score (requires evaluator_id key)"
  ),
  metric(
    "evaluations",
    "evaluation_pass_rate",
    "Evaluation Pass Rate",
    NUMERIC_AGGREGATIONS,
    "Percentage of traces passing evaluation (requires evaluator_id key)"
  ),
  metric(
    "evaluations",
    "evaluation_runs",
    "Evaluation Runs",
    COUNT_ONLY,
    "Count of evaluation executions"
  ),

  // threads
  metric(
    "threads",
    "average_duration_per_thread",
    "Thread Duration",
    ["avg"],
    "Average duration of conversation threads (ms)"
  ),
];
@@ -0,0 +1,119 @@
/**
 * Describes one field that can be used as a key in a `filters` object.
 */
export interface FilterFieldInfo {
  /** Fully qualified filter key, e.g. `metadata.user_id`. */
  field: string;
  /** Short explanation of what the field filters on. */
  description: string;
  /** Sample value shown to the user, when one is provided. */
  example?: string;
}

/** Compact `[field, description, example?]` rows backing `filterFields`. */
const FILTER_FIELD_ROWS: ReadonlyArray<
  readonly [field: string, description: string, example?: string]
> = [
  ["topics.topics", "Main topic classification of the trace", "billing"],
  ["topics.subtopics", "Subtopic classification", "refund-request"],
  ["metadata.user_id", "User ID from trace metadata", "user-123"],
  ["metadata.thread_id", "Conversation thread ID", "thread-456"],
  ["metadata.customer_id", "Customer/organization ID", "customer-789"],
  ["metadata.labels", "Custom labels attached to traces", "production"],
  ["metadata.key", "Custom metadata key", "environment"],
  ["metadata.value", "Custom metadata value (used with metadata.key)", "staging"],
  ["metadata.prompt_ids", "Prompt IDs used in the trace"],
  ["traces.error", "Whether the trace has errors", "true"],
  ["spans.type", "Span type (llm, tool, agent, chain, rag)", "llm"],
  ["spans.model", "LLM model name used in spans", "gpt-4o"],
  ["evaluations.evaluator_id", "Evaluator that ran on the trace"],
  [
    "evaluations.evaluator_id.guardrails_only",
    "Evaluator ID filtered to guardrails only",
  ],
  ["evaluations.passed", "Whether evaluations passed", "true"],
  ["evaluations.score", "Evaluation score (numeric)"],
  ["evaluations.state", "Evaluation state (processed, error, skipped)"],
  ["evaluations.label", "Evaluation label result"],
  ["events.event_type", "Type of event (thumbs_up_down, custom)", "thumbs_up_down"],
  ["events.metrics.key", "Event metric key"],
  ["events.metrics.value", "Event metric value (numeric)"],
  ["events.event_details.key", "Event detail key"],
  ["annotations.hasAnnotation", "Whether trace has human annotations", "true"],
  ["sentiment.input_sentiment", "Detected sentiment of input", "positive"],
];

/**
 * Catalogue of every filter field. Entries without a sample value carry no
 * `example` property at all (rather than `example: undefined`).
 */
export const filterFields: FilterFieldInfo[] = FILTER_FIELD_ROWS.map(
  ([field, description, example]): FilterFieldInfo =>
    example === undefined
      ? { field, description }
      : { field, description, example }
);
@@ -0,0 +1,3 @@
1
+ export { filterFields, type FilterFieldInfo } from "./filter-fields.js";
2
+ export { analyticsMetrics, type MetricInfo } from "./analytics-metrics.js";
3
+ export { analyticsGroups, type GroupByInfo } from "./analytics-groups.js";
@@ -0,0 +1,29 @@
1
+ import { createPrompt as apiCreatePrompt } from "../langwatch-api.js";
2
+
/**
 * Entry point for the `create_prompt` MCP tool.
 *
 * Forwards the parameters unchanged to the LangWatch API and renders a short
 * markdown confirmation from the response.
 *
 * @param params - Definition of the prompt to create.
 * @returns Markdown summary; ID, handle and version lines appear only when the
 *   API response contains them, and the name falls back to `params.name`.
 */
export async function handleCreatePrompt(params: {
  name: string;
  handle?: string;
  messages: Array<{ role: string; content: string }>;
  model: string;
  modelProvider: string;
  description?: string;
}): Promise<string> {
  const created = await apiCreatePrompt(params);

  // Optional lines are represented as null and dropped before joining.
  const summary = [
    "Prompt created successfully!\n",
    created.id ? `**ID**: ${created.id}` : null,
    created.handle ? `**Handle**: ${created.handle}` : null,
    `**Name**: ${created.name || params.name}`,
    `**Model**: ${params.model} (${params.modelProvider})`,
    created.latestVersionNumber != null
      ? `**Version**: v${created.latestVersionNumber}`
      : null,
  ].filter((line): line is string => line !== null);

  return summary.join("\n");
}
@@ -0,0 +1,106 @@
1
+ import { filterFields } from "../schemas/filter-fields.js";
2
+ import { analyticsMetrics } from "../schemas/analytics-metrics.js";
3
+ import { analyticsGroups } from "../schemas/analytics-groups.js";
4
+
/**
 * Schema section selectable through `formatSchema`; `"all"` selects every
 * section.
 */
export type Category = "filters" | "metrics" | "aggregations" | "groups" | "all";
11
+
/**
 * Renders the LangWatch analytics schema as markdown documentation.
 *
 * @param category - Section to render; `"all"` renders every section in the
 *   order filters, metrics, aggregations, groups.
 * @returns The selected sections separated by a blank line.
 */
export function formatSchema(category: Category): string {
  // Declaration order here is the output order when "all" is requested.
  const renderers: ReadonlyArray<
    readonly [Exclude<Category, "all">, () => string]
  > = [
    ["filters", formatFilters],
    ["metrics", formatMetrics],
    ["aggregations", formatAggregations],
    ["groups", formatGroups],
  ];

  return renderers
    .filter(([section]) => category === "all" || category === section)
    .map(([, render]) => render())
    .join("\n\n");
}
36
+
/**
 * Renders the filter-field catalogue as a markdown bullet list, one bullet per
 * field, appending the example value when the field defines one.
 */
function formatFilters(): string {
  const heading = [
    "## Available Filter Fields",
    "",
    "Use these in the `filters` parameter of `search_traces` and `get_analytics`.",
    'Format: `{ "field_name": ["value1", "value2"] }`',
    "",
  ];

  const bullets = filterFields.map(({ field, description, example }) => {
    const hint = example ? ` (e.g., \`${example}\`)` : "";
    return `- **${field}**: ${description}${hint}`;
  });

  return [...heading, ...bullets].join("\n");
}
51
+
/**
 * Renders the metric catalogue as markdown, grouped under one `###` heading
 * per category. Categories appear in the order they are first encountered and
 * each group is followed by an empty line.
 */
function formatMetrics(): string {
  // Map keeps insertion order, so categories stay in catalogue order.
  const grouped = new Map<string, typeof analyticsMetrics>();
  analyticsMetrics.forEach((entry) => {
    const bucket = grouped.get(entry.category);
    if (bucket) {
      bucket.push(entry);
    } else {
      grouped.set(entry.category, [entry]);
    }
  });

  const output: string[] = [
    "## Available Metrics",
    "",
    "Use these in `get_analytics` as `metric` parameter in `category.name` format.",
    "",
  ];

  grouped.forEach((members, categoryName) => {
    output.push(`### ${categoryName}`);
    members.forEach((entry) => {
      output.push(
        `- **${categoryName}.${entry.name}** (${entry.label}): ${entry.description}`,
        `  Aggregations: ${entry.allowedAggregations.join(", ")}`
      );
    });
    output.push("");
  });

  return output.join("\n");
}
76
+
/**
 * Renders the fixed list of aggregation types as a markdown bullet list,
 * followed by a note that not every metric supports every aggregation.
 */
function formatAggregations(): string {
  const aggregationTypes: ReadonlyArray<readonly [string, string]> = [
    ["cardinality", "Count unique values"],
    ["terms", "Distribution/breakdown of values"],
    ["avg", "Average"],
    ["sum", "Sum total"],
    ["min", "Minimum"],
    ["max", "Maximum"],
    ["median", "50th percentile"],
    ["p90", "90th percentile"],
    ["p95", "95th percentile"],
    ["p99", "99th percentile"],
  ];

  const bullets = aggregationTypes.map(
    ([name, meaning]) => `- **${name}**: ${meaning}`
  );

  return [
    "## Available Aggregation Types",
    "",
    ...bullets,
    "",
    "Note: Not all aggregations are available for all metrics. Check the metric's allowed aggregations.",
  ].join("\n");
}
95
+
/**
 * Renders the group-by catalogue as a markdown bullet list, one bullet per
 * option in `name (label): description` form.
 */
function formatGroups(): string {
  const heading = [
    "## Available Group-By Options",
    "",
    "Use these in the `groupBy` parameter of `get_analytics`.",
    "",
  ];

  const bullets = analyticsGroups.map(
    (group) => `- **${group.name}** (${group.label}): ${group.description}`
  );

  return heading.concat(bullets).join("\n");
}
@@ -0,0 +1,71 @@
1
+ import { getAnalyticsTimeseries as apiGetAnalytics } from "../langwatch-api.js";
2
+ import { parseRelativeDate } from "../utils/date-parsing.js";
3
+
/**
 * Appends a `path: value` entry to `out` for every numeric leaf found inside
 * `node`, descending through plain objects only.
 */
function collectNumericLeaves(
  node: unknown,
  path: readonly string[],
  out: string[]
): void {
  if (typeof node === "number") {
    out.push(`${path.join(" > ")}: ${node}`);
    return;
  }
  if (node !== null && typeof node === "object" && !Array.isArray(node)) {
    for (const [key, child] of Object.entries(node)) {
      collectNumericLeaves(child, [...path, key], out);
    }
  }
}

/**
 * Picks the text shown in the "Value" column for one timeseries bucket.
 *
 * The first top-level numeric field (other than `date`) wins. When there is
 * none, numeric values nested inside object fields are listed instead, so such
 * rows no longer collapse to "N/A".
 */
function formatBucketValue(bucket: Record<string, unknown>): string {
  const fields = Object.entries(bucket).filter(([key]) => key !== "date");

  const direct = fields.find(([, value]) => typeof value === "number");
  if (direct) return String(direct[1]);

  // NOTE(review): presumably grouped responses nest their numbers under the
  // group key — confirm against the analytics API response shape.
  const nested: string[] = [];
  for (const [key, value] of fields) {
    collectNumericLeaves(value, [key], nested);
  }
  return nested.length > 0 ? nested.join(", ") : "N/A";
}

/**
 * Handles the get_analytics MCP tool invocation.
 *
 * Queries analytics timeseries from LangWatch and formats the results
 * as an AI-readable markdown table.
 *
 * @param params - Query options. `metric` is expected in `category.name`
 *   form; a bare name is placed in the `metadata` category. `aggregation`
 *   defaults to `"avg"`, `timeZone` to `"UTC"`, and the period to the last
 *   seven days ending now.
 * @returns Markdown with a title, the queried period, an optional
 *   "Grouped by" line, and either a Date/Value table or a no-data notice.
 */
export async function handleGetAnalytics(params: {
  metric: string;
  aggregation?: string;
  startDate?: string;
  endDate?: string;
  timeZone?: string;
  groupBy?: string;
  filters?: Record<string, string[]>;
}): Promise<string> {
  const now = Date.now();
  const startDate = params.startDate
    ? parseRelativeDate(params.startDate)
    : now - 7 * 86400000;
  const endDate = params.endDate ? parseRelativeDate(params.endDate) : now;

  // A dotted metric is forwarded verbatim. The previous `split(".", 2)`
  // silently dropped everything after a second dot, so the metric that was
  // queried could differ from the one requested.
  const metricKey = params.metric.includes(".")
    ? params.metric
    : `metadata.${params.metric}`;
  const aggregation = params.aggregation ?? "avg";

  const result = await apiGetAnalytics({
    series: [{ metric: metricKey, aggregation }],
    startDate,
    endDate,
    timeZone: params.timeZone ?? "UTC",
    groupBy: params.groupBy,
    filters: params.filters,
  });

  const lines: string[] = [];
  lines.push(`# Analytics: ${metricKey} (${aggregation})\n`);
  lines.push(
    `Period: ${new Date(startDate).toISOString().split("T")[0]} to ${new Date(endDate).toISOString().split("T")[0]}`
  );
  if (params.groupBy) lines.push(`Grouped by: ${params.groupBy}`);
  lines.push("");

  const currentPeriod = result.currentPeriod ?? [];
  if (currentPeriod.length === 0) {
    lines.push("No data available for this period.");
  } else {
    lines.push("| Date | Value |");
    lines.push("|------|-------|");
    for (const bucket of currentPeriod) {
      lines.push(`| ${bucket.date} | ${formatBucketValue(bucket)} |`);
    }
  }

  lines.push(
    "\n> Tip: Use `discover_schema` to see all available metrics and aggregation types."
  );

  return lines.join("\n");
}
@@ -0,0 +1,56 @@
1
+ import { getPrompt as apiGetPrompt } from "../langwatch-api.js";
2
+
/**
 * Entry point for the `get_prompt` MCP tool.
 *
 * Fetches one prompt by ID or handle (optionally a specific version) and
 * renders it as markdown: header fields, model configuration, messages and up
 * to ten entries of version history.
 *
 * @param params - `idOrHandle` identifies the prompt; `version` is passed
 *   through to the API when given.
 * @returns Markdown description of the prompt.
 */
export async function handleGetPrompt(params: {
  idOrHandle: string;
  version?: number;
}): Promise<string> {
  const prompt = await apiGetPrompt(params.idOrHandle, params.version);

  const title = prompt.name || prompt.handle || prompt.id;
  const out: string[] = [`# Prompt: ${title}\n`];

  if (prompt.handle) {
    out.push(`**Handle**: ${prompt.handle}`);
  }
  if (prompt.id) {
    out.push(`**ID**: ${prompt.id}`);
  }
  if (prompt.description) {
    out.push(`**Description**: ${prompt.description}`);
  }
  if (prompt.latestVersionNumber != null) {
    out.push(`**Latest Version**: v${prompt.latestVersionNumber}`);
  }

  // Model settings come from the first listed version when there is one,
  // otherwise from the prompt object itself.
  const active = prompt.versions?.[0] ?? prompt;
  if (active.model) {
    out.push(`**Model**: ${active.model}`);
  }
  if (active.modelProvider) {
    out.push(`**Provider**: ${active.modelProvider}`);
  }

  const messages = active.messages || prompt.prompt || [];
  if (Array.isArray(messages) && messages.length > 0) {
    out.push("\n## Messages");
    messages.forEach((message) => {
      out.push(`\n### ${message.role}`, message.content);
    });
  }

  const history = prompt.versions;
  if (history && history.length > 0) {
    out.push("\n## Version History");
    history.slice(0, 10).forEach((entry) => {
      const versionNum = entry.version ?? "?";
      const commitMsg = entry.commitMessage || "No message";
      out.push(`- **v${versionNum}**: ${commitMsg}`);
    });

    // Only the first ten versions are listed; summarize the remainder.
    const hidden = history.length - 10;
    if (hidden > 0) {
      out.push(`... and ${hidden} more versions`);
    }
  }

  return out.join("\n");
}
@@ -0,0 +1,61 @@
1
+ import { getTraceById as apiGetTraceById } from "../langwatch-api.js";
2
+
/**
 * Entry point for the `get_trace` MCP tool.
 *
 * Fetches a single trace by ID. With `format: "json"` the API result is
 * returned as pretty-printed JSON; otherwise (the default, `"digest"`) a
 * markdown digest is built from timestamps, metadata, evaluations and the
 * formatted trace text.
 *
 * @param params - `traceId` of the trace and the desired output `format`.
 * @returns Pretty-printed JSON or a markdown digest.
 */
export async function handleGetTrace(params: {
  traceId: string;
  format?: "digest" | "json";
}): Promise<string> {
  const format = params.format ?? "digest";
  const trace = await apiGetTraceById(params.traceId, format);

  if (format === "json") {
    return JSON.stringify(trace, null, 2);
  }

  // Maps the tri-state `passed` flag onto its display text.
  const outcomeOf = (passed: unknown): string => {
    if (passed === true) return "PASSED";
    if (passed === false) return "FAILED";
    return "N/A";
  };

  const out: string[] = [`# Trace: ${params.traceId}\n`];

  const stamps = trace.timestamps;
  if (stamps) {
    out.push(`**Started**: ${stamps.started_at}`);
    if (stamps.updated_at) {
      out.push(`**Updated**: ${stamps.updated_at}`);
    }
  }

  const meta = trace.metadata;
  if (meta) {
    const labelled: Array<[string, unknown]> = [
      ["User", meta.user_id],
      ["Thread", meta.thread_id],
      ["Customer", meta.customer_id],
    ];
    for (const [title, value] of labelled) {
      if (value) out.push(`**${title}**: ${value}`);
    }
    if (meta.labels?.length) {
      out.push(`**Labels**: ${meta.labels.join(", ")}`);
    }
  }

  const evaluations = trace.evaluations;
  if (evaluations && evaluations.length > 0) {
    out.push("\n## Evaluations");
    for (const evaluation of evaluations) {
      const title = evaluation.name || evaluation.evaluator_id;
      const score =
        evaluation.score != null ? ` (score: ${evaluation.score})` : "";
      const label = evaluation.label ? ` [${evaluation.label}]` : "";
      out.push(
        `- **${title}**: ${outcomeOf(evaluation.passed)}${score}${label}`
      );
    }
  }

  if (trace.formatted_trace) {
    out.push(`\n## Trace Details\n${trace.formatted_trace}`);
  }

  out.push(
    '\n> Tip: Use `get_trace` with `format: "json"` to get the full raw trace data.'
  );

  return out.join("\n");
}
@@ -0,0 +1,35 @@
1
+ import { listPrompts as apiListPrompts } from "../langwatch-api.js";
2
+
/** Collapses line breaks (and the whitespace around them) into one space. */
function toSingleLine(text: string): string {
  return text.replace(/\s*[\r\n]+\s*/g, " ");
}

/** Escapes `|` so the text cannot terminate a markdown table cell early. */
function escapePipes(text: string): string {
  return text.replace(/\|/g, "\\|");
}

/**
 * Handles the list_prompts MCP tool invocation.
 *
 * Lists all prompts in the LangWatch project, formatted as an
 * AI-readable markdown table.
 *
 * Cell text is flattened to a single line and `|` is escaped, because a raw
 * pipe or newline in a handle, name or description would otherwise break the
 * table row. Text without those characters is rendered exactly as before.
 *
 * @returns A markdown table with one row per prompt, or a plain notice when
 *   the API returns no prompts (or a non-array value).
 */
export async function handleListPrompts(): Promise<string> {
  const prompts = await apiListPrompts();

  if (!Array.isArray(prompts) || prompts.length === 0) {
    return "No prompts found in this project.";
  }

  const lines: string[] = [];
  lines.push(`# Prompts (${prompts.length} total)\n`);
  lines.push("| Handle | Name | Latest Version | Description |");
  lines.push("|--------|------|----------------|-------------|");

  for (const p of prompts) {
    const handle = escapePipes(toSingleLine(String(p.handle || p.id || "N/A")));
    const name = escapePipes(toSingleLine(String(p.name || "Untitled")));
    const versionNum = p.latestVersionNumber ?? p.version;
    const version = versionNum != null ? `v${versionNum}` : "N/A";
    // Truncate before escaping so the 60-character budget counts visible
    // characters and a `\|` escape is never cut in half.
    const desc = escapePipes(
      toSingleLine(String(p.description || "")).slice(0, 60)
    );
    lines.push(`| ${handle} | ${name} | ${version} | ${desc} |`);
  }

  lines.push(
    "\n> Use `get_prompt` with the handle or ID to see full prompt details."
  );

  return lines.join("\n");
}