@langwatch/mcp-server 0.3.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/CHANGELOG.md +32 -0
  2. package/README.md +97 -25
  3. package/dist/archive-scenario-GAE4XVFM.js +19 -0
  4. package/dist/archive-scenario-GAE4XVFM.js.map +1 -0
  5. package/dist/chunk-AAQNA53E.js +28 -0
  6. package/dist/chunk-AAQNA53E.js.map +1 -0
  7. package/dist/chunk-JVWDWL3J.js +91 -0
  8. package/dist/chunk-JVWDWL3J.js.map +1 -0
  9. package/dist/chunk-K2YFPOSD.js +40 -0
  10. package/dist/chunk-K2YFPOSD.js.map +1 -0
  11. package/dist/chunk-ZXKLPC2E.js +27 -0
  12. package/dist/chunk-ZXKLPC2E.js.map +1 -0
  13. package/dist/config-FIQWQRUB.js +11 -0
  14. package/dist/config-FIQWQRUB.js.map +1 -0
  15. package/dist/create-prompt-P35POKBW.js +22 -0
  16. package/dist/create-prompt-P35POKBW.js.map +1 -0
  17. package/dist/create-scenario-3YRZVDYF.js +26 -0
  18. package/dist/create-scenario-3YRZVDYF.js.map +1 -0
  19. package/dist/discover-scenario-schema-MEEEVND7.js +65 -0
  20. package/dist/discover-scenario-schema-MEEEVND7.js.map +1 -0
  21. package/dist/discover-schema-3T52ORPB.js +446 -0
  22. package/dist/discover-schema-3T52ORPB.js.map +1 -0
  23. package/dist/get-analytics-BAVXTAPB.js +55 -0
  24. package/dist/get-analytics-BAVXTAPB.js.map +1 -0
  25. package/dist/get-prompt-LKCPT26O.js +48 -0
  26. package/dist/get-prompt-LKCPT26O.js.map +1 -0
  27. package/dist/get-scenario-3SCDW4Z6.js +33 -0
  28. package/dist/get-scenario-3SCDW4Z6.js.map +1 -0
  29. package/dist/get-trace-QFDWJ5D4.js +50 -0
  30. package/dist/get-trace-QFDWJ5D4.js.map +1 -0
  31. package/dist/index.js +22114 -8786
  32. package/dist/index.js.map +1 -1
  33. package/dist/list-prompts-UQPBCUYA.js +33 -0
  34. package/dist/list-prompts-UQPBCUYA.js.map +1 -0
  35. package/dist/list-scenarios-573YOUKC.js +40 -0
  36. package/dist/list-scenarios-573YOUKC.js.map +1 -0
  37. package/dist/search-traces-RSMYCAN7.js +72 -0
  38. package/dist/search-traces-RSMYCAN7.js.map +1 -0
  39. package/dist/update-prompt-G2Y5EBQY.js +31 -0
  40. package/dist/update-prompt-G2Y5EBQY.js.map +1 -0
  41. package/dist/update-scenario-SSGVOBJO.js +27 -0
  42. package/dist/update-scenario-SSGVOBJO.js.map +1 -0
  43. package/package.json +3 -3
  44. package/src/__tests__/config.unit.test.ts +89 -0
  45. package/src/__tests__/date-parsing.unit.test.ts +78 -0
  46. package/src/__tests__/discover-schema.unit.test.ts +118 -0
  47. package/src/__tests__/integration.integration.test.ts +313 -0
  48. package/src/__tests__/langwatch-api.unit.test.ts +309 -0
  49. package/src/__tests__/scenario-tools.integration.test.ts +286 -0
  50. package/src/__tests__/scenario-tools.unit.test.ts +185 -0
  51. package/src/__tests__/schemas.unit.test.ts +85 -0
  52. package/src/__tests__/tools.unit.test.ts +729 -0
  53. package/src/config.ts +31 -0
  54. package/src/index.ts +383 -0
  55. package/src/langwatch-api-scenarios.ts +67 -0
  56. package/src/langwatch-api.ts +266 -0
  57. package/src/schemas/analytics-groups.ts +78 -0
  58. package/src/schemas/analytics-metrics.ts +179 -0
  59. package/src/schemas/filter-fields.ts +119 -0
  60. package/src/schemas/index.ts +3 -0
  61. package/src/tools/archive-scenario.ts +19 -0
  62. package/src/tools/create-prompt.ts +29 -0
  63. package/src/tools/create-scenario.ts +30 -0
  64. package/src/tools/discover-scenario-schema.ts +71 -0
  65. package/src/tools/discover-schema.ts +106 -0
  66. package/src/tools/get-analytics.ts +71 -0
  67. package/src/tools/get-prompt.ts +56 -0
  68. package/src/tools/get-scenario.ts +36 -0
  69. package/src/tools/get-trace.ts +61 -0
  70. package/src/tools/list-prompts.ts +35 -0
  71. package/src/tools/list-scenarios.ts +47 -0
  72. package/src/tools/search-traces.ts +91 -0
  73. package/src/tools/update-prompt.ts +44 -0
  74. package/src/tools/update-scenario.ts +32 -0
  75. package/src/utils/date-parsing.ts +31 -0
  76. package/tests/evaluations.ipynb +634 -634
  77. package/tests/scenario-openai.test.ts +3 -1
  78. package/uv.lock +1788 -1322
package/CHANGELOG.md CHANGED
@@ -1,5 +1,37 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.5.0](https://github.com/langwatch/langwatch/compare/mcp-server@v0.4.0...mcp-server@v0.5.0) (2026-02-20)
4
+
5
+
6
+ ### Features
7
+
8
+ * add scenario management tools to MCP server ([#1705](https://github.com/langwatch/langwatch/issues/1705)) ([0376fde](https://github.com/langwatch/langwatch/commit/0376fde0abff7b110b4ec5996a399c4b2ceafde0))
9
+
10
+
11
+ ### Miscellaneous
12
+
13
+ * **deps-dev:** bump @eslint/js from 9.35.0 to 9.39.2 in /mcp-server ([#1465](https://github.com/langwatch/langwatch/issues/1465)) ([fbee07d](https://github.com/langwatch/langwatch/commit/fbee07d8b964d0a059eaa32c7685c8bf667898e7))
14
+ * **deps:** bump hono ([f7e8f05](https://github.com/langwatch/langwatch/commit/f7e8f056843958cac4504ae02f37a351457f77ee))
15
+ * **deps:** bump hono from 4.11.9 to 4.12.0 in /mcp-server in the npm_and_yarn group across 1 directory ([#1736](https://github.com/langwatch/langwatch/issues/1736)) ([f7e8f05](https://github.com/langwatch/langwatch/commit/f7e8f056843958cac4504ae02f37a351457f77ee))
16
+ * **deps:** bump node-pty from 1.0.0 to 1.1.0 in /mcp-server ([#1447](https://github.com/langwatch/langwatch/issues/1447)) ([12ad02c](https://github.com/langwatch/langwatch/commit/12ad02c19dcc0ba90ad32f77659816b768188a53))
17
+ * **deps:** bump qs ([f0e9747](https://github.com/langwatch/langwatch/commit/f0e97475becd58dfa523a944fbb3fa0657dfc1dc))
18
+ * **deps:** bump qs from 6.14.1 to 6.14.2 in /mcp-server in the npm_and_yarn group across 1 directory ([#1568](https://github.com/langwatch/langwatch/issues/1568)) ([f0e9747](https://github.com/langwatch/langwatch/commit/f0e97475becd58dfa523a944fbb3fa0657dfc1dc))
19
+ * **deps:** bump the npm_and_yarn group across 1 directory with 8 updates ([#1519](https://github.com/langwatch/langwatch/issues/1519)) ([487e563](https://github.com/langwatch/langwatch/commit/487e5637a941fa9335ec8e951efdf38bb0a02a8c))
20
+ * **deps:** bump the uv group across 1 directory with 7 updates ([#1516](https://github.com/langwatch/langwatch/issues/1516)) ([7f2f178](https://github.com/langwatch/langwatch/commit/7f2f178588d89a63f3b38510844b87de7b528b3b))
21
+
22
+ ## [0.4.0](https://github.com/langwatch/langwatch/compare/mcp-server@v0.3.3...mcp-server@v0.4.0) (2026-02-08)
23
+
24
+
25
+ ### Features
26
+
27
+ * add CI/CD execution support for evaluations v3 ([#1118](https://github.com/langwatch/langwatch/issues/1118)) ([d28adac](https://github.com/langwatch/langwatch/commit/d28adaceeb87921d9c7c0f1cf76b5e03f3b90fbd))
28
+ * add observability and prompt MCP tools to @langwatch/mcp-server v0.4.0 ([#1410](https://github.com/langwatch/langwatch/issues/1410)) ([b770040](https://github.com/langwatch/langwatch/commit/b7700401dd87e7f1b76fefb213d67c906bcc1202))
29
+
30
+
31
+ ### Bug Fixes
32
+
33
+ * **mcp-server:** skip integration test in CI ([#1300](https://github.com/langwatch/langwatch/issues/1300)) ([c16f232](https://github.com/langwatch/langwatch/commit/c16f2320b5c99818324d506a64ed3588085d8517))
34
+
3
35
  ## [0.3.3](https://github.com/langwatch/langwatch/compare/mcp-server@v0.3.2...mcp-server@v0.3.3) (2025-12-18)
4
36
 
5
37
 
package/README.md CHANGED
@@ -1,56 +1,128 @@
1
- # LangWatch 🏰 MCP Server
1
+ # LangWatch MCP Server
2
2
 
3
- The LangWatch MCP Server makes your AI coding assistant an expert in both [LangWatch](https://langwatch.ai/docs), for automatically instrumenting your code, managing versioned prompts and creating evaluations; and [Scenario](https://langwatch.ai/scenario), for automatically testing your agents via simulations.
3
+ MCP server that gives AI coding agents access to LangWatch observability data, prompts, and documentation via the [Model Context Protocol](https://modelcontextprotocol.io/introduction).
4
4
 
5
- ## Setup in your Coding Assistant 👩‍💻
5
+ ## Quick Setup
6
6
 
7
- 1. Open Cursor/Claude Code/your editor Settings
8
- 2. Navigate to the MCP settings
9
- 3. Set the "name" as "LangWatch"
10
- 4. Add the LangWatch MCP:
7
+ Add to your MCP client configuration (Claude Code, Cursor, etc.):
11
8
 
12
9
  ```json
13
10
  {
14
11
  "mcpServers": {
15
12
  "langwatch": {
16
13
  "command": "npx",
17
- "args": ["-y", "@langwatch/mcp-server"]
14
+ "args": ["-y", "@langwatch/mcp-server"],
15
+ "env": {
16
+ "LANGWATCH_API_KEY": "your-api-key-here"
17
+ }
18
18
  }
19
19
  }
20
20
  }
21
21
  ```
22
22
 
23
+ For Claude Code, you can also run:
24
+
25
+ ```bash
26
+ claude mcp add langwatch -- npx -y @langwatch/mcp-server --apiKey your-api-key-here
27
+ ```
28
+
29
+ The API key is required for observability and prompt tools. Documentation tools work without it.
30
+
31
+ ## Configuration
32
+
33
+ | Env Var | CLI Arg | Description |
34
+ |---------|---------|-------------|
35
+ | `LANGWATCH_API_KEY` | `--apiKey` | API key for authentication |
36
+ | `LANGWATCH_ENDPOINT` | `--endpoint` | API endpoint (default: `https://app.langwatch.ai`) |
37
+
23
38
  ## Tools
24
39
 
25
- The MCP Server provides the following tools:
40
+ ### Documentation
41
+
42
+ | Tool | Description |
43
+ |------|-------------|
44
+ | `fetch_langwatch_docs` | Fetch LangWatch integration docs |
45
+ | `fetch_scenario_docs` | Fetch Scenario agent testing docs |
46
+
47
+ ### Observability (requires API key)
48
+
49
+ | Tool | Description |
50
+ |------|-------------|
51
+ | `discover_schema` | Explore available filters, metrics, aggregations, and groups |
52
+ | `search_traces` | Search traces with filters, text query, and date range |
53
+ | `get_trace` | Get full trace details with AI-readable formatting |
54
+ | `get_analytics` | Query timeseries analytics data |
55
+
56
+ ### Prompts (requires API key)
57
+
58
+ | Tool | Description |
59
+ |------|-------------|
60
+ | `list_prompts` | List all prompts |
61
+ | `get_prompt` | Get prompt with messages and version history |
62
+ | `create_prompt` | Create a new prompt |
63
+ | `update_prompt` | Update prompt or create new version |
64
+
65
+ ## Output Formats
66
+
67
+ The `search_traces` and `get_trace` tools support a `format` parameter:
68
+
69
+ - **`digest`** (default) — AI-readable trace digest with hierarchical span tree, timing, inputs/outputs, and errors. Optimized for LLM consumption — compact and information-dense.
70
+ - **`json`** — Full raw trace data with all fields. Useful for programmatic access or when you need the complete schema.
26
71
 
27
- ### `fetch_langwatch_docs`
72
+ ## Usage Tips
28
73
 
29
- - **Description:** Fetches the LangWatch docs for understanding how to implement LangWatch in your codebase.
30
- - **Parameters:**
31
- - `url`: (Optional) The full url of the specific doc page. If not provided, the docs index will be fetched.
74
+ - Start with `discover_schema` to understand available filter fields and metrics.
75
+ - Use `search_traces` to find relevant traces, then `get_trace` for full details.
76
+ - Search returns 25 traces per page by default. Use `scrollId` from the response to paginate.
77
+ - Analytics uses `category.name` format for metrics (e.g., `performance.completion_time`).
78
+ - Use `create_prompt` / `update_prompt` with `createVersion: true` for safe prompt iteration.
32
79
 
33
- ### `fetch_scenario_docs`
80
+ ## Development
34
81
 
35
- - **Description:** Fetches Scenario docs for understanding how to write agent simulations to test the agents on your codebase.
36
- - **Parameters:**
37
- - `url`: (Optional) The full url of the specific doc page. If not provided, the docs index will be fetched.
82
+ ### Prerequisites
38
83
 
39
- ## Example Usage
84
+ - Node.js 18+
85
+ - pnpm
40
86
 
41
- Ask your coding assistant to instrument your code:
87
+ ### Setup
42
88
 
43
- > "Can you instrument my LLM code with LangWatch"
89
+ ```bash
90
+ pnpm install
91
+ ```
92
+
93
+ ### Build
44
94
 
45
- Or ask it to write a scenario test:
95
+ ```bash
96
+ pnpm build
97
+ ```
46
98
 
47
- > "Can you implement a scenario test for my agent?"
99
+ ### Test
48
100
 
49
- <img alt="LangWatch MCP Cursor Example" src="../assets/mcp-server/cursor-example.png" width="900">
101
+ ```bash
102
+ pnpm test # Run all tests
103
+ pnpm test:unit # Unit tests only
104
+ ```
50
105
 
51
- ## 🛟 Support
106
+ ### Local testing
107
+
108
+ Build and point your MCP client to the local dist:
109
+
110
+ ```json
111
+ {
112
+ "mcpServers": {
113
+ "langwatch": {
114
+ "command": "node",
115
+ "args": [
116
+ "/path/to/mcp-server/dist/index.js",
117
+ "--apiKey", "your-api-key",
118
+ "--endpoint", "http://localhost:5560"
119
+ ]
120
+ }
121
+ }
122
+ }
123
+ ```
52
124
 
53
- If you have questions or need help, join our community:
125
+ ## Support
54
126
 
55
127
  - [Discord Community](https://discord.gg/kT4PhDS2gH)
56
128
  - [LangWatch Docs](https://langwatch.ai/docs)
@@ -0,0 +1,19 @@
1
+ import {
2
+ archiveScenario
3
+ } from "./chunk-K2YFPOSD.js";
4
+ import "./chunk-JVWDWL3J.js";
5
+ import "./chunk-AAQNA53E.js";
6
+
7
+ // src/tools/archive-scenario.ts
8
/**
 * Handles the archive_scenario MCP tool invocation.
 *
 * Archives (soft-deletes) the scenario identified by `params.scenarioId`
 * and returns a Markdown confirmation.
 *
 * @param {{ scenarioId: string }} params - Tool arguments.
 * @returns {Promise<string>} Confirmation text with the scenario ID and status.
 */
async function handleArchiveScenario(params) {
  const outcome = await archiveScenario(params.scenarioId);
  const statusLabel = outcome.archived ? "archived" : "active";
  const report = [
    "Scenario archived successfully!\n",
    `**ID**: ${outcome.id}`,
    `**Status**: ${statusLabel}`
  ];
  return report.join("\n");
}
16
+ export {
17
+ handleArchiveScenario
18
+ };
19
+ //# sourceMappingURL=archive-scenario-GAE4XVFM.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/tools/archive-scenario.ts"],"sourcesContent":["import { archiveScenario as apiArchiveScenario } from \"../langwatch-api-scenarios.js\";\n\n/**\n * Handles the archive_scenario MCP tool invocation.\n *\n * Archives (soft-deletes) a scenario and returns confirmation.\n */\nexport async function handleArchiveScenario(params: {\n scenarioId: string;\n}): Promise<string> {\n const result = await apiArchiveScenario(params.scenarioId);\n\n const lines: string[] = [];\n lines.push(\"Scenario archived successfully!\\n\");\n lines.push(`**ID**: ${result.id}`);\n lines.push(`**Status**: ${result.archived ? \"archived\" : \"active\"}`);\n\n return lines.join(\"\\n\");\n}\n"],"mappings":";;;;;;;AAOA,eAAsB,sBAAsB,QAExB;AAClB,QAAM,SAAS,MAAM,gBAAmB,OAAO,UAAU;AAEzD,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,mCAAmC;AAC9C,QAAM,KAAK,WAAW,OAAO,EAAE,EAAE;AACjC,QAAM,KAAK,eAAe,OAAO,WAAW,aAAa,QAAQ,EAAE;AAEnE,SAAO,MAAM,KAAK,IAAI;AACxB;","names":[]}
@@ -0,0 +1,28 @@
1
+ // src/config.ts
2
// Module-level configuration; stays undefined until initConfig() runs.
var config;

/**
 * Initializes the server configuration.
 *
 * CLI arguments take precedence over environment variables; the endpoint
 * falls back to the hosted LangWatch URL when neither is provided.
 *
 * @param {{ apiKey?: string, endpoint?: string }} args - Parsed CLI arguments.
 * @returns {void}
 */
function initConfig(args) {
  const apiKey = args.apiKey || process.env.LANGWATCH_API_KEY;
  const endpoint =
    args.endpoint ||
    process.env.LANGWATCH_ENDPOINT ||
    "https://app.langwatch.ai";
  config = { apiKey, endpoint };
}

/**
 * Returns the current configuration.
 *
 * @returns {{ apiKey: string | undefined, endpoint: string }} The active config.
 * @throws {Error} If initConfig() has not been called yet.
 */
function getConfig() {
  if (config) {
    return config;
  }
  throw new Error("Config not initialized");
}

/**
 * Returns the configured API key.
 *
 * @returns {string} The API key.
 * @throws {Error} If no API key was configured (or the config is uninitialized).
 */
function requireApiKey() {
  const key = getConfig().apiKey;
  if (key) {
    return key;
  }
  throw new Error(
    "LANGWATCH_API_KEY is required. Set it via --apiKey flag or LANGWATCH_API_KEY environment variable."
  );
}
22
+
23
+ export {
24
+ initConfig,
25
+ getConfig,
26
+ requireApiKey
27
+ };
28
+ //# sourceMappingURL=chunk-AAQNA53E.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/config.ts"],"sourcesContent":["export interface McpConfig {\n apiKey: string | undefined;\n endpoint: string;\n}\n\nlet config: McpConfig | undefined;\n\nexport function initConfig(args: { apiKey?: string; endpoint?: string }): void {\n config = {\n apiKey: args.apiKey || process.env.LANGWATCH_API_KEY,\n endpoint:\n args.endpoint ||\n process.env.LANGWATCH_ENDPOINT ||\n \"https://app.langwatch.ai\",\n };\n}\n\nexport function getConfig(): McpConfig {\n if (!config) throw new Error(\"Config not initialized\");\n return config;\n}\n\nexport function requireApiKey(): string {\n const { apiKey } = getConfig();\n if (!apiKey) {\n throw new Error(\n \"LANGWATCH_API_KEY is required. Set it via --apiKey flag or LANGWATCH_API_KEY environment variable.\"\n );\n }\n return apiKey;\n}\n"],"mappings":";AAKA,IAAI;AAEG,SAAS,WAAW,MAAoD;AAC7E,WAAS;AAAA,IACP,QAAQ,KAAK,UAAU,QAAQ,IAAI;AAAA,IACnC,UACE,KAAK,YACL,QAAQ,IAAI,sBACZ;AAAA,EACJ;AACF;AAEO,SAAS,YAAuB;AACrC,MAAI,CAAC,OAAQ,OAAM,IAAI,MAAM,wBAAwB;AACrD,SAAO;AACT;AAEO,SAAS,gBAAwB;AACtC,QAAM,EAAE,OAAO,IAAI,UAAU;AAC7B,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;","names":[]}
@@ -0,0 +1,91 @@
1
+ import {
2
+ getConfig,
3
+ requireApiKey
4
+ } from "./chunk-AAQNA53E.js";
5
+
6
+ // src/langwatch-api.ts
7
/**
 * Sends an HTTP request to the LangWatch API.
 *
 * Builds the full URL from the configured endpoint, adds the auth header,
 * and handles JSON serialization/deserialization.
 *
 * @param {"GET" | "POST" | "PUT" | "PATCH" | "DELETE"} method - HTTP method.
 * @param {string} path - API path, expected to start with "/".
 * @param {unknown} [body] - Optional payload; JSON-encoded when provided.
 * @returns {Promise<unknown>} The parsed JSON response body.
 * @throws {Error} With status code and response body when the response is not OK.
 */
async function makeRequest(method, path, body) {
  // Strip trailing slashes so an endpoint such as "http://host/" does not
  // produce "http://host//api/..." once the path is appended.
  const baseUrl = getConfig().endpoint.replace(/\/+$/, "");
  const url = `${baseUrl}${path}`;
  const headers = {
    "X-Auth-Token": requireApiKey()
  };
  const hasBody = body !== undefined;
  if (hasBody) {
    headers["Content-Type"] = "application/json";
  }
  const response = await fetch(url, {
    method,
    headers,
    // Only attach a body key when a payload was actually supplied.
    ...(hasBody ? { body: JSON.stringify(body) } : {})
  });
  if (!response.ok) {
    const responseBody = await response.text();
    throw new Error(
      `LangWatch API error ${response.status}: ${responseBody}`
    );
  }
  return response.json();
}
28
/**
 * Searches traces with optional filters and pagination.
 *
 * @param {object} params - Search parameters; `format` defaults to "digest".
 * @returns {Promise<unknown>} The search response from POST /api/traces/search.
 */
async function searchTraces(params) {
  const { format, ...criteria } = params;
  // Append the resolved format last so the payload key order is stable.
  const payload = Object.assign({}, criteria, {
    format: format === undefined ? "digest" : format
  });
  return makeRequest("POST", "/api/traces/search", payload);
}
35
/**
 * Retrieves a single trace by its ID.
 *
 * @param {string} traceId - Trace identifier; URL-encoded before use.
 * @param {"digest" | "json"} [format="digest"] - Requested output format.
 * @returns {Promise<unknown>} The trace detail response.
 */
async function getTraceById(traceId, format = "digest") {
  const encodedId = encodeURIComponent(traceId);
  const path = `/api/traces/${encodedId}?format=${format}`;
  return makeRequest("GET", path);
}
41
/**
 * Fetches analytics timeseries data for the given metrics and date range.
 *
 * @param {object} params - Query payload, forwarded unchanged as the request body.
 * @returns {Promise<unknown>} The response from POST /api/analytics/timeseries.
 */
async function getAnalyticsTimeseries(params) {
  const route = "/api/analytics/timeseries";
  return makeRequest("POST", route, params);
}
48
/**
 * Lists all prompts in the project.
 *
 * @returns {Promise<unknown>} The response from GET /api/prompts.
 */
async function listPrompts() {
  const route = "/api/prompts";
  return makeRequest("GET", route);
}
51
/**
 * Retrieves a single prompt by ID or handle.
 *
 * @param {string} idOrHandle - Prompt ID or handle; URL-encoded before use.
 * @param {number} [version] - Specific version to fetch; omitted from the URL when null/undefined.
 * @returns {Promise<unknown>} The prompt detail response.
 */
async function getPrompt(idOrHandle, version) {
  let versionQuery = "";
  if (version !== null && version !== undefined) {
    versionQuery = `?version=${version}`;
  }
  const encodedId = encodeURIComponent(idOrHandle);
  return makeRequest("GET", `/api/prompts/${encodedId}${versionQuery}`);
}
58
/**
 * Creates a new prompt.
 *
 * @param {object} data - Prompt definition, forwarded unchanged as the request body.
 * @returns {Promise<unknown>} The response from POST /api/prompts.
 */
async function createPrompt(data) {
  const route = "/api/prompts";
  return makeRequest("POST", route, data);
}
65
/**
 * Updates an existing prompt by ID or handle.
 *
 * @param {string} idOrHandle - Prompt ID or handle; URL-encoded before use.
 * @param {object} data - Fields to update, forwarded unchanged as the request body.
 * @returns {Promise<unknown>} The response from POST /api/prompts/{idOrHandle}.
 */
async function updatePrompt(idOrHandle, data) {
  const route = `/api/prompts/${encodeURIComponent(idOrHandle)}`;
  return makeRequest("POST", route, data);
}
72
/**
 * Creates a new version of an existing prompt.
 *
 * @param {string} idOrHandle - Prompt ID or handle; URL-encoded before use.
 * @param {object} data - Version contents, forwarded unchanged as the request body.
 * @returns {Promise<unknown>} The response from POST /api/prompts/{idOrHandle}/versions.
 */
async function createPromptVersion(idOrHandle, data) {
  const promptRef = encodeURIComponent(idOrHandle);
  const route = `/api/prompts/${promptRef}/versions`;
  return makeRequest("POST", route, data);
}
79
+
80
+ export {
81
+ makeRequest,
82
+ searchTraces,
83
+ getTraceById,
84
+ getAnalyticsTimeseries,
85
+ listPrompts,
86
+ getPrompt,
87
+ createPrompt,
88
+ updatePrompt,
89
+ createPromptVersion
90
+ };
91
+ //# sourceMappingURL=chunk-JVWDWL3J.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/langwatch-api.ts"],"sourcesContent":["import { getConfig, requireApiKey } from \"./config.js\";\n\n// --- Response types ---\n\nexport interface TraceSearchResult {\n trace_id: string;\n formatted_trace?: string;\n input?: { value: string };\n output?: { value: string };\n timestamps?: { started_at?: string | number };\n metadata?: Record<string, unknown>;\n error?: Record<string, unknown>;\n}\n\nexport interface SearchTracesResponse {\n traces: TraceSearchResult[];\n pagination?: {\n totalHits?: number;\n scrollId?: string;\n };\n}\n\nexport interface TraceDetailResponse {\n trace_id: string;\n formatted_trace?: string;\n input?: { value: string };\n output?: { value: string };\n timestamps?: {\n started_at?: string | number;\n updated_at?: string | number;\n inserted_at?: string | number;\n };\n metadata?: {\n user_id?: string;\n thread_id?: string;\n customer_id?: string;\n labels?: string[];\n [key: string]: unknown;\n };\n error?: Record<string, unknown>;\n ascii_tree?: string;\n evaluations?: Array<{\n evaluator_id?: string;\n name?: string;\n score?: number;\n passed?: boolean;\n label?: string;\n }>;\n spans?: Array<{\n span_id: string;\n name?: string;\n type?: string;\n model?: string;\n input?: { value: string };\n output?: { value: string };\n timestamps?: { started_at?: number; finished_at?: number };\n metrics?: {\n completion_time_ms?: number;\n prompt_tokens?: number;\n completion_tokens?: number;\n tokens_estimated?: boolean;\n cost?: number;\n };\n }>;\n}\n\nexport interface AnalyticsBucket {\n date: string;\n [key: string]: unknown;\n}\n\nexport interface AnalyticsTimeseriesResponse {\n currentPeriod: AnalyticsBucket[];\n previousPeriod: AnalyticsBucket[];\n}\n\nexport interface PromptSummary {\n id?: string;\n handle?: string;\n name?: string;\n description?: string | null;\n latestVersionNumber?: number;\n version?: number;\n}\n\nexport interface PromptVersion {\n version?: number;\n commitMessage?: string;\n 
model?: string;\n modelProvider?: string;\n messages?: Array<{ role: string; content: string }>;\n}\n\nexport interface PromptDetailResponse extends PromptSummary {\n versions?: PromptVersion[];\n model?: string;\n modelProvider?: string;\n messages?: Array<{ role: string; content: string }>;\n prompt?: Array<{ role: string; content: string }>;\n}\n\nexport interface PromptMutationResponse {\n id?: string;\n handle?: string;\n name?: string;\n latestVersionNumber?: number;\n}\n\n// --- HTTP client ---\n\n/**\n * Sends an HTTP request to the LangWatch API.\n *\n * Builds the full URL from the configured endpoint, adds authentication,\n * and handles JSON serialization/deserialization.\n *\n * @throws Error with status code and response body when the response is not OK\n */\nexport async function makeRequest(\n method: \"GET\" | \"POST\" | \"PUT\" | \"PATCH\" | \"DELETE\",\n path: string,\n body?: unknown\n): Promise<unknown> {\n const url = getConfig().endpoint + path;\n const headers: Record<string, string> = {\n \"X-Auth-Token\": requireApiKey(),\n };\n\n if (body !== undefined) {\n headers[\"Content-Type\"] = \"application/json\";\n }\n\n const response = await fetch(url, {\n method,\n headers,\n ...(body !== undefined ? { body: JSON.stringify(body) } : {}),\n });\n\n if (!response.ok) {\n const responseBody = await response.text();\n throw new Error(\n `LangWatch API error ${response.status}: ${responseBody}`\n );\n }\n\n return response.json();\n}\n\n/** Searches traces with optional filters and pagination. 
*/\nexport async function searchTraces(params: {\n query?: string;\n filters?: Record<string, string[]>;\n startDate: number;\n endDate: number;\n pageSize?: number;\n pageOffset?: number;\n scrollId?: string;\n format?: \"digest\" | \"json\";\n}): Promise<SearchTracesResponse> {\n const { format = \"digest\", ...rest } = params;\n return makeRequest(\"POST\", \"/api/traces/search\", {\n ...rest,\n format,\n }) as Promise<SearchTracesResponse>;\n}\n\n/** Retrieves a single trace by its ID. */\nexport async function getTraceById(\n traceId: string,\n format: \"digest\" | \"json\" = \"digest\"\n): Promise<TraceDetailResponse> {\n return makeRequest(\n \"GET\",\n `/api/traces/${encodeURIComponent(traceId)}?format=${format}`\n ) as Promise<TraceDetailResponse>;\n}\n\n/** Fetches analytics timeseries data for the given metrics and date range. */\nexport async function getAnalyticsTimeseries(params: {\n series: Array<{\n metric: string;\n aggregation: string;\n key?: string;\n subkey?: string;\n }>;\n startDate: number;\n endDate: number;\n timeZone?: string;\n groupBy?: string;\n groupByKey?: string;\n filters?: Record<string, string[]>;\n}): Promise<AnalyticsTimeseriesResponse> {\n return makeRequest(\n \"POST\",\n \"/api/analytics/timeseries\",\n params\n ) as Promise<AnalyticsTimeseriesResponse>;\n}\n\n/** Lists all prompts in the project. */\nexport async function listPrompts(): Promise<PromptSummary[]> {\n return makeRequest(\"GET\", \"/api/prompts\") as Promise<PromptSummary[]>;\n}\n\n/** Retrieves a single prompt by ID or handle. */\nexport async function getPrompt(\n idOrHandle: string,\n version?: number\n): Promise<PromptDetailResponse> {\n const query = version != null ? `?version=${version}` : \"\";\n return makeRequest(\n \"GET\",\n `/api/prompts/${encodeURIComponent(idOrHandle)}${query}`\n ) as Promise<PromptDetailResponse>;\n}\n\n/** Creates a new prompt. 
*/\nexport async function createPrompt(data: {\n name: string;\n handle?: string;\n messages: Array<{ role: string; content: string }>;\n model: string;\n modelProvider: string;\n description?: string;\n}): Promise<PromptMutationResponse> {\n return makeRequest(\n \"POST\",\n \"/api/prompts\",\n data\n ) as Promise<PromptMutationResponse>;\n}\n\n/** Updates an existing prompt by ID or handle. */\nexport async function updatePrompt(\n idOrHandle: string,\n data: {\n messages?: Array<{ role: string; content: string }>;\n model?: string;\n modelProvider?: string;\n commitMessage?: string;\n }\n): Promise<PromptMutationResponse> {\n return makeRequest(\n \"POST\",\n `/api/prompts/${encodeURIComponent(idOrHandle)}`,\n data\n ) as Promise<PromptMutationResponse>;\n}\n\n/** Creates a new version of an existing prompt. */\nexport async function createPromptVersion(\n idOrHandle: string,\n data: {\n messages?: Array<{ role: string; content: string }>;\n model?: string;\n modelProvider?: string;\n commitMessage?: string;\n }\n): Promise<PromptMutationResponse> {\n return makeRequest(\n \"POST\",\n `/api/prompts/${encodeURIComponent(idOrHandle)}/versions`,\n data\n ) as 
Promise<PromptMutationResponse>;\n}\n\n"],"mappings":";;;;;;AAsHA,eAAsB,YACpB,QACA,MACA,MACkB;AAClB,QAAM,MAAM,UAAU,EAAE,WAAW;AACnC,QAAM,UAAkC;AAAA,IACtC,gBAAgB,cAAc;AAAA,EAChC;AAEA,MAAI,SAAS,QAAW;AACtB,YAAQ,cAAc,IAAI;AAAA,EAC5B;AAEA,QAAM,WAAW,MAAM,MAAM,KAAK;AAAA,IAChC;AAAA,IACA;AAAA,IACA,GAAI,SAAS,SAAY,EAAE,MAAM,KAAK,UAAU,IAAI,EAAE,IAAI,CAAC;AAAA,EAC7D,CAAC;AAED,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,eAAe,MAAM,SAAS,KAAK;AACzC,UAAM,IAAI;AAAA,MACR,uBAAuB,SAAS,MAAM,KAAK,YAAY;AAAA,IACzD;AAAA,EACF;AAEA,SAAO,SAAS,KAAK;AACvB;AAGA,eAAsB,aAAa,QASD;AAChC,QAAM,EAAE,SAAS,UAAU,GAAG,KAAK,IAAI;AACvC,SAAO,YAAY,QAAQ,sBAAsB;AAAA,IAC/C,GAAG;AAAA,IACH;AAAA,EACF,CAAC;AACH;AAGA,eAAsB,aACpB,SACA,SAA4B,UACE;AAC9B,SAAO;AAAA,IACL;AAAA,IACA,eAAe,mBAAmB,OAAO,CAAC,WAAW,MAAM;AAAA,EAC7D;AACF;AAGA,eAAsB,uBAAuB,QAaJ;AACvC,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAGA,eAAsB,cAAwC;AAC5D,SAAO,YAAY,OAAO,cAAc;AAC1C;AAGA,eAAsB,UACpB,YACA,SAC+B;AAC/B,QAAM,QAAQ,WAAW,OAAO,YAAY,OAAO,KAAK;AACxD,SAAO;AAAA,IACL;AAAA,IACA,gBAAgB,mBAAmB,UAAU,CAAC,GAAG,KAAK;AAAA,EACxD;AACF;AAGA,eAAsB,aAAa,MAOC;AAClC,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAGA,eAAsB,aACpB,YACA,MAMiC;AACjC,SAAO;AAAA,IACL;AAAA,IACA,gBAAgB,mBAAmB,UAAU,CAAC;AAAA,IAC9C;AAAA,EACF;AACF;AAGA,eAAsB,oBACpB,YACA,MAMiC;AACjC,SAAO;AAAA,IACL;AAAA,IACA,gBAAgB,mBAAmB,UAAU,CAAC;AAAA,IAC9C;AAAA,EACF;AACF;","names":[]}
@@ -0,0 +1,40 @@
1
+ import {
2
+ makeRequest
3
+ } from "./chunk-JVWDWL3J.js";
4
+
5
+ // src/langwatch-api-scenarios.ts
6
/**
 * Lists all scenarios in the project.
 *
 * @returns {Promise<unknown>} The response from GET /api/scenarios.
 */
async function listScenarios() {
  const route = "/api/scenarios";
  return makeRequest("GET", route);
}
9
/**
 * Retrieves a single scenario by ID.
 *
 * @param {string} id - Scenario identifier; URL-encoded before use.
 * @returns {Promise<unknown>} The response from GET /api/scenarios/{id}.
 */
async function getScenario(id) {
  const route = `/api/scenarios/${encodeURIComponent(id)}`;
  return makeRequest("GET", route);
}
15
/**
 * Creates a new scenario.
 *
 * @param {object} data - Scenario definition, forwarded unchanged as the request body.
 * @returns {Promise<unknown>} The response from POST /api/scenarios.
 */
async function createScenario(data) {
  const route = "/api/scenarios";
  return makeRequest("POST", route, data);
}
18
/**
 * Updates an existing scenario.
 *
 * @param {object} params - Must include `id`; every other key is sent as the update payload.
 * @returns {Promise<unknown>} The response from PUT /api/scenarios/{id}.
 */
async function updateScenario(params) {
  // Split the identifier (used in the URL) from the fields being changed.
  const { id: scenarioId, ...changes } = params;
  const route = `/api/scenarios/${encodeURIComponent(scenarioId)}`;
  return makeRequest("PUT", route, changes);
}
26
/**
 * Archives (soft-deletes) a scenario.
 *
 * @param {string} id - Scenario identifier; URL-encoded before use.
 * @returns {Promise<unknown>} The response from DELETE /api/scenarios/{id}.
 */
async function archiveScenario(id) {
  const route = `/api/scenarios/${encodeURIComponent(id)}`;
  return makeRequest("DELETE", route);
}
32
+
33
+ export {
34
+ listScenarios,
35
+ getScenario,
36
+ createScenario,
37
+ updateScenario,
38
+ archiveScenario
39
+ };
40
+ //# sourceMappingURL=chunk-K2YFPOSD.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/langwatch-api-scenarios.ts"],"sourcesContent":["import { makeRequest } from \"./langwatch-api.js\";\n\n// --- Scenario types ---\n\nexport interface ScenarioSummary {\n id: string;\n name: string;\n situation: string;\n criteria: string[];\n labels: string[];\n}\n\nexport interface ScenarioArchiveResponse {\n id: string;\n archived: boolean;\n}\n\n// --- Scenario API functions ---\n\n/** Lists all scenarios in the project. */\nexport async function listScenarios(): Promise<ScenarioSummary[]> {\n return makeRequest(\"GET\", \"/api/scenarios\") as Promise<ScenarioSummary[]>;\n}\n\n/** Retrieves a single scenario by ID. */\nexport async function getScenario(id: string): Promise<ScenarioSummary> {\n return makeRequest(\n \"GET\",\n `/api/scenarios/${encodeURIComponent(id)}`\n ) as Promise<ScenarioSummary>;\n}\n\n/** Creates a new scenario. */\nexport async function createScenario(data: {\n name: string;\n situation: string;\n criteria?: string[];\n labels?: string[];\n}): Promise<ScenarioSummary> {\n return makeRequest(\"POST\", \"/api/scenarios\", data) as Promise<ScenarioSummary>;\n}\n\n/** Updates an existing scenario. */\nexport async function updateScenario(params: {\n id: string;\n name?: string;\n situation?: string;\n criteria?: string[];\n labels?: string[];\n}): Promise<ScenarioSummary> {\n const { id, ...data } = params;\n return makeRequest(\n \"PUT\",\n `/api/scenarios/${encodeURIComponent(id)}`,\n data\n ) as Promise<ScenarioSummary>;\n}\n\n/** Archives (soft-deletes) a scenario. 
*/\nexport async function archiveScenario(\n id: string\n): Promise<ScenarioArchiveResponse> {\n return makeRequest(\n \"DELETE\",\n `/api/scenarios/${encodeURIComponent(id)}`\n ) as Promise<ScenarioArchiveResponse>;\n}\n"],"mappings":";;;;;AAoBA,eAAsB,gBAA4C;AAChE,SAAO,YAAY,OAAO,gBAAgB;AAC5C;AAGA,eAAsB,YAAY,IAAsC;AACtE,SAAO;AAAA,IACL;AAAA,IACA,kBAAkB,mBAAmB,EAAE,CAAC;AAAA,EAC1C;AACF;AAGA,eAAsB,eAAe,MAKR;AAC3B,SAAO,YAAY,QAAQ,kBAAkB,IAAI;AACnD;AAGA,eAAsB,eAAe,QAMR;AAC3B,QAAM,EAAE,IAAI,GAAG,KAAK,IAAI;AACxB,SAAO;AAAA,IACL;AAAA,IACA,kBAAkB,mBAAmB,EAAE,CAAC;AAAA,IACxC;AAAA,EACF;AACF;AAGA,eAAsB,gBACpB,IACkC;AAClC,SAAO;AAAA,IACL;AAAA,IACA,kBAAkB,mBAAmB,EAAE,CAAC;AAAA,EAC1C;AACF;","names":[]}
@@ -0,0 +1,27 @@
1
+ // src/utils/date-parsing.ts
2
// Milliseconds per supported relative unit. Note that "m" is a fixed
// 30-day span (2,592,000,000 ms), not minutes.
const RELATIVE_UNITS = Object.freeze({
  h: 36e5,
  d: 864e5,
  w: 6048e5,
  m: 2592e6
});

/**
 * Parses a date string that is either "now", a relative duration
 * (e.g. "24h", "7d", "4w", "1m"), or a string accepted by Date.parse.
 * Throws on invalid input rather than silently falling back.
 *
 * @param {string} input - The date expression to parse.
 * @returns {number} Epoch milliseconds.
 * @throws {Error} If the input is neither a valid relative duration nor a parseable date.
 */
function parseRelativeDate(input) {
  if (input === "now") return Date.now();
  const match = input.match(/^(\d+)(h|d|w|m)$/);
  if (match) {
    const [, amount, unit] = match;
    // The pattern only admits units present in RELATIVE_UNITS, so no fallback
    // is needed; the radix is explicit so the amount is always read as decimal.
    return Date.now() - Number.parseInt(amount, 10) * RELATIVE_UNITS[unit];
  }
  const parsed = Date.parse(input);
  if (Number.isNaN(parsed)) {
    throw new Error(
      `Invalid date: "${input}". Use a relative duration (e.g. "24h", "7d", "4w") or an ISO date string.`
    );
  }
  return parsed;
}
23
+
24
+ export {
25
+ parseRelativeDate
26
+ };
27
+ //# sourceMappingURL=chunk-ZXKLPC2E.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/utils/date-parsing.ts"],"sourcesContent":["const RELATIVE_UNITS: Record<string, number> = {\n h: 3600000,\n d: 86400000,\n w: 604800000,\n m: 2592000000,\n};\n\n/**\n * Parses a date string that can be either a relative duration (e.g. \"24h\", \"7d\")\n * or an ISO date string. Throws on invalid input rather than silently falling back.\n *\n * @returns epoch milliseconds\n * @throws Error if the input is not a valid relative duration or parseable date string\n */\nexport function parseRelativeDate(input: string): number {\n if (input === \"now\") return Date.now();\n\n const match = input.match(/^(\\d+)(h|d|w|m)$/);\n if (match) {\n const [, amount, unit] = match;\n return Date.now() - parseInt(amount!) * (RELATIVE_UNITS[unit!] ?? 86400000);\n }\n\n const parsed = Date.parse(input);\n if (Number.isNaN(parsed)) {\n throw new Error(\n `Invalid date: \"${input}\". Use a relative duration (e.g. \"24h\", \"7d\", \"4w\") or an ISO date string.`\n );\n }\n return parsed;\n}\n"],"mappings":";AAAA,IAAM,iBAAyC;AAAA,EAC7C,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AACL;AASO,SAAS,kBAAkB,OAAuB;AACvD,MAAI,UAAU,MAAO,QAAO,KAAK,IAAI;AAErC,QAAM,QAAQ,MAAM,MAAM,kBAAkB;AAC5C,MAAI,OAAO;AACT,UAAM,CAAC,EAAE,QAAQ,IAAI,IAAI;AACzB,WAAO,KAAK,IAAI,IAAI,SAAS,MAAO,KAAK,eAAe,IAAK,KAAK;AAAA,EACpE;AAEA,QAAM,SAAS,KAAK,MAAM,KAAK;AAC/B,MAAI,OAAO,MAAM,MAAM,GAAG;AACxB,UAAM,IAAI;AAAA,MACR,kBAAkB,KAAK;AAAA,IACzB;AAAA,EACF;AACA,SAAO;AACT;","names":[]}
@@ -0,0 +1,11 @@
1
+ import {
2
+ getConfig,
3
+ initConfig,
4
+ requireApiKey
5
+ } from "./chunk-AAQNA53E.js";
6
+ export {
7
+ getConfig,
8
+ initConfig,
9
+ requireApiKey
10
+ };
11
+ //# sourceMappingURL=config-FIQWQRUB.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
@@ -0,0 +1,22 @@
1
+ import {
2
+ createPrompt
3
+ } from "./chunk-JVWDWL3J.js";
4
+ import "./chunk-AAQNA53E.js";
5
+
6
+ // src/tools/create-prompt.ts
7
+ async function handleCreatePrompt(params) {
8
+ const result = await createPrompt(params);
9
+ const lines = [];
10
+ lines.push("Prompt created successfully!\n");
11
+ if (result.id) lines.push(`**ID**: ${result.id}`);
12
+ if (result.handle) lines.push(`**Handle**: ${result.handle}`);
13
+ lines.push(`**Name**: ${result.name || params.name}`);
14
+ lines.push(`**Model**: ${params.model} (${params.modelProvider})`);
15
+ if (result.latestVersionNumber != null)
16
+ lines.push(`**Version**: v${result.latestVersionNumber}`);
17
+ return lines.join("\n");
18
+ }
19
+ export {
20
+ handleCreatePrompt
21
+ };
22
+ //# sourceMappingURL=create-prompt-P35POKBW.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/tools/create-prompt.ts"],"sourcesContent":["import { createPrompt as apiCreatePrompt } from \"../langwatch-api.js\";\n\n/**\n * Handles the create_prompt MCP tool invocation.\n *\n * Creates a new prompt in the LangWatch project and returns a\n * confirmation with the created prompt's details.\n */\nexport async function handleCreatePrompt(params: {\n name: string;\n handle?: string;\n messages: Array<{ role: string; content: string }>;\n model: string;\n modelProvider: string;\n description?: string;\n}): Promise<string> {\n const result = await apiCreatePrompt(params);\n\n const lines: string[] = [];\n lines.push(\"Prompt created successfully!\\n\");\n if (result.id) lines.push(`**ID**: ${result.id}`);\n if (result.handle) lines.push(`**Handle**: ${result.handle}`);\n lines.push(`**Name**: ${result.name || params.name}`);\n lines.push(`**Model**: ${params.model} (${params.modelProvider})`);\n if (result.latestVersionNumber != null)\n lines.push(`**Version**: v${result.latestVersionNumber}`);\n\n return lines.join(\"\\n\");\n}\n"],"mappings":";;;;;;AAQA,eAAsB,mBAAmB,QAOrB;AAClB,QAAM,SAAS,MAAM,aAAgB,MAAM;AAE3C,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,gCAAgC;AAC3C,MAAI,OAAO,GAAI,OAAM,KAAK,WAAW,OAAO,EAAE,EAAE;AAChD,MAAI,OAAO,OAAQ,OAAM,KAAK,eAAe,OAAO,MAAM,EAAE;AAC5D,QAAM,KAAK,aAAa,OAAO,QAAQ,OAAO,IAAI,EAAE;AACpD,QAAM,KAAK,cAAc,OAAO,KAAK,KAAK,OAAO,aAAa,GAAG;AACjE,MAAI,OAAO,uBAAuB;AAChC,UAAM,KAAK,iBAAiB,OAAO,mBAAmB,EAAE;AAE1D,SAAO,MAAM,KAAK,IAAI;AACxB;","names":[]}
@@ -0,0 +1,26 @@
1
+ import {
2
+ createScenario
3
+ } from "./chunk-K2YFPOSD.js";
4
+ import "./chunk-JVWDWL3J.js";
5
+ import "./chunk-AAQNA53E.js";
6
+
7
+ // src/tools/create-scenario.ts
8
+ async function handleCreateScenario(params) {
9
+ const result = await createScenario(params);
10
+ const lines = [];
11
+ lines.push("Scenario created successfully!\n");
12
+ lines.push(`**ID**: ${result.id}`);
13
+ lines.push(`**Name**: ${result.name}`);
14
+ lines.push(`**Situation**: ${result.situation}`);
15
+ if (Array.isArray(result.criteria) && result.criteria.length > 0) {
16
+ lines.push(`**Criteria**: ${result.criteria.length} criteria`);
17
+ }
18
+ if (Array.isArray(result.labels) && result.labels.length > 0) {
19
+ lines.push(`**Labels**: ${result.labels.join(", ")}`);
20
+ }
21
+ return lines.join("\n");
22
+ }
23
+ export {
24
+ handleCreateScenario
25
+ };
26
+ //# sourceMappingURL=create-scenario-3YRZVDYF.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/tools/create-scenario.ts"],"sourcesContent":["import { createScenario as apiCreateScenario } from \"../langwatch-api-scenarios.js\";\n\n/**\n * Handles the create_scenario MCP tool invocation.\n *\n * Creates a new scenario in the LangWatch project and returns a\n * confirmation with the created scenario's details.\n */\nexport async function handleCreateScenario(params: {\n name: string;\n situation: string;\n criteria?: string[];\n labels?: string[];\n}): Promise<string> {\n const result = await apiCreateScenario(params);\n\n const lines: string[] = [];\n lines.push(\"Scenario created successfully!\\n\");\n lines.push(`**ID**: ${result.id}`);\n lines.push(`**Name**: ${result.name}`);\n lines.push(`**Situation**: ${result.situation}`);\n if (Array.isArray(result.criteria) && result.criteria.length > 0) {\n lines.push(`**Criteria**: ${result.criteria.length} criteria`);\n }\n if (Array.isArray(result.labels) && result.labels.length > 0) {\n lines.push(`**Labels**: ${result.labels.join(\", \")}`);\n }\n\n return lines.join(\"\\n\");\n}\n"],"mappings":";;;;;;;AAQA,eAAsB,qBAAqB,QAKvB;AAClB,QAAM,SAAS,MAAM,eAAkB,MAAM;AAE7C,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,kCAAkC;AAC7C,QAAM,KAAK,WAAW,OAAO,EAAE,EAAE;AACjC,QAAM,KAAK,aAAa,OAAO,IAAI,EAAE;AACrC,QAAM,KAAK,kBAAkB,OAAO,SAAS,EAAE;AAC/C,MAAI,MAAM,QAAQ,OAAO,QAAQ,KAAK,OAAO,SAAS,SAAS,GAAG;AAChE,UAAM,KAAK,iBAAiB,OAAO,SAAS,MAAM,WAAW;AAAA,EAC/D;AACA,MAAI,MAAM,QAAQ,OAAO,MAAM,KAAK,OAAO,OAAO,SAAS,GAAG;AAC5D,UAAM,KAAK,eAAe,OAAO,OAAO,KAAK,IAAI,CAAC,EAAE;AAAA,EACtD;AAEA,SAAO,MAAM,KAAK,IAAI;AACxB;","names":[]}
@@ -0,0 +1,65 @@
1
+ // src/tools/discover-scenario-schema.ts
2
+ function formatScenarioSchema() {
3
+ const lines = [];
4
+ lines.push("# Scenario Schema\n");
5
+ lines.push("## Fields\n");
6
+ lines.push(
7
+ '- **name** (required): A short, descriptive name (e.g., "billing dispute resolution", "password reset with 2FA unavailable")'
8
+ );
9
+ lines.push(
10
+ "- **situation** (required): The context that guides the user simulator \u2014 who the user is, what they want, and any constraints (see Writing a Good Situation below)"
11
+ );
12
+ lines.push(
13
+ "- **criteria** (array of strings): Pass/fail conditions a judge evaluates the agent against (see Writing Good Criteria below)"
14
+ );
15
+ lines.push(
16
+ '- **labels** (array of strings): Tags for organizing scenarios (e.g., "auth", "happy-path", "edge-case")'
17
+ );
18
+ lines.push("\n## Writing a Good Situation\n");
19
+ lines.push(
20
+ "The situation drives the user simulator. Include these elements:"
21
+ );
22
+ lines.push("- **Persona**: Who is the user? (e.g., a stressed small business owner, a confused teenager)");
23
+ lines.push("- **Emotional state**: How are they feeling? (e.g., frustrated, anxious, impatient)");
24
+ lines.push("- **Background/Context**: What happened before this conversation?");
25
+ lines.push("- **Intent**: What do they want to accomplish?");
26
+ lines.push("- **Constraints**: What limitations do they have? (e.g., no phone for 2FA, unfamiliar with technical terms)");
27
+ lines.push("\nExample:");
28
+ lines.push("```");
29
+ lines.push("User is a small business owner stressed about tax deadline.");
30
+ lines.push("They need help categorizing expenses but aren't familiar with");
31
+ lines.push("accounting terms. They appreciate patient explanations and examples.");
32
+ lines.push("They have a spreadsheet of transactions but aren't sure which");
33
+ lines.push("categories apply to their consulting business.");
34
+ lines.push("```");
35
+ lines.push("\n## Writing Good Criteria\n");
36
+ lines.push("Criteria are what the judge uses to pass or fail the agent. Each criterion should be:");
37
+ lines.push('- **Specific and testable** \u2014 not vague like "responds helpfully"');
38
+ lines.push("- **Behavioral** \u2014 describes what the agent should *do*, not how it works internally");
39
+ lines.push("- **Independent** \u2014 each criterion checks one thing");
40
+ lines.push("\nGood criteria patterns:");
41
+ lines.push(`- **Information gathering**: "Agent asks for the user's account number before proceeding"`);
42
+ lines.push('- **Safety/guardrails**: "Agent does not reveal internal system details or error stack traces"');
43
+ lines.push('- **Clarification**: "Agent asks clarifying questions before taking irreversible action"');
44
+ lines.push('- **Tone**: "Agent maintains a professional and empathetic tone throughout"');
45
+ lines.push('- **Completeness**: "Agent confirms the user understands the solution before ending"');
46
+ lines.push('- **Domain-specific**: "Agent recommends releasing a wild frog rather than keeping it as a pet"');
47
+ lines.push("\nAvoid vague criteria like:");
48
+ lines.push('- "Responds correctly" \u2014 correct how?');
49
+ lines.push('- "Is helpful" \u2014 helpful in what way?');
50
+ lines.push('- "Works well" \u2014 not testable');
51
+ lines.push("\n## Target Types\n");
52
+ lines.push("Scenarios can target different execution backends:");
53
+ lines.push("- **prompt**: Test a prompt template with variable substitution");
54
+ lines.push("- **http**: Test an HTTP endpoint (e.g., a deployed agent API)");
55
+ lines.push("- **code**: Test a code function directly");
56
+ lines.push("\n## Tips\n");
57
+ lines.push("- Start simple, then layer complexity (add constraints, edge cases)");
58
+ lines.push("- Test edge cases: user changes their mind, gives ambiguous input, makes mistakes");
59
+ lines.push("- Use `fetch_scenario_docs` for the full authoring guide and advanced patterns");
60
+ return lines.join("\n");
61
+ }
62
+ export {
63
+ formatScenarioSchema
64
+ };
65
+ //# sourceMappingURL=discover-scenario-schema-MEEEVND7.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/tools/discover-scenario-schema.ts"],"sourcesContent":["/**\n * Returns a human-readable description of the scenario schema,\n * including field descriptions, authoring guidance, and examples.\n */\nexport function formatScenarioSchema(): string {\n const lines: string[] = [];\n\n lines.push(\"# Scenario Schema\\n\");\n\n lines.push(\"## Fields\\n\");\n lines.push(\n '- **name** (required): A short, descriptive name (e.g., \"billing dispute resolution\", \"password reset with 2FA unavailable\")',\n );\n lines.push(\n \"- **situation** (required): The context that guides the user simulator — who the user is, what they want, and any constraints (see Writing a Good Situation below)\",\n );\n lines.push(\n \"- **criteria** (array of strings): Pass/fail conditions a judge evaluates the agent against (see Writing Good Criteria below)\",\n );\n lines.push(\n '- **labels** (array of strings): Tags for organizing scenarios (e.g., \"auth\", \"happy-path\", \"edge-case\")',\n );\n\n lines.push(\"\\n## Writing a Good Situation\\n\");\n lines.push(\n \"The situation drives the user simulator. Include these elements:\",\n );\n lines.push(\"- **Persona**: Who is the user? (e.g., a stressed small business owner, a confused teenager)\");\n lines.push(\"- **Emotional state**: How are they feeling? (e.g., frustrated, anxious, impatient)\");\n lines.push(\"- **Background/Context**: What happened before this conversation?\");\n lines.push(\"- **Intent**: What do they want to accomplish?\");\n lines.push(\"- **Constraints**: What limitations do they have? (e.g., no phone for 2FA, unfamiliar with technical terms)\");\n lines.push(\"\\nExample:\");\n lines.push(\"```\");\n lines.push(\"User is a small business owner stressed about tax deadline.\");\n lines.push(\"They need help categorizing expenses but aren't familiar with\");\n lines.push(\"accounting terms. 
They appreciate patient explanations and examples.\");\n lines.push(\"They have a spreadsheet of transactions but aren't sure which\");\n lines.push(\"categories apply to their consulting business.\");\n lines.push(\"```\");\n\n lines.push(\"\\n## Writing Good Criteria\\n\");\n lines.push(\"Criteria are what the judge uses to pass or fail the agent. Each criterion should be:\");\n lines.push(\"- **Specific and testable** — not vague like \\\"responds helpfully\\\"\");\n lines.push(\"- **Behavioral** — describes what the agent should *do*, not how it works internally\");\n lines.push(\"- **Independent** — each criterion checks one thing\");\n lines.push(\"\\nGood criteria patterns:\");\n lines.push(\"- **Information gathering**: \\\"Agent asks for the user's account number before proceeding\\\"\");\n lines.push(\"- **Safety/guardrails**: \\\"Agent does not reveal internal system details or error stack traces\\\"\");\n lines.push(\"- **Clarification**: \\\"Agent asks clarifying questions before taking irreversible action\\\"\");\n lines.push(\"- **Tone**: \\\"Agent maintains a professional and empathetic tone throughout\\\"\");\n lines.push(\"- **Completeness**: \\\"Agent confirms the user understands the solution before ending\\\"\");\n lines.push(\"- **Domain-specific**: \\\"Agent recommends releasing a wild frog rather than keeping it as a pet\\\"\");\n lines.push(\"\\nAvoid vague criteria like:\");\n lines.push('- \"Responds correctly\" — correct how?');\n lines.push('- \"Is helpful\" — helpful in what way?');\n lines.push('- \"Works well\" — not testable');\n\n lines.push(\"\\n## Target Types\\n\");\n lines.push(\"Scenarios can target different execution backends:\");\n lines.push(\"- **prompt**: Test a prompt template with variable substitution\");\n lines.push(\"- **http**: Test an HTTP endpoint (e.g., a deployed agent API)\");\n lines.push(\"- **code**: Test a code function directly\");\n\n lines.push(\"\\n## Tips\\n\");\n lines.push(\"- Start simple, then 
layer complexity (add constraints, edge cases)\");\n lines.push(\"- Test edge cases: user changes their mind, gives ambiguous input, makes mistakes\");\n lines.push(\"- Use `fetch_scenario_docs` for the full authoring guide and advanced patterns\");\n\n return lines.join(\"\\n\");\n}\n"],"mappings":";AAIO,SAAS,uBAA+B;AAC7C,QAAM,QAAkB,CAAC;AAEzB,QAAM,KAAK,qBAAqB;AAEhC,QAAM,KAAK,aAAa;AACxB,QAAM;AAAA,IACJ;AAAA,EACF;AACA,QAAM;AAAA,IACJ;AAAA,EACF;AACA,QAAM;AAAA,IACJ;AAAA,EACF;AACA,QAAM;AAAA,IACJ;AAAA,EACF;AAEA,QAAM,KAAK,iCAAiC;AAC5C,QAAM;AAAA,IACJ;AAAA,EACF;AACA,QAAM,KAAK,8FAA8F;AACzG,QAAM,KAAK,qFAAqF;AAChG,QAAM,KAAK,mEAAmE;AAC9E,QAAM,KAAK,gDAAgD;AAC3D,QAAM,KAAK,6GAA6G;AACxH,QAAM,KAAK,YAAY;AACvB,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,6DAA6D;AACxE,QAAM,KAAK,+DAA+D;AAC1E,QAAM,KAAK,sEAAsE;AACjF,QAAM,KAAK,+DAA+D;AAC1E,QAAM,KAAK,gDAAgD;AAC3D,QAAM,KAAK,KAAK;AAEhB,QAAM,KAAK,8BAA8B;AACzC,QAAM,KAAK,uFAAuF;AAClG,QAAM,KAAK,wEAAqE;AAChF,QAAM,KAAK,2FAAsF;AACjG,QAAM,KAAK,0DAAqD;AAChE,QAAM,KAAK,2BAA2B;AACtC,QAAM,KAAK,2FAA6F;AACxG,QAAM,KAAK,gGAAkG;AAC7G,QAAM,KAAK,0FAA4F;AACvG,QAAM,KAAK,6EAA+E;AAC1F,QAAM,KAAK,sFAAwF;AACnG,QAAM,KAAK,iGAAmG;AAC9G,QAAM,KAAK,8BAA8B;AACzC,QAAM,KAAK,4CAAuC;AAClD,QAAM,KAAK,4CAAuC;AAClD,QAAM,KAAK,oCAA+B;AAE1C,QAAM,KAAK,qBAAqB;AAChC,QAAM,KAAK,oDAAoD;AAC/D,QAAM,KAAK,iEAAiE;AAC5E,QAAM,KAAK,gEAAgE;AAC3E,QAAM,KAAK,2CAA2C;AAEtD,QAAM,KAAK,aAAa;AACxB,QAAM,KAAK,qEAAqE;AAChF,QAAM,KAAK,mFAAmF;AAC9F,QAAM,KAAK,gFAAgF;AAE3F,SAAO,MAAM,KAAK,IAAI;AACxB;","names":[]}