npm - @langwatch/mcp-server - Versions diffs - 0.3.3 → 0.5.0 - Mend

@langwatch/mcp-server 0.3.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

package/CHANGELOG.md +32 -0
package/README.md +97 -25
package/dist/archive-scenario-GAE4XVFM.js +19 -0
package/dist/archive-scenario-GAE4XVFM.js.map +1 -0
package/dist/chunk-AAQNA53E.js +28 -0
package/dist/chunk-AAQNA53E.js.map +1 -0
package/dist/chunk-JVWDWL3J.js +91 -0
package/dist/chunk-JVWDWL3J.js.map +1 -0
package/dist/chunk-K2YFPOSD.js +40 -0
package/dist/chunk-K2YFPOSD.js.map +1 -0
package/dist/chunk-ZXKLPC2E.js +27 -0
package/dist/chunk-ZXKLPC2E.js.map +1 -0
package/dist/config-FIQWQRUB.js +11 -0
package/dist/config-FIQWQRUB.js.map +1 -0
package/dist/create-prompt-P35POKBW.js +22 -0
package/dist/create-prompt-P35POKBW.js.map +1 -0
package/dist/create-scenario-3YRZVDYF.js +26 -0
package/dist/create-scenario-3YRZVDYF.js.map +1 -0
package/dist/discover-scenario-schema-MEEEVND7.js +65 -0
package/dist/discover-scenario-schema-MEEEVND7.js.map +1 -0
package/dist/discover-schema-3T52ORPB.js +446 -0
package/dist/discover-schema-3T52ORPB.js.map +1 -0
package/dist/get-analytics-BAVXTAPB.js +55 -0
package/dist/get-analytics-BAVXTAPB.js.map +1 -0
package/dist/get-prompt-LKCPT26O.js +48 -0
package/dist/get-prompt-LKCPT26O.js.map +1 -0
package/dist/get-scenario-3SCDW4Z6.js +33 -0
package/dist/get-scenario-3SCDW4Z6.js.map +1 -0
package/dist/get-trace-QFDWJ5D4.js +50 -0
package/dist/get-trace-QFDWJ5D4.js.map +1 -0
package/dist/index.js +22114 -8786
package/dist/index.js.map +1 -1
package/dist/list-prompts-UQPBCUYA.js +33 -0
package/dist/list-prompts-UQPBCUYA.js.map +1 -0
package/dist/list-scenarios-573YOUKC.js +40 -0
package/dist/list-scenarios-573YOUKC.js.map +1 -0
package/dist/search-traces-RSMYCAN7.js +72 -0
package/dist/search-traces-RSMYCAN7.js.map +1 -0
package/dist/update-prompt-G2Y5EBQY.js +31 -0
package/dist/update-prompt-G2Y5EBQY.js.map +1 -0
package/dist/update-scenario-SSGVOBJO.js +27 -0
package/dist/update-scenario-SSGVOBJO.js.map +1 -0
package/package.json +3 -3
package/src/__tests__/config.unit.test.ts +89 -0
package/src/__tests__/date-parsing.unit.test.ts +78 -0
package/src/__tests__/discover-schema.unit.test.ts +118 -0
package/src/__tests__/integration.integration.test.ts +313 -0
package/src/__tests__/langwatch-api.unit.test.ts +309 -0
package/src/__tests__/scenario-tools.integration.test.ts +286 -0
package/src/__tests__/scenario-tools.unit.test.ts +185 -0
package/src/__tests__/schemas.unit.test.ts +85 -0
package/src/__tests__/tools.unit.test.ts +729 -0
package/src/config.ts +31 -0
package/src/index.ts +383 -0
package/src/langwatch-api-scenarios.ts +67 -0
package/src/langwatch-api.ts +266 -0
package/src/schemas/analytics-groups.ts +78 -0
package/src/schemas/analytics-metrics.ts +179 -0
package/src/schemas/filter-fields.ts +119 -0
package/src/schemas/index.ts +3 -0
package/src/tools/archive-scenario.ts +19 -0
package/src/tools/create-prompt.ts +29 -0
package/src/tools/create-scenario.ts +30 -0
package/src/tools/discover-scenario-schema.ts +71 -0
package/src/tools/discover-schema.ts +106 -0
package/src/tools/get-analytics.ts +71 -0
package/src/tools/get-prompt.ts +56 -0
package/src/tools/get-scenario.ts +36 -0
package/src/tools/get-trace.ts +61 -0
package/src/tools/list-prompts.ts +35 -0
package/src/tools/list-scenarios.ts +47 -0
package/src/tools/search-traces.ts +91 -0
package/src/tools/update-prompt.ts +44 -0
package/src/tools/update-scenario.ts +32 -0
package/src/utils/date-parsing.ts +31 -0
package/tests/evaluations.ipynb +634 -634
package/tests/scenario-openai.test.ts +3 -1
package/uv.lock +1788 -1322

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,37 @@
 # Changelog
+## [0.5.0](https://github.com/langwatch/langwatch/compare/mcp-server@v0.4.0...mcp-server@v0.5.0) (2026-02-20)
+### Features
+* add scenario management tools to MCP server ([#1705](https://github.com/langwatch/langwatch/issues/1705)) ([0376fde](https://github.com/langwatch/langwatch/commit/0376fde0abff7b110b4ec5996a399c4b2ceafde0))
+### Miscellaneous
+* **deps-dev:** bump @eslint/js from 9.35.0 to 9.39.2 in /mcp-server ([#1465](https://github.com/langwatch/langwatch/issues/1465)) ([fbee07d](https://github.com/langwatch/langwatch/commit/fbee07d8b964d0a059eaa32c7685c8bf667898e7))
+* **deps:** bump hono ([f7e8f05](https://github.com/langwatch/langwatch/commit/f7e8f056843958cac4504ae02f37a351457f77ee))
+* **deps:** bump hono from 4.11.9 to 4.12.0 in /mcp-server in the npm_and_yarn group across 1 directory ([#1736](https://github.com/langwatch/langwatch/issues/1736)) ([f7e8f05](https://github.com/langwatch/langwatch/commit/f7e8f056843958cac4504ae02f37a351457f77ee))
+* **deps:** bump node-pty from 1.0.0 to 1.1.0 in /mcp-server ([#1447](https://github.com/langwatch/langwatch/issues/1447)) ([12ad02c](https://github.com/langwatch/langwatch/commit/12ad02c19dcc0ba90ad32f77659816b768188a53))
+* **deps:** bump qs ([f0e9747](https://github.com/langwatch/langwatch/commit/f0e97475becd58dfa523a944fbb3fa0657dfc1dc))
+* **deps:** bump qs from 6.14.1 to 6.14.2 in /mcp-server in the npm_and_yarn group across 1 directory ([#1568](https://github.com/langwatch/langwatch/issues/1568)) ([f0e9747](https://github.com/langwatch/langwatch/commit/f0e97475becd58dfa523a944fbb3fa0657dfc1dc))
+* **deps:** bump the npm_and_yarn group across 1 directory with 8 updates ([#1519](https://github.com/langwatch/langwatch/issues/1519)) ([487e563](https://github.com/langwatch/langwatch/commit/487e5637a941fa9335ec8e951efdf38bb0a02a8c))
+* **deps:** bump the uv group across 1 directory with 7 updates ([#1516](https://github.com/langwatch/langwatch/issues/1516)) ([7f2f178](https://github.com/langwatch/langwatch/commit/7f2f178588d89a63f3b38510844b87de7b528b3b))
+## [0.4.0](https://github.com/langwatch/langwatch/compare/mcp-server@v0.3.3...mcp-server@v0.4.0) (2026-02-08)
+### Features
+* add CI/CD execution support for evaluations v3 ([#1118](https://github.com/langwatch/langwatch/issues/1118)) ([d28adac](https://github.com/langwatch/langwatch/commit/d28adaceeb87921d9c7c0f1cf76b5e03f3b90fbd))
+* add observability and prompt MCP tools to @langwatch/mcp-server v0.4.0 ([#1410](https://github.com/langwatch/langwatch/issues/1410)) ([b770040](https://github.com/langwatch/langwatch/commit/b7700401dd87e7f1b76fefb213d67c906bcc1202))
+### Bug Fixes
+* **mcp-server:** skip integration test in CI ([#1300](https://github.com/langwatch/langwatch/issues/1300)) ([c16f232](https://github.com/langwatch/langwatch/commit/c16f2320b5c99818324d506a64ed3588085d8517))
 ## [0.3.3](https://github.com/langwatch/langwatch/compare/mcp-server@v0.3.2...mcp-server@v0.3.3) (2025-12-18)

package/README.md CHANGED Viewed

@@ -1,56 +1,128 @@
-# LangWatch 🏰 MCP Server
+# LangWatch MCP Server
-The LangWatch MCP Server makes your AI coding assistant an expert in both [LangWatch](https://langwatch.ai/docs), for automatically instrumenting your code, managing versioned prompts and creating evaluations; and [Scenario](https://langwatch.ai/scenario), for automatically testing your agents via simulations.
+MCP server that gives AI coding agents access to LangWatch observability data, prompts, and documentation via the [Model Context Protocol](https://modelcontextprotocol.io/introduction).
-## Setup in your Coding Assistant 👩‍💻
+## Quick Setup
-1. Open Cursor/Claude Code/your editor Settings
-2. Navigate to the MCP settings
-3. Set the "name" as "LangWatch"
-4. Add the LangWatch MCP:
+Add to your MCP client configuration (Claude Code, Cursor, etc.):
 ```json
 {
   "mcpServers": {
     "langwatch": {
       "command": "npx",
-      "args": ["-y", "@langwatch/mcp-server"]
+      "args": ["-y", "@langwatch/mcp-server"],
+      "env": {
+        "LANGWATCH_API_KEY": "your-api-key-here"
+      }
     }
   }
 }
 ```
+For Claude Code, you can also run:
+```bash
+claude mcp add langwatch -- npx -y @langwatch/mcp-server --apiKey your-api-key-here
+```
+The API key is required for observability and prompt tools. Documentation tools work without it.
+## Configuration
+| Env Var | CLI Arg | Description |
+|---------|---------|-------------|
+| `LANGWATCH_API_KEY` | `--apiKey` | API key for authentication |
+| `LANGWATCH_ENDPOINT` | `--endpoint` | API endpoint (default: `https://app.langwatch.ai`) |
 ## Tools
-The MCP Server provides the following tools:
+### Documentation
+| Tool | Description |
+|------|-------------|
+| `fetch_langwatch_docs` | Fetch LangWatch integration docs |
+| `fetch_scenario_docs` | Fetch Scenario agent testing docs |
+### Observability (requires API key)
+| Tool | Description |
+|------|-------------|
+| `discover_schema` | Explore available filters, metrics, aggregations, and groups |
+| `search_traces` | Search traces with filters, text query, and date range |
+| `get_trace` | Get full trace details with AI-readable formatting |
+| `get_analytics` | Query timeseries analytics data |
+### Prompts (requires API key)
+| Tool | Description |
+|------|-------------|
+| `list_prompts` | List all prompts |
+| `get_prompt` | Get prompt with messages and version history |
+| `create_prompt` | Create a new prompt |
+| `update_prompt` | Update prompt or create new version |
+## Output Formats
+The `search_traces` and `get_trace` tools support a `format` parameter:
+- **`digest`** (default) — AI-readable trace digest with hierarchical span tree, timing, inputs/outputs, and errors. Optimized for LLM consumption — compact and information-dense.
+- **`json`** — Full raw trace data with all fields. Useful for programmatic access or when you need the complete schema.
-### `fetch_langwatch_docs`
+## Usage Tips
-- **Description:** Fetches the LangWatch docs for understanding how to implement LangWatch in your codebase.
-- **Parameters:**
-  - `url`: (Optional) The full url of the specific doc page. If not provided, the docs index will be fetched.
+- Start with `discover_schema` to understand available filter fields and metrics.
+- Use `search_traces` to find relevant traces, then `get_trace` for full details.
+- Search returns 25 traces per page by default. Use `scrollId` from the response to paginate.
+- Analytics uses `category.name` format for metrics (e.g., `performance.completion_time`).
+- Use `create_prompt` / `update_prompt` with `createVersion: true` for safe prompt iteration.
-### `fetch_scenario_docs`
+## Development
-- **Description:** Fetches Scenario docs for understanding how to write agent simulations to test the agents on your codebase.
-- **Parameters:**
-  - `url`: (Optional) The full url of the specific doc page. If not provided, the docs index will be fetched.
+### Prerequisites
-## Example Usage
+- Node.js 18+
+- pnpm
-Ask your coding assistant to instrument your code:
+### Setup
-> "Can you instrument my LLM code with LangWatch"
+```bash
+pnpm install
+```
+### Build
-Or ask it to write a scenario test:
+```bash
+pnpm build
+```
-> "Can you implement a scenario test for my agent?"
+### Test
-<img alt="LangWatch MCP Cursor Example" src="../assets/mcp-server/cursor-example.png" width="900">
+```bash
+pnpm test        # Run all tests
+pnpm test:unit   # Unit tests only
+```
-## 🛟 Support
+### Local testing
+Build and point your MCP client to the local dist:
+```json
+{
+  "mcpServers": {
+    "langwatch": {
+      "command": "node",
+      "args": [
+        "/path/to/mcp-server/dist/index.js",
+        "--apiKey", "your-api-key",
+        "--endpoint", "http://localhost:5560"
+      ]
+    }
+  }
+}
+```
-If you have questions or need help, join our community:
+## Support
 - [Discord Community](https://discord.gg/kT4PhDS2gH)
 - [LangWatch Docs](https://langwatch.ai/docs)

package/dist/archive-scenario-GAE4XVFM.js ADDED Viewed

@@ -0,0 +1,19 @@
+import {
+  archiveScenario
+} from "./chunk-K2YFPOSD.js";
+import "./chunk-JVWDWL3J.js";
+import "./chunk-AAQNA53E.js";
+// src/tools/archive-scenario.ts
+async function handleArchiveScenario(params) {
+  const result = await archiveScenario(params.scenarioId);
+  const lines = [];
+  lines.push("Scenario archived successfully!\n");
+  lines.push(`**ID**: ${result.id}`);
+  lines.push(`**Status**: ${result.archived ? "archived" : "active"}`);
+  return lines.join("\n");
+}
+export {
+  handleArchiveScenario
+};
+//# sourceMappingURL=archive-scenario-GAE4XVFM.js.map

package/dist/archive-scenario-GAE4XVFM.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/tools/archive-scenario.ts"],"sourcesContent":["import { archiveScenario as apiArchiveScenario } from \"../langwatch-api-scenarios.js\";\n\n/**\n * Handles the archive_scenario MCP tool invocation.\n *\n * Archives (soft-deletes) a scenario and returns confirmation.\n */\nexport async function handleArchiveScenario(params: {\n scenarioId: string;\n}): Promise<string> {\n const result = await apiArchiveScenario(params.scenarioId);\n\n const lines: string[] = [];\n lines.push(\"Scenario archived successfully!\\n\");\n lines.push(`**ID**: ${result.id}`);\n lines.push(`**Status**: ${result.archived ? \"archived\" : \"active\"}`);\n\n return lines.join(\"\\n\");\n}\n"],"mappings":";;;;;;;AAOA,eAAsB,sBAAsB,QAExB;AAClB,QAAM,SAAS,MAAM,gBAAmB,OAAO,UAAU;AAEzD,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,mCAAmC;AAC9C,QAAM,KAAK,WAAW,OAAO,EAAE,EAAE;AACjC,QAAM,KAAK,eAAe,OAAO,WAAW,aAAa,QAAQ,EAAE;AAEnE,SAAO,MAAM,KAAK,IAAI;AACxB;","names":[]}

package/dist/chunk-AAQNA53E.js ADDED Viewed

@@ -0,0 +1,28 @@
+// src/config.ts
+var config;
+function initConfig(args) {
+  config = {
+    apiKey: args.apiKey || process.env.LANGWATCH_API_KEY,
+    endpoint: args.endpoint || process.env.LANGWATCH_ENDPOINT || "https://app.langwatch.ai"
+  };
+}
+function getConfig() {
+  if (!config) throw new Error("Config not initialized");
+  return config;
+}
+function requireApiKey() {
+  const { apiKey } = getConfig();
+  if (!apiKey) {
+    throw new Error(
+      "LANGWATCH_API_KEY is required. Set it via --apiKey flag or LANGWATCH_API_KEY environment variable."
+    );
+  }
+  return apiKey;
+}
+export {
+  initConfig,
+  getConfig,
+  requireApiKey
+};
+//# sourceMappingURL=chunk-AAQNA53E.js.map

package/dist/chunk-AAQNA53E.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/config.ts"],"sourcesContent":["export interface McpConfig {\n apiKey: string | undefined;\n endpoint: string;\n}\n\nlet config: McpConfig | undefined;\n\nexport function initConfig(args: { apiKey?: string; endpoint?: string }): void {\n config = {\n apiKey: args.apiKey || process.env.LANGWATCH_API_KEY,\n endpoint:\n args.endpoint ||\n process.env.LANGWATCH_ENDPOINT ||\n \"https://app.langwatch.ai\",\n };\n}\n\nexport function getConfig(): McpConfig {\n if (!config) throw new Error(\"Config not initialized\");\n return config;\n}\n\nexport function requireApiKey(): string {\n const { apiKey } = getConfig();\n if (!apiKey) {\n throw new Error(\n \"LANGWATCH_API_KEY is required. Set it via --apiKey flag or LANGWATCH_API_KEY environment variable.\"\n );\n }\n return apiKey;\n}\n"],"mappings":";AAKA,IAAI;AAEG,SAAS,WAAW,MAAoD;AAC7E,WAAS;AAAA,IACP,QAAQ,KAAK,UAAU,QAAQ,IAAI;AAAA,IACnC,UACE,KAAK,YACL,QAAQ,IAAI,sBACZ;AAAA,EACJ;AACF;AAEO,SAAS,YAAuB;AACrC,MAAI,CAAC,OAAQ,OAAM,IAAI,MAAM,wBAAwB;AACrD,SAAO;AACT;AAEO,SAAS,gBAAwB;AACtC,QAAM,EAAE,OAAO,IAAI,UAAU;AAC7B,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;","names":[]}

package/dist/chunk-JVWDWL3J.js ADDED Viewed

@@ -0,0 +1,91 @@
+import {
+  getConfig,
+  requireApiKey
+} from "./chunk-AAQNA53E.js";
+// src/langwatch-api.ts
+async function makeRequest(method, path, body) {
+  const url = getConfig().endpoint + path;
+  const headers = {
+    "X-Auth-Token": requireApiKey()
+  };
+  if (body !== void 0) {
+    headers["Content-Type"] = "application/json";
+  }
+  const response = await fetch(url, {
+    method,
+    headers,
+    ...body !== void 0 ? { body: JSON.stringify(body) } : {}
+  });
+  if (!response.ok) {
+    const responseBody = await response.text();
+    throw new Error(
+      `LangWatch API error ${response.status}: ${responseBody}`
+    );
+  }
+  return response.json();
+}
+async function searchTraces(params) {
+  const { format = "digest", ...rest } = params;
+  return makeRequest("POST", "/api/traces/search", {
+    ...rest,
+    format
+  });
+}
+async function getTraceById(traceId, format = "digest") {
+  return makeRequest(
+    "GET",
+    `/api/traces/${encodeURIComponent(traceId)}?format=${format}`
+  );
+}
+async function getAnalyticsTimeseries(params) {
+  return makeRequest(
+    "POST",
+    "/api/analytics/timeseries",
+    params
+  );
+}
+async function listPrompts() {
+  return makeRequest("GET", "/api/prompts");
+}
+async function getPrompt(idOrHandle, version) {
+  const query = version != null ? `?version=${version}` : "";
+  return makeRequest(
+    "GET",
+    `/api/prompts/${encodeURIComponent(idOrHandle)}${query}`
+  );
+}
+async function createPrompt(data) {
+  return makeRequest(
+    "POST",
+    "/api/prompts",
+    data
+  );
+}
+async function updatePrompt(idOrHandle, data) {
+  return makeRequest(
+    "POST",
+    `/api/prompts/${encodeURIComponent(idOrHandle)}`,
+    data
+  );
+}
+async function createPromptVersion(idOrHandle, data) {
+  return makeRequest(
+    "POST",
+    `/api/prompts/${encodeURIComponent(idOrHandle)}/versions`,
+    data
+  );
+}
+export {
+  makeRequest,
+  searchTraces,
+  getTraceById,
+  getAnalyticsTimeseries,
+  listPrompts,
+  getPrompt,
+  createPrompt,
+  updatePrompt,
+  createPromptVersion
+};
+//# sourceMappingURL=chunk-JVWDWL3J.js.map

package/dist/chunk-JVWDWL3J.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/langwatch-api.ts"],"sourcesContent":["import { getConfig, requireApiKey } from \"./config.js\";\n\n// --- Response types ---\n\nexport interface TraceSearchResult {\n trace_id: string;\n formatted_trace?: string;\n input?: { value: string };\n output?: { value: string };\n timestamps?: { started_at?: string | number };\n metadata?: Record<string, unknown>;\n error?: Record<string, unknown>;\n}\n\nexport interface SearchTracesResponse {\n traces: TraceSearchResult[];\n pagination?: {\n totalHits?: number;\n scrollId?: string;\n };\n}\n\nexport interface TraceDetailResponse {\n trace_id: string;\n formatted_trace?: string;\n input?: { value: string };\n output?: { value: string };\n timestamps?: {\n started_at?: string | number;\n updated_at?: string | number;\n inserted_at?: string | number;\n };\n metadata?: {\n user_id?: string;\n thread_id?: string;\n customer_id?: string;\n labels?: string[];\n [key: string]: unknown;\n };\n error?: Record<string, unknown>;\n ascii_tree?: string;\n evaluations?: Array<{\n evaluator_id?: string;\n name?: string;\n score?: number;\n passed?: boolean;\n label?: string;\n }>;\n spans?: Array<{\n span_id: string;\n name?: string;\n type?: string;\n model?: string;\n input?: { value: string };\n output?: { value: string };\n timestamps?: { started_at?: number; finished_at?: number };\n metrics?: {\n completion_time_ms?: number;\n prompt_tokens?: number;\n completion_tokens?: number;\n tokens_estimated?: boolean;\n cost?: number;\n };\n }>;\n}\n\nexport interface AnalyticsBucket {\n date: string;\n [key: string]: unknown;\n}\n\nexport interface AnalyticsTimeseriesResponse {\n currentPeriod: AnalyticsBucket[];\n previousPeriod: AnalyticsBucket[];\n}\n\nexport interface PromptSummary {\n id?: string;\n handle?: string;\n name?: string;\n description?: string | null;\n latestVersionNumber?: number;\n version?: number;\n}\n\nexport interface PromptVersion {\n version?: number;\n commitMessage?: string;\n 
model?: string;\n modelProvider?: string;\n messages?: Array<{ role: string; content: string }>;\n}\n\nexport interface PromptDetailResponse extends PromptSummary {\n versions?: PromptVersion[];\n model?: string;\n modelProvider?: string;\n messages?: Array<{ role: string; content: string }>;\n prompt?: Array<{ role: string; content: string }>;\n}\n\nexport interface PromptMutationResponse {\n id?: string;\n handle?: string;\n name?: string;\n latestVersionNumber?: number;\n}\n\n// --- HTTP client ---\n\n/**\n * Sends an HTTP request to the LangWatch API.\n *\n * Builds the full URL from the configured endpoint, adds authentication,\n * and handles JSON serialization/deserialization.\n *\n * @throws Error with status code and response body when the response is not OK\n */\nexport async function makeRequest(\n method: \"GET\" | \"POST\" | \"PUT\" | \"PATCH\" | \"DELETE\",\n path: string,\n body?: unknown\n): Promise<unknown> {\n const url = getConfig().endpoint + path;\n const headers: Record<string, string> = {\n \"X-Auth-Token\": requireApiKey(),\n };\n\n if (body !== undefined) {\n headers[\"Content-Type\"] = \"application/json\";\n }\n\n const response = await fetch(url, {\n method,\n headers,\n ...(body !== undefined ? { body: JSON.stringify(body) } : {}),\n });\n\n if (!response.ok) {\n const responseBody = await response.text();\n throw new Error(\n `LangWatch API error ${response.status}: ${responseBody}`\n );\n }\n\n return response.json();\n}\n\n/** Searches traces with optional filters and pagination. 
*/\nexport async function searchTraces(params: {\n query?: string;\n filters?: Record<string, string[]>;\n startDate: number;\n endDate: number;\n pageSize?: number;\n pageOffset?: number;\n scrollId?: string;\n format?: \"digest\" | \"json\";\n}): Promise<SearchTracesResponse> {\n const { format = \"digest\", ...rest } = params;\n return makeRequest(\"POST\", \"/api/traces/search\", {\n ...rest,\n format,\n }) as Promise<SearchTracesResponse>;\n}\n\n/** Retrieves a single trace by its ID. */\nexport async function getTraceById(\n traceId: string,\n format: \"digest\" | \"json\" = \"digest\"\n): Promise<TraceDetailResponse> {\n return makeRequest(\n \"GET\",\n `/api/traces/${encodeURIComponent(traceId)}?format=${format}`\n ) as Promise<TraceDetailResponse>;\n}\n\n/** Fetches analytics timeseries data for the given metrics and date range. */\nexport async function getAnalyticsTimeseries(params: {\n series: Array<{\n metric: string;\n aggregation: string;\n key?: string;\n subkey?: string;\n }>;\n startDate: number;\n endDate: number;\n timeZone?: string;\n groupBy?: string;\n groupByKey?: string;\n filters?: Record<string, string[]>;\n}): Promise<AnalyticsTimeseriesResponse> {\n return makeRequest(\n \"POST\",\n \"/api/analytics/timeseries\",\n params\n ) as Promise<AnalyticsTimeseriesResponse>;\n}\n\n/** Lists all prompts in the project. */\nexport async function listPrompts(): Promise<PromptSummary[]> {\n return makeRequest(\"GET\", \"/api/prompts\") as Promise<PromptSummary[]>;\n}\n\n/** Retrieves a single prompt by ID or handle. */\nexport async function getPrompt(\n idOrHandle: string,\n version?: number\n): Promise<PromptDetailResponse> {\n const query = version != null ? `?version=${version}` : \"\";\n return makeRequest(\n \"GET\",\n `/api/prompts/${encodeURIComponent(idOrHandle)}${query}`\n ) as Promise<PromptDetailResponse>;\n}\n\n/** Creates a new prompt. 
*/\nexport async function createPrompt(data: {\n name: string;\n handle?: string;\n messages: Array<{ role: string; content: string }>;\n model: string;\n modelProvider: string;\n description?: string;\n}): Promise<PromptMutationResponse> {\n return makeRequest(\n \"POST\",\n \"/api/prompts\",\n data\n ) as Promise<PromptMutationResponse>;\n}\n\n/** Updates an existing prompt by ID or handle. */\nexport async function updatePrompt(\n idOrHandle: string,\n data: {\n messages?: Array<{ role: string; content: string }>;\n model?: string;\n modelProvider?: string;\n commitMessage?: string;\n }\n): Promise<PromptMutationResponse> {\n return makeRequest(\n \"POST\",\n `/api/prompts/${encodeURIComponent(idOrHandle)}`,\n data\n ) as Promise<PromptMutationResponse>;\n}\n\n/** Creates a new version of an existing prompt. */\nexport async function createPromptVersion(\n idOrHandle: string,\n data: {\n messages?: Array<{ role: string; content: string }>;\n model?: string;\n modelProvider?: string;\n commitMessage?: string;\n }\n): Promise<PromptMutationResponse> {\n return makeRequest(\n \"POST\",\n `/api/prompts/${encodeURIComponent(idOrHandle)}/versions`,\n data\n ) as 
Promise<PromptMutationResponse>;\n}\n\n"],"mappings":";;;;;;AAsHA,eAAsB,YACpB,QACA,MACA,MACkB;AAClB,QAAM,MAAM,UAAU,EAAE,WAAW;AACnC,QAAM,UAAkC;AAAA,IACtC,gBAAgB,cAAc;AAAA,EAChC;AAEA,MAAI,SAAS,QAAW;AACtB,YAAQ,cAAc,IAAI;AAAA,EAC5B;AAEA,QAAM,WAAW,MAAM,MAAM,KAAK;AAAA,IAChC;AAAA,IACA;AAAA,IACA,GAAI,SAAS,SAAY,EAAE,MAAM,KAAK,UAAU,IAAI,EAAE,IAAI,CAAC;AAAA,EAC7D,CAAC;AAED,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,eAAe,MAAM,SAAS,KAAK;AACzC,UAAM,IAAI;AAAA,MACR,uBAAuB,SAAS,MAAM,KAAK,YAAY;AAAA,IACzD;AAAA,EACF;AAEA,SAAO,SAAS,KAAK;AACvB;AAGA,eAAsB,aAAa,QASD;AAChC,QAAM,EAAE,SAAS,UAAU,GAAG,KAAK,IAAI;AACvC,SAAO,YAAY,QAAQ,sBAAsB;AAAA,IAC/C,GAAG;AAAA,IACH;AAAA,EACF,CAAC;AACH;AAGA,eAAsB,aACpB,SACA,SAA4B,UACE;AAC9B,SAAO;AAAA,IACL;AAAA,IACA,eAAe,mBAAmB,OAAO,CAAC,WAAW,MAAM;AAAA,EAC7D;AACF;AAGA,eAAsB,uBAAuB,QAaJ;AACvC,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAGA,eAAsB,cAAwC;AAC5D,SAAO,YAAY,OAAO,cAAc;AAC1C;AAGA,eAAsB,UACpB,YACA,SAC+B;AAC/B,QAAM,QAAQ,WAAW,OAAO,YAAY,OAAO,KAAK;AACxD,SAAO;AAAA,IACL;AAAA,IACA,gBAAgB,mBAAmB,UAAU,CAAC,GAAG,KAAK;AAAA,EACxD;AACF;AAGA,eAAsB,aAAa,MAOC;AAClC,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAGA,eAAsB,aACpB,YACA,MAMiC;AACjC,SAAO;AAAA,IACL;AAAA,IACA,gBAAgB,mBAAmB,UAAU,CAAC;AAAA,IAC9C;AAAA,EACF;AACF;AAGA,eAAsB,oBACpB,YACA,MAMiC;AACjC,SAAO;AAAA,IACL;AAAA,IACA,gBAAgB,mBAAmB,UAAU,CAAC;AAAA,IAC9C;AAAA,EACF;AACF;","names":[]}

package/dist/chunk-K2YFPOSD.js ADDED Viewed

@@ -0,0 +1,40 @@
+import {
+  makeRequest
+} from "./chunk-JVWDWL3J.js";
+// src/langwatch-api-scenarios.ts
+async function listScenarios() {
+  return makeRequest("GET", "/api/scenarios");
+}
+async function getScenario(id) {
+  return makeRequest(
+    "GET",
+    `/api/scenarios/${encodeURIComponent(id)}`
+  );
+}
+async function createScenario(data) {
+  return makeRequest("POST", "/api/scenarios", data);
+}
+async function updateScenario(params) {
+  const { id, ...data } = params;
+  return makeRequest(
+    "PUT",
+    `/api/scenarios/${encodeURIComponent(id)}`,
+    data
+  );
+}
+async function archiveScenario(id) {
+  return makeRequest(
+    "DELETE",
+    `/api/scenarios/${encodeURIComponent(id)}`
+  );
+}
+export {
+  listScenarios,
+  getScenario,
+  createScenario,
+  updateScenario,
+  archiveScenario
+};
+//# sourceMappingURL=chunk-K2YFPOSD.js.map

package/dist/chunk-K2YFPOSD.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/langwatch-api-scenarios.ts"],"sourcesContent":["import { makeRequest } from \"./langwatch-api.js\";\n\n// --- Scenario types ---\n\nexport interface ScenarioSummary {\n id: string;\n name: string;\n situation: string;\n criteria: string[];\n labels: string[];\n}\n\nexport interface ScenarioArchiveResponse {\n id: string;\n archived: boolean;\n}\n\n// --- Scenario API functions ---\n\n/** Lists all scenarios in the project. */\nexport async function listScenarios(): Promise<ScenarioSummary[]> {\n return makeRequest(\"GET\", \"/api/scenarios\") as Promise<ScenarioSummary[]>;\n}\n\n/** Retrieves a single scenario by ID. */\nexport async function getScenario(id: string): Promise<ScenarioSummary> {\n return makeRequest(\n \"GET\",\n `/api/scenarios/${encodeURIComponent(id)}`\n ) as Promise<ScenarioSummary>;\n}\n\n/** Creates a new scenario. */\nexport async function createScenario(data: {\n name: string;\n situation: string;\n criteria?: string[];\n labels?: string[];\n}): Promise<ScenarioSummary> {\n return makeRequest(\"POST\", \"/api/scenarios\", data) as Promise<ScenarioSummary>;\n}\n\n/** Updates an existing scenario. */\nexport async function updateScenario(params: {\n id: string;\n name?: string;\n situation?: string;\n criteria?: string[];\n labels?: string[];\n}): Promise<ScenarioSummary> {\n const { id, ...data } = params;\n return makeRequest(\n \"PUT\",\n `/api/scenarios/${encodeURIComponent(id)}`,\n data\n ) as Promise<ScenarioSummary>;\n}\n\n/** Archives (soft-deletes) a scenario. 
*/\nexport async function archiveScenario(\n id: string\n): Promise<ScenarioArchiveResponse> {\n return makeRequest(\n \"DELETE\",\n `/api/scenarios/${encodeURIComponent(id)}`\n ) as Promise<ScenarioArchiveResponse>;\n}\n"],"mappings":";;;;;AAoBA,eAAsB,gBAA4C;AAChE,SAAO,YAAY,OAAO,gBAAgB;AAC5C;AAGA,eAAsB,YAAY,IAAsC;AACtE,SAAO;AAAA,IACL;AAAA,IACA,kBAAkB,mBAAmB,EAAE,CAAC;AAAA,EAC1C;AACF;AAGA,eAAsB,eAAe,MAKR;AAC3B,SAAO,YAAY,QAAQ,kBAAkB,IAAI;AACnD;AAGA,eAAsB,eAAe,QAMR;AAC3B,QAAM,EAAE,IAAI,GAAG,KAAK,IAAI;AACxB,SAAO;AAAA,IACL;AAAA,IACA,kBAAkB,mBAAmB,EAAE,CAAC;AAAA,IACxC;AAAA,EACF;AACF;AAGA,eAAsB,gBACpB,IACkC;AAClC,SAAO;AAAA,IACL;AAAA,IACA,kBAAkB,mBAAmB,EAAE,CAAC;AAAA,EAC1C;AACF;","names":[]}

package/dist/chunk-ZXKLPC2E.js ADDED Viewed

@@ -0,0 +1,27 @@
+// src/utils/date-parsing.ts
+var RELATIVE_UNITS = {
+  h: 36e5,
+  d: 864e5,
+  w: 6048e5,
+  m: 2592e6
+};
+function parseRelativeDate(input) {
+  if (input === "now") return Date.now();
+  const match = input.match(/^(\d+)(h|d|w|m)$/);
+  if (match) {
+    const [, amount, unit] = match;
+    return Date.now() - parseInt(amount) * (RELATIVE_UNITS[unit] ?? 864e5);
+  }
+  const parsed = Date.parse(input);
+  if (Number.isNaN(parsed)) {
+    throw new Error(
+      `Invalid date: "${input}". Use a relative duration (e.g. "24h", "7d", "4w") or an ISO date string.`
+    );
+  }
+  return parsed;
+}
+export {
+  parseRelativeDate
+};
+//# sourceMappingURL=chunk-ZXKLPC2E.js.map

package/dist/chunk-ZXKLPC2E.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/utils/date-parsing.ts"],"sourcesContent":["const RELATIVE_UNITS: Record<string, number> = {\n h: 3600000,\n d: 86400000,\n w: 604800000,\n m: 2592000000,\n};\n\n/**\n * Parses a date string that can be either a relative duration (e.g. \"24h\", \"7d\")\n * or an ISO date string. Throws on invalid input rather than silently falling back.\n *\n * @returns epoch milliseconds\n * @throws Error if the input is not a valid relative duration or parseable date string\n */\nexport function parseRelativeDate(input: string): number {\n if (input === \"now\") return Date.now();\n\n const match = input.match(/^(\\d+)(h|d|w|m)$/);\n if (match) {\n const [, amount, unit] = match;\n return Date.now() - parseInt(amount!) * (RELATIVE_UNITS[unit!] ?? 86400000);\n }\n\n const parsed = Date.parse(input);\n if (Number.isNaN(parsed)) {\n throw new Error(\n `Invalid date: \"${input}\". Use a relative duration (e.g. \"24h\", \"7d\", \"4w\") or an ISO date string.`\n );\n }\n return parsed;\n}\n"],"mappings":";AAAA,IAAM,iBAAyC;AAAA,EAC7C,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AACL;AASO,SAAS,kBAAkB,OAAuB;AACvD,MAAI,UAAU,MAAO,QAAO,KAAK,IAAI;AAErC,QAAM,QAAQ,MAAM,MAAM,kBAAkB;AAC5C,MAAI,OAAO;AACT,UAAM,CAAC,EAAE,QAAQ,IAAI,IAAI;AACzB,WAAO,KAAK,IAAI,IAAI,SAAS,MAAO,KAAK,eAAe,IAAK,KAAK;AAAA,EACpE;AAEA,QAAM,SAAS,KAAK,MAAM,KAAK;AAC/B,MAAI,OAAO,MAAM,MAAM,GAAG;AACxB,UAAM,IAAI;AAAA,MACR,kBAAkB,KAAK;AAAA,IACzB;AAAA,EACF;AACA,SAAO;AACT;","names":[]}

package/dist/config-FIQWQRUB.js ADDED Viewed

@@ -0,0 +1,11 @@
+import {
+  getConfig,
+  initConfig,
+  requireApiKey
+} from "./chunk-AAQNA53E.js";
+export {
+  getConfig,
+  initConfig,
+  requireApiKey
+};
+//# sourceMappingURL=config-FIQWQRUB.js.map

package/dist/config-FIQWQRUB.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}

package/dist/create-prompt-P35POKBW.js ADDED Viewed

@@ -0,0 +1,22 @@
+import {
+  createPrompt
+} from "./chunk-JVWDWL3J.js";
+import "./chunk-AAQNA53E.js";
+// src/tools/create-prompt.ts
+async function handleCreatePrompt(params) {
+  const result = await createPrompt(params);
+  const lines = [];
+  lines.push("Prompt created successfully!\n");
+  if (result.id) lines.push(`**ID**: ${result.id}`);
+  if (result.handle) lines.push(`**Handle**: ${result.handle}`);
+  lines.push(`**Name**: ${result.name || params.name}`);
+  lines.push(`**Model**: ${params.model} (${params.modelProvider})`);
+  if (result.latestVersionNumber != null)
+    lines.push(`**Version**: v${result.latestVersionNumber}`);
+  return lines.join("\n");
+}
+export {
+  handleCreatePrompt
+};
+//# sourceMappingURL=create-prompt-P35POKBW.js.map

package/dist/create-prompt-P35POKBW.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/tools/create-prompt.ts"],"sourcesContent":["import { createPrompt as apiCreatePrompt } from \"../langwatch-api.js\";\n\n/**\n * Handles the create_prompt MCP tool invocation.\n *\n * Creates a new prompt in the LangWatch project and returns a\n * confirmation with the created prompt's details.\n */\nexport async function handleCreatePrompt(params: {\n name: string;\n handle?: string;\n messages: Array<{ role: string; content: string }>;\n model: string;\n modelProvider: string;\n description?: string;\n}): Promise<string> {\n const result = await apiCreatePrompt(params);\n\n const lines: string[] = [];\n lines.push(\"Prompt created successfully!\\n\");\n if (result.id) lines.push(`**ID**: ${result.id}`);\n if (result.handle) lines.push(`**Handle**: ${result.handle}`);\n lines.push(`**Name**: ${result.name || params.name}`);\n lines.push(`**Model**: ${params.model} (${params.modelProvider})`);\n if (result.latestVersionNumber != null)\n lines.push(`**Version**: v${result.latestVersionNumber}`);\n\n return lines.join(\"\\n\");\n}\n"],"mappings":";;;;;;AAQA,eAAsB,mBAAmB,QAOrB;AAClB,QAAM,SAAS,MAAM,aAAgB,MAAM;AAE3C,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,gCAAgC;AAC3C,MAAI,OAAO,GAAI,OAAM,KAAK,WAAW,OAAO,EAAE,EAAE;AAChD,MAAI,OAAO,OAAQ,OAAM,KAAK,eAAe,OAAO,MAAM,EAAE;AAC5D,QAAM,KAAK,aAAa,OAAO,QAAQ,OAAO,IAAI,EAAE;AACpD,QAAM,KAAK,cAAc,OAAO,KAAK,KAAK,OAAO,aAAa,GAAG;AACjE,MAAI,OAAO,uBAAuB;AAChC,UAAM,KAAK,iBAAiB,OAAO,mBAAmB,EAAE;AAE1D,SAAO,MAAM,KAAK,IAAI;AACxB;","names":[]}

package/dist/create-scenario-3YRZVDYF.js ADDED Viewed

@@ -0,0 +1,26 @@
+import {
+  createScenario
+} from "./chunk-K2YFPOSD.js";
+import "./chunk-JVWDWL3J.js";
+import "./chunk-AAQNA53E.js";
+// src/tools/create-scenario.ts
+async function handleCreateScenario(params) {
+  const result = await createScenario(params);
+  const lines = [];
+  lines.push("Scenario created successfully!\n");
+  lines.push(`**ID**: ${result.id}`);
+  lines.push(`**Name**: ${result.name}`);
+  lines.push(`**Situation**: ${result.situation}`);
+  if (Array.isArray(result.criteria) && result.criteria.length > 0) {
+    lines.push(`**Criteria**: ${result.criteria.length} criteria`);
+  }
+  if (Array.isArray(result.labels) && result.labels.length > 0) {
+    lines.push(`**Labels**: ${result.labels.join(", ")}`);
+  }
+  return lines.join("\n");
+}
+export {
+  handleCreateScenario
+};
+//# sourceMappingURL=create-scenario-3YRZVDYF.js.map

package/dist/create-scenario-3YRZVDYF.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/tools/create-scenario.ts"],"sourcesContent":["import { createScenario as apiCreateScenario } from \"../langwatch-api-scenarios.js\";\n\n/**\n * Handles the create_scenario MCP tool invocation.\n *\n * Creates a new scenario in the LangWatch project and returns a\n * confirmation with the created scenario's details.\n */\nexport async function handleCreateScenario(params: {\n name: string;\n situation: string;\n criteria?: string[];\n labels?: string[];\n}): Promise<string> {\n const result = await apiCreateScenario(params);\n\n const lines: string[] = [];\n lines.push(\"Scenario created successfully!\\n\");\n lines.push(`**ID**: ${result.id}`);\n lines.push(`**Name**: ${result.name}`);\n lines.push(`**Situation**: ${result.situation}`);\n if (Array.isArray(result.criteria) && result.criteria.length > 0) {\n lines.push(`**Criteria**: ${result.criteria.length} criteria`);\n }\n if (Array.isArray(result.labels) && result.labels.length > 0) {\n lines.push(`**Labels**: ${result.labels.join(\", \")}`);\n }\n\n return lines.join(\"\\n\");\n}\n"],"mappings":";;;;;;;AAQA,eAAsB,qBAAqB,QAKvB;AAClB,QAAM,SAAS,MAAM,eAAkB,MAAM;AAE7C,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,kCAAkC;AAC7C,QAAM,KAAK,WAAW,OAAO,EAAE,EAAE;AACjC,QAAM,KAAK,aAAa,OAAO,IAAI,EAAE;AACrC,QAAM,KAAK,kBAAkB,OAAO,SAAS,EAAE;AAC/C,MAAI,MAAM,QAAQ,OAAO,QAAQ,KAAK,OAAO,SAAS,SAAS,GAAG;AAChE,UAAM,KAAK,iBAAiB,OAAO,SAAS,MAAM,WAAW;AAAA,EAC/D;AACA,MAAI,MAAM,QAAQ,OAAO,MAAM,KAAK,OAAO,OAAO,SAAS,GAAG;AAC5D,UAAM,KAAK,eAAe,OAAO,OAAO,KAAK,IAAI,CAAC,EAAE;AAAA,EACtD;AAEA,SAAO,MAAM,KAAK,IAAI;AACxB;","names":[]}

package/dist/discover-scenario-schema-MEEEVND7.js ADDED Viewed

@@ -0,0 +1,65 @@
+/**
+ * src/tools/discover-scenario-schema.ts
+ *
+ * Builds the static, Markdown-formatted scenario authoring guide: the
+ * scenario fields, advice on writing situations and criteria, the available
+ * target types, and general tips.
+ *
+ * Takes no arguments and reads no external state, so every call produces the
+ * same text.
+ *
+ * @returns {string} The guide, with each pushed entry separated by "\n".
+ */
+function formatScenarioSchema() {
+  const lines = []; // output fragments, joined with "\n" at the end
+  lines.push("# Scenario Schema\n"); // a heading's trailing "\n" yields a blank line after it once joined
+  lines.push("## Fields\n"); // section: the four scenario fields
+  lines.push(
+    '- **name** (required): A short, descriptive name (e.g., "billing dispute resolution", "password reset with 2FA unavailable")'
+  );
+  lines.push(
+    "- **situation** (required): The context that guides the user simulator \u2014 who the user is, what they want, and any constraints (see Writing a Good Situation below)"
+  );
+  lines.push(
+    "- **criteria** (array of strings): Pass/fail conditions a judge evaluates the agent against (see Writing Good Criteria below)"
+  );
+  lines.push(
+    '- **labels** (array of strings): Tags for organizing scenarios (e.g., "auth", "happy-path", "edge-case")'
+  );
+  lines.push("\n## Writing a Good Situation\n"); // section: elements of a situation; leading "\n" separates it from the previous section
+  lines.push(
+    "The situation drives the user simulator. Include these elements:"
+  );
+  lines.push("- **Persona**: Who is the user? (e.g., a stressed small business owner, a confused teenager)");
+  lines.push("- **Emotional state**: How are they feeling? (e.g., frustrated, anxious, impatient)");
+  lines.push("- **Background/Context**: What happened before this conversation?");
+  lines.push("- **Intent**: What do they want to accomplish?");
+  lines.push("- **Constraints**: What limitations do they have? (e.g., no phone for 2FA, unfamiliar with technical terms)");
+  lines.push("\nExample:");
+  lines.push("```"); // opens the fenced example situation
+  lines.push("User is a small business owner stressed about tax deadline.");
+  lines.push("They need help categorizing expenses but aren't familiar with");
+  lines.push("accounting terms. They appreciate patient explanations and examples.");
+  lines.push("They have a spreadsheet of transactions but aren't sure which");
+  lines.push("categories apply to their consulting business.");
+  lines.push("```"); // closes the fenced example situation
+  lines.push("\n## Writing Good Criteria\n"); // section: properties of good criteria, then good and bad examples
+  lines.push("Criteria are what the judge uses to pass or fail the agent. Each criterion should be:");
+  lines.push('- **Specific and testable** \u2014 not vague like "responds helpfully"');
+  lines.push("- **Behavioral** \u2014 describes what the agent should *do*, not how it works internally");
+  lines.push("- **Independent** \u2014 each criterion checks one thing");
+  lines.push("\nGood criteria patterns:");
+  lines.push(`- **Information gathering**: "Agent asks for the user's account number before proceeding"`);
+  lines.push('- **Safety/guardrails**: "Agent does not reveal internal system details or error stack traces"');
+  lines.push('- **Clarification**: "Agent asks clarifying questions before taking irreversible action"');
+  lines.push('- **Tone**: "Agent maintains a professional and empathetic tone throughout"');
+  lines.push('- **Completeness**: "Agent confirms the user understands the solution before ending"');
+  lines.push('- **Domain-specific**: "Agent recommends releasing a wild frog rather than keeping it as a pet"');
+  lines.push("\nAvoid vague criteria like:");
+  lines.push('- "Responds correctly" \u2014 correct how?');
+  lines.push('- "Is helpful" \u2014 helpful in what way?');
+  lines.push('- "Works well" \u2014 not testable');
+  lines.push("\n## Target Types\n"); // section: the three listed target kinds (prompt, http, code)
+  lines.push("Scenarios can target different execution backends:");
+  lines.push("- **prompt**: Test a prompt template with variable substitution");
+  lines.push("- **http**: Test an HTTP endpoint (e.g., a deployed agent API)");
+  lines.push("- **code**: Test a code function directly");
+  lines.push("\n## Tips\n"); // section: closing advice
+  lines.push("- Start simple, then layer complexity (add constraints, edge cases)");
+  lines.push("- Test edge cases: user changes their mind, gives ambiguous input, makes mistakes");
+  lines.push("- Use `fetch_scenario_docs` for the full authoring guide and advanced patterns");
+  return lines.join("\n"); // single string, one pushed entry per line
+}
+export {
+  formatScenarioSchema
+};
+//# sourceMappingURL=discover-scenario-schema-MEEEVND7.js.map

package/dist/discover-scenario-schema-MEEEVND7.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/tools/discover-scenario-schema.ts"],"sourcesContent":["/**\n * Returns a human-readable description of the scenario schema,\n * including field descriptions, authoring guidance, and examples.\n */\nexport function formatScenarioSchema(): string {\n const lines: string[] = [];\n\n lines.push(\"# Scenario Schema\\n\");\n\n lines.push(\"## Fields\\n\");\n lines.push(\n '- **name** (required): A short, descriptive name (e.g., \"billing dispute resolution\", \"password reset with 2FA unavailable\")',\n );\n lines.push(\n \"- **situation** (required): The context that guides the user simulator — who the user is, what they want, and any constraints (see Writing a Good Situation below)\",\n );\n lines.push(\n \"- **criteria** (array of strings): Pass/fail conditions a judge evaluates the agent against (see Writing Good Criteria below)\",\n );\n lines.push(\n '- **labels** (array of strings): Tags for organizing scenarios (e.g., \"auth\", \"happy-path\", \"edge-case\")',\n );\n\n lines.push(\"\\n## Writing a Good Situation\\n\");\n lines.push(\n \"The situation drives the user simulator. Include these elements:\",\n );\n lines.push(\"- **Persona**: Who is the user? (e.g., a stressed small business owner, a confused teenager)\");\n lines.push(\"- **Emotional state**: How are they feeling? (e.g., frustrated, anxious, impatient)\");\n lines.push(\"- **Background/Context**: What happened before this conversation?\");\n lines.push(\"- **Intent**: What do they want to accomplish?\");\n lines.push(\"- **Constraints**: What limitations do they have? (e.g., no phone for 2FA, unfamiliar with technical terms)\");\n lines.push(\"\\nExample:\");\n lines.push(\"```\");\n lines.push(\"User is a small business owner stressed about tax deadline.\");\n lines.push(\"They need help categorizing expenses but aren't familiar with\");\n lines.push(\"accounting terms. 
They appreciate patient explanations and examples.\");\n lines.push(\"They have a spreadsheet of transactions but aren't sure which\");\n lines.push(\"categories apply to their consulting business.\");\n lines.push(\"```\");\n\n lines.push(\"\\n## Writing Good Criteria\\n\");\n lines.push(\"Criteria are what the judge uses to pass or fail the agent. Each criterion should be:\");\n lines.push(\"- **Specific and testable** — not vague like \\\"responds helpfully\\\"\");\n lines.push(\"- **Behavioral** — describes what the agent should *do*, not how it works internally\");\n lines.push(\"- **Independent** — each criterion checks one thing\");\n lines.push(\"\\nGood criteria patterns:\");\n lines.push(\"- **Information gathering**: \\\"Agent asks for the user's account number before proceeding\\\"\");\n lines.push(\"- **Safety/guardrails**: \\\"Agent does not reveal internal system details or error stack traces\\\"\");\n lines.push(\"- **Clarification**: \\\"Agent asks clarifying questions before taking irreversible action\\\"\");\n lines.push(\"- **Tone**: \\\"Agent maintains a professional and empathetic tone throughout\\\"\");\n lines.push(\"- **Completeness**: \\\"Agent confirms the user understands the solution before ending\\\"\");\n lines.push(\"- **Domain-specific**: \\\"Agent recommends releasing a wild frog rather than keeping it as a pet\\\"\");\n lines.push(\"\\nAvoid vague criteria like:\");\n lines.push('- \"Responds correctly\" — correct how?');\n lines.push('- \"Is helpful\" — helpful in what way?');\n lines.push('- \"Works well\" — not testable');\n\n lines.push(\"\\n## Target Types\\n\");\n lines.push(\"Scenarios can target different execution backends:\");\n lines.push(\"- **prompt**: Test a prompt template with variable substitution\");\n lines.push(\"- **http**: Test an HTTP endpoint (e.g., a deployed agent API)\");\n lines.push(\"- **code**: Test a code function directly\");\n\n lines.push(\"\\n## Tips\\n\");\n lines.push(\"- Start simple, then 
layer complexity (add constraints, edge cases)\");\n lines.push(\"- Test edge cases: user changes their mind, gives ambiguous input, makes mistakes\");\n lines.push(\"- Use `fetch_scenario_docs` for the full authoring guide and advanced patterns\");\n\n return lines.join(\"\\n\");\n}\n"],"mappings":";AAIO,SAAS,uBAA+B;AAC7C,QAAM,QAAkB,CAAC;AAEzB,QAAM,KAAK,qBAAqB;AAEhC,QAAM,KAAK,aAAa;AACxB,QAAM;AAAA,IACJ;AAAA,EACF;AACA,QAAM;AAAA,IACJ;AAAA,EACF;AACA,QAAM;AAAA,IACJ;AAAA,EACF;AACA,QAAM;AAAA,IACJ;AAAA,EACF;AAEA,QAAM,KAAK,iCAAiC;AAC5C,QAAM;AAAA,IACJ;AAAA,EACF;AACA,QAAM,KAAK,8FAA8F;AACzG,QAAM,KAAK,qFAAqF;AAChG,QAAM,KAAK,mEAAmE;AAC9E,QAAM,KAAK,gDAAgD;AAC3D,QAAM,KAAK,6GAA6G;AACxH,QAAM,KAAK,YAAY;AACvB,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,6DAA6D;AACxE,QAAM,KAAK,+DAA+D;AAC1E,QAAM,KAAK,sEAAsE;AACjF,QAAM,KAAK,+DAA+D;AAC1E,QAAM,KAAK,gDAAgD;AAC3D,QAAM,KAAK,KAAK;AAEhB,QAAM,KAAK,8BAA8B;AACzC,QAAM,KAAK,uFAAuF;AAClG,QAAM,KAAK,wEAAqE;AAChF,QAAM,KAAK,2FAAsF;AACjG,QAAM,KAAK,0DAAqD;AAChE,QAAM,KAAK,2BAA2B;AACtC,QAAM,KAAK,2FAA6F;AACxG,QAAM,KAAK,gGAAkG;AAC7G,QAAM,KAAK,0FAA4F;AACvG,QAAM,KAAK,6EAA+E;AAC1F,QAAM,KAAK,sFAAwF;AACnG,QAAM,KAAK,iGAAmG;AAC9G,QAAM,KAAK,8BAA8B;AACzC,QAAM,KAAK,4CAAuC;AAClD,QAAM,KAAK,4CAAuC;AAClD,QAAM,KAAK,oCAA+B;AAE1C,QAAM,KAAK,qBAAqB;AAChC,QAAM,KAAK,oDAAoD;AAC/D,QAAM,KAAK,iEAAiE;AAC5E,QAAM,KAAK,gEAAgE;AAC3E,QAAM,KAAK,2CAA2C;AAEtD,QAAM,KAAK,aAAa;AACxB,QAAM,KAAK,qEAAqE;AAChF,QAAM,KAAK,mFAAmF;AAC9F,QAAM,KAAK,gFAAgF;AAE3F,SAAO,MAAM,KAAK,IAAI;AACxB;","names":[]}