npm - @langwatch/mcp-server - Versions diffs - 0.5.0 → 0.6.1 - Mend

@langwatch/mcp-server 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89)

package/CHANGELOG.md +32 -0
package/dist/{archive-scenario-GAE4XVFM.js → archive-scenario-YFD5THOR.js} +3 -3
package/dist/archive-scenario-YFD5THOR.js.map +1 -0
package/dist/chunk-5UOPNRXW.js +37 -0
package/dist/chunk-5UOPNRXW.js.map +1 -0
package/dist/{chunk-K2YFPOSD.js → chunk-6U4TCGFC.js} +2 -2
package/dist/chunk-IX6QJKAD.js +22 -0
package/dist/chunk-IX6QJKAD.js.map +1 -0
package/dist/{chunk-JVWDWL3J.js → chunk-LLRQIF52.js} +3 -11
package/dist/chunk-LLRQIF52.js.map +1 -0
package/dist/create-evaluator-E5X5ZP3B.js +27 -0
package/dist/create-evaluator-E5X5ZP3B.js.map +1 -0
package/dist/create-prompt-7Z35MIL6.js +36 -0
package/dist/create-prompt-7Z35MIL6.js.map +1 -0
package/dist/{create-scenario-3YRZVDYF.js → create-scenario-DIMPJRPY.js} +3 -3
package/dist/create-scenario-DIMPJRPY.js.map +1 -0
package/dist/discover-evaluator-schema-H23XCLNE.js +1402 -0
package/dist/discover-evaluator-schema-H23XCLNE.js.map +1 -0
package/dist/{get-analytics-BAVXTAPB.js → get-analytics-4YJW4S5L.js} +2 -2
package/dist/get-evaluator-WDEH2F7M.js +47 -0
package/dist/get-evaluator-WDEH2F7M.js.map +1 -0
package/dist/{get-prompt-LKCPT26O.js → get-prompt-F6PDVC76.js} +2 -5
package/dist/get-prompt-F6PDVC76.js.map +1 -0
package/dist/{get-scenario-3SCDW4Z6.js → get-scenario-H24ZYNT5.js} +3 -3
package/dist/{get-trace-QFDWJ5D4.js → get-trace-27USKGO7.js} +2 -2
package/dist/index.js +13311 -2411
package/dist/index.js.map +1 -1
package/dist/list-evaluators-KRGI72EH.js +34 -0
package/dist/list-evaluators-KRGI72EH.js.map +1 -0
package/dist/list-model-providers-A5YCFTPI.js +35 -0
package/dist/list-model-providers-A5YCFTPI.js.map +1 -0
package/dist/{list-prompts-UQPBCUYA.js → list-prompts-LKJSE7XN.js} +6 -7
package/dist/list-prompts-LKJSE7XN.js.map +1 -0
package/dist/{list-scenarios-573YOUKC.js → list-scenarios-ZK5CMGC4.js} +5 -5
package/dist/list-scenarios-ZK5CMGC4.js.map +1 -0
package/dist/{search-traces-RSMYCAN7.js → search-traces-SOKAAMAR.js} +2 -2
package/dist/set-model-provider-7MGULZDH.js +33 -0
package/dist/set-model-provider-7MGULZDH.js.map +1 -0
package/dist/update-evaluator-A3XINFLJ.js +24 -0
package/dist/update-evaluator-A3XINFLJ.js.map +1 -0
package/dist/update-prompt-IW7X2UQM.js +22 -0
package/dist/update-prompt-IW7X2UQM.js.map +1 -0
package/dist/{update-scenario-SSGVOBJO.js → update-scenario-ZT7TOBFR.js} +3 -3
package/dist/update-scenario-ZT7TOBFR.js.map +1 -0
package/package.json +10 -10
package/src/__tests__/all-tools.integration.test.ts +1337 -0
package/src/__tests__/discover-evaluator-schema.unit.test.ts +89 -0
package/src/__tests__/evaluator-tools.unit.test.ts +262 -0
package/src/__tests__/integration.integration.test.ts +9 -34
package/src/__tests__/langwatch-api.unit.test.ts +4 -32
package/src/__tests__/model-provider-tools.unit.test.ts +190 -0
package/src/__tests__/scenario-tools.integration.test.ts +5 -5
package/src/__tests__/scenario-tools.unit.test.ts +2 -2
package/src/__tests__/tools.unit.test.ts +59 -65
package/src/index.ts +250 -89
package/src/langwatch-api-evaluators.ts +70 -0
package/src/langwatch-api-model-providers.ts +41 -0
package/src/langwatch-api.ts +3 -28
package/src/tools/archive-scenario.ts +1 -1
package/src/tools/create-evaluator.ts +33 -0
package/src/tools/create-prompt.ts +30 -5
package/src/tools/create-scenario.ts +1 -1
package/src/tools/discover-evaluator-schema.ts +143 -0
package/src/tools/get-evaluator.ts +53 -0
package/src/tools/get-prompt.ts +1 -4
package/src/tools/list-evaluators.ts +37 -0
package/src/tools/list-model-providers.ts +40 -0
package/src/tools/list-prompts.ts +5 -6
package/src/tools/list-scenarios.ts +3 -3
package/src/tools/set-model-provider.ts +46 -0
package/src/tools/update-evaluator.ts +30 -0
package/src/tools/update-prompt.ts +9 -25
package/src/tools/update-scenario.ts +1 -1
package/dist/archive-scenario-GAE4XVFM.js.map +0 -1
package/dist/chunk-JVWDWL3J.js.map +0 -1
package/dist/create-prompt-P35POKBW.js +0 -22
package/dist/create-prompt-P35POKBW.js.map +0 -1
package/dist/create-scenario-3YRZVDYF.js.map +0 -1
package/dist/get-prompt-LKCPT26O.js.map +0 -1
package/dist/list-prompts-UQPBCUYA.js.map +0 -1
package/dist/list-scenarios-573YOUKC.js.map +0 -1
package/dist/update-prompt-G2Y5EBQY.js +0 -31
package/dist/update-prompt-G2Y5EBQY.js.map +0 -1
package/dist/update-scenario-SSGVOBJO.js.map +0 -1
package/dist/{chunk-K2YFPOSD.js.map → chunk-6U4TCGFC.js.map} +0 -0
package/dist/{get-analytics-BAVXTAPB.js.map → get-analytics-4YJW4S5L.js.map} +0 -0
package/dist/{get-scenario-3SCDW4Z6.js.map → get-scenario-H24ZYNT5.js.map} +0 -0
package/dist/{get-trace-QFDWJ5D4.js.map → get-trace-27USKGO7.js.map} +0 -0
package/dist/{search-traces-RSMYCAN7.js.map → search-traces-SOKAAMAR.js.map} +0 -0

package/src/__tests__/discover-evaluator-schema.unit.test.ts ADDED

@@ -0,0 +1,89 @@
+import { describe, it, expect } from "vitest";
+import { formatEvaluatorSchema } from "../tools/discover-evaluator-schema.js";
+describe("formatEvaluatorSchema()", () => {
+  describe("when called without evaluatorType (overview)", () => {
+    it("includes the overview header", () => {
+      const result = formatEvaluatorSchema();
+      expect(result).toContain("# Available Evaluator Types");
+    });
+    it("includes category sections", () => {
+      const result = formatEvaluatorSchema();
+      expect(result).toContain("## safety");
+    });
+    it("includes evaluator type identifiers", () => {
+      const result = formatEvaluatorSchema();
+      expect(result).toContain("openai/moderation");
+      expect(result).toContain("langevals/exact_match");
+    });
+    it("includes evaluator display names", () => {
+      const result = formatEvaluatorSchema();
+      expect(result).toContain("OpenAI Moderation");
+    });
+    it("instructs the agent to use evaluatorType parameter for full details", () => {
+      const result = formatEvaluatorSchema();
+      expect(result).toContain(
+        "discover_schema({ category: 'evaluators', evaluatorType: '<type>' })",
+      );
+    });
+  });
+  describe("when called with a valid evaluatorType (detail)", () => {
+    it("includes the evaluator name in the heading", () => {
+      const result = formatEvaluatorSchema("openai/moderation");
+      expect(result).toContain("# OpenAI Moderation");
+    });
+    it("includes the evaluator type in backticks", () => {
+      const result = formatEvaluatorSchema("openai/moderation");
+      expect(result).toContain("`openai/moderation`");
+    });
+    it("includes the category", () => {
+      const result = formatEvaluatorSchema("openai/moderation");
+      expect(result).toContain("**Category**: safety");
+    });
+    it("includes settings with descriptions and defaults", () => {
+      const result = formatEvaluatorSchema("openai/moderation");
+      expect(result).toContain("## Settings");
+      expect(result).toContain("**model**");
+      expect(result).toContain("Default:");
+    });
+    it("includes required/optional fields section", () => {
+      const result = formatEvaluatorSchema("openai/moderation");
+      expect(result).toContain("## Fields");
+    });
+    it("includes result fields", () => {
+      const result = formatEvaluatorSchema("openai/moderation");
+      expect(result).toContain("## Result Fields");
+    });
+    it("includes a usage example with evaluatorType", () => {
+      const result = formatEvaluatorSchema("openai/moderation");
+      expect(result).toContain("## Usage Example");
+      expect(result).toContain('"evaluatorType"');
+    });
+    it("includes env vars when the evaluator requires them", () => {
+      const result = formatEvaluatorSchema("azure/content_safety");
+      expect(result).toContain("## Required Environment Variables");
+      expect(result).toContain("AZURE_CONTENT_SAFETY_ENDPOINT");
+    });
+  });
+  describe("when called with an unknown evaluatorType", () => {
+    it("returns an error message with guidance", () => {
+      const result = formatEvaluatorSchema("nonexistent/type");
+      expect(result).toContain("Unknown evaluator type");
+      expect(result).toContain("nonexistent/type");
+      expect(result).toContain("discover_schema");
+    });
+  });
+});

package/src/__tests__/evaluator-tools.unit.test.ts ADDED

@@ -0,0 +1,262 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+vi.mock("../langwatch-api-evaluators.js", async (importOriginal) => {
+  const actual = await importOriginal() as Record<string, unknown>;
+  return {
+    ...actual,
+    listEvaluators: vi.fn(),
+    getEvaluator: vi.fn(),
+    createEvaluator: vi.fn(),
+    updateEvaluator: vi.fn(),
+  };
+});
+import {
+  listEvaluators,
+  getEvaluator,
+  createEvaluator,
+  updateEvaluator,
+} from "../langwatch-api-evaluators.js";
+import { handleListEvaluators } from "../tools/list-evaluators.js";
+import { handleGetEvaluator } from "../tools/get-evaluator.js";
+import { handleCreateEvaluator } from "../tools/create-evaluator.js";
+import { handleUpdateEvaluator } from "../tools/update-evaluator.js";
+const mockListEvaluators = vi.mocked(listEvaluators);
+const mockGetEvaluator = vi.mocked(getEvaluator);
+const mockCreateEvaluator = vi.mocked(createEvaluator);
+const mockUpdateEvaluator = vi.mocked(updateEvaluator);
+beforeEach(() => {
+  vi.clearAllMocks();
+});
+describe("handleListEvaluators()", () => {
+  const sampleEvaluators = [
+    {
+      id: "evaluator_abc123",
+      projectId: "proj_1",
+      name: "Toxicity Check",
+      slug: "toxicity-check",
+      type: "evaluator",
+      config: { evaluatorType: "openai/moderation" },
+      workflowId: null,
+      copiedFromEvaluatorId: null,
+      createdAt: "2024-01-01T00:00:00.000Z",
+      updatedAt: "2024-01-01T00:00:00.000Z",
+      fields: [{ identifier: "input", type: "str" }],
+      outputFields: [{ identifier: "passed", type: "bool" }],
+    },
+    {
+      id: "evaluator_def456",
+      projectId: "proj_1",
+      name: "Exact Match",
+      slug: "exact-match",
+      type: "evaluator",
+      config: { evaluatorType: "langevals/exact_match" },
+      workflowId: null,
+      copiedFromEvaluatorId: null,
+      createdAt: "2024-01-02T00:00:00.000Z",
+      updatedAt: "2024-01-02T00:00:00.000Z",
+      fields: [
+        { identifier: "output", type: "str" },
+        { identifier: "expected_output", type: "str" },
+      ],
+      outputFields: [{ identifier: "passed", type: "bool" }],
+    },
+  ];
+  describe("when evaluators exist", () => {
+    let result: string;
+    beforeEach(async () => {
+      mockListEvaluators.mockResolvedValue(sampleEvaluators);
+      result = await handleListEvaluators();
+    });
+    it("includes evaluator id", () => {
+      expect(result).toContain("evaluator_abc123");
+    });
+    it("includes evaluator name", () => {
+      expect(result).toContain("Toxicity Check");
+    });
+    it("includes evaluator type", () => {
+      expect(result).toContain("openai/moderation");
+    });
+    it("includes slug", () => {
+      expect(result).toContain("toxicity-check");
+    });
+    it("includes all evaluators in the list", () => {
+      expect(result).toContain("evaluator_def456");
+    });
+    it("includes the total count header", () => {
+      expect(result).toContain("# Evaluators (2 total)");
+    });
+  });
+  describe("when no evaluators exist", () => {
+    let result: string;
+    beforeEach(async () => {
+      mockListEvaluators.mockResolvedValue([]);
+      result = await handleListEvaluators();
+    });
+    it("returns a no-evaluators message", () => {
+      expect(result).toContain("No evaluators found");
+    });
+    it("includes a tip to use platform_create_evaluator", () => {
+      expect(result).toContain("platform_create_evaluator");
+    });
+  });
+});
+describe("handleGetEvaluator()", () => {
+  const sampleEvaluator = {
+    id: "evaluator_abc123",
+    projectId: "proj_1",
+    name: "Toxicity Check",
+    slug: "toxicity-check",
+    type: "evaluator",
+    config: {
+      evaluatorType: "openai/moderation",
+      settings: { model: "text-moderation-stable" },
+    },
+    workflowId: null,
+    copiedFromEvaluatorId: null,
+    createdAt: "2024-01-01T00:00:00.000Z",
+    updatedAt: "2024-01-01T00:00:00.000Z",
+    fields: [
+      { identifier: "input", type: "str" },
+      { identifier: "output", type: "str", optional: true },
+    ],
+    outputFields: [
+      { identifier: "passed", type: "bool" },
+      { identifier: "score", type: "float" },
+    ],
+  };
+  describe("when evaluator is found", () => {
+    let result: string;
+    beforeEach(async () => {
+      mockGetEvaluator.mockResolvedValue(sampleEvaluator);
+      result = await handleGetEvaluator({ idOrSlug: "evaluator_abc123" });
+    });
+    it("includes the evaluator name in the heading", () => {
+      expect(result).toContain("# Evaluator: Toxicity Check");
+    });
+    it("includes the evaluator type", () => {
+      expect(result).toContain("openai/moderation");
+    });
+    it("includes the config as JSON", () => {
+      expect(result).toContain("text-moderation-stable");
+    });
+    it("includes input fields", () => {
+      expect(result).toContain("## Input Fields");
+      expect(result).toContain("**input** (str)");
+    });
+    it("marks optional fields", () => {
+      expect(result).toContain("(optional)");
+    });
+    it("includes output fields", () => {
+      expect(result).toContain("## Output Fields");
+      expect(result).toContain("**passed** (bool)");
+    });
+  });
+});
+describe("handleCreateEvaluator()", () => {
+  describe("when creation succeeds", () => {
+    let result: string;
+    beforeEach(async () => {
+      mockCreateEvaluator.mockResolvedValue({
+        id: "evaluator_new123",
+        projectId: "proj_1",
+        name: "My LLM Judge",
+        slug: "my-llm-judge",
+        type: "evaluator",
+        config: { evaluatorType: "langevals/llm_boolean" },
+        workflowId: null,
+        copiedFromEvaluatorId: null,
+        createdAt: "2024-01-01T00:00:00.000Z",
+        updatedAt: "2024-01-01T00:00:00.000Z",
+        fields: [{ identifier: "input", type: "str" }],
+        outputFields: [{ identifier: "passed", type: "bool" }],
+      });
+      result = await handleCreateEvaluator({
+        name: "My LLM Judge",
+        config: { evaluatorType: "langevals/llm_boolean" },
+      });
+    });
+    it("confirms creation", () => {
+      expect(result).toContain("Evaluator created successfully!");
+    });
+    it("includes the generated ID", () => {
+      expect(result).toContain("evaluator_new123");
+    });
+    it("includes the slug", () => {
+      expect(result).toContain("my-llm-judge");
+    });
+    it("includes the evaluator type", () => {
+      expect(result).toContain("langevals/llm_boolean");
+    });
+  });
+});
+describe("handleUpdateEvaluator()", () => {
+  describe("when update succeeds", () => {
+    let result: string;
+    beforeEach(async () => {
+      mockUpdateEvaluator.mockResolvedValue({
+        id: "evaluator_abc123",
+        projectId: "proj_1",
+        name: "Updated Name",
+        slug: "toxicity-check",
+        type: "evaluator",
+        config: { evaluatorType: "openai/moderation" },
+        workflowId: null,
+        copiedFromEvaluatorId: null,
+        createdAt: "2024-01-01T00:00:00.000Z",
+        updatedAt: "2024-01-02T00:00:00.000Z",
+        fields: [],
+        outputFields: [],
+      });
+      result = await handleUpdateEvaluator({
+        evaluatorId: "evaluator_abc123",
+        name: "Updated Name",
+      });
+    });
+    it("confirms update", () => {
+      expect(result).toContain("Evaluator updated successfully!");
+    });
+    it("includes the evaluator ID", () => {
+      expect(result).toContain("evaluator_abc123");
+    });
+    it("includes the updated name", () => {
+      expect(result).toContain("Updated Name");
+    });
+  });
+});

package/src/__tests__/integration.integration.test.ts CHANGED

@@ -49,14 +49,12 @@ const CANNED_PROMPTS_LIST = [
     id: "p1",
     handle: "greeting-bot",
     name: "Greeting Bot",
-    description: "A friendly greeting bot",
     latestVersionNumber: 3,
   },
   {
     id: "p2",
     handle: "qa-assistant",
     name: "QA Assistant",
-    description: null,
     latestVersionNumber: 1,
   },
 ];
@@ -65,14 +63,12 @@ const CANNED_PROMPT_DETAIL = {
   id: "p1",
   handle: "greeting-bot",
   name: "Greeting Bot",
-  description: "A friendly greeting bot",
   latestVersionNumber: 3,
   versions: [
     {
       version: 3,
       commitMessage: "Updated tone",
-      model: "gpt-4o",
-      modelProvider: "openai",
+      model: "openai/gpt-4o",
       messages: [{ role: "system", content: "You are a friendly bot." }],
     },
     { version: 2, commitMessage: "Added greeting" },
@@ -128,12 +124,6 @@ function createMockServer(): Server {
       } else if (url === "/api/prompts" && req.method === "GET") {
         res.writeHead(200);
         res.end(JSON.stringify(CANNED_PROMPTS_LIST));
-      } else if (
-        url.match(/^\/api\/prompts\/[^/]+\/versions/) &&
-        req.method === "POST"
-      ) {
-        res.writeHead(200);
-        res.end(JSON.stringify(CANNED_PROMPT_UPDATED));
       } else if (url.match(/^\/api\/prompts\/[^/]+$/) && req.method === "GET") {
         res.writeHead(200);
         res.end(JSON.stringify(CANNED_PROMPT_DETAIL));
@@ -142,7 +132,7 @@ function createMockServer(): Server {
         res.end(JSON.stringify(CANNED_PROMPT_CREATED));
       } else if (
         url.match(/^\/api\/prompts\/[^/]+$/) &&
-        req.method === "POST"
+        req.method === "PUT"
       ) {
         res.writeHead(200);
         res.end(JSON.stringify(CANNED_PROMPT_UPDATED));
@@ -222,7 +212,7 @@ describe("MCP tools integration", () => {
     });
   });
-  describe("list_prompts", () => {
+  describe("platform_list_prompts", () => {
     it("returns formatted prompt list from mock server", async () => {
       const { handleListPrompts } = await import("../tools/list-prompts.js");
       const result = await handleListPrompts();
@@ -232,7 +222,7 @@ describe("MCP tools integration", () => {
     });
   });
-  describe("get_prompt", () => {
+  describe("platform_get_prompt", () => {
     it("returns formatted prompt details from mock server", async () => {
       const { handleGetPrompt } = await import("../tools/get-prompt.js");
       const result = await handleGetPrompt({ idOrHandle: "greeting-bot" });
@@ -243,7 +233,7 @@ describe("MCP tools integration", () => {
     });
   });
-  describe("create_prompt", () => {
+  describe("platform_create_prompt", () => {
     it("returns success message from mock server", async () => {
       const { handleCreatePrompt } = await import(
         "../tools/create-prompt.js"
@@ -251,41 +241,26 @@ describe("MCP tools integration", () => {
       const result = await handleCreatePrompt({
         name: "New Prompt",
         messages: [{ role: "system", content: "You are helpful." }],
-        model: "gpt-4o",
-        modelProvider: "openai",
+        model: "openai/gpt-4o",
       });
       expect(result).toContain("created successfully");
       expect(result).toContain("p-new");
     });
   });
-  describe("update_prompt", () => {
-    it("returns success message for in-place update from mock server", async () => {
+  describe("platform_update_prompt", () => {
+    it("returns success message from mock server", async () => {
       const { handleUpdatePrompt } = await import(
         "../tools/update-prompt.js"
       );
       const result = await handleUpdatePrompt({
         idOrHandle: "greeting-bot",
-        model: "gpt-4o-mini",
+        model: "openai/gpt-4o-mini",
         commitMessage: "Switch to mini",
       });
       expect(result).toContain("updated successfully");
       expect(result).toContain("Switch to mini");
     });
-    it("returns success message for version creation from mock server", async () => {
-      const { handleUpdatePrompt } = await import(
-        "../tools/update-prompt.js"
-      );
-      const result = await handleUpdatePrompt({
-        idOrHandle: "greeting-bot",
-        messages: [{ role: "system", content: "Be concise." }],
-        createVersion: true,
-        commitMessage: "Make concise",
-      });
-      expect(result).toContain("version created");
-      expect(result).toContain("Make concise");
-    });
   });
   describe("when API key is invalid", () => {

package/src/__tests__/langwatch-api.unit.test.ts CHANGED

@@ -208,10 +208,9 @@ describe("langwatch-api", () => {
     it("sends POST to /api/prompts with body", async () => {
       const { createPrompt } = await import("../langwatch-api.js");
       const data = {
-        name: "Test Prompt",
+        handle: "test-prompt",
         messages: [{ role: "system", content: "You are helpful." }],
-        model: "gpt-4o",
-        modelProvider: "openai",
+        model: "openai/gpt-4o",
       };
       const responseData = { id: "new-id", ...data };
       mockJsonResponse(responseData);
@@ -234,7 +233,7 @@ describe("langwatch-api", () => {
   });
   describe("updatePrompt()", () => {
-    it("sends POST to /api/prompts/{id} with body", async () => {
+    it("sends PUT to /api/prompts/{id} with body", async () => {
       const { updatePrompt } = await import("../langwatch-api.js");
       const data = {
         messages: [{ role: "system", content: "Updated" }],
@@ -248,34 +247,7 @@ describe("langwatch-api", () => {
       expect(mockFetch).toHaveBeenCalledWith(
         `${TEST_ENDPOINT}/api/prompts/${encodeURIComponent("p1")}`,
         expect.objectContaining({
-          method: "POST",
-          headers: expect.objectContaining({
-            "X-Auth-Token": TEST_API_KEY,
-            "Content-Type": "application/json",
-          }),
-          body: JSON.stringify(data),
-        })
-      );
-      expect(result).toEqual(responseData);
-    });
-  });
-  describe("createPromptVersion()", () => {
-    it("sends POST to /api/prompts/{id}/versions with body", async () => {
-      const { createPromptVersion } = await import("../langwatch-api.js");
-      const data = {
-        messages: [{ role: "user", content: "new version" }],
-        commitMessage: "v2",
-      };
-      const responseData = { version: 2 };
-      mockJsonResponse(responseData);
-      const result = await createPromptVersion("p1", data);
-      expect(mockFetch).toHaveBeenCalledWith(
-        `${TEST_ENDPOINT}/api/prompts/${encodeURIComponent("p1")}/versions`,
-        expect.objectContaining({
-          method: "POST",
+          method: "PUT",
           headers: expect.objectContaining({
             "X-Auth-Token": TEST_API_KEY,
             "Content-Type": "application/json",