@langwatch/mcp-server 0.0.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/.env.example +2 -0
  2. package/.eslintrc.cjs +0 -1
  3. package/CHANGELOG.md +36 -0
  4. package/CONTRIBUTING.md +96 -0
  5. package/README.md +13 -6
  6. package/dist/index.js +7943 -1057
  7. package/dist/index.js.map +1 -1
  8. package/package.json +22 -9
  9. package/pnpm-workspace.yaml +2 -0
  10. package/pyproject.toml +17 -0
  11. package/src/index.ts +47 -36
  12. package/src/langwatch-api.ts +95 -85
  13. package/tests/evaluations.ipynb +649 -0
  14. package/tests/fixtures/azure/azure_openai_stream_bot_expected.py +102 -0
  15. package/tests/fixtures/azure/azure_openai_stream_bot_input.py +78 -0
  16. package/tests/fixtures/dspy/dspy_bot_expected.py +61 -0
  17. package/tests/fixtures/dspy/dspy_bot_input.py +53 -0
  18. package/tests/fixtures/fastapi/fastapi_app_expected.py +68 -0
  19. package/tests/fixtures/fastapi/fastapi_app_input.py +60 -0
  20. package/tests/fixtures/fastapi/prompt_management_fastapi_expected.py +114 -0
  21. package/tests/fixtures/fastapi/prompt_management_fastapi_input.py +88 -0
  22. package/tests/fixtures/haystack/haystack_bot_expected.py +141 -0
  23. package/tests/fixtures/haystack/haystack_bot_input.py +69 -0
  24. package/tests/fixtures/langchain/langchain_bot_expected.py +53 -0
  25. package/tests/fixtures/langchain/langchain_bot_input.py +45 -0
  26. package/tests/fixtures/langchain/langchain_bot_with_memory_expected.py +69 -0
  27. package/tests/fixtures/langchain/langchain_bot_with_memory_input.py +61 -0
  28. package/tests/fixtures/langchain/langchain_rag_bot_expected.py +97 -0
  29. package/tests/fixtures/langchain/langchain_rag_bot_input.py +77 -0
  30. package/tests/fixtures/langchain/langchain_rag_bot_vertex_ai_expected.py +116 -0
  31. package/tests/fixtures/langchain/langchain_rag_bot_vertex_ai_input.py +81 -0
  32. package/tests/fixtures/langchain/langgraph_rag_bot_with_threads_expected.py +331 -0
  33. package/tests/fixtures/langchain/langgraph_rag_bot_with_threads_input.py +106 -0
  34. package/tests/fixtures/litellm/litellm_bot_expected.py +40 -0
  35. package/tests/fixtures/litellm/litellm_bot_input.py +35 -0
  36. package/tests/fixtures/openai/openai_bot_expected.py +43 -0
  37. package/tests/fixtures/openai/openai_bot_function_call_expected.py +91 -0
  38. package/tests/fixtures/openai/openai_bot_function_call_input.py +82 -0
  39. package/tests/fixtures/openai/openai_bot_input.py +36 -0
  40. package/tests/fixtures/openai/openai_bot_rag_expected.py +73 -0
  41. package/tests/fixtures/openai/openai_bot_rag_input.py +51 -0
  42. package/tests/fixtures/opentelemetry/openinference_dspy_bot_expected.py +63 -0
  43. package/tests/fixtures/opentelemetry/openinference_dspy_bot_input.py +58 -0
  44. package/tests/fixtures/opentelemetry/openinference_langchain_bot_expected.py +53 -0
  45. package/tests/fixtures/opentelemetry/openinference_langchain_bot_input.py +52 -0
  46. package/tests/fixtures/opentelemetry/openinference_openai_bot_expected.py +49 -0
  47. package/tests/fixtures/opentelemetry/openinference_openai_bot_input.py +41 -0
  48. package/tests/fixtures/opentelemetry/openllmetry_openai_bot_expected.py +44 -0
  49. package/tests/fixtures/opentelemetry/openllmetry_openai_bot_input.py +40 -0
  50. package/tests/fixtures/strands/strands_bot_expected.py +84 -0
  51. package/tests/fixtures/strands/strands_bot_input.py +52 -0
  52. package/tests/scenario-openai.test.ts +158 -0
  53. package/tsconfig.json +0 -1
  54. package/uv.lock +2607 -0
  55. package/vitest.config.js +7 -0
@@ -0,0 +1,41 @@
1
# This example uses the OpenTelemetry instrumentation for OpenAI from OpenInference: https://pypi.org/project/openinference-instrumentation-openai/

from dotenv import load_dotenv

load_dotenv()

import chainlit as cl

from openinference.instrumentation.openai import OpenAIInstrumentor
from openinference.instrumentation import using_attributes
from openai import OpenAI

client = OpenAI()

# Manual instrumentation setup would go here
OpenAIInstrumentor().instrument()


@cl.on_message
async def main(message: cl.Message):
    # Start with an empty Chainlit message and stream tokens into it.
    reply = cl.Message(content="")

    system_prompt = {
        "role": "system",
        "content": "You are a helpful assistant that only reply in short tweet-like responses, using lots of emojis.",
    }
    user_prompt = {"role": "user", "content": message.content}

    # Stream the completion chunk by chunk back to the UI.
    stream = client.chat.completions.create(
        model="gpt-5",
        messages=[system_prompt, user_prompt],
        stream=True,
    )

    for chunk in stream:
        token = chunk.choices[0].delta.content or ""
        if token:
            await reply.stream_token(token)

    await reply.update()
@@ -0,0 +1,44 @@
1
# This example uses the OpenTelemetry instrumentation for OpenAI from OpenLLMetry: https://pypi.org/project/opentelemetry-instrumentation-openai/

from dotenv import load_dotenv

import langwatch

load_dotenv()

import chainlit as cl

from opentelemetry.instrumentation.openai import OpenAIInstrumentor
from openai import OpenAI


client = OpenAI()

# Hand the instrumentor to LangWatch so it owns the OpenTelemetry setup.
langwatch.setup(
    instrumentors=[OpenAIInstrumentor()],
)


@cl.on_message
async def main(message: cl.Message):
    # Empty Chainlit message that we fill with streamed tokens.
    reply = cl.Message(content="")

    chat_messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that only reply in short tweet-like responses, using lots of emojis.",
        },
        {"role": "user", "content": message.content},
    ]

    stream = client.chat.completions.create(
        model="gpt-5",
        messages=chat_messages,
        stream=True,
    )

    for chunk in stream:
        if token := chunk.choices[0].delta.content or "":
            await reply.stream_token(token)

    await reply.update()
@@ -0,0 +1,40 @@
1
# This example uses the OpenTelemetry instrumentation for OpenAI from OpenLLMetry
#
# NOTE(review): OpenLLMetry publishes its OpenAI instrumentation as the
# `opentelemetry-instrumentation-openai` package, whose import path is
# `opentelemetry.instrumentation.openai`. There is no importable `openllmetry`
# module, so the previous `from openllmetry.instrumentation.openai import ...`
# raised ImportError at startup. The sibling "expected" fixture already uses
# the correct module path; this input fixture is aligned with it.

from dotenv import load_dotenv

load_dotenv()

import chainlit as cl
from openai import OpenAI

# Manual instrumentation setup would go here
from opentelemetry.instrumentation.openai import OpenAIInstrumentor

OpenAIInstrumentor().instrument()

client = OpenAI()


@cl.on_message
async def main(message: cl.Message):
    # Empty Chainlit message that receives the streamed response.
    msg = cl.Message(
        content="",
    )

    completion = client.chat.completions.create(
        model="gpt-5",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant that only reply in short tweet-like responses, using lots of emojis.",
            },
            {"role": "user", "content": message.content},
        ],
        stream=True,
    )

    # Forward each non-empty delta token to the UI as it arrives.
    for part in completion:
        if token := part.choices[0].delta.content or "":
            await msg.stream_token(token)

    await msg.update()
@@ -0,0 +1,84 @@
1
import os
from strands import Agent, tool
from strands.models.litellm import LiteLLMModel
import langwatch

from dotenv import load_dotenv

load_dotenv()

import chainlit.config as cl_config
import chainlit as cl

# Opt out of Chainlit's own telemetry for this example.
cl_config.config.project.enable_telemetry = False

# OpenTelemetry setup options:

# Option 1: use only the LangWatch SDK. This is the simplest option.
langwatch.setup()  # The api key is set from the environment variable automatically

# Option 2: let StrandsTelemetry handle the complete OpenTelemetry setup
# (creates a new tracer provider and sets it as the global one):
# from strands.telemetry import StrandsTelemetry
# strands_telemetry = StrandsTelemetry()
# strands_telemetry.setup_otlp_exporter(
#     endpoint=f"{os.environ.get('LANGWATCH_ENDPOINT', 'https://app.langwatch.ai')}/api/otel/v1/traces",
#     headers={"Authorization": "Bearer " + os.environ["LANGWATCH_API_KEY"]},
# )
# Since OTel is then managed by StrandsTelemetry, skip setting it up in LangWatch:
# langwatch.setup(skip_open_telemetry_setup=True)


@tool
@langwatch.span(type="tool")
def get_user_location() -> str:
    """Get the user's location."""
    # Implement user location lookup logic here
    return "London, UK"


class KiteAgent:
    # Wraps a Strands Agent backed by LiteLLM/OpenAI with the location tool.
    def __init__(self):
        self.model = LiteLLMModel(
            client_args={"api_key": os.getenv("OPENAI_API_KEY")},
            model_id="openai/gpt-5-mini",
        )
        self.agent = Agent(
            name="kite-agent",
            model=self.model,
            system_prompt="Always use the get_user_location tool before answering any questions.",
            tools=[get_user_location],
            trace_attributes={
                "custom.model_id": "openai/gpt-5-mini",
                "custom.example.attribute": "swift",
            },
        )

    def run(self, prompt: str):
        return self.agent(prompt)


@cl.on_message
@langwatch.trace()
async def main(message: cl.Message):
    reply = cl.Message(content="")

    # Attach example metadata to the current LangWatch trace.
    langwatch.get_current_trace().update(
        metadata={"custom.example.attribute2": "langwatch"}
    )

    response = KiteAgent().run(message.content)

    await reply.stream_token(str(response))
    await reply.update()
@@ -0,0 +1,52 @@
1
import os
from strands import Agent, tool
from strands.models.litellm import LiteLLMModel

from dotenv import load_dotenv

load_dotenv()

import chainlit.config as cl_config
import chainlit as cl

# Opt out of Chainlit's own telemetry for this example.
cl_config.config.project.enable_telemetry = False


@tool
def get_user_location() -> str:
    """Get the user's location."""
    # Implement user location lookup logic here
    return "London, UK"


class KiteAgent:
    # Wraps a Strands Agent backed by LiteLLM/OpenAI with the location tool.
    def __init__(self):
        self.model = LiteLLMModel(
            client_args={"api_key": os.getenv("OPENAI_API_KEY")},
            model_id="openai/gpt-5-mini",
        )
        self.agent = Agent(
            name="kite-agent",
            model=self.model,
            system_prompt="Always use the get_user_location tool before answering any questions.",
            tools=[get_user_location],
        )

    def run(self, prompt: str):
        return self.agent(prompt)


@cl.on_message
async def main(message: cl.Message):
    reply = cl.Message(content="")

    response = KiteAgent().run(message.content)

    await reply.stream_token(str(response))
    await reply.update()
@@ -0,0 +1,158 @@
1
+ import scenario, {
2
+ type AgentAdapter,
3
+ AgentRole,
4
+ ScenarioExecutionStateLike,
5
+ } from "@langwatch/scenario";
6
+ import fs from "fs";
7
+ import { execSync } from "child_process";
8
+ import { describe, it, expect } from "vitest";
9
+ import dotenv from "dotenv";
10
+ import os from "os";
11
+ import path from "path";
12
+ import * as pty from "node-pty";
13
+ import chalk from "chalk";
14
+ import { anthropic } from "@ai-sdk/anthropic";
15
+
16
+ dotenv.config();
17
+
18
+ const claudeCodeAgent = (workingDirectory: string): AgentAdapter => ({
19
+ role: AgentRole.AGENT,
20
+ call: async (state) => {
21
+ const formattedMessages = state.messages
22
+ .map((message) => `${message.role}: ${message.content}`)
23
+ .join("\n\n");
24
+
25
+ const mcpConfig = {
26
+ mcpServers: {
27
+ LangWatch: {
28
+ command: "node",
29
+ args: [
30
+ `${__dirname}/../dist/index.js`,
31
+ "--apiKey",
32
+ process.env.LANGWATCH_API_KEY!,
33
+ ],
34
+ },
35
+ },
36
+ };
37
+ fs.writeFileSync(
38
+ `${__dirname}/.mcp-config.json`,
39
+ JSON.stringify(mcpConfig)
40
+ );
41
+
42
+ return new Promise<string>((resolve, reject) => {
43
+ const args = [
44
+ "--output-format",
45
+ "stream-json",
46
+ "-p",
47
+ "--mcp-config",
48
+ `${__dirname}/.mcp-config.json`,
49
+ "--dangerously-skip-permissions",
50
+ "--verbose",
51
+ formattedMessages,
52
+ ];
53
+
54
+ console.log(chalk.blue("Starting claude in:"), workingDirectory);
55
+
56
+ const ptyProcess = pty.spawn(
57
+ `${__dirname}/../node_modules/.bin/claude`,
58
+ args,
59
+ {
60
+ name: "xterm-256color",
61
+ cols: 80,
62
+ rows: 30,
63
+ cwd: workingDirectory,
64
+ env: { ...process.env, FORCE_COLOR: "1" },
65
+ }
66
+ );
67
+
68
+ let output = "";
69
+
70
+ ptyProcess.onData((data) => {
71
+ console.log(chalk.cyan("Claude Code:"), data);
72
+ output += data;
73
+ });
74
+
75
+ ptyProcess.onExit(({ exitCode }) => {
76
+ if (exitCode === 0) {
77
+ console.log("output", output);
78
+ const messages: any = output
79
+ .split("\n")
80
+ .map((line) => {
81
+ try {
82
+ return JSON.parse(line.trim());
83
+ } catch (error) {
84
+ return null;
85
+ }
86
+ })
87
+ .filter((message) => message !== null && "message" in message)
88
+ .map((message) => message.message);
89
+ console.log("messages", JSON.stringify(messages, undefined, 2));
90
+
91
+ resolve(messages);
92
+ } else {
93
+ reject(new Error(`Command failed with exit code ${exitCode}`));
94
+ }
95
+ });
96
+ });
97
+ },
98
+ });
99
+
100
+ describe("OpenAI Implementation", () => {
101
+ it("implements LangWatch in an OpenAI bot project", async () => {
102
+ const tempFolder = fs.mkdtempSync(
103
+ path.join(os.tmpdir(), "langwatch-openai-bot-")
104
+ );
105
+ execSync(
106
+ `cp -r tests/fixtures/openai/openai_bot_function_call_input.py ${tempFolder}/main.py`
107
+ );
108
+
109
+ const result = await scenario.run({
110
+ name: "OpenAI bot project",
111
+ description: `Implementing code changes in an OpenAI bot project to add LangWatch instrumentation.`,
112
+ agents: [
113
+ claudeCodeAgent(tempFolder),
114
+ scenario.userSimulatorAgent(),
115
+ scenario.judgeAgent({
116
+ model: anthropic("claude-sonnet-4-20250514"),
117
+ criteria: [
118
+ "Agent should edit main.py file",
119
+ "Agent should use the langwatch MCP for checking the documentation",
120
+ ],
121
+ }),
122
+ ],
123
+ script: [
124
+ scenario.user(
125
+ "please instrument my code with langwatch, short and sweet, no need to test the changes"
126
+ ),
127
+ scenario.agent(),
128
+ () => {
129
+ const resultFile = fs.readFileSync(`${tempFolder}/main.py`, "utf8");
130
+
131
+ expect(resultFile).toContain("@langwatch.trace(");
132
+ expect(resultFile).toContain("autotrack_openai_calls(client)");
133
+ // TODO: expect(resultFile).toContain('@langwatch.span(type="tool")');
134
+ },
135
+ toolCallFix,
136
+ scenario.judge(),
137
+ ],
138
+ });
139
+
140
+ expect(result.success).toBe(true);
141
+ });
142
+ });
143
+
144
+ function toolCallFix(state: ScenarioExecutionStateLike) {
145
+ // Fix for anthropic tool use format, that is not supported by vercel ai for the judge
146
+ state.messages.forEach((message) => {
147
+ if (Array.isArray(message.content)) {
148
+ message.content.forEach((content, index) => {
149
+ if (content.type !== "text") {
150
+ (message.content as any)[index] = {
151
+ type: "text",
152
+ text: JSON.stringify(content),
153
+ };
154
+ }
155
+ });
156
+ }
157
+ });
158
+ }
package/tsconfig.json CHANGED
@@ -17,7 +17,6 @@
17
17
  "jsx": "preserve",
18
18
  "incremental": true,
19
19
  "noUncheckedIndexedAccess": true,
20
- "baseUrl": ".",
21
20
  "tsBuildInfoFile": "./tsconfig.tsbuildinfo"
22
21
  },
23
22
  "include": [