@langwatch/mcp-server 0.0.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +2 -0
- package/.eslintrc.cjs +0 -1
- package/CHANGELOG.md +36 -0
- package/CONTRIBUTING.md +96 -0
- package/README.md +13 -6
- package/dist/index.js +7943 -1057
- package/dist/index.js.map +1 -1
- package/package.json +22 -9
- package/pnpm-workspace.yaml +2 -0
- package/pyproject.toml +17 -0
- package/src/index.ts +47 -36
- package/src/langwatch-api.ts +95 -85
- package/tests/evaluations.ipynb +649 -0
- package/tests/fixtures/azure/azure_openai_stream_bot_expected.py +102 -0
- package/tests/fixtures/azure/azure_openai_stream_bot_input.py +78 -0
- package/tests/fixtures/dspy/dspy_bot_expected.py +61 -0
- package/tests/fixtures/dspy/dspy_bot_input.py +53 -0
- package/tests/fixtures/fastapi/fastapi_app_expected.py +68 -0
- package/tests/fixtures/fastapi/fastapi_app_input.py +60 -0
- package/tests/fixtures/fastapi/prompt_management_fastapi_expected.py +114 -0
- package/tests/fixtures/fastapi/prompt_management_fastapi_input.py +88 -0
- package/tests/fixtures/haystack/haystack_bot_expected.py +141 -0
- package/tests/fixtures/haystack/haystack_bot_input.py +69 -0
- package/tests/fixtures/langchain/langchain_bot_expected.py +53 -0
- package/tests/fixtures/langchain/langchain_bot_input.py +45 -0
- package/tests/fixtures/langchain/langchain_bot_with_memory_expected.py +69 -0
- package/tests/fixtures/langchain/langchain_bot_with_memory_input.py +61 -0
- package/tests/fixtures/langchain/langchain_rag_bot_expected.py +97 -0
- package/tests/fixtures/langchain/langchain_rag_bot_input.py +77 -0
- package/tests/fixtures/langchain/langchain_rag_bot_vertex_ai_expected.py +116 -0
- package/tests/fixtures/langchain/langchain_rag_bot_vertex_ai_input.py +81 -0
- package/tests/fixtures/langchain/langgraph_rag_bot_with_threads_expected.py +331 -0
- package/tests/fixtures/langchain/langgraph_rag_bot_with_threads_input.py +106 -0
- package/tests/fixtures/litellm/litellm_bot_expected.py +40 -0
- package/tests/fixtures/litellm/litellm_bot_input.py +35 -0
- package/tests/fixtures/openai/openai_bot_expected.py +43 -0
- package/tests/fixtures/openai/openai_bot_function_call_expected.py +91 -0
- package/tests/fixtures/openai/openai_bot_function_call_input.py +82 -0
- package/tests/fixtures/openai/openai_bot_input.py +36 -0
- package/tests/fixtures/openai/openai_bot_rag_expected.py +73 -0
- package/tests/fixtures/openai/openai_bot_rag_input.py +51 -0
- package/tests/fixtures/opentelemetry/openinference_dspy_bot_expected.py +63 -0
- package/tests/fixtures/opentelemetry/openinference_dspy_bot_input.py +58 -0
- package/tests/fixtures/opentelemetry/openinference_langchain_bot_expected.py +53 -0
- package/tests/fixtures/opentelemetry/openinference_langchain_bot_input.py +52 -0
- package/tests/fixtures/opentelemetry/openinference_openai_bot_expected.py +49 -0
- package/tests/fixtures/opentelemetry/openinference_openai_bot_input.py +41 -0
- package/tests/fixtures/opentelemetry/openllmetry_openai_bot_expected.py +44 -0
- package/tests/fixtures/opentelemetry/openllmetry_openai_bot_input.py +40 -0
- package/tests/fixtures/strands/strands_bot_expected.py +84 -0
- package/tests/fixtures/strands/strands_bot_input.py +52 -0
- package/tests/scenario-openai.test.ts +158 -0
- package/tsconfig.json +0 -1
- package/uv.lock +2607 -0
- package/vitest.config.js +7 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# This example uses the OpenTelemetry instrumentation for OpenAI from OpenInference: https://pypi.org/project/openinference-instrumentation-openai/
|
|
2
|
+
|
|
3
|
+
from dotenv import load_dotenv
|
|
4
|
+
|
|
5
|
+
load_dotenv()
|
|
6
|
+
|
|
7
|
+
import chainlit as cl
|
|
8
|
+
|
|
9
|
+
from openinference.instrumentation.openai import OpenAIInstrumentor
|
|
10
|
+
from openinference.instrumentation import using_attributes
|
|
11
|
+
from openai import OpenAI
|
|
12
|
+
|
|
13
|
+
client = OpenAI()
|
|
14
|
+
|
|
15
|
+
# Manual instrumentation setup would go here
|
|
16
|
+
OpenAIInstrumentor().instrument()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@cl.on_message
async def main(message: cl.Message):
    """Reply to a Chainlit message by streaming an OpenAI chat completion.

    Args:
        message: Incoming Chainlit message; its ``content`` becomes the user
            turn of the chat completion request.
    """
    reply = cl.Message(content="")

    conversation = [
        {
            "role": "system",
            "content": "You are a helpful assistant that only reply in short tweet-like responses, using lots of emojis.",
        },
        {"role": "user", "content": message.content},
    ]
    stream = client.chat.completions.create(
        model="gpt-5",
        messages=conversation,
        stream=True,
    )

    # Push each non-empty text delta to the UI as it arrives.
    for chunk in stream:
        piece = chunk.choices[0].delta.content or ""
        if piece:
            await reply.stream_token(piece)

    await reply.update()
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# This example uses the OpenTelemetry instrumentation for OpenAI from OpenLLMetry: https://pypi.org/project/opentelemetry-instrumentation-openai/
|
|
2
|
+
|
|
3
|
+
from dotenv import load_dotenv
|
|
4
|
+
|
|
5
|
+
import langwatch
|
|
6
|
+
|
|
7
|
+
load_dotenv()
|
|
8
|
+
|
|
9
|
+
import chainlit as cl
|
|
10
|
+
|
|
11
|
+
from opentelemetry.instrumentation.openai import OpenAIInstrumentor
|
|
12
|
+
from openai import OpenAI
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
client = OpenAI()
|
|
16
|
+
|
|
17
|
+
langwatch.setup(
|
|
18
|
+
instrumentors=[OpenAIInstrumentor()],
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@cl.on_message
async def main(message: cl.Message):
    """Stream an OpenAI chat completion back to the Chainlit UI.

    Args:
        message: Incoming Chainlit message whose ``content`` is sent as the
            user turn.
    """
    outgoing = cl.Message(content="")

    system_turn = {
        "role": "system",
        "content": "You are a helpful assistant that only reply in short tweet-like responses, using lots of emojis.",
    }
    user_turn = {"role": "user", "content": message.content}

    response_stream = client.chat.completions.create(
        model="gpt-5",
        messages=[system_turn, user_turn],
        stream=True,
    )

    for event in response_stream:
        # A delta without text is treated as an empty string and skipped.
        text = event.choices[0].delta.content or ""
        if not text:
            continue
        await outgoing.stream_token(text)

    await outgoing.update()
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# This example uses the OpenTelemetry instrumentation for OpenAI from OpenLLMetry
|
|
2
|
+
|
|
3
|
+
from dotenv import load_dotenv
|
|
4
|
+
|
|
5
|
+
load_dotenv()
|
|
6
|
+
|
|
7
|
+
import chainlit as cl
|
|
8
|
+
from openai import OpenAI
|
|
9
|
+
|
|
10
|
+
# Manual instrumentation setup would go here
# OpenLLMetry ships its OpenAI instrumentor in the
# `opentelemetry-instrumentation-openai` distribution, which is imported as
# `opentelemetry.instrumentation.openai` (the same module the matching
# "expected" fixture imports).
from opentelemetry.instrumentation.openai import OpenAIInstrumentor

# Activate the instrumentor before the client below is created.
OpenAIInstrumentor().instrument()

client = OpenAI()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@cl.on_message
async def main(message: cl.Message):
    """Handle one Chainlit message by streaming the model's answer to the UI.

    Args:
        message: Incoming Chainlit message; ``message.content`` is used as the
            user prompt.
    """
    answer = cl.Message(content="")

    request = dict(
        model="gpt-5",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant that only reply in short tweet-like responses, using lots of emojis.",
            },
            {"role": "user", "content": message.content},
        ],
        stream=True,
    )
    chunks = client.chat.completions.create(**request)

    for chunk in chunks:
        fragment = chunk.choices[0].delta.content or ""
        # Only non-empty fragments are forwarded.
        if fragment:
            await answer.stream_token(fragment)

    await answer.update()
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from strands import Agent, tool
|
|
3
|
+
from strands.models.litellm import LiteLLMModel
|
|
4
|
+
import langwatch
|
|
5
|
+
|
|
6
|
+
from dotenv import load_dotenv
|
|
7
|
+
|
|
8
|
+
load_dotenv()
|
|
9
|
+
|
|
10
|
+
import chainlit.config as cl_config
|
|
11
|
+
import chainlit as cl
|
|
12
|
+
|
|
13
|
+
cl_config.config.project.enable_telemetry = False
|
|
14
|
+
|
|
15
|
+
# OpenTelemetry Setup Options:
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Option 1: Use only the LangWatch SDK. This is the simplest option.
|
|
19
|
+
langwatch.setup() # The api key is set from the environment variable automatically
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Option 2: Use StrandsTelemetry to handle complete OpenTelemetry setup
|
|
23
|
+
# (Creates new tracer provider and sets it as global)
|
|
24
|
+
# from strands.telemetry import StrandsTelemetry
|
|
25
|
+
# strands_telemetry = StrandsTelemetry()
|
|
26
|
+
# strands_telemetry.setup_otlp_exporter(
|
|
27
|
+
# endpoint=f"{os.environ.get('LANGWATCH_ENDPOINT', 'https://app.langwatch.ai')}/api/otel/v1/traces",
|
|
28
|
+
# headers={"Authorization": "Bearer " + os.environ["LANGWATCH_API_KEY"]},
|
|
29
|
+
# )
|
|
30
|
+
# As OTel is managed by StrandsTelemetry, we must skip setting it up in LangWatch
|
|
31
|
+
# langwatch.setup(skip_open_telemetry_setup=True)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@tool
@langwatch.span(type="tool")
def get_user_location() -> str:
    """Get the user's location."""
    # NOTE(review): the docstring is kept verbatim because it may be read at
    # runtime by the `@tool` decorator - confirm before rewording it.

    # Implement user location lookup logic here
    location = "London, UK"  # fixed placeholder value
    return location
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class KiteAgent:
    """Bundles a LiteLLM-backed model with a strands ``Agent`` that uses it."""

    def __init__(self):
        """Create the model and the agent; the API key is read from the environment."""
        # `os.getenv` yields None when OPENAI_API_KEY is not set.
        credentials = {"api_key": os.getenv("OPENAI_API_KEY")}
        self.model = LiteLLMModel(
            client_args=credentials,
            model_id="openai/gpt-5-mini",
        )

        # Custom attributes passed to the agent as `trace_attributes`.
        extra_attributes = {
            "custom.model_id": "openai/gpt-5-mini",
            "custom.example.attribute": "swift",
        }
        self.agent = Agent(
            name="kite-agent",
            model=self.model,
            system_prompt="Always use the get_user_location tool before answering any questions.",
            tools=[get_user_location],
            trace_attributes=extra_attributes,
        )

    def run(self, prompt: str):
        """Call the agent with ``prompt`` and return its result unchanged."""
        outcome = self.agent(prompt)
        return outcome
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@cl.on_message
@langwatch.trace()
async def main(message: cl.Message):
    """Run the Kite agent on an incoming Chainlit message and send back its answer.

    Args:
        message: Incoming Chainlit message; ``message.content`` is the prompt
            handed to the agent.
    """
    reply = cl.Message(content="")

    # Attach example metadata to the trace opened by `@langwatch.trace()`.
    current_trace = langwatch.get_current_trace()
    current_trace.update(metadata={"custom.example.attribute2": "langwatch"})

    # A fresh agent is built for every message.
    kite = KiteAgent()
    response = kite.run(message.content)

    # The agent result is stringified and sent as a single token.
    await reply.stream_token(str(response))
    await reply.update()
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from strands import Agent, tool
|
|
3
|
+
from strands.models.litellm import LiteLLMModel
|
|
4
|
+
|
|
5
|
+
from dotenv import load_dotenv
|
|
6
|
+
|
|
7
|
+
load_dotenv()
|
|
8
|
+
|
|
9
|
+
import chainlit.config as cl_config
|
|
10
|
+
import chainlit as cl
|
|
11
|
+
|
|
12
|
+
cl_config.config.project.enable_telemetry = False
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@tool
def get_user_location() -> str:
    """Get the user's location."""
    # NOTE(review): the docstring is kept verbatim because it may be read at
    # runtime by the `@tool` decorator - confirm before rewording it.

    # Implement user location lookup logic here
    location = "London, UK"  # fixed placeholder value
    return location
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class KiteAgent:
    """Bundles a LiteLLM-backed model with a strands ``Agent`` that uses it."""

    def __init__(self):
        """Create the model and the agent; the API key is read from the environment."""
        # `os.getenv` yields None when OPENAI_API_KEY is not set.
        credentials = {"api_key": os.getenv("OPENAI_API_KEY")}
        self.model = LiteLLMModel(
            client_args=credentials,
            model_id="openai/gpt-5-mini",
        )
        self.agent = Agent(
            name="kite-agent",
            model=self.model,
            system_prompt="Always use the get_user_location tool before answering any questions.",
            tools=[get_user_location],
        )

    def run(self, prompt: str):
        """Call the agent with ``prompt`` and return its result unchanged."""
        outcome = self.agent(prompt)
        return outcome
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@cl.on_message
async def main(message: cl.Message):
    """Run the Kite agent on an incoming Chainlit message and send back its answer.

    Args:
        message: Incoming Chainlit message; ``message.content`` is the prompt
            handed to the agent.
    """
    reply = cl.Message(content="")

    # A fresh agent is built for every message.
    kite = KiteAgent()
    response = kite.run(message.content)

    # The agent result is stringified and sent as a single token.
    await reply.stream_token(str(response))
    await reply.update()
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import scenario, {
|
|
2
|
+
type AgentAdapter,
|
|
3
|
+
AgentRole,
|
|
4
|
+
ScenarioExecutionStateLike,
|
|
5
|
+
} from "@langwatch/scenario";
|
|
6
|
+
import fs from "fs";
|
|
7
|
+
import { execSync } from "child_process";
|
|
8
|
+
import { describe, it, expect } from "vitest";
|
|
9
|
+
import dotenv from "dotenv";
|
|
10
|
+
import os from "os";
|
|
11
|
+
import path from "path";
|
|
12
|
+
import * as pty from "node-pty";
|
|
13
|
+
import chalk from "chalk";
|
|
14
|
+
import { anthropic } from "@ai-sdk/anthropic";
|
|
15
|
+
|
|
16
|
+
dotenv.config();
|
|
17
|
+
|
|
18
|
+
/**
 * Builds a scenario agent adapter that answers each turn by running the
 * Claude Code CLI with the locally built LangWatch MCP server registered.
 *
 * @param workingDirectory Directory the CLI process is started in.
 * @returns An adapter with the AGENT role. Its `call` writes the MCP config
 *   next to this file, spawns the CLI in a pty, and resolves with the
 *   `message` payloads found in the CLI's stream-json output. It rejects when
 *   the CLI exits with a non-zero code or the output cannot be processed.
 */
const claudeCodeAgent = (workingDirectory: string): AgentAdapter => ({
  role: AgentRole.AGENT,
  call: async (state) => {
    // Flatten the conversation into one "role: content" prompt. Structured
    // (non-string) content is serialized as JSON so it does not collapse to
    // "[object Object]".
    const formattedMessages = state.messages
      .map((message) => {
        const content =
          typeof message.content === "string"
            ? message.content
            : JSON.stringify(message.content);
        return `${message.role}: ${content}`;
      })
      .join("\n\n");

    const mcpConfigPath = `${__dirname}/.mcp-config.json`;
    const mcpConfig = {
      mcpServers: {
        LangWatch: {
          command: "node",
          args: [
            `${__dirname}/../dist/index.js`,
            "--apiKey",
            process.env.LANGWATCH_API_KEY!,
          ],
        },
      },
    };
    fs.writeFileSync(mcpConfigPath, JSON.stringify(mcpConfig));

    // NOTE(review): the promise is typed `string`, but it resolves with the
    // array of parsed message payloads (typed `any` below).
    return new Promise<string>((resolve, reject) => {
      const args = [
        "--output-format",
        "stream-json",
        "-p",
        "--mcp-config",
        mcpConfigPath,
        "--dangerously-skip-permissions",
        "--verbose",
        formattedMessages,
      ];

      console.log(chalk.blue("Starting claude in:"), workingDirectory);

      const ptyProcess = pty.spawn(
        `${__dirname}/../node_modules/.bin/claude`,
        args,
        {
          name: "xterm-256color",
          cols: 80,
          rows: 30,
          cwd: workingDirectory,
          env: { ...process.env, FORCE_COLOR: "1" },
        }
      );

      let output = "";

      ptyProcess.onData((data) => {
        console.log(chalk.cyan("Claude Code:"), data);
        output += data;
      });

      ptyProcess.onExit(({ exitCode }) => {
        if (exitCode !== 0) {
          reject(new Error(`Command failed with exit code ${exitCode}`));
          return;
        }

        // Anything thrown in this callback would otherwise leave the promise
        // pending forever, so failures are routed to `reject`.
        try {
          console.log("output", output);
          const messages: any = output
            .split("\n")
            .map((line) => {
              try {
                return JSON.parse(line.trim());
              } catch (error) {
                // Lines that are not JSON are deliberately ignored.
                return null;
              }
            })
            // A line may parse to a JSON primitive (e.g. `5` or `true`); the
            // `in` operator throws on primitives, so only objects are kept.
            .filter(
              (entry) =>
                typeof entry === "object" &&
                entry !== null &&
                "message" in entry
            )
            .map((entry) => entry.message);
          console.log("messages", JSON.stringify(messages, undefined, 2));

          resolve(messages);
        } catch (error) {
          reject(error);
        }
      });
    });
  },
});
|
|
99
|
+
|
|
100
|
+
// End-to-end scenario: Claude Code, with the LangWatch MCP server attached, is
// asked to instrument a copy of the OpenAI function-call fixture. The edited
// file is then checked for the expected LangWatch calls before the judge runs.
describe("OpenAI Implementation", () => {
  it("implements LangWatch in an OpenAI bot project", async () => {
    const tempFolder = fs.mkdtempSync(
      path.join(os.tmpdir(), "langwatch-openai-bot-")
    );
    // Copy the fixture with the fs API instead of shelling out to `cp`. This
    // avoids interpolating paths into a shell command and does not depend on
    // the directory the test runner was started from.
    fs.copyFileSync(
      path.join(
        __dirname,
        "fixtures",
        "openai",
        "openai_bot_function_call_input.py"
      ),
      path.join(tempFolder, "main.py")
    );

    const result = await scenario.run({
      name: "OpenAI bot project",
      description: `Implementing code changes in an OpenAI bot project to add LangWatch instrumentation.`,
      agents: [
        claudeCodeAgent(tempFolder),
        scenario.userSimulatorAgent(),
        scenario.judgeAgent({
          model: anthropic("claude-sonnet-4-20250514"),
          criteria: [
            "Agent should edit main.py file",
            "Agent should use the langwatch MCP for checking the documentation",
          ],
        }),
      ],
      script: [
        scenario.user(
          "please instrument my code with langwatch, short and sweet, no need to test the changes"
        ),
        scenario.agent(),
        () => {
          // Inspect the file the agent was asked to edit.
          const resultFile = fs.readFileSync(`${tempFolder}/main.py`, "utf8");

          expect(resultFile).toContain("@langwatch.trace(");
          expect(resultFile).toContain("autotrack_openai_calls(client)");
          // TODO: expect(resultFile).toContain('@langwatch.span(type="tool")');
        },
        // Convert non-text content parts to text before the judge sees them.
        toolCallFix,
        scenario.judge(),
      ],
    });

    expect(result.success).toBe(true);
  });
});
|
|
143
|
+
|
|
144
|
+
/**
 * Rewrites, in place, every non-text content part of every message whose
 * content is an array into a text part holding the JSON of the original part.
 * Messages with non-array content are left untouched.
 *
 * Fix for anthropic tool use format, that is not supported by vercel ai for
 * the judge.
 *
 * @param state Scenario execution state whose `messages` are mutated.
 */
function toolCallFix(state: ScenarioExecutionStateLike) {
  state.messages
    .filter((message) => Array.isArray(message.content))
    .forEach((message) => {
      const parts = message.content as any;
      parts.forEach((part: any, position: number) => {
        if (part.type === "text") {
          return;
        }
        const asText = { type: "text", text: JSON.stringify(part) };
        parts[position] = asText;
      });
    });
}
|