@langwatch/mcp-server 0.0.5 → 0.1.0
This diff shows the changes between publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- package/.env.example +2 -0
- package/.eslintrc.cjs +0 -1
- package/CHANGELOG.md +29 -0
- package/CONTRIBUTING.md +96 -0
- package/README.md +13 -6
- package/dist/index.js +7957 -1017
- package/dist/index.js.map +1 -1
- package/package.json +22 -9
- package/pnpm-workspace.yaml +2 -0
- package/pyproject.toml +17 -0
- package/src/index.ts +54 -11
- package/src/langwatch-api.ts +95 -85
- package/tests/evaluations.ipynb +649 -0
- package/tests/fixtures/azure/azure_openai_stream_bot_expected.py +102 -0
- package/tests/fixtures/azure/azure_openai_stream_bot_input.py +78 -0
- package/tests/fixtures/dspy/dspy_bot_expected.py +61 -0
- package/tests/fixtures/dspy/dspy_bot_input.py +53 -0
- package/tests/fixtures/fastapi/fastapi_app_expected.py +68 -0
- package/tests/fixtures/fastapi/fastapi_app_input.py +60 -0
- package/tests/fixtures/fastapi/prompt_management_fastapi_expected.py +114 -0
- package/tests/fixtures/fastapi/prompt_management_fastapi_input.py +88 -0
- package/tests/fixtures/haystack/haystack_bot_expected.py +141 -0
- package/tests/fixtures/haystack/haystack_bot_input.py +69 -0
- package/tests/fixtures/langchain/langchain_bot_expected.py +53 -0
- package/tests/fixtures/langchain/langchain_bot_input.py +45 -0
- package/tests/fixtures/langchain/langchain_bot_with_memory_expected.py +69 -0
- package/tests/fixtures/langchain/langchain_bot_with_memory_input.py +61 -0
- package/tests/fixtures/langchain/langchain_rag_bot_expected.py +97 -0
- package/tests/fixtures/langchain/langchain_rag_bot_input.py +77 -0
- package/tests/fixtures/langchain/langchain_rag_bot_vertex_ai_expected.py +116 -0
- package/tests/fixtures/langchain/langchain_rag_bot_vertex_ai_input.py +81 -0
- package/tests/fixtures/langchain/langgraph_rag_bot_with_threads_expected.py +331 -0
- package/tests/fixtures/langchain/langgraph_rag_bot_with_threads_input.py +106 -0
- package/tests/fixtures/litellm/litellm_bot_expected.py +40 -0
- package/tests/fixtures/litellm/litellm_bot_input.py +35 -0
- package/tests/fixtures/openai/openai_bot_expected.py +43 -0
- package/tests/fixtures/openai/openai_bot_function_call_expected.py +91 -0
- package/tests/fixtures/openai/openai_bot_function_call_input.py +82 -0
- package/tests/fixtures/openai/openai_bot_input.py +36 -0
- package/tests/fixtures/openai/openai_bot_rag_expected.py +73 -0
- package/tests/fixtures/openai/openai_bot_rag_input.py +51 -0
- package/tests/fixtures/opentelemetry/openinference_dspy_bot_expected.py +63 -0
- package/tests/fixtures/opentelemetry/openinference_dspy_bot_input.py +58 -0
- package/tests/fixtures/opentelemetry/openinference_langchain_bot_expected.py +53 -0
- package/tests/fixtures/opentelemetry/openinference_langchain_bot_input.py +52 -0
- package/tests/fixtures/opentelemetry/openinference_openai_bot_expected.py +49 -0
- package/tests/fixtures/opentelemetry/openinference_openai_bot_input.py +41 -0
- package/tests/fixtures/opentelemetry/openllmetry_openai_bot_expected.py +44 -0
- package/tests/fixtures/opentelemetry/openllmetry_openai_bot_input.py +40 -0
- package/tests/fixtures/strands/strands_bot_expected.py +84 -0
- package/tests/fixtures/strands/strands_bot_input.py +52 -0
- package/tests/scenario-openai.test.ts +158 -0
- package/tsconfig.json +0 -1
- package/uv.lock +2607 -0
- package/vitest.config.js +7 -0
--- /dev/null
+++ package/tests/fixtures/azure/azure_openai_stream_bot_expected.py
@@ -0,0 +1,102 @@
+import os
+from typing import Optional
+from dotenv import load_dotenv
+
+from langwatch.types import RAGChunk
+
+load_dotenv()
+
+import chainlit as cl
+from openai import AzureOpenAI
+
+import langwatch
+
+client = AzureOpenAI(
+    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+    api_version="2024-02-01",
+    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),  # type: ignore
+)
+langwatch.api_key = os.getenv("LANGWATCH_API_KEY")
+
+
+@langwatch.span(type="rag")
+def retrieve(query: Optional[str] = None):
+    search_results = [
+        {
+            "id": "result_1",
+            "content": "This is the first result",
+        },
+        {
+            "id": "result_2",
+            "content": "This is the second result",
+        },
+    ]
+
+    langwatch.get_current_span().update(
+        contexts=[
+            RAGChunk(
+                document_id=docs["id"],
+                content=docs["content"],
+            )
+            for docs in search_results
+        ],
+    )
+
+    return search_results
+
+
+@cl.on_message
+@langwatch.trace()
+async def main(message: cl.Message):
+    langwatch.get_current_trace().autotrack_openai_calls(client)
+
+    msg = cl.Message(
+        content="",
+    )
+
+    langwatch.get_current_trace().update(
+        trace_id=message.id,
+        metadata={"labels": ["azure"], "user_id": message.author},
+    )
+
+    completion = client.chat.completions.create(
+        model="gpt-35-turbo-0613",
+        messages=[
+            {
+                "role": "system",
+                "content": "come up with a query for searching the database based on user question, 3 words max",
+            },
+            {"role": "user", "content": message.content},
+        ],
+    )
+
+    query = completion.choices[0].message.content
+    search_results = retrieve(query=query)
+    results = "\n".join([f"{docs['id']}: {docs['content']}" for docs in search_results])
+
+    completion = client.chat.completions.create(
+        model="gpt-35-turbo-0613",
+        messages=[
+            {
+                "role": "system",
+                "content": f"""
+You are a helpful assistant that only reply in short tweet-like responses, using lots of emojis.
+
+We just made a search in the database for {query} and found {len(search_results)} results. Here they are, use that to help answering user:
+
+{results}
+""",
+            },
+            {"role": "user", "content": message.content},
+        ],
+        stream=True,
+    )
+
+    for part in completion:
+        if len(part.choices) == 0:
+            continue
+
+        if token := part.choices[0].delta.content or "":
+            await msg.stream_token(token)
+
+    await msg.update()
--- /dev/null
+++ package/tests/fixtures/azure/azure_openai_stream_bot_input.py
@@ -0,0 +1,78 @@
+import os
+from typing import Optional
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import chainlit as cl
+from openai import AzureOpenAI
+
+client = AzureOpenAI(
+    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+    api_version="2024-02-01",
+    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),  # type: ignore
+)
+
+
+def retrieve(query: Optional[str] = None):
+    search_results = [
+        {
+            "id": "result_1",
+            "content": "This is the first result",
+        },
+        {
+            "id": "result_2",
+            "content": "This is the second result",
+        },
+    ]
+
+    return search_results
+
+
+@cl.on_message
+async def main(message: cl.Message):
+    msg = cl.Message(
+        content="",
+    )
+
+    completion = client.chat.completions.create(
+        model="gpt-35-turbo-0613",
+        messages=[
+            {
+                "role": "system",
+                "content": "come up with a query for searching the database based on user question, 3 words max",
+            },
+            {"role": "user", "content": message.content},
+        ],
+    )
+
+    query = completion.choices[0].message.content
+    search_results = retrieve(query=query)
+    results = "\n".join([f"{docs['id']}: {docs['content']}" for docs in search_results])
+
+    completion = client.chat.completions.create(
+        model="gpt-35-turbo-0613",
+        messages=[
+            {
+                "role": "system",
+                "content": f"""
+You are a helpful assistant that only reply in short tweet-like responses, using lots of emojis.
+
+We just made a search in the database for {query} and found {len(search_results)} results. Here they are, use that to help answering user:
+
+{results}
+""",
+            },
+            {"role": "user", "content": message.content},
+        ],
+        stream=True,
+    )
+
+    for part in completion:
+        if len(part.choices) == 0:
+            continue
+
+        if token := part.choices[0].delta.content or "":
+            await msg.stream_token(token)
+
+    await msg.update()
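Read side by side, the input and expected fixtures above appear to be a before/after pair for the instrumentation this package tests: the expected file adds only the LangWatch imports, a RAG span around the retrieval step, and a trace around the message handler. A minimal sketch of just that delta, condensed from the pair (the `answer` function below is an illustrative stand-in for the chainlit handler, not a name from the package):

import os

import langwatch
from langwatch.types import RAGChunk
from openai import AzureOpenAI

client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2024-02-01",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),  # type: ignore
)
langwatch.api_key = os.getenv("LANGWATCH_API_KEY")


@langwatch.span(type="rag")  # added: marks the retrieval step as a RAG span
def retrieve(query: str):
    results = [{"id": "doc_1", "content": "stub result"}]
    # added: attach the retrieved chunks to the current span
    langwatch.get_current_span().update(
        contexts=[RAGChunk(document_id=r["id"], content=r["content"]) for r in results]
    )
    return results


@langwatch.trace()  # added: opens a trace around the handler
def answer(question: str):
    # added: record OpenAI calls made with this client on the current trace
    langwatch.get_current_trace().autotrack_openai_calls(client)
    langwatch.get_current_trace().update(metadata={"labels": ["azure"]})
    return retrieve(query=question)

Every call here appears verbatim in the expected fixture; only the handler is condensed.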
--- /dev/null
+++ package/tests/fixtures/dspy/dspy_bot_expected.py
@@ -0,0 +1,61 @@
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import chainlit as cl
+
+import langwatch
+
+import dspy
+
+
+lm = dspy.LM("openai/gpt-5", api_key=os.environ["OPENAI_API_KEY"], temperature=1)
+
+colbertv2_wiki17_abstracts = dspy.ColBERTv2(
+    url="http://20.102.90.50:2017/wiki17_abstracts"
+)
+
+dspy.settings.configure(lm=lm, rm=colbertv2_wiki17_abstracts)
+
+
+class GenerateAnswer(dspy.Signature):
+    """Answer questions with careful explanations to the user."""
+
+    context = dspy.InputField(desc="may contain relevant facts")
+    question = dspy.InputField()
+    answer = dspy.OutputField(desc="markdown formatted answer, use some emojis")
+
+
+class RAG(dspy.Module):
+    def __init__(self, num_passages=3):
+        super().__init__()
+
+        self.retrieve = dspy.Retrieve(k=num_passages)
+        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
+
+    def forward(self, question):
+        context = self.retrieve(question).passages  # type: ignore
+        prediction = self.generate_answer(question=question, context=context)
+        return dspy.Prediction(answer=prediction.answer)
+
+
+@cl.on_message
+@langwatch.trace()
+async def main(message: cl.Message):
+    langwatch.get_current_trace().autotrack_dspy()
+    langwatch.get_current_trace().update(
+        metadata={"labels": ["dspy", "thread"], "thread_id": "90210"},
+    )
+
+    msg = cl.Message(
+        content="",
+    )
+
+    program = RAG()
+    prediction = program(question=message.content)
+
+    await msg.stream_token(prediction.answer)
+    await msg.update()
+
+    return prediction.answer
--- /dev/null
+++ package/tests/fixtures/dspy/dspy_bot_input.py
@@ -0,0 +1,53 @@
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import chainlit as cl
+
+import dspy
+
+
+lm = dspy.LM("openai/gpt-5", api_key=os.environ["OPENAI_API_KEY"], temperature=1)
+
+colbertv2_wiki17_abstracts = dspy.ColBERTv2(
+    url="http://20.102.90.50:2017/wiki17_abstracts"
+)
+
+dspy.settings.configure(lm=lm, rm=colbertv2_wiki17_abstracts)
+
+
+class GenerateAnswer(dspy.Signature):
+    """Answer questions with careful explanations to the user."""
+
+    context = dspy.InputField(desc="may contain relevant facts")
+    question = dspy.InputField()
+    answer = dspy.OutputField(desc="markdown formatted answer, use some emojis")
+
+
+class RAG(dspy.Module):
+    def __init__(self, num_passages=3):
+        super().__init__()
+
+        self.retrieve = dspy.Retrieve(k=num_passages)
+        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
+
+    def forward(self, question):
+        context = self.retrieve(question).passages  # type: ignore
+        prediction = self.generate_answer(question=question, context=context)
+        return dspy.Prediction(answer=prediction.answer)
+
+
+@cl.on_message
+async def main(message: cl.Message):
+    msg = cl.Message(
+        content="",
+    )
+
+    program = RAG()
+    prediction = program(question=message.content)
+
+    await msg.stream_token(prediction.answer)
+    await msg.update()
+
+    return prediction.answer
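For the DSPy pair the delta is smaller still: the expected fixture only adds the langwatch import, a @langwatch.trace() decorator, and autotrack_dspy() plus trace metadata inside the handler. A condensed, self-contained sketch of that delta (run_program is an illustrative stand-in for the RAG module defined in both fixtures):

import langwatch


def run_program(question: str) -> str:
    # stand-in for the RAG dspy.Module defined in both fixtures
    return f"answer to: {question}"


@langwatch.trace()  # added: opens a trace around the handler
def answer(question: str) -> str:
    # added: record DSPy module calls on the current trace
    langwatch.get_current_trace().autotrack_dspy()
    langwatch.get_current_trace().update(
        metadata={"labels": ["dspy", "thread"], "thread_id": "90210"},
    )
    return run_program(question)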
--- /dev/null
+++ package/tests/fixtures/fastapi/fastapi_app_expected.py
@@ -0,0 +1,68 @@
+from dotenv import load_dotenv
+from fastapi.responses import StreamingResponse
+from fastapi.testclient import TestClient
+
+load_dotenv()
+
+from fastapi import FastAPI
+from openai import OpenAI
+from pydantic import BaseModel
+
+client = OpenAI()
+
+import langwatch
+
+app = FastAPI()
+
+
+class EndpointParams(BaseModel):
+    input: str
+
+
+class CompletionStreaming:
+    @langwatch.trace(name="fastapi_sample_endpoint")
+    async def execute(self, input: str):
+        langwatch.get_current_trace().autotrack_openai_calls(client)
+        langwatch.get_current_trace().update(
+            metadata={"label": "fastapi"},
+        )
+
+        completion = client.chat.completions.create(
+            model="gpt-4o",
+            messages=[
+                {
+                    "role": "system",
+                    "content": "You are a helpful assistant that only reply in short tweet-like responses, using lots of emojis.",
+                },
+                {"role": "user", "content": input},
+            ],
+            stream=True,
+        )
+
+        for chunk in completion:
+            content = chunk.choices[0].delta.content
+            if content is not None:
+                yield content
+
+
+@app.post("/")
+async def fastapi_sample_endpoint(params: EndpointParams):
+    return StreamingResponse(CompletionStreaming().execute(params.input))  # type: ignore
+
+
+def call_fastapi_sample_endpoint(input: str) -> str:
+    test_client = TestClient(app)
+    response = test_client.post("/", json={"input": input})
+
+    return response.text
+
+
+if __name__ == "__main__":
+    import uvicorn
+    import os
+
+    # Test one llm call before starting the server
+    print(call_fastapi_sample_endpoint("Hello, world!"))
+
+    port = int(os.environ.get("PORT", 9000))
+    uvicorn.run(app, host="0.0.0.0", port=port)
--- /dev/null
+++ package/tests/fixtures/fastapi/fastapi_app_input.py
@@ -0,0 +1,60 @@
+from dotenv import load_dotenv
+from fastapi.responses import StreamingResponse
+from fastapi.testclient import TestClient
+
+load_dotenv()
+
+from fastapi import FastAPI
+from openai import OpenAI
+from pydantic import BaseModel
+
+client = OpenAI()
+
+app = FastAPI()
+
+
+class EndpointParams(BaseModel):
+    input: str
+
+
+class CompletionStreaming:
+    async def execute(self, input: str):
+        completion = client.chat.completions.create(
+            model="gpt-4o",
+            messages=[
+                {
+                    "role": "system",
+                    "content": "You are a helpful assistant that only reply in short tweet-like responses, using lots of emojis.",
+                },
+                {"role": "user", "content": input},
+            ],
+            stream=True,
+        )
+
+        for chunk in completion:
+            content = chunk.choices[0].delta.content
+            if content is not None:
+                yield content
+
+
+@app.post("/")
+async def fastapi_sample_endpoint(params: EndpointParams):
+    return StreamingResponse(CompletionStreaming().execute(params.input))  # type: ignore
+
+
+def call_fastapi_sample_endpoint(input: str) -> str:
+    test_client = TestClient(app)
+    response = test_client.post("/", json={"input": input})
+
+    return response.text
+
+
+if __name__ == "__main__":
+    import uvicorn
+    import os
+
+    # Test one llm call before starting the server
+    print(call_fastapi_sample_endpoint("Hello, world!"))
+
+    port = int(os.environ.get("PORT", 9000))
+    uvicorn.run(app, host="0.0.0.0", port=port)
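The notable detail in the FastAPI pair is where the trace lands: the expected fixture applies @langwatch.trace(name=...) directly to the streaming async generator, with autotracking set up before the stream starts. A condensed sketch of that delta (stream_reply is an illustrative stand-in for CompletionStreaming.execute):

import langwatch
from openai import OpenAI

client = OpenAI()


@langwatch.trace(name="fastapi_sample_endpoint")  # added: a named trace around the generator
async def stream_reply(input: str):
    # added: record OpenAI calls and tag the trace before streaming begins
    langwatch.get_current_trace().autotrack_openai_calls(client)
    langwatch.get_current_trace().update(metadata={"label": "fastapi"})

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": input}],
        stream=True,
    )
    for chunk in completion:
        content = chunk.choices[0].delta.content
        if content is not None:
            yield content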
--- /dev/null
+++ package/tests/fixtures/fastapi/prompt_management_fastapi_expected.py
@@ -0,0 +1,114 @@
+"""
+Example demonstrating LangWatch prompt management operations.
+
+This example shows how to:
+1. Create a new prompt
+2. Retrieve and use a prompt
+3. Update a prompt
+4. Use the updated prompt
+5. Delete a prompt
+
+Run this example with:
+    python examples/prompt_management.py
+"""
+
+from dotenv import load_dotenv
+import langwatch
+import uuid
+from openai import OpenAI
+
+load_dotenv()
+
+client = OpenAI()
+
+# Initialize LangWatch (ensure you have LANGWATCH_API_KEY set)
+langwatch.setup(debug=True)
+
+
+@langwatch.span()
+def example():
+    # Autotrack OpenAI calls
+    langwatch.get_current_trace().autotrack_openai_calls(client)
+
+    print("=== LangWatch Prompt Management Example ===\n")
+
+    # 1. Create a new prompt
+    print("1. Creating a new prompt...")
+    short_uuid = str(uuid.uuid4())[:8]
+    prompt = langwatch.prompts.create(
+        handle=f"something/example_prompt_{short_uuid}",
+        scope="PROJECT",  # optional - 'ORGANIZATION' or 'PROJECT'
+        author_id=None,  # optional
+        prompt="You are a helpful assistant. Specialize in {{subject}}.",  # optional
+        messages=[  # optional -- you cannot set a system message and a prompt at the same time
+            {"role": "user", "content": "{{question}}"},
+        ],
+        inputs=[{"identifier": "question", "type": "str"}],  # optional
+        outputs=[
+            {"identifier": "answer", "type": "str", "json_schema": {"type": "str"}}
+        ],  # optional
+    )
+    print(f"Created prompt with id: {prompt.id}")
+    print(f"Created prompt with handle: {prompt.handle}")
+
+    # 2. Get and use the prompt
+    print("2. Retrieving the prompt...")
+    retrieved_prompt_specific_version = langwatch.prompts.get(
+        prompt.handle, version_number=prompt.version_number
+    )
+    print(f"Retrieved prompt: {retrieved_prompt_specific_version.version_number}")
+
+    # Use the prompt (example usage)
+    print("Using the created prompt...")
+
+    # Compile the prompt with variables
+    compiled_prompt = retrieved_prompt.compile(
+        question="What is the capital of France?"
+    )
+    print(f"Compiled prompt: {compiled_prompt.prompt}")
+    print(f"Compiled prompt messages: {compiled_prompt.messages}")
+
+    # 3. Update the prompt
+    print("3. Updating the prompt...")
+    updated_prompt = langwatch.prompts.update(
+        prompt.handle,
+        handle=f"updated_example_prompt_{short_uuid}",  # optional
+        scope="PROJECT",  # optional - 'ORGANIZATION' or 'PROJECT'
+        prompt="You are obsessed with {{subject}} and talk in CAPS.",  # optional
+    )
+    print(f"Updated prompt name: {updated_prompt.name}")
+    print(f"Prompt ID remains: {updated_prompt.id}")
+
+    # 4. Use the updated prompt
+    print("Using the updated prompt...")
+
+    # Compile the updated prompt to show the difference
+    updated_compiled = updated_prompt.compile_strict(
+        subject="quantum computing", question="How does it work in 10 words or less?"
+    )
+    print(f"Updated compiled prompt: {updated_compiled.prompt}")
+    print(f"Updated compiled prompt messages: {updated_compiled.messages}")
+
+    # This is where you would use the prompt in your application
+    # For example, you could use the prompt to generate a response
+    response = client.chat.completions.create(
+        model=updated_compiled.model.split("openai/")[1],
+        messages=updated_compiled.messages,
+    )
+
+    print(f"Response: {response.choices[0].message.content}")
+
+    # 5. Delete the prompt
+    print("5. Deleting the prompt...")
+    result = langwatch.prompts.delete(updated_prompt.handle)
+    print(f"Deletion result: {result}")
+    print("Prompt management example completed successfully!")
+
+
+@langwatch.trace()
+def main():
+    example()
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null
+++ package/tests/fixtures/fastapi/prompt_management_fastapi_input.py
@@ -0,0 +1,88 @@
+"""
+Example demonstrating prompt management operations.
+
+This example shows how to:
+1. Create a new prompt
+2. Retrieve and use a prompt
+3. Update a prompt
+4. Use the updated prompt
+5. Delete a prompt
+
+Run this example with:
+    python examples/prompt_management.py
+"""
+
+from dotenv import load_dotenv
+import uuid
+from openai import OpenAI
+
+load_dotenv()
+
+client = OpenAI()
+
+
+def example():
+    print("=== Prompt Management Example ===\n")
+
+    # 1. Create a new prompt
+    print("1. Creating a new prompt...")
+    short_uuid = str(uuid.uuid4())[:8]
+
+    # Manual prompt management would go here
+    prompt_template = "You are a helpful assistant. Specialize in {{subject}}."
+    messages_template = [
+        {"role": "user", "content": "{{question}}"},
+    ]
+
+    print(f"Created prompt template: {prompt_template}")
+
+    # 2. Get and use the prompt
+    print("2. Using the prompt...")
+
+    # Compile the prompt with variables (manual substitution)
+    compiled_prompt = prompt_template.replace("{{subject}}", "quantum computing")
+    compiled_messages = [
+        {"role": "user", "content": "What is the capital of France?"}
+    ]
+    print(f"Compiled prompt: {compiled_prompt}")
+    print(f"Compiled prompt messages: {compiled_messages}")
+
+    # 3. Update the prompt
+    print("3. Updating the prompt...")
+    updated_prompt_template = "You are obsessed with {{subject}} and talk in CAPS."
+    print(f"Updated prompt template: {updated_prompt_template}")
+
+    # 4. Use the updated prompt
+    print("Using the updated prompt...")
+
+    # Compile the updated prompt to show the difference
+    updated_compiled = updated_prompt_template.replace("{{subject}}", "quantum computing")
+    updated_messages = [
+        {"role": "user", "content": "How does it work in 10 words or less?"}
+    ]
+    print(f"Updated compiled prompt: {updated_compiled}")
+    print(f"Updated compiled prompt messages: {updated_messages}")
+
+    # This is where you would use the prompt in your application
+    # For example, you could use the prompt to generate a response
+    response = client.chat.completions.create(
+        model="gpt-4",
+        messages=[
+            {"role": "system", "content": updated_compiled},
+            *updated_messages
+        ],
+    )
+
+    print(f"Response: {response.choices[0].message.content}")
+
+    # 5. Delete the prompt
+    print("5. Deleting the prompt...")
+    print("Prompt management example completed successfully!")
+
+
+def main():
+    example()
+
+
+if __name__ == "__main__":
+    main()
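The prompt-management pair swaps manual string templates for the langwatch.prompts API. The lifecycle exercised in the expected fixture, reduced to its bare calls (handles and template variables below are illustrative; the note on compile_strict is an assumption, not verified against the SDK):

import uuid

import langwatch

langwatch.setup(debug=True)

handle = f"something/example_prompt_{str(uuid.uuid4())[:8]}"

# create a versioned prompt with {{variable}} placeholders
prompt = langwatch.prompts.create(
    handle=handle,
    prompt="You are a helpful assistant. Specialize in {{subject}}.",
    messages=[{"role": "user", "content": "{{question}}"}],
)

# fetch a specific version, then fill in the template variables
fetched = langwatch.prompts.get(prompt.handle, version_number=prompt.version_number)
compiled = fetched.compile(question="What is the capital of France?")

# update in place; the handle stays addressable
updated = langwatch.prompts.update(
    prompt.handle,
    prompt="You are obsessed with {{subject}} and talk in CAPS.",
)
# compile_strict presumably fails if any {{variable}} is left unfilled (assumption)
strict = updated.compile_strict(
    subject="quantum computing", question="How does it work?"
)

langwatch.prompts.delete(updated.handle)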