datarobot_genai-0.2.31-py3-none-any.whl
This diff shows the contents of a publicly available package version released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in the public registry.
- datarobot_genai/__init__.py +19 -0
- datarobot_genai/core/__init__.py +0 -0
- datarobot_genai/core/agents/__init__.py +43 -0
- datarobot_genai/core/agents/base.py +195 -0
- datarobot_genai/core/chat/__init__.py +19 -0
- datarobot_genai/core/chat/auth.py +146 -0
- datarobot_genai/core/chat/client.py +178 -0
- datarobot_genai/core/chat/responses.py +297 -0
- datarobot_genai/core/cli/__init__.py +18 -0
- datarobot_genai/core/cli/agent_environment.py +47 -0
- datarobot_genai/core/cli/agent_kernel.py +211 -0
- datarobot_genai/core/custom_model.py +141 -0
- datarobot_genai/core/mcp/__init__.py +0 -0
- datarobot_genai/core/mcp/common.py +218 -0
- datarobot_genai/core/telemetry_agent.py +126 -0
- datarobot_genai/core/utils/__init__.py +3 -0
- datarobot_genai/core/utils/auth.py +234 -0
- datarobot_genai/core/utils/urls.py +64 -0
- datarobot_genai/crewai/__init__.py +24 -0
- datarobot_genai/crewai/agent.py +42 -0
- datarobot_genai/crewai/base.py +159 -0
- datarobot_genai/crewai/events.py +117 -0
- datarobot_genai/crewai/mcp.py +59 -0
- datarobot_genai/drmcp/__init__.py +78 -0
- datarobot_genai/drmcp/core/__init__.py +13 -0
- datarobot_genai/drmcp/core/auth.py +165 -0
- datarobot_genai/drmcp/core/clients.py +180 -0
- datarobot_genai/drmcp/core/config.py +364 -0
- datarobot_genai/drmcp/core/config_utils.py +174 -0
- datarobot_genai/drmcp/core/constants.py +18 -0
- datarobot_genai/drmcp/core/credentials.py +190 -0
- datarobot_genai/drmcp/core/dr_mcp_server.py +350 -0
- datarobot_genai/drmcp/core/dr_mcp_server_logo.py +136 -0
- datarobot_genai/drmcp/core/dynamic_prompts/__init__.py +13 -0
- datarobot_genai/drmcp/core/dynamic_prompts/controllers.py +130 -0
- datarobot_genai/drmcp/core/dynamic_prompts/dr_lib.py +70 -0
- datarobot_genai/drmcp/core/dynamic_prompts/register.py +205 -0
- datarobot_genai/drmcp/core/dynamic_prompts/utils.py +33 -0
- datarobot_genai/drmcp/core/dynamic_tools/__init__.py +14 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/__init__.py +0 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/__init__.py +14 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/base.py +72 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/default.py +82 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/drum.py +238 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/config.py +228 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/controllers.py +63 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/metadata.py +162 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/register.py +87 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/schemas/drum_agentic_fallback_schema.json +36 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/schemas/drum_prediction_fallback_schema.json +10 -0
- datarobot_genai/drmcp/core/dynamic_tools/register.py +254 -0
- datarobot_genai/drmcp/core/dynamic_tools/schema.py +532 -0
- datarobot_genai/drmcp/core/exceptions.py +25 -0
- datarobot_genai/drmcp/core/logging.py +98 -0
- datarobot_genai/drmcp/core/mcp_instance.py +515 -0
- datarobot_genai/drmcp/core/memory_management/__init__.py +13 -0
- datarobot_genai/drmcp/core/memory_management/manager.py +820 -0
- datarobot_genai/drmcp/core/memory_management/memory_tools.py +201 -0
- datarobot_genai/drmcp/core/routes.py +439 -0
- datarobot_genai/drmcp/core/routes_utils.py +30 -0
- datarobot_genai/drmcp/core/server_life_cycle.py +107 -0
- datarobot_genai/drmcp/core/telemetry.py +424 -0
- datarobot_genai/drmcp/core/tool_config.py +111 -0
- datarobot_genai/drmcp/core/tool_filter.py +117 -0
- datarobot_genai/drmcp/core/utils.py +138 -0
- datarobot_genai/drmcp/server.py +19 -0
- datarobot_genai/drmcp/test_utils/__init__.py +13 -0
- datarobot_genai/drmcp/test_utils/clients/__init__.py +0 -0
- datarobot_genai/drmcp/test_utils/clients/anthropic.py +68 -0
- datarobot_genai/drmcp/test_utils/clients/base.py +300 -0
- datarobot_genai/drmcp/test_utils/clients/dr_gateway.py +58 -0
- datarobot_genai/drmcp/test_utils/clients/openai.py +68 -0
- datarobot_genai/drmcp/test_utils/elicitation_test_tool.py +89 -0
- datarobot_genai/drmcp/test_utils/integration_mcp_server.py +109 -0
- datarobot_genai/drmcp/test_utils/mcp_utils_ete.py +133 -0
- datarobot_genai/drmcp/test_utils/mcp_utils_integration.py +107 -0
- datarobot_genai/drmcp/test_utils/test_interactive.py +205 -0
- datarobot_genai/drmcp/test_utils/tool_base_ete.py +220 -0
- datarobot_genai/drmcp/test_utils/utils.py +91 -0
- datarobot_genai/drmcp/tools/__init__.py +14 -0
- datarobot_genai/drmcp/tools/clients/__init__.py +14 -0
- datarobot_genai/drmcp/tools/clients/atlassian.py +188 -0
- datarobot_genai/drmcp/tools/clients/confluence.py +584 -0
- datarobot_genai/drmcp/tools/clients/gdrive.py +832 -0
- datarobot_genai/drmcp/tools/clients/jira.py +334 -0
- datarobot_genai/drmcp/tools/clients/microsoft_graph.py +479 -0
- datarobot_genai/drmcp/tools/clients/s3.py +28 -0
- datarobot_genai/drmcp/tools/confluence/__init__.py +14 -0
- datarobot_genai/drmcp/tools/confluence/tools.py +321 -0
- datarobot_genai/drmcp/tools/gdrive/__init__.py +0 -0
- datarobot_genai/drmcp/tools/gdrive/tools.py +347 -0
- datarobot_genai/drmcp/tools/jira/__init__.py +14 -0
- datarobot_genai/drmcp/tools/jira/tools.py +243 -0
- datarobot_genai/drmcp/tools/microsoft_graph/__init__.py +13 -0
- datarobot_genai/drmcp/tools/microsoft_graph/tools.py +198 -0
- datarobot_genai/drmcp/tools/predictive/__init__.py +27 -0
- datarobot_genai/drmcp/tools/predictive/data.py +133 -0
- datarobot_genai/drmcp/tools/predictive/deployment.py +91 -0
- datarobot_genai/drmcp/tools/predictive/deployment_info.py +392 -0
- datarobot_genai/drmcp/tools/predictive/model.py +148 -0
- datarobot_genai/drmcp/tools/predictive/predict.py +254 -0
- datarobot_genai/drmcp/tools/predictive/predict_realtime.py +307 -0
- datarobot_genai/drmcp/tools/predictive/project.py +90 -0
- datarobot_genai/drmcp/tools/predictive/training.py +661 -0
- datarobot_genai/langgraph/__init__.py +0 -0
- datarobot_genai/langgraph/agent.py +341 -0
- datarobot_genai/langgraph/mcp.py +73 -0
- datarobot_genai/llama_index/__init__.py +16 -0
- datarobot_genai/llama_index/agent.py +50 -0
- datarobot_genai/llama_index/base.py +299 -0
- datarobot_genai/llama_index/mcp.py +79 -0
- datarobot_genai/nat/__init__.py +0 -0
- datarobot_genai/nat/agent.py +275 -0
- datarobot_genai/nat/datarobot_auth_provider.py +110 -0
- datarobot_genai/nat/datarobot_llm_clients.py +318 -0
- datarobot_genai/nat/datarobot_llm_providers.py +130 -0
- datarobot_genai/nat/datarobot_mcp_client.py +266 -0
- datarobot_genai/nat/helpers.py +87 -0
- datarobot_genai/py.typed +0 -0
- datarobot_genai-0.2.31.dist-info/METADATA +145 -0
- datarobot_genai-0.2.31.dist-info/RECORD +125 -0
- datarobot_genai-0.2.31.dist-info/WHEEL +4 -0
- datarobot_genai-0.2.31.dist-info/entry_points.txt +5 -0
- datarobot_genai-0.2.31.dist-info/licenses/AUTHORS +2 -0
- datarobot_genai-0.2.31.dist-info/licenses/LICENSE +201 -0
datarobot_genai/drmcp/test_utils/test_interactive.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+
+# Copyright 2025 DataRobot, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Interactive MCP Client Test Script.
+
+This script allows you to test arbitrary commands with the MCP server
+using an LLM agent that can decide which tools to call.
+
+Supports elicitation - when tools require user input (like authentication tokens),
+the script will prompt you interactively.
+"""
+
+import asyncio
+import json
+import os
+import sys
+import traceback
+from pathlib import Path
+from typing import Any
+
+from dotenv import load_dotenv
+from mcp import ClientSession
+from mcp.client.streamable_http import streamablehttp_client
+from mcp.shared.context import RequestContext
+from mcp.types import ElicitRequestParams
+from mcp.types import ElicitResult
+
+from datarobot_genai.drmcp import get_dr_mcp_server_url
+from datarobot_genai.drmcp import get_headers
+from datarobot_genai.drmcp.test_utils.clients.base import LLMResponse
+from datarobot_genai.drmcp.test_utils.clients.base import ToolCall
+from datarobot_genai.drmcp.test_utils.clients.dr_gateway import DRLLMGatewayMCPClient
+
+# Re-export for backwards compatibility
+__all__ = ["DRLLMGatewayMCPClient", "LLMResponse", "ToolCall", "test_mcp_interactive"]
+
+
+async def test_mcp_interactive() -> None:
+    """Test the MCP server interactively with LLM agent."""
+    # Check for required environment variables
+    datarobot_api_token = os.environ.get("DATAROBOT_API_TOKEN")
+    if not datarobot_api_token:
+        print("❌ Error: DATAROBOT_API_TOKEN environment variable is required")
+        print("Please set it in your .env file or export it")
+        return
+
+    # Optional DataRobot settings
+    datarobot_endpoint = os.environ.get("DATAROBOT_ENDPOINT")
+    model = os.environ.get("MODEL")
+
+    print("🤖 Initializing LLM MCP Client...")
+
+    # Initialize the LLM client with elicitation handler
+    config = {
+        "datarobot_api_token": datarobot_api_token,
+        "save_llm_responses": False,
+    }
+    if datarobot_endpoint:
+        config["datarobot_endpoint"] = datarobot_endpoint
+    if model:
+        config["model"] = model
+
+    llm_client = DRLLMGatewayMCPClient(str(config))
+
+    # Get MCP server URL
+    mcp_server_url = get_dr_mcp_server_url()
+    if not mcp_server_url:
+        print("❌ Error: MCP server URL is not configured")
+        print("Please set DR_MCP_SERVER_URL environment variable or run: task test-interactive")
+        return
+
+    print(f"🔗 Connecting to MCP server at: {mcp_server_url}")
+
+    # Elicitation handler: prompt user for required values
+    async def elicitation_handler(
+        context: RequestContext[ClientSession, Any], params: ElicitRequestParams
+    ) -> ElicitResult:
+        print(f"\n📋 Elicitation Request: {params.message}")
+        if params.requestedSchema:
+            print(f" Schema: {params.requestedSchema}")
+
+        while True:
+            try:
+                response = input(" Enter value (or 'decline'/'cancel'): ").strip()
+            except (EOFError, KeyboardInterrupt):
+                return ElicitResult(action="cancel")
+
+            if response.lower() == "decline":
+                return ElicitResult(action="decline")
+            if response.lower() == "cancel":
+                return ElicitResult(action="cancel")
+            if response:
+                return ElicitResult(action="accept", content={"value": response})
+            print(" Please enter a value or 'decline'/'cancel'")
+
+    try:
+        async with streamablehttp_client(
+            url=mcp_server_url,
+            headers=get_headers(),
+        ) as (read_stream, write_stream, _):
+            async with ClientSession(
+                read_stream,
+                write_stream,
+                elicitation_callback=elicitation_handler,
+            ) as session:
+                await session.initialize()
+
+                print("✅ Connected to MCP server!")
+                print("📋 Available tools:")
+
+                tools_result = await session.list_tools()
+                for i, tool in enumerate(tools_result.tools, 1):
+                    print(f" {i}. {tool.name}: {tool.description}")
+
+                print("\n" + "=" * 60)
+                print("🎯 Interactive Testing Mode")
+                print("=" * 60)
+                print("Type your questions/commands. The AI will decide which tools to use.")
+                print("If a tool requires additional information, you will be prompted.")
+                print("Type 'quit' or 'exit' to stop.")
+                print()
+
+                while True:
+                    try:
+                        user_input = input("🤔 You: ").strip()
+
+                        if user_input.lower() in ["quit", "exit", "q"]:
+                            print("👋 Goodbye!")
+                            break
+
+                        if not user_input:
+                            continue
+                    except (EOFError, KeyboardInterrupt):
+                        print("\n👋 Goodbye!")
+                        break
+
+                    print("🤖 AI is thinking...")
+
+                    response = await llm_client.process_prompt_with_mcp_support(
+                        prompt=user_input,
+                        mcp_session=session,
+                    )
+
+                    print("\n🤖 AI Response:")
+                    print("-" * 40)
+                    print(response.content)
+
+                    if response.tool_calls:
+                        print("\n🔧 Tools Used:")
+                        for i, tool_call in enumerate(response.tool_calls, 1):
+                            print(f" {i}. {tool_call.tool_name}")
+                            print(f" Parameters: {tool_call.parameters}")
+                            print(f" Reasoning: {tool_call.reasoning}")
+
+                            if i <= len(response.tool_results):
+                                result = response.tool_results[i - 1]
+                                try:
+                                    result_data = json.loads(result)
+                                    if result_data.get("status") == "error":
+                                        error_msg = result_data.get("error", "Unknown error")
+                                        print(f" ❌ Error: {error_msg}")
+                                    elif result_data.get("status") == "success":
+                                        print(" ✅ Success")
+                                except json.JSONDecodeError:
+                                    if len(result) > 100:
+                                        print(f" Result: {result[:100]}...")
+                                    else:
+                                        print(f" Result: {result}")
+
+                    print("\n" + "=" * 60)
+    except Exception as e:
+        print(f"❌ Connection Error: {e}")
+        print(f" Server URL: {mcp_server_url}")
+        traceback.print_exc()
+        return
+
+
+if __name__ == "__main__":
+    # Ensure we're in the right directory
+    if not Path("src").exists():
+        print("❌ Error: Please run this script from the project root")
+        sys.exit(1)
+
+    # Load environment variables from .env file
+    print("📄 Loading environment variables...")
+    load_dotenv()
+
+    print("🚀 Starting Interactive MCP Client Test")
+    print("Make sure the MCP server is running with: task drmcp-dev")
+    print()
+
+    asyncio.run(test_mcp_interactive())
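
The test_interactive.py hunk above wires an elicitation handler into the MCP ClientSession so that tools can ask the user for values mid-call. The sketch below shows the same connection flow driven by canned answers instead of input(); it only reuses calls that appear in the diff (streamablehttp_client, ClientSession, get_dr_mcp_server_url, get_headers, ElicitResult), while the ANSWERS table and the run_scripted/scripted_elicitation names are illustrative, not part of the package.

# Minimal sketch, assuming the same environment variables as test_interactive.py.
import asyncio
from typing import Any

from mcp import ClientSession
from mcp.client.streamable_http import streamablehttp_client
from mcp.shared.context import RequestContext
from mcp.types import ElicitRequestParams
from mcp.types import ElicitResult

from datarobot_genai.drmcp import get_dr_mcp_server_url
from datarobot_genai.drmcp import get_headers

# Canned answers keyed by a substring of the elicitation message
# (assumption: the server's messages are stable enough to match on).
ANSWERS = {"token": "example-token"}


async def scripted_elicitation(
    context: RequestContext[ClientSession, Any], params: ElicitRequestParams
) -> ElicitResult:
    # Answer from the table instead of prompting; decline anything unknown.
    for key, value in ANSWERS.items():
        if key.lower() in params.message.lower():
            return ElicitResult(action="accept", content={"value": value})
    return ElicitResult(action="decline")


async def run_scripted() -> None:
    url = get_dr_mcp_server_url()
    if not url:
        raise RuntimeError("MCP server URL is not configured (see DR_MCP_SERVER_URL)")
    async with streamablehttp_client(url=url, headers=get_headers()) as (
        read_stream,
        write_stream,
        _,
    ):
        async with ClientSession(
            read_stream, write_stream, elicitation_callback=scripted_elicitation
        ) as session:
            await session.initialize()
            tools = await session.list_tools()
            # Print the tool names the server exposes.
            print([tool.name for tool in tools.tools])


if __name__ == "__main__":
    asyncio.run(run_scripted())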
datarobot_genai/drmcp/test_utils/tool_base_ete.py
@@ -0,0 +1,220 @@
+# Copyright 2025 DataRobot, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+from typing import Any
+
+from pydantic import BaseModel
+
+from .clients.base import LLMResponse
+
+
+class ToolCallTestExpectations(BaseModel):
+    """Class to store tool call information."""
+
+    name: str
+    parameters: dict[str, Any]
+    result: str | dict[str, Any]
+
+
+class ETETestExpectations(BaseModel):
+    """Class to store test expectations for ETE tests."""
+
+    potential_no_tool_calls: bool = False
+    tool_calls_expected: list[ToolCallTestExpectations]
+    llm_response_content_contains_expectations: list[str]
+
+
+SHOULD_NOT_BE_EMPTY = "SHOULD_NOT_BE_EMPTY"
+
+
+def _extract_structured_content(tool_result: str) -> Any:
+    r"""
+    Extract and parse structured content from tool result string.
+
+    Tool results are formatted as:
+    "Content: {content}\nStructured content: {structured_content}"
+
+    Structured content can be:
+    1. A JSON object with a "result" key: {"result": "..."} or {"result": "{...}"}
+    2. A direct JSON object: {"key": "value", ...}
+    3. Empty or missing
+
+    Args:
+        tool_result: The tool result string
+
+    Returns
+    -------
+    Parsed structured content, or None if not available
+    """
+    # Early returns for invalid inputs
+    if not tool_result or "Structured content: " not in tool_result:
+        return None
+
+    structured_part = tool_result.split("Structured content: ", 1)[1].strip()
+    # Parse JSON, return None on failure or empty structured_part
+    if not structured_part:
+        return None
+    try:
+        structured_data = json.loads(structured_part)
+    except json.JSONDecodeError:
+        return None
+
+    # If structured data has a "result" key, extract and parse that
+    if isinstance(structured_data, dict) and "result" in structured_data:
+        result_value = structured_data["result"]
+        # If result is a JSON string (starts with { or [), try to parse it
+        if isinstance(result_value, str) and result_value.strip().startswith(("{", "[")):
+            try:
+                parsed_result = json.loads(result_value)
+            except json.JSONDecodeError:
+                parsed_result = result_value  # Return string as-is if parsing fails
+            return parsed_result
+        return result_value  # Return result value directly
+
+    # If it's a direct JSON object (not wrapped in {"result": ...}), return it as-is
+    return structured_data
+
+
+def _check_dict_has_keys(
+    expected: dict[str, Any],
+    actual: dict[str, Any] | list[dict[str, Any]],
+    path: str = "",
+) -> bool:
+    """
+    Recursively check if all keys in expected dict exist in actual dict or in each item of
+    actual list.
+    Returns True if all expected keys exist, False otherwise.
+    """
+    # If actual is a list, check each item against the expected structure
+    if isinstance(actual, list):
+        if not actual:  # Empty list
+            return False
+        # Check first item against expected structure
+        return _check_dict_has_keys(expected, actual[0], path)
+
+    # Regular dict check
+    for key, value in expected.items():
+        current_path = f"{path}.{key}" if path else key
+        if key not in actual:
+            return False
+        if isinstance(value, dict):
+            if not isinstance(actual[key], dict):
+                return False
+            if not _check_dict_has_keys(value, actual[key], current_path):
+                return False
+    return True
+
+
+class ToolBaseE2E:
+    """Base class for end-to-end tests."""
+
+    async def _run_test_with_expectations(
+        self,
+        prompt: str,
+        test_expectations: ETETestExpectations,
+        openai_llm_client: Any,
+        mcp_session: Any,
+        test_name: str,
+    ) -> None:
+        """
+        Run a test with given expectations and validate the results.
+
+        Args:
+            prompt: The prompt to send to the LLM
+            test_expectations: ETETestExpectations object containing test expectations with keys:
+                - tool_calls_expected: List of expected tool calls with their parameters and results
+                - llm_response_content_contains_expectations: Expected content in the LLM response
+            openai_llm_client: The OpenAI LLM client
+            mcp_session: The test session
+            test_name: The name of the test (e.g. test_get_best_model_success)
+        """
+        # Get the test file name from the class name
+        file_name = self.__class__.__name__.lower().replace("e2e", "").replace("test", "")
+        output_file_name = f"{file_name}_{test_name}"
+
+        # Act
+        response: LLMResponse = await openai_llm_client.process_prompt_with_mcp_support(
+            prompt, mcp_session, output_file_name
+        )
+
+        # sometimes llm are too smart and doesn't call tools especially for the case when file
+        # doesn't exist
+        if test_expectations.potential_no_tool_calls and len(response.tool_calls) == 0:
+            pass
+        else:
+            # Verify LLM decided to use tools
+            assert len(response.tool_calls) == len(test_expectations.tool_calls_expected), (
+                "LLM should have decided to call tools"
+            )
+
+            for i, tool_call in enumerate(response.tool_calls):
+                assert tool_call.tool_name == test_expectations.tool_calls_expected[i].name, (
+                    f"Should have called {test_expectations.tool_calls_expected[i].name} tool, but "
+                    f"got: {tool_call.tool_name}"
+                )
+                assert (
+                    tool_call.parameters == test_expectations.tool_calls_expected[i].parameters
+                ), (
+                    f"Should have called {tool_call.tool_name} tool with the correct parameters, "
+                    f"but got: {tool_call.parameters}"
+                )
+                if test_expectations.tool_calls_expected[i].result != SHOULD_NOT_BE_EMPTY:
+                    expected_result = test_expectations.tool_calls_expected[i].result
+                    if isinstance(expected_result, str):
+                        assert expected_result in response.tool_results[i], (
+                            f"Should have called {tool_call.tool_name} tool with the correct "
+                            f"result, but got: {response.tool_results[i]}"
+                        )
+                    else:
+                        actual_result = _extract_structured_content(response.tool_results[i])
+                        if actual_result is None:
+                            # Fallback: try to parse the entire tool result as JSON
+                            try:
+                                actual_result = json.loads(response.tool_results[i])
+                            except json.JSONDecodeError:
+                                # If that fails, try to extract content part
+                                if "Content: " in response.tool_results[i]:
+                                    content_part = response.tool_results[i].split("Content: ", 1)[1]
+                                    if "\nStructured content: " in content_part:
+                                        content_part = content_part.split(
+                                            "\nStructured content: ", 1
+                                        )[0]
+                                    try:
+                                        actual_result = json.loads(content_part.strip())
+                                    except json.JSONDecodeError:
+                                        raise AssertionError(
+                                            f"Could not parse tool result for "
+                                            f"{tool_call.tool_name}: {response.tool_results[i]}"
+                                        )
+                        assert _check_dict_has_keys(expected_result, actual_result), (
+                            f"Should have called {tool_call.tool_name} tool with the correct "
+                            f"result structure, but got: {response.tool_results[i]}"
+                        )
+                else:
+                    assert len(response.tool_results[i]) > 0, (
+                        f"Should have called {tool_call.tool_name} tool with non-empty result, but "
+                        f"got: {response.tool_results[i]}"
+                    )
+
+        # Verify LLM provided comprehensive response
+        assert len(response.content) > 100, "LLM should provide detailed response"
+        assert any(
+            expected_response.lower() in response.content
+            for expected_response in test_expectations.llm_response_content_contains_expectations
+        ), (
+            f"Response should mention "
+            f"{test_expectations.llm_response_content_contains_expectations}, "
+            f"but got: {response.content}"
+        )
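
The tool_base_ete.py hunk above validates an LLM run against declared expectations: the number and names of tool calls, exact parameter matches, result checks (substring match, structural key check, or simply non-empty via SHOULD_NOT_BE_EMPTY), and required phrases in the final answer. A hypothetical downstream test built on it might look like the sketch below; the "list_deployments" tool name, the prompt, and the openai_llm_client/mcp_session fixtures (plus pytest-asyncio) are assumptions about the consuming test suite, not code shipped in this wheel.

# Illustrative only: shows how ETETestExpectations and ToolBaseE2E are meant to be combined.
from typing import Any

import pytest

from datarobot_genai.drmcp.test_utils.tool_base_ete import ETETestExpectations
from datarobot_genai.drmcp.test_utils.tool_base_ete import SHOULD_NOT_BE_EMPTY
from datarobot_genai.drmcp.test_utils.tool_base_ete import ToolBaseE2E
from datarobot_genai.drmcp.test_utils.tool_base_ete import ToolCallTestExpectations


class TestDeploymentsE2E(ToolBaseE2E):
    @pytest.mark.asyncio  # assumes pytest-asyncio is installed
    async def test_list_deployments(
        self, openai_llm_client: Any, mcp_session: Any  # hypothetical fixtures
    ) -> None:
        expectations = ETETestExpectations(
            tool_calls_expected=[
                ToolCallTestExpectations(
                    name="list_deployments",  # hypothetical tool name
                    parameters={},  # compared for exact equality with the LLM's call
                    result=SHOULD_NOT_BE_EMPTY,  # only require a non-empty result
                )
            ],
            llm_response_content_contains_expectations=["deployment"],
        )
        await self._run_test_with_expectations(
            prompt="List my DataRobot deployments",
            test_expectations=expectations,
            openai_llm_client=openai_llm_client,
            mcp_session=mcp_session,
            test_name="test_list_deployments",
        )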
datarobot_genai/drmcp/test_utils/utils.py
@@ -0,0 +1,91 @@
+# Copyright 2025 DataRobot, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import json
+import os
+from typing import TYPE_CHECKING
+from typing import Any
+
+if TYPE_CHECKING:
+    from .clients.base import LLMResponse
+
+from dotenv import load_dotenv
+
+
+def load_env() -> None:
+    load_dotenv(dotenv_path=".env", verbose=True, override=True)
+
+
+def format_tool_call(tool_call: dict[str, Any]) -> str:
+    """Format a single tool call in a readable way."""
+    return (
+        f"Tool: {tool_call['tool_name']}\n"
+        f"Parameters: {json.dumps(tool_call['parameters'], indent=2)}\n"
+        f"Reasoning: {tool_call['reasoning']}"
+    )
+
+
+def format_response(response: "LLMResponse") -> str:
+    """Format the LLM response in a readable way."""
+    formatted_parts = []
+
+    # Format the main content
+    formatted_parts.append("=== LLM Response ===\n")
+    formatted_parts.append(response.content)
+
+    # Format tool calls if any
+    if response.tool_calls:
+        formatted_parts.append("\n=== Tools Used ===")
+        for i, tool_call in enumerate(response.tool_calls, 1):
+            formatted_parts.append(f"\nTool Call #{i}:")
+            formatted_parts.append(
+                format_tool_call(
+                    {
+                        "tool_name": tool_call.tool_name,
+                        "parameters": tool_call.parameters,
+                        "reasoning": tool_call.reasoning,
+                    }
+                )
+            )
+
+    # Format tool results if any
+    if response.tool_results:
+        formatted_parts.append("\n=== Tool Results ===")
+        for i, result in enumerate(response.tool_results, 1):
+            formatted_parts.append(f"\nResult #{i}:")
+            formatted_parts.append(result)
+
+    return "\n".join(formatted_parts)
+
+
+def save_response_to_file(response: "LLMResponse", name: str | None = None) -> None:
+    """Save the response to a file in a readable format.
+
+    Args:
+        response: The LLM response to save
+        name: Optional name to use in the filename. If not provided,
+            will use a timestamp only.
+    """
+    # Create responses directory with timestamp
+    timestamp = datetime.datetime.now().strftime("%Y%m%d")
+    dir_path = "test_results/drmcp/.ete_responses/" + timestamp
+    os.makedirs(dir_path, exist_ok=True)
+
+    # Save both raw JSON and formatted text
+    base_name = f"{name}" if name else "response"
+
+    # Save formatted text
+    with open(f"{dir_path}/{base_name}.txt", "w") as f:
+        f.write(format_response(response))
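
The utils.py hunk above only reads a handful of attributes from the response object (content, tool_calls with tool_name/parameters/reasoning, tool_results), so the formatters can be exercised without the real LLMResponse/ToolCall classes from clients/base.py, which this diff does not show. A minimal usage sketch under that assumption, with SimpleNamespace stand-ins and a hypothetical tool name:

# Demonstrates format_response()/save_response_to_file() from the hunk above.
# Assumption: LLMResponse/ToolCall are duck-typed here via SimpleNamespace,
# since only the attributes listed above are accessed by the formatters.
from types import SimpleNamespace

from datarobot_genai.drmcp.test_utils.utils import format_response
from datarobot_genai.drmcp.test_utils.utils import save_response_to_file

fake_response = SimpleNamespace(
    content="The deployment is healthy.",
    tool_calls=[
        SimpleNamespace(
            tool_name="get_deployment_info",  # hypothetical tool name
            parameters={"deployment_id": "abc123"},
            reasoning="User asked about a deployment.",
        )
    ],
    tool_results=['{"status": "success"}'],
)

print(format_response(fake_response))
# Writes test_results/drmcp/.ete_responses/<YYYYMMDD>/demo.txt
save_response_to_file(fake_response, name="demo")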
@@ -0,0 +1,14 @@
+# Copyright 2025 DataRobot, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
@@ -0,0 +1,14 @@
+# Copyright 2025 DataRobot, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+