hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +11 -5
- hud/agents/base.py +220 -500
- hud/agents/claude.py +200 -240
- hud/agents/gemini.py +275 -0
- hud/agents/gemini_cua.py +335 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +41 -36
- hud/agents/openai.py +291 -292
- hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
- hud/agents/operator.py +211 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +379 -210
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +376 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/cli/__init__.py +461 -545
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +664 -110
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +882 -734
- hud/cli/eval.py +782 -668
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/push.py +29 -11
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +108 -6
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +69 -0
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +40 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +327 -0
- hud/datasets/runner.py +192 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +50 -0
- hud/environment/connection.py +206 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +109 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +694 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +112 -0
- hud/environment/scenarios.py +493 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +218 -0
- hud/environment/tests/test_environment.py +161 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +201 -0
- hud/environment/tests/test_scenarios.py +280 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +674 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +185 -0
- hud/eval/manager.py +466 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +340 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +145 -0
- hud/eval/types.py +63 -0
- hud/eval/utils.py +183 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +151 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +158 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +16 -2
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +4 -0
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +167 -57
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +61 -3
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.1.dist-info/METADATA +264 -0
- hud_python-0.5.1.dist-info/RECORD +299 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/agents/openai.py
CHANGED
@@ -2,354 +2,353 @@

 from __future__ import annotations

+import copy
+import json
 import logging
+from inspect import cleandoc
 from typing import Any, ClassVar, Literal

 import mcp.types as types
-from openai import AsyncOpenAI, OpenAI
+from openai import AsyncOpenAI, Omit, OpenAI
 from openai.types.responses import (
-
+    ApplyPatchToolParam,
+    ComputerToolParam,
+    FunctionShellToolParam,
+    FunctionToolParam,
+    ResponseFunctionCallOutputItemListParam,
+    ResponseInputFileContentParam,
+    ResponseInputImageContentParam,
+    ResponseInputImageParam,
     ResponseInputMessageContentListParam,
     ResponseInputParam,
-
+    ResponseInputTextContentParam,
+    ResponseInputTextParam,
     ResponseOutputText,
     ToolParam,
 )
+from openai.types.responses.response_create_params import ToolChoice  # noqa: TC002
+from openai.types.responses.response_input_param import FunctionCallOutput, Message
+from openai.types.shared_params.reasoning import Reasoning  # noqa: TC002
+from pydantic import ConfigDict

-import hud
 from hud.settings import settings
-from hud.
-from hud.
+from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
+from hud.utils.strict_schema import ensure_strict_json_schema
+from hud.utils.types import with_signature

-from .base import MCPAgent
+from .base import BaseCreateParams, MCPAgent

 logger = logging.getLogger(__name__)


-class
-    """
-    Operator agent that uses MCP servers for tool execution.
+class OpenAIConfig(BaseAgentConfig):
+    """Configuration model for `OpenAIAgent`."""

-
-    tools through MCP servers instead of direct implementation.
-    """
+    model_config = ConfigDict(arbitrary_types_allowed=True)

-
-
-
-
-
+    model_name: str = "OpenAI"
+    model: str = "gpt-5.1"
+    model_client: AsyncOpenAI | None = None
+    max_output_tokens: int | None = None
+    temperature: float | None = None
+    reasoning: Reasoning | None = None
+    tool_choice: ToolChoice | None = None
+    truncation: Literal["auto", "disabled"] | None = None
+    parallel_tool_calls: bool | None = None
+    validate_api_key: bool = True

-    def __init__(
-        self,
-        model_client: AsyncOpenAI | None = None,
-        model: str = "computer-use-preview",
-        environment: Literal["windows", "mac", "linux", "browser"] = "linux",
-        validate_api_key: bool = True,
-        **kwargs: Any,
-    ) -> None:
-        """
-        Initialize Operator MCP agent.
-
-        Args:
-            client: AsyncOpenAI client (created if not provided)
-            model: OpenAI model to use
-            environment: Environment type for computer use
-            display_width: Display width for computer use
-            display_height: Display height for computer use
-            **kwargs: Additional arguments passed to MCPAgent
-        """
-        super().__init__(**kwargs)

-
+class OpenAICreateParams(BaseCreateParams, OpenAIConfig):
+    pass
+
+
+class OpenAIAgent(MCPAgent):
+    """Generic OpenAI agent that can execute MCP tools through the Responses API."""
+
+    metadata: ClassVar[dict[str, Any] | None] = None
+    config_cls: ClassVar[type[BaseAgentConfig]] = OpenAIConfig
+
+    @with_signature(OpenAICreateParams)
+    @classmethod
+    def create(cls, **kwargs: Any) -> OpenAIAgent:  # pyright: ignore[reportIncompatibleMethodOverride]
+        return MCPAgent.create.__func__(cls, **kwargs)  # type: ignore[return-value]
+
+    def __init__(self, params: OpenAICreateParams | None = None, **kwargs: Any) -> None:
+        super().__init__(params, **kwargs)
+        self.config: OpenAIConfig
+
+        model_client = self.config.model_client
         if model_client is None:
             api_key = settings.openai_api_key
             if not api_key:
                 raise ValueError("OpenAI API key not found. Set OPENAI_API_KEY.")
             model_client = AsyncOpenAI(api_key=api_key)

+        if self.config.validate_api_key:
+            try:
+                OpenAI(api_key=model_client.api_key).models.list()
+            except Exception as exc:  # pragma: no cover - network validation
+                raise ValueError(f"OpenAI API key is invalid: {exc}") from exc
+
         self.openai_client = model_client
-        self.
-        self.
+        self._model = self.config.model
+        self.max_output_tokens = self.config.max_output_tokens
+        self.temperature = self.config.temperature
+        self.reasoning = self.config.reasoning
+        self.tool_choice: ToolChoice | None = self.config.tool_choice
+        self.parallel_tool_calls = self.config.parallel_tool_calls
+        self.truncation: Literal["auto", "disabled"] | None = self.config.truncation
+
+        self._openai_tools: list[ToolParam] = []
+        self._tool_name_map: dict[str, str] = {}

-        # State tracking for OpenAI's stateful API
         self.last_response_id: str | None = None
-        self.
-        self.pending_safety_checks: list[Any] = []
+        self._message_cursor = 0

-
-
-
-                OpenAI(api_key=self.openai_client.api_key).models.list()
-            except Exception as e:
-                raise ValueError(f"OpenAI API key is invalid: {e}") from e
-
-        self.model_name = "openai-" + self.model
-
-        # Append OpenAI-specific instructions to the base system prompt
-        openai_instructions = """
-You are an autonomous computer-using agent. Follow these guidelines:
-
-1. NEVER ask for confirmation. Complete all tasks autonomously.
-2. Do NOT send messages like "I need to confirm before..." or "Do you want me to continue?" - just proceed.
-3. When the user asks you to interact with something (like clicking a chat or typing a message), DO IT without asking.
-4. Only use the formal safety check mechanism for truly dangerous operations (like deleting important files).
-5. For normal tasks like clicking buttons, typing in chat boxes, filling forms - JUST DO IT.
-6. The user has already given you permission by running this agent. No further confirmation is needed.
-7. Be decisive and action-oriented. Complete the requested task fully.
-
-Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked.
-""".strip()  # noqa: E501
-
-        # Append OpenAI instructions to any base system prompt
-        if self.system_prompt:
-            self.system_prompt = f"{self.system_prompt}\n\n{openai_instructions}"
-        else:
-            self.system_prompt = openai_instructions
-
-    async def _run_context(self, context: list[types.ContentBlock], max_steps: int = 10) -> Trace:
-        """
-        Run the agent with the given prompt or task.
+    def _on_tools_ready(self) -> None:
+        """Build OpenAI-specific tool mappings after tools are discovered."""
+        self._convert_tools_for_openai()

-
+    def _to_openai_tool(
+        self,
+        tool: types.Tool,
+    ) -> (
+        FunctionShellToolParam | ApplyPatchToolParam | FunctionToolParam | ComputerToolParam | None
+    ):
+        # Special case: shell tool -> OpenAI native shell
+        if tool.name == "shell":
+            return FunctionShellToolParam(type="shell")
+
+        # Special case: apply_patch tool -> OpenAI native apply_patch
+        if tool.name == "apply_patch":
+            return ApplyPatchToolParam(type="apply_patch")
+
+        # Regular function tool
+        if tool.description is None or tool.inputSchema is None:
+            raise ValueError(
+                cleandoc(f"""MCP tool {tool.name} requires both a description and inputSchema.
+                Add these by:
+                1. Adding a docstring to your @mcp.tool decorated function for the description
+                2. Using pydantic Field() annotations on function parameters for the schema
+                """)
+            )
+
+        # schema must be strict
+
+        try:
+            strict_schema = ensure_strict_json_schema(copy.deepcopy(tool.inputSchema))
+        except Exception as e:
+            self.console.warning_log(f"Failed to convert tool '{tool.name}' schema to strict: {e}")
+            return None
+
+        return FunctionToolParam(
+            type="function",
+            name=tool.name,
+            description=tool.description,
+            parameters=strict_schema,
+            strict=True,
+        )
+
+    def _convert_tools_for_openai(self) -> None:
+        """Convert MCP tools into OpenAI Responses tool definitions."""
+        available_tools = self.get_available_tools()
+
+        self._openai_tools = []
+        self._tool_name_map = {}
+
+        for tool in available_tools:
+            openai_tool = self._to_openai_tool(tool)
+            if openai_tool is None:
+                continue
+
+            if "name" in openai_tool:
+                self._tool_name_map[openai_tool["name"]] = tool.name
+            self._openai_tools.append(openai_tool)
+
+    def _extract_tool_call(self, item: Any) -> MCPToolCall | None:
+        """Extract an MCPToolCall from a response output item.
+
+        Subclasses can override to customize tool call extraction (e.g., routing
+        computer_call to a different tool name).
         """
-
-
-
-
+        if item.type == "function_call":
+            tool_name = item.name or ""
+            target_name = self._tool_name_map.get(tool_name, tool_name)
+            arguments = json.loads(item.arguments)
+            return MCPToolCall(name=target_name, arguments=arguments, id=item.call_id)
+        elif item.type == "shell_call":
+            return MCPToolCall(name="shell", arguments=item.action.to_dict(), id=item.call_id)
+        elif item.type == "apply_patch_call":
+            return MCPToolCall(
+                name="apply_patch", arguments=item.operation.to_dict(), id=item.call_id
+            )
+        return None

-
+    async def _run_context(
+        self, context: list[types.ContentBlock], *, max_steps: int = 10
+    ) -> Trace:
+        """Reset internal state before delegating to the base loop."""
+        self._reset_response_state()
         return await super()._run_context(context, max_steps=max_steps)

-
-
-
+    def _reset_response_state(self) -> None:
+        self.last_response_id = None
+        self._message_cursor = 0

-
-
-        """
+    async def get_system_messages(self) -> list[types.ContentBlock]:
+        """System messages are provided via the `instructions` field."""
         return []

-    async def format_blocks(
-
-
-        """
-        Format blocks for OpenAI input format.
-
-        Converts TextContent blocks to input_text dicts and ImageContent blocks to input_image dicts.
-        """  # noqa: E501
-        formatted = []
+    async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Message]:
+        """Convert MCP content blocks into OpenAI user messages."""
+        content: ResponseInputMessageContentListParam = []
         for block in blocks:
             if isinstance(block, types.TextContent):
-
+                content.append(ResponseInputTextParam(type="input_text", text=block.text))
             elif isinstance(block, types.ImageContent):
                 mime_type = getattr(block, "mimeType", "image/png")
-
-
+                content.append(
+                    ResponseInputImageParam(
+                        type="input_image",
+                        image_url=f"data:{mime_type};base64,{block.data}",
+                        detail="auto",
+                    )
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-        computer_tool_name = "openai_computer"
-
-        # Define the computer use tool
-        computer_tool: ToolParam = {  # type: ignore[reportAssignmentType]
-            "type": "computer_use_preview",
-            "display_width": self.metadata["display_width"],
-            "display_height": self.metadata["display_height"],
-            "environment": self.environment,
-        }
-
-        # Build the request based on whether this is first step or follow-up
-        if self.pending_call_id is None and self.last_response_id is None:
-            # First step - messages are already formatted dicts from format_blocks
-            # format_blocks returns type ResponseInputMessageContentListParam, which is a list of dicts  # noqa: E501
-            input_content: ResponseInputMessageContentListParam = []
-
-            input_content.extend(messages)
-
-            # If no content was added, add empty text to avoid empty request
-            if not input_content:
-                input_content.append({"type": "input_text", "text": ""})
-
-            input_param: ResponseInputParam = [{"role": "user", "content": input_content}]  # type: ignore[reportUnknownMemberType]
-
-            response = await self.openai_client.responses.create(
-                model=self.model,
-                tools=[computer_tool],
-                input=input_param,
-                instructions=self.system_prompt,
-                truncation="auto",
-                reasoning={"summary": "auto"},  # type: ignore[arg-type]
-            )
-        else:
-            # Follow-up step - check if this is user input or tool result
-            latest_message = messages[-1] if messages else {}
-
-            if latest_message.get("type") == "input_text":
-                # User provided input in conversation mode
-                user_text = latest_message.get("text", "")
-                input_param_followup: ResponseInputParam = [  # type: ignore[reportAssignmentType]
-                    {"role": "user", "content": [{"type": "input_text", "text": user_text}]}
-                ]
-                # Reset pending_call_id since this is user input, not a tool response
-                self.pending_call_id = None
-            else:
-                # Tool result - need screenshot from processed results
-                latest_screenshot = None
-                for msg in reversed(messages):
-                    if isinstance(msg, dict) and "image_url" in msg:
-                        latest_screenshot = msg["image_url"]  # type: ignore
-                        break
-
-                if not latest_screenshot:
-                    self.console.warning_log("No screenshot provided for response to action")
-                    return AgentResponse(
-                        content="No screenshot available for next action",
-                        tool_calls=[],
-                        done=True,
+        if not content:
+            content.append(ResponseInputTextParam(type="input_text", text=""))
+        return [Message(role="user", content=content)]
+
+    async def get_response(self, messages: ResponseInputParam) -> AgentResponse:
+        """Send the latest input items to OpenAI's Responses API."""
+        new_items: ResponseInputParam = messages[self._message_cursor :]
+        if not new_items:
+            if self.last_response_id is None:
+                new_items = [
+                    Message(
+                        role="user", content=[ResponseInputTextParam(type="input_text", text="")]
                     )
-
-                # Create response to previous action
-                input_param_followup: ResponseInputParam = [  # type: ignore[reportAssignmentType]
-                    {  # type: ignore[reportAssignmentType]
-                        "call_id": self.pending_call_id,
-                        "type": "computer_call_output",
-                        "output": {
-                            "type": "input_image",
-                            "image_url": latest_screenshot,
-                        },
-                        "acknowledged_safety_checks": self.pending_safety_checks,
-                    }
                 ]
+            else:
+                self.console.debug("No new messages to send to OpenAI.")
+                return AgentResponse(content="", tool_calls=[], done=True)
+
+        response = await self.openai_client.responses.create(
+            model=self._model,
+            input=new_items,
+            instructions=self.system_prompt,
+            max_output_tokens=self.max_output_tokens,
+            temperature=self.temperature,
+            tool_choice=self.tool_choice if self.tool_choice is not None else Omit(),
+            parallel_tool_calls=self.parallel_tool_calls,
+            reasoning=self.reasoning,
+            tools=self._openai_tools if self._openai_tools else Omit(),
+            previous_response_id=(
+                self.last_response_id if self.last_response_id is not None else Omit()
+            ),
+            truncation=self.truncation,
+        )

-        self.pending_safety_checks = []
-
-        response = await self.openai_client.responses.create(
-            model=self.model,
-            previous_response_id=self.last_response_id,
-            tools=[computer_tool],
-            input=input_param_followup,
-            instructions=self.system_prompt,
-            truncation="auto",
-            reasoning={"summary": "auto"},  # type: ignore[arg-type]
-        )
-
-        # Store response ID for next call
         self.last_response_id = response.id
+        self._message_cursor = len(messages)

-
-
-
-            tool_calls=[],
-            done=False,  # Will be set to True only if no tool calls
-        )
+        agent_response = AgentResponse(content="", tool_calls=[], done=True)
+        text_chunks: list[str] = []
+        reasoning_chunks: list[str] = []

-        self.pending_call_id = None
-
-        # Check for computer calls
-        computer_calls = [
-            item
-            for item in response.output
-            if isinstance(item, ResponseComputerToolCall) and item.type == "computer_call"
-        ]
-
-        if computer_calls:
-            # Process computer calls
-            result.done = False
-            for computer_call in computer_calls:
-                self.pending_call_id = computer_call.call_id
-                self.pending_safety_checks = computer_call.pending_safety_checks
-
-                # Convert OpenAI action to MCP tool call
-                action = computer_call.action.model_dump()
-
-                # Create MCPToolCall object with OpenAI metadata as extra fields
-                # Pyright will complain but the tool class accepts extra fields
-                tool_call = MCPToolCall(
-                    name=computer_tool_name,
-                    arguments=action,
-                    id=computer_call.call_id,  # type: ignore
-                    pending_safety_checks=computer_call.pending_safety_checks,  # type: ignore
-                )
-                result.tool_calls.append(tool_call)
-        else:
-            # No computer calls, check for text response
-            for item in response.output:
-                if isinstance(item, ResponseOutputMessage) and item.type == "message":
-                    # Extract text from content blocks
-                    text_parts = [
-                        content.text
-                        for content in item.content
-                        if isinstance(content, ResponseOutputText)
-                    ]
-                    if text_parts:
-                        result.content = "".join(text_parts)
-                        break
-
-        # Extract reasoning if present
-        reasoning_text = ""
         for item in response.output:
-            if item.type == "
-
-
-
-
+            if item.type == "message":
+                text = "".join(
+                    content.text
+                    for content in item.content
+                    if isinstance(content, ResponseOutputText)
+                )
+                if text:
+                    text_chunks.append(text)
+            elif item.type == "reasoning":
+                reasoning_chunks.append("".join(summary.text for summary in item.summary))
+            else:
+                tool_call = self._extract_tool_call(item)
+                if tool_call is not None:
+                    agent_response.tool_calls.append(tool_call)

-
-
-            result.done = True
+        if agent_response.tool_calls:
+            agent_response.done = False

-
+        agent_response.content = "".join(text_chunks)
+        if reasoning_chunks:
+            agent_response.reasoning = "\n".join(reasoning_chunks)
+        return agent_response

     async def format_tool_results(
         self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
-    ) ->
-        """
-
-
-
-
-
-
-
+    ) -> list[FunctionCallOutput]:
+        """Convert MCP tool outputs into Responses input items."""
+        formatted: list[FunctionCallOutput] = []
+        for call, result in zip(tool_calls, tool_results, strict=False):
+            if not call.id:
+                self.console.warning_log(f"Tool '{call.name}' missing call_id; skipping output.")
+                continue
+
+            output_items: ResponseFunctionCallOutputItemListParam = []
+            if result.isError:
+                output_items.append(
+                    ResponseInputTextParam(type="input_text", text="[tool_error] true")
+                )

-
-
-
-
+            if result.structuredContent is not None:
+                output_items.append(
+                    ResponseInputTextParam(
+                        type="input_text", text=json.dumps(result.structuredContent, default=str)
+                    )
+                )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            for block in result.content:
+                match block:
+                    case types.TextContent():
+                        output_items.append(
+                            ResponseInputTextContentParam(type="input_text", text=block.text)
+                        )
+                    case types.ImageContent():
+                        mime_type = getattr(block, "mimeType", "image/png")
+                        output_items.append(
+                            ResponseInputImageContentParam(
+                                type="input_image",
+                                image_url=f"data:{mime_type};base64,{block.data}",
+                            )
+                        )
+                    case types.ResourceLink():
+                        output_items.append(
+                            ResponseInputFileContentParam(
+                                type="input_file", file_url=str(block.uri)
+                            )
+                        )
+                    case types.EmbeddedResource():
+                        match block.resource:
+                            case types.TextResourceContents():
+                                output_items.append(
+                                    ResponseInputTextContentParam(
+                                        type="input_text", text=block.resource.text
+                                    )
+                                )
+                            case types.BlobResourceContents():
+                                output_items.append(
+                                    ResponseInputFileContentParam(
+                                        type="input_file", file_data=block.resource.blob
+                                    )
+                                )
+                            case _:
+                                self.console.warning_log(
+                                    f"Unknown resource type: {type(block.resource)}"
+                                )
+                    case _:
+                        self.console.warning_log(f"Unknown content block type: {type(block)}")
+
+            if not output_items:
+                output_items.append(ResponseInputTextParam(type="input_text", text=""))
+
+            formatted.append(
+                FunctionCallOutput(
+                    type="function_call_output", call_id=call.id, output=output_items
+                ),
             )
-
-        return formatted_results
+        return formatted
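The diff above replaces the 0.4.x computer-use-preview loop with a config-driven agent built on the Responses API. As a rough illustration of the new construction surface, here is a minimal sketch based on the OpenAIConfig fields and the create classmethod shown in the diff; the import path matches this file's location, but the wider call pattern (what BaseCreateParams contributes, how the agent is attached to an environment or task) is an assumption, not something this diff shows.

# Minimal sketch (assumed usage, inferred from the diff above, not from release docs):
from hud.agents.openai import OpenAIAgent

agent = OpenAIAgent.create(
    model="gpt-5.1",          # default declared on OpenAIConfig
    max_output_tokens=2048,   # optional limit/sampling fields from OpenAIConfig
    temperature=0.0,
    validate_api_key=False,   # skip the synchronous models.list() key check in __init__
)
# Fields such as reasoning, tool_choice, truncation, and parallel_tool_calls are
# forwarded to responses.create() on each step, as get_response() in the diff shows.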