hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +70 -5
- hud/agents/base.py +238 -500
- hud/agents/claude.py +236 -247
- hud/agents/gateway.py +42 -0
- hud/agents/gemini.py +264 -0
- hud/agents/gemini_cua.py +324 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +48 -36
- hud/agents/openai.py +282 -296
- hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
- hud/agents/operator.py +199 -0
- hud/agents/resolver.py +70 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +381 -214
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +377 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_resolver.py +192 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/agents/types.py +148 -0
- hud/cli/__init__.py +493 -546
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +699 -113
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +889 -732
- hud/cli/eval.py +793 -667
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/pull.py +1 -1
- hud/cli/push.py +38 -13
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +110 -8
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push.py +1 -1
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +70 -1
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +45 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +326 -0
- hud/datasets/runner.py +198 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +52 -0
- hud/environment/connection.py +258 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +137 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +835 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +263 -0
- hud/environment/scenarios.py +620 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +205 -0
- hud/environment/tests/test_environment.py +593 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +242 -0
- hud/environment/tests/test_scenarios.py +1086 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +727 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +187 -0
- hud/eval/manager.py +533 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +372 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +291 -0
- hud/eval/types.py +65 -0
- hud/eval/utils.py +194 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +308 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +165 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +18 -2
- hud/tools/agent.py +223 -0
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +36 -3
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_agent_tool.py +355 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +194 -56
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +89 -18
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.13.dist-info/METADATA +264 -0
- hud_python-0.5.13.dist-info/RECORD +305 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/agents/openai.py
CHANGED
|
@@ -2,354 +2,340 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import copy
|
|
6
|
+
import json
|
|
5
7
|
import logging
|
|
8
|
+
from inspect import cleandoc
|
|
6
9
|
from typing import Any, ClassVar, Literal
|
|
7
10
|
|
|
8
11
|
import mcp.types as types
|
|
9
|
-
from openai import AsyncOpenAI, OpenAI
|
|
12
|
+
from openai import AsyncOpenAI, Omit, OpenAI
|
|
10
13
|
from openai.types.responses import (
|
|
11
|
-
|
|
14
|
+
ApplyPatchToolParam,
|
|
15
|
+
ComputerToolParam,
|
|
16
|
+
FunctionShellToolParam,
|
|
17
|
+
FunctionToolParam,
|
|
18
|
+
ResponseFunctionCallOutputItemListParam,
|
|
19
|
+
ResponseInputFileContentParam,
|
|
20
|
+
ResponseInputImageContentParam,
|
|
21
|
+
ResponseInputImageParam,
|
|
12
22
|
ResponseInputMessageContentListParam,
|
|
13
23
|
ResponseInputParam,
|
|
14
|
-
|
|
24
|
+
ResponseInputTextContentParam,
|
|
25
|
+
ResponseInputTextParam,
|
|
15
26
|
ResponseOutputText,
|
|
16
27
|
ToolParam,
|
|
17
28
|
)
|
|
29
|
+
from openai.types.responses.response_create_params import ToolChoice # noqa: TC002
|
|
30
|
+
from openai.types.responses.response_input_param import FunctionCallOutput, Message
|
|
31
|
+
from openai.types.shared_params.reasoning import Reasoning # noqa: TC002
|
|
18
32
|
|
|
19
|
-
import hud
|
|
20
33
|
from hud.settings import settings
|
|
21
|
-
from hud.
|
|
22
|
-
from hud.
|
|
34
|
+
from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
|
|
35
|
+
from hud.utils.strict_schema import ensure_strict_json_schema
|
|
36
|
+
from hud.utils.types import with_signature
|
|
23
37
|
|
|
24
38
|
from .base import MCPAgent
|
|
39
|
+
from .types import OpenAIConfig, OpenAICreateParams
|
|
25
40
|
|
|
26
41
|
logger = logging.getLogger(__name__)
|
|
27
42
|
|
|
28
43
|
|
|
29
|
-
class
|
|
30
|
-
"""
|
|
31
|
-
Operator agent that uses MCP servers for tool execution.
|
|
44
|
+
class OpenAIAgent(MCPAgent):
|
|
45
|
+
"""Generic OpenAI agent that can execute MCP tools through the Responses API."""
|
|
32
46
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
"""
|
|
47
|
+
metadata: ClassVar[dict[str, Any] | None] = None
|
|
48
|
+
config_cls: ClassVar[type[BaseAgentConfig]] = OpenAIConfig
|
|
36
49
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
required_tools: ClassVar[list[str]] = ["openai_computer"]
|
|
50
|
+
@with_signature(OpenAICreateParams)
|
|
51
|
+
@classmethod
|
|
52
|
+
def create(cls, **kwargs: Any) -> OpenAIAgent: # pyright: ignore[reportIncompatibleMethodOverride]
|
|
53
|
+
return MCPAgent.create.__func__(cls, **kwargs) # type: ignore[return-value]
|
|
42
54
|
|
|
43
|
-
def __init__(
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
model: str = "computer-use-preview",
|
|
47
|
-
environment: Literal["windows", "mac", "linux", "browser"] = "linux",
|
|
48
|
-
validate_api_key: bool = True,
|
|
49
|
-
**kwargs: Any,
|
|
50
|
-
) -> None:
|
|
51
|
-
"""
|
|
52
|
-
Initialize Operator MCP agent.
|
|
53
|
-
|
|
54
|
-
Args:
|
|
55
|
-
client: AsyncOpenAI client (created if not provided)
|
|
56
|
-
model: OpenAI model to use
|
|
57
|
-
environment: Environment type for computer use
|
|
58
|
-
display_width: Display width for computer use
|
|
59
|
-
display_height: Display height for computer use
|
|
60
|
-
**kwargs: Additional arguments passed to MCPAgent
|
|
61
|
-
"""
|
|
62
|
-
super().__init__(**kwargs)
|
|
55
|
+
def __init__(self, params: OpenAICreateParams | None = None, **kwargs: Any) -> None:
|
|
56
|
+
super().__init__(params, **kwargs)
|
|
57
|
+
self.config: OpenAIConfig
|
|
63
58
|
|
|
64
|
-
|
|
59
|
+
model_client = self.config.model_client
|
|
65
60
|
if model_client is None:
|
|
66
|
-
|
|
67
|
-
if
|
|
68
|
-
|
|
69
|
-
model_client = AsyncOpenAI(api_key=api_key)
|
|
61
|
+
# Default to HUD gateway when HUD_API_KEY is available
|
|
62
|
+
if settings.api_key:
|
|
63
|
+
from hud.agents.gateway import build_gateway_client
|
|
70
64
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
65
|
+
model_client = build_gateway_client("openai")
|
|
66
|
+
elif settings.openai_api_key:
|
|
67
|
+
model_client = AsyncOpenAI(api_key=settings.openai_api_key)
|
|
68
|
+
else:
|
|
69
|
+
raise ValueError(
|
|
70
|
+
"No API key found. Set HUD_API_KEY for HUD gateway, "
|
|
71
|
+
"or OPENAI_API_KEY for direct OpenAI access."
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
if self.config.validate_api_key:
|
|
75
|
+
try:
|
|
76
|
+
OpenAI(api_key=model_client.api_key).models.list()
|
|
77
|
+
except Exception as exc: # pragma: no cover - network validation
|
|
78
|
+
raise ValueError(f"OpenAI API key is invalid: {exc}") from exc
|
|
79
|
+
|
|
80
|
+
self.openai_client: AsyncOpenAI = model_client
|
|
81
|
+
self._model = self.config.model
|
|
82
|
+
self.max_output_tokens = self.config.max_output_tokens
|
|
83
|
+
self.temperature = self.config.temperature
|
|
84
|
+
self.reasoning: Reasoning | None = self.config.reasoning
|
|
85
|
+
self.tool_choice: ToolChoice | None = self.config.tool_choice
|
|
86
|
+
self.parallel_tool_calls = self.config.parallel_tool_calls
|
|
87
|
+
self.truncation: Literal["auto", "disabled"] | None = self.config.truncation
|
|
88
|
+
|
|
89
|
+
self._openai_tools: list[ToolParam] = []
|
|
90
|
+
self._tool_name_map: dict[str, str] = {}
|
|
74
91
|
|
|
75
|
-
# State tracking for OpenAI's stateful API
|
|
76
92
|
self.last_response_id: str | None = None
|
|
77
|
-
self.
|
|
78
|
-
self.pending_safety_checks: list[Any] = []
|
|
93
|
+
self._message_cursor = 0
|
|
79
94
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
95
|
+
def _on_tools_ready(self) -> None:
|
|
96
|
+
"""Build OpenAI-specific tool mappings after tools are discovered."""
|
|
97
|
+
self._convert_tools_for_openai()
|
|
98
|
+
|
|
99
|
+
def _to_openai_tool(
|
|
100
|
+
self,
|
|
101
|
+
tool: types.Tool,
|
|
102
|
+
) -> (
|
|
103
|
+
FunctionShellToolParam | ApplyPatchToolParam | FunctionToolParam | ComputerToolParam | None
|
|
104
|
+
):
|
|
105
|
+
# Special case: shell tool -> OpenAI native shell
|
|
106
|
+
if tool.name == "shell":
|
|
107
|
+
return FunctionShellToolParam(type="shell")
|
|
108
|
+
|
|
109
|
+
# Special case: apply_patch tool -> OpenAI native apply_patch
|
|
110
|
+
if tool.name == "apply_patch":
|
|
111
|
+
return ApplyPatchToolParam(type="apply_patch")
|
|
112
|
+
|
|
113
|
+
# Regular function tool
|
|
114
|
+
if tool.description is None or tool.inputSchema is None:
|
|
115
|
+
raise ValueError(
|
|
116
|
+
cleandoc(f"""MCP tool {tool.name} requires both a description and inputSchema.
|
|
117
|
+
Add these by:
|
|
118
|
+
1. Adding a docstring to your @mcp.tool decorated function for the description
|
|
119
|
+
2. Using pydantic Field() annotations on function parameters for the schema
|
|
120
|
+
""")
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
# schema must be strict
|
|
124
|
+
|
|
125
|
+
try:
|
|
126
|
+
strict_schema = ensure_strict_json_schema(copy.deepcopy(tool.inputSchema))
|
|
127
|
+
except Exception as e:
|
|
128
|
+
self.console.warning_log(f"Failed to convert tool '{tool.name}' schema to strict: {e}")
|
|
129
|
+
return None
|
|
113
130
|
|
|
114
|
-
|
|
131
|
+
return FunctionToolParam(
|
|
132
|
+
type="function",
|
|
133
|
+
name=tool.name,
|
|
134
|
+
description=tool.description,
|
|
135
|
+
parameters=strict_schema,
|
|
136
|
+
strict=True,
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
def _convert_tools_for_openai(self) -> None:
|
|
140
|
+
"""Convert MCP tools into OpenAI Responses tool definitions."""
|
|
141
|
+
available_tools = self.get_available_tools()
|
|
142
|
+
|
|
143
|
+
self._openai_tools = []
|
|
144
|
+
self._tool_name_map = {}
|
|
145
|
+
|
|
146
|
+
for tool in available_tools:
|
|
147
|
+
openai_tool = self._to_openai_tool(tool)
|
|
148
|
+
if openai_tool is None:
|
|
149
|
+
continue
|
|
150
|
+
|
|
151
|
+
if "name" in openai_tool:
|
|
152
|
+
self._tool_name_map[openai_tool["name"]] = tool.name
|
|
153
|
+
self._openai_tools.append(openai_tool)
|
|
154
|
+
|
|
155
|
+
def _extract_tool_call(self, item: Any) -> MCPToolCall | None:
|
|
156
|
+
"""Extract an MCPToolCall from a response output item.
|
|
157
|
+
|
|
158
|
+
Subclasses can override to customize tool call extraction (e.g., routing
|
|
159
|
+
computer_call to a different tool name).
|
|
115
160
|
"""
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
161
|
+
if item.type == "function_call":
|
|
162
|
+
tool_name = item.name or ""
|
|
163
|
+
target_name = self._tool_name_map.get(tool_name, tool_name)
|
|
164
|
+
arguments = json.loads(item.arguments)
|
|
165
|
+
return MCPToolCall(name=target_name, arguments=arguments, id=item.call_id)
|
|
166
|
+
elif item.type == "shell_call":
|
|
167
|
+
return MCPToolCall(name="shell", arguments=item.action.to_dict(), id=item.call_id)
|
|
168
|
+
elif item.type == "apply_patch_call":
|
|
169
|
+
return MCPToolCall(
|
|
170
|
+
name="apply_patch", arguments=item.operation.to_dict(), id=item.call_id
|
|
171
|
+
)
|
|
172
|
+
return None
|
|
120
173
|
|
|
121
|
-
|
|
174
|
+
async def _run_context(
|
|
175
|
+
self, context: list[types.ContentBlock], *, max_steps: int = 10
|
|
176
|
+
) -> Trace:
|
|
177
|
+
"""Reset internal state before delegating to the base loop."""
|
|
178
|
+
self._reset_response_state()
|
|
122
179
|
return await super()._run_context(context, max_steps=max_steps)
|
|
123
180
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
181
|
+
def _reset_response_state(self) -> None:
|
|
182
|
+
self.last_response_id = None
|
|
183
|
+
self._message_cursor = 0
|
|
127
184
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
"""
|
|
185
|
+
async def get_system_messages(self) -> list[types.ContentBlock]:
|
|
186
|
+
"""System messages are provided via the `instructions` field."""
|
|
131
187
|
return []
|
|
132
188
|
|
|
133
|
-
async def format_blocks(
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
"""
|
|
137
|
-
Format blocks for OpenAI input format.
|
|
138
|
-
|
|
139
|
-
Converts TextContent blocks to input_text dicts and ImageContent blocks to input_image dicts.
|
|
140
|
-
""" # noqa: E501
|
|
141
|
-
formatted = []
|
|
189
|
+
async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Message]:
|
|
190
|
+
"""Convert MCP content blocks into OpenAI user messages."""
|
|
191
|
+
content: ResponseInputMessageContentListParam = []
|
|
142
192
|
for block in blocks:
|
|
143
193
|
if isinstance(block, types.TextContent):
|
|
144
|
-
|
|
194
|
+
content.append(ResponseInputTextParam(type="input_text", text=block.text))
|
|
145
195
|
elif isinstance(block, types.ImageContent):
|
|
146
196
|
mime_type = getattr(block, "mimeType", "image/png")
|
|
147
|
-
|
|
148
|
-
|
|
197
|
+
content.append(
|
|
198
|
+
ResponseInputImageParam(
|
|
199
|
+
type="input_image",
|
|
200
|
+
image_url=f"data:{mime_type};base64,{block.data}",
|
|
201
|
+
detail="auto",
|
|
202
|
+
)
|
|
149
203
|
)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
computer_tool_name = "openai_computer"
|
|
163
|
-
|
|
164
|
-
# Define the computer use tool
|
|
165
|
-
computer_tool: ToolParam = { # type: ignore[reportAssignmentType]
|
|
166
|
-
"type": "computer_use_preview",
|
|
167
|
-
"display_width": self.metadata["display_width"],
|
|
168
|
-
"display_height": self.metadata["display_height"],
|
|
169
|
-
"environment": self.environment,
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
# Build the request based on whether this is first step or follow-up
|
|
173
|
-
if self.pending_call_id is None and self.last_response_id is None:
|
|
174
|
-
# First step - messages are already formatted dicts from format_blocks
|
|
175
|
-
# format_blocks returns type ResponseInputMessageContentListParam, which is a list of dicts # noqa: E501
|
|
176
|
-
input_content: ResponseInputMessageContentListParam = []
|
|
177
|
-
|
|
178
|
-
input_content.extend(messages)
|
|
179
|
-
|
|
180
|
-
# If no content was added, add empty text to avoid empty request
|
|
181
|
-
if not input_content:
|
|
182
|
-
input_content.append({"type": "input_text", "text": ""})
|
|
183
|
-
|
|
184
|
-
input_param: ResponseInputParam = [{"role": "user", "content": input_content}] # type: ignore[reportUnknownMemberType]
|
|
185
|
-
|
|
186
|
-
response = await self.openai_client.responses.create(
|
|
187
|
-
model=self.model,
|
|
188
|
-
tools=[computer_tool],
|
|
189
|
-
input=input_param,
|
|
190
|
-
instructions=self.system_prompt,
|
|
191
|
-
truncation="auto",
|
|
192
|
-
reasoning={"summary": "auto"}, # type: ignore[arg-type]
|
|
193
|
-
)
|
|
194
|
-
else:
|
|
195
|
-
# Follow-up step - check if this is user input or tool result
|
|
196
|
-
latest_message = messages[-1] if messages else {}
|
|
197
|
-
|
|
198
|
-
if latest_message.get("type") == "input_text":
|
|
199
|
-
# User provided input in conversation mode
|
|
200
|
-
user_text = latest_message.get("text", "")
|
|
201
|
-
input_param_followup: ResponseInputParam = [ # type: ignore[reportAssignmentType]
|
|
202
|
-
{"role": "user", "content": [{"type": "input_text", "text": user_text}]}
|
|
203
|
-
]
|
|
204
|
-
# Reset pending_call_id since this is user input, not a tool response
|
|
205
|
-
self.pending_call_id = None
|
|
206
|
-
else:
|
|
207
|
-
# Tool result - need screenshot from processed results
|
|
208
|
-
latest_screenshot = None
|
|
209
|
-
for msg in reversed(messages):
|
|
210
|
-
if isinstance(msg, dict) and "image_url" in msg:
|
|
211
|
-
latest_screenshot = msg["image_url"] # type: ignore
|
|
212
|
-
break
|
|
213
|
-
|
|
214
|
-
if not latest_screenshot:
|
|
215
|
-
self.console.warning_log("No screenshot provided for response to action")
|
|
216
|
-
return AgentResponse(
|
|
217
|
-
content="No screenshot available for next action",
|
|
218
|
-
tool_calls=[],
|
|
219
|
-
done=True,
|
|
204
|
+
if not content:
|
|
205
|
+
content.append(ResponseInputTextParam(type="input_text", text=""))
|
|
206
|
+
return [Message(role="user", content=content)]
|
|
207
|
+
|
|
208
|
+
async def get_response(self, messages: ResponseInputParam) -> AgentResponse:
|
|
209
|
+
"""Send the latest input items to OpenAI's Responses API."""
|
|
210
|
+
new_items: ResponseInputParam = messages[self._message_cursor :]
|
|
211
|
+
if not new_items:
|
|
212
|
+
if self.last_response_id is None:
|
|
213
|
+
new_items = [
|
|
214
|
+
Message(
|
|
215
|
+
role="user", content=[ResponseInputTextParam(type="input_text", text="")]
|
|
220
216
|
)
|
|
221
|
-
|
|
222
|
-
# Create response to previous action
|
|
223
|
-
input_param_followup: ResponseInputParam = [ # type: ignore[reportAssignmentType]
|
|
224
|
-
{ # type: ignore[reportAssignmentType]
|
|
225
|
-
"call_id": self.pending_call_id,
|
|
226
|
-
"type": "computer_call_output",
|
|
227
|
-
"output": {
|
|
228
|
-
"type": "input_image",
|
|
229
|
-
"image_url": latest_screenshot,
|
|
230
|
-
},
|
|
231
|
-
"acknowledged_safety_checks": self.pending_safety_checks,
|
|
232
|
-
}
|
|
233
217
|
]
|
|
218
|
+
else:
|
|
219
|
+
self.console.debug("No new messages to send to OpenAI.")
|
|
220
|
+
return AgentResponse(content="", tool_calls=[], done=True)
|
|
221
|
+
|
|
222
|
+
response = await self.openai_client.responses.create(
|
|
223
|
+
model=self._model,
|
|
224
|
+
input=new_items,
|
|
225
|
+
instructions=self.system_prompt,
|
|
226
|
+
max_output_tokens=self.max_output_tokens,
|
|
227
|
+
temperature=self.temperature,
|
|
228
|
+
tool_choice=self.tool_choice if self.tool_choice is not None else Omit(),
|
|
229
|
+
parallel_tool_calls=self.parallel_tool_calls,
|
|
230
|
+
reasoning=self.reasoning,
|
|
231
|
+
tools=self._openai_tools if self._openai_tools else Omit(),
|
|
232
|
+
previous_response_id=(
|
|
233
|
+
self.last_response_id if self.last_response_id is not None else Omit()
|
|
234
|
+
),
|
|
235
|
+
truncation=self.truncation,
|
|
236
|
+
)
|
|
234
237
|
|
|
235
|
-
self.pending_safety_checks = []
|
|
236
|
-
|
|
237
|
-
response = await self.openai_client.responses.create(
|
|
238
|
-
model=self.model,
|
|
239
|
-
previous_response_id=self.last_response_id,
|
|
240
|
-
tools=[computer_tool],
|
|
241
|
-
input=input_param_followup,
|
|
242
|
-
instructions=self.system_prompt,
|
|
243
|
-
truncation="auto",
|
|
244
|
-
reasoning={"summary": "auto"}, # type: ignore[arg-type]
|
|
245
|
-
)
|
|
246
|
-
|
|
247
|
-
# Store response ID for next call
|
|
248
238
|
self.last_response_id = response.id
|
|
239
|
+
self._message_cursor = len(messages)
|
|
249
240
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
tool_calls=[],
|
|
254
|
-
done=False, # Will be set to True only if no tool calls
|
|
255
|
-
)
|
|
241
|
+
agent_response = AgentResponse(content="", tool_calls=[], done=True)
|
|
242
|
+
text_chunks: list[str] = []
|
|
243
|
+
reasoning_chunks: list[str] = []
|
|
256
244
|
|
|
257
|
-
self.pending_call_id = None
|
|
258
|
-
|
|
259
|
-
# Check for computer calls
|
|
260
|
-
computer_calls = [
|
|
261
|
-
item
|
|
262
|
-
for item in response.output
|
|
263
|
-
if isinstance(item, ResponseComputerToolCall) and item.type == "computer_call"
|
|
264
|
-
]
|
|
265
|
-
|
|
266
|
-
if computer_calls:
|
|
267
|
-
# Process computer calls
|
|
268
|
-
result.done = False
|
|
269
|
-
for computer_call in computer_calls:
|
|
270
|
-
self.pending_call_id = computer_call.call_id
|
|
271
|
-
self.pending_safety_checks = computer_call.pending_safety_checks
|
|
272
|
-
|
|
273
|
-
# Convert OpenAI action to MCP tool call
|
|
274
|
-
action = computer_call.action.model_dump()
|
|
275
|
-
|
|
276
|
-
# Create MCPToolCall object with OpenAI metadata as extra fields
|
|
277
|
-
# Pyright will complain but the tool class accepts extra fields
|
|
278
|
-
tool_call = MCPToolCall(
|
|
279
|
-
name=computer_tool_name,
|
|
280
|
-
arguments=action,
|
|
281
|
-
id=computer_call.call_id, # type: ignore
|
|
282
|
-
pending_safety_checks=computer_call.pending_safety_checks, # type: ignore
|
|
283
|
-
)
|
|
284
|
-
result.tool_calls.append(tool_call)
|
|
285
|
-
else:
|
|
286
|
-
# No computer calls, check for text response
|
|
287
|
-
for item in response.output:
|
|
288
|
-
if isinstance(item, ResponseOutputMessage) and item.type == "message":
|
|
289
|
-
# Extract text from content blocks
|
|
290
|
-
text_parts = [
|
|
291
|
-
content.text
|
|
292
|
-
for content in item.content
|
|
293
|
-
if isinstance(content, ResponseOutputText)
|
|
294
|
-
]
|
|
295
|
-
if text_parts:
|
|
296
|
-
result.content = "".join(text_parts)
|
|
297
|
-
break
|
|
298
|
-
|
|
299
|
-
# Extract reasoning if present
|
|
300
|
-
reasoning_text = ""
|
|
301
245
|
for item in response.output:
|
|
302
|
-
if item.type == "
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
246
|
+
if item.type == "message":
|
|
247
|
+
text = "".join(
|
|
248
|
+
content.text
|
|
249
|
+
for content in item.content
|
|
250
|
+
if isinstance(content, ResponseOutputText)
|
|
251
|
+
)
|
|
252
|
+
if text:
|
|
253
|
+
text_chunks.append(text)
|
|
254
|
+
elif item.type == "reasoning":
|
|
255
|
+
reasoning_chunks.append("".join(summary.text for summary in item.summary))
|
|
256
|
+
else:
|
|
257
|
+
tool_call = self._extract_tool_call(item)
|
|
258
|
+
if tool_call is not None:
|
|
259
|
+
agent_response.tool_calls.append(tool_call)
|
|
307
260
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
result.done = True
|
|
261
|
+
if agent_response.tool_calls:
|
|
262
|
+
agent_response.done = False
|
|
311
263
|
|
|
312
|
-
|
|
264
|
+
agent_response.content = "".join(text_chunks)
|
|
265
|
+
if reasoning_chunks:
|
|
266
|
+
agent_response.reasoning = "\n".join(reasoning_chunks)
|
|
267
|
+
return agent_response
|
|
313
268
|
|
|
314
269
|
async def format_tool_results(
|
|
315
270
|
self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
|
|
316
|
-
) ->
|
|
317
|
-
"""
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
271
|
+
) -> list[FunctionCallOutput]:
|
|
272
|
+
"""Convert MCP tool outputs into Responses input items."""
|
|
273
|
+
formatted: list[FunctionCallOutput] = []
|
|
274
|
+
for call, result in zip(tool_calls, tool_results, strict=False):
|
|
275
|
+
if not call.id:
|
|
276
|
+
self.console.warning_log(f"Tool '{call.name}' missing call_id; skipping output.")
|
|
277
|
+
continue
|
|
278
|
+
|
|
279
|
+
output_items: ResponseFunctionCallOutputItemListParam = []
|
|
280
|
+
if result.isError:
|
|
281
|
+
output_items.append(
|
|
282
|
+
ResponseInputTextParam(type="input_text", text="[tool_error] true")
|
|
283
|
+
)
|
|
325
284
|
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
285
|
+
if result.structuredContent is not None:
|
|
286
|
+
output_items.append(
|
|
287
|
+
ResponseInputTextParam(
|
|
288
|
+
type="input_text", text=json.dumps(result.structuredContent, default=str)
|
|
289
|
+
)
|
|
290
|
+
)
|
|
330
291
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
292
|
+
for block in result.content:
|
|
293
|
+
match block:
|
|
294
|
+
case types.TextContent():
|
|
295
|
+
output_items.append(
|
|
296
|
+
ResponseInputTextContentParam(type="input_text", text=block.text)
|
|
297
|
+
)
|
|
298
|
+
case types.ImageContent():
|
|
299
|
+
mime_type = getattr(block, "mimeType", "image/png")
|
|
300
|
+
output_items.append(
|
|
301
|
+
ResponseInputImageContentParam(
|
|
302
|
+
type="input_image",
|
|
303
|
+
image_url=f"data:{mime_type};base64,{block.data}",
|
|
304
|
+
)
|
|
305
|
+
)
|
|
306
|
+
case types.ResourceLink():
|
|
307
|
+
output_items.append(
|
|
308
|
+
ResponseInputFileContentParam(
|
|
309
|
+
type="input_file", file_url=str(block.uri)
|
|
310
|
+
)
|
|
311
|
+
)
|
|
312
|
+
case types.EmbeddedResource():
|
|
313
|
+
match block.resource:
|
|
314
|
+
case types.TextResourceContents():
|
|
315
|
+
output_items.append(
|
|
316
|
+
ResponseInputTextContentParam(
|
|
317
|
+
type="input_text", text=block.resource.text
|
|
318
|
+
)
|
|
319
|
+
)
|
|
320
|
+
case types.BlobResourceContents():
|
|
321
|
+
output_items.append(
|
|
322
|
+
ResponseInputFileContentParam(
|
|
323
|
+
type="input_file", file_data=block.resource.blob
|
|
324
|
+
)
|
|
325
|
+
)
|
|
326
|
+
case _:
|
|
327
|
+
self.console.warning_log(
|
|
328
|
+
f"Unknown resource type: {type(block.resource)}"
|
|
329
|
+
)
|
|
330
|
+
case _:
|
|
331
|
+
self.console.warning_log(f"Unknown content block type: {type(block)}")
|
|
332
|
+
|
|
333
|
+
if not output_items:
|
|
334
|
+
output_items.append(ResponseInputTextParam(type="input_text", text=""))
|
|
335
|
+
|
|
336
|
+
formatted.append(
|
|
337
|
+
FunctionCallOutput(
|
|
338
|
+
type="function_call_output", call_id=call.id, output=output_items
|
|
339
|
+
),
|
|
353
340
|
)
|
|
354
|
-
|
|
355
|
-
return formatted_results
|
|
341
|
+
return formatted
|