nighthawk-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nighthawk/__init__.py +48 -0
- nighthawk/backends/__init__.py +0 -0
- nighthawk/backends/base.py +95 -0
- nighthawk/backends/claude_code_cli.py +342 -0
- nighthawk/backends/claude_code_sdk.py +325 -0
- nighthawk/backends/codex.py +352 -0
- nighthawk/backends/mcp_boundary.py +129 -0
- nighthawk/backends/mcp_server.py +226 -0
- nighthawk/backends/tool_bridge.py +240 -0
- nighthawk/configuration.py +193 -0
- nighthawk/errors.py +25 -0
- nighthawk/identifier_path.py +35 -0
- nighthawk/json_renderer.py +216 -0
- nighthawk/natural/__init__.py +0 -0
- nighthawk/natural/blocks.py +279 -0
- nighthawk/natural/decorator.py +302 -0
- nighthawk/natural/transform.py +346 -0
- nighthawk/runtime/__init__.py +0 -0
- nighthawk/runtime/async_bridge.py +50 -0
- nighthawk/runtime/prompt.py +344 -0
- nighthawk/runtime/runner.py +462 -0
- nighthawk/runtime/scoping.py +288 -0
- nighthawk/runtime/step_context.py +171 -0
- nighthawk/runtime/step_contract.py +231 -0
- nighthawk/runtime/step_executor.py +360 -0
- nighthawk/runtime/tool_calls.py +99 -0
- nighthawk/tools/__init__.py +0 -0
- nighthawk/tools/assignment.py +246 -0
- nighthawk/tools/contracts.py +72 -0
- nighthawk/tools/execution.py +83 -0
- nighthawk/tools/provided.py +80 -0
- nighthawk/tools/registry.py +212 -0
- nighthawk_python-0.1.0.dist-info/METADATA +111 -0
- nighthawk_python-0.1.0.dist-info/RECORD +36 -0
- nighthawk_python-0.1.0.dist-info/WHEEL +4 -0
- nighthawk_python-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Any, Literal
|
|
8
|
+
|
|
9
|
+
from opentelemetry import context as otel_context
|
|
10
|
+
from pydantic import BaseModel, ConfigDict, field_validator
|
|
11
|
+
from pydantic_ai.builtin_tools import AbstractBuiltinTool
|
|
12
|
+
from pydantic_ai.exceptions import UnexpectedModelBehavior, UserError
|
|
13
|
+
from pydantic_ai.messages import ModelMessage, ModelResponse, TextPart
|
|
14
|
+
from pydantic_ai.models import ModelRequestParameters
|
|
15
|
+
from pydantic_ai.profiles import ModelProfile
|
|
16
|
+
from pydantic_ai.settings import ModelSettings
|
|
17
|
+
from pydantic_ai.usage import RequestUsage
|
|
18
|
+
|
|
19
|
+
from ..json_renderer import to_jsonable_value
|
|
20
|
+
from ..tools.registry import get_visible_tools
|
|
21
|
+
from .base import BackendModelBase
|
|
22
|
+
from .mcp_boundary import call_tool_for_claude_code_sdk
|
|
23
|
+
from .tool_bridge import ToolHandler
|
|
24
|
+
|
|
25
|
+
# Claude Code permission modes accepted by the Claude Agent SDK.
type PermissionMode = Literal["default", "acceptEdits", "plan", "bypassPermissions"]

# Configuration sources Claude Code may load settings from.
type SettingSource = Literal["user", "project", "local"]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _normalize_timestamp(value: object) -> datetime:
|
|
31
|
+
if isinstance(value, datetime):
|
|
32
|
+
return value
|
|
33
|
+
return datetime.now(tz=datetime.now().astimezone().tzinfo)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ClaudeCodeSdkModelSettings(BaseModel):
    """Settings for the Claude Code SDK backend.

    Attributes:
        permission_mode: Claude Code permission mode.
        setting_sources: Configuration sources to load.
        allowed_tool_names: Nighthawk tool names exposed to the model.
        claude_allowed_tool_names: Additional Claude Code native tool names to allow.
        claude_max_turns: Maximum conversation turns.
        working_directory: Absolute path to the working directory for Claude Code.
    """

    model_config = ConfigDict(extra="forbid")

    permission_mode: PermissionMode = "default"
    setting_sources: list[SettingSource] | None = None
    allowed_tool_names: tuple[str, ...] | None = None
    claude_allowed_tool_names: tuple[str, ...] | None = None
    claude_max_turns: int = 50
    working_directory: str = ""

    @field_validator("claude_max_turns")
    @classmethod
    def _validate_claude_max_turns(cls, value: int) -> int:
        # A non-positive turn budget would make every run fail immediately.
        if value < 1:
            raise ValueError("claude_max_turns must be greater than 0")
        return value

    @field_validator("working_directory")
    @classmethod
    def _validate_working_directory(cls, value: str) -> str:
        # Empty means "use the current directory"; anything else must be
        # absolute so Claude Code resolves paths predictably.
        if not value:
            return value
        if os.path.isabs(value):
            return value
        raise ValueError("working_directory must be an absolute path")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _get_claude_code_sdk_model_settings(model_settings: ModelSettings | None) -> ClaudeCodeSdkModelSettings:
    """Validate raw model settings into ClaudeCodeSdkModelSettings, defaulting when absent."""
    if model_settings is None:
        return ClaudeCodeSdkModelSettings()
    try:
        validated_settings = ClaudeCodeSdkModelSettings.model_validate(model_settings)
    except Exception as exception:
        # Surface validation failures as a user-facing error rather than a stack trace.
        raise UserError(str(exception)) from exception
    return validated_settings
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _build_json_schema_output_format(model_request_parameters: ModelRequestParameters) -> dict[str, Any] | None:
|
|
82
|
+
output_object = model_request_parameters.output_object
|
|
83
|
+
if output_object is None:
|
|
84
|
+
return None
|
|
85
|
+
|
|
86
|
+
schema = dict(output_object.json_schema)
|
|
87
|
+
if output_object.name:
|
|
88
|
+
schema["title"] = output_object.name
|
|
89
|
+
if output_object.description:
|
|
90
|
+
schema["description"] = output_object.description
|
|
91
|
+
|
|
92
|
+
return {"type": "json_schema", "schema": schema}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _normalize_claude_code_sdk_usage_to_request_usage(usage: object) -> RequestUsage:
    """Map a Claude Code usage payload onto a RequestUsage.

    Non-dict payloads and non-int token counts are ignored rather than
    raising, leaving the corresponding fields at their defaults.
    """
    request_usage = RequestUsage()
    if not isinstance(usage, dict):
        return request_usage

    # Claude Code usage key -> RequestUsage attribute name.
    key_to_attribute = (
        ("input_tokens", "input_tokens"),
        ("output_tokens", "output_tokens"),
        ("cache_read_input_tokens", "cache_read_tokens"),
        ("cache_creation_input_tokens", "cache_write_tokens"),
    )
    for usage_key, attribute_name in key_to_attribute:
        token_count = usage.get(usage_key)
        if isinstance(token_count, int):
            setattr(request_usage, attribute_name, token_count)

    return request_usage
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _serialize_result_message_to_json(result_message: object) -> str:
|
|
120
|
+
result_message_model_dump_json = getattr(result_message, "model_dump_json", None)
|
|
121
|
+
if callable(result_message_model_dump_json):
|
|
122
|
+
try:
|
|
123
|
+
result_message_json = result_message_model_dump_json()
|
|
124
|
+
if isinstance(result_message_json, str):
|
|
125
|
+
return result_message_json
|
|
126
|
+
except Exception:
|
|
127
|
+
pass
|
|
128
|
+
|
|
129
|
+
result_message_model_dump = getattr(result_message, "model_dump", None)
|
|
130
|
+
if callable(result_message_model_dump):
|
|
131
|
+
with contextlib.suppress(Exception):
|
|
132
|
+
result_message = result_message_model_dump()
|
|
133
|
+
|
|
134
|
+
try:
|
|
135
|
+
return json.dumps(to_jsonable_value(result_message), ensure_ascii=False)
|
|
136
|
+
except Exception:
|
|
137
|
+
return json.dumps({"result_message_repr": repr(result_message)}, ensure_ascii=False)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class ClaudeCodeSdkModel(BackendModelBase):
    """Pydantic AI model that delegates to Claude Code via the Claude Agent SDK.

    Nighthawk tools are exposed to Claude Code through an in-process SDK MCP
    server registered under the "nighthawk" name, so tool names visible to the
    model are prefixed with ``mcp__nighthawk__``.
    """

    def __init__(self, *, model_name: str | None = None) -> None:
        """Initialize the backend.

        Args:
            model_name: Claude model identifier to request, or None to use the
                Claude Code default.
        """
        super().__init__(
            backend_label="Claude Code SDK backend",
            # Structured output goes through Claude Code's native output_format;
            # plain JSON-object mode and image output are not offered.
            profile=ModelProfile(
                supports_tools=True,
                supports_json_schema_output=True,
                supports_json_object_output=False,
                supports_image_output=False,
                default_structured_output_mode="native",
                supported_builtin_tools=frozenset([AbstractBuiltinTool]),
            ),
        )
        self._model_name = model_name

    @property
    def model_name(self) -> str:
        # Prefixed so logs and telemetry can tell this backend's models apart.
        return f"claude-code-sdk:{self._model_name or 'default'}"

    @property
    def system(self) -> str:
        return "anthropic"

    async def request(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> ModelResponse:
        """Run one Claude Code turn and convert its result into a ModelResponse.

        Raises:
            UnexpectedModelBehavior: If a tool definition is missing, Claude
                Code reports an error, produces no result message, or omits
                required structured output.
        """
        # Imported lazily so the claude_agent_sdk dependency is only required
        # when this backend is actually used.
        from claude_agent_sdk import (
            ClaudeAgentOptions,
            ClaudeSDKClient,
            SdkMcpTool,
            create_sdk_mcp_server,
        )
        from claude_agent_sdk.types import AssistantMessage, Message, ResultMessage  # pyright: ignore[reportMissingImports]

        model_settings, model_request_parameters = self.prepare_request(model_settings, model_request_parameters)

        # Captured now so tool handlers invoked from the SDK's own task context
        # can re-attach to the caller's OpenTelemetry context.
        parent_otel_context = otel_context.get_current()

        _, system_prompt_text, user_prompt_text = self._prepare_common_request_parts(
            messages=messages,
            model_request_parameters=model_request_parameters,
        )

        claude_code_model_settings = _get_claude_code_sdk_model_settings(model_settings)

        tool_name_to_tool_definition, tool_name_to_handler, allowed_tool_names = await self._prepare_allowed_tools(
            model_request_parameters=model_request_parameters,
            configured_allowed_tool_names=claude_code_model_settings.allowed_tool_names,
            visible_tools=get_visible_tools(),
        )

        # Wrap each Nighthawk tool handler as an SDK MCP tool.
        mcp_tools: list[Any] = []
        for tool_name, handler in tool_name_to_handler.items():
            tool_definition = tool_name_to_tool_definition.get(tool_name)
            if tool_definition is None:
                raise UnexpectedModelBehavior(f"Tool definition missing for {tool_name!r}")

            # handler/tool_name are bound via keyword defaults to avoid the
            # late-binding-closure pitfall inside this loop.
            async def wrapped_handler(
                arguments: dict[str, Any],
                *,
                tool_handler: ToolHandler = handler,
                bound_tool_name: str = tool_name,
            ) -> dict[str, Any]:
                return await call_tool_for_claude_code_sdk(
                    tool_name=bound_tool_name,
                    arguments=arguments,
                    tool_handler=tool_handler,
                    parent_otel_context=parent_otel_context,
                )

            mcp_tools.append(
                SdkMcpTool(
                    name=tool_name,
                    description=tool_definition.description or "",
                    input_schema=tool_definition.parameters_json_schema,
                    handler=wrapped_handler,
                )
            )

        sdk_server = create_sdk_mcp_server("nighthawk", tools=mcp_tools)

        # MCP-exposed names carry the server prefix Claude Code expects.
        allowed_tools_for_claude = [f"mcp__nighthawk__{tool_name}" for tool_name in allowed_tool_names]

        # Merge native Claude tool names with the MCP names, native first,
        # dropping duplicates while preserving order.
        claude_allowed_tool_names = claude_code_model_settings.claude_allowed_tool_names or ()
        merged_allowed_tools: list[str] = []
        seen_allowed_tools: set[str] = set()
        for tool_name in [*claude_allowed_tool_names, *allowed_tools_for_claude]:
            if tool_name in seen_allowed_tools:
                continue
            merged_allowed_tools.append(tool_name)
            seen_allowed_tools.add(tool_name)

        working_directory = claude_code_model_settings.working_directory

        # Tell the model how MCP prefixing maps onto the Nighthawk tool names
        # it sees in the prompt.
        if allowed_tool_names:
            system_prompt_text = "\n".join(
                [
                    system_prompt_text,
                    "",
                    "Tool access:",
                    "- Nighthawk tools are exposed via MCP; tool names are prefixed with: mcp__nighthawk__",
                    "- Example: to call nh_exec(...), use: mcp__nighthawk__nh_exec",
                ]
            )

        options_keyword_arguments: dict[str, Any] = {
            "tools": {
                "type": "preset",
                "preset": "claude_code",
            },
            "allowed_tools": merged_allowed_tools,
            # Append our prompt to the Claude Code preset system prompt rather
            # than replacing it.
            "system_prompt": {
                "type": "preset",
                "preset": "claude_code",
                "append": system_prompt_text,
            },
            "mcp_servers": {"nighthawk": sdk_server},
            "permission_mode": claude_code_model_settings.permission_mode,
            "model": self._model_name,
            "setting_sources": claude_code_model_settings.setting_sources,
            "max_turns": claude_code_model_settings.claude_max_turns,
            "output_format": _build_json_schema_output_format(model_request_parameters),
        }

        if working_directory:
            options_keyword_arguments["cwd"] = working_directory

        options = ClaudeAgentOptions(**options_keyword_arguments)

        assistant_model_name: str | None = None
        result_message: ResultMessage | None = None
        # Every streamed message is kept so errors can include full diagnostics.
        result_messages: list[Message] = []

        # Claude Code sets the CLAUDECODE environment variable for nested sessions.
        # When the variable is set, the Claude Code CLI refuses to launch.
        # This modifies the process-global environment, which is unavoidable because
        # the Claude Agent SDK inherits environment variables from the parent process.
        saved_claudecode_value = os.environ.pop("CLAUDECODE", None)

        try:
            async with ClaudeSDKClient(options=options) as client:
                await client.query(user_prompt_text)

                # Stream until the SDK signals completion; remember the last
                # assistant model name and the final ResultMessage.
                async for message in client.receive_response():
                    if isinstance(message, AssistantMessage):
                        assistant_model_name = message.model
                    elif isinstance(message, ResultMessage):
                        result_message = message
                    result_messages.append(message)
        finally:
            # Restore the variable we popped above so the parent session's
            # environment is left as we found it.
            if saved_claudecode_value is not None:
                os.environ["CLAUDECODE"] = saved_claudecode_value

        if result_message is None:
            raise UnexpectedModelBehavior("Claude Code backend did not produce a result message")

        if result_message.is_error:
            error_text = result_message.result or "Claude Code backend reported an error"
            result_messages_json = _serialize_result_message_to_json(result_messages)
            raise UnexpectedModelBehavior(
                f"{error_text}\nresult_message_json={result_messages_json}\noutput_format={options_keyword_arguments['output_format']}"
            )

        # Prefer structured output when present; otherwise require plain text.
        structured_output = result_message.structured_output
        if structured_output is None:
            # A schema was requested but not honored — fail loudly with the
            # full message stream attached.
            if model_request_parameters.output_object is not None:
                result_messages_json = _serialize_result_message_to_json(result_messages)
                raise UnexpectedModelBehavior(f"Claude Code backend did not return structured output\nresult_message_json={result_messages_json}")

            if result_message.result is None:
                raise UnexpectedModelBehavior("Claude Code backend did not return text output")
            output_text = result_message.result
        else:
            output_text = json.dumps(structured_output, ensure_ascii=False)

        return ModelResponse(
            parts=[TextPart(content=output_text)],
            model_name=assistant_model_name,
            timestamp=_normalize_timestamp(getattr(result_message, "timestamp", None)),
            usage=_normalize_claude_code_sdk_usage_to_request_usage(getattr(result_message, "usage", None)),
        )
|
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import contextlib
|
|
5
|
+
import json
|
|
6
|
+
import tempfile
|
|
7
|
+
from dataclasses import replace
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import IO, Any, Literal, TypedDict
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, ConfigDict, field_validator
|
|
12
|
+
from pydantic_ai.builtin_tools import AbstractBuiltinTool
|
|
13
|
+
from pydantic_ai.exceptions import UnexpectedModelBehavior, UserError
|
|
14
|
+
from pydantic_ai.messages import ModelMessage, ModelResponse, TextPart
|
|
15
|
+
from pydantic_ai.models import ModelRequestParameters
|
|
16
|
+
from pydantic_ai.profiles import InlineDefsJsonSchemaTransformer, ModelProfile
|
|
17
|
+
from pydantic_ai.profiles.openai import OpenAIJsonSchemaTransformer
|
|
18
|
+
from pydantic_ai.settings import ModelSettings
|
|
19
|
+
from pydantic_ai.usage import RequestUsage
|
|
20
|
+
|
|
21
|
+
from ..tools.registry import get_visible_tools
|
|
22
|
+
from .base import BackendModelBase
|
|
23
|
+
from .mcp_server import mcp_server_if_needed
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class _CodexJsonSchemaTransformer(OpenAIJsonSchemaTransformer):
    """JSON schema transformer for Codex: inlines ``$defs``, then applies OpenAI rules."""

    def __init__(self, schema: dict[str, Any], *, strict: bool | None = None):
        # Resolve $defs/$ref indirection up front so the schema handed to the
        # OpenAI transformer (and ultimately to Codex) is self-contained.
        schema = InlineDefsJsonSchemaTransformer(schema, strict=strict).walk()
        super().__init__(schema, strict=strict)

    def transform(self, schema: dict[str, Any]) -> dict[str, Any]:
        # Normalize an empty schema to an explicit object schema.
        if not schema:
            schema = {"type": "object"}
        elif "properties" in schema and "type" not in schema:
            # A schema with properties but no type is implicitly an object;
            # copy before mutating so the caller's dict is untouched.
            schema = dict(schema)
            schema["type"] = "object"
        return super().transform(schema)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Codex sandbox isolation modes accepted by the CLI's --sandbox flag.
type SandboxMode = Literal["read-only", "workspace-write", "danger-full-access"]
# Reasoning effort levels accepted by the model_reasoning_effort config key.
type ModelReasoningEffort = Literal["minimal", "low", "medium", "high", "xhigh"]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class CodexModelSettings(BaseModel):
    """Settings for the Codex backend.

    Attributes:
        allowed_tool_names: Nighthawk tool names exposed to the model.
        codex_executable: Path or name of the Codex CLI executable.
        model_reasoning_effort: Reasoning effort level for the model.
        sandbox_mode: Codex sandbox isolation mode.
        working_directory: Absolute path to the working directory for Codex.
    """

    model_config = ConfigDict(extra="forbid")

    allowed_tool_names: tuple[str, ...] | None = None
    codex_executable: str = "codex"
    model_reasoning_effort: ModelReasoningEffort | None = None
    sandbox_mode: SandboxMode | None = None
    working_directory: str = ""

    @field_validator("codex_executable")
    @classmethod
    def _validate_codex_executable(cls, value: str) -> str:
        # A blank or whitespace-only name could never launch a process.
        if not value.strip():
            raise ValueError("codex_executable must be a non-empty string")
        return value

    @field_validator("working_directory")
    @classmethod
    def _validate_working_directory(cls, value: str) -> str:
        # Empty means "current directory"; otherwise require an absolute path
        # so --cd resolves predictably regardless of the parent process cwd.
        if not value:
            return value
        if Path(value).is_absolute():
            return value
        raise ValueError("working_directory must be an absolute path")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _get_codex_model_settings(model_settings: ModelSettings | None) -> CodexModelSettings:
    """Validate raw model settings into CodexModelSettings, defaulting when absent."""
    if model_settings is None:
        return CodexModelSettings()
    try:
        validated_settings = CodexModelSettings.model_validate(model_settings)
    except Exception as exception:
        # Surface validation failures as a user-facing error rather than a stack trace.
        raise UserError(str(exception)) from exception
    return validated_settings
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class _CodexTurnOutcome(TypedDict):
    """Parsed result of one Codex CLI turn (from the JSONL event stream)."""

    # Final agent message text.
    output_text: str
    # Codex thread identifier, when a thread.started event was seen.
    thread_id: str | None
    # Token usage accumulated from the turn.completed event.
    usage: RequestUsage
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _render_toml_value_text(value: object) -> str:
|
|
94
|
+
# Codex CLI accepts `--config key=value` where values are TOML literals.
|
|
95
|
+
# Using JSON serialization for strings/arrays produces TOML-compatible literals for the cases we need here.
|
|
96
|
+
if isinstance(value, str):
|
|
97
|
+
return json.dumps(value)
|
|
98
|
+
if isinstance(value, bool):
|
|
99
|
+
return "true" if value else "false"
|
|
100
|
+
if isinstance(value, (int, float)):
|
|
101
|
+
return str(value)
|
|
102
|
+
if isinstance(value, list):
|
|
103
|
+
return json.dumps(value)
|
|
104
|
+
if isinstance(value, dict):
|
|
105
|
+
return json.dumps(value)
|
|
106
|
+
raise TypeError(f"Unsupported config value type: {type(value).__name__}")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _build_codex_config_arguments(configuration_overrides: dict[str, object]) -> list[str]:
    """Expand configuration overrides into repeated Codex ``--config key=value`` flags."""
    # Each override becomes a ["--config", "key=<toml literal>"] pair, in
    # dict insertion order.
    return [
        argument
        for key, value in configuration_overrides.items()
        for argument in ("--config", f"{key}={_render_toml_value_text(value)}")
    ]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _parse_codex_jsonl_lines(jsonl_lines: list[str]) -> _CodexTurnOutcome:
    """Fold the Codex CLI's JSONL event stream into a single turn outcome.

    Args:
        jsonl_lines: Non-empty JSONL lines read from the Codex CLI's stdout.

    Returns:
        The final agent message text, the thread id (if announced), and the
        accumulated token usage.

    Raises:
        UnexpectedModelBehavior: On invalid JSONL, malformed events, a failed
            turn, or when no agent message was produced.
    """
    output_text: str | None = None
    thread_id: str | None = None
    # Latest `error` event message; only raised if no agent message arrives.
    most_recent_stream_error_message: str | None = None

    usage = RequestUsage()

    for line in jsonl_lines:
        try:
            event = json.loads(line)
        except Exception as exception:
            raise UnexpectedModelBehavior("Codex CLI produced invalid JSONL output") from exception

        if not isinstance(event, dict) or "type" not in event:
            raise UnexpectedModelBehavior("Codex CLI produced an unexpected event")

        event_type = event.get("type")
        if event_type == "thread.started":
            thread_id_value = event.get("thread_id")
            if isinstance(thread_id_value, str):
                thread_id = thread_id_value
        elif event_type == "turn.completed":
            # Token counts are copied defensively: only int values are used.
            usage_value = event.get("usage")
            if isinstance(usage_value, dict):
                input_tokens = usage_value.get("input_tokens")
                cached_input_tokens = usage_value.get("cached_input_tokens")
                output_tokens = usage_value.get("output_tokens")

                if isinstance(input_tokens, int):
                    usage.input_tokens = input_tokens
                if isinstance(cached_input_tokens, int):
                    usage.cache_read_tokens = cached_input_tokens
                if isinstance(output_tokens, int):
                    usage.output_tokens = output_tokens
        elif event_type == "turn.failed":
            # A failed turn aborts parsing immediately, preferring the CLI's
            # own error message when one is present.
            error_value = event.get("error")
            if isinstance(error_value, dict) and isinstance(error_value.get("message"), str):
                raise UnexpectedModelBehavior(str(error_value.get("message")))
            raise UnexpectedModelBehavior("Codex CLI reported a failed turn")
        elif event_type == "error":
            # Codex CLI can emit transient reconnect progress as `error` events.
            # Preserve the latest message and only fail if no usable response is produced.
            message_value = event.get("message")
            most_recent_stream_error_message = message_value if isinstance(message_value, str) else "Codex CLI reported a stream error"
        elif event_type == "item.completed":
            # Later agent messages overwrite earlier ones; the last wins.
            item_value = event.get("item")
            if isinstance(item_value, dict) and item_value.get("type") == "agent_message":
                text_value = item_value.get("text")
                if isinstance(text_value, str):
                    output_text = text_value

    if output_text is None:
        # No agent message: surface the buffered stream error if there was one.
        if most_recent_stream_error_message is not None:
            raise UnexpectedModelBehavior(most_recent_stream_error_message)
        raise UnexpectedModelBehavior("Codex CLI did not produce an agent message")

    return {
        "output_text": output_text,
        "thread_id": thread_id,
        "usage": usage,
    }
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class CodexModel(BackendModelBase):
    """Pydantic AI model that delegates to the Codex CLI.

    Runs ``codex exec`` as a subprocess, feeding the prompt on stdin and
    parsing the CLI's experimental JSONL event stream from stdout.
    """

    def __init__(self, *, model_name: str | None = None) -> None:
        """Initialize the backend.

        Args:
            model_name: Codex model identifier, or None for the CLI default.
        """
        super().__init__(
            backend_label="Codex backend",
            profile=ModelProfile(
                supports_tools=True,
                supports_json_schema_output=True,
                supports_json_object_output=False,
                supports_image_output=False,
                default_structured_output_mode="native",
                supported_builtin_tools=frozenset([AbstractBuiltinTool]),
                # Codex needs inlined, OpenAI-shaped schemas for --output-schema.
                json_schema_transformer=_CodexJsonSchemaTransformer,
            ),
        )
        self._model_name = model_name

    @property
    def model_name(self) -> str:
        # Prefixed so logs and telemetry can tell this backend's models apart.
        return f"codex:{self._model_name or 'default'}"

    @property
    def system(self) -> str:
        return "openai"

    async def request(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> ModelResponse:
        """Run one Codex CLI turn and convert its output into a ModelResponse.

        Raises:
            UnexpectedModelBehavior: If the CLI exits non-zero, emits a broken
                event stream, or any unexpected exception occurs.
        """
        # Force strict mode on the output object so the generated schema is
        # acceptable to Codex's structured-output handling.
        if model_request_parameters.output_object is not None:
            model_request_parameters = replace(
                model_request_parameters,
                output_object=replace(model_request_parameters.output_object, strict=True),
            )
        model_settings, model_request_parameters = self.prepare_request(model_settings, model_request_parameters)

        # Temp file holding the output schema for --output-schema; closed in
        # the finally block below.
        output_schema_file: IO[str] | None = None

        try:
            _, system_prompt_text, user_prompt_text = self._prepare_common_request_parts(
                messages=messages,
                model_request_parameters=model_request_parameters,
            )

            # Codex takes a single prompt on stdin, so system and user text
            # are concatenated (skipping empty parts).
            prompt_parts = [p for p in [system_prompt_text, user_prompt_text] if p]
            prompt_text = "\n\n".join(prompt_parts)

            codex_model_settings = _get_codex_model_settings(model_settings)

            tool_name_to_tool_definition, tool_name_to_handler, allowed_tool_names = await self._prepare_allowed_tools(
                model_request_parameters=model_request_parameters,
                configured_allowed_tool_names=codex_model_settings.allowed_tool_names,
                visible_tools=get_visible_tools(),
            )

            output_object = model_request_parameters.output_object
            if output_object is None:
                output_schema_file = None
            else:
                # Codex reads the schema from a file path, so write it out to
                # a named temp file that stays open for the CLI's lifetime.
                output_schema_file = tempfile.NamedTemporaryFile(mode="wt", encoding="utf-8", prefix="nighthawk-codex-output-schema-", suffix=".json")  # noqa: SIM115
                output_schema_file.write(json.dumps(dict(output_object.json_schema)))
                output_schema_file.flush()
            # Starts a local MCP server only when there are tools to expose;
            # yields None otherwise.
            async with mcp_server_if_needed(
                tool_name_to_tool_definition=tool_name_to_tool_definition,
                tool_name_to_handler=tool_name_to_handler,
            ) as mcp_server_url:
                configuration_overrides: dict[str, object] = {}

                if self._model_name is not None:
                    configuration_overrides["model"] = self._model_name

                if mcp_server_url is not None:
                    configuration_overrides["mcp_servers.nighthawk.url"] = mcp_server_url
                    configuration_overrides["mcp_servers.nighthawk.enabled_tools"] = list(allowed_tool_names)
                model_reasoning_effort = codex_model_settings.model_reasoning_effort
                if model_reasoning_effort is not None:
                    configuration_overrides["model_reasoning_effort"] = model_reasoning_effort

                # Base command line; overrides and optional flags are appended
                # below.
                codex_arguments = [
                    codex_model_settings.codex_executable,
                    "exec",
                    "--experimental-json",
                    "--skip-git-repo-check",
                ]
                sandbox_mode = codex_model_settings.sandbox_mode
                if sandbox_mode is not None:
                    codex_arguments.extend(["--sandbox", sandbox_mode])
                codex_arguments.extend(_build_codex_config_arguments(configuration_overrides))

                if output_schema_file is not None:
                    codex_arguments.extend(["--output-schema", output_schema_file.name])

                working_directory = codex_model_settings.working_directory
                if working_directory:
                    codex_arguments.extend(["--cd", working_directory])

                process = await asyncio.create_subprocess_exec(
                    *codex_arguments,
                    stdin=asyncio.subprocess.PIPE,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                )
                if process.stdin is None or process.stdout is None or process.stderr is None:
                    raise UnexpectedModelBehavior("Codex CLI subprocess streams are unexpectedly None")

                # Feed the entire prompt, then close stdin to signal EOF.
                process.stdin.write(prompt_text.encode("utf-8"))
                await process.stdin.drain()
                process.stdin.close()

                jsonl_lines: list[str] = []

                process_stderr = process.stderr

                async def read_stderr() -> bytes:
                    if process_stderr is None:
                        return b""
                    return await process_stderr.read()

                # Drain stderr in the background while stdout is being
                # streamed, so a filling stderr pipe cannot stall the CLI.
                stderr_task = asyncio.create_task(read_stderr())

                async for line_bytes in process.stdout:
                    line_text = line_bytes.decode("utf-8").rstrip("\n")
                    if line_text:
                        jsonl_lines.append(line_text)

                return_code = await process.wait()
                stderr_bytes = await stderr_task

                if return_code != 0:
                    # Build a bounded diagnostic string from stderr and the
                    # tail of the event stream.
                    stderr_text = stderr_bytes.decode("utf-8", errors="replace").strip()
                    detail_parts: list[str] = []

                    if stderr_text:
                        detail_parts.append(f"stderr={stderr_text[:2000]}")

                    recent_jsonl_lines = jsonl_lines[-8:]
                    if recent_jsonl_lines:
                        recent_jsonl_text = "\n".join(recent_jsonl_lines)
                        detail_parts.append(f"recent_jsonl_events={recent_jsonl_text[:4000]}")

                    if not detail_parts:
                        detail_parts.append("no stderr or JSONL events were captured")

                    detail = " | ".join(detail_parts)
                    raise UnexpectedModelBehavior(f"Codex CLI exited with non-zero status. {detail}")

                turn_outcome = _parse_codex_jsonl_lines(jsonl_lines)

                output_text = turn_outcome["output_text"]

                provider_details: dict[str, Any] = {
                    "codex": {
                        "thread_id": turn_outcome["thread_id"],
                    }
                }

                return ModelResponse(
                    parts=[TextPart(content=output_text)],
                    usage=turn_outcome["usage"],
                    model_name=self.model_name,
                    provider_name="codex",
                    provider_details=provider_details,
                )
        except (UserError, UnexpectedModelBehavior, ValueError):
            # Already-meaningful errors propagate unchanged.
            raise
        except Exception as exception:
            raise UnexpectedModelBehavior("Codex backend failed") from exception
        finally:
            # Best-effort cleanup of the schema temp file.
            if output_schema_file is not None:
                with contextlib.suppress(Exception):
                    output_schema_file.close()
|