python-codex 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycodex/__init__.py +5 -1
- pycodex/agent.py +89 -51
- pycodex/cli.py +152 -45
- pycodex/collaboration.py +6 -7
- pycodex/compat.py +99 -0
- pycodex/context.py +110 -87
- pycodex/doctor.py +40 -40
- pycodex/model.py +429 -90
- pycodex/portable.py +33 -33
- pycodex/portable_server.py +22 -21
- pycodex/prompts/models.json +30 -0
- pycodex/protocol.py +84 -86
- pycodex/runtime.py +36 -35
- pycodex/runtime_services.py +69 -69
- pycodex/tools/agent_tool_schemas.py +0 -2
- pycodex/tools/apply_patch_tool.py +45 -46
- pycodex/tools/base_tool.py +35 -36
- pycodex/tools/close_agent_tool.py +2 -4
- pycodex/tools/code_mode_manager.py +61 -61
- pycodex/tools/exec_command_tool.py +5 -6
- pycodex/tools/exec_runtime.js +3 -3
- pycodex/tools/exec_tool.py +2 -4
- pycodex/tools/grep_files_tool.py +10 -11
- pycodex/tools/list_dir_tool.py +8 -9
- pycodex/tools/read_file_tool.py +13 -14
- pycodex/tools/request_permissions_tool.py +2 -4
- pycodex/tools/request_user_input_tool.py +13 -14
- pycodex/tools/resume_agent_tool.py +2 -4
- pycodex/tools/send_input_tool.py +8 -9
- pycodex/tools/shell_command_tool.py +5 -6
- pycodex/tools/shell_tool.py +5 -6
- pycodex/tools/spawn_agent_tool.py +4 -5
- pycodex/tools/unified_exec_manager.py +62 -61
- pycodex/tools/update_plan_tool.py +4 -5
- pycodex/tools/view_image_tool.py +4 -5
- pycodex/tools/wait_agent_tool.py +2 -4
- pycodex/tools/wait_tool.py +4 -5
- pycodex/tools/web_search_tool.py +1 -3
- pycodex/tools/write_stdin_tool.py +4 -5
- pycodex/utils/__init__.py +4 -0
- pycodex/utils/compactor.py +189 -0
- pycodex/utils/dotenv.py +6 -6
- pycodex/utils/get_env.py +37 -33
- pycodex/utils/random_ids.py +1 -2
- pycodex/utils/session_persist.py +483 -0
- pycodex/utils/visualize.py +197 -83
- {python_codex-0.1.2.dist-info → python_codex-0.1.4.dist-info}/METADATA +32 -11
- python_codex-0.1.4.dist-info/RECORD +76 -0
- {python_codex-0.1.2.dist-info → python_codex-0.1.4.dist-info}/WHEEL +1 -1
- responses_server/app.py +32 -20
- responses_server/config.py +17 -17
- responses_server/payload_processors.py +26 -17
- responses_server/server.py +11 -11
- responses_server/session_store.py +10 -10
- responses_server/stream_router.py +83 -64
- responses_server/tools/custom_adapter.py +12 -12
- responses_server/tools/web_search.py +33 -33
- python_codex-0.1.2.dist-info/RECORD +0 -73
- {python_codex-0.1.2.dist-info → python_codex-0.1.4.dist-info}/entry_points.txt +0 -0
- {python_codex-0.1.2.dist-info → python_codex-0.1.4.dist-info}/licenses/LICENSE +0 -0
pycodex/model.py
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
1
|
|
|
3
2
|
import asyncio
|
|
4
3
|
import json
|
|
5
4
|
import os
|
|
5
|
+
import re
|
|
6
6
|
import urllib.parse
|
|
7
|
-
from collections.abc import Callable
|
|
8
7
|
from dataclasses import dataclass, field, replace
|
|
9
8
|
from pathlib import Path
|
|
10
|
-
from typing import
|
|
9
|
+
from typing import Callable
|
|
10
|
+
from .compat import Protocol
|
|
11
11
|
|
|
12
12
|
import requests
|
|
13
|
+
import typing
|
|
13
14
|
|
|
14
15
|
try:
|
|
15
16
|
import tomllib
|
|
@@ -29,38 +30,47 @@ from .utils import build_user_agent, uuid7_string
|
|
|
29
30
|
DEFAULT_CODEX_CONFIG_PATH = Path.home() / ".codex" / "config.toml"
|
|
30
31
|
DEFAULT_ORIGINATOR = "pycodex"
|
|
31
32
|
ModelStreamEventHandler = Callable[[ModelStreamEvent], None]
|
|
32
|
-
NOOP_MODEL_STREAM_EVENT_HANDLER: ModelStreamEventHandler = lambda _event: None
|
|
33
|
+
NOOP_MODEL_STREAM_EVENT_HANDLER: 'ModelStreamEventHandler' = lambda _event: None
|
|
34
|
+
DEFAULT_STREAM_MAX_RETRIES = 5
|
|
35
|
+
DEFAULT_STREAM_IDLE_TIMEOUT_MS = 300_000
|
|
36
|
+
INITIAL_RETRY_DELAY_SECONDS = 0.2
|
|
37
|
+
RETRY_BACKOFF_FACTOR = 2.0
|
|
38
|
+
RATE_LIMIT_RETRY_AFTER_RE = re.compile(
|
|
39
|
+
r"(?i)try again in\s*(\d+(?:\.\d+)?)\s*(s|ms|seconds?)"
|
|
40
|
+
)
|
|
33
41
|
|
|
34
42
|
|
|
35
43
|
class ModelClient(Protocol):
|
|
36
44
|
async def complete(
|
|
37
45
|
self,
|
|
38
|
-
prompt: Prompt,
|
|
39
|
-
event_handler: ModelStreamEventHandler = NOOP_MODEL_STREAM_EVENT_HANDLER,
|
|
40
|
-
) -> ModelResponse:
|
|
46
|
+
prompt: 'Prompt',
|
|
47
|
+
event_handler: 'ModelStreamEventHandler' = NOOP_MODEL_STREAM_EVENT_HANDLER,
|
|
48
|
+
) -> 'ModelResponse':
|
|
41
49
|
"""Return the next batch of model output items for the current prompt."""
|
|
42
50
|
|
|
43
51
|
|
|
44
|
-
@dataclass(frozen=True,
|
|
52
|
+
@dataclass(frozen=True, )
|
|
45
53
|
class ResponsesProviderConfig:
|
|
46
|
-
model: str
|
|
47
|
-
provider_name: str
|
|
48
|
-
base_url: str
|
|
49
|
-
api_key_env: str
|
|
50
|
-
wire_api: str = "responses"
|
|
51
|
-
query_params:
|
|
52
|
-
reasoning_effort: str
|
|
53
|
-
reasoning_summary: str
|
|
54
|
-
verbosity: str
|
|
55
|
-
sandbox_mode: str
|
|
56
|
-
beta_features_header: str
|
|
54
|
+
model: 'str'
|
|
55
|
+
provider_name: 'str'
|
|
56
|
+
base_url: 'str'
|
|
57
|
+
api_key_env: 'typing.Union[str, None]'
|
|
58
|
+
wire_api: 'str' = "responses"
|
|
59
|
+
query_params: 'typing.Dict[str, str]' = field(default_factory=dict)
|
|
60
|
+
reasoning_effort: 'typing.Union[str, None]' = None
|
|
61
|
+
reasoning_summary: 'typing.Union[str, None]' = None
|
|
62
|
+
verbosity: 'typing.Union[str, None]' = None
|
|
63
|
+
sandbox_mode: 'typing.Union[str, None]' = None
|
|
64
|
+
beta_features_header: 'typing.Union[str, None]' = None
|
|
65
|
+
stream_max_retries: 'typing.Union[int, None]' = None
|
|
66
|
+
stream_idle_timeout_ms: 'typing.Union[int, None]' = None
|
|
57
67
|
|
|
58
68
|
@classmethod
|
|
59
69
|
def from_codex_config(
|
|
60
70
|
cls,
|
|
61
|
-
config_path: str
|
|
62
|
-
profile: str
|
|
63
|
-
) -> ResponsesProviderConfig:
|
|
71
|
+
config_path: 'typing.Union[str, Path]' = DEFAULT_CODEX_CONFIG_PATH,
|
|
72
|
+
profile: 'typing.Union[str, None]' = None,
|
|
73
|
+
) -> 'ResponsesProviderConfig':
|
|
64
74
|
data = tomllib.loads(Path(config_path).read_text())
|
|
65
75
|
selected = dict(data)
|
|
66
76
|
if profile is not None:
|
|
@@ -76,17 +86,13 @@ class ResponsesProviderConfig:
|
|
|
76
86
|
raise ValueError(f"unsupported wire_api for Python client: {wire_api}")
|
|
77
87
|
|
|
78
88
|
api_key_env = provider.get("env_key")
|
|
79
|
-
if not api_key_env:
|
|
80
|
-
raise ValueError(
|
|
81
|
-
f"provider {provider_name} does not define env_key in Codex config"
|
|
82
|
-
)
|
|
83
89
|
|
|
84
90
|
query_params = {
|
|
85
91
|
str(key): str(value)
|
|
86
92
|
for key, value in provider.get("query_params", {}).items()
|
|
87
93
|
}
|
|
88
94
|
features = selected.get("features", {})
|
|
89
|
-
beta_features:
|
|
95
|
+
beta_features: 'typing.List[str]' = []
|
|
90
96
|
if isinstance(features, dict) and features.get("guardian_approval") is True:
|
|
91
97
|
beta_features.append("guardian_approval")
|
|
92
98
|
return cls(
|
|
@@ -101,9 +107,13 @@ class ResponsesProviderConfig:
|
|
|
101
107
|
verbosity=selected.get("model_verbosity"),
|
|
102
108
|
sandbox_mode=selected.get("sandbox_mode"),
|
|
103
109
|
beta_features_header=",".join(beta_features) or None,
|
|
110
|
+
stream_max_retries=_optional_int(provider.get("stream_max_retries")),
|
|
111
|
+
stream_idle_timeout_ms=_optional_int(provider.get("stream_idle_timeout_ms")),
|
|
104
112
|
)
|
|
105
113
|
|
|
106
|
-
def api_key(self) -> str:
|
|
114
|
+
def api_key(self) -> 'typing.Union[str, None]':
|
|
115
|
+
if not self.api_key_env:
|
|
116
|
+
return None
|
|
107
117
|
value = os.environ.get(self.api_key_env, "")
|
|
108
118
|
if not value:
|
|
109
119
|
raise RuntimeError(
|
|
@@ -113,9 +123,9 @@ class ResponsesProviderConfig:
|
|
|
113
123
|
|
|
114
124
|
def with_overrides(
|
|
115
125
|
self,
|
|
116
|
-
model: str
|
|
117
|
-
reasoning_effort: str
|
|
118
|
-
) -> ResponsesProviderConfig:
|
|
126
|
+
model: 'typing.Union[str, None]' = None,
|
|
127
|
+
reasoning_effort: 'typing.Union[str, None]' = None,
|
|
128
|
+
) -> 'ResponsesProviderConfig':
|
|
119
129
|
return replace(
|
|
120
130
|
self,
|
|
121
131
|
model=self.model if model is None else model,
|
|
@@ -126,11 +136,41 @@ class ResponsesProviderConfig:
|
|
|
126
136
|
),
|
|
127
137
|
)
|
|
128
138
|
|
|
139
|
+
def effective_stream_max_retries(self) -> 'int':
|
|
140
|
+
if self.stream_max_retries is None:
|
|
141
|
+
return DEFAULT_STREAM_MAX_RETRIES
|
|
142
|
+
return max(int(self.stream_max_retries), 0)
|
|
143
|
+
|
|
144
|
+
def effective_stream_idle_timeout_seconds(self) -> 'float':
|
|
145
|
+
if self.stream_idle_timeout_ms is None:
|
|
146
|
+
return DEFAULT_STREAM_IDLE_TIMEOUT_MS / 1000.0
|
|
147
|
+
return max(int(self.stream_idle_timeout_ms), 1) / 1000.0
|
|
148
|
+
|
|
129
149
|
|
|
130
150
|
class ResponsesApiError(RuntimeError):
|
|
131
151
|
pass
|
|
132
152
|
|
|
133
153
|
|
|
154
|
+
class ResponsesRetryableError(ResponsesApiError):
|
|
155
|
+
def __init__(
|
|
156
|
+
self,
|
|
157
|
+
message: 'str',
|
|
158
|
+
retry_delay_seconds: 'typing.Union[float, None]' = None,
|
|
159
|
+
) -> 'None':
|
|
160
|
+
super().__init__(message)
|
|
161
|
+
self.retry_delay_seconds = retry_delay_seconds
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@dataclass
|
|
165
|
+
class _StreamDiagnostics:
|
|
166
|
+
raw_lines_received: 'int' = 0
|
|
167
|
+
sse_events_received: 'int' = 0
|
|
168
|
+
output_items_received: 'int' = 0
|
|
169
|
+
last_sse_event_name: 'str' = ""
|
|
170
|
+
last_event_type: 'str' = ""
|
|
171
|
+
last_payload_excerpt: 'str' = ""
|
|
172
|
+
|
|
173
|
+
|
|
134
174
|
class ResponsesModelClient:
|
|
135
175
|
"""Minimal OpenAI-compatible Responses API client.
|
|
136
176
|
|
|
@@ -141,13 +181,13 @@ class ResponsesModelClient:
|
|
|
141
181
|
|
|
142
182
|
def __init__(
|
|
143
183
|
self,
|
|
144
|
-
config: ResponsesProviderConfig,
|
|
145
|
-
timeout_seconds: float = 120.0,
|
|
146
|
-
session_id: str
|
|
147
|
-
originator: str = DEFAULT_ORIGINATOR,
|
|
148
|
-
user_agent: str
|
|
149
|
-
openai_subagent: str
|
|
150
|
-
) -> None:
|
|
184
|
+
config: 'ResponsesProviderConfig',
|
|
185
|
+
timeout_seconds: 'float' = 120.0,
|
|
186
|
+
session_id: 'typing.Union[str, None]' = None,
|
|
187
|
+
originator: 'str' = DEFAULT_ORIGINATOR,
|
|
188
|
+
user_agent: 'typing.Union[str, None]' = None,
|
|
189
|
+
openai_subagent: 'typing.Union[str, None]' = None,
|
|
190
|
+
) -> 'None':
|
|
151
191
|
self._config = config
|
|
152
192
|
self.model = config.model
|
|
153
193
|
self._timeout_seconds = timeout_seconds
|
|
@@ -159,22 +199,22 @@ class ResponsesModelClient:
|
|
|
159
199
|
@classmethod
|
|
160
200
|
def from_codex_config(
|
|
161
201
|
cls,
|
|
162
|
-
config_path: str
|
|
163
|
-
profile: str
|
|
164
|
-
timeout_seconds: float = 120.0,
|
|
165
|
-
originator: str = DEFAULT_ORIGINATOR,
|
|
166
|
-
user_agent: str
|
|
167
|
-
) -> ResponsesModelClient:
|
|
202
|
+
config_path: 'typing.Union[str, Path]' = DEFAULT_CODEX_CONFIG_PATH,
|
|
203
|
+
profile: 'typing.Union[str, None]' = None,
|
|
204
|
+
timeout_seconds: 'float' = 120.0,
|
|
205
|
+
originator: 'str' = DEFAULT_ORIGINATOR,
|
|
206
|
+
user_agent: 'typing.Union[str, None]' = None,
|
|
207
|
+
) -> 'ResponsesModelClient':
|
|
168
208
|
config = ResponsesProviderConfig.from_codex_config(config_path, profile)
|
|
169
209
|
return cls(config, timeout_seconds, originator=originator, user_agent=user_agent)
|
|
170
210
|
|
|
171
211
|
def with_overrides(
|
|
172
212
|
self,
|
|
173
|
-
model: str
|
|
174
|
-
reasoning_effort: str
|
|
175
|
-
session_id: str
|
|
176
|
-
openai_subagent: str
|
|
177
|
-
) -> ResponsesModelClient:
|
|
213
|
+
model: 'typing.Union[str, None]' = None,
|
|
214
|
+
reasoning_effort: 'typing.Union[str, None]' = None,
|
|
215
|
+
session_id: 'typing.Union[str, None]' = None,
|
|
216
|
+
openai_subagent: 'typing.Union[str, None]' = None,
|
|
217
|
+
) -> 'ResponsesModelClient':
|
|
178
218
|
return ResponsesModelClient(
|
|
179
219
|
self._config.with_overrides(
|
|
180
220
|
model or self.model,
|
|
@@ -191,35 +231,64 @@ class ResponsesModelClient:
|
|
|
191
231
|
),
|
|
192
232
|
)
|
|
193
233
|
|
|
194
|
-
def responses_url(self) -> str:
|
|
234
|
+
def responses_url(self) -> 'str':
|
|
195
235
|
base_url = self._config.base_url.rstrip("/")
|
|
196
236
|
url = f"{base_url}/responses"
|
|
197
237
|
if self._config.query_params:
|
|
198
238
|
return f"{url}?{urllib.parse.urlencode(self._config.query_params)}"
|
|
199
239
|
return url
|
|
200
240
|
|
|
201
|
-
def models_url(self) -> str:
|
|
241
|
+
def models_url(self) -> 'str':
|
|
202
242
|
base_url = self._config.base_url.rstrip("/")
|
|
203
243
|
url = f"{base_url}/models"
|
|
204
244
|
if self._config.query_params:
|
|
205
245
|
return f"{url}?{urllib.parse.urlencode(self._config.query_params)}"
|
|
206
246
|
return url
|
|
207
247
|
|
|
208
|
-
async def list_models(self) ->
|
|
248
|
+
async def list_models(self) -> 'typing.List[str]':
|
|
209
249
|
return await asyncio.to_thread(self._list_models_sync)
|
|
210
250
|
|
|
211
251
|
async def complete(
|
|
212
252
|
self,
|
|
213
|
-
prompt: Prompt,
|
|
214
|
-
event_handler: ModelStreamEventHandler = NOOP_MODEL_STREAM_EVENT_HANDLER,
|
|
215
|
-
) -> ModelResponse:
|
|
216
|
-
|
|
253
|
+
prompt: 'Prompt',
|
|
254
|
+
event_handler: 'ModelStreamEventHandler' = NOOP_MODEL_STREAM_EVENT_HANDLER,
|
|
255
|
+
) -> 'ModelResponse':
|
|
256
|
+
retries = 0
|
|
257
|
+
max_retries = self._config.effective_stream_max_retries()
|
|
258
|
+
while True:
|
|
259
|
+
try:
|
|
260
|
+
return await asyncio.to_thread(
|
|
261
|
+
self._complete_sync,
|
|
262
|
+
prompt,
|
|
263
|
+
event_handler,
|
|
264
|
+
)
|
|
265
|
+
except ResponsesRetryableError as exc:
|
|
266
|
+
if retries >= max_retries:
|
|
267
|
+
raise
|
|
268
|
+
retries += 1
|
|
269
|
+
delay_seconds = exc.retry_delay_seconds
|
|
270
|
+
if delay_seconds is None:
|
|
271
|
+
delay_seconds = self._retry_delay_seconds(retries)
|
|
272
|
+
event_handler(
|
|
273
|
+
ModelStreamEvent(
|
|
274
|
+
kind="stream_error",
|
|
275
|
+
payload={
|
|
276
|
+
"message": f"Reconnecting... {retries}/{max_retries}",
|
|
277
|
+
"attempt": retries,
|
|
278
|
+
"max_retries": max_retries,
|
|
279
|
+
"delay_seconds": delay_seconds,
|
|
280
|
+
"error": str(exc),
|
|
281
|
+
},
|
|
282
|
+
)
|
|
283
|
+
)
|
|
284
|
+
if delay_seconds > 0:
|
|
285
|
+
await asyncio.sleep(delay_seconds)
|
|
217
286
|
|
|
218
287
|
def _complete_sync(
|
|
219
288
|
self,
|
|
220
|
-
prompt: Prompt,
|
|
221
|
-
event_handler: ModelStreamEventHandler,
|
|
222
|
-
) -> ModelResponse:
|
|
289
|
+
prompt: 'Prompt',
|
|
290
|
+
event_handler: 'ModelStreamEventHandler',
|
|
291
|
+
) -> 'ModelResponse':
|
|
223
292
|
payload = self._build_payload(prompt)
|
|
224
293
|
body = json.dumps(payload).encode("utf-8")
|
|
225
294
|
url = self.responses_url()
|
|
@@ -230,6 +299,7 @@ class ResponsesModelClient:
|
|
|
230
299
|
headers=self._build_headers(prompt),
|
|
231
300
|
data=body,
|
|
232
301
|
)
|
|
302
|
+
diagnostics = _StreamDiagnostics()
|
|
233
303
|
try:
|
|
234
304
|
with requests.Session() as session:
|
|
235
305
|
settings = session.merge_environment_settings(
|
|
@@ -242,41 +312,56 @@ class ResponsesModelClient:
|
|
|
242
312
|
verify = _requests_verify_setting()
|
|
243
313
|
if verify is not None:
|
|
244
314
|
settings["verify"] = verify
|
|
315
|
+
timeout = (
|
|
316
|
+
max(self._timeout_seconds, 1.0),
|
|
317
|
+
self._config.effective_stream_idle_timeout_seconds(),
|
|
318
|
+
)
|
|
245
319
|
response = session.send(
|
|
246
320
|
prepared,
|
|
247
|
-
timeout=
|
|
321
|
+
timeout=timeout,
|
|
248
322
|
allow_redirects=False,
|
|
249
323
|
**settings,
|
|
250
324
|
)
|
|
251
325
|
with response:
|
|
252
326
|
if response.status_code >= 400:
|
|
253
327
|
error_body = response.text
|
|
254
|
-
|
|
328
|
+
message = (
|
|
255
329
|
f"responses request failed with status {response.status_code}: "
|
|
256
330
|
f"{error_body[:500]}"
|
|
257
331
|
)
|
|
258
|
-
|
|
332
|
+
if response.status_code >= 500:
|
|
333
|
+
raise ResponsesRetryableError(message)
|
|
334
|
+
raise ResponsesApiError(message)
|
|
335
|
+
tracked_lines = self._track_stream_lines(
|
|
259
336
|
response.iter_lines(chunk_size=1, decode_unicode=False),
|
|
337
|
+
diagnostics,
|
|
338
|
+
)
|
|
339
|
+
return self._parse_stream(
|
|
340
|
+
tracked_lines,
|
|
260
341
|
event_handler,
|
|
342
|
+
diagnostics=diagnostics,
|
|
261
343
|
)
|
|
262
344
|
except requests.RequestException as exc:
|
|
263
|
-
raise
|
|
345
|
+
raise ResponsesRetryableError(
|
|
346
|
+
self._format_transport_error(url, exc, diagnostics)
|
|
347
|
+
) from exc
|
|
264
348
|
|
|
265
|
-
def _build_payload(self, prompt: Prompt) ->
|
|
266
|
-
payload:
|
|
349
|
+
def _build_payload(self, prompt: 'Prompt') -> 'typing.Dict[str, object]':
|
|
350
|
+
payload: 'typing.Dict[str, object]' = {
|
|
267
351
|
"model": self.model,
|
|
268
352
|
"instructions": prompt.base_instructions or "",
|
|
269
353
|
"input": [item.serialize() for item in prompt.input],
|
|
270
354
|
"tools": [tool.serialize() for tool in prompt.tools],
|
|
271
|
-
"tool_choice": "auto",
|
|
272
355
|
"parallel_tool_calls": prompt.parallel_tool_calls,
|
|
273
356
|
"store": False,
|
|
274
357
|
"stream": True,
|
|
275
358
|
"include": ["reasoning.encrypted_content"],
|
|
276
359
|
"prompt_cache_key": self._session_id,
|
|
277
360
|
}
|
|
361
|
+
if prompt.tools:
|
|
362
|
+
payload["tool_choice"] = "auto"
|
|
278
363
|
|
|
279
|
-
reasoning:
|
|
364
|
+
reasoning: 'typing.Dict[str, str]' = {}
|
|
280
365
|
if self._config.reasoning_effort is not None:
|
|
281
366
|
reasoning["effort"] = self._config.reasoning_effort
|
|
282
367
|
if self._config.reasoning_summary is not None:
|
|
@@ -292,7 +377,7 @@ class ResponsesModelClient:
|
|
|
292
377
|
|
|
293
378
|
return payload
|
|
294
379
|
|
|
295
|
-
def _list_models_sync(self) ->
|
|
380
|
+
def _list_models_sync(self) -> 'typing.List[str]':
|
|
296
381
|
prepared = requests.PreparedRequest()
|
|
297
382
|
prepared.prepare(
|
|
298
383
|
method="GET",
|
|
@@ -330,7 +415,7 @@ class ResponsesModelClient:
|
|
|
330
415
|
data = payload.get("data")
|
|
331
416
|
if not isinstance(data, list):
|
|
332
417
|
raise ResponsesApiError("models response is missing `data` list")
|
|
333
|
-
models:
|
|
418
|
+
models: 'typing.List[str]' = []
|
|
334
419
|
for item in data:
|
|
335
420
|
if not isinstance(item, dict):
|
|
336
421
|
continue
|
|
@@ -339,16 +424,18 @@ class ResponsesModelClient:
|
|
|
339
424
|
models.append(model_id)
|
|
340
425
|
return models
|
|
341
426
|
|
|
342
|
-
def _build_headers(self, prompt: Prompt) ->
|
|
427
|
+
def _build_headers(self, prompt: 'Prompt') -> 'typing.Dict[str, str]':
|
|
343
428
|
headers = {
|
|
344
429
|
"content-type": "application/json",
|
|
345
430
|
"accept": "text/event-stream",
|
|
346
|
-
"authorization": f"Bearer {self._config.api_key()}",
|
|
347
431
|
"x-client-request-id": self._session_id,
|
|
348
432
|
"session_id": self._session_id,
|
|
349
433
|
"originator": self._originator,
|
|
350
434
|
"user-agent": self._user_agent,
|
|
351
435
|
}
|
|
436
|
+
api_key = self._config.api_key()
|
|
437
|
+
if api_key is not None:
|
|
438
|
+
headers["authorization"] = f"Bearer {api_key}"
|
|
352
439
|
if self._config.beta_features_header is not None:
|
|
353
440
|
headers["x-codex-beta-features"] = self._config.beta_features_header
|
|
354
441
|
if self._openai_subagent is not None:
|
|
@@ -360,13 +447,15 @@ class ResponsesModelClient:
|
|
|
360
447
|
)
|
|
361
448
|
return headers
|
|
362
449
|
|
|
363
|
-
def _build_model_list_headers(self) ->
|
|
450
|
+
def _build_model_list_headers(self) -> 'typing.Dict[str, str]':
|
|
364
451
|
headers = {
|
|
365
452
|
"accept": "application/json",
|
|
366
|
-
"authorization": f"Bearer {self._config.api_key()}",
|
|
367
453
|
"originator": self._originator,
|
|
368
454
|
"user-agent": self._user_agent,
|
|
369
455
|
}
|
|
456
|
+
api_key = self._config.api_key()
|
|
457
|
+
if api_key is not None:
|
|
458
|
+
headers["authorization"] = f"Bearer {api_key}"
|
|
370
459
|
if self._config.beta_features_header is not None:
|
|
371
460
|
headers["x-codex-beta-features"] = self._config.beta_features_header
|
|
372
461
|
if self._openai_subagent is not None:
|
|
@@ -376,16 +465,26 @@ class ResponsesModelClient:
|
|
|
376
465
|
def _parse_stream(
|
|
377
466
|
self,
|
|
378
467
|
response,
|
|
379
|
-
event_handler: ModelStreamEventHandler,
|
|
380
|
-
|
|
381
|
-
|
|
468
|
+
event_handler: 'ModelStreamEventHandler',
|
|
469
|
+
diagnostics: 'typing.Union[_StreamDiagnostics, None]' = None,
|
|
470
|
+
) -> 'ModelResponse':
|
|
471
|
+
items: 'typing.List[typing.Union[typing.Union[AssistantMessage, ToolCall], ReasoningItem]]' = []
|
|
382
472
|
saw_completed = False
|
|
473
|
+
last_event_type = ""
|
|
383
474
|
|
|
384
|
-
for event_name, data in self._iter_sse_events(response):
|
|
475
|
+
for event_name, data in self._iter_sse_events(response, diagnostics):
|
|
385
476
|
if not data:
|
|
386
477
|
continue
|
|
387
|
-
|
|
478
|
+
try:
|
|
479
|
+
payload = json.loads(data)
|
|
480
|
+
except json.JSONDecodeError as exc:
|
|
481
|
+
raise ResponsesRetryableError(
|
|
482
|
+
self._format_invalid_event_error(event_name, data, exc)
|
|
483
|
+
) from exc
|
|
388
484
|
event_type = payload.get("type", event_name)
|
|
485
|
+
last_event_type = str(event_type)
|
|
486
|
+
if diagnostics is not None:
|
|
487
|
+
diagnostics.last_event_type = last_event_type
|
|
389
488
|
|
|
390
489
|
if event_type == "response.output_text.delta":
|
|
391
490
|
event_handler(
|
|
@@ -445,26 +544,42 @@ class ResponsesModelClient:
|
|
|
445
544
|
)
|
|
446
545
|
)
|
|
447
546
|
items.append(parsed)
|
|
547
|
+
if diagnostics is not None:
|
|
548
|
+
diagnostics.output_items_received += 1
|
|
448
549
|
continue
|
|
449
550
|
|
|
450
551
|
if event_type == "response.completed":
|
|
552
|
+
response_payload = payload.get("response")
|
|
553
|
+
usage = None
|
|
554
|
+
if isinstance(response_payload, dict):
|
|
555
|
+
response_usage = response_payload.get("usage")
|
|
556
|
+
if isinstance(response_usage, dict):
|
|
557
|
+
usage = dict(response_usage)
|
|
558
|
+
elif isinstance(payload.get("usage"), dict):
|
|
559
|
+
usage = dict(payload["usage"])
|
|
560
|
+
event_handler(
|
|
561
|
+
ModelStreamEvent(
|
|
562
|
+
kind="token_count",
|
|
563
|
+
payload={"usage": usage},
|
|
564
|
+
)
|
|
565
|
+
)
|
|
451
566
|
saw_completed = True
|
|
452
567
|
break
|
|
453
568
|
|
|
454
569
|
if event_type == "response.failed":
|
|
455
|
-
|
|
456
|
-
message = error.get("message") or "responses stream failed"
|
|
457
|
-
raise ResponsesApiError(message)
|
|
570
|
+
self._raise_response_failed_error(payload)
|
|
458
571
|
|
|
459
572
|
if not saw_completed:
|
|
460
|
-
raise
|
|
573
|
+
raise ResponsesRetryableError(
|
|
574
|
+
self._format_incomplete_stream_error(last_event_type, len(items))
|
|
575
|
+
)
|
|
461
576
|
|
|
462
577
|
return ModelResponse(items=items)
|
|
463
578
|
|
|
464
579
|
def _parse_output_item(
|
|
465
580
|
self,
|
|
466
|
-
item:
|
|
467
|
-
) -> AssistantMessage
|
|
581
|
+
item: 'typing.Dict[str, object]',
|
|
582
|
+
) -> 'typing.Union[typing.Union[typing.Union[AssistantMessage, ToolCall], ReasoningItem], None]':
|
|
468
583
|
item_type = item.get("type")
|
|
469
584
|
if item_type == "reasoning":
|
|
470
585
|
return ReasoningItem(payload=dict(item))
|
|
@@ -500,15 +615,28 @@ class ResponsesModelClient:
|
|
|
500
615
|
|
|
501
616
|
return None
|
|
502
617
|
|
|
503
|
-
def _iter_sse_events(
|
|
504
|
-
|
|
505
|
-
|
|
618
|
+
def _iter_sse_events(
|
|
619
|
+
self,
|
|
620
|
+
response,
|
|
621
|
+
diagnostics: 'typing.Union[_StreamDiagnostics, None]' = None,
|
|
622
|
+
):
|
|
623
|
+
event_name: 'typing.Union[str, None]' = None
|
|
624
|
+
data_lines: 'typing.List[str]' = []
|
|
506
625
|
|
|
507
626
|
for raw_line in response:
|
|
508
627
|
line = raw_line.decode("utf-8", errors="replace").rstrip("\r\n")
|
|
509
628
|
if line == "":
|
|
510
629
|
if data_lines:
|
|
511
|
-
|
|
630
|
+
resolved_event_name = event_name or "message"
|
|
631
|
+
payload = "\n".join(data_lines)
|
|
632
|
+
if diagnostics is not None:
|
|
633
|
+
diagnostics.sse_events_received += 1
|
|
634
|
+
diagnostics.last_sse_event_name = resolved_event_name
|
|
635
|
+
diagnostics.last_payload_excerpt = self._truncate_excerpt(
|
|
636
|
+
payload,
|
|
637
|
+
240,
|
|
638
|
+
)
|
|
639
|
+
yield resolved_event_name, payload
|
|
512
640
|
event_name = None
|
|
513
641
|
data_lines = []
|
|
514
642
|
continue
|
|
@@ -522,10 +650,221 @@ class ResponsesModelClient:
|
|
|
522
650
|
data_lines.append(line.split(":", 1)[1].lstrip())
|
|
523
651
|
|
|
524
652
|
if data_lines:
|
|
525
|
-
|
|
653
|
+
resolved_event_name = event_name or "message"
|
|
654
|
+
payload = "\n".join(data_lines)
|
|
655
|
+
if diagnostics is not None:
|
|
656
|
+
diagnostics.sse_events_received += 1
|
|
657
|
+
diagnostics.last_sse_event_name = resolved_event_name
|
|
658
|
+
diagnostics.last_payload_excerpt = self._truncate_excerpt(
|
|
659
|
+
payload,
|
|
660
|
+
240,
|
|
661
|
+
)
|
|
662
|
+
yield resolved_event_name, payload
|
|
663
|
+
|
|
664
|
+
def _track_stream_lines(
|
|
665
|
+
self,
|
|
666
|
+
response,
|
|
667
|
+
diagnostics: '_StreamDiagnostics',
|
|
668
|
+
):
|
|
669
|
+
for raw_line in response:
|
|
670
|
+
diagnostics.raw_lines_received += 1
|
|
671
|
+
yield raw_line
|
|
672
|
+
|
|
673
|
+
def _base_error_details(
|
|
674
|
+
self,
|
|
675
|
+
url: 'str',
|
|
676
|
+
) -> 'typing.List[typing.Tuple[str, str]]':
|
|
677
|
+
return [
|
|
678
|
+
("provider", self._config.provider_name),
|
|
679
|
+
("model", self.model),
|
|
680
|
+
("request", f"POST {url}"),
|
|
681
|
+
("session_id", self._session_id),
|
|
682
|
+
]
|
|
683
|
+
|
|
684
|
+
def _format_error_message(
|
|
685
|
+
self,
|
|
686
|
+
summary: 'str',
|
|
687
|
+
details: 'typing.Iterable[typing.Tuple[str, str]]',
|
|
688
|
+
) -> 'str':
|
|
689
|
+
lines = [summary]
|
|
690
|
+
for label, value in details:
|
|
691
|
+
text = str(value).strip()
|
|
692
|
+
if not text:
|
|
693
|
+
continue
|
|
694
|
+
lines.append(f"- {label}: {text}")
|
|
695
|
+
return "\n".join(lines)
|
|
696
|
+
|
|
697
|
+
def _format_transport_error(
|
|
698
|
+
self,
|
|
699
|
+
url: 'str',
|
|
700
|
+
exc: 'BaseException',
|
|
701
|
+
diagnostics: 'typing.Union[_StreamDiagnostics, None]' = None,
|
|
702
|
+
) -> 'str':
|
|
703
|
+
details = self._base_error_details(url)
|
|
704
|
+
if diagnostics is not None:
|
|
705
|
+
details.extend(self._transport_diagnostics_details(diagnostics))
|
|
706
|
+
details.append(("exception", type(exc).__name__))
|
|
707
|
+
details.append(("detail", str(exc) or repr(exc)))
|
|
708
|
+
details.append(
|
|
709
|
+
(
|
|
710
|
+
"meaning",
|
|
711
|
+
"the HTTP response body ended before the SSE stream finished",
|
|
712
|
+
)
|
|
713
|
+
)
|
|
714
|
+
details.append(
|
|
715
|
+
(
|
|
716
|
+
"hint",
|
|
717
|
+
"the server or a proxy likely closed the connection before sending "
|
|
718
|
+
"`response.completed` or `response.failed`",
|
|
719
|
+
)
|
|
720
|
+
)
|
|
721
|
+
hostname = urllib.parse.urlparse(url).hostname or ""
|
|
722
|
+
if hostname in {"127.0.0.1", "localhost"}:
|
|
723
|
+
details.append(
|
|
724
|
+
(
|
|
725
|
+
"hint",
|
|
726
|
+
"if this goes through local `responses_server`, inspect that "
|
|
727
|
+
"server's stderr/logs for the downstream backend failure",
|
|
728
|
+
)
|
|
729
|
+
)
|
|
730
|
+
return self._format_error_message(
|
|
731
|
+
"responses request failed while reading the HTTP stream",
|
|
732
|
+
details,
|
|
733
|
+
)
|
|
734
|
+
|
|
735
|
+
def _format_response_failed_error(self, message: 'str') -> 'str':
|
|
736
|
+
details = self._base_error_details(self.responses_url())
|
|
737
|
+
details.append(("detail", message))
|
|
738
|
+
details.append(
|
|
739
|
+
(
|
|
740
|
+
"meaning",
|
|
741
|
+
"the server accepted the request but emitted a terminal "
|
|
742
|
+
"`response.failed` event",
|
|
743
|
+
)
|
|
744
|
+
)
|
|
745
|
+
return self._format_error_message(
|
|
746
|
+
"responses stream failed on the server side",
|
|
747
|
+
details,
|
|
748
|
+
)
|
|
749
|
+
|
|
750
|
+
def _raise_response_failed_error(self, payload: 'typing.Dict[str, object]') -> 'None':
|
|
751
|
+
response = payload.get("response")
|
|
752
|
+
error = response.get("error") if isinstance(response, dict) else None
|
|
753
|
+
if not isinstance(error, dict):
|
|
754
|
+
raise ResponsesRetryableError(
|
|
755
|
+
self._format_response_failed_error("responses stream failed")
|
|
756
|
+
)
|
|
757
|
+
|
|
758
|
+
message = str(error.get("message") or "responses stream failed")
|
|
759
|
+
code = str(error.get("code") or "").strip()
|
|
760
|
+
if code in {
|
|
761
|
+
"context_length_exceeded",
|
|
762
|
+
"insufficient_quota",
|
|
763
|
+
"invalid_prompt",
|
|
764
|
+
"usage_not_included",
|
|
765
|
+
}:
|
|
766
|
+
raise ResponsesApiError(self._format_response_failed_error(message))
|
|
767
|
+
|
|
768
|
+
raise ResponsesRetryableError(
|
|
769
|
+
self._format_response_failed_error(message),
|
|
770
|
+
retry_delay_seconds=self._try_parse_retry_after_seconds(code, message),
|
|
771
|
+
)
|
|
772
|
+
|
|
773
|
+
def _format_incomplete_stream_error(
|
|
774
|
+
self,
|
|
775
|
+
last_event_type: 'str',
|
|
776
|
+
output_item_count: 'int',
|
|
777
|
+
) -> 'str':
|
|
778
|
+
details = self._base_error_details(self.responses_url())
|
|
779
|
+
if last_event_type:
|
|
780
|
+
details.append(("last_event", last_event_type))
|
|
781
|
+
details.append(("output_items_received", str(output_item_count)))
|
|
782
|
+
details.append(
|
|
783
|
+
(
|
|
784
|
+
"meaning",
|
|
785
|
+
"the stream ended without a terminal `response.completed` event",
|
|
786
|
+
)
|
|
787
|
+
)
|
|
788
|
+
details.append(
|
|
789
|
+
(
|
|
790
|
+
"hint",
|
|
791
|
+
"the server should emit `response.failed` on mid-stream errors; "
|
|
792
|
+
"an abrupt end usually points to a backend, proxy, or server bug",
|
|
793
|
+
)
|
|
794
|
+
)
|
|
795
|
+
return self._format_error_message(
|
|
796
|
+
"responses stream ended before `response.completed`",
|
|
797
|
+
details,
|
|
798
|
+
)
|
|
799
|
+
|
|
800
|
+
def _format_invalid_event_error(
|
|
801
|
+
self,
|
|
802
|
+
event_name: 'str',
|
|
803
|
+
raw_data: 'str',
|
|
804
|
+
exc: 'json.JSONDecodeError',
|
|
805
|
+
) -> 'str':
|
|
806
|
+
details = self._base_error_details(self.responses_url())
|
|
807
|
+
details.append(("event", event_name or "message"))
|
|
808
|
+
details.append(("exception", type(exc).__name__))
|
|
809
|
+
details.append(("detail", str(exc)))
|
|
810
|
+
excerpt = raw_data if len(raw_data) <= 240 else f"{raw_data[:240]}..."
|
|
811
|
+
details.append(("data_excerpt", excerpt))
|
|
812
|
+
return self._format_error_message(
|
|
813
|
+
"responses stream contained an invalid JSON event",
|
|
814
|
+
details,
|
|
815
|
+
)
|
|
816
|
+
|
|
817
|
+
def _transport_diagnostics_details(
|
|
818
|
+
self,
|
|
819
|
+
diagnostics: '_StreamDiagnostics',
|
|
820
|
+
) -> 'typing.List[typing.Tuple[str, str]]':
|
|
821
|
+
details: 'typing.List[typing.Tuple[str, str]]' = [
|
|
822
|
+
("raw_lines_received", str(diagnostics.raw_lines_received)),
|
|
823
|
+
("sse_events_received", str(diagnostics.sse_events_received)),
|
|
824
|
+
("output_items_received", str(diagnostics.output_items_received)),
|
|
825
|
+
]
|
|
826
|
+
if diagnostics.last_sse_event_name:
|
|
827
|
+
details.append(("last_sse_event", diagnostics.last_sse_event_name))
|
|
828
|
+
if diagnostics.last_event_type:
|
|
829
|
+
details.append(("last_event_type", diagnostics.last_event_type))
|
|
830
|
+
if diagnostics.last_payload_excerpt:
|
|
831
|
+
details.append(("last_payload_excerpt", diagnostics.last_payload_excerpt))
|
|
832
|
+
return details
|
|
833
|
+
|
|
834
|
+
def _truncate_excerpt(self, text: 'str', limit: 'int') -> 'str':
|
|
835
|
+
if len(text) <= limit:
|
|
836
|
+
return text
|
|
837
|
+
return f"{text[:limit]}..."
|
|
838
|
+
|
|
839
|
+
def _retry_delay_seconds(self, attempt: 'int') -> 'float':
|
|
840
|
+
return INITIAL_RETRY_DELAY_SECONDS * (
|
|
841
|
+
RETRY_BACKOFF_FACTOR ** max(attempt - 1, 0)
|
|
842
|
+
)
|
|
843
|
+
|
|
844
|
+
def _try_parse_retry_after_seconds(
|
|
845
|
+
self,
|
|
846
|
+
code: 'str',
|
|
847
|
+
message: 'str',
|
|
848
|
+
) -> 'typing.Union[float, None]':
|
|
849
|
+
if code != "rate_limit_exceeded":
|
|
850
|
+
return None
|
|
851
|
+
match = RATE_LIMIT_RETRY_AFTER_RE.search(message)
|
|
852
|
+
if match is None:
|
|
853
|
+
return None
|
|
854
|
+
value = float(match.group(1))
|
|
855
|
+
unit = match.group(2).lower()
|
|
856
|
+
if unit == "ms":
|
|
857
|
+
return value / 1000.0
|
|
858
|
+
return value
|
|
859
|
+
|
|
860
|
+
|
|
861
|
+
def _optional_int(value: 'object') -> 'typing.Union[int, None]':
|
|
862
|
+
if value is None:
|
|
863
|
+
return None
|
|
864
|
+
return int(value)
|
|
526
865
|
|
|
527
866
|
|
|
528
|
-
def _requests_verify_setting() -> str
|
|
867
|
+
def _requests_verify_setting() -> 'typing.Union[typing.Union[str, bool], None]':
|
|
529
868
|
for env_name in ("REQUESTS_CA_BUNDLE", "CURL_CA_BUNDLE", "SSL_CERT_FILE"):
|
|
530
869
|
value = os.environ.get(env_name, "").strip()
|
|
531
870
|
if value:
|