python-codex 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (60)
  1. pycodex/__init__.py +5 -1
  2. pycodex/agent.py +89 -51
  3. pycodex/cli.py +152 -45
  4. pycodex/collaboration.py +6 -7
  5. pycodex/compat.py +99 -0
  6. pycodex/context.py +110 -87
  7. pycodex/doctor.py +40 -40
  8. pycodex/model.py +429 -90
  9. pycodex/portable.py +33 -33
  10. pycodex/portable_server.py +22 -21
  11. pycodex/prompts/models.json +30 -0
  12. pycodex/protocol.py +84 -86
  13. pycodex/runtime.py +36 -35
  14. pycodex/runtime_services.py +69 -69
  15. pycodex/tools/agent_tool_schemas.py +0 -2
  16. pycodex/tools/apply_patch_tool.py +45 -46
  17. pycodex/tools/base_tool.py +35 -36
  18. pycodex/tools/close_agent_tool.py +2 -4
  19. pycodex/tools/code_mode_manager.py +61 -61
  20. pycodex/tools/exec_command_tool.py +5 -6
  21. pycodex/tools/exec_runtime.js +3 -3
  22. pycodex/tools/exec_tool.py +2 -4
  23. pycodex/tools/grep_files_tool.py +10 -11
  24. pycodex/tools/list_dir_tool.py +8 -9
  25. pycodex/tools/read_file_tool.py +13 -14
  26. pycodex/tools/request_permissions_tool.py +2 -4
  27. pycodex/tools/request_user_input_tool.py +13 -14
  28. pycodex/tools/resume_agent_tool.py +2 -4
  29. pycodex/tools/send_input_tool.py +8 -9
  30. pycodex/tools/shell_command_tool.py +5 -6
  31. pycodex/tools/shell_tool.py +5 -6
  32. pycodex/tools/spawn_agent_tool.py +4 -5
  33. pycodex/tools/unified_exec_manager.py +62 -61
  34. pycodex/tools/update_plan_tool.py +4 -5
  35. pycodex/tools/view_image_tool.py +4 -5
  36. pycodex/tools/wait_agent_tool.py +2 -4
  37. pycodex/tools/wait_tool.py +4 -5
  38. pycodex/tools/web_search_tool.py +1 -3
  39. pycodex/tools/write_stdin_tool.py +4 -5
  40. pycodex/utils/__init__.py +4 -0
  41. pycodex/utils/compactor.py +189 -0
  42. pycodex/utils/dotenv.py +6 -6
  43. pycodex/utils/get_env.py +37 -33
  44. pycodex/utils/random_ids.py +1 -2
  45. pycodex/utils/session_persist.py +483 -0
  46. pycodex/utils/visualize.py +197 -83
  47. {python_codex-0.1.2.dist-info → python_codex-0.1.4.dist-info}/METADATA +32 -11
  48. python_codex-0.1.4.dist-info/RECORD +76 -0
  49. {python_codex-0.1.2.dist-info → python_codex-0.1.4.dist-info}/WHEEL +1 -1
  50. responses_server/app.py +32 -20
  51. responses_server/config.py +17 -17
  52. responses_server/payload_processors.py +26 -17
  53. responses_server/server.py +11 -11
  54. responses_server/session_store.py +10 -10
  55. responses_server/stream_router.py +83 -64
  56. responses_server/tools/custom_adapter.py +12 -12
  57. responses_server/tools/web_search.py +33 -33
  58. python_codex-0.1.2.dist-info/RECORD +0 -73
  59. {python_codex-0.1.2.dist-info → python_codex-0.1.4.dist-info}/entry_points.txt +0 -0
  60. {python_codex-0.1.2.dist-info → python_codex-0.1.4.dist-info}/licenses/LICENSE +0 -0
pycodex/model.py CHANGED
@@ -1,15 +1,16 @@
1
- from __future__ import annotations
2
1
 
3
2
  import asyncio
4
3
  import json
5
4
  import os
5
+ import re
6
6
  import urllib.parse
7
- from collections.abc import Callable
8
7
  from dataclasses import dataclass, field, replace
9
8
  from pathlib import Path
10
- from typing import Protocol
9
+ from typing import Callable
10
+ from .compat import Protocol
11
11
 
12
12
  import requests
13
+ import typing
13
14
 
14
15
  try:
15
16
  import tomllib
@@ -29,38 +30,47 @@ from .utils import build_user_agent, uuid7_string
29
30
  DEFAULT_CODEX_CONFIG_PATH = Path.home() / ".codex" / "config.toml"
30
31
  DEFAULT_ORIGINATOR = "pycodex"
31
32
  ModelStreamEventHandler = Callable[[ModelStreamEvent], None]
32
- NOOP_MODEL_STREAM_EVENT_HANDLER: ModelStreamEventHandler = lambda _event: None
33
+ NOOP_MODEL_STREAM_EVENT_HANDLER: 'ModelStreamEventHandler' = lambda _event: None
34
+ DEFAULT_STREAM_MAX_RETRIES = 5
35
+ DEFAULT_STREAM_IDLE_TIMEOUT_MS = 300_000
36
+ INITIAL_RETRY_DELAY_SECONDS = 0.2
37
+ RETRY_BACKOFF_FACTOR = 2.0
38
+ RATE_LIMIT_RETRY_AFTER_RE = re.compile(
39
+ r"(?i)try again in\s*(\d+(?:\.\d+)?)\s*(s|ms|seconds?)"
40
+ )
33
41
 
34
42
 
35
43
  class ModelClient(Protocol):
36
44
  async def complete(
37
45
  self,
38
- prompt: Prompt,
39
- event_handler: ModelStreamEventHandler = NOOP_MODEL_STREAM_EVENT_HANDLER,
40
- ) -> ModelResponse:
46
+ prompt: 'Prompt',
47
+ event_handler: 'ModelStreamEventHandler' = NOOP_MODEL_STREAM_EVENT_HANDLER,
48
+ ) -> 'ModelResponse':
41
49
  """Return the next batch of model output items for the current prompt."""
42
50
 
43
51
 
44
- @dataclass(frozen=True, slots=True)
52
+ @dataclass(frozen=True, )
45
53
  class ResponsesProviderConfig:
46
- model: str
47
- provider_name: str
48
- base_url: str
49
- api_key_env: str
50
- wire_api: str = "responses"
51
- query_params: dict[str, str] = field(default_factory=dict)
52
- reasoning_effort: str | None = None
53
- reasoning_summary: str | None = None
54
- verbosity: str | None = None
55
- sandbox_mode: str | None = None
56
- beta_features_header: str | None = None
54
+ model: 'str'
55
+ provider_name: 'str'
56
+ base_url: 'str'
57
+ api_key_env: 'typing.Union[str, None]'
58
+ wire_api: 'str' = "responses"
59
+ query_params: 'typing.Dict[str, str]' = field(default_factory=dict)
60
+ reasoning_effort: 'typing.Union[str, None]' = None
61
+ reasoning_summary: 'typing.Union[str, None]' = None
62
+ verbosity: 'typing.Union[str, None]' = None
63
+ sandbox_mode: 'typing.Union[str, None]' = None
64
+ beta_features_header: 'typing.Union[str, None]' = None
65
+ stream_max_retries: 'typing.Union[int, None]' = None
66
+ stream_idle_timeout_ms: 'typing.Union[int, None]' = None
57
67
 
58
68
  @classmethod
59
69
  def from_codex_config(
60
70
  cls,
61
- config_path: str | Path = DEFAULT_CODEX_CONFIG_PATH,
62
- profile: str | None = None,
63
- ) -> ResponsesProviderConfig:
71
+ config_path: 'typing.Union[str, Path]' = DEFAULT_CODEX_CONFIG_PATH,
72
+ profile: 'typing.Union[str, None]' = None,
73
+ ) -> 'ResponsesProviderConfig':
64
74
  data = tomllib.loads(Path(config_path).read_text())
65
75
  selected = dict(data)
66
76
  if profile is not None:
@@ -76,17 +86,13 @@ class ResponsesProviderConfig:
76
86
  raise ValueError(f"unsupported wire_api for Python client: {wire_api}")
77
87
 
78
88
  api_key_env = provider.get("env_key")
79
- if not api_key_env:
80
- raise ValueError(
81
- f"provider {provider_name} does not define env_key in Codex config"
82
- )
83
89
 
84
90
  query_params = {
85
91
  str(key): str(value)
86
92
  for key, value in provider.get("query_params", {}).items()
87
93
  }
88
94
  features = selected.get("features", {})
89
- beta_features: list[str] = []
95
+ beta_features: 'typing.List[str]' = []
90
96
  if isinstance(features, dict) and features.get("guardian_approval") is True:
91
97
  beta_features.append("guardian_approval")
92
98
  return cls(
@@ -101,9 +107,13 @@ class ResponsesProviderConfig:
101
107
  verbosity=selected.get("model_verbosity"),
102
108
  sandbox_mode=selected.get("sandbox_mode"),
103
109
  beta_features_header=",".join(beta_features) or None,
110
+ stream_max_retries=_optional_int(provider.get("stream_max_retries")),
111
+ stream_idle_timeout_ms=_optional_int(provider.get("stream_idle_timeout_ms")),
104
112
  )
105
113
 
106
- def api_key(self) -> str:
114
+ def api_key(self) -> 'typing.Union[str, None]':
115
+ if not self.api_key_env:
116
+ return None
107
117
  value = os.environ.get(self.api_key_env, "")
108
118
  if not value:
109
119
  raise RuntimeError(
@@ -113,9 +123,9 @@ class ResponsesProviderConfig:
113
123
 
114
124
  def with_overrides(
115
125
  self,
116
- model: str | None = None,
117
- reasoning_effort: str | None = None,
118
- ) -> ResponsesProviderConfig:
126
+ model: 'typing.Union[str, None]' = None,
127
+ reasoning_effort: 'typing.Union[str, None]' = None,
128
+ ) -> 'ResponsesProviderConfig':
119
129
  return replace(
120
130
  self,
121
131
  model=self.model if model is None else model,
@@ -126,11 +136,41 @@ class ResponsesProviderConfig:
126
136
  ),
127
137
  )
128
138
 
139
+ def effective_stream_max_retries(self) -> 'int':
140
+ if self.stream_max_retries is None:
141
+ return DEFAULT_STREAM_MAX_RETRIES
142
+ return max(int(self.stream_max_retries), 0)
143
+
144
+ def effective_stream_idle_timeout_seconds(self) -> 'float':
145
+ if self.stream_idle_timeout_ms is None:
146
+ return DEFAULT_STREAM_IDLE_TIMEOUT_MS / 1000.0
147
+ return max(int(self.stream_idle_timeout_ms), 1) / 1000.0
148
+
129
149
 
130
150
  class ResponsesApiError(RuntimeError):
131
151
  pass
132
152
 
133
153
 
154
+ class ResponsesRetryableError(ResponsesApiError):
155
+ def __init__(
156
+ self,
157
+ message: 'str',
158
+ retry_delay_seconds: 'typing.Union[float, None]' = None,
159
+ ) -> 'None':
160
+ super().__init__(message)
161
+ self.retry_delay_seconds = retry_delay_seconds
162
+
163
+
164
+ @dataclass
165
+ class _StreamDiagnostics:
166
+ raw_lines_received: 'int' = 0
167
+ sse_events_received: 'int' = 0
168
+ output_items_received: 'int' = 0
169
+ last_sse_event_name: 'str' = ""
170
+ last_event_type: 'str' = ""
171
+ last_payload_excerpt: 'str' = ""
172
+
173
+
134
174
  class ResponsesModelClient:
135
175
  """Minimal OpenAI-compatible Responses API client.
136
176
 
@@ -141,13 +181,13 @@ class ResponsesModelClient:
141
181
 
142
182
  def __init__(
143
183
  self,
144
- config: ResponsesProviderConfig,
145
- timeout_seconds: float = 120.0,
146
- session_id: str | None = None,
147
- originator: str = DEFAULT_ORIGINATOR,
148
- user_agent: str | None = None,
149
- openai_subagent: str | None = None,
150
- ) -> None:
184
+ config: 'ResponsesProviderConfig',
185
+ timeout_seconds: 'float' = 120.0,
186
+ session_id: 'typing.Union[str, None]' = None,
187
+ originator: 'str' = DEFAULT_ORIGINATOR,
188
+ user_agent: 'typing.Union[str, None]' = None,
189
+ openai_subagent: 'typing.Union[str, None]' = None,
190
+ ) -> 'None':
151
191
  self._config = config
152
192
  self.model = config.model
153
193
  self._timeout_seconds = timeout_seconds
@@ -159,22 +199,22 @@ class ResponsesModelClient:
159
199
  @classmethod
160
200
  def from_codex_config(
161
201
  cls,
162
- config_path: str | Path = DEFAULT_CODEX_CONFIG_PATH,
163
- profile: str | None = None,
164
- timeout_seconds: float = 120.0,
165
- originator: str = DEFAULT_ORIGINATOR,
166
- user_agent: str | None = None,
167
- ) -> ResponsesModelClient:
202
+ config_path: 'typing.Union[str, Path]' = DEFAULT_CODEX_CONFIG_PATH,
203
+ profile: 'typing.Union[str, None]' = None,
204
+ timeout_seconds: 'float' = 120.0,
205
+ originator: 'str' = DEFAULT_ORIGINATOR,
206
+ user_agent: 'typing.Union[str, None]' = None,
207
+ ) -> 'ResponsesModelClient':
168
208
  config = ResponsesProviderConfig.from_codex_config(config_path, profile)
169
209
  return cls(config, timeout_seconds, originator=originator, user_agent=user_agent)
170
210
 
171
211
  def with_overrides(
172
212
  self,
173
- model: str | None = None,
174
- reasoning_effort: str | None = None,
175
- session_id: str | None = None,
176
- openai_subagent: str | None = None,
177
- ) -> ResponsesModelClient:
213
+ model: 'typing.Union[str, None]' = None,
214
+ reasoning_effort: 'typing.Union[str, None]' = None,
215
+ session_id: 'typing.Union[str, None]' = None,
216
+ openai_subagent: 'typing.Union[str, None]' = None,
217
+ ) -> 'ResponsesModelClient':
178
218
  return ResponsesModelClient(
179
219
  self._config.with_overrides(
180
220
  model or self.model,
@@ -191,35 +231,64 @@ class ResponsesModelClient:
191
231
  ),
192
232
  )
193
233
 
194
- def responses_url(self) -> str:
234
+ def responses_url(self) -> 'str':
195
235
  base_url = self._config.base_url.rstrip("/")
196
236
  url = f"{base_url}/responses"
197
237
  if self._config.query_params:
198
238
  return f"{url}?{urllib.parse.urlencode(self._config.query_params)}"
199
239
  return url
200
240
 
201
- def models_url(self) -> str:
241
+ def models_url(self) -> 'str':
202
242
  base_url = self._config.base_url.rstrip("/")
203
243
  url = f"{base_url}/models"
204
244
  if self._config.query_params:
205
245
  return f"{url}?{urllib.parse.urlencode(self._config.query_params)}"
206
246
  return url
207
247
 
208
- async def list_models(self) -> list[str]:
248
+ async def list_models(self) -> 'typing.List[str]':
209
249
  return await asyncio.to_thread(self._list_models_sync)
210
250
 
211
251
  async def complete(
212
252
  self,
213
- prompt: Prompt,
214
- event_handler: ModelStreamEventHandler = NOOP_MODEL_STREAM_EVENT_HANDLER,
215
- ) -> ModelResponse:
216
- return await asyncio.to_thread(self._complete_sync, prompt, event_handler)
253
+ prompt: 'Prompt',
254
+ event_handler: 'ModelStreamEventHandler' = NOOP_MODEL_STREAM_EVENT_HANDLER,
255
+ ) -> 'ModelResponse':
256
+ retries = 0
257
+ max_retries = self._config.effective_stream_max_retries()
258
+ while True:
259
+ try:
260
+ return await asyncio.to_thread(
261
+ self._complete_sync,
262
+ prompt,
263
+ event_handler,
264
+ )
265
+ except ResponsesRetryableError as exc:
266
+ if retries >= max_retries:
267
+ raise
268
+ retries += 1
269
+ delay_seconds = exc.retry_delay_seconds
270
+ if delay_seconds is None:
271
+ delay_seconds = self._retry_delay_seconds(retries)
272
+ event_handler(
273
+ ModelStreamEvent(
274
+ kind="stream_error",
275
+ payload={
276
+ "message": f"Reconnecting... {retries}/{max_retries}",
277
+ "attempt": retries,
278
+ "max_retries": max_retries,
279
+ "delay_seconds": delay_seconds,
280
+ "error": str(exc),
281
+ },
282
+ )
283
+ )
284
+ if delay_seconds > 0:
285
+ await asyncio.sleep(delay_seconds)
217
286
 
218
287
  def _complete_sync(
219
288
  self,
220
- prompt: Prompt,
221
- event_handler: ModelStreamEventHandler,
222
- ) -> ModelResponse:
289
+ prompt: 'Prompt',
290
+ event_handler: 'ModelStreamEventHandler',
291
+ ) -> 'ModelResponse':
223
292
  payload = self._build_payload(prompt)
224
293
  body = json.dumps(payload).encode("utf-8")
225
294
  url = self.responses_url()
@@ -230,6 +299,7 @@ class ResponsesModelClient:
230
299
  headers=self._build_headers(prompt),
231
300
  data=body,
232
301
  )
302
+ diagnostics = _StreamDiagnostics()
233
303
  try:
234
304
  with requests.Session() as session:
235
305
  settings = session.merge_environment_settings(
@@ -242,41 +312,56 @@ class ResponsesModelClient:
242
312
  verify = _requests_verify_setting()
243
313
  if verify is not None:
244
314
  settings["verify"] = verify
315
+ timeout = (
316
+ max(self._timeout_seconds, 1.0),
317
+ self._config.effective_stream_idle_timeout_seconds(),
318
+ )
245
319
  response = session.send(
246
320
  prepared,
247
- timeout=self._timeout_seconds,
321
+ timeout=timeout,
248
322
  allow_redirects=False,
249
323
  **settings,
250
324
  )
251
325
  with response:
252
326
  if response.status_code >= 400:
253
327
  error_body = response.text
254
- raise ResponsesApiError(
328
+ message = (
255
329
  f"responses request failed with status {response.status_code}: "
256
330
  f"{error_body[:500]}"
257
331
  )
258
- return self._parse_stream(
332
+ if response.status_code >= 500:
333
+ raise ResponsesRetryableError(message)
334
+ raise ResponsesApiError(message)
335
+ tracked_lines = self._track_stream_lines(
259
336
  response.iter_lines(chunk_size=1, decode_unicode=False),
337
+ diagnostics,
338
+ )
339
+ return self._parse_stream(
340
+ tracked_lines,
260
341
  event_handler,
342
+ diagnostics=diagnostics,
261
343
  )
262
344
  except requests.RequestException as exc:
263
- raise ResponsesApiError(f"responses request failed: {exc}") from exc
345
+ raise ResponsesRetryableError(
346
+ self._format_transport_error(url, exc, diagnostics)
347
+ ) from exc
264
348
 
265
- def _build_payload(self, prompt: Prompt) -> dict[str, object]:
266
- payload: dict[str, object] = {
349
+ def _build_payload(self, prompt: 'Prompt') -> 'typing.Dict[str, object]':
350
+ payload: 'typing.Dict[str, object]' = {
267
351
  "model": self.model,
268
352
  "instructions": prompt.base_instructions or "",
269
353
  "input": [item.serialize() for item in prompt.input],
270
354
  "tools": [tool.serialize() for tool in prompt.tools],
271
- "tool_choice": "auto",
272
355
  "parallel_tool_calls": prompt.parallel_tool_calls,
273
356
  "store": False,
274
357
  "stream": True,
275
358
  "include": ["reasoning.encrypted_content"],
276
359
  "prompt_cache_key": self._session_id,
277
360
  }
361
+ if prompt.tools:
362
+ payload["tool_choice"] = "auto"
278
363
 
279
- reasoning: dict[str, str] = {}
364
+ reasoning: 'typing.Dict[str, str]' = {}
280
365
  if self._config.reasoning_effort is not None:
281
366
  reasoning["effort"] = self._config.reasoning_effort
282
367
  if self._config.reasoning_summary is not None:
@@ -292,7 +377,7 @@ class ResponsesModelClient:
292
377
 
293
378
  return payload
294
379
 
295
- def _list_models_sync(self) -> list[str]:
380
+ def _list_models_sync(self) -> 'typing.List[str]':
296
381
  prepared = requests.PreparedRequest()
297
382
  prepared.prepare(
298
383
  method="GET",
@@ -330,7 +415,7 @@ class ResponsesModelClient:
330
415
  data = payload.get("data")
331
416
  if not isinstance(data, list):
332
417
  raise ResponsesApiError("models response is missing `data` list")
333
- models: list[str] = []
418
+ models: 'typing.List[str]' = []
334
419
  for item in data:
335
420
  if not isinstance(item, dict):
336
421
  continue
@@ -339,16 +424,18 @@ class ResponsesModelClient:
339
424
  models.append(model_id)
340
425
  return models
341
426
 
342
- def _build_headers(self, prompt: Prompt) -> dict[str, str]:
427
+ def _build_headers(self, prompt: 'Prompt') -> 'typing.Dict[str, str]':
343
428
  headers = {
344
429
  "content-type": "application/json",
345
430
  "accept": "text/event-stream",
346
- "authorization": f"Bearer {self._config.api_key()}",
347
431
  "x-client-request-id": self._session_id,
348
432
  "session_id": self._session_id,
349
433
  "originator": self._originator,
350
434
  "user-agent": self._user_agent,
351
435
  }
436
+ api_key = self._config.api_key()
437
+ if api_key is not None:
438
+ headers["authorization"] = f"Bearer {api_key}"
352
439
  if self._config.beta_features_header is not None:
353
440
  headers["x-codex-beta-features"] = self._config.beta_features_header
354
441
  if self._openai_subagent is not None:
@@ -360,13 +447,15 @@ class ResponsesModelClient:
360
447
  )
361
448
  return headers
362
449
 
363
- def _build_model_list_headers(self) -> dict[str, str]:
450
+ def _build_model_list_headers(self) -> 'typing.Dict[str, str]':
364
451
  headers = {
365
452
  "accept": "application/json",
366
- "authorization": f"Bearer {self._config.api_key()}",
367
453
  "originator": self._originator,
368
454
  "user-agent": self._user_agent,
369
455
  }
456
+ api_key = self._config.api_key()
457
+ if api_key is not None:
458
+ headers["authorization"] = f"Bearer {api_key}"
370
459
  if self._config.beta_features_header is not None:
371
460
  headers["x-codex-beta-features"] = self._config.beta_features_header
372
461
  if self._openai_subagent is not None:
@@ -376,16 +465,26 @@ class ResponsesModelClient:
376
465
  def _parse_stream(
377
466
  self,
378
467
  response,
379
- event_handler: ModelStreamEventHandler,
380
- ) -> ModelResponse:
381
- items: list[AssistantMessage | ToolCall | ReasoningItem] = []
468
+ event_handler: 'ModelStreamEventHandler',
469
+ diagnostics: 'typing.Union[_StreamDiagnostics, None]' = None,
470
+ ) -> 'ModelResponse':
471
+ items: 'typing.List[typing.Union[typing.Union[AssistantMessage, ToolCall], ReasoningItem]]' = []
382
472
  saw_completed = False
473
+ last_event_type = ""
383
474
 
384
- for event_name, data in self._iter_sse_events(response):
475
+ for event_name, data in self._iter_sse_events(response, diagnostics):
385
476
  if not data:
386
477
  continue
387
- payload = json.loads(data)
478
+ try:
479
+ payload = json.loads(data)
480
+ except json.JSONDecodeError as exc:
481
+ raise ResponsesRetryableError(
482
+ self._format_invalid_event_error(event_name, data, exc)
483
+ ) from exc
388
484
  event_type = payload.get("type", event_name)
485
+ last_event_type = str(event_type)
486
+ if diagnostics is not None:
487
+ diagnostics.last_event_type = last_event_type
389
488
 
390
489
  if event_type == "response.output_text.delta":
391
490
  event_handler(
@@ -445,26 +544,42 @@ class ResponsesModelClient:
445
544
  )
446
545
  )
447
546
  items.append(parsed)
547
+ if diagnostics is not None:
548
+ diagnostics.output_items_received += 1
448
549
  continue
449
550
 
450
551
  if event_type == "response.completed":
552
+ response_payload = payload.get("response")
553
+ usage = None
554
+ if isinstance(response_payload, dict):
555
+ response_usage = response_payload.get("usage")
556
+ if isinstance(response_usage, dict):
557
+ usage = dict(response_usage)
558
+ elif isinstance(payload.get("usage"), dict):
559
+ usage = dict(payload["usage"])
560
+ event_handler(
561
+ ModelStreamEvent(
562
+ kind="token_count",
563
+ payload={"usage": usage},
564
+ )
565
+ )
451
566
  saw_completed = True
452
567
  break
453
568
 
454
569
  if event_type == "response.failed":
455
- error = payload.get("response", {}).get("error") or {}
456
- message = error.get("message") or "responses stream failed"
457
- raise ResponsesApiError(message)
570
+ self._raise_response_failed_error(payload)
458
571
 
459
572
  if not saw_completed:
460
- raise ResponsesApiError("responses stream ended before response.completed")
573
+ raise ResponsesRetryableError(
574
+ self._format_incomplete_stream_error(last_event_type, len(items))
575
+ )
461
576
 
462
577
  return ModelResponse(items=items)
463
578
 
464
579
  def _parse_output_item(
465
580
  self,
466
- item: dict[str, object],
467
- ) -> AssistantMessage | ToolCall | ReasoningItem | None:
581
+ item: 'typing.Dict[str, object]',
582
+ ) -> 'typing.Union[typing.Union[typing.Union[AssistantMessage, ToolCall], ReasoningItem], None]':
468
583
  item_type = item.get("type")
469
584
  if item_type == "reasoning":
470
585
  return ReasoningItem(payload=dict(item))
@@ -500,15 +615,28 @@ class ResponsesModelClient:
500
615
 
501
616
  return None
502
617
 
503
- def _iter_sse_events(self, response):
504
- event_name: str | None = None
505
- data_lines: list[str] = []
618
+ def _iter_sse_events(
619
+ self,
620
+ response,
621
+ diagnostics: 'typing.Union[_StreamDiagnostics, None]' = None,
622
+ ):
623
+ event_name: 'typing.Union[str, None]' = None
624
+ data_lines: 'typing.List[str]' = []
506
625
 
507
626
  for raw_line in response:
508
627
  line = raw_line.decode("utf-8", errors="replace").rstrip("\r\n")
509
628
  if line == "":
510
629
  if data_lines:
511
- yield event_name or "message", "\n".join(data_lines)
630
+ resolved_event_name = event_name or "message"
631
+ payload = "\n".join(data_lines)
632
+ if diagnostics is not None:
633
+ diagnostics.sse_events_received += 1
634
+ diagnostics.last_sse_event_name = resolved_event_name
635
+ diagnostics.last_payload_excerpt = self._truncate_excerpt(
636
+ payload,
637
+ 240,
638
+ )
639
+ yield resolved_event_name, payload
512
640
  event_name = None
513
641
  data_lines = []
514
642
  continue
@@ -522,10 +650,221 @@ class ResponsesModelClient:
522
650
  data_lines.append(line.split(":", 1)[1].lstrip())
523
651
 
524
652
  if data_lines:
525
- yield event_name or "message", "\n".join(data_lines)
653
+ resolved_event_name = event_name or "message"
654
+ payload = "\n".join(data_lines)
655
+ if diagnostics is not None:
656
+ diagnostics.sse_events_received += 1
657
+ diagnostics.last_sse_event_name = resolved_event_name
658
+ diagnostics.last_payload_excerpt = self._truncate_excerpt(
659
+ payload,
660
+ 240,
661
+ )
662
+ yield resolved_event_name, payload
663
+
664
+ def _track_stream_lines(
665
+ self,
666
+ response,
667
+ diagnostics: '_StreamDiagnostics',
668
+ ):
669
+ for raw_line in response:
670
+ diagnostics.raw_lines_received += 1
671
+ yield raw_line
672
+
673
+ def _base_error_details(
674
+ self,
675
+ url: 'str',
676
+ ) -> 'typing.List[typing.Tuple[str, str]]':
677
+ return [
678
+ ("provider", self._config.provider_name),
679
+ ("model", self.model),
680
+ ("request", f"POST {url}"),
681
+ ("session_id", self._session_id),
682
+ ]
683
+
684
+ def _format_error_message(
685
+ self,
686
+ summary: 'str',
687
+ details: 'typing.Iterable[typing.Tuple[str, str]]',
688
+ ) -> 'str':
689
+ lines = [summary]
690
+ for label, value in details:
691
+ text = str(value).strip()
692
+ if not text:
693
+ continue
694
+ lines.append(f"- {label}: {text}")
695
+ return "\n".join(lines)
696
+
697
+ def _format_transport_error(
698
+ self,
699
+ url: 'str',
700
+ exc: 'BaseException',
701
+ diagnostics: 'typing.Union[_StreamDiagnostics, None]' = None,
702
+ ) -> 'str':
703
+ details = self._base_error_details(url)
704
+ if diagnostics is not None:
705
+ details.extend(self._transport_diagnostics_details(diagnostics))
706
+ details.append(("exception", type(exc).__name__))
707
+ details.append(("detail", str(exc) or repr(exc)))
708
+ details.append(
709
+ (
710
+ "meaning",
711
+ "the HTTP response body ended before the SSE stream finished",
712
+ )
713
+ )
714
+ details.append(
715
+ (
716
+ "hint",
717
+ "the server or a proxy likely closed the connection before sending "
718
+ "`response.completed` or `response.failed`",
719
+ )
720
+ )
721
+ hostname = urllib.parse.urlparse(url).hostname or ""
722
+ if hostname in {"127.0.0.1", "localhost"}:
723
+ details.append(
724
+ (
725
+ "hint",
726
+ "if this goes through local `responses_server`, inspect that "
727
+ "server's stderr/logs for the downstream backend failure",
728
+ )
729
+ )
730
+ return self._format_error_message(
731
+ "responses request failed while reading the HTTP stream",
732
+ details,
733
+ )
734
+
735
+ def _format_response_failed_error(self, message: 'str') -> 'str':
736
+ details = self._base_error_details(self.responses_url())
737
+ details.append(("detail", message))
738
+ details.append(
739
+ (
740
+ "meaning",
741
+ "the server accepted the request but emitted a terminal "
742
+ "`response.failed` event",
743
+ )
744
+ )
745
+ return self._format_error_message(
746
+ "responses stream failed on the server side",
747
+ details,
748
+ )
749
+
750
+ def _raise_response_failed_error(self, payload: 'typing.Dict[str, object]') -> 'None':
751
+ response = payload.get("response")
752
+ error = response.get("error") if isinstance(response, dict) else None
753
+ if not isinstance(error, dict):
754
+ raise ResponsesRetryableError(
755
+ self._format_response_failed_error("responses stream failed")
756
+ )
757
+
758
+ message = str(error.get("message") or "responses stream failed")
759
+ code = str(error.get("code") or "").strip()
760
+ if code in {
761
+ "context_length_exceeded",
762
+ "insufficient_quota",
763
+ "invalid_prompt",
764
+ "usage_not_included",
765
+ }:
766
+ raise ResponsesApiError(self._format_response_failed_error(message))
767
+
768
+ raise ResponsesRetryableError(
769
+ self._format_response_failed_error(message),
770
+ retry_delay_seconds=self._try_parse_retry_after_seconds(code, message),
771
+ )
772
+
773
+ def _format_incomplete_stream_error(
774
+ self,
775
+ last_event_type: 'str',
776
+ output_item_count: 'int',
777
+ ) -> 'str':
778
+ details = self._base_error_details(self.responses_url())
779
+ if last_event_type:
780
+ details.append(("last_event", last_event_type))
781
+ details.append(("output_items_received", str(output_item_count)))
782
+ details.append(
783
+ (
784
+ "meaning",
785
+ "the stream ended without a terminal `response.completed` event",
786
+ )
787
+ )
788
+ details.append(
789
+ (
790
+ "hint",
791
+ "the server should emit `response.failed` on mid-stream errors; "
792
+ "an abrupt end usually points to a backend, proxy, or server bug",
793
+ )
794
+ )
795
+ return self._format_error_message(
796
+ "responses stream ended before `response.completed`",
797
+ details,
798
+ )
799
+
800
+ def _format_invalid_event_error(
801
+ self,
802
+ event_name: 'str',
803
+ raw_data: 'str',
804
+ exc: 'json.JSONDecodeError',
805
+ ) -> 'str':
806
+ details = self._base_error_details(self.responses_url())
807
+ details.append(("event", event_name or "message"))
808
+ details.append(("exception", type(exc).__name__))
809
+ details.append(("detail", str(exc)))
810
+ excerpt = raw_data if len(raw_data) <= 240 else f"{raw_data[:240]}..."
811
+ details.append(("data_excerpt", excerpt))
812
+ return self._format_error_message(
813
+ "responses stream contained an invalid JSON event",
814
+ details,
815
+ )
816
+
817
+ def _transport_diagnostics_details(
818
+ self,
819
+ diagnostics: '_StreamDiagnostics',
820
+ ) -> 'typing.List[typing.Tuple[str, str]]':
821
+ details: 'typing.List[typing.Tuple[str, str]]' = [
822
+ ("raw_lines_received", str(diagnostics.raw_lines_received)),
823
+ ("sse_events_received", str(diagnostics.sse_events_received)),
824
+ ("output_items_received", str(diagnostics.output_items_received)),
825
+ ]
826
+ if diagnostics.last_sse_event_name:
827
+ details.append(("last_sse_event", diagnostics.last_sse_event_name))
828
+ if diagnostics.last_event_type:
829
+ details.append(("last_event_type", diagnostics.last_event_type))
830
+ if diagnostics.last_payload_excerpt:
831
+ details.append(("last_payload_excerpt", diagnostics.last_payload_excerpt))
832
+ return details
833
+
834
+ def _truncate_excerpt(self, text: 'str', limit: 'int') -> 'str':
835
+ if len(text) <= limit:
836
+ return text
837
+ return f"{text[:limit]}..."
838
+
839
+ def _retry_delay_seconds(self, attempt: 'int') -> 'float':
840
+ return INITIAL_RETRY_DELAY_SECONDS * (
841
+ RETRY_BACKOFF_FACTOR ** max(attempt - 1, 0)
842
+ )
843
+
844
+ def _try_parse_retry_after_seconds(
845
+ self,
846
+ code: 'str',
847
+ message: 'str',
848
+ ) -> 'typing.Union[float, None]':
849
+ if code != "rate_limit_exceeded":
850
+ return None
851
+ match = RATE_LIMIT_RETRY_AFTER_RE.search(message)
852
+ if match is None:
853
+ return None
854
+ value = float(match.group(1))
855
+ unit = match.group(2).lower()
856
+ if unit == "ms":
857
+ return value / 1000.0
858
+ return value
859
+
860
+
861
+ def _optional_int(value: 'object') -> 'typing.Union[int, None]':
862
+ if value is None:
863
+ return None
864
+ return int(value)
526
865
 
527
866
 
528
- def _requests_verify_setting() -> str | bool | None:
867
+ def _requests_verify_setting() -> 'typing.Union[typing.Union[str, bool], None]':
529
868
  for env_name in ("REQUESTS_CA_BUNDLE", "CURL_CA_BUNDLE", "SSL_CERT_FILE"):
530
869
  value = os.environ.get(env_name, "").strip()
531
870
  if value: