python-codex 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pycodex/agent.py CHANGED
@@ -1,6 +1,7 @@
1
1
 
2
2
  import asyncio
3
3
  import json
4
+ import re
4
5
  from typing import Callable
5
6
 
6
7
  from .context import ContextManager
@@ -26,6 +27,18 @@ if typing.TYPE_CHECKING:
26
27
 
27
28
  EventHandler = Callable[[AgentEvent], None]
28
29
  NOOP_EVENT_HANDLER: 'EventHandler' = lambda _event: None
30
+ _REQUESTED_TOKENS_RE = re.compile(
31
+ r"requested\s+([0-9,]+)\s+tokens",
32
+ re.IGNORECASE,
33
+ )
34
+ _REQUESTED_TOKEN_SPLIT_RE = re.compile(
35
+ r"\(([0-9,]+)\s+in\s+the\s+messages,\s+([0-9,]+)\s+in\s+the\s+completion\)",
36
+ re.IGNORECASE,
37
+ )
38
+ _MAX_CONTEXT_TOKENS_RE = re.compile(
39
+ r"maximum\s+context\s+length\s+is\s+([0-9,]+)\s+tokens",
40
+ re.IGNORECASE,
41
+ )
29
42
 
30
43
 
31
44
  class TurnInterrupted(RuntimeError):
@@ -58,6 +71,10 @@ class AgentLoop:
58
71
  self._event_handler = event_handler
59
72
  self._history: 'typing.List[ConversationItem]' = list(initial_history)
60
73
  self._rollout_recorder = rollout_recorder
74
+ self._auto_compact_token_limit = (
75
+ self._context_manager.resolve_auto_compact_token_limit()
76
+ )
77
+ self._last_total_usage_tokens: 'typing.Union[int, None]' = None
61
78
  self.interrupt_asap = False
62
79
 
63
80
  @property
@@ -101,8 +118,6 @@ class AgentLoop:
101
118
  turn_id = turn_id or uuid7_string()
102
119
  self.interrupt_asap = False
103
120
  new_user_messages = [UserMessage(text=text) for text in texts]
104
- self._history.extend(new_user_messages)
105
- self._persist_history_items(new_user_messages)
106
121
 
107
122
  self._emit(
108
123
  "turn_started",
@@ -110,6 +125,9 @@ class AgentLoop:
110
125
  user_text="\n".join(texts),
111
126
  user_texts=list(texts),
112
127
  )
128
+ await self._maybe_auto_compact(turn_id, phase="pre_turn")
129
+ self._history.extend(new_user_messages)
130
+ self._persist_history_items(new_user_messages)
113
131
 
114
132
  last_assistant_message: 'typing.Union[str, None]' = None
115
133
  final_response_items: 'typing.Tuple[\n typing.Union[typing.Union[AssistantMessage, ToolCall], ReasoningItem], ...\n]' = ()
@@ -122,23 +140,11 @@ class AgentLoop:
122
140
  iteration,
123
141
  output_text=last_assistant_message,
124
142
  )
143
+ await self._maybe_auto_compact(turn_id, phase="mid_turn")
125
144
  iteration += 1
126
- prompt = self._context_manager.build_prompt(
127
- self._history,
128
- self._tool_registry.model_visible_specs(),
129
- self._parallel_tool_calls,
130
- turn_id=turn_id,
131
- )
132
- self._emit(
133
- "model_called",
145
+ response = await self._complete_model_request(
134
146
  turn_id,
135
- iteration=iteration,
136
- history_size=len(prompt.input),
137
- tool_count=len(prompt.tools),
138
- )
139
- response = await self._model_client.complete(
140
- prompt,
141
- lambda event: self._handle_model_stream_event(turn_id, event),
147
+ iteration,
142
148
  )
143
149
  final_response_items = tuple(response.items)
144
150
  self._emit(
@@ -193,6 +199,10 @@ class AgentLoop:
193
199
  except TurnInterrupted:
194
200
  raise
195
201
  except Exception as exc:
202
+ context_usage = _usage_from_context_length_error(str(exc))
203
+ if context_usage is not None:
204
+ self._remember_token_usage(context_usage)
205
+ self._emit("token_count", turn_id, usage=context_usage)
196
206
  self._emit(
197
207
  "turn_failed",
198
208
  turn_id,
@@ -287,6 +297,8 @@ class AgentLoop:
287
297
  return
288
298
 
289
299
  def _handle_model_stream_event(self, turn_id: 'str', event: 'ModelStreamEvent') -> 'None':
300
+ if event.kind == "token_count":
301
+ self._remember_token_usage(event.payload.get("usage"))
290
302
  if event.kind == "assistant_delta":
291
303
  self._emit("assistant_delta", turn_id, **event.payload)
292
304
  elif event.kind == "tool_call":
@@ -296,6 +308,140 @@ class AgentLoop:
296
308
  elif event.kind == "stream_error":
297
309
  self._emit("stream_error", turn_id, **event.payload)
298
310
 
311
+ def _remember_token_usage(self, usage: 'object') -> 'None':
312
+ if not isinstance(usage, dict):
313
+ return
314
+ try:
315
+ self._last_total_usage_tokens = int(usage["total_tokens"])
316
+ except (KeyError, TypeError, ValueError):
317
+ return
318
+
319
+ async def _complete_model_request(
320
+ self,
321
+ turn_id: 'str',
322
+ iteration: 'int',
323
+ ) -> 'typing.Any':
324
+ attempted_context_compact = False
325
+ while True:
326
+ prompt = self._context_manager.build_prompt(
327
+ self._history,
328
+ self._tool_registry.model_visible_specs(),
329
+ self._parallel_tool_calls,
330
+ turn_id=turn_id,
331
+ )
332
+ self._emit(
333
+ "model_called",
334
+ turn_id,
335
+ iteration=iteration,
336
+ history_size=len(prompt.input),
337
+ tool_count=len(prompt.tools),
338
+ )
339
+ try:
340
+ return await self._model_client.complete(
341
+ prompt,
342
+ lambda event: self._handle_model_stream_event(turn_id, event),
343
+ )
344
+ except Exception as exc:
345
+ context_usage = _usage_from_context_length_error(str(exc))
346
+ if context_usage is None or attempted_context_compact:
347
+ raise
348
+ attempted_context_compact = True
349
+ self._remember_token_usage(context_usage)
350
+ self._emit("token_count", turn_id, usage=context_usage)
351
+ await self._run_auto_compact(
352
+ turn_id,
353
+ phase="context_length_exceeded",
354
+ total_tokens=context_usage.get("total_tokens"),
355
+ token_limit=_context_length_error_token_limit(str(exc)),
356
+ prune_tool_results_on_context_error=True,
357
+ )
358
+ self._raise_if_interrupt_requested(turn_id, iteration)
359
+
360
+ async def _maybe_auto_compact(
361
+ self,
362
+ turn_id: 'str',
363
+ phase: 'str',
364
+ ) -> 'None':
365
+ limit = self._auto_compact_token_limit
366
+ total_tokens = self._last_total_usage_tokens
367
+ if limit is None or total_tokens is None:
368
+ return
369
+ if total_tokens < limit or not self._history:
370
+ return
371
+
372
+ await self._run_auto_compact(
373
+ turn_id,
374
+ phase=phase,
375
+ total_tokens=total_tokens,
376
+ token_limit=limit,
377
+ prune_tool_results_on_context_error=True,
378
+ )
379
+
380
+ async def _run_auto_compact(
381
+ self,
382
+ turn_id: 'str',
383
+ phase: 'str',
384
+ total_tokens: 'typing.Union[int, None]' = None,
385
+ token_limit: 'typing.Union[int, None]' = None,
386
+ prune_tool_results_on_context_error: 'bool' = False,
387
+ ) -> 'None':
388
+ from .utils.compactor import compact_agent_loop
389
+
390
+ payload: 'typing.Dict[str, object]' = {"phase": phase}
391
+ if total_tokens is not None:
392
+ payload["total_tokens"] = total_tokens
393
+ if token_limit is not None:
394
+ payload["token_limit"] = token_limit
395
+ self._emit(
396
+ "auto_compact_started",
397
+ turn_id,
398
+ **payload,
399
+ )
400
+
401
+ def handle_compact_stream_event(event: 'ModelStreamEvent') -> 'None':
402
+ if event.kind == "stream_error":
403
+ self._emit("stream_error", turn_id, **event.payload)
404
+
405
+ try:
406
+ compact_result = await compact_agent_loop(
407
+ self,
408
+ handle_compact_stream_event,
409
+ prune_tool_results_on_context_error,
410
+ )
411
+ except Exception as exc:
412
+ failed_payload = dict(payload)
413
+ failed_payload.update(
414
+ {
415
+ "error": str(exc),
416
+ "error_type": type(exc).__name__,
417
+ }
418
+ )
419
+ self._emit(
420
+ "auto_compact_failed",
421
+ turn_id,
422
+ **failed_payload,
423
+ )
424
+ raise
425
+
426
+ self._last_total_usage_tokens = None
427
+ if compact_result is None:
428
+ return
429
+ completed_payload = dict(payload)
430
+ completed_payload.update(
431
+ {
432
+ "original_item_count": compact_result.original_item_count,
433
+ "retained_item_count": compact_result.retained_item_count,
434
+ "summary": compact_result.display_text(),
435
+ }
436
+ )
437
+ if compact_result.pruned_tool_results:
438
+ completed_payload["pruned_tool_results"] = compact_result.pruned_tool_results
439
+ self._emit(
440
+ "auto_compact_completed",
441
+ turn_id,
442
+ **completed_payload,
443
+ )
444
+
299
445
  def _build_follow_up_messages(
300
446
  self,
301
447
  tool_results: 'typing.List[ToolResult]',
@@ -326,3 +472,38 @@ class AgentLoop:
326
472
  )
327
473
  )
328
474
  return follow_ups
475
+
476
+
477
+ def _usage_from_context_length_error(
478
+ message: 'str',
479
+ ) -> 'typing.Union[typing.Dict[str, int], None]':
480
+ lower = message.lower()
481
+ if (
482
+ "context_length_exceeded" not in lower
483
+ and "maximum context length" not in lower
484
+ ):
485
+ return None
486
+
487
+ requested_match = _REQUESTED_TOKENS_RE.search(message)
488
+ if requested_match is None:
489
+ return None
490
+
491
+ usage = {"total_tokens": _parse_token_count(requested_match.group(1))}
492
+ split_match = _REQUESTED_TOKEN_SPLIT_RE.search(message)
493
+ if split_match is not None:
494
+ usage["input_tokens"] = _parse_token_count(split_match.group(1))
495
+ usage["output_tokens"] = _parse_token_count(split_match.group(2))
496
+ else:
497
+ usage["input_tokens"] = usage["total_tokens"]
498
+ return usage
499
+
500
+
501
+ def _context_length_error_token_limit(message: 'str') -> 'typing.Union[int, None]':
502
+ limit_match = _MAX_CONTEXT_TOKENS_RE.search(message)
503
+ if limit_match is None:
504
+ return None
505
+ return _parse_token_count(limit_match.group(1))
506
+
507
+
508
+ def _parse_token_count(value: 'str') -> 'int':
509
+ return int(value.replace(",", ""))
pycodex/cli.py CHANGED
@@ -594,6 +594,7 @@ async def run_interactive_session(
594
594
  compact_result = await compact_agent_loop(
595
595
  agent_loop,
596
596
  handle_compact_stream_event,
597
+ True,
597
598
  )
598
599
  if compact_result is None:
599
600
  view.write_line("Nothing to compact.")
pycodex/context.py CHANGED
@@ -78,6 +78,7 @@ class ContextConfig:
78
78
  project_doc_max_bytes: 'typing.Union[int, None]' = None
79
79
  model: 'typing.Union[str, None]' = None
80
80
  model_context_window: 'typing.Union[int, None]' = None
81
+ model_auto_compact_token_limit: 'typing.Union[int, None]' = None
81
82
  personality: 'typing.Union[str, None]' = None
82
83
  approval_policy: 'typing.Union[str, None]' = None
83
84
  sandbox_mode: 'typing.Union[str, None]' = None
@@ -120,6 +121,9 @@ class ContextConfig:
120
121
  project_doc_max_bytes=_normalize_int(selected.get("project_doc_max_bytes")),
121
122
  model=_normalize_text(selected.get("model")),
122
123
  model_context_window=_normalize_int(selected.get("model_context_window")),
124
+ model_auto_compact_token_limit=_normalize_int(
125
+ selected.get("model_auto_compact_token_limit")
126
+ ),
123
127
  personality=_normalize_text(selected.get("personality")),
124
128
  approval_policy=_normalize_text(selected.get("approval_policy")),
125
129
  sandbox_mode=_normalize_text(selected.get("sandbox_mode")),
@@ -268,6 +272,18 @@ class ContextManager:
268
272
  effective_percent = DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT
269
273
  return context_window * max(effective_percent, 0) // 100
270
274
 
275
+ def resolve_auto_compact_token_limit(self) -> 'typing.Union[int, None]':
276
+ if self._config.model_auto_compact_token_limit is not None:
277
+ return self._config.model_auto_compact_token_limit
278
+
279
+ model_slug = self._config.model
280
+ if model_slug is None:
281
+ return None
282
+ model_metadata = _load_models_by_slug().get(model_slug)
283
+ if model_metadata is None:
284
+ return None
285
+ return _normalize_int(model_metadata.get("auto_compact_token_limit"))
286
+
271
287
  def _resolve_model_instructions(self) -> 'typing.Union[str, None]':
272
288
  model_slug = self._config.model
273
289
  if model_slug is None:
pycodex/model.py CHANGED
@@ -287,6 +287,8 @@ class ResponsesModelClient:
287
287
  event_handler,
288
288
  )
289
289
  except ResponsesRetryableError as exc:
290
+ if _is_context_length_error_message(str(exc)):
291
+ raise ResponsesApiError(str(exc)) from exc
290
292
  if retries >= max_retries:
291
293
  raise
292
294
  retries += 1
@@ -780,11 +782,14 @@ class ResponsesModelClient:
780
782
  )
781
783
 
782
784
  message = str(error.get("message") or "responses stream failed")
783
- code = str(error.get("code") or "").strip()
785
+ code = str(error.get("code") or error.get("type") or "").strip()
786
+ if _is_context_length_error_message(message):
787
+ raise ResponsesApiError(self._format_response_failed_error(message))
784
788
  if code in {
785
789
  "context_length_exceeded",
786
790
  "insufficient_quota",
787
791
  "invalid_prompt",
792
+ "model_output_invalid",
788
793
  "usage_not_included",
789
794
  }:
790
795
  raise ResponsesApiError(self._format_response_failed_error(message))
@@ -888,6 +893,14 @@ def _optional_int(value: 'object') -> 'typing.Union[int, None]':
888
893
  return int(value)
889
894
 
890
895
 
896
+ def _is_context_length_error_message(message: 'str') -> 'bool':
897
+ lower = message.lower()
898
+ return (
899
+ "context_length_exceeded" in lower
900
+ or "maximum context length" in lower
901
+ )
902
+
903
+
891
904
  def _requests_verify_setting() -> 'typing.Union[typing.Union[str, bool], None]':
892
905
  for env_name in ("REQUESTS_CA_BUNDLE", "CURL_CA_BUNDLE", "SSL_CERT_FILE"):
893
906
  value = os.environ.get(env_name, "").strip()
@@ -1,6 +1,13 @@
1
1
  from dataclasses import dataclass
2
2
 
3
- from ..protocol import AssistantMessage, ConversationItem, ModelStreamEvent, UserMessage
3
+ from ..protocol import (
4
+ AssistantMessage,
5
+ ConversationItem,
6
+ ModelStreamEvent,
7
+ ToolCall,
8
+ ToolResult,
9
+ UserMessage,
10
+ )
4
11
  from .random_ids import uuid7_string
5
12
  import typing
6
13
 
@@ -35,6 +42,7 @@ _SUBAGENT_NOTIFICATION_PREFIX = "<subagent_notification>\n"
35
42
  class CompactResult:
36
43
  history: 'typing.Tuple[ConversationItem, ...]'
37
44
  original_item_count: 'int'
45
+ pruned_tool_results: 'int' = 0
38
46
 
39
47
  @property
40
48
  def retained_item_count(self) -> 'int':
@@ -43,10 +51,14 @@ class CompactResult:
43
51
  def display_text(self) -> 'str':
44
52
  retained_label = _pluralize("item", self.retained_item_count)
45
53
  original_label = _pluralize("item", self.original_item_count)
46
- return (
54
+ text = (
47
55
  f"compact({self.original_item_count} {original_label}) -> "
48
56
  f"{self.retained_item_count} {retained_label} + [summary]"
49
57
  )
58
+ if self.pruned_tool_results:
59
+ tool_label = _pluralize("tool response", self.pruned_tool_results)
60
+ text += f" (dropped {self.pruned_tool_results} old {tool_label})"
61
+ return text
50
62
 
51
63
 
52
64
  def compact(
@@ -60,24 +72,42 @@ def compact(
60
72
  async def compact_agent_loop(
61
73
  agent_loop: 'AgentLoop',
62
74
  stream_event_handler: 'typing.Union[typing.Callable[[ModelStreamEvent], None], None]' = None,
75
+ prune_tool_results_on_context_error: 'bool' = False,
63
76
  ) -> 'typing.Union[CompactResult, None]':
64
77
  history = agent_loop.history
65
78
  if not history:
66
79
  return None
67
80
  original_item_count = len(history)
81
+ pruned_tool_results = 0
68
82
 
69
- compact_prompt = UserMessage(text=DEFAULT_COMPACT_PROMPT)
70
- prompt = agent_loop._context_manager.build_prompt(
71
- list(history) + [compact_prompt],
72
- [],
73
- False,
74
- turn_id=uuid7_string(),
75
- )
76
83
  noop_stream_event_handler = lambda _event: None
77
- response = await agent_loop._model_client.complete(
78
- prompt,
79
- stream_event_handler or noop_stream_event_handler,
80
- )
84
+ while True:
85
+ compact_prompt = UserMessage(text=DEFAULT_COMPACT_PROMPT)
86
+ prompt = agent_loop._context_manager.build_prompt(
87
+ list(history) + [compact_prompt],
88
+ [],
89
+ False,
90
+ turn_id=uuid7_string(),
91
+ )
92
+ try:
93
+ response = await agent_loop._model_client.complete(
94
+ prompt,
95
+ stream_event_handler or noop_stream_event_handler,
96
+ )
97
+ break
98
+ except Exception as exc:
99
+ if (
100
+ not prune_tool_results_on_context_error
101
+ or not _is_context_length_error(str(exc))
102
+ ):
103
+ raise
104
+ pruned_history = prune_oldest_tool_response(history)
105
+ if pruned_history is None:
106
+ raise
107
+ history = pruned_history
108
+ pruned_tool_results += 1
109
+ agent_loop.replace_history(history)
110
+
81
111
  compacted_history = compact(
82
112
  list(history) + [compact_prompt] + list(response.items)
83
113
  )
@@ -88,6 +118,32 @@ async def compact_agent_loop(
88
118
  return CompactResult(
89
119
  history=compacted_history,
90
120
  original_item_count=original_item_count,
121
+ pruned_tool_results=pruned_tool_results,
122
+ )
123
+
124
+
125
+ def prune_oldest_tool_response(
126
+ history: 'typing.Sequence[ConversationItem]',
127
+ ) -> 'typing.Union[typing.Tuple[ConversationItem, ...], None]':
128
+ items = list(history)
129
+ tool_result_index = None
130
+ call_id = None
131
+ for index, item in enumerate(items):
132
+ if isinstance(item, ToolResult):
133
+ tool_result_index = index
134
+ call_id = item.call_id
135
+ break
136
+ if tool_result_index is None:
137
+ return None
138
+
139
+ indexes_to_remove = {tool_result_index}
140
+ for index, item in enumerate(items[:tool_result_index]):
141
+ if isinstance(item, ToolCall) and item.call_id == call_id:
142
+ indexes_to_remove.add(index)
143
+ break
144
+
145
+ return tuple(
146
+ item for index, item in enumerate(items) if index not in indexes_to_remove
91
147
  )
92
148
 
93
149
 
@@ -187,3 +243,11 @@ def _pluralize(noun: 'str', count: 'int') -> 'str':
187
243
 
188
244
  def _is_synthetic_user_message(text: 'str') -> 'bool':
189
245
  return text.startswith(_SUBAGENT_NOTIFICATION_PREFIX)
246
+
247
+
248
+ def _is_context_length_error(message: 'str') -> 'bool':
249
+ lower = message.lower()
250
+ return (
251
+ "context_length_exceeded" in lower
252
+ or "maximum context length" in lower
253
+ )
@@ -705,6 +705,59 @@ class CliSessionView:
705
705
  self._spinner.set_label("reconnecting")
706
706
  return
707
707
 
708
+ if event.kind == "auto_compact_started":
709
+ self._finish_stream()
710
+ total_tokens = event.payload.get("total_tokens")
711
+ token_limit = event.payload.get("token_limit")
712
+ if total_tokens is not None and token_limit is not None:
713
+ message = f"[status] auto-compact: {total_tokens}/{token_limit} tokens"
714
+ else:
715
+ message = "[status] auto-compact"
716
+ self._print_line(
717
+ colorize_cli_message(message, "status", self._color_enabled)
718
+ )
719
+ if self._input_active:
720
+ self._spinner.pause()
721
+ else:
722
+ self._spinner.resume()
723
+ self._spinner.set_label("compacting context")
724
+ self._spinner.render_now()
725
+ return
726
+
727
+ if event.kind == "auto_compact_completed":
728
+ self._finish_stream()
729
+ summary = str(event.payload.get("summary", "")).strip()
730
+ message = f"[status] {summary}" if summary else "[status] context compacted"
731
+ self._print_line(
732
+ colorize_cli_message(message, "status", self._color_enabled)
733
+ )
734
+ if self._input_active:
735
+ self._spinner.pause()
736
+ else:
737
+ self._spinner.resume()
738
+ self._spinner.set_label("thinking")
739
+ self._spinner.render_now()
740
+ return
741
+
742
+ if event.kind == "auto_compact_failed":
743
+ self._finish_stream()
744
+ error = str(event.payload.get("error", "")).strip()
745
+ message = (
746
+ f"[error] auto-compact failed: {error}"
747
+ if error
748
+ else "[error] auto-compact failed"
749
+ )
750
+ self._print_line(
751
+ colorize_cli_message(message, "error", self._color_enabled)
752
+ )
753
+ if self._input_active:
754
+ self._spinner.pause()
755
+ else:
756
+ self._spinner.resume()
757
+ self._spinner.set_label("thinking")
758
+ self._spinner.render_now()
759
+ return
760
+
708
761
  if event.kind == "assistant_delta":
709
762
  delta = str(event.payload.get("delta", ""))
710
763
  if not delta:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-codex
3
- Version: 0.1.11
3
+ Version: 0.1.12
4
4
  Summary: A minimal Python extraction of Codex's main agent loop
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.6.2
@@ -185,6 +185,14 @@ Current behavior:
185
185
  - `/compact` synthesizes a local handoff summary, replaces the in-memory
186
186
  conversation history with the compacted view, and appends a compacted-history
187
187
  entry to the rollout so later `/resume` sees the same state
188
+ - `model_auto_compact_token_limit = <tokens>` in `config.toml` enables the same
189
+ compaction path automatically when the latest reported usage reaches that
190
+ threshold before a follow-up sampling request or the next user turn
191
+ - if a model request fails with `context_length_exceeded`, pycodex now treats
192
+ the provider-reported requested token count as a failed-request usage sample,
193
+ triggers the same compact path immediately, and retries the request once; if
194
+ the compact request is also over the limit, it repeatedly drops the oldest
195
+ tool response plus its matching tool call before retrying compact
188
196
  - new sessions are now recorded under `CODEX_HOME/sessions/.../rollout-*.jsonl`
189
197
  with a stable session/thread id and per-item append+flush semantics so
190
198
  `/resume` reads back the same rollout format
@@ -211,7 +219,12 @@ Current behavior:
211
219
  `reasoning_content` are translated back into Responses `reasoning` items, and
212
220
  historical `reasoning` items are replayed into downstream assistant messages
213
221
  via the `reasoning` field. Streaming token usage is also requested from vLLM
214
- and forwarded to the final `response.completed.response.usage`
222
+ and forwarded to the final `response.completed.response.usage`. If a
223
+ downstream chat stream terminates after emitting only reasoning, with no
224
+ assistant content and no tool call, the compat layer discards that partial
225
+ reasoning, retries the same downstream request once, and only then emits
226
+ `response.failed` with `type = "model_output_invalid"` if the retry is still
227
+ reasoning-only
215
228
  - standalone `responses_server` now also supports downstream `/v1/messages`
216
229
  backends via `--outcomming-api messages`, while keeping the internal
217
230
  canonical request/route logic in chat-completions shape
@@ -1,11 +1,11 @@
1
1
  pycodex/__init__.py,sha256=jCnC_Bgotlxa4GwO3Re2sChKGY49TRM-uVZEQ9uBpfw,3106
2
- pycodex/agent.py,sha256=s0FrF_XG2pHKryooS461Jr_acmQ_TKTp2JLGQNiny6w,11888
3
- pycodex/cli.py,sha256=5xQuVhjXh3gGKUawFdmevlqykDjz1va_0kvwfv_rksA,32059
2
+ pycodex/agent.py,sha256=593HxMrQSo_-hZMUL299Rhdg70NBcx1fjKD7x8ryQ1c,18173
3
+ pycodex/cli.py,sha256=wvPoBYX3GPHHsOY6DtMLotV2vnfDL1JvsMVu6ZznqrM,32081
4
4
  pycodex/collaboration.py,sha256=yQ6pBD-R3ZWR4_FAYQFoS7KF0m4LLD42otXIbPqw2ys,641
5
5
  pycodex/compat.py,sha256=IO0X7AgcYhlHnYnpvBZ6leCh_UjoQzg5HLT5wYBNNIw,3155
6
- pycodex/context.py,sha256=NuyiDJoUbhj4Xp6xExqs4cATGCKfYQ-YUx2BC7JXGeA,25306
6
+ pycodex/context.py,sha256=IeqSzVZktL5-kTCY9ZlZr9J-NEe31qEFIvoacQl3jwk,26018
7
7
  pycodex/doctor.py,sha256=De3M4hRBJq8ZeqsUJgHz0vitqrH18YugrEnz7oHhTdQ,10572
8
- pycodex/model.py,sha256=ApPW9ecs5Z3oo-nUL9VHg3LzB6nwI0q1iE7dGClgheQ,33662
8
+ pycodex/model.py,sha256=dJKsCfG_xytPkJuE7xPOXrVquoTqtfudSNj33uhWDlE,34181
9
9
  pycodex/portable.py,sha256=gxl2E2h5uZJbasMEPPs-nyALFPIvX79T2ZYsu6vXZrg,15656
10
10
  pycodex/portable_server.py,sha256=6I3pQkWj3e_SFlDXY2mGdCPns1w_3PSxByBV9wv5epI,7331
11
11
  pycodex/protocol.py,sha256=LYDzJefu1tugqQzee4NuZzxhGAv3hXrNcnlw04CudAY,11106
@@ -52,28 +52,28 @@ pycodex/tools/wait_tool.py,sha256=EJcW2Ev9jUD9eZ7cFDNOLDzlywS2BD3ll6pArXyxfrI,23
52
52
  pycodex/tools/web_search_tool.py,sha256=_7r2ltWhnBM0ZCgweA5a0GbEi0qSFAHOyi1RHrl6tfQ,957
53
53
  pycodex/tools/write_stdin_tool.py,sha256=nCuProkbeewfQ_yS8CgBajo--K3EmkXzJYh1D2QtAM4,2549
54
54
  pycodex/utils/__init__.py,sha256=jE63cZR1IBzs4ED86lwdYRLqV5FmPhNNDzIgC90mr6A,1216
55
- pycodex/utils/compactor.py,sha256=ZCzGc02xHmXq1rIjnG2gATKcFtt6r-OGsCIK0ypjnyI,6467
55
+ pycodex/utils/compactor.py,sha256=bJbtsyT-KCPasGMH403njrrSDDbPyKbnrVaFSt5O4Vg,8440
56
56
  pycodex/utils/debug.py,sha256=JeEB5JfzYfbdG0fXlrWFmXyR1ts86fKsI_97IqgF6R0,296
57
57
  pycodex/utils/dotenv.py,sha256=rGKmurHjm7GdP4giyjHBPpSPv2Oi45qBqDB6HG3CnfA,1866
58
58
  pycodex/utils/get_env.py,sha256=5fNhcNhujOakWV6AS66rGW3jEA68WGpuE4YVXJZFE6U,7427
59
59
  pycodex/utils/random_ids.py,sha256=zBphjVGc7OXk9ZNExAbxRi_bk7ipyLG491qTv7hi8jM,380
60
60
  pycodex/utils/session_persist.py,sha256=Ntu0jcb2cEZbXpKDe0LXD-OuxfjK0SzBV0lRi90-NAM,16496
61
- pycodex/utils/visualize.py,sha256=JURzq2AbV046bblE5fojcAe885Juda0LDxt_gqT2PUc,41006
61
+ pycodex/utils/visualize.py,sha256=oWgApf9pXqd-3ijjelhQn0PLPEW3abR_R_PZN2nWhwQ,43099
62
62
  responses_server/__init__.py,sha256=3yPv_zeGT7P11tTnmj5kXktISLNsNW-02MUnnbiZcb0,394
63
63
  responses_server/__main__.py,sha256=9SRp-Yw7ShGxc6DhSIXcDLKgGEdAVm3oBZ59rBOPjT0,62
64
- responses_server/app.py,sha256=53SEwI2pUj_YL8zWvvYhQHiT1EUVVPzdRzOqXDvFMJ0,7770
64
+ responses_server/app.py,sha256=ack2a0otiBwq_DpsFURqLMlQzcf9oJPwo8o6iJ1fuig,7885
65
65
  responses_server/config.py,sha256=leb3_uPrCyYdUIkyRyVPX4luGF88dQ62OkhRLPe7uxw,2718
66
66
  responses_server/messages_api.py,sha256=WgO6J1jz2pOJkI79rLXp-pS1yxtLARcwX8T6JX5Vkcc,16971
67
67
  responses_server/payload_processors.py,sha256=cbXGW8Xi-mliaWRg0_Af41X0vXV2W6R9VBzTE6DXfe4,3483
68
68
  responses_server/server.py,sha256=Ko-Cqz_kW-uve091itucMklsPhEei77v-YcTjtjEdqU,2286
69
69
  responses_server/session_store.py,sha256=ZD3cH2aEOkWaQsu5qTzcal2mThTSFQPAhAhPUN9srgI,1115
70
- responses_server/stream_router.py,sha256=PuW8_fo8c_R9kd_Gy_Z7nyij7-xW_lPcfFcnYI2PvQA,35434
70
+ responses_server/stream_router.py,sha256=UiP-T4IKgJubD1L0AY93N3DqUh4K41fNcdONmC3Z-0A,37161
71
71
  responses_server/trajectory_dump.py,sha256=XCwYaZZmlAxSsSXOfhk3zRvyfDpOHX5R8KzspScNFUM,3435
72
72
  responses_server/tools/__init__.py,sha256=ivsBSEy0SBUhY-Uea5v1XMLXShkwHdCVl0id-1FwdZg,150
73
73
  responses_server/tools/custom_adapter.py,sha256=LxO7ldydvR-GWachDz8GKC0Q8KGGFoFPbZxM0QvxuZ0,8350
74
74
  responses_server/tools/web_search.py,sha256=pm4ZUiHUfxc0bGY1kEvt-BCzDrZIyP24xzPUcga2ul0,8908
75
- python_codex-0.1.11.dist-info/METADATA,sha256=eRfTDhadn89LMl9z1BITds05EbtinXymdUgT40km-IE,15720
76
- python_codex-0.1.11.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
77
- python_codex-0.1.11.dist-info/entry_points.txt,sha256=sNUVakoVuTrzJH505ZgRTQxmtRRPUHV_EH0i6EbYTyM,45
78
- python_codex-0.1.11.dist-info/licenses/LICENSE,sha256=0X8ifk312hYAORM4hlzg8wVSEXYKNmiPgWlB1YIy2Nw,10926
79
- python_codex-0.1.11.dist-info/RECORD,,
75
+ python_codex-0.1.12.dist-info/METADATA,sha256=T85L7qOCT8P-MNzXHQsNPttvcGQ9ZTetWJbuy6QC6Pw,16657
76
+ python_codex-0.1.12.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
77
+ python_codex-0.1.12.dist-info/entry_points.txt,sha256=sNUVakoVuTrzJH505ZgRTQxmtRRPUHV_EH0i6EbYTyM,45
78
+ python_codex-0.1.12.dist-info/licenses/LICENSE,sha256=0X8ifk312hYAORM4hlzg8wVSEXYKNmiPgWlB1YIy2Nw,10926
79
+ python_codex-0.1.12.dist-info/RECORD,,
responses_server/app.py CHANGED
@@ -37,14 +37,18 @@ def _stream_events(response_server: 'ResponseServer', request_body: 'typing.Dict
37
37
 
38
38
  import traceback
39
39
  exc_info = traceback.format_exception(type(exc), exc, exc.__traceback__)
40
+ error = {
41
+ "message": '\n'.join(exc_info),
42
+ }
43
+ error_type = getattr(exc, "error_type", None)
44
+ if error_type:
45
+ error["type"] = error_type
40
46
  yield _format_sse_event(
41
47
  "response.failed",
42
48
  {
43
49
  "type": "response.failed",
44
50
  "response": {
45
- "error": {
46
- "message": '\n'.join(exc_info),
47
- }
51
+ "error": error,
48
52
  },
49
53
  },
50
54
  )
@@ -36,7 +36,13 @@ class UnsupportedIncommingFeature(ValueError):
36
36
 
37
37
 
38
38
  class OutcommingChatError(RuntimeError):
39
- pass
39
+ def __init__(
40
+ self,
41
+ message: 'str',
42
+ error_type: 'typing.Union[str, None]' = None,
43
+ ) -> 'None':
44
+ super().__init__(message)
45
+ self.error_type = error_type
40
46
 
41
47
 
42
48
  class StreamRouter:
@@ -312,10 +318,14 @@ class StreamRouter:
312
318
  current_request,
313
319
  trajectory_dump,
314
320
  )
321
+ retried_reasoning_only_output = False
315
322
 
316
323
  while True:
317
324
  tool_calls: 'typing.Dict[int, typing.Dict[str, object]]' = {}
325
+ finish_reasons: 'typing.List[str]' = []
318
326
  current_usage: 'typing.Dict[str, object]' = {}
327
+ reasoning_start = len(reasoning_parts)
328
+ text_start = len(text_parts)
319
329
  for chunk in current_stream:
320
330
  for event_name, payload in self._consume_chat_chunk(
321
331
  chunk,
@@ -323,6 +333,7 @@ class StreamRouter:
323
333
  text_parts,
324
334
  tool_calls,
325
335
  current_usage,
336
+ finish_reasons,
326
337
  ):
327
338
  yield event_name, payload
328
339
  if current_usage:
@@ -362,6 +373,29 @@ class StreamRouter:
362
373
  )
363
374
  continue
364
375
 
376
+ if (
377
+ len(reasoning_parts) > reasoning_start
378
+ and len(text_parts) == text_start
379
+ and not ordinary_tool_calls
380
+ ):
381
+ if not retried_reasoning_only_output:
382
+ retried_reasoning_only_output = True
383
+ del reasoning_parts[reasoning_start:]
384
+ del text_parts[text_start:]
385
+ current_request = json.loads(json.dumps(current_request))
386
+ current_stream = self._open_tracked_outcomming_stream(
387
+ current_request,
388
+ trajectory_dump,
389
+ )
390
+ continue
391
+ finish_reason = finish_reasons[-1] if finish_reasons else "<unknown>"
392
+ raise OutcommingChatError(
393
+ "outcomming chat completion ended without assistant content "
394
+ "or tool calls after emitting only reasoning "
395
+ f"(finish_reason={finish_reason!r})",
396
+ error_type="model_output_invalid",
397
+ )
398
+
365
399
  for item in self._build_output_items(
366
400
  reasoning_parts,
367
401
  text_parts,
@@ -676,6 +710,7 @@ class StreamRouter:
676
710
  text_parts: 'typing.List[str]',
677
711
  tool_calls: 'typing.Dict[int, typing.Dict[str, object]]',
678
712
  current_usage: 'typing.Dict[str, object]',
713
+ finish_reasons: 'typing.List[str]',
679
714
  ) -> 'typing.List[typing.Tuple[str, typing.Dict[str, object]]]':
680
715
  events: 'typing.List[typing.Tuple[str, typing.Dict[str, object]]]' = []
681
716
  usage = payload.get("usage")
@@ -689,6 +724,9 @@ class StreamRouter:
689
724
  for choice in choices:
690
725
  if not isinstance(choice, dict):
691
726
  continue
727
+ finish_reason = choice.get("finish_reason")
728
+ if isinstance(finish_reason, str) and finish_reason:
729
+ finish_reasons.append(finish_reason)
692
730
  delta = choice.get("delta") or {}
693
731
  if not isinstance(delta, dict):
694
732
  continue