python-codex 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycodex/agent.py +198 -17
- pycodex/cli.py +1 -0
- pycodex/context.py +16 -0
- pycodex/model.py +14 -1
- pycodex/utils/compactor.py +77 -13
- pycodex/utils/visualize.py +53 -0
- {python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/METADATA +15 -2
- {python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/RECORD +13 -13
- responses_server/app.py +7 -3
- responses_server/stream_router.py +39 -1
- {python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/WHEEL +0 -0
- {python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/entry_points.txt +0 -0
- {python_codex-0.1.11.dist-info → python_codex-0.1.12.dist-info}/licenses/LICENSE +0 -0
pycodex/agent.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
import asyncio
|
|
3
3
|
import json
|
|
4
|
+
import re
|
|
4
5
|
from typing import Callable
|
|
5
6
|
|
|
6
7
|
from .context import ContextManager
|
|
@@ -26,6 +27,18 @@ if typing.TYPE_CHECKING:
|
|
|
26
27
|
|
|
27
28
|
EventHandler = Callable[[AgentEvent], None]
|
|
28
29
|
NOOP_EVENT_HANDLER: 'EventHandler' = lambda _event: None
|
|
30
|
+
# Patterns that pull token figures out of provider "context length exceeded"
# error text. Counts may be written with thousands separators ("128,000"), so
# every capture group allows commas -- but it must START with a digit. The
# previous "[0-9,]+" form also matched a separator-only capture such as ",",
# which _parse_token_count turns into int("") and a ValueError raised while
# the original model error is still being handled.

# Total tokens the failed request asked for: "... requested 130,512 tokens ...".
_REQUESTED_TOKENS_RE = re.compile(
    r"requested\s+([0-9][0-9,]*)\s+tokens",
    re.IGNORECASE,
)

# Optional prompt/completion breakdown:
# "(130,000 in the messages, 512 in the completion)".
_REQUESTED_TOKEN_SPLIT_RE = re.compile(
    r"\(([0-9][0-9,]*)\s+in\s+the\s+messages,\s+([0-9][0-9,]*)\s+in\s+the\s+completion\)",
    re.IGNORECASE,
)

# Limit quoted by the provider: "maximum context length is 128000 tokens".
_MAX_CONTEXT_TOKENS_RE = re.compile(
    r"maximum\s+context\s+length\s+is\s+([0-9][0-9,]*)\s+tokens",
    re.IGNORECASE,
)
|
|
29
42
|
|
|
30
43
|
|
|
31
44
|
class TurnInterrupted(RuntimeError):
|
|
@@ -58,6 +71,10 @@ class AgentLoop:
|
|
|
58
71
|
self._event_handler = event_handler
|
|
59
72
|
self._history: 'typing.List[ConversationItem]' = list(initial_history)
|
|
60
73
|
self._rollout_recorder = rollout_recorder
|
|
74
|
+
self._auto_compact_token_limit = (
|
|
75
|
+
self._context_manager.resolve_auto_compact_token_limit()
|
|
76
|
+
)
|
|
77
|
+
self._last_total_usage_tokens: 'typing.Union[int, None]' = None
|
|
61
78
|
self.interrupt_asap = False
|
|
62
79
|
|
|
63
80
|
@property
|
|
@@ -101,8 +118,6 @@ class AgentLoop:
|
|
|
101
118
|
turn_id = turn_id or uuid7_string()
|
|
102
119
|
self.interrupt_asap = False
|
|
103
120
|
new_user_messages = [UserMessage(text=text) for text in texts]
|
|
104
|
-
self._history.extend(new_user_messages)
|
|
105
|
-
self._persist_history_items(new_user_messages)
|
|
106
121
|
|
|
107
122
|
self._emit(
|
|
108
123
|
"turn_started",
|
|
@@ -110,6 +125,9 @@ class AgentLoop:
|
|
|
110
125
|
user_text="\n".join(texts),
|
|
111
126
|
user_texts=list(texts),
|
|
112
127
|
)
|
|
128
|
+
await self._maybe_auto_compact(turn_id, phase="pre_turn")
|
|
129
|
+
self._history.extend(new_user_messages)
|
|
130
|
+
self._persist_history_items(new_user_messages)
|
|
113
131
|
|
|
114
132
|
last_assistant_message: 'typing.Union[str, None]' = None
|
|
115
133
|
final_response_items: 'typing.Tuple[\n typing.Union[typing.Union[AssistantMessage, ToolCall], ReasoningItem], ...\n]' = ()
|
|
@@ -122,23 +140,11 @@ class AgentLoop:
|
|
|
122
140
|
iteration,
|
|
123
141
|
output_text=last_assistant_message,
|
|
124
142
|
)
|
|
143
|
+
await self._maybe_auto_compact(turn_id, phase="mid_turn")
|
|
125
144
|
iteration += 1
|
|
126
|
-
|
|
127
|
-
self._history,
|
|
128
|
-
self._tool_registry.model_visible_specs(),
|
|
129
|
-
self._parallel_tool_calls,
|
|
130
|
-
turn_id=turn_id,
|
|
131
|
-
)
|
|
132
|
-
self._emit(
|
|
133
|
-
"model_called",
|
|
145
|
+
response = await self._complete_model_request(
|
|
134
146
|
turn_id,
|
|
135
|
-
iteration
|
|
136
|
-
history_size=len(prompt.input),
|
|
137
|
-
tool_count=len(prompt.tools),
|
|
138
|
-
)
|
|
139
|
-
response = await self._model_client.complete(
|
|
140
|
-
prompt,
|
|
141
|
-
lambda event: self._handle_model_stream_event(turn_id, event),
|
|
147
|
+
iteration,
|
|
142
148
|
)
|
|
143
149
|
final_response_items = tuple(response.items)
|
|
144
150
|
self._emit(
|
|
@@ -193,6 +199,10 @@ class AgentLoop:
|
|
|
193
199
|
except TurnInterrupted:
|
|
194
200
|
raise
|
|
195
201
|
except Exception as exc:
|
|
202
|
+
context_usage = _usage_from_context_length_error(str(exc))
|
|
203
|
+
if context_usage is not None:
|
|
204
|
+
self._remember_token_usage(context_usage)
|
|
205
|
+
self._emit("token_count", turn_id, usage=context_usage)
|
|
196
206
|
self._emit(
|
|
197
207
|
"turn_failed",
|
|
198
208
|
turn_id,
|
|
@@ -287,6 +297,8 @@ class AgentLoop:
|
|
|
287
297
|
return
|
|
288
298
|
|
|
289
299
|
def _handle_model_stream_event(self, turn_id: 'str', event: 'ModelStreamEvent') -> 'None':
|
|
300
|
+
if event.kind == "token_count":
|
|
301
|
+
self._remember_token_usage(event.payload.get("usage"))
|
|
290
302
|
if event.kind == "assistant_delta":
|
|
291
303
|
self._emit("assistant_delta", turn_id, **event.payload)
|
|
292
304
|
elif event.kind == "tool_call":
|
|
@@ -296,6 +308,140 @@ class AgentLoop:
|
|
|
296
308
|
elif event.kind == "stream_error":
|
|
297
309
|
self._emit("stream_error", turn_id, **event.payload)
|
|
298
310
|
|
|
311
|
+
def _remember_token_usage(self, usage: 'object') -> 'None':
    """Cache the ``total_tokens`` figure from a usage mapping.

    ``usage`` is accepted as an arbitrary object. Anything that is not a
    dict, lacks ``"total_tokens"``, or carries a value ``int()`` rejects is
    ignored, and the previously remembered total is left untouched.
    """
    if isinstance(usage, dict):
        try:
            total = int(usage["total_tokens"])
        except (KeyError, TypeError, ValueError):
            # Malformed sample: keep whatever was remembered before.
            pass
        else:
            self._last_total_usage_tokens = total
|
|
318
|
+
|
|
319
|
+
async def _complete_model_request(
    self,
    turn_id: 'str',
    iteration: 'int',
) -> 'typing.Any':
    """Run one model request, compacting and retrying once on context overflow.

    Each attempt rebuilds the prompt from the current history, emits
    ``model_called`` and awaits the model client. If the call raises and
    ``_usage_from_context_length_error`` can extract usage from the error
    text, the usage is remembered, a ``token_count`` event is emitted,
    auto-compaction runs with phase ``"context_length_exceeded"``, and the
    request is attempted again with a freshly built prompt.

    Any other exception propagates unchanged, as does a context-length
    failure on the attempt that follows a compaction.
    """
    already_compacted = False
    while True:
        prompt = self._context_manager.build_prompt(
            self._history,
            self._tool_registry.model_visible_specs(),
            self._parallel_tool_calls,
            turn_id=turn_id,
        )
        self._emit(
            "model_called",
            turn_id,
            iteration=iteration,
            history_size=len(prompt.input),
            tool_count=len(prompt.tools),
        )

        def forward_stream_event(event):
            return self._handle_model_stream_event(turn_id, event)

        try:
            return await self._model_client.complete(prompt, forward_stream_event)
        except Exception as exc:
            error_text = str(exc)
            usage = _usage_from_context_length_error(error_text)
            # Not a parseable context-length failure, or compaction was
            # already tried once for this request: give up.
            if already_compacted or usage is None:
                raise
            already_compacted = True
            self._remember_token_usage(usage)
            self._emit("token_count", turn_id, usage=usage)
            await self._run_auto_compact(
                turn_id,
                phase="context_length_exceeded",
                total_tokens=usage.get("total_tokens"),
                token_limit=_context_length_error_token_limit(error_text),
                prune_tool_results_on_context_error=True,
            )
            # Compaction can take a while; check for a pending interrupt
            # before issuing the retry.
            self._raise_if_interrupt_requested(turn_id, iteration)
|
|
359
|
+
|
|
360
|
+
async def _maybe_auto_compact(
    self,
    turn_id: 'str',
    phase: 'str',
) -> 'None':
    """Run auto-compaction when the last reported usage reached the limit.

    Does nothing when no limit is configured, when no usage has been
    recorded yet, when usage is still below the limit, or when the history
    is empty. Otherwise delegates to ``_run_auto_compact`` with the given
    ``phase`` and with tool-result pruning enabled.
    """
    threshold = self._auto_compact_token_limit
    used = self._last_total_usage_tokens
    if threshold is None or used is None:
        return

    if used >= threshold and self._history:
        await self._run_auto_compact(
            turn_id,
            phase=phase,
            total_tokens=used,
            token_limit=threshold,
            prune_tool_results_on_context_error=True,
        )
|
|
379
|
+
|
|
380
|
+
async def _run_auto_compact(
    self,
    turn_id: 'str',
    phase: 'str',
    total_tokens: 'typing.Union[int, None]' = None,
    token_limit: 'typing.Union[int, None]' = None,
    prune_tool_results_on_context_error: 'bool' = False,
) -> 'None':
    """Compact the conversation and report progress through events.

    Emits ``auto_compact_started``, calls ``compact_agent_loop`` and then
    emits either ``auto_compact_failed`` (the exception is re-raised) or
    ``auto_compact_completed``. Every event carries ``phase`` plus
    ``total_tokens`` / ``token_limit`` when those were supplied.

    After a compaction call that did not raise, the remembered usage total
    is cleared. When ``compact_agent_loop`` returns ``None`` no completion
    event is emitted.
    """
    from .utils.compactor import compact_agent_loop

    base_payload: 'typing.Dict[str, object]' = {"phase": phase}
    for key, value in (("total_tokens", total_tokens), ("token_limit", token_limit)):
        if value is not None:
            base_payload[key] = value
    self._emit("auto_compact_started", turn_id, **base_payload)

    def forward_stream_errors(event: 'ModelStreamEvent') -> 'None':
        # Only stream errors from the compaction request are surfaced.
        if event.kind == "stream_error":
            self._emit("stream_error", turn_id, **event.payload)

    try:
        result = await compact_agent_loop(
            self,
            forward_stream_errors,
            prune_tool_results_on_context_error,
        )
    except Exception as exc:
        self._emit(
            "auto_compact_failed",
            turn_id,
            **dict(
                base_payload,
                error=str(exc),
                error_type=type(exc).__name__,
            ),
        )
        raise

    # The old usage figure no longer describes the history after compaction.
    self._last_total_usage_tokens = None
    if result is None:
        return

    done_payload = dict(
        base_payload,
        original_item_count=result.original_item_count,
        retained_item_count=result.retained_item_count,
        summary=result.display_text(),
    )
    if result.pruned_tool_results:
        done_payload["pruned_tool_results"] = result.pruned_tool_results
    self._emit("auto_compact_completed", turn_id, **done_payload)
|
|
444
|
+
|
|
299
445
|
def _build_follow_up_messages(
|
|
300
446
|
self,
|
|
301
447
|
tool_results: 'typing.List[ToolResult]',
|
|
@@ -326,3 +472,38 @@ class AgentLoop:
|
|
|
326
472
|
)
|
|
327
473
|
)
|
|
328
474
|
return follow_ups
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def _usage_from_context_length_error(
    message: 'str',
) -> 'typing.Union[typing.Dict[str, int], None]':
    """Derive a usage mapping from a context-length error message.

    Returns ``None`` unless the message mentions ``context_length_exceeded``
    or ``maximum context length`` (case-insensitive) AND states how many
    tokens were requested. Otherwise the result always has ``total_tokens``
    and ``input_tokens``. ``output_tokens`` is present only when the message
    includes the "(N in the messages, M in the completion)" breakdown;
    without it the whole request is counted as input.
    """
    lowered = message.lower()
    mentions_context_limit = (
        "context_length_exceeded" in lowered
        or "maximum context length" in lowered
    )
    if not mentions_context_limit:
        return None

    requested = _REQUESTED_TOKENS_RE.search(message)
    if requested is None:
        return None
    total = _parse_token_count(requested.group(1))

    breakdown = _REQUESTED_TOKEN_SPLIT_RE.search(message)
    if breakdown is None:
        return {"total_tokens": total, "input_tokens": total}
    return {
        "total_tokens": total,
        "input_tokens": _parse_token_count(breakdown.group(1)),
        "output_tokens": _parse_token_count(breakdown.group(2)),
    }
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def _context_length_error_token_limit(message: 'str') -> 'typing.Union[int, None]':
    """Return the "maximum context length is N tokens" figure, if stated.

    Gives ``None`` when the message contains no such phrase.
    """
    found = _MAX_CONTEXT_TOKENS_RE.search(message)
    return None if found is None else _parse_token_count(found.group(1))
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def _parse_token_count(value: 'str') -> 'int':
    """Convert a token count such as ``"128,000"`` to an int.

    Commas are treated as thousands separators and dropped; ``int()`` raises
    ``ValueError`` if what remains is not a valid integer literal.
    """
    digits = "".join(value.split(","))
    return int(digits)
|
pycodex/cli.py
CHANGED
pycodex/context.py
CHANGED
|
@@ -78,6 +78,7 @@ class ContextConfig:
|
|
|
78
78
|
project_doc_max_bytes: 'typing.Union[int, None]' = None
|
|
79
79
|
model: 'typing.Union[str, None]' = None
|
|
80
80
|
model_context_window: 'typing.Union[int, None]' = None
|
|
81
|
+
model_auto_compact_token_limit: 'typing.Union[int, None]' = None
|
|
81
82
|
personality: 'typing.Union[str, None]' = None
|
|
82
83
|
approval_policy: 'typing.Union[str, None]' = None
|
|
83
84
|
sandbox_mode: 'typing.Union[str, None]' = None
|
|
@@ -120,6 +121,9 @@ class ContextConfig:
|
|
|
120
121
|
project_doc_max_bytes=_normalize_int(selected.get("project_doc_max_bytes")),
|
|
121
122
|
model=_normalize_text(selected.get("model")),
|
|
122
123
|
model_context_window=_normalize_int(selected.get("model_context_window")),
|
|
124
|
+
model_auto_compact_token_limit=_normalize_int(
|
|
125
|
+
selected.get("model_auto_compact_token_limit")
|
|
126
|
+
),
|
|
123
127
|
personality=_normalize_text(selected.get("personality")),
|
|
124
128
|
approval_policy=_normalize_text(selected.get("approval_policy")),
|
|
125
129
|
sandbox_mode=_normalize_text(selected.get("sandbox_mode")),
|
|
@@ -268,6 +272,18 @@ class ContextManager:
|
|
|
268
272
|
effective_percent = DEFAULT_EFFECTIVE_CONTEXT_WINDOW_PERCENT
|
|
269
273
|
return context_window * max(effective_percent, 0) // 100
|
|
270
274
|
|
|
275
|
+
def resolve_auto_compact_token_limit(self) -> 'typing.Union[int, None]':
    """Return the token threshold at which auto-compaction should trigger.

    An explicit ``model_auto_compact_token_limit`` in the config wins.
    Otherwise the configured model's metadata is consulted for
    ``auto_compact_token_limit``. ``None`` is returned when no model is
    configured or no metadata entry exists for that model.
    """
    config = self._config
    explicit_limit = config.model_auto_compact_token_limit
    if explicit_limit is not None:
        return explicit_limit

    slug = config.model
    metadata = None if slug is None else _load_models_by_slug().get(slug)
    if metadata is None:
        return None
    return _normalize_int(metadata.get("auto_compact_token_limit"))
|
|
286
|
+
|
|
271
287
|
def _resolve_model_instructions(self) -> 'typing.Union[str, None]':
|
|
272
288
|
model_slug = self._config.model
|
|
273
289
|
if model_slug is None:
|
pycodex/model.py
CHANGED
|
@@ -287,6 +287,8 @@ class ResponsesModelClient:
|
|
|
287
287
|
event_handler,
|
|
288
288
|
)
|
|
289
289
|
except ResponsesRetryableError as exc:
|
|
290
|
+
if _is_context_length_error_message(str(exc)):
|
|
291
|
+
raise ResponsesApiError(str(exc)) from exc
|
|
290
292
|
if retries >= max_retries:
|
|
291
293
|
raise
|
|
292
294
|
retries += 1
|
|
@@ -780,11 +782,14 @@ class ResponsesModelClient:
|
|
|
780
782
|
)
|
|
781
783
|
|
|
782
784
|
message = str(error.get("message") or "responses stream failed")
|
|
783
|
-
code = str(error.get("code") or "").strip()
|
|
785
|
+
code = str(error.get("code") or error.get("type") or "").strip()
|
|
786
|
+
if _is_context_length_error_message(message):
|
|
787
|
+
raise ResponsesApiError(self._format_response_failed_error(message))
|
|
784
788
|
if code in {
|
|
785
789
|
"context_length_exceeded",
|
|
786
790
|
"insufficient_quota",
|
|
787
791
|
"invalid_prompt",
|
|
792
|
+
"model_output_invalid",
|
|
788
793
|
"usage_not_included",
|
|
789
794
|
}:
|
|
790
795
|
raise ResponsesApiError(self._format_response_failed_error(message))
|
|
@@ -888,6 +893,14 @@ def _optional_int(value: 'object') -> 'typing.Union[int, None]':
|
|
|
888
893
|
return int(value)
|
|
889
894
|
|
|
890
895
|
|
|
896
|
+
def _is_context_length_error_message(message: 'str') -> 'bool':
    """Tell whether an error message describes a context-length overflow.

    The check is a case-insensitive substring match on
    ``context_length_exceeded`` or ``maximum context length``.
    """
    lowered = message.lower()
    markers = ("context_length_exceeded", "maximum context length")
    return any(marker in lowered for marker in markers)
|
|
902
|
+
|
|
903
|
+
|
|
891
904
|
def _requests_verify_setting() -> 'typing.Union[typing.Union[str, bool], None]':
|
|
892
905
|
for env_name in ("REQUESTS_CA_BUNDLE", "CURL_CA_BUNDLE", "SSL_CERT_FILE"):
|
|
893
906
|
value = os.environ.get(env_name, "").strip()
|
pycodex/utils/compactor.py
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
2
|
|
|
3
|
-
from ..protocol import
|
|
3
|
+
from ..protocol import (
|
|
4
|
+
AssistantMessage,
|
|
5
|
+
ConversationItem,
|
|
6
|
+
ModelStreamEvent,
|
|
7
|
+
ToolCall,
|
|
8
|
+
ToolResult,
|
|
9
|
+
UserMessage,
|
|
10
|
+
)
|
|
4
11
|
from .random_ids import uuid7_string
|
|
5
12
|
import typing
|
|
6
13
|
|
|
@@ -35,6 +42,7 @@ _SUBAGENT_NOTIFICATION_PREFIX = "<subagent_notification>\n"
|
|
|
35
42
|
class CompactResult:
|
|
36
43
|
history: 'typing.Tuple[ConversationItem, ...]'
|
|
37
44
|
original_item_count: 'int'
|
|
45
|
+
pruned_tool_results: 'int' = 0
|
|
38
46
|
|
|
39
47
|
@property
|
|
40
48
|
def retained_item_count(self) -> 'int':
|
|
@@ -43,10 +51,14 @@ class CompactResult:
|
|
|
43
51
|
def display_text(self) -> 'str':
    """Render a one-line summary of the compaction.

    The form is ``compact(N items) -> M items + [summary]``, followed by
    `` (dropped K old tool responses)`` when any tool results were pruned.
    Noun forms come from ``_pluralize``.
    """
    kept = self.retained_item_count
    kept_label = _pluralize("item", kept)
    total = self.original_item_count
    total_label = _pluralize("item", total)
    pieces = [f"compact({total} {total_label}) -> {kept} {kept_label} + [summary]"]

    dropped = self.pruned_tool_results
    if dropped:
        dropped_label = _pluralize("tool response", dropped)
        pieces.append(f" (dropped {dropped} old {dropped_label})")
    return "".join(pieces)
|
|
50
62
|
|
|
51
63
|
|
|
52
64
|
def compact(
|
|
@@ -60,24 +72,42 @@ def compact(
|
|
|
60
72
|
async def compact_agent_loop(
|
|
61
73
|
agent_loop: 'AgentLoop',
|
|
62
74
|
stream_event_handler: 'typing.Union[typing.Callable[[ModelStreamEvent], None], None]' = None,
|
|
75
|
+
prune_tool_results_on_context_error: 'bool' = False,
|
|
63
76
|
) -> 'typing.Union[CompactResult, None]':
|
|
64
77
|
history = agent_loop.history
|
|
65
78
|
if not history:
|
|
66
79
|
return None
|
|
67
80
|
original_item_count = len(history)
|
|
81
|
+
pruned_tool_results = 0
|
|
68
82
|
|
|
69
|
-
compact_prompt = UserMessage(text=DEFAULT_COMPACT_PROMPT)
|
|
70
|
-
prompt = agent_loop._context_manager.build_prompt(
|
|
71
|
-
list(history) + [compact_prompt],
|
|
72
|
-
[],
|
|
73
|
-
False,
|
|
74
|
-
turn_id=uuid7_string(),
|
|
75
|
-
)
|
|
76
83
|
noop_stream_event_handler = lambda _event: None
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
84
|
+
while True:
|
|
85
|
+
compact_prompt = UserMessage(text=DEFAULT_COMPACT_PROMPT)
|
|
86
|
+
prompt = agent_loop._context_manager.build_prompt(
|
|
87
|
+
list(history) + [compact_prompt],
|
|
88
|
+
[],
|
|
89
|
+
False,
|
|
90
|
+
turn_id=uuid7_string(),
|
|
91
|
+
)
|
|
92
|
+
try:
|
|
93
|
+
response = await agent_loop._model_client.complete(
|
|
94
|
+
prompt,
|
|
95
|
+
stream_event_handler or noop_stream_event_handler,
|
|
96
|
+
)
|
|
97
|
+
break
|
|
98
|
+
except Exception as exc:
|
|
99
|
+
if (
|
|
100
|
+
not prune_tool_results_on_context_error
|
|
101
|
+
or not _is_context_length_error(str(exc))
|
|
102
|
+
):
|
|
103
|
+
raise
|
|
104
|
+
pruned_history = prune_oldest_tool_response(history)
|
|
105
|
+
if pruned_history is None:
|
|
106
|
+
raise
|
|
107
|
+
history = pruned_history
|
|
108
|
+
pruned_tool_results += 1
|
|
109
|
+
agent_loop.replace_history(history)
|
|
110
|
+
|
|
81
111
|
compacted_history = compact(
|
|
82
112
|
list(history) + [compact_prompt] + list(response.items)
|
|
83
113
|
)
|
|
@@ -88,6 +118,32 @@ async def compact_agent_loop(
|
|
|
88
118
|
return CompactResult(
|
|
89
119
|
history=compacted_history,
|
|
90
120
|
original_item_count=original_item_count,
|
|
121
|
+
pruned_tool_results=pruned_tool_results,
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def prune_oldest_tool_response(
    history: 'typing.Sequence[ConversationItem]',
) -> 'typing.Union[typing.Tuple[ConversationItem, ...], None]':
    """Drop the earliest tool result and its matching tool call.

    Finds the first ``ToolResult`` in ``history``. If an earlier ``ToolCall``
    has the same ``call_id``, the first such call is dropped as well.
    Returns the remaining items as a new tuple, or ``None`` when the history
    contains no ``ToolResult``. The input sequence is not modified.
    """
    items = list(history)

    result_pos = next(
        (pos for pos, entry in enumerate(items) if isinstance(entry, ToolResult)),
        None,
    )
    if result_pos is None:
        return None

    wanted_call_id = items[result_pos].call_id
    call_pos = next(
        (
            pos
            for pos, entry in enumerate(items[:result_pos])
            if isinstance(entry, ToolCall) and entry.call_id == wanted_call_id
        ),
        None,
    )

    # call_pos may be None (no matching call); None never equals an index,
    # so it is harmless inside the removal set.
    dropped = {result_pos, call_pos}
    return tuple(entry for pos, entry in enumerate(items) if pos not in dropped)
|
|
92
148
|
|
|
93
149
|
|
|
@@ -187,3 +243,11 @@ def _pluralize(noun: 'str', count: 'int') -> 'str':
|
|
|
187
243
|
|
|
188
244
|
def _is_synthetic_user_message(text: 'str') -> 'bool':
|
|
189
245
|
return text.startswith(_SUBAGENT_NOTIFICATION_PREFIX)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _is_context_length_error(message: 'str') -> 'bool':
    """Report whether ``message`` looks like a context-length overflow error.

    Matches ``context_length_exceeded`` or ``maximum context length``
    anywhere in the text, ignoring case.
    """
    haystack = message.lower()
    for needle in ("context_length_exceeded", "maximum context length"):
        if needle in haystack:
            return True
    return False
|
pycodex/utils/visualize.py
CHANGED
|
@@ -705,6 +705,59 @@ class CliSessionView:
|
|
|
705
705
|
self._spinner.set_label("reconnecting")
|
|
706
706
|
return
|
|
707
707
|
|
|
708
|
+
if event.kind == "auto_compact_started":
|
|
709
|
+
self._finish_stream()
|
|
710
|
+
total_tokens = event.payload.get("total_tokens")
|
|
711
|
+
token_limit = event.payload.get("token_limit")
|
|
712
|
+
if total_tokens is not None and token_limit is not None:
|
|
713
|
+
message = f"[status] auto-compact: {total_tokens}/{token_limit} tokens"
|
|
714
|
+
else:
|
|
715
|
+
message = "[status] auto-compact"
|
|
716
|
+
self._print_line(
|
|
717
|
+
colorize_cli_message(message, "status", self._color_enabled)
|
|
718
|
+
)
|
|
719
|
+
if self._input_active:
|
|
720
|
+
self._spinner.pause()
|
|
721
|
+
else:
|
|
722
|
+
self._spinner.resume()
|
|
723
|
+
self._spinner.set_label("compacting context")
|
|
724
|
+
self._spinner.render_now()
|
|
725
|
+
return
|
|
726
|
+
|
|
727
|
+
if event.kind == "auto_compact_completed":
|
|
728
|
+
self._finish_stream()
|
|
729
|
+
summary = str(event.payload.get("summary", "")).strip()
|
|
730
|
+
message = f"[status] {summary}" if summary else "[status] context compacted"
|
|
731
|
+
self._print_line(
|
|
732
|
+
colorize_cli_message(message, "status", self._color_enabled)
|
|
733
|
+
)
|
|
734
|
+
if self._input_active:
|
|
735
|
+
self._spinner.pause()
|
|
736
|
+
else:
|
|
737
|
+
self._spinner.resume()
|
|
738
|
+
self._spinner.set_label("thinking")
|
|
739
|
+
self._spinner.render_now()
|
|
740
|
+
return
|
|
741
|
+
|
|
742
|
+
if event.kind == "auto_compact_failed":
|
|
743
|
+
self._finish_stream()
|
|
744
|
+
error = str(event.payload.get("error", "")).strip()
|
|
745
|
+
message = (
|
|
746
|
+
f"[error] auto-compact failed: {error}"
|
|
747
|
+
if error
|
|
748
|
+
else "[error] auto-compact failed"
|
|
749
|
+
)
|
|
750
|
+
self._print_line(
|
|
751
|
+
colorize_cli_message(message, "error", self._color_enabled)
|
|
752
|
+
)
|
|
753
|
+
if self._input_active:
|
|
754
|
+
self._spinner.pause()
|
|
755
|
+
else:
|
|
756
|
+
self._spinner.resume()
|
|
757
|
+
self._spinner.set_label("thinking")
|
|
758
|
+
self._spinner.render_now()
|
|
759
|
+
return
|
|
760
|
+
|
|
708
761
|
if event.kind == "assistant_delta":
|
|
709
762
|
delta = str(event.payload.get("delta", ""))
|
|
710
763
|
if not delta:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: python-codex
|
|
3
|
-
Version: 0.1.11
|
|
3
|
+
Version: 0.1.12
|
|
4
4
|
Summary: A minimal Python extraction of Codex's main agent loop
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Python: >=3.6.2
|
|
@@ -185,6 +185,14 @@ Current behavior:
|
|
|
185
185
|
- `/compact` synthesizes a local handoff summary, replaces the in-memory
|
|
186
186
|
conversation history with the compacted view, and appends a compacted-history
|
|
187
187
|
entry to the rollout so later `/resume` sees the same state
|
|
188
|
+
- `model_auto_compact_token_limit = <tokens>` in `config.toml` enables the same
|
|
189
|
+
compaction path automatically when the latest reported usage reaches that
|
|
190
|
+
threshold before a follow-up sampling request or the next user turn
|
|
191
|
+
- if a model request fails with `context_length_exceeded`, pycodex now treats
|
|
192
|
+
the provider-reported requested token count as a failed-request usage sample,
|
|
193
|
+
triggers the same compact path immediately, and retries the request once; if
|
|
194
|
+
the compact request is also over the limit, it repeatedly drops the oldest
|
|
195
|
+
tool response plus its matching tool call before retrying compact
|
|
188
196
|
- new sessions are now recorded under `CODEX_HOME/sessions/.../rollout-*.jsonl`
|
|
189
197
|
with a stable session/thread id and per-item append+flush semantics so
|
|
190
198
|
`/resume` reads back the same rollout format
|
|
@@ -211,7 +219,12 @@ Current behavior:
|
|
|
211
219
|
`reasoning_content` are translated back into Responses `reasoning` items, and
|
|
212
220
|
historical `reasoning` items are replayed into downstream assistant messages
|
|
213
221
|
via the `reasoning` field. Streaming token usage is also requested from vLLM
|
|
214
|
-
and forwarded to the final `response.completed.response.usage`
|
|
222
|
+
and forwarded to the final `response.completed.response.usage`. If a
|
|
223
|
+
downstream chat stream terminates after emitting only reasoning, with no
|
|
224
|
+
assistant content and no tool call, the compat layer discards that partial
|
|
225
|
+
reasoning, retries the same downstream request once, and only then emits
|
|
226
|
+
`response.failed` with `type = "model_output_invalid"` if the retry is still
|
|
227
|
+
reasoning-only
|
|
215
228
|
- standalone `responses_server` now also supports downstream `/v1/messages`
|
|
216
229
|
backends via `--outcomming-api messages`, while keeping the internal
|
|
217
230
|
canonical request/route logic in chat-completions shape
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
pycodex/__init__.py,sha256=jCnC_Bgotlxa4GwO3Re2sChKGY49TRM-uVZEQ9uBpfw,3106
|
|
2
|
-
pycodex/agent.py,sha256=
|
|
3
|
-
pycodex/cli.py,sha256=
|
|
2
|
+
pycodex/agent.py,sha256=593HxMrQSo_-hZMUL299Rhdg70NBcx1fjKD7x8ryQ1c,18173
|
|
3
|
+
pycodex/cli.py,sha256=wvPoBYX3GPHHsOY6DtMLotV2vnfDL1JvsMVu6ZznqrM,32081
|
|
4
4
|
pycodex/collaboration.py,sha256=yQ6pBD-R3ZWR4_FAYQFoS7KF0m4LLD42otXIbPqw2ys,641
|
|
5
5
|
pycodex/compat.py,sha256=IO0X7AgcYhlHnYnpvBZ6leCh_UjoQzg5HLT5wYBNNIw,3155
|
|
6
|
-
pycodex/context.py,sha256=
|
|
6
|
+
pycodex/context.py,sha256=IeqSzVZktL5-kTCY9ZlZr9J-NEe31qEFIvoacQl3jwk,26018
|
|
7
7
|
pycodex/doctor.py,sha256=De3M4hRBJq8ZeqsUJgHz0vitqrH18YugrEnz7oHhTdQ,10572
|
|
8
|
-
pycodex/model.py,sha256=
|
|
8
|
+
pycodex/model.py,sha256=dJKsCfG_xytPkJuE7xPOXrVquoTqtfudSNj33uhWDlE,34181
|
|
9
9
|
pycodex/portable.py,sha256=gxl2E2h5uZJbasMEPPs-nyALFPIvX79T2ZYsu6vXZrg,15656
|
|
10
10
|
pycodex/portable_server.py,sha256=6I3pQkWj3e_SFlDXY2mGdCPns1w_3PSxByBV9wv5epI,7331
|
|
11
11
|
pycodex/protocol.py,sha256=LYDzJefu1tugqQzee4NuZzxhGAv3hXrNcnlw04CudAY,11106
|
|
@@ -52,28 +52,28 @@ pycodex/tools/wait_tool.py,sha256=EJcW2Ev9jUD9eZ7cFDNOLDzlywS2BD3ll6pArXyxfrI,23
|
|
|
52
52
|
pycodex/tools/web_search_tool.py,sha256=_7r2ltWhnBM0ZCgweA5a0GbEi0qSFAHOyi1RHrl6tfQ,957
|
|
53
53
|
pycodex/tools/write_stdin_tool.py,sha256=nCuProkbeewfQ_yS8CgBajo--K3EmkXzJYh1D2QtAM4,2549
|
|
54
54
|
pycodex/utils/__init__.py,sha256=jE63cZR1IBzs4ED86lwdYRLqV5FmPhNNDzIgC90mr6A,1216
|
|
55
|
-
pycodex/utils/compactor.py,sha256=
|
|
55
|
+
pycodex/utils/compactor.py,sha256=bJbtsyT-KCPasGMH403njrrSDDbPyKbnrVaFSt5O4Vg,8440
|
|
56
56
|
pycodex/utils/debug.py,sha256=JeEB5JfzYfbdG0fXlrWFmXyR1ts86fKsI_97IqgF6R0,296
|
|
57
57
|
pycodex/utils/dotenv.py,sha256=rGKmurHjm7GdP4giyjHBPpSPv2Oi45qBqDB6HG3CnfA,1866
|
|
58
58
|
pycodex/utils/get_env.py,sha256=5fNhcNhujOakWV6AS66rGW3jEA68WGpuE4YVXJZFE6U,7427
|
|
59
59
|
pycodex/utils/random_ids.py,sha256=zBphjVGc7OXk9ZNExAbxRi_bk7ipyLG491qTv7hi8jM,380
|
|
60
60
|
pycodex/utils/session_persist.py,sha256=Ntu0jcb2cEZbXpKDe0LXD-OuxfjK0SzBV0lRi90-NAM,16496
|
|
61
|
-
pycodex/utils/visualize.py,sha256=
|
|
61
|
+
pycodex/utils/visualize.py,sha256=oWgApf9pXqd-3ijjelhQn0PLPEW3abR_R_PZN2nWhwQ,43099
|
|
62
62
|
responses_server/__init__.py,sha256=3yPv_zeGT7P11tTnmj5kXktISLNsNW-02MUnnbiZcb0,394
|
|
63
63
|
responses_server/__main__.py,sha256=9SRp-Yw7ShGxc6DhSIXcDLKgGEdAVm3oBZ59rBOPjT0,62
|
|
64
|
-
responses_server/app.py,sha256=
|
|
64
|
+
responses_server/app.py,sha256=ack2a0otiBwq_DpsFURqLMlQzcf9oJPwo8o6iJ1fuig,7885
|
|
65
65
|
responses_server/config.py,sha256=leb3_uPrCyYdUIkyRyVPX4luGF88dQ62OkhRLPe7uxw,2718
|
|
66
66
|
responses_server/messages_api.py,sha256=WgO6J1jz2pOJkI79rLXp-pS1yxtLARcwX8T6JX5Vkcc,16971
|
|
67
67
|
responses_server/payload_processors.py,sha256=cbXGW8Xi-mliaWRg0_Af41X0vXV2W6R9VBzTE6DXfe4,3483
|
|
68
68
|
responses_server/server.py,sha256=Ko-Cqz_kW-uve091itucMklsPhEei77v-YcTjtjEdqU,2286
|
|
69
69
|
responses_server/session_store.py,sha256=ZD3cH2aEOkWaQsu5qTzcal2mThTSFQPAhAhPUN9srgI,1115
|
|
70
|
-
responses_server/stream_router.py,sha256=
|
|
70
|
+
responses_server/stream_router.py,sha256=UiP-T4IKgJubD1L0AY93N3DqUh4K41fNcdONmC3Z-0A,37161
|
|
71
71
|
responses_server/trajectory_dump.py,sha256=XCwYaZZmlAxSsSXOfhk3zRvyfDpOHX5R8KzspScNFUM,3435
|
|
72
72
|
responses_server/tools/__init__.py,sha256=ivsBSEy0SBUhY-Uea5v1XMLXShkwHdCVl0id-1FwdZg,150
|
|
73
73
|
responses_server/tools/custom_adapter.py,sha256=LxO7ldydvR-GWachDz8GKC0Q8KGGFoFPbZxM0QvxuZ0,8350
|
|
74
74
|
responses_server/tools/web_search.py,sha256=pm4ZUiHUfxc0bGY1kEvt-BCzDrZIyP24xzPUcga2ul0,8908
|
|
75
|
-
python_codex-0.1.
|
|
76
|
-
python_codex-0.1.
|
|
77
|
-
python_codex-0.1.
|
|
78
|
-
python_codex-0.1.
|
|
79
|
-
python_codex-0.1.
|
|
75
|
+
python_codex-0.1.12.dist-info/METADATA,sha256=T85L7qOCT8P-MNzXHQsNPttvcGQ9ZTetWJbuy6QC6Pw,16657
|
|
76
|
+
python_codex-0.1.12.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
|
|
77
|
+
python_codex-0.1.12.dist-info/entry_points.txt,sha256=sNUVakoVuTrzJH505ZgRTQxmtRRPUHV_EH0i6EbYTyM,45
|
|
78
|
+
python_codex-0.1.12.dist-info/licenses/LICENSE,sha256=0X8ifk312hYAORM4hlzg8wVSEXYKNmiPgWlB1YIy2Nw,10926
|
|
79
|
+
python_codex-0.1.12.dist-info/RECORD,,
|
responses_server/app.py
CHANGED
|
@@ -37,14 +37,18 @@ def _stream_events(response_server: 'ResponseServer', request_body: 'typing.Dict
|
|
|
37
37
|
|
|
38
38
|
import traceback
|
|
39
39
|
exc_info = traceback.format_exception(type(exc), exc, exc.__traceback__)
|
|
40
|
+
error = {
|
|
41
|
+
"message": '\n'.join(exc_info),
|
|
42
|
+
}
|
|
43
|
+
error_type = getattr(exc, "error_type", None)
|
|
44
|
+
if error_type:
|
|
45
|
+
error["type"] = error_type
|
|
40
46
|
yield _format_sse_event(
|
|
41
47
|
"response.failed",
|
|
42
48
|
{
|
|
43
49
|
"type": "response.failed",
|
|
44
50
|
"response": {
|
|
45
|
-
"error":
|
|
46
|
-
"message": '\n'.join(exc_info),
|
|
47
|
-
}
|
|
51
|
+
"error": error,
|
|
48
52
|
},
|
|
49
53
|
},
|
|
50
54
|
)
|
|
@@ -36,7 +36,13 @@ class UnsupportedIncommingFeature(ValueError):
|
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
class OutcommingChatError(RuntimeError):
    """Failure reported for the outgoing chat request.

    ``error_type`` is an optional machine-readable classification stored on
    the instance next to the human-readable message.
    """

    def __init__(self, message: 'str', error_type: 'typing.Union[str, None]' = None) -> 'None':
        super().__init__(message)
        # None means no classification was supplied by the raiser.
        self.error_type = error_type
|
|
40
46
|
|
|
41
47
|
|
|
42
48
|
class StreamRouter:
|
|
@@ -312,10 +318,14 @@ class StreamRouter:
|
|
|
312
318
|
current_request,
|
|
313
319
|
trajectory_dump,
|
|
314
320
|
)
|
|
321
|
+
retried_reasoning_only_output = False
|
|
315
322
|
|
|
316
323
|
while True:
|
|
317
324
|
tool_calls: 'typing.Dict[int, typing.Dict[str, object]]' = {}
|
|
325
|
+
finish_reasons: 'typing.List[str]' = []
|
|
318
326
|
current_usage: 'typing.Dict[str, object]' = {}
|
|
327
|
+
reasoning_start = len(reasoning_parts)
|
|
328
|
+
text_start = len(text_parts)
|
|
319
329
|
for chunk in current_stream:
|
|
320
330
|
for event_name, payload in self._consume_chat_chunk(
|
|
321
331
|
chunk,
|
|
@@ -323,6 +333,7 @@ class StreamRouter:
|
|
|
323
333
|
text_parts,
|
|
324
334
|
tool_calls,
|
|
325
335
|
current_usage,
|
|
336
|
+
finish_reasons,
|
|
326
337
|
):
|
|
327
338
|
yield event_name, payload
|
|
328
339
|
if current_usage:
|
|
@@ -362,6 +373,29 @@ class StreamRouter:
|
|
|
362
373
|
)
|
|
363
374
|
continue
|
|
364
375
|
|
|
376
|
+
if (
|
|
377
|
+
len(reasoning_parts) > reasoning_start
|
|
378
|
+
and len(text_parts) == text_start
|
|
379
|
+
and not ordinary_tool_calls
|
|
380
|
+
):
|
|
381
|
+
if not retried_reasoning_only_output:
|
|
382
|
+
retried_reasoning_only_output = True
|
|
383
|
+
del reasoning_parts[reasoning_start:]
|
|
384
|
+
del text_parts[text_start:]
|
|
385
|
+
current_request = json.loads(json.dumps(current_request))
|
|
386
|
+
current_stream = self._open_tracked_outcomming_stream(
|
|
387
|
+
current_request,
|
|
388
|
+
trajectory_dump,
|
|
389
|
+
)
|
|
390
|
+
continue
|
|
391
|
+
finish_reason = finish_reasons[-1] if finish_reasons else "<unknown>"
|
|
392
|
+
raise OutcommingChatError(
|
|
393
|
+
"outcomming chat completion ended without assistant content "
|
|
394
|
+
"or tool calls after emitting only reasoning "
|
|
395
|
+
f"(finish_reason={finish_reason!r})",
|
|
396
|
+
error_type="model_output_invalid",
|
|
397
|
+
)
|
|
398
|
+
|
|
365
399
|
for item in self._build_output_items(
|
|
366
400
|
reasoning_parts,
|
|
367
401
|
text_parts,
|
|
@@ -676,6 +710,7 @@ class StreamRouter:
|
|
|
676
710
|
text_parts: 'typing.List[str]',
|
|
677
711
|
tool_calls: 'typing.Dict[int, typing.Dict[str, object]]',
|
|
678
712
|
current_usage: 'typing.Dict[str, object]',
|
|
713
|
+
finish_reasons: 'typing.List[str]',
|
|
679
714
|
) -> 'typing.List[typing.Tuple[str, typing.Dict[str, object]]]':
|
|
680
715
|
events: 'typing.List[typing.Tuple[str, typing.Dict[str, object]]]' = []
|
|
681
716
|
usage = payload.get("usage")
|
|
@@ -689,6 +724,9 @@ class StreamRouter:
|
|
|
689
724
|
for choice in choices:
|
|
690
725
|
if not isinstance(choice, dict):
|
|
691
726
|
continue
|
|
727
|
+
finish_reason = choice.get("finish_reason")
|
|
728
|
+
if isinstance(finish_reason, str) and finish_reason:
|
|
729
|
+
finish_reasons.append(finish_reason)
|
|
692
730
|
delta = choice.get("delta") or {}
|
|
693
731
|
if not isinstance(delta, dict):
|
|
694
732
|
continue
|
|
File without changes
|
|
File without changes
|
|
File without changes
|