python-codex 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pycodex/context.py CHANGED
@@ -89,7 +89,7 @@ class ContextConfig:
89
89
  profile: 'typing.Union[str, None]' = None,
90
90
  ) -> 'ContextConfig':
91
91
  path = Path(config_path)
92
- data = tomllib.loads(path.read_text())
92
+ data = tomllib.loads(path.read_text(encoding="utf-8"))
93
93
  selected = dict(data)
94
94
  if profile is not None:
95
95
  overrides = data.get("profiles", {}).get(profile)
@@ -162,7 +162,9 @@ class ContextManager:
162
162
  self._include_permissions_instructions = include_permissions_instructions
163
163
  self._include_skills_instructions = include_skills_instructions
164
164
  self._network_access = network_access
165
- self._default_base_instructions = DEFAULT_BASE_INSTRUCTIONS_PATH.read_text()
165
+ self._default_base_instructions = DEFAULT_BASE_INSTRUCTIONS_PATH.read_text(
166
+ encoding="utf-8"
167
+ )
166
168
  self._workspace_metadata_turn_id: 'typing.Union[str, None]' = None
167
169
  self._workspace_metadata_cache: 'typing.Union[JSONDict, None]' = None
168
170
 
@@ -237,7 +239,10 @@ class ContextManager:
237
239
  if self._config.base_instructions is not None:
238
240
  return self._config.base_instructions
239
241
  if self._config.model_instructions_file is not None:
240
- return self._config.model_instructions_file.read_text().strip()
242
+ return self._config.model_instructions_file.read_text(
243
+ encoding="utf-8",
244
+ errors="replace",
245
+ ).strip()
241
246
  resolved = self._resolve_model_instructions()
242
247
  if resolved is not None:
243
248
  return resolved
@@ -327,11 +332,11 @@ class ContextManager:
327
332
  return None
328
333
 
329
334
  sandbox_text = (
330
- sandbox_prompt_path.read_text().strip().replace(
335
+ sandbox_prompt_path.read_text(encoding="utf-8").strip().replace(
331
336
  "{network_access}", self._network_access
332
337
  )
333
338
  )
334
- approval_text = approval_prompt_path.read_text().strip()
339
+ approval_text = approval_prompt_path.read_text(encoding="utf-8").strip()
335
340
  return "\n".join(
336
341
  [
337
342
  PERMISSIONS_OPEN_TAG,
@@ -429,7 +434,7 @@ class ContextManager:
429
434
  docs: 'typing.List[str]' = []
430
435
  remaining = self._config.project_doc_max_bytes
431
436
  for path in self._discover_project_doc_paths():
432
- text = path.read_text()
437
+ text = path.read_text(encoding="utf-8", errors="replace")
433
438
  if not text.strip():
434
439
  continue
435
440
  if remaining is None:
@@ -437,7 +442,7 @@ class ContextManager:
437
442
  continue
438
443
  if remaining <= 0:
439
444
  break
440
- encoded = text.encode()
445
+ encoded = text.encode("utf-8")
441
446
  docs.append(encoded[:remaining].decode(errors="ignore"))
442
447
  remaining -= min(len(encoded), remaining)
443
448
  if not docs:
@@ -507,15 +512,15 @@ def _normalize_int(value) -> 'typing.Union[int, None]':
507
512
 
508
513
  def _default_collaboration_instructions(mode: 'CollaborationMode') -> 'str':
509
514
  if mode == "plan":
510
- return PLAN_COLLABORATION_INSTRUCTIONS_PATH.read_text()
511
- return DEFAULT_COLLABORATION_INSTRUCTIONS_PATH.read_text()
515
+ return PLAN_COLLABORATION_INSTRUCTIONS_PATH.read_text(encoding="utf-8")
516
+ return DEFAULT_COLLABORATION_INSTRUCTIONS_PATH.read_text(encoding="utf-8")
512
517
 
513
518
 
514
519
  def _read_first_instruction_file(base: 'Path') -> 'typing.Union[str, None]':
515
520
  for candidate_name in (LOCAL_PROJECT_DOC_FILENAME, DEFAULT_PROJECT_DOC_FILENAME):
516
521
  candidate = base / candidate_name
517
522
  try:
518
- contents = candidate.read_text()
523
+ contents = candidate.read_text(encoding="utf-8", errors="replace")
519
524
  except OSError:
520
525
  continue
521
526
  trimmed = contents.strip()
@@ -526,7 +531,7 @@ def _read_first_instruction_file(base: 'Path') -> 'typing.Union[str, None]':
526
531
 
527
532
  @lru_cache(maxsize=1)
528
533
  def _load_models_by_slug() -> 'typing.Dict[str, JSONDict]':
529
- payload = json.loads(DEFAULT_MODELS_PATH.read_text())
534
+ payload = json.loads(DEFAULT_MODELS_PATH.read_text(encoding="utf-8"))
530
535
  models = payload.get("models", [])
531
536
  by_slug: 'typing.Dict[str, JSONDict]' = {}
532
537
  for model in models:
@@ -571,7 +576,7 @@ def _discover_skill_files(
571
576
 
572
577
 
573
578
  def _parse_skill_descriptor(path: 'Path', scope_rank: 'int') -> 'typing.Union[SkillDescriptor, None]':
574
- text = path.read_text()
579
+ text = path.read_text(encoding="utf-8", errors="replace")
575
580
  if not text.startswith("---\n"):
576
581
  return None
577
582
  end_marker = "\n---\n"
pycodex/model.py CHANGED
@@ -71,7 +71,7 @@ class ResponsesProviderConfig:
71
71
  config_path: 'typing.Union[str, Path]' = DEFAULT_CODEX_CONFIG_PATH,
72
72
  profile: 'typing.Union[str, None]' = None,
73
73
  ) -> 'ResponsesProviderConfig':
74
- data = tomllib.loads(Path(config_path).read_text())
74
+ data = tomllib.loads(Path(config_path).read_text(encoding="utf-8"))
75
75
  selected = dict(data)
76
76
  if profile is not None:
77
77
  overrides = data.get("profiles", {}).get(profile)
pycodex/portable.py CHANGED
@@ -123,7 +123,8 @@ def bootstrap_called_home(
123
123
  },
124
124
  ensure_ascii=False,
125
125
  indent=2,
126
- )
126
+ ),
127
+ encoding="utf-8",
127
128
  )
128
129
  return home_dir / DEFAULT_ENTRY_CONFIG
129
130
 
@@ -199,7 +200,7 @@ def _collect_config_referenced_files(root: 'Path') -> 'typing.Set[str]':
199
200
  config_path = root / DEFAULT_ENTRY_CONFIG
200
201
  if not config_path.is_file():
201
202
  return set()
202
- data = tomllib.loads(config_path.read_text())
203
+ data = tomllib.loads(config_path.read_text(encoding="utf-8"))
203
204
  referenced: 'typing.Set[str]' = set()
204
205
  candidates = [data]
205
206
  profiles = data.get("profiles")
@@ -352,7 +353,7 @@ def _load_cached_metadata(metadata_path: 'Path') -> 'typing.Dict[str, object]':
352
353
  if not metadata_path.is_file():
353
354
  return {}
354
355
  try:
355
- payload = json.loads(metadata_path.read_text())
356
+ payload = json.loads(metadata_path.read_text(encoding="utf-8"))
356
357
  except (ValueError, OSError):
357
358
  return {}
358
359
  return payload if isinstance(payload, dict) else {}
@@ -30,7 +30,8 @@ EXEC_TOOLS_SNAPSHOT_PATH = (
30
30
  @lru_cache(maxsize=1)
31
31
  def _load_exec_tool_payloads() -> 'typing.Dict[str, JSONDict]':
32
32
  payloads: 'typing.Dict[str, JSONDict]' = {}
33
- for payload in json.loads(EXEC_TOOLS_SNAPSHOT_PATH.read_text()):
33
+ raw_payloads = EXEC_TOOLS_SNAPSHOT_PATH.read_text(encoding="utf-8")
34
+ for payload in json.loads(raw_payloads):
34
35
  if not isinstance(payload, dict):
35
36
  continue
36
37
  name = payload.get("name")
pycodex/utils/dotenv.py CHANGED
@@ -18,7 +18,9 @@ def load_codex_dotenv(config_path: 'typing.Union[str, Path]') -> 'None':
18
18
  _LOADED_CODEX_DOTENV_HOMES.add(codex_home)
19
19
  return
20
20
 
21
- for key, value in parse_dotenv(dotenv_path.read_text()).items():
21
+ for key, value in parse_dotenv(
22
+ dotenv_path.read_text(encoding="utf-8", errors="replace")
23
+ ).items():
22
24
  if key.upper().startswith(ILLEGAL_ENV_VAR_PREFIX):
23
25
  continue
24
26
  os.environ[key] = value
pycodex/utils/get_env.py CHANGED
@@ -98,7 +98,11 @@ def get_os_info() -> 'typing.Tuple[str, str]':
98
98
  os_release = Path("/etc/os-release")
99
99
  if os_release.is_file():
100
100
  values: 'typing.Dict[str, str]' = {}
101
- for line in os_release.read_text().splitlines():
101
+ os_release_text = os_release.read_text(
102
+ encoding="utf-8",
103
+ errors="replace",
104
+ )
105
+ for line in os_release_text.splitlines():
102
106
  if "=" not in line:
103
107
  continue
104
108
  key, value = line.split("=", 1)
@@ -282,7 +282,9 @@ def _latest_thread_names_by_id(codex_home: 'Path') -> 'typing.Dict[str, str]':
282
282
  return {}
283
283
 
284
284
  names_by_id: 'typing.Dict[str, str]' = {}
285
- for raw_line in reversed(index_path.read_text().splitlines()):
285
+ for raw_line in reversed(
286
+ index_path.read_text(encoding="utf-8", errors="replace").splitlines()
287
+ ):
286
288
  line = raw_line.strip()
287
289
  if not line:
288
290
  continue
@@ -321,7 +323,7 @@ def _extract_first_user_message_preview(rollout_path: 'Path') -> 'typing.Union[s
321
323
 
322
324
 
323
325
  def _iter_rollout_entries(rollout_path: 'Path') -> 'typing.Iterable[typing.Dict[str, object]]':
324
- text = rollout_path.read_text()
326
+ text = rollout_path.read_text(encoding="utf-8", errors="replace")
325
327
  decoder = json.JSONDecoder()
326
328
  index = 0
327
329
  parsed_entries = 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-codex
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: A minimal Python extraction of Codex's main agent loop
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.6.2
@@ -3,10 +3,10 @@ pycodex/agent.py,sha256=s0FrF_XG2pHKryooS461Jr_acmQ_TKTp2JLGQNiny6w,11888
3
3
  pycodex/cli.py,sha256=MDd6aK9L3FkNSmwIiYQs322HPqL4L8kQl9laRn4brLM,31924
4
4
  pycodex/collaboration.py,sha256=yQ6pBD-R3ZWR4_FAYQFoS7KF0m4LLD42otXIbPqw2ys,641
5
5
  pycodex/compat.py,sha256=IO0X7AgcYhlHnYnpvBZ6leCh_UjoQzg5HLT5wYBNNIw,3155
6
- pycodex/context.py,sha256=R5tuMcNrX1F-Lh9ymsSbnfRbKLJ19TWrtQoZ3tWlHvM,24982
6
+ pycodex/context.py,sha256=NuyiDJoUbhj4Xp6xExqs4cATGCKfYQ-YUx2BC7JXGeA,25306
7
7
  pycodex/doctor.py,sha256=De3M4hRBJq8ZeqsUJgHz0vitqrH18YugrEnz7oHhTdQ,10572
8
- pycodex/model.py,sha256=Mk9LZKmFcXG71I18-gs4dUWNn0GIM1rbMhFfKDut_3w,32790
9
- pycodex/portable.py,sha256=kZ5XVOMZq0l6xXsx3FY9C3DfB4Jra5Hw38qTMH0TEwg,15597
8
+ pycodex/model.py,sha256=p4eiRx_PTu8YK36zWQgldMgjW4PXNsdHSr4k8XCYbVQ,32806
9
+ pycodex/portable.py,sha256=gxl2E2h5uZJbasMEPPs-nyALFPIvX79T2ZYsu6vXZrg,15656
10
10
  pycodex/portable_server.py,sha256=6I3pQkWj3e_SFlDXY2mGdCPns1w_3PSxByBV9wv5epI,7331
11
11
  pycodex/protocol.py,sha256=LYDzJefu1tugqQzee4NuZzxhGAv3hXrNcnlw04CudAY,11106
12
12
  pycodex/runtime.py,sha256=gpDDxQKfp1Cqh1U0uslI3rCoXEN7XpJGuHlV-bsveM0,7983
@@ -28,7 +28,7 @@ pycodex/prompts/permissions/sandbox_mode/workspace_write.md,sha256=lVN-LwrBbHqlv
28
28
  pycodex/tools/__init__.py,sha256=aSLXrr_31KGQgDfRow5zVIc-2-KdXlHaCE6qUnE4HWI,1772
29
29
  pycodex/tools/agent_tool_schemas.py,sha256=r7pBICcx8fb0Rg6IzIg8-u3um2z11TogQ4yCzuiO-4o,2033
30
30
  pycodex/tools/apply_patch_tool.py,sha256=aFob-gzaCXlzPdCIvRXVKm1NrQqqhqe8CVkFVAhqiTc,13955
31
- pycodex/tools/base_tool.py,sha256=vlNw1PoQhXXb8evPjj7bqLOuOcw1ttKl-SrXOHJGVvs,6266
31
+ pycodex/tools/base_tool.py,sha256=gClTp7i4KIwb8IGAAfhjqJbvEFKo-8QCe4rPXjgDLUM,6314
32
32
  pycodex/tools/close_agent_tool.py,sha256=nY3l_UOX6NyTgUqdXag3yRpdyQScV0g0Vv4HE3ElLwg,1597
33
33
  pycodex/tools/code_mode_manager.py,sha256=Wow42H_9IomUKUjjjU8rrAFAklhE-UlgxgrbgHRU_4M,19031
34
34
  pycodex/tools/exec_command_tool.py,sha256=l8GWlZKTvlWWAd_OPKsnnt3m0woMWXK8NkilmspnaQQ,3485
@@ -54,25 +54,26 @@ pycodex/tools/write_stdin_tool.py,sha256=nCuProkbeewfQ_yS8CgBajo--K3EmkXzJYh1D2Q
54
54
  pycodex/utils/__init__.py,sha256=jE63cZR1IBzs4ED86lwdYRLqV5FmPhNNDzIgC90mr6A,1216
55
55
  pycodex/utils/compactor.py,sha256=ZCzGc02xHmXq1rIjnG2gATKcFtt6r-OGsCIK0ypjnyI,6467
56
56
  pycodex/utils/debug.py,sha256=JeEB5JfzYfbdG0fXlrWFmXyR1ts86fKsI_97IqgF6R0,296
57
- pycodex/utils/dotenv.py,sha256=EDBXdn93ewmq9zhJki5_LsJJXe0wMIQJ6VfCE1r7voQ,1818
58
- pycodex/utils/get_env.py,sha256=jR8G0Xco57jX-71E1oHIcl3-Kz9Ltc0kzxj04DKzt80,7316
57
+ pycodex/utils/dotenv.py,sha256=rGKmurHjm7GdP4giyjHBPpSPv2Oi45qBqDB6HG3CnfA,1866
58
+ pycodex/utils/get_env.py,sha256=5fNhcNhujOakWV6AS66rGW3jEA68WGpuE4YVXJZFE6U,7427
59
59
  pycodex/utils/random_ids.py,sha256=zBphjVGc7OXk9ZNExAbxRi_bk7ipyLG491qTv7hi8jM,380
60
- pycodex/utils/session_persist.py,sha256=dUvo3Z1QBB4HJT1tLerDlLD3ZB25umB6FP6JORg9V40,16414
60
+ pycodex/utils/session_persist.py,sha256=Ntu0jcb2cEZbXpKDe0LXD-OuxfjK0SzBV0lRi90-NAM,16496
61
61
  pycodex/utils/visualize.py,sha256=JURzq2AbV046bblE5fojcAe885Juda0LDxt_gqT2PUc,41006
62
62
  responses_server/__init__.py,sha256=3yPv_zeGT7P11tTnmj5kXktISLNsNW-02MUnnbiZcb0,394
63
63
  responses_server/__main__.py,sha256=9SRp-Yw7ShGxc6DhSIXcDLKgGEdAVm3oBZ59rBOPjT0,62
64
- responses_server/app.py,sha256=4SUG8xqKqmVKVY9i1f5WF9QrnmxGbD4mwiI6s13zQDE,7742
64
+ responses_server/app.py,sha256=53SEwI2pUj_YL8zWvvYhQHiT1EUVVPzdRzOqXDvFMJ0,7770
65
65
  responses_server/config.py,sha256=leb3_uPrCyYdUIkyRyVPX4luGF88dQ62OkhRLPe7uxw,2718
66
- responses_server/messages_api.py,sha256=3GPMfs3ksQkhezLyWBjeW5zJ1e_MeHXVaq1lALIA7Mk,16815
67
- responses_server/payload_processors.py,sha256=gfOXqvVwlhCk-yjaDdGU4RKcpDdxIq2y6CmqUCggIjY,3444
68
- responses_server/server.py,sha256=isyzN-p-Ir8LLycN_dQfcanvie2ZqqSu52mOPz_wYD4,2095
66
+ responses_server/messages_api.py,sha256=WgO6J1jz2pOJkI79rLXp-pS1yxtLARcwX8T6JX5Vkcc,16971
67
+ responses_server/payload_processors.py,sha256=cbXGW8Xi-mliaWRg0_Af41X0vXV2W6R9VBzTE6DXfe4,3483
68
+ responses_server/server.py,sha256=Ko-Cqz_kW-uve091itucMklsPhEei77v-YcTjtjEdqU,2286
69
69
  responses_server/session_store.py,sha256=ZD3cH2aEOkWaQsu5qTzcal2mThTSFQPAhAhPUN9srgI,1115
70
- responses_server/stream_router.py,sha256=OVwaDEsUaKVDNGF2vnqNZTo3WA9h3D3uzHeYY-QN9IU,34754
70
+ responses_server/stream_router.py,sha256=PuW8_fo8c_R9kd_Gy_Z7nyij7-xW_lPcfFcnYI2PvQA,35434
71
+ responses_server/trajectory_dump.py,sha256=XCwYaZZmlAxSsSXOfhk3zRvyfDpOHX5R8KzspScNFUM,3435
71
72
  responses_server/tools/__init__.py,sha256=ivsBSEy0SBUhY-Uea5v1XMLXShkwHdCVl0id-1FwdZg,150
72
73
  responses_server/tools/custom_adapter.py,sha256=LxO7ldydvR-GWachDz8GKC0Q8KGGFoFPbZxM0QvxuZ0,8350
73
74
  responses_server/tools/web_search.py,sha256=pm4ZUiHUfxc0bGY1kEvt-BCzDrZIyP24xzPUcga2ul0,8908
74
- python_codex-0.1.8.dist-info/METADATA,sha256=LAgQh2YnOizB3dQExftZEmRaIMNE_rL6L7rNTxLpzfA,15719
75
- python_codex-0.1.8.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
76
- python_codex-0.1.8.dist-info/entry_points.txt,sha256=sNUVakoVuTrzJH505ZgRTQxmtRRPUHV_EH0i6EbYTyM,45
77
- python_codex-0.1.8.dist-info/licenses/LICENSE,sha256=0X8ifk312hYAORM4hlzg8wVSEXYKNmiPgWlB1YIy2Nw,10926
78
- python_codex-0.1.8.dist-info/RECORD,,
75
+ python_codex-0.1.10.dist-info/METADATA,sha256=K4ZxbwNtBlhsCjm0z3Ro15K1eA97rplNd8k2qGUJt7U,15720
76
+ python_codex-0.1.10.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
77
+ python_codex-0.1.10.dist-info/entry_points.txt,sha256=sNUVakoVuTrzJH505ZgRTQxmtRRPUHV_EH0i6EbYTyM,45
78
+ python_codex-0.1.10.dist-info/licenses/LICENSE,sha256=0X8ifk312hYAORM4hlzg8wVSEXYKNmiPgWlB1YIy2Nw,10926
79
+ python_codex-0.1.10.dist-info/RECORD,,
responses_server/app.py CHANGED
@@ -177,6 +177,7 @@ class ManagedResponseServer:
177
177
  self._app,
178
178
  host=self._config.host,
179
179
  port=self._config.port,
180
+ loop="asyncio",
180
181
  log_level="error",
181
182
  access_log=False,
182
183
  )
@@ -66,6 +66,8 @@ def build_messages_request(
66
66
  "max_tokens": _resolve_max_tokens(outcomming_request),
67
67
  "stream": bool(outcomming_request.get("stream", True)),
68
68
  }
69
+ if isinstance(outcomming_request.get("return_token_ids"), bool):
70
+ payload["return_token_ids"] = bool(outcomming_request.get("return_token_ids"))
69
71
  if system_blocks:
70
72
  payload["system"] = system_blocks
71
73
 
@@ -32,6 +32,7 @@ class OutgoingRequest(TypedDict):
32
32
  tools: 'Optional[typing.List[typing.Dict[str, object]]]'
33
33
  tool_choice: 'Optional[object]'
34
34
  parallel_tool_calls: 'Optional[bool]'
35
+ return_token_ids: 'Optional[bool]'
35
36
 
36
37
 
37
38
  PayloadPostProcessor = Callable[[OutgoingRequest], OutgoingRequest]
@@ -3,6 +3,7 @@ from .config import CompatServerConfig
3
3
  from .payload_processors import post_process_outcomming_request
4
4
  from .session_store import SessionStore
5
5
  from .stream_router import StreamRouter
6
+ from .trajectory_dump import TrajectoryDumpWriter
6
7
  import typing
7
8
 
8
9
 
@@ -16,6 +17,7 @@ class ResponseServer:
16
17
  self._config = config
17
18
  self._session_store = session_store or SessionStore()
18
19
  self._stream_router = stream_router or StreamRouter(config)
20
+ self._trajectory_dump = TrajectoryDumpWriter.from_env()
19
21
 
20
22
  @property
21
23
  def config(self) -> 'CompatServerConfig':
@@ -38,6 +40,9 @@ class ResponseServer:
38
40
  request_headers: 'typing.Dict[str, str]',
39
41
  ):
40
42
  outcomming_request = self._stream_router.build_outcomming_request(request_body)
43
+ if self._trajectory_dump is not None:
44
+ # vLLM surfaces prompt/decode token IDs only when this flag is set.
45
+ outcomming_request["return_token_ids"] = True
41
46
  outcomming_request = post_process_outcomming_request(
42
47
  outcomming_request,
43
48
  self._config.model_provider,
@@ -52,12 +57,9 @@ class ResponseServer:
52
57
  session_id=session_id,
53
58
  model=str(outcomming_request["model"]),
54
59
  )
55
- incomming_stream = self._stream_router.open_outcomming_stream(
56
- outcomming_request
57
- )
58
60
  return self._stream_router.route_stream(
59
- incomming_stream,
60
61
  stored_response,
61
62
  outcomming_request,
62
63
  custom_tool_names,
64
+ self._trajectory_dump,
63
65
  )
@@ -27,6 +27,7 @@ from .tools.web_search import (
27
27
  hydrate_tool_call_names,
28
28
  partition_tool_calls,
29
29
  )
30
+ from .trajectory_dump import TrajectoryDumpWriter
30
31
  import typing
31
32
 
32
33
 
@@ -285,10 +286,10 @@ class StreamRouter:
285
286
 
286
287
  def route_stream(
287
288
  self,
288
- incomming_stream,
289
289
  stored_response: 'StoredResponse',
290
290
  outcomming_request: 'typing.Dict[str, object]',
291
291
  custom_tool_names: 'typing.Union[typing.Set[str], None]' = None,
292
+ trajectory_dump: 'typing.Union[TrajectoryDumpWriter, None]' = None,
292
293
  ):
293
294
  yield (
294
295
  "response.created",
@@ -307,7 +308,10 @@ class StreamRouter:
307
308
  reasoning_parts: 'typing.List[str]' = []
308
309
  latest_usage: 'typing.Dict[str, object]' = {}
309
310
  current_request = json.loads(json.dumps(outcomming_request))
310
- current_stream = incomming_stream
311
+ current_stream = self._open_tracked_outcomming_stream(
312
+ current_request,
313
+ trajectory_dump,
314
+ )
311
315
 
312
316
  while True:
313
317
  tool_calls: 'typing.Dict[int, typing.Dict[str, object]]' = {}
@@ -352,7 +356,10 @@ class StreamRouter:
352
356
  )
353
357
  except ValueError as exc:
354
358
  raise OutcommingChatError(str(exc)) from exc
355
- current_stream = self.open_outcomming_stream(current_request)
359
+ current_stream = self._open_tracked_outcomming_stream(
360
+ current_request,
361
+ trajectory_dump,
362
+ )
356
363
  continue
357
364
 
358
365
  for item in self._build_output_items(
@@ -394,6 +401,16 @@ class StreamRouter:
394
401
  },
395
402
  )
396
403
 
404
+ def _open_tracked_outcomming_stream(
405
+ self,
406
+ outcomming_request: 'typing.Dict[str, object]',
407
+ trajectory_dump: 'typing.Union[TrajectoryDumpWriter, None]' = None,
408
+ ):
409
+ outcomming_stream = self.open_outcomming_stream(outcomming_request)
410
+ if trajectory_dump is None:
411
+ return outcomming_stream
412
+ return trajectory_dump.wrap_stream(outcomming_stream)
413
+
397
414
  def _responses_input_to_chat_messages(
398
415
  self,
399
416
  instructions: 'str',
@@ -0,0 +1,105 @@
1
+ import json
2
+ import os
3
+ import sys
4
+ import threading
5
+ import time
6
+ import typing
7
+
8
+
9
+ class TrajectoryDumpWriter:
10
+ ENV_VAR = "PYCODEX_DUMP"
11
+
12
+ def __init__(self, root_dir: 'str') -> 'None':
13
+ self._root_dir = os.path.abspath(root_dir)
14
+ self._dump_path = os.path.join(self._root_dir, "dump.jsonl")
15
+ self._lock = threading.Lock()
16
+ os.makedirs(self._root_dir, exist_ok=True)
17
+
18
+ @classmethod
19
+ def from_env(cls) -> 'typing.Union[TrajectoryDumpWriter, None]':
20
+ root_dir = str(os.environ.get(cls.ENV_VAR, "") or "").strip()
21
+ if not root_dir:
22
+ return None
23
+ return cls(root_dir)
24
+
25
+ def wrap_stream(self, outcomming_stream):
26
+ def iter_stream():
27
+ capture = _TrajectoryCapture(self, time.time())
28
+ try:
29
+ for chunk in outcomming_stream:
30
+ capture.observe_chunk(chunk)
31
+ yield chunk
32
+ finally:
33
+ capture.flush()
34
+
35
+ return iter_stream()
36
+
37
+ def _append_record(self, record: 'typing.Dict[str, object]') -> 'None':
38
+ serialized = json.dumps(record, ensure_ascii=False)
39
+ with self._lock:
40
+ os.makedirs(self._root_dir, exist_ok=True)
41
+ with open(self._dump_path, "a", encoding="utf-8") as handle:
42
+ handle.write(serialized)
43
+ handle.write("\n")
44
+
45
+
46
+ class _TrajectoryCapture:
47
+ def __init__(
48
+ self,
49
+ writer: 'TrajectoryDumpWriter',
50
+ send_timestamp: 'float',
51
+ ) -> 'None':
52
+ self._writer = writer
53
+ self._send_timestamp = float(send_timestamp)
54
+ self._prefill_token_ids = None
55
+ self._decode_token_ids = []
56
+ self._closed = False
57
+
58
+ def observe_chunk(self, payload: 'object') -> 'None':
59
+ if not isinstance(payload, dict):
60
+ return
61
+ if self._prefill_token_ids is None and "prompt_token_ids" in payload:
62
+ normalized_prefill = _normalize_token_ids(payload.get("prompt_token_ids"))
63
+ if normalized_prefill is not None:
64
+ self._prefill_token_ids = normalized_prefill
65
+
66
+ choices = payload.get("choices") or []
67
+ if not isinstance(choices, list):
68
+ return
69
+ for raw_choice in choices:
70
+ if not isinstance(raw_choice, dict):
71
+ continue
72
+ normalized_decode = _normalize_token_ids(raw_choice.get("token_ids"))
73
+ if normalized_decode:
74
+ self._decode_token_ids.extend(normalized_decode)
75
+
76
+ def flush(self) -> 'None':
77
+ if self._closed:
78
+ return
79
+ self._closed = True
80
+ record = {
81
+ "tokens": {
82
+ "prefill": list(self._prefill_token_ids or []),
83
+ "decode": list(self._decode_token_ids),
84
+ },
85
+ "send_timestamp": self._send_timestamp,
86
+ }
87
+ try:
88
+ self._writer._append_record(record)
89
+ except Exception as exc:
90
+ print(
91
+ "responses_server: failed to append PYCODEX_DUMP trajectory: %s"
92
+ % exc,
93
+ file=sys.stderr,
94
+ )
95
+
96
+
97
+ def _normalize_token_ids(raw_value: 'object') -> 'typing.Union[typing.List[int], None]':
98
+ if not isinstance(raw_value, list):
99
+ return None
100
+ token_ids = []
101
+ for value in raw_value:
102
+ if isinstance(value, bool) or not isinstance(value, int):
103
+ continue
104
+ token_ids.append(value)
105
+ return token_ids