python-codex 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pycodex/cli.py CHANGED
@@ -42,7 +42,6 @@ CliSessionMode = Literal["exec", "tui"]
42
42
  LOCAL_RESPONSES_SERVER_API_KEY_ENV = "PYCODEX_LOCAL_RESPONSES_SERVER_KEY"
43
43
  CLI_ORIGINATOR = "codex-tui"
44
44
 
45
-
46
45
  def launch_chat_completion_compat_server(*args, **kwargs):
47
46
  from responses_server import (
48
47
  launch_chat_completion_compat_server as launch_compat_server,
@@ -123,6 +122,15 @@ def build_parser() -> 'argparse.ArgumentParser':
123
122
  "When set, pycodex starts a local responses compat server for this session."
124
123
  ),
125
124
  )
125
+ parser.add_argument(
126
+ "--use-messages",
127
+ default=False,
128
+ action="store_true",
129
+ help=(
130
+ "When set, pycodex starts a local responses compat server and routes "
131
+ "to a downstream /v1/messages backend for this session."
132
+ ),
133
+ )
126
134
  parser.add_argument(
127
135
  "--system-prompt",
128
136
  default=None,
@@ -373,12 +381,17 @@ def _build_model_client(
373
381
  managed_responses_base_url: 'typing.Union[str, None]' = None,
374
382
  vllm_endpoint: 'typing.Union[str, None]' = None,
375
383
  use_chat_completion: 'bool' = False,
384
+ use_messages: 'bool' = False,
376
385
  ):
377
386
  load_codex_dotenv(config_path)
378
387
  provider_config = ResponsesProviderConfig.from_codex_config(
379
388
  config_path,
380
389
  profile,
381
390
  )
391
+ if use_chat_completion and use_messages:
392
+ raise ValueError("--use-chat-completion and --use-messages cannot be combined")
393
+ if vllm_endpoint and use_messages:
394
+ raise ValueError("--vllm-endpoint and --use-messages cannot be combined")
382
395
  url, key_env = provider_config.base_url, provider_config.api_key_env
383
396
  if managed_responses_base_url is not None:
384
397
  url, key_env = (
@@ -386,7 +399,7 @@ def _build_model_client(
386
399
  LOCAL_RESPONSES_SERVER_API_KEY_ENV,
387
400
  )
388
401
  os.environ.setdefault(LOCAL_RESPONSES_SERVER_API_KEY_ENV, "dummy")
389
- elif vllm_endpoint or use_chat_completion:
402
+ elif vllm_endpoint or use_chat_completion or use_messages:
390
403
  if vllm_endpoint:
391
404
  managed_server = launch_chat_completion_compat_server(
392
405
  vllm_endpoint,
@@ -397,6 +410,9 @@ def _build_model_client(
397
410
  provider_config.base_url,
398
411
  provider_config.api_key_env,
399
412
  model_provider=provider_config.provider_name,
413
+ outcomming_api=(
414
+ "messages" if use_messages else "chat_completions"
415
+ ),
400
416
  )
401
417
  atexit.register(managed_server.stop)
402
418
  url, key_env = (
@@ -755,6 +771,7 @@ async def run_cli(args: 'argparse.Namespace') -> 'int':
755
771
  args.timeout_seconds,
756
772
  vllm_endpoint=args.vllm_endpoint,
757
773
  use_chat_completion=args.use_chat_completion,
774
+ use_messages=args.use_messages,
758
775
  )
759
776
 
760
777
  runtime = build_runtime(
@@ -83,7 +83,7 @@ def format_cli_plan_messages(
83
83
 
84
84
  def build_cli_spinner_frame(index: 'int', label: 'str') -> 'str':
85
85
  suffix = f" {label}" if label else ""
86
- return f"{suffix} {SPINNER_FRAMES[index % len(SPINNER_FRAMES)]}"
86
+ return f"{SPINNER_FRAMES[index % len(SPINNER_FRAMES)]}{suffix}"
87
87
 
88
88
 
89
89
  def percent_of_context_window_remaining(
@@ -158,13 +158,29 @@ class Spinner:
158
158
  self._paused = False
159
159
 
160
160
  def clear(self) -> 'None':
161
- if not self._enabled or not self._visible:
162
- return
163
161
  with self._terminal_lock:
162
+ if not self._visible:
163
+ return
164
164
  self._raw_write("\r\x1b[2K")
165
165
  self._raw_flush()
166
166
  self._visible = False
167
167
 
168
+ def render_now(self) -> 'None':
169
+ if not self._turn_active or self._paused:
170
+ return
171
+ frame = colorize_cli_message(
172
+ build_cli_spinner_frame(self._index, self._label),
173
+ "status",
174
+ self._color_enabled,
175
+ )
176
+ self._index += 1
177
+ with self._terminal_lock:
178
+ if not self._turn_active or self._paused:
179
+ return
180
+ self._raw_write(f"\r\x1b[2K{frame}")
181
+ self._raw_flush()
182
+ self._visible = True
183
+
168
184
  def close(self) -> 'None':
169
185
  self.finish_turn()
170
186
  if self._thread is not None:
@@ -726,6 +742,7 @@ class CliSessionView:
726
742
  else:
727
743
  self._spinner.resume()
728
744
  self._spinner.set_label("running provider tools")
745
+ self._spinner.render_now()
729
746
  return
730
747
 
731
748
  if event.kind == "tool_started":
@@ -740,20 +757,16 @@ class CliSessionView:
740
757
  else:
741
758
  self._spinner.resume()
742
759
  if tool_name and args is not None:
743
- self._spinner.set_label(f"running {tool_name}({args})")
760
+ self._spinner.set_label(shorten_title(f"running {tool_name}({args})", limit=72))
744
761
  elif tool_name:
745
762
  self._spinner.set_label(f"running {tool_name}")
746
763
  else:
747
764
  self._spinner.set_label("running provider tools")
765
+ self._spinner.render_now()
748
766
  return
749
767
 
750
768
  if event.kind == "tool_completed":
751
769
  self._finish_stream()
752
- if self._input_active:
753
- self._spinner.pause()
754
- else:
755
- self._spinner.resume()
756
- self._spinner.set_label("thinking")
757
770
  tool_name, summary, is_error = extract_tool_event_display(event.payload)
758
771
  summary = self._rewrite_agent_summary(tool_name, summary)
759
772
  if tool_name == "update_plan" and not is_error:
@@ -762,6 +775,12 @@ class CliSessionView:
762
775
  self._print_line(
763
776
  colorize_cli_message(line, "plan", self._color_enabled)
764
777
  )
778
+ if self._input_active:
779
+ self._spinner.pause()
780
+ else:
781
+ self._spinner.resume()
782
+ self._spinner.set_label("thinking")
783
+ self._spinner.render_now()
765
784
  return
766
785
  message = format_cli_tool_message(
767
786
  tool_name,
@@ -770,6 +789,12 @@ class CliSessionView:
770
789
  )
771
790
  self._remember_agent_name(tool_name, summary)
772
791
  self._print_line(self._colorize_formatted_tool_message(message))
792
+ if self._input_active:
793
+ self._spinner.pause()
794
+ else:
795
+ self._spinner.resume()
796
+ self._spinner.set_label("thinking")
797
+ self._spinner.render_now()
773
798
  return
774
799
 
775
800
  if event.kind == "turn_completed":
@@ -830,6 +855,8 @@ class CliSessionView:
830
855
 
831
856
  def resume_spinner(self) -> 'None':
832
857
  self._spinner.resume()
858
+ if not self._input_active:
859
+ self._spinner.render_now()
833
860
 
834
861
  def set_input_active(self, active: 'bool', resume_spinner: 'bool' = True) -> 'None':
835
862
  self._input_active = active
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-codex
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: A minimal Python extraction of Codex's main agent loop
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.6.2
@@ -159,6 +159,7 @@ pycodex "Summarize this repo in one sentence."
159
159
  printf 'Reply with exactly OK.' | pycodex
160
160
  pycodex --json "Reply with exactly OK."
161
161
  pycodex --profile model_proxy "Reply with exactly OK."
162
+ pycodex --profile opus --use-messages "Reply with exactly OK."
162
163
  pycodex --vllm-endpoint http://127.0.0.1:18000 "Reply with exactly OK."
163
164
  pycodex --put @127.0.0.1:5577
164
165
  pycodex --put /data/.codex/@127.0.0.1:5577
@@ -211,6 +212,9 @@ Current behavior:
211
212
  historical `reasoning` items are replayed into downstream assistant messages
212
213
  via the `reasoning` field. Streaming token usage is also requested from vLLM
213
214
  and forwarded to the final `response.completed.response.usage`
215
+ - standalone `responses_server` now also supports downstream `/v1/messages`
216
+ backends via `--outcomming-api messages`, while keeping the internal
217
+ canonical request/route logic in chat-completions shape
214
218
  - `pycodex doctor` checks config, `.env`, API keys, DNS, TCP/TLS, and an
215
219
  optional live Responses API request
216
220
 
@@ -1,6 +1,6 @@
1
1
  pycodex/__init__.py,sha256=jCnC_Bgotlxa4GwO3Re2sChKGY49TRM-uVZEQ9uBpfw,3106
2
2
  pycodex/agent.py,sha256=s0FrF_XG2pHKryooS461Jr_acmQ_TKTp2JLGQNiny6w,11888
3
- pycodex/cli.py,sha256=ntgC0LWlSOhuYAUOBgSEeVIjBTKS91klyvkTO9QtFoE,29559
3
+ pycodex/cli.py,sha256=FG0klhVuJiS7797GQZpj6gk1-jChFwgFPEhPXA2vXM8,30316
4
4
  pycodex/collaboration.py,sha256=yQ6pBD-R3ZWR4_FAYQFoS7KF0m4LLD42otXIbPqw2ys,641
5
5
  pycodex/compat.py,sha256=IO0X7AgcYhlHnYnpvBZ6leCh_UjoQzg5HLT5wYBNNIw,3155
6
6
  pycodex/context.py,sha256=R5tuMcNrX1F-Lh9ymsSbnfRbKLJ19TWrtQoZ3tWlHvM,24982
@@ -57,20 +57,21 @@ pycodex/utils/dotenv.py,sha256=EDBXdn93ewmq9zhJki5_LsJJXe0wMIQJ6VfCE1r7voQ,1818
57
57
  pycodex/utils/get_env.py,sha256=jR8G0Xco57jX-71E1oHIcl3-Kz9Ltc0kzxj04DKzt80,7316
58
58
  pycodex/utils/random_ids.py,sha256=zBphjVGc7OXk9ZNExAbxRi_bk7ipyLG491qTv7hi8jM,380
59
59
  pycodex/utils/session_persist.py,sha256=dUvo3Z1QBB4HJT1tLerDlLD3ZB25umB6FP6JORg9V40,16414
60
- pycodex/utils/visualize.py,sha256=9S3oOUAnI_SbVvoFJ18dzq8MLE5v6kAsNiYsMTtqKAc,40022
60
+ pycodex/utils/visualize.py,sha256=JURzq2AbV046bblE5fojcAe885Juda0LDxt_gqT2PUc,41006
61
61
  responses_server/__init__.py,sha256=3yPv_zeGT7P11tTnmj5kXktISLNsNW-02MUnnbiZcb0,394
62
62
  responses_server/__main__.py,sha256=9SRp-Yw7ShGxc6DhSIXcDLKgGEdAVm3oBZ59rBOPjT0,62
63
- responses_server/app.py,sha256=AtysZYL6ViheHYISS8eCK_iyr7CwUfF3wrt86ekh79U,7371
64
- responses_server/config.py,sha256=wEcZbXZclTYz4fI_oy_sSMglWPeEITWlFeglQrrr6HE,2236
65
- responses_server/payload_processors.py,sha256=AcOipqVQyo4wKw_pb3ABlarwIK1VjcnQTlgPehRVGO8,3412
63
+ responses_server/app.py,sha256=4SUG8xqKqmVKVY9i1f5WF9QrnmxGbD4mwiI6s13zQDE,7742
64
+ responses_server/config.py,sha256=leb3_uPrCyYdUIkyRyVPX4luGF88dQ62OkhRLPe7uxw,2718
65
+ responses_server/messages_api.py,sha256=3GPMfs3ksQkhezLyWBjeW5zJ1e_MeHXVaq1lALIA7Mk,16815
66
+ responses_server/payload_processors.py,sha256=gfOXqvVwlhCk-yjaDdGU4RKcpDdxIq2y6CmqUCggIjY,3444
66
67
  responses_server/server.py,sha256=isyzN-p-Ir8LLycN_dQfcanvie2ZqqSu52mOPz_wYD4,2095
67
68
  responses_server/session_store.py,sha256=ZD3cH2aEOkWaQsu5qTzcal2mThTSFQPAhAhPUN9srgI,1115
68
- responses_server/stream_router.py,sha256=zWC4yyZ3I8E-Zgco844tIhRMWOwIkjOV0s-G-a9-B8k,30861
69
+ responses_server/stream_router.py,sha256=OVwaDEsUaKVDNGF2vnqNZTo3WA9h3D3uzHeYY-QN9IU,34754
69
70
  responses_server/tools/__init__.py,sha256=ivsBSEy0SBUhY-Uea5v1XMLXShkwHdCVl0id-1FwdZg,150
70
71
  responses_server/tools/custom_adapter.py,sha256=LxO7ldydvR-GWachDz8GKC0Q8KGGFoFPbZxM0QvxuZ0,8350
71
72
  responses_server/tools/web_search.py,sha256=pm4ZUiHUfxc0bGY1kEvt-BCzDrZIyP24xzPUcga2ul0,8908
72
- python_codex-0.1.4.dist-info/METADATA,sha256=fSNjm5GPh613W0ZFzU3UJFatqKUUs0xWYW17aOY4eLg,15451
73
- python_codex-0.1.4.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
74
- python_codex-0.1.4.dist-info/entry_points.txt,sha256=sNUVakoVuTrzJH505ZgRTQxmtRRPUHV_EH0i6EbYTyM,45
75
- python_codex-0.1.4.dist-info/licenses/LICENSE,sha256=0X8ifk312hYAORM4hlzg8wVSEXYKNmiPgWlB1YIy2Nw,10926
76
- python_codex-0.1.4.dist-info/RECORD,,
73
+ python_codex-0.1.6.dist-info/METADATA,sha256=SYSPoh0NP5qUQKfAu2ncG1_DXQZrWKSPcRGpnqmTVQk,15719
74
+ python_codex-0.1.6.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
75
+ python_codex-0.1.6.dist-info/entry_points.txt,sha256=sNUVakoVuTrzJH505ZgRTQxmtRRPUHV_EH0i6EbYTyM,45
76
+ python_codex-0.1.6.dist-info/licenses/LICENSE,sha256=0X8ifk312hYAORM4hlzg8wVSEXYKNmiPgWlB1YIy2Nw,10926
77
+ python_codex-0.1.6.dist-info/RECORD,,
responses_server/app.py CHANGED
@@ -36,13 +36,14 @@ def _stream_events(response_server: 'ResponseServer', request_body: 'typing.Dict
36
36
  except OutcommingChatError as exc:
37
37
 
38
38
  import traceback
39
+ exc_info = traceback.format_exception(type(exc), exc, exc.__traceback__)
39
40
  yield _format_sse_event(
40
41
  "response.failed",
41
42
  {
42
43
  "type": "response.failed",
43
44
  "response": {
44
45
  "error": {
45
- "message": '\n'.join(traceback.format_exception(exc)),
46
+ "message": '\n'.join(exc_info),
46
47
  }
47
48
  },
48
49
  },
@@ -54,12 +55,18 @@ def build_parser() -> 'argparse.ArgumentParser':
54
55
  prog="python -m responses_server",
55
56
  description=(
56
57
  "Standalone localhost `/v1/responses` server that translates the "
57
- "Codex/Responses subset onto an outcomming `/v1/chat/completions` backend."
58
+ "Codex/Responses subset onto an outcomming `/v1/chat/completions` "
59
+ "or `/v1/messages` backend."
58
60
  ),
59
61
  )
60
62
  parser.add_argument("--host", default="127.0.0.1")
61
63
  parser.add_argument("--port", type=int, default=8001)
62
64
  parser.add_argument("--outcomming-base-url", required=True)
65
+ parser.add_argument(
66
+ "--outcomming-api",
67
+ default="chat_completions",
68
+ choices=["chat_completions", "messages"],
69
+ )
63
70
  parser.add_argument("--outcomming-api-key-env", default=None)
64
71
  parser.add_argument("--model-provider", default=None)
65
72
  parser.add_argument("--timeout-seconds", type=float, default=120.0)
@@ -79,10 +86,12 @@ def launch_chat_completion_compat_server(
79
86
  base_url: 'str',
80
87
  api_key_env: 'typing.Union[str, None]' = None,
81
88
  model_provider: 'typing.Union[str, None]' = None,
89
+ outcomming_api: 'str' = "chat_completions",
82
90
  ):
83
91
  config = CompatServerConfig.from_base_url(
84
92
  base_url,
85
93
  api_key_env,
94
+ outcomming_api=outcomming_api,
86
95
  model_provider=model_provider,
87
96
  )
88
97
  server = ManagedResponseServer(config)
@@ -208,6 +217,7 @@ def main() -> 'None':
208
217
  host=args.host,
209
218
  port=args.port,
210
219
  outcomming_base_url=args.outcomming_base_url,
220
+ outcomming_api=args.outcomming_api,
211
221
  outcomming_api_key_env=args.outcomming_api_key_env,
212
222
  model_provider=args.model_provider,
213
223
  timeout_seconds=args.timeout_seconds,
@@ -10,6 +10,7 @@ class CompatServerConfig:
10
10
  host: 'str' = "127.0.0.1"
11
11
  port: 'int' = 0
12
12
  outcomming_base_url: 'str' = "http://127.0.0.1:8000/v1"
13
+ outcomming_api: 'str' = "chat_completions"
13
14
  outcomming_api_key_env: 'typing.Union[str, None]' = None
14
15
  model_provider: 'typing.Union[str, None]' = None
15
16
  timeout_seconds: 'float' = 120.0
@@ -24,15 +25,24 @@ class CompatServerConfig:
24
25
  base = self.outcomming_base_url.rstrip("/")
25
26
  return f"{base}/chat/completions"
26
27
 
28
+ def outcomming_messages_url(self) -> 'str':
29
+ base = self.outcomming_base_url.rstrip("/")
30
+ return f"{base}/messages"
31
+
27
32
  def outcomming_models_url(self) -> 'str':
28
33
  base = self.outcomming_base_url.rstrip("/")
29
34
  return f"{base}/models"
30
35
 
36
+ def normalized_outcomming_api(self) -> 'str':
37
+ value = str(self.outcomming_api or "").strip().lower()
38
+ return value or "chat_completions"
39
+
31
40
  def with_ephemeral_port(self) -> 'CompatServerConfig':
32
41
  return CompatServerConfig(
33
42
  host=self.host,
34
43
  port=0,
35
44
  outcomming_base_url=self.outcomming_base_url,
45
+ outcomming_api=self.outcomming_api,
36
46
  outcomming_api_key_env=self.outcomming_api_key_env,
37
47
  model_provider=self.model_provider,
38
48
  timeout_seconds=self.timeout_seconds,
@@ -44,6 +54,7 @@ class CompatServerConfig:
44
54
  outcomming_base_url: 'str',
45
55
  api_key_env: 'typing.Union[str, None]' = None,
46
56
  model_provider: 'typing.Union[str, None]' = None,
57
+ outcomming_api: 'str' = "chat_completions",
47
58
  ) -> 'CompatServerConfig':
48
59
  parsed = urllib.parse.urlparse(outcomming_base_url)
49
60
  if not parsed.scheme or not parsed.netloc:
@@ -58,6 +69,7 @@ class CompatServerConfig:
58
69
  )
59
70
  return cls(
60
71
  outcomming_base_url=outcomming_base_url,
72
+ outcomming_api=outcomming_api,
61
73
  outcomming_api_key_env=api_key_env,
62
74
  model_provider=model_provider,
63
75
  )
@@ -0,0 +1,479 @@
1
+ import json
2
+ import typing
3
+
4
+ DEFAULT_MESSAGES_MAX_TOKENS = 32000
5
+
6
+
7
class MessagesAPIAdapterError(ValueError):
    """Raised when a request or stream event cannot be adapted for the messages API."""
9
+
10
+
11
def build_messages_request(
    outcomming_request: 'typing.Dict[str, object]',
) -> 'typing.Dict[str, object]':
    """Translate a chat-completions shaped request into a `/v1/messages` payload.

    `system` and `developer` messages are collected into the top-level
    `system` block list, `user` and `assistant` messages are converted to
    content blocks, and each `tool` message becomes a `user` message that
    carries a single `tool_result` block.

    Args:
        outcomming_request: Request dict with `model`, `messages` and the
            optional `stream`, `max_tokens`, `tools`, `tool_choice` and
            `parallel_tool_calls` keys.

    Returns:
        The payload dict for the messages backend.

    Raises:
        MessagesAPIAdapterError: If `model` is missing or empty, `messages`
            is not a list of objects, or a message has an unsupported role.
    """
    # `or ""` keeps an explicit `"model": None` from being stringified into the
    # bogus model name "None" and slipping past the emptiness check below.
    model = str(outcomming_request.get("model") or "").strip()
    if not model:
        raise MessagesAPIAdapterError("outcomming request is missing `model`")

    raw_messages = outcomming_request.get("messages") or []
    if not isinstance(raw_messages, list):
        raise MessagesAPIAdapterError("outcomming request `messages` must be a list")

    system_blocks: 'typing.List[typing.Dict[str, object]]' = []
    messages: 'typing.List[typing.Dict[str, object]]' = []
    for raw_message in raw_messages:
        if not isinstance(raw_message, dict):
            raise MessagesAPIAdapterError(
                "outcomming request messages must be objects"
            )
        role = str(raw_message.get("role", "")).strip()
        if role in {"developer", "system"}:
            # System-level text is hoisted out of the conversation turns.
            text = str(raw_message.get("content", "") or "")
            if text:
                system_blocks.append({"type": "text", "text": text})
            continue
        if role == "user":
            messages.append(
                {
                    "role": "user",
                    "content": _build_text_blocks(raw_message.get("content")),
                }
            )
            continue
        if role == "assistant":
            messages.append(
                {
                    "role": "assistant",
                    "content": _build_assistant_blocks(raw_message),
                }
            )
            continue
        if role == "tool":
            # Tool output is sent back as a user turn holding a tool_result block.
            messages.append(
                {
                    "role": "user",
                    "content": [_build_tool_result_block(raw_message)],
                }
            )
            continue
        raise MessagesAPIAdapterError(
            f"unsupported outcomming message role for messages API: {role!r}"
        )

    payload: 'typing.Dict[str, object]' = {
        "model": model,
        "messages": messages,
        "max_tokens": _resolve_max_tokens(outcomming_request),
        "stream": bool(outcomming_request.get("stream", True)),
    }
    if system_blocks:
        payload["system"] = system_blocks

    tools = _translate_tools(outcomming_request.get("tools"))
    if tools:
        payload["tools"] = tools
        # A tool choice is only forwarded together with a tool list.
        tool_choice = _translate_tool_choice(
            outcomming_request.get("tool_choice"),
            outcomming_request.get("parallel_tool_calls"),
        )
        if tool_choice is not None:
            payload["tool_choice"] = tool_choice
    return payload
82
+
83
+
84
def iter_chat_chunks(
    event_name: 'typing.Union[str, None]',
    payload: 'typing.Dict[str, object]',
    state: 'typing.Dict[str, object]',
) -> 'typing.List[typing.Dict[str, object]]':
    """Convert one messages-API stream event into chat-completions style chunks.

    Args:
        event_name: SSE event name, used only when `payload` has no `type`.
        payload: Decoded event body.
        state: Mutable per-stream dict. This function records
            `content_blocks`, `finish_reason`, `finish_emitted` and
            `saw_message_stop` in it.

    Returns:
        Zero or more chunk dicts produced by this event.

    Raises:
        MessagesAPIAdapterError: On an `error` event, or when
            `state["content_blocks"]` is not a dict.
    """
    kind = str(payload.get("type") or event_name or "").strip()
    out: 'typing.List[typing.Dict[str, object]]' = []

    if kind == "message_start":
        started = payload.get("message") or {}
        if isinstance(started, dict):
            usage = _usage_chunk(started.get("usage"))
            if usage is not None:
                out.append(usage)

    elif kind == "content_block_start":
        position = _normalize_index(payload.get("index"))
        block = payload.get("content_block") or {}
        if isinstance(block, dict):
            seen_blocks = state.setdefault("content_blocks", {})
            if not isinstance(seen_blocks, dict):
                raise MessagesAPIAdapterError("messages stream state is corrupted")
            block_kind = str(block.get("type", "")).strip()
            # Remember which block type lives at this stream index.
            seen_blocks[position] = block_kind

            if block_kind == "text":
                initial_text = str(block.get("text", "") or "")
                if initial_text:
                    out.append(_chat_text_chunk(initial_text))
            elif block_kind == "thinking":
                initial_thinking = str(block.get("thinking", "") or "")
                if initial_thinking:
                    out.append(_chat_reasoning_chunk(initial_thinking))
            elif block_kind == "tool_use":
                encoded_input = _dump_json(block.get("input") or {})
                out.append(
                    _chat_tool_chunk(
                        position,
                        call_id=str(block.get("id", "")).strip(),
                        name=str(block.get("name", "")).strip(),
                        # An empty input object is sent as "no arguments yet".
                        arguments="" if encoded_input == "{}" else encoded_input,
                    )
                )

    elif kind == "content_block_delta":
        position = _normalize_index(payload.get("index"))
        delta = payload.get("delta") or {}
        if isinstance(delta, dict):
            delta_kind = str(delta.get("type", "")).strip()
            if delta_kind == "text_delta":
                more_text = str(delta.get("text", "") or "")
                if more_text:
                    out.append(_chat_text_chunk(more_text))
            elif delta_kind == "thinking_delta":
                more_thinking = str(delta.get("thinking", "") or "")
                if more_thinking:
                    out.append(_chat_reasoning_chunk(more_thinking))
            elif delta_kind == "input_json_delta":
                fragment = str(delta.get("partial_json", "") or "")
                out.append(_chat_tool_chunk(position, arguments=fragment))

    elif kind == "message_delta":
        usage = _usage_chunk(payload.get("usage"))
        if usage is not None:
            out.append(usage)
        delta = payload.get("delta") or {}
        if isinstance(delta, dict):
            reason = _translate_stop_reason(delta.get("stop_reason"))
            if reason and not bool(state.get("finish_emitted")):
                state["finish_reason"] = reason
                state["finish_emitted"] = True
                out.append(_chat_finish_chunk(reason))

    elif kind == "message_stop":
        # Emit a finish chunk here only if message_delta did not already do so.
        if not bool(state.get("finish_emitted")):
            reason = str(state.get("finish_reason") or "stop")
            state["finish_emitted"] = True
            out.append(_chat_finish_chunk(reason))
        state["saw_message_stop"] = True

    elif kind == "error":
        failure = payload.get("error")
        detail = ""
        if isinstance(failure, dict):
            detail = str(failure.get("message", "") or "").strip()
        # Fall back to the whole payload when no usable message is present.
        raise MessagesAPIAdapterError(detail or _dump_json(payload))

    return out
187
+
188
+
189
def saw_message_stop(state: 'typing.Dict[str, object]') -> 'bool':
    """Report whether `state` carries a truthy `saw_message_stop` flag."""
    flag = state.get("saw_message_stop")
    return bool(flag)
191
+
192
+
193
+ def _build_text_blocks(raw_content: 'object') -> 'typing.List[typing.Dict[str, object]]':
194
+ text = str(raw_content or "")
195
+ if not text:
196
+ return []
197
+ return [{"type": "text", "text": text}]
198
+
199
+
200
+ def _build_assistant_blocks(
201
+ raw_message: 'typing.Dict[str, object]',
202
+ ) -> 'typing.List[typing.Dict[str, object]]':
203
+ blocks: 'typing.List[typing.Dict[str, object]]' = []
204
+ reasoning = str(raw_message.get("reasoning", "") or "")
205
+ if reasoning:
206
+ blocks.append({"type": "thinking", "thinking": reasoning})
207
+
208
+ text = str(raw_message.get("content", "") or "")
209
+ if text:
210
+ blocks.append({"type": "text", "text": text})
211
+
212
+ raw_tool_calls = raw_message.get("tool_calls") or []
213
+ if raw_tool_calls:
214
+ if not isinstance(raw_tool_calls, list):
215
+ raise MessagesAPIAdapterError("assistant `tool_calls` must be a list")
216
+ for raw_tool_call in raw_tool_calls:
217
+ if not isinstance(raw_tool_call, dict):
218
+ raise MessagesAPIAdapterError("assistant tool calls must be objects")
219
+ function = raw_tool_call.get("function") or {}
220
+ if not isinstance(function, dict):
221
+ raise MessagesAPIAdapterError(
222
+ "assistant tool call is missing function payload"
223
+ )
224
+ blocks.append(
225
+ {
226
+ "type": "tool_use",
227
+ "id": str(raw_tool_call.get("id", "")).strip(),
228
+ "name": str(function.get("name", "")).strip(),
229
+ "input": _parse_json_object(function.get("arguments")),
230
+ }
231
+ )
232
+ return blocks
233
+
234
+
235
+ def _build_tool_result_block(
236
+ raw_message: 'typing.Dict[str, object]',
237
+ ) -> 'typing.Dict[str, object]':
238
+ return {
239
+ "type": "tool_result",
240
+ "tool_use_id": str(raw_message.get("tool_call_id", "")).strip(),
241
+ "content": str(raw_message.get("content", "") or ""),
242
+ }
243
+
244
+
245
+ def _translate_tools(
246
+ raw_tools: 'object',
247
+ ) -> 'typing.List[typing.Dict[str, object]]':
248
+ translated: 'typing.List[typing.Dict[str, object]]' = []
249
+ if not isinstance(raw_tools, list):
250
+ return translated
251
+ for raw_tool in raw_tools:
252
+ if not isinstance(raw_tool, dict) or raw_tool.get("type") != "function":
253
+ raise MessagesAPIAdapterError(
254
+ "messages API backend only supports function-style tools"
255
+ )
256
+ function = raw_tool.get("function") or {}
257
+ if not isinstance(function, dict):
258
+ raise MessagesAPIAdapterError("tool definition is missing function payload")
259
+ name = str(function.get("name", raw_tool.get("name", ""))).strip()
260
+ if not name:
261
+ raise MessagesAPIAdapterError("tool definition is missing `name`")
262
+ translated.append(
263
+ {
264
+ "name": name,
265
+ "description": str(function.get("description", "") or ""),
266
+ "input_schema": function.get("parameters") or {"type": "object"},
267
+ }
268
+ )
269
+ return translated
270
+
271
+
272
+ def _translate_tool_choice(
273
+ raw_tool_choice: 'object',
274
+ parallel_tool_calls: 'object',
275
+ ) -> 'typing.Union[typing.Dict[str, object], None]':
276
+ if raw_tool_choice is None:
277
+ if parallel_tool_calls is False:
278
+ return {
279
+ "type": "auto",
280
+ "disable_parallel_tool_use": True,
281
+ }
282
+ return None
283
+
284
+ translated: 'typing.Dict[str, object]'
285
+ if isinstance(raw_tool_choice, str):
286
+ choice = raw_tool_choice.strip()
287
+ if choice == "auto":
288
+ translated = {"type": "auto"}
289
+ elif choice == "required":
290
+ translated = {"type": "any"}
291
+ elif choice == "none":
292
+ return None
293
+ else:
294
+ raise MessagesAPIAdapterError(
295
+ f"unsupported tool_choice for messages API: {raw_tool_choice!r}"
296
+ )
297
+ elif isinstance(raw_tool_choice, dict):
298
+ choice_type = str(raw_tool_choice.get("type", "")).strip()
299
+ if choice_type == "function":
300
+ function = raw_tool_choice.get("function") or {}
301
+ name = ""
302
+ if isinstance(function, dict):
303
+ name = str(function.get("name", "")).strip()
304
+ if not name:
305
+ name = str(raw_tool_choice.get("name", "")).strip()
306
+ if not name:
307
+ raise MessagesAPIAdapterError(
308
+ "function tool_choice is missing `name`"
309
+ )
310
+ translated = {
311
+ "type": "tool",
312
+ "name": name,
313
+ }
314
+ else:
315
+ raise MessagesAPIAdapterError(
316
+ f"unsupported tool_choice for messages API: {raw_tool_choice!r}"
317
+ )
318
+ else:
319
+ raise MessagesAPIAdapterError(
320
+ f"unsupported tool_choice for messages API: {raw_tool_choice!r}"
321
+ )
322
+
323
+ if parallel_tool_calls is False:
324
+ translated["disable_parallel_tool_use"] = True
325
+ return translated
326
+
327
+
328
+ def _parse_json_object(raw_value: 'object') -> 'typing.Dict[str, object]':
329
+ if isinstance(raw_value, dict):
330
+ return dict(raw_value)
331
+ if isinstance(raw_value, str):
332
+ text = raw_value.strip()
333
+ if not text:
334
+ return {}
335
+ try:
336
+ parsed = json.loads(text)
337
+ except json.JSONDecodeError as exc:
338
+ raise MessagesAPIAdapterError(
339
+ f"tool arguments must be valid JSON objects for messages API: {exc}"
340
+ ) from exc
341
+ if isinstance(parsed, dict):
342
+ return dict(parsed)
343
+ raise MessagesAPIAdapterError(
344
+ "tool arguments must decode to JSON objects for messages API"
345
+ )
346
+ raise MessagesAPIAdapterError(
347
+ "tool arguments must be strings or objects for messages API"
348
+ )
349
+
350
+
351
+ def _resolve_max_tokens(outcomming_request: 'typing.Dict[str, object]') -> 'int':
352
+ raw_value = outcomming_request.get("max_tokens")
353
+ if isinstance(raw_value, bool):
354
+ return DEFAULT_MESSAGES_MAX_TOKENS
355
+ if isinstance(raw_value, int) and raw_value > 0:
356
+ return raw_value
357
+ return DEFAULT_MESSAGES_MAX_TOKENS
358
+
359
+
360
def _usage_chunk(raw_usage: 'object') -> 'typing.Union[typing.Dict[str, object], None]':
    """Wrap translated usage in a choice-less chunk, or return `None` if there is none."""
    translated = _translate_usage(raw_usage)
    if translated:
        return {"choices": [], "usage": translated}
    return None
368
+
369
+
370
+ def _translate_usage(raw_usage: 'object') -> 'typing.Dict[str, object]':
371
+ if not isinstance(raw_usage, dict):
372
+ return {}
373
+ usage: 'typing.Dict[str, object]' = {}
374
+ input_tokens = raw_usage.get("input_tokens")
375
+ output_tokens = raw_usage.get("output_tokens")
376
+ if isinstance(input_tokens, int):
377
+ usage["input_tokens"] = input_tokens
378
+ if isinstance(output_tokens, int):
379
+ usage["output_tokens"] = output_tokens
380
+ total_tokens = raw_usage.get("total_tokens")
381
+ if isinstance(total_tokens, int):
382
+ usage["total_tokens"] = total_tokens
383
+ elif isinstance(input_tokens, int) and isinstance(output_tokens, int):
384
+ usage["total_tokens"] = input_tokens + output_tokens
385
+
386
+ input_details: 'typing.Dict[str, int]' = {}
387
+ cache_creation = raw_usage.get("cache_creation_input_tokens")
388
+ if isinstance(cache_creation, int):
389
+ input_details["cache_creation_input_tokens"] = cache_creation
390
+ cache_read = raw_usage.get("cache_read_input_tokens")
391
+ if isinstance(cache_read, int):
392
+ input_details["cache_read_input_tokens"] = cache_read
393
+ if input_details:
394
+ usage["input_tokens_details"] = input_details
395
+ return usage
396
+
397
+
398
+ def _normalize_index(raw_index: 'object') -> 'int':
399
+ if isinstance(raw_index, int):
400
+ return raw_index
401
+ try:
402
+ return int(raw_index)
403
+ except (TypeError, ValueError):
404
+ return 0
405
+
406
+
407
+ def _translate_stop_reason(raw_stop_reason: 'object') -> 'typing.Union[str, None]':
408
+ if not isinstance(raw_stop_reason, str):
409
+ return None
410
+ stop_reason = raw_stop_reason.strip()
411
+ if not stop_reason:
412
+ return None
413
+ if stop_reason == "tool_use":
414
+ return "tool_calls"
415
+ if stop_reason == "max_tokens":
416
+ return "length"
417
+ if stop_reason in {"end_turn", "stop_sequence"}:
418
+ return "stop"
419
+ return stop_reason
420
+
421
+
422
def _chat_text_chunk(text: 'str') -> 'typing.Dict[str, object]':
    """Build a delta chunk whose delta carries `text` under `content`."""
    delta = {"content": text}
    return _chat_delta_chunk(delta)
424
+
425
+
426
def _chat_reasoning_chunk(reasoning: 'str') -> 'typing.Dict[str, object]':
    """Build a delta chunk whose delta carries `reasoning` under `reasoning_content`."""
    delta = {"reasoning_content": reasoning}
    return _chat_delta_chunk(delta)
428
+
429
+
430
def _chat_tool_chunk(
    index: 'int',
    call_id: 'str' = "",
    name: 'str' = "",
    arguments: 'str' = "",
) -> 'typing.Dict[str, object]':
    """Build a delta chunk holding one tool-call fragment.

    `id`, `type`, the function `name` and the function `arguments` are only
    included when the corresponding argument is non-empty.
    """
    function: 'typing.Dict[str, object]' = {}
    if name:
        function["name"] = name
    if arguments:
        function["arguments"] = arguments

    fragment: 'typing.Dict[str, object]' = {"index": index, "function": function}
    if call_id:
        fragment["id"] = call_id
    if name:
        fragment["type"] = "function"
    return _chat_delta_chunk({"tool_calls": [fragment]})
452
+
453
+
454
+ def _chat_delta_chunk(delta: 'typing.Dict[str, object]') -> 'typing.Dict[str, object]':
455
+ return {
456
+ "choices": [
457
+ {
458
+ "index": 0,
459
+ "delta": delta,
460
+ "finish_reason": None,
461
+ }
462
+ ]
463
+ }
464
+
465
+
466
+ def _chat_finish_chunk(finish_reason: 'str') -> 'typing.Dict[str, object]':
467
+ return {
468
+ "choices": [
469
+ {
470
+ "index": 0,
471
+ "delta": {},
472
+ "finish_reason": finish_reason,
473
+ }
474
+ ]
475
+ }
476
+
477
+
478
+ def _dump_json(raw_value: 'object') -> 'str':
479
+ return json.dumps(raw_value, ensure_ascii=False, separators=(",", ":"))
@@ -28,6 +28,7 @@ class OutgoingRequest(TypedDict):
28
28
  model: 'str'
29
29
  messages: 'typing.List[ChatMessage]'
30
30
  stream: 'bool'
31
+ max_tokens: 'Optional[int]'
31
32
  tools: 'Optional[typing.List[typing.Dict[str, object]]]'
32
33
  tool_choice: 'Optional[object]'
33
34
  parallel_tool_calls: 'Optional[bool]'
@@ -6,6 +6,12 @@ import urllib.error
6
6
  import urllib.request
7
7
 
8
8
  from .config import CompatServerConfig
9
+ from .messages_api import (
10
+ MessagesAPIAdapterError,
11
+ build_messages_request,
12
+ iter_chat_chunks as iter_chat_chunks_from_messages,
13
+ saw_message_stop as messages_saw_message_stop,
14
+ )
9
15
  from .session_store import StoredResponse
10
16
  from .tools import WebSearchTool, collect_custom_tool_names
11
17
  from .tools.custom_adapter import (
@@ -130,6 +136,13 @@ class StreamRouter:
130
136
  ),
131
137
  "stream": True,
132
138
  }
139
+ max_tokens = self._coerce_positive_int(
140
+ incomming_request.get("max_output_tokens")
141
+ )
142
+ if max_tokens is None:
143
+ max_tokens = self._coerce_positive_int(incomming_request.get("max_tokens"))
144
+ if max_tokens is not None:
145
+ payload["max_tokens"] = max_tokens
133
146
  if self._supports_stream_usage():
134
147
  payload["stream_options"] = {"include_usage": True}
135
148
 
@@ -150,6 +163,19 @@ class StreamRouter:
150
163
  return payload
151
164
 
152
165
  def open_outcomming_stream(self, outcomming_request: 'typing.Dict[str, object]'):
166
+ outcomming_api = self._config.normalized_outcomming_api()
167
+ if outcomming_api == "messages":
168
+ return self._open_outcomming_messages_stream(outcomming_request)
169
+ if outcomming_api != "chat_completions":
170
+ raise OutcommingChatError(
171
+ f"unsupported outcomming API: {self._config.outcomming_api!r}"
172
+ )
173
+ return self._open_outcomming_chat_stream(outcomming_request)
174
+
175
+ def _open_outcomming_chat_stream(
176
+ self,
177
+ outcomming_request: 'typing.Dict[str, object]',
178
+ ):
153
179
  request = urllib.request.Request(
154
180
  self._config.outcomming_chat_completions_url(),
155
181
  data=json.dumps(outcomming_request).encode("utf-8"),
@@ -196,6 +222,67 @@ class StreamRouter:
196
222
  f"outcomming chat request failed: {exc.reason}"
197
223
  ) from exc
198
224
 
225
def _open_outcomming_messages_stream(
    self,
    outcomming_request: 'typing.Dict[str, object]',
):
    """POST the request to the messages endpoint and yield chat chunks.

    The request is first converted with ``build_messages_request`` and
    sent as JSON to ``self._config.outcomming_messages_url()``. Each SSE
    event read from the response is parsed as JSON and handed to
    ``iter_chat_chunks_from_messages``; every chunk it produces is yielded.

    This is a generator function, so none of the work (including request
    conversion) happens until the caller starts iterating.

    Raises:
        OutcommingChatError: If request conversion fails, the HTTP request
            fails, reading or decoding the response body fails, or the
            stream ends without ``messages_saw_message_stop`` reporting
            that the stop event was seen.
    """
    try:
        translated_request = build_messages_request(outcomming_request)
    except MessagesAPIAdapterError as exc:
        raise OutcommingChatError(str(exc)) from exc

    http_request = urllib.request.Request(
        self._config.outcomming_messages_url(),
        data=json.dumps(translated_request).encode("utf-8"),
        headers=self._build_headers(accept="text/event-stream"),
        method="POST",
    )
    try:
        with urllib.request.urlopen(
            http_request,
            context=ssl.create_default_context(),
            timeout=self._config.timeout_seconds,
        ) as response:
            try:
                # State shared across events by the messages adapter helpers.
                adapter_state: 'typing.Dict[str, object]' = {}
                for event_name, data in self._iter_sse_events(response):
                    # Events without data and non-object payloads are skipped.
                    event_payload = json.loads(data) if data else None
                    if isinstance(event_payload, dict):
                        for chunk in iter_chat_chunks_from_messages(
                            event_name,
                            event_payload,
                            adapter_state,
                        ):
                            yield chunk
                if not messages_saw_message_stop(adapter_state):
                    raise OutcommingChatError(
                        "outcomming messages stream ended before `message_stop`"
                    )
            except (
                ConnectionError,
                EOFError,
                OSError,
                http.client.HTTPException,
                json.JSONDecodeError,
                MessagesAPIAdapterError,
            ) as exc:
                raise OutcommingChatError(
                    "outcomming messages stream failed while reading response body: "
                    f"{exc}"
                ) from exc
    except urllib.error.HTTPError as exc:
        # Only the first 500 characters of the error body are reported.
        error_body = exc.read().decode("utf-8", errors="replace")
        raise OutcommingChatError(
            f"outcomming messages request failed with status {exc.code}: {error_body[:500]}"
        ) from exc
    except urllib.error.URLError as exc:
        raise OutcommingChatError(
            f"outcomming messages request failed: {exc.reason}"
        ) from exc
285
+
199
286
  def route_stream(
200
287
  self,
201
288
  incomming_stream,
@@ -439,6 +526,13 @@ class StreamRouter:
439
526
  flush_pending_assistant()
440
527
  return messages
441
528
 
529
+ def _coerce_positive_int(self, raw_value: 'object') -> 'typing.Union[int, None]':
530
+ if isinstance(raw_value, bool):
531
+ return None
532
+ if isinstance(raw_value, int) and raw_value > 0:
533
+ return raw_value
534
+ return None
535
+
442
536
  def _coalesce_content_text(self, raw_content: 'object') -> 'str':
443
537
  if raw_content is None:
444
538
  return ""