python-codex 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycodex/cli.py +19 -2
- pycodex/utils/visualize.py +36 -9
- {python_codex-0.1.4.dist-info → python_codex-0.1.6.dist-info}/METADATA +5 -1
- {python_codex-0.1.4.dist-info → python_codex-0.1.6.dist-info}/RECORD +12 -11
- responses_server/app.py +12 -2
- responses_server/config.py +12 -0
- responses_server/messages_api.py +479 -0
- responses_server/payload_processors.py +1 -0
- responses_server/stream_router.py +94 -0
- {python_codex-0.1.4.dist-info → python_codex-0.1.6.dist-info}/WHEEL +0 -0
- {python_codex-0.1.4.dist-info → python_codex-0.1.6.dist-info}/entry_points.txt +0 -0
- {python_codex-0.1.4.dist-info → python_codex-0.1.6.dist-info}/licenses/LICENSE +0 -0
pycodex/cli.py
CHANGED
|
@@ -42,7 +42,6 @@ CliSessionMode = Literal["exec", "tui"]
|
|
|
42
42
|
LOCAL_RESPONSES_SERVER_API_KEY_ENV = "PYCODEX_LOCAL_RESPONSES_SERVER_KEY"
|
|
43
43
|
CLI_ORIGINATOR = "codex-tui"
|
|
44
44
|
|
|
45
|
-
|
|
46
45
|
def launch_chat_completion_compat_server(*args, **kwargs):
|
|
47
46
|
from responses_server import (
|
|
48
47
|
launch_chat_completion_compat_server as launch_compat_server,
|
|
@@ -123,6 +122,15 @@ def build_parser() -> 'argparse.ArgumentParser':
|
|
|
123
122
|
"When set, pycodex starts a local responses compat server for this session."
|
|
124
123
|
),
|
|
125
124
|
)
|
|
125
|
+
parser.add_argument(
|
|
126
|
+
"--use-messages",
|
|
127
|
+
default=False,
|
|
128
|
+
action="store_true",
|
|
129
|
+
help=(
|
|
130
|
+
"When set, pycodex starts a local responses compat server and routes "
|
|
131
|
+
"to a downstream /v1/messages backend for this session."
|
|
132
|
+
),
|
|
133
|
+
)
|
|
126
134
|
parser.add_argument(
|
|
127
135
|
"--system-prompt",
|
|
128
136
|
default=None,
|
|
@@ -373,12 +381,17 @@ def _build_model_client(
|
|
|
373
381
|
managed_responses_base_url: 'typing.Union[str, None]' = None,
|
|
374
382
|
vllm_endpoint: 'typing.Union[str, None]' = None,
|
|
375
383
|
use_chat_completion: 'bool' = False,
|
|
384
|
+
use_messages: 'bool' = False,
|
|
376
385
|
):
|
|
377
386
|
load_codex_dotenv(config_path)
|
|
378
387
|
provider_config = ResponsesProviderConfig.from_codex_config(
|
|
379
388
|
config_path,
|
|
380
389
|
profile,
|
|
381
390
|
)
|
|
391
|
+
if use_chat_completion and use_messages:
|
|
392
|
+
raise ValueError("--use-chat-completion and --use-messages cannot be combined")
|
|
393
|
+
if vllm_endpoint and use_messages:
|
|
394
|
+
raise ValueError("--vllm-endpoint and --use-messages cannot be combined")
|
|
382
395
|
url, key_env = provider_config.base_url, provider_config.api_key_env
|
|
383
396
|
if managed_responses_base_url is not None:
|
|
384
397
|
url, key_env = (
|
|
@@ -386,7 +399,7 @@ def _build_model_client(
|
|
|
386
399
|
LOCAL_RESPONSES_SERVER_API_KEY_ENV,
|
|
387
400
|
)
|
|
388
401
|
os.environ.setdefault(LOCAL_RESPONSES_SERVER_API_KEY_ENV, "dummy")
|
|
389
|
-
elif vllm_endpoint or use_chat_completion:
|
|
402
|
+
elif vllm_endpoint or use_chat_completion or use_messages:
|
|
390
403
|
if vllm_endpoint:
|
|
391
404
|
managed_server = launch_chat_completion_compat_server(
|
|
392
405
|
vllm_endpoint,
|
|
@@ -397,6 +410,9 @@ def _build_model_client(
|
|
|
397
410
|
provider_config.base_url,
|
|
398
411
|
provider_config.api_key_env,
|
|
399
412
|
model_provider=provider_config.provider_name,
|
|
413
|
+
outcomming_api=(
|
|
414
|
+
"messages" if use_messages else "chat_completions"
|
|
415
|
+
),
|
|
400
416
|
)
|
|
401
417
|
atexit.register(managed_server.stop)
|
|
402
418
|
url, key_env = (
|
|
@@ -755,6 +771,7 @@ async def run_cli(args: 'argparse.Namespace') -> 'int':
|
|
|
755
771
|
args.timeout_seconds,
|
|
756
772
|
vllm_endpoint=args.vllm_endpoint,
|
|
757
773
|
use_chat_completion=args.use_chat_completion,
|
|
774
|
+
use_messages=args.use_messages,
|
|
758
775
|
)
|
|
759
776
|
|
|
760
777
|
runtime = build_runtime(
|
pycodex/utils/visualize.py
CHANGED
|
@@ -83,7 +83,7 @@ def format_cli_plan_messages(
|
|
|
83
83
|
|
|
84
84
|
def build_cli_spinner_frame(index: 'int', label: 'str') -> 'str':
|
|
85
85
|
suffix = f" {label}" if label else ""
|
|
86
|
-
return f"
|
|
86
|
+
return f"{SPINNER_FRAMES[index % len(SPINNER_FRAMES)]}{suffix}"
|
|
87
87
|
|
|
88
88
|
|
|
89
89
|
def percent_of_context_window_remaining(
|
|
@@ -158,13 +158,29 @@ class Spinner:
|
|
|
158
158
|
self._paused = False
|
|
159
159
|
|
|
160
160
|
def clear(self) -> 'None':
|
|
161
|
-
if not self._enabled or not self._visible:
|
|
162
|
-
return
|
|
163
161
|
with self._terminal_lock:
|
|
162
|
+
if not self._visible:
|
|
163
|
+
return
|
|
164
164
|
self._raw_write("\r\x1b[2K")
|
|
165
165
|
self._raw_flush()
|
|
166
166
|
self._visible = False
|
|
167
167
|
|
|
168
|
+
def render_now(self) -> 'None':
|
|
169
|
+
if not self._turn_active or self._paused:
|
|
170
|
+
return
|
|
171
|
+
frame = colorize_cli_message(
|
|
172
|
+
build_cli_spinner_frame(self._index, self._label),
|
|
173
|
+
"status",
|
|
174
|
+
self._color_enabled,
|
|
175
|
+
)
|
|
176
|
+
self._index += 1
|
|
177
|
+
with self._terminal_lock:
|
|
178
|
+
if not self._turn_active or self._paused:
|
|
179
|
+
return
|
|
180
|
+
self._raw_write(f"\r\x1b[2K{frame}")
|
|
181
|
+
self._raw_flush()
|
|
182
|
+
self._visible = True
|
|
183
|
+
|
|
168
184
|
def close(self) -> 'None':
|
|
169
185
|
self.finish_turn()
|
|
170
186
|
if self._thread is not None:
|
|
@@ -726,6 +742,7 @@ class CliSessionView:
|
|
|
726
742
|
else:
|
|
727
743
|
self._spinner.resume()
|
|
728
744
|
self._spinner.set_label("running provider tools")
|
|
745
|
+
self._spinner.render_now()
|
|
729
746
|
return
|
|
730
747
|
|
|
731
748
|
if event.kind == "tool_started":
|
|
@@ -740,20 +757,16 @@ class CliSessionView:
|
|
|
740
757
|
else:
|
|
741
758
|
self._spinner.resume()
|
|
742
759
|
if tool_name and args is not None:
|
|
743
|
-
self._spinner.set_label(f"running {tool_name}({args})")
|
|
760
|
+
self._spinner.set_label(shorten_title(f"running {tool_name}({args})", limit=72))
|
|
744
761
|
elif tool_name:
|
|
745
762
|
self._spinner.set_label(f"running {tool_name}")
|
|
746
763
|
else:
|
|
747
764
|
self._spinner.set_label("running provider tools")
|
|
765
|
+
self._spinner.render_now()
|
|
748
766
|
return
|
|
749
767
|
|
|
750
768
|
if event.kind == "tool_completed":
|
|
751
769
|
self._finish_stream()
|
|
752
|
-
if self._input_active:
|
|
753
|
-
self._spinner.pause()
|
|
754
|
-
else:
|
|
755
|
-
self._spinner.resume()
|
|
756
|
-
self._spinner.set_label("thinking")
|
|
757
770
|
tool_name, summary, is_error = extract_tool_event_display(event.payload)
|
|
758
771
|
summary = self._rewrite_agent_summary(tool_name, summary)
|
|
759
772
|
if tool_name == "update_plan" and not is_error:
|
|
@@ -762,6 +775,12 @@ class CliSessionView:
|
|
|
762
775
|
self._print_line(
|
|
763
776
|
colorize_cli_message(line, "plan", self._color_enabled)
|
|
764
777
|
)
|
|
778
|
+
if self._input_active:
|
|
779
|
+
self._spinner.pause()
|
|
780
|
+
else:
|
|
781
|
+
self._spinner.resume()
|
|
782
|
+
self._spinner.set_label("thinking")
|
|
783
|
+
self._spinner.render_now()
|
|
765
784
|
return
|
|
766
785
|
message = format_cli_tool_message(
|
|
767
786
|
tool_name,
|
|
@@ -770,6 +789,12 @@ class CliSessionView:
|
|
|
770
789
|
)
|
|
771
790
|
self._remember_agent_name(tool_name, summary)
|
|
772
791
|
self._print_line(self._colorize_formatted_tool_message(message))
|
|
792
|
+
if self._input_active:
|
|
793
|
+
self._spinner.pause()
|
|
794
|
+
else:
|
|
795
|
+
self._spinner.resume()
|
|
796
|
+
self._spinner.set_label("thinking")
|
|
797
|
+
self._spinner.render_now()
|
|
773
798
|
return
|
|
774
799
|
|
|
775
800
|
if event.kind == "turn_completed":
|
|
@@ -830,6 +855,8 @@ class CliSessionView:
|
|
|
830
855
|
|
|
831
856
|
def resume_spinner(self) -> 'None':
|
|
832
857
|
self._spinner.resume()
|
|
858
|
+
if not self._input_active:
|
|
859
|
+
self._spinner.render_now()
|
|
833
860
|
|
|
834
861
|
def set_input_active(self, active: 'bool', resume_spinner: 'bool' = True) -> 'None':
|
|
835
862
|
self._input_active = active
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: python-codex
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: A minimal Python extraction of Codex's main agent loop
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Python: >=3.6.2
|
|
@@ -159,6 +159,7 @@ pycodex "Summarize this repo in one sentence."
|
|
|
159
159
|
printf 'Reply with exactly OK.' | pycodex
|
|
160
160
|
pycodex --json "Reply with exactly OK."
|
|
161
161
|
pycodex --profile model_proxy "Reply with exactly OK."
|
|
162
|
+
pycodex --profile opus --use-messages "Reply with exactly OK."
|
|
162
163
|
pycodex --vllm-endpoint http://127.0.0.1:18000 "Reply with exactly OK."
|
|
163
164
|
pycodex --put @127.0.0.1:5577
|
|
164
165
|
pycodex --put /data/.codex/@127.0.0.1:5577
|
|
@@ -211,6 +212,9 @@ Current behavior:
|
|
|
211
212
|
historical `reasoning` items are replayed into downstream assistant messages
|
|
212
213
|
via the `reasoning` field. Streaming token usage is also requested from vLLM
|
|
213
214
|
and forwarded to the final `response.completed.response.usage`
|
|
215
|
+
- standalone `responses_server` now also supports downstream `/v1/messages`
|
|
216
|
+
backends via `--outcomming-api messages`, while keeping the internal
|
|
217
|
+
canonical request/route logic in chat-completions shape
|
|
214
218
|
- `pycodex doctor` checks config, `.env`, API keys, DNS, TCP/TLS, and an
|
|
215
219
|
optional live Responses API request
|
|
216
220
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
pycodex/__init__.py,sha256=jCnC_Bgotlxa4GwO3Re2sChKGY49TRM-uVZEQ9uBpfw,3106
|
|
2
2
|
pycodex/agent.py,sha256=s0FrF_XG2pHKryooS461Jr_acmQ_TKTp2JLGQNiny6w,11888
|
|
3
|
-
pycodex/cli.py,sha256=
|
|
3
|
+
pycodex/cli.py,sha256=FG0klhVuJiS7797GQZpj6gk1-jChFwgFPEhPXA2vXM8,30316
|
|
4
4
|
pycodex/collaboration.py,sha256=yQ6pBD-R3ZWR4_FAYQFoS7KF0m4LLD42otXIbPqw2ys,641
|
|
5
5
|
pycodex/compat.py,sha256=IO0X7AgcYhlHnYnpvBZ6leCh_UjoQzg5HLT5wYBNNIw,3155
|
|
6
6
|
pycodex/context.py,sha256=R5tuMcNrX1F-Lh9ymsSbnfRbKLJ19TWrtQoZ3tWlHvM,24982
|
|
@@ -57,20 +57,21 @@ pycodex/utils/dotenv.py,sha256=EDBXdn93ewmq9zhJki5_LsJJXe0wMIQJ6VfCE1r7voQ,1818
|
|
|
57
57
|
pycodex/utils/get_env.py,sha256=jR8G0Xco57jX-71E1oHIcl3-Kz9Ltc0kzxj04DKzt80,7316
|
|
58
58
|
pycodex/utils/random_ids.py,sha256=zBphjVGc7OXk9ZNExAbxRi_bk7ipyLG491qTv7hi8jM,380
|
|
59
59
|
pycodex/utils/session_persist.py,sha256=dUvo3Z1QBB4HJT1tLerDlLD3ZB25umB6FP6JORg9V40,16414
|
|
60
|
-
pycodex/utils/visualize.py,sha256=
|
|
60
|
+
pycodex/utils/visualize.py,sha256=JURzq2AbV046bblE5fojcAe885Juda0LDxt_gqT2PUc,41006
|
|
61
61
|
responses_server/__init__.py,sha256=3yPv_zeGT7P11tTnmj5kXktISLNsNW-02MUnnbiZcb0,394
|
|
62
62
|
responses_server/__main__.py,sha256=9SRp-Yw7ShGxc6DhSIXcDLKgGEdAVm3oBZ59rBOPjT0,62
|
|
63
|
-
responses_server/app.py,sha256=
|
|
64
|
-
responses_server/config.py,sha256=
|
|
65
|
-
responses_server/
|
|
63
|
+
responses_server/app.py,sha256=4SUG8xqKqmVKVY9i1f5WF9QrnmxGbD4mwiI6s13zQDE,7742
|
|
64
|
+
responses_server/config.py,sha256=leb3_uPrCyYdUIkyRyVPX4luGF88dQ62OkhRLPe7uxw,2718
|
|
65
|
+
responses_server/messages_api.py,sha256=3GPMfs3ksQkhezLyWBjeW5zJ1e_MeHXVaq1lALIA7Mk,16815
|
|
66
|
+
responses_server/payload_processors.py,sha256=gfOXqvVwlhCk-yjaDdGU4RKcpDdxIq2y6CmqUCggIjY,3444
|
|
66
67
|
responses_server/server.py,sha256=isyzN-p-Ir8LLycN_dQfcanvie2ZqqSu52mOPz_wYD4,2095
|
|
67
68
|
responses_server/session_store.py,sha256=ZD3cH2aEOkWaQsu5qTzcal2mThTSFQPAhAhPUN9srgI,1115
|
|
68
|
-
responses_server/stream_router.py,sha256=
|
|
69
|
+
responses_server/stream_router.py,sha256=OVwaDEsUaKVDNGF2vnqNZTo3WA9h3D3uzHeYY-QN9IU,34754
|
|
69
70
|
responses_server/tools/__init__.py,sha256=ivsBSEy0SBUhY-Uea5v1XMLXShkwHdCVl0id-1FwdZg,150
|
|
70
71
|
responses_server/tools/custom_adapter.py,sha256=LxO7ldydvR-GWachDz8GKC0Q8KGGFoFPbZxM0QvxuZ0,8350
|
|
71
72
|
responses_server/tools/web_search.py,sha256=pm4ZUiHUfxc0bGY1kEvt-BCzDrZIyP24xzPUcga2ul0,8908
|
|
72
|
-
python_codex-0.1.
|
|
73
|
-
python_codex-0.1.
|
|
74
|
-
python_codex-0.1.
|
|
75
|
-
python_codex-0.1.
|
|
76
|
-
python_codex-0.1.
|
|
73
|
+
python_codex-0.1.6.dist-info/METADATA,sha256=SYSPoh0NP5qUQKfAu2ncG1_DXQZrWKSPcRGpnqmTVQk,15719
|
|
74
|
+
python_codex-0.1.6.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
|
|
75
|
+
python_codex-0.1.6.dist-info/entry_points.txt,sha256=sNUVakoVuTrzJH505ZgRTQxmtRRPUHV_EH0i6EbYTyM,45
|
|
76
|
+
python_codex-0.1.6.dist-info/licenses/LICENSE,sha256=0X8ifk312hYAORM4hlzg8wVSEXYKNmiPgWlB1YIy2Nw,10926
|
|
77
|
+
python_codex-0.1.6.dist-info/RECORD,,
|
responses_server/app.py
CHANGED
|
@@ -36,13 +36,14 @@ def _stream_events(response_server: 'ResponseServer', request_body: 'typing.Dict
|
|
|
36
36
|
except OutcommingChatError as exc:
|
|
37
37
|
|
|
38
38
|
import traceback
|
|
39
|
+
exc_info = traceback.format_exception(type(exc), exc, exc.__traceback__)
|
|
39
40
|
yield _format_sse_event(
|
|
40
41
|
"response.failed",
|
|
41
42
|
{
|
|
42
43
|
"type": "response.failed",
|
|
43
44
|
"response": {
|
|
44
45
|
"error": {
|
|
45
|
-
"message": '\n'.join(
|
|
46
|
+
"message": '\n'.join(exc_info),
|
|
46
47
|
}
|
|
47
48
|
},
|
|
48
49
|
},
|
|
@@ -54,12 +55,18 @@ def build_parser() -> 'argparse.ArgumentParser':
|
|
|
54
55
|
prog="python -m responses_server",
|
|
55
56
|
description=(
|
|
56
57
|
"Standalone localhost `/v1/responses` server that translates the "
|
|
57
|
-
"Codex/Responses subset onto an outcomming `/v1/chat/completions`
|
|
58
|
+
"Codex/Responses subset onto an outcomming `/v1/chat/completions` "
|
|
59
|
+
"or `/v1/messages` backend."
|
|
58
60
|
),
|
|
59
61
|
)
|
|
60
62
|
parser.add_argument("--host", default="127.0.0.1")
|
|
61
63
|
parser.add_argument("--port", type=int, default=8001)
|
|
62
64
|
parser.add_argument("--outcomming-base-url", required=True)
|
|
65
|
+
parser.add_argument(
|
|
66
|
+
"--outcomming-api",
|
|
67
|
+
default="chat_completions",
|
|
68
|
+
choices=["chat_completions", "messages"],
|
|
69
|
+
)
|
|
63
70
|
parser.add_argument("--outcomming-api-key-env", default=None)
|
|
64
71
|
parser.add_argument("--model-provider", default=None)
|
|
65
72
|
parser.add_argument("--timeout-seconds", type=float, default=120.0)
|
|
@@ -79,10 +86,12 @@ def launch_chat_completion_compat_server(
|
|
|
79
86
|
base_url: 'str',
|
|
80
87
|
api_key_env: 'typing.Union[str, None]' = None,
|
|
81
88
|
model_provider: 'typing.Union[str, None]' = None,
|
|
89
|
+
outcomming_api: 'str' = "chat_completions",
|
|
82
90
|
):
|
|
83
91
|
config = CompatServerConfig.from_base_url(
|
|
84
92
|
base_url,
|
|
85
93
|
api_key_env,
|
|
94
|
+
outcomming_api=outcomming_api,
|
|
86
95
|
model_provider=model_provider,
|
|
87
96
|
)
|
|
88
97
|
server = ManagedResponseServer(config)
|
|
@@ -208,6 +217,7 @@ def main() -> 'None':
|
|
|
208
217
|
host=args.host,
|
|
209
218
|
port=args.port,
|
|
210
219
|
outcomming_base_url=args.outcomming_base_url,
|
|
220
|
+
outcomming_api=args.outcomming_api,
|
|
211
221
|
outcomming_api_key_env=args.outcomming_api_key_env,
|
|
212
222
|
model_provider=args.model_provider,
|
|
213
223
|
timeout_seconds=args.timeout_seconds,
|
responses_server/config.py
CHANGED
|
@@ -10,6 +10,7 @@ class CompatServerConfig:
|
|
|
10
10
|
host: 'str' = "127.0.0.1"
|
|
11
11
|
port: 'int' = 0
|
|
12
12
|
outcomming_base_url: 'str' = "http://127.0.0.1:8000/v1"
|
|
13
|
+
outcomming_api: 'str' = "chat_completions"
|
|
13
14
|
outcomming_api_key_env: 'typing.Union[str, None]' = None
|
|
14
15
|
model_provider: 'typing.Union[str, None]' = None
|
|
15
16
|
timeout_seconds: 'float' = 120.0
|
|
@@ -24,15 +25,24 @@ class CompatServerConfig:
|
|
|
24
25
|
base = self.outcomming_base_url.rstrip("/")
|
|
25
26
|
return f"{base}/chat/completions"
|
|
26
27
|
|
|
28
|
+
def outcomming_messages_url(self) -> 'str':
|
|
29
|
+
base = self.outcomming_base_url.rstrip("/")
|
|
30
|
+
return f"{base}/messages"
|
|
31
|
+
|
|
27
32
|
def outcomming_models_url(self) -> 'str':
|
|
28
33
|
base = self.outcomming_base_url.rstrip("/")
|
|
29
34
|
return f"{base}/models"
|
|
30
35
|
|
|
36
|
+
def normalized_outcomming_api(self) -> 'str':
|
|
37
|
+
value = str(self.outcomming_api or "").strip().lower()
|
|
38
|
+
return value or "chat_completions"
|
|
39
|
+
|
|
31
40
|
def with_ephemeral_port(self) -> 'CompatServerConfig':
|
|
32
41
|
return CompatServerConfig(
|
|
33
42
|
host=self.host,
|
|
34
43
|
port=0,
|
|
35
44
|
outcomming_base_url=self.outcomming_base_url,
|
|
45
|
+
outcomming_api=self.outcomming_api,
|
|
36
46
|
outcomming_api_key_env=self.outcomming_api_key_env,
|
|
37
47
|
model_provider=self.model_provider,
|
|
38
48
|
timeout_seconds=self.timeout_seconds,
|
|
@@ -44,6 +54,7 @@ class CompatServerConfig:
|
|
|
44
54
|
outcomming_base_url: 'str',
|
|
45
55
|
api_key_env: 'typing.Union[str, None]' = None,
|
|
46
56
|
model_provider: 'typing.Union[str, None]' = None,
|
|
57
|
+
outcomming_api: 'str' = "chat_completions",
|
|
47
58
|
) -> 'CompatServerConfig':
|
|
48
59
|
parsed = urllib.parse.urlparse(outcomming_base_url)
|
|
49
60
|
if not parsed.scheme or not parsed.netloc:
|
|
@@ -58,6 +69,7 @@ class CompatServerConfig:
|
|
|
58
69
|
)
|
|
59
70
|
return cls(
|
|
60
71
|
outcomming_base_url=outcomming_base_url,
|
|
72
|
+
outcomming_api=outcomming_api,
|
|
61
73
|
outcomming_api_key_env=api_key_env,
|
|
62
74
|
model_provider=model_provider,
|
|
63
75
|
)
|
|
@@ -0,0 +1,479 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import typing
|
|
3
|
+
|
|
4
|
+
# Fallback `max_tokens` for messages-API payloads: `_resolve_max_tokens`
# returns it whenever the incoming request carries no positive integer
# `max_tokens` value.
DEFAULT_MESSAGES_MAX_TOKENS = 32000
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MessagesAPIAdapterError(ValueError):
    """Raised when a request or stream event cannot be adapted for the messages API."""
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def build_messages_request(
    outcomming_request: 'typing.Dict[str, object]',
) -> 'typing.Dict[str, object]':
    """Translate a chat-completions shaped request into a `/v1/messages` payload.

    `developer` and `system` messages are collected into the top-level
    `system` block list, `user` and `assistant` messages are converted to
    content-block form, and each `tool` message becomes a `user` message
    holding a single `tool_result` block.

    Args:
        outcomming_request: Request dict carrying `model`, `messages` and the
            optional `max_tokens`, `stream`, `tools`, `tool_choice` and
            `parallel_tool_calls` keys.

    Returns:
        A dict with `model`, `messages`, `max_tokens` and `stream`, plus
        `system`, `tools` and `tool_choice` when applicable.

    Raises:
        MessagesAPIAdapterError: If `model` is missing or empty, `messages` is
            not a list of dicts, a role is unsupported, or a tool definition
            or tool choice cannot be translated.
    """
    # `or ""` keeps an explicit `"model": None` from being stringified into the
    # bogus model name "None" and slipping past the emptiness check.
    model = str(outcomming_request.get("model") or "").strip()
    if not model:
        raise MessagesAPIAdapterError("outcomming request is missing `model`")

    raw_messages = outcomming_request.get("messages") or []
    if not isinstance(raw_messages, list):
        raise MessagesAPIAdapterError("outcomming request `messages` must be a list")

    system_blocks: 'typing.List[typing.Dict[str, object]]' = []
    messages: 'typing.List[typing.Dict[str, object]]' = []
    for raw_message in raw_messages:
        if not isinstance(raw_message, dict):
            raise MessagesAPIAdapterError(
                "outcomming request messages must be objects"
            )
        role = str(raw_message.get("role", "")).strip()
        if role in {"developer", "system"}:
            # The messages API takes system text out-of-band, not as a turn.
            text = str(raw_message.get("content", "") or "")
            if text:
                system_blocks.append({"type": "text", "text": text})
            continue
        if role == "user":
            messages.append(
                {
                    "role": "user",
                    "content": _build_text_blocks(raw_message.get("content")),
                }
            )
            continue
        if role == "assistant":
            messages.append(
                {
                    "role": "assistant",
                    "content": _build_assistant_blocks(raw_message),
                }
            )
            continue
        if role == "tool":
            # Tool output is sent back as a user turn with a tool_result block.
            messages.append(
                {
                    "role": "user",
                    "content": [_build_tool_result_block(raw_message)],
                }
            )
            continue
        raise MessagesAPIAdapterError(
            f"unsupported outcomming message role for messages API: {role!r}"
        )

    payload: 'typing.Dict[str, object]' = {
        "model": model,
        "messages": messages,
        "max_tokens": _resolve_max_tokens(outcomming_request),
        "stream": bool(outcomming_request.get("stream", True)),
    }
    if system_blocks:
        payload["system"] = system_blocks

    tools = _translate_tools(outcomming_request.get("tools"))
    # Always translate so an unsupported tool_choice is still rejected, even
    # when the result ends up not being attached.
    tool_choice = _translate_tool_choice(
        outcomming_request.get("tool_choice"),
        outcomming_request.get("parallel_tool_calls"),
    )
    if tools:
        payload["tools"] = tools
        # A tool_choice is only meaningful alongside tool definitions, so it
        # is not sent without them.
        if tool_choice is not None:
            payload["tool_choice"] = tool_choice
    return payload
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def iter_chat_chunks(
    event_name: 'typing.Union[str, None]',
    payload: 'typing.Dict[str, object]',
    state: 'typing.Dict[str, object]',
) -> 'typing.List[typing.Dict[str, object]]':
    """Convert one messages-API stream event into zero or more chat chunks.

    Args:
        event_name: Event name used when `payload` carries no `type`.
        payload: Decoded event body.
        state: Mutable per-stream dict. This function writes the keys
            `content_blocks`, `finish_reason`, `finish_emitted` and
            `saw_message_stop`.

    Returns:
        The chunks produced for this event; empty for events that are ignored.

    Raises:
        MessagesAPIAdapterError: On an `error` event, or when
            `state["content_blocks"]` is not a dict.
    """
    kind = str(payload.get("type") or event_name or "").strip()
    emitted: 'typing.List[typing.Dict[str, object]]' = []

    if kind == "message_start":
        started = payload.get("message") or {}
        if isinstance(started, dict):
            initial_usage = _usage_chunk(started.get("usage"))
            if initial_usage is not None:
                emitted.append(initial_usage)

    elif kind == "content_block_start":
        index = _normalize_index(payload.get("index"))
        block = payload.get("content_block") or {}
        if isinstance(block, dict):
            seen_blocks = state.setdefault("content_blocks", {})
            if not isinstance(seen_blocks, dict):
                raise MessagesAPIAdapterError("messages stream state is corrupted")
            block_kind = str(block.get("type", "")).strip()
            # Record the type of every opened block, keyed by its index.
            seen_blocks[index] = block_kind

            if block_kind == "text":
                initial_text = str(block.get("text", "") or "")
                if initial_text:
                    emitted.append(_chat_text_chunk(initial_text))
            elif block_kind == "thinking":
                initial_thinking = str(block.get("thinking", "") or "")
                if initial_thinking:
                    emitted.append(_chat_reasoning_chunk(initial_thinking))
            elif block_kind == "tool_use":
                serialized = _dump_json(block.get("input") or {})
                # An empty input object is emitted as "" rather than "{}".
                emitted.append(
                    _chat_tool_chunk(
                        index,
                        call_id=str(block.get("id", "")).strip(),
                        name=str(block.get("name", "")).strip(),
                        arguments="" if serialized == "{}" else serialized,
                    )
                )

    elif kind == "content_block_delta":
        index = _normalize_index(payload.get("index"))
        delta = payload.get("delta") or {}
        if isinstance(delta, dict):
            delta_kind = str(delta.get("type", "")).strip()
            if delta_kind == "text_delta":
                piece = str(delta.get("text", "") or "")
                if piece:
                    emitted.append(_chat_text_chunk(piece))
            elif delta_kind == "thinking_delta":
                piece = str(delta.get("thinking", "") or "")
                if piece:
                    emitted.append(_chat_reasoning_chunk(piece))
            elif delta_kind == "input_json_delta":
                # Forwarded even when the fragment is empty.
                fragment = str(delta.get("partial_json", "") or "")
                emitted.append(_chat_tool_chunk(index, arguments=fragment))

    elif kind == "message_delta":
        latest_usage = _usage_chunk(payload.get("usage"))
        if latest_usage is not None:
            emitted.append(latest_usage)
        delta = payload.get("delta") or {}
        if isinstance(delta, dict):
            reason = _translate_stop_reason(delta.get("stop_reason"))
            # Emit the finish chunk at most once per stream.
            if reason and not bool(state.get("finish_emitted")):
                state["finish_reason"] = reason
                state["finish_emitted"] = True
                emitted.append(_chat_finish_chunk(reason))

    elif kind == "message_stop":
        if not bool(state.get("finish_emitted")):
            # No stop reason was emitted earlier; fall back to "stop".
            reason = str(state.get("finish_reason") or "stop")
            state["finish_emitted"] = True
            emitted.append(_chat_finish_chunk(reason))
        state["saw_message_stop"] = True

    elif kind == "error":
        failure = payload.get("error")
        if isinstance(failure, dict):
            detail = str(failure.get("message", "") or "").strip()
            if detail:
                raise MessagesAPIAdapterError(detail)
        # No usable message: surface the whole event payload instead.
        raise MessagesAPIAdapterError(_dump_json(payload))

    return emitted
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def saw_message_stop(state: 'typing.Dict[str, object]') -> 'bool':
    """Report whether `state` has a truthy `saw_message_stop` flag."""
    flag = state.get("saw_message_stop")
    return True if flag else False
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _build_text_blocks(raw_content: 'object') -> 'typing.List[typing.Dict[str, object]]':
|
|
194
|
+
text = str(raw_content or "")
|
|
195
|
+
if not text:
|
|
196
|
+
return []
|
|
197
|
+
return [{"type": "text", "text": text}]
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _build_assistant_blocks(
|
|
201
|
+
raw_message: 'typing.Dict[str, object]',
|
|
202
|
+
) -> 'typing.List[typing.Dict[str, object]]':
|
|
203
|
+
blocks: 'typing.List[typing.Dict[str, object]]' = []
|
|
204
|
+
reasoning = str(raw_message.get("reasoning", "") or "")
|
|
205
|
+
if reasoning:
|
|
206
|
+
blocks.append({"type": "thinking", "thinking": reasoning})
|
|
207
|
+
|
|
208
|
+
text = str(raw_message.get("content", "") or "")
|
|
209
|
+
if text:
|
|
210
|
+
blocks.append({"type": "text", "text": text})
|
|
211
|
+
|
|
212
|
+
raw_tool_calls = raw_message.get("tool_calls") or []
|
|
213
|
+
if raw_tool_calls:
|
|
214
|
+
if not isinstance(raw_tool_calls, list):
|
|
215
|
+
raise MessagesAPIAdapterError("assistant `tool_calls` must be a list")
|
|
216
|
+
for raw_tool_call in raw_tool_calls:
|
|
217
|
+
if not isinstance(raw_tool_call, dict):
|
|
218
|
+
raise MessagesAPIAdapterError("assistant tool calls must be objects")
|
|
219
|
+
function = raw_tool_call.get("function") or {}
|
|
220
|
+
if not isinstance(function, dict):
|
|
221
|
+
raise MessagesAPIAdapterError(
|
|
222
|
+
"assistant tool call is missing function payload"
|
|
223
|
+
)
|
|
224
|
+
blocks.append(
|
|
225
|
+
{
|
|
226
|
+
"type": "tool_use",
|
|
227
|
+
"id": str(raw_tool_call.get("id", "")).strip(),
|
|
228
|
+
"name": str(function.get("name", "")).strip(),
|
|
229
|
+
"input": _parse_json_object(function.get("arguments")),
|
|
230
|
+
}
|
|
231
|
+
)
|
|
232
|
+
return blocks
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _build_tool_result_block(
|
|
236
|
+
raw_message: 'typing.Dict[str, object]',
|
|
237
|
+
) -> 'typing.Dict[str, object]':
|
|
238
|
+
return {
|
|
239
|
+
"type": "tool_result",
|
|
240
|
+
"tool_use_id": str(raw_message.get("tool_call_id", "")).strip(),
|
|
241
|
+
"content": str(raw_message.get("content", "") or ""),
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _translate_tools(
|
|
246
|
+
raw_tools: 'object',
|
|
247
|
+
) -> 'typing.List[typing.Dict[str, object]]':
|
|
248
|
+
translated: 'typing.List[typing.Dict[str, object]]' = []
|
|
249
|
+
if not isinstance(raw_tools, list):
|
|
250
|
+
return translated
|
|
251
|
+
for raw_tool in raw_tools:
|
|
252
|
+
if not isinstance(raw_tool, dict) or raw_tool.get("type") != "function":
|
|
253
|
+
raise MessagesAPIAdapterError(
|
|
254
|
+
"messages API backend only supports function-style tools"
|
|
255
|
+
)
|
|
256
|
+
function = raw_tool.get("function") or {}
|
|
257
|
+
if not isinstance(function, dict):
|
|
258
|
+
raise MessagesAPIAdapterError("tool definition is missing function payload")
|
|
259
|
+
name = str(function.get("name", raw_tool.get("name", ""))).strip()
|
|
260
|
+
if not name:
|
|
261
|
+
raise MessagesAPIAdapterError("tool definition is missing `name`")
|
|
262
|
+
translated.append(
|
|
263
|
+
{
|
|
264
|
+
"name": name,
|
|
265
|
+
"description": str(function.get("description", "") or ""),
|
|
266
|
+
"input_schema": function.get("parameters") or {"type": "object"},
|
|
267
|
+
}
|
|
268
|
+
)
|
|
269
|
+
return translated
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _translate_tool_choice(
|
|
273
|
+
raw_tool_choice: 'object',
|
|
274
|
+
parallel_tool_calls: 'object',
|
|
275
|
+
) -> 'typing.Union[typing.Dict[str, object], None]':
|
|
276
|
+
if raw_tool_choice is None:
|
|
277
|
+
if parallel_tool_calls is False:
|
|
278
|
+
return {
|
|
279
|
+
"type": "auto",
|
|
280
|
+
"disable_parallel_tool_use": True,
|
|
281
|
+
}
|
|
282
|
+
return None
|
|
283
|
+
|
|
284
|
+
translated: 'typing.Dict[str, object]'
|
|
285
|
+
if isinstance(raw_tool_choice, str):
|
|
286
|
+
choice = raw_tool_choice.strip()
|
|
287
|
+
if choice == "auto":
|
|
288
|
+
translated = {"type": "auto"}
|
|
289
|
+
elif choice == "required":
|
|
290
|
+
translated = {"type": "any"}
|
|
291
|
+
elif choice == "none":
|
|
292
|
+
return None
|
|
293
|
+
else:
|
|
294
|
+
raise MessagesAPIAdapterError(
|
|
295
|
+
f"unsupported tool_choice for messages API: {raw_tool_choice!r}"
|
|
296
|
+
)
|
|
297
|
+
elif isinstance(raw_tool_choice, dict):
|
|
298
|
+
choice_type = str(raw_tool_choice.get("type", "")).strip()
|
|
299
|
+
if choice_type == "function":
|
|
300
|
+
function = raw_tool_choice.get("function") or {}
|
|
301
|
+
name = ""
|
|
302
|
+
if isinstance(function, dict):
|
|
303
|
+
name = str(function.get("name", "")).strip()
|
|
304
|
+
if not name:
|
|
305
|
+
name = str(raw_tool_choice.get("name", "")).strip()
|
|
306
|
+
if not name:
|
|
307
|
+
raise MessagesAPIAdapterError(
|
|
308
|
+
"function tool_choice is missing `name`"
|
|
309
|
+
)
|
|
310
|
+
translated = {
|
|
311
|
+
"type": "tool",
|
|
312
|
+
"name": name,
|
|
313
|
+
}
|
|
314
|
+
else:
|
|
315
|
+
raise MessagesAPIAdapterError(
|
|
316
|
+
f"unsupported tool_choice for messages API: {raw_tool_choice!r}"
|
|
317
|
+
)
|
|
318
|
+
else:
|
|
319
|
+
raise MessagesAPIAdapterError(
|
|
320
|
+
f"unsupported tool_choice for messages API: {raw_tool_choice!r}"
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
if parallel_tool_calls is False:
|
|
324
|
+
translated["disable_parallel_tool_use"] = True
|
|
325
|
+
return translated
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _parse_json_object(raw_value: 'object') -> 'typing.Dict[str, object]':
|
|
329
|
+
if isinstance(raw_value, dict):
|
|
330
|
+
return dict(raw_value)
|
|
331
|
+
if isinstance(raw_value, str):
|
|
332
|
+
text = raw_value.strip()
|
|
333
|
+
if not text:
|
|
334
|
+
return {}
|
|
335
|
+
try:
|
|
336
|
+
parsed = json.loads(text)
|
|
337
|
+
except json.JSONDecodeError as exc:
|
|
338
|
+
raise MessagesAPIAdapterError(
|
|
339
|
+
f"tool arguments must be valid JSON objects for messages API: {exc}"
|
|
340
|
+
) from exc
|
|
341
|
+
if isinstance(parsed, dict):
|
|
342
|
+
return dict(parsed)
|
|
343
|
+
raise MessagesAPIAdapterError(
|
|
344
|
+
"tool arguments must decode to JSON objects for messages API"
|
|
345
|
+
)
|
|
346
|
+
raise MessagesAPIAdapterError(
|
|
347
|
+
"tool arguments must be strings or objects for messages API"
|
|
348
|
+
)
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def _resolve_max_tokens(outcomming_request: 'typing.Dict[str, object]') -> 'int':
|
|
352
|
+
raw_value = outcomming_request.get("max_tokens")
|
|
353
|
+
if isinstance(raw_value, bool):
|
|
354
|
+
return DEFAULT_MESSAGES_MAX_TOKENS
|
|
355
|
+
if isinstance(raw_value, int) and raw_value > 0:
|
|
356
|
+
return raw_value
|
|
357
|
+
return DEFAULT_MESSAGES_MAX_TOKENS
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def _usage_chunk(raw_usage: 'object') -> 'typing.Union[typing.Dict[str, object], None]':
    """Wrap translated usage data in a chat chunk with no choices.

    Returns ``None`` when ``_translate_usage`` produces an empty result.
    """
    translated = _translate_usage(raw_usage)
    return {"choices": [], "usage": translated} if translated else None
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def _translate_usage(raw_usage: 'object') -> 'typing.Dict[str, object]':
|
|
371
|
+
if not isinstance(raw_usage, dict):
|
|
372
|
+
return {}
|
|
373
|
+
usage: 'typing.Dict[str, object]' = {}
|
|
374
|
+
input_tokens = raw_usage.get("input_tokens")
|
|
375
|
+
output_tokens = raw_usage.get("output_tokens")
|
|
376
|
+
if isinstance(input_tokens, int):
|
|
377
|
+
usage["input_tokens"] = input_tokens
|
|
378
|
+
if isinstance(output_tokens, int):
|
|
379
|
+
usage["output_tokens"] = output_tokens
|
|
380
|
+
total_tokens = raw_usage.get("total_tokens")
|
|
381
|
+
if isinstance(total_tokens, int):
|
|
382
|
+
usage["total_tokens"] = total_tokens
|
|
383
|
+
elif isinstance(input_tokens, int) and isinstance(output_tokens, int):
|
|
384
|
+
usage["total_tokens"] = input_tokens + output_tokens
|
|
385
|
+
|
|
386
|
+
input_details: 'typing.Dict[str, int]' = {}
|
|
387
|
+
cache_creation = raw_usage.get("cache_creation_input_tokens")
|
|
388
|
+
if isinstance(cache_creation, int):
|
|
389
|
+
input_details["cache_creation_input_tokens"] = cache_creation
|
|
390
|
+
cache_read = raw_usage.get("cache_read_input_tokens")
|
|
391
|
+
if isinstance(cache_read, int):
|
|
392
|
+
input_details["cache_read_input_tokens"] = cache_read
|
|
393
|
+
if input_details:
|
|
394
|
+
usage["input_tokens_details"] = input_details
|
|
395
|
+
return usage
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _normalize_index(raw_index: 'object') -> 'int':
|
|
399
|
+
if isinstance(raw_index, int):
|
|
400
|
+
return raw_index
|
|
401
|
+
try:
|
|
402
|
+
return int(raw_index)
|
|
403
|
+
except (TypeError, ValueError):
|
|
404
|
+
return 0
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _translate_stop_reason(raw_stop_reason: 'object') -> 'typing.Union[str, None]':
|
|
408
|
+
if not isinstance(raw_stop_reason, str):
|
|
409
|
+
return None
|
|
410
|
+
stop_reason = raw_stop_reason.strip()
|
|
411
|
+
if not stop_reason:
|
|
412
|
+
return None
|
|
413
|
+
if stop_reason == "tool_use":
|
|
414
|
+
return "tool_calls"
|
|
415
|
+
if stop_reason == "max_tokens":
|
|
416
|
+
return "length"
|
|
417
|
+
if stop_reason in {"end_turn", "stop_sequence"}:
|
|
418
|
+
return "stop"
|
|
419
|
+
return stop_reason
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def _chat_text_chunk(text: 'str') -> 'typing.Dict[str, object]':
    """Build a chat delta chunk whose delta carries ``text`` as ``content``."""
    delta: 'typing.Dict[str, object]' = {"content": text}
    return _chat_delta_chunk(delta)
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def _chat_reasoning_chunk(reasoning: 'str') -> 'typing.Dict[str, object]':
    """Build a chat delta chunk carrying ``reasoning`` as ``reasoning_content``."""
    delta: 'typing.Dict[str, object]' = {"reasoning_content": reasoning}
    return _chat_delta_chunk(delta)
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def _chat_tool_chunk(
    index: 'int',
    call_id: 'str' = "",
    name: 'str' = "",
    arguments: 'str' = "",
) -> 'typing.Dict[str, object]':
    """Build a chat delta chunk describing one tool-call fragment.

    ``index`` is always included. ``call_id``, ``name`` and ``arguments`` are
    only emitted when non-empty; a non-empty ``name`` also sets
    ``type`` to ``"function"``.
    """
    function_payload: 'typing.Dict[str, object]' = {}
    if name:
        function_payload["name"] = name
    if arguments:
        function_payload["arguments"] = arguments

    # Key insertion order (index, function, id, type) is kept stable so the
    # serialized chunk looks the same regardless of which fields are present.
    fragment: 'typing.Dict[str, object]' = {
        "index": index,
        "function": function_payload,
    }
    if call_id:
        fragment["id"] = call_id
    if name:
        fragment["type"] = "function"
    return _chat_delta_chunk({"tool_calls": [fragment]})
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def _chat_delta_chunk(delta: 'typing.Dict[str, object]') -> 'typing.Dict[str, object]':
|
|
455
|
+
return {
|
|
456
|
+
"choices": [
|
|
457
|
+
{
|
|
458
|
+
"index": 0,
|
|
459
|
+
"delta": delta,
|
|
460
|
+
"finish_reason": None,
|
|
461
|
+
}
|
|
462
|
+
]
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def _chat_finish_chunk(finish_reason: 'str') -> 'typing.Dict[str, object]':
|
|
467
|
+
return {
|
|
468
|
+
"choices": [
|
|
469
|
+
{
|
|
470
|
+
"index": 0,
|
|
471
|
+
"delta": {},
|
|
472
|
+
"finish_reason": finish_reason,
|
|
473
|
+
}
|
|
474
|
+
]
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def _dump_json(raw_value: 'object') -> 'str':
|
|
479
|
+
return json.dumps(raw_value, ensure_ascii=False, separators=(",", ":"))
|
|
@@ -28,6 +28,7 @@ class OutgoingRequest(TypedDict):
|
|
|
28
28
|
model: 'str'
|
|
29
29
|
messages: 'typing.List[ChatMessage]'
|
|
30
30
|
stream: 'bool'
|
|
31
|
+
max_tokens: 'Optional[int]'
|
|
31
32
|
tools: 'Optional[typing.List[typing.Dict[str, object]]]'
|
|
32
33
|
tool_choice: 'Optional[object]'
|
|
33
34
|
parallel_tool_calls: 'Optional[bool]'
|
|
@@ -6,6 +6,12 @@ import urllib.error
|
|
|
6
6
|
import urllib.request
|
|
7
7
|
|
|
8
8
|
from .config import CompatServerConfig
|
|
9
|
+
from .messages_api import (
|
|
10
|
+
MessagesAPIAdapterError,
|
|
11
|
+
build_messages_request,
|
|
12
|
+
iter_chat_chunks as iter_chat_chunks_from_messages,
|
|
13
|
+
saw_message_stop as messages_saw_message_stop,
|
|
14
|
+
)
|
|
9
15
|
from .session_store import StoredResponse
|
|
10
16
|
from .tools import WebSearchTool, collect_custom_tool_names
|
|
11
17
|
from .tools.custom_adapter import (
|
|
@@ -130,6 +136,13 @@ class StreamRouter:
|
|
|
130
136
|
),
|
|
131
137
|
"stream": True,
|
|
132
138
|
}
|
|
139
|
+
max_tokens = self._coerce_positive_int(
|
|
140
|
+
incomming_request.get("max_output_tokens")
|
|
141
|
+
)
|
|
142
|
+
if max_tokens is None:
|
|
143
|
+
max_tokens = self._coerce_positive_int(incomming_request.get("max_tokens"))
|
|
144
|
+
if max_tokens is not None:
|
|
145
|
+
payload["max_tokens"] = max_tokens
|
|
133
146
|
if self._supports_stream_usage():
|
|
134
147
|
payload["stream_options"] = {"include_usage": True}
|
|
135
148
|
|
|
@@ -150,6 +163,19 @@ class StreamRouter:
|
|
|
150
163
|
return payload
|
|
151
164
|
|
|
152
165
|
def open_outcomming_stream(self, outcomming_request: 'typing.Dict[str, object]'):
    """Open the downstream stream using the configured outcomming API.

    Delegates to the messages or chat-completions opener depending on what
    ``self._config.normalized_outcomming_api()`` returns.

    Raises:
        OutcommingChatError: if the normalized API name is neither
            ``"messages"`` nor ``"chat_completions"``.
    """
    api_name = self._config.normalized_outcomming_api()
    if api_name == "chat_completions":
        return self._open_outcomming_chat_stream(outcomming_request)
    if api_name == "messages":
        return self._open_outcomming_messages_stream(outcomming_request)
    raise OutcommingChatError(
        f"unsupported outcomming API: {self._config.outcomming_api!r}"
    )
|
|
174
|
+
|
|
175
|
+
def _open_outcomming_chat_stream(
|
|
176
|
+
self,
|
|
177
|
+
outcomming_request: 'typing.Dict[str, object]',
|
|
178
|
+
):
|
|
153
179
|
request = urllib.request.Request(
|
|
154
180
|
self._config.outcomming_chat_completions_url(),
|
|
155
181
|
data=json.dumps(outcomming_request).encode("utf-8"),
|
|
@@ -196,6 +222,67 @@ class StreamRouter:
|
|
|
196
222
|
f"outcomming chat request failed: {exc.reason}"
|
|
197
223
|
) from exc
|
|
198
224
|
|
|
225
|
+
def _open_outcomming_messages_stream(
    self,
    outcomming_request: 'typing.Dict[str, object]',
):
    """Stream chat-style chunks from the downstream messages endpoint.

    This is a generator. It converts ``outcomming_request`` with
    ``build_messages_request``, POSTs the result to
    ``self._config.outcomming_messages_url()``, reads the response through
    ``self._iter_sse_events`` and yields every chunk that
    ``iter_chat_chunks_from_messages`` produces for each event.

    Raises:
        OutcommingChatError: if the request cannot be converted, the HTTP
            request fails, the response body cannot be read or decoded, or
            the stream ends without ``messages_saw_message_stop`` reporting
            a stop.
    """
    try:
        translated_request = build_messages_request(outcomming_request)
    except MessagesAPIAdapterError as exc:
        raise OutcommingChatError(str(exc)) from exc

    http_request = urllib.request.Request(
        self._config.outcomming_messages_url(),
        data=json.dumps(translated_request).encode("utf-8"),
        headers=self._build_headers(accept="text/event-stream"),
        method="POST",
    )
    try:
        with urllib.request.urlopen(
            http_request,
            context=ssl.create_default_context(),
            timeout=self._config.timeout_seconds,
        ) as response:
            try:
                # Shared across events so the adapter can track stream progress.
                adapter_state: 'typing.Dict[str, object]' = {}
                for event_name, raw_data in self._iter_sse_events(response):
                    # Events without data and non-object payloads are skipped.
                    event_payload = json.loads(raw_data) if raw_data else None
                    if not isinstance(event_payload, dict):
                        continue
                    for chat_chunk in iter_chat_chunks_from_messages(
                        event_name,
                        event_payload,
                        adapter_state,
                    ):
                        yield chat_chunk
                if not messages_saw_message_stop(adapter_state):
                    raise OutcommingChatError(
                        "outcomming messages stream ended before `message_stop`"
                    )
            except (
                ConnectionError,
                EOFError,
                OSError,
                http.client.HTTPException,
                json.JSONDecodeError,
                MessagesAPIAdapterError,
            ) as exc:
                raise OutcommingChatError(
                    "outcomming messages stream failed while reading response body: "
                    f"{exc}"
                ) from exc
    except urllib.error.HTTPError as exc:
        # Only the first 500 characters of the error body go into the message.
        error_body = exc.read().decode("utf-8", errors="replace")
        raise OutcommingChatError(
            f"outcomming messages request failed with status {exc.code}: {error_body[:500]}"
        ) from exc
    except urllib.error.URLError as exc:
        raise OutcommingChatError(
            f"outcomming messages request failed: {exc.reason}"
        ) from exc
|
|
285
|
+
|
|
199
286
|
def route_stream(
|
|
200
287
|
self,
|
|
201
288
|
incomming_stream,
|
|
@@ -439,6 +526,13 @@ class StreamRouter:
|
|
|
439
526
|
flush_pending_assistant()
|
|
440
527
|
return messages
|
|
441
528
|
|
|
529
|
+
def _coerce_positive_int(self, raw_value: 'object') -> 'typing.Union[int, None]':
|
|
530
|
+
if isinstance(raw_value, bool):
|
|
531
|
+
return None
|
|
532
|
+
if isinstance(raw_value, int) and raw_value > 0:
|
|
533
|
+
return raw_value
|
|
534
|
+
return None
|
|
535
|
+
|
|
442
536
|
def _coalesce_content_text(self, raw_content: 'object') -> 'str':
|
|
443
537
|
if raw_content is None:
|
|
444
538
|
return ""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|