python-codex 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycodex/cli.py +8 -1
- pycodex/runtime_services.py +3 -0
- pycodex/tools/exec_tool.py +1 -1
- pycodex/tools/unified_exec_manager.py +19 -2
- pycodex/utils/get_env.py +23 -4
- {python_codex-0.1.1.dist-info → python_codex-0.1.2.dist-info}/METADATA +1 -1
- {python_codex-0.1.1.dist-info → python_codex-0.1.2.dist-info}/RECORD +21 -10
- responses_server/__init__.py +17 -0
- responses_server/__main__.py +5 -0
- responses_server/app.py +217 -0
- responses_server/config.py +63 -0
- responses_server/payload_processors.py +86 -0
- responses_server/server.py +63 -0
- responses_server/session_store.py +37 -0
- responses_server/stream_router.py +784 -0
- responses_server/tools/__init__.py +4 -0
- responses_server/tools/custom_adapter.py +235 -0
- responses_server/tools/web_search.py +263 -0
- {python_codex-0.1.1.dist-info → python_codex-0.1.2.dist-info}/WHEEL +0 -0
- {python_codex-0.1.1.dist-info → python_codex-0.1.2.dist-info}/entry_points.txt +0 -0
- {python_codex-0.1.1.dist-info → python_codex-0.1.2.dist-info}/licenses/LICENSE +0 -0
pycodex/cli.py
CHANGED
|
@@ -21,7 +21,6 @@ from .protocol import AgentEvent
|
|
|
21
21
|
from .runtime import AgentRuntime
|
|
22
22
|
from .runtime_services import RuntimeEnvironment, create_runtime_environment
|
|
23
23
|
from .utils import CliSessionView, load_codex_dotenv
|
|
24
|
-
from responses_server import launch_chat_completion_compat_server
|
|
25
24
|
|
|
26
25
|
EXIT_COMMANDS = {"/exit", "/quit"}
|
|
27
26
|
HISTORY_COMMAND = "/history"
|
|
@@ -33,6 +32,14 @@ LOCAL_RESPONSES_SERVER_API_KEY_ENV = "PYCODEX_LOCAL_RESPONSES_SERVER_KEY"
|
|
|
33
32
|
CLI_ORIGINATOR = "codex-tui"
|
|
34
33
|
|
|
35
34
|
|
|
35
|
+
def launch_chat_completion_compat_server(*args, **kwargs):
    """Forward every argument to the ``responses_server`` launcher of the same name.

    The import happens inside the function body, so ``responses_server`` is
    only loaded when this wrapper is actually called rather than when the
    CLI module is imported.
    """
    from responses_server import launch_chat_completion_compat_server as _launch

    return _launch(*args, **kwargs)
|
|
41
|
+
|
|
42
|
+
|
|
36
43
|
def configure_loguru() -> None:
|
|
37
44
|
try:
|
|
38
45
|
from loguru import logger
|
pycodex/runtime_services.py
CHANGED
|
@@ -346,6 +346,9 @@ class SubAgentManager:
|
|
|
346
346
|
managed.state = "completed"
|
|
347
347
|
finally:
|
|
348
348
|
managed.pending_submission_ids.discard(submission_id)
|
|
349
|
+
if managed.pending_submission_ids and managed.error_message is None:
|
|
350
|
+
managed.completed_message = None
|
|
351
|
+
managed.state = "running"
|
|
349
352
|
async with self._condition:
|
|
350
353
|
self._condition.notify_all()
|
|
351
354
|
|
pycodex/tools/exec_tool.py
CHANGED
|
@@ -17,7 +17,7 @@ from ..protocol import JSONValue
|
|
|
17
17
|
from .base_tool import BaseTool, ToolContext
|
|
18
18
|
from .code_mode_manager import CodeModeManager
|
|
19
19
|
|
|
20
|
-
EXEC_FREEFORM_GRAMMAR = """start: pragma_source | plain_source
|
|
20
|
+
EXEC_FREEFORM_GRAMMAR = r"""start: pragma_source | plain_source
|
|
21
21
|
pragma_source: PRAGMA_LINE NEWLINE SOURCE
|
|
22
22
|
plain_source: SOURCE
|
|
23
23
|
|
|
@@ -184,6 +184,9 @@ class _HeadTailBuffer:
|
|
|
184
184
|
self.tail.clear()
|
|
185
185
|
return combined
|
|
186
186
|
|
|
187
|
+
def has_data(self) -> bool:
    """Return True when either the ``head`` or the ``tail`` segment is non-empty."""
    if self.head:
        return True
    return bool(self.tail)
|
|
189
|
+
|
|
187
190
|
|
|
188
191
|
@dataclass(slots=True)
|
|
189
192
|
class UnifiedExecSession:
|
|
@@ -194,6 +197,7 @@ class UnifiedExecSession:
|
|
|
194
197
|
tty: bool
|
|
195
198
|
unread_output: _HeadTailBuffer = field(default_factory=_HeadTailBuffer)
|
|
196
199
|
reader_task: asyncio.Task | None = None
|
|
200
|
+
output_event: asyncio.Event = field(default_factory=asyncio.Event)
|
|
197
201
|
|
|
198
202
|
|
|
199
203
|
class UnifiedExecManager:
|
|
@@ -294,11 +298,22 @@ class UnifiedExecManager:
|
|
|
294
298
|
if session is None:
|
|
295
299
|
return f"Error: session_id {session_id} is not running."
|
|
296
300
|
|
|
297
|
-
|
|
301
|
+
loop = asyncio.get_running_loop()
|
|
302
|
+
start_wait = loop.time()
|
|
298
303
|
try:
|
|
299
304
|
await asyncio.wait_for(session.process.wait(), timeout=yield_time_ms / 1000.0)
|
|
300
305
|
except asyncio.TimeoutError:
|
|
301
|
-
|
|
306
|
+
remaining_seconds = (yield_time_ms / 1000.0) - (loop.time() - start_wait)
|
|
307
|
+
if (
|
|
308
|
+
session.process.returncode is None
|
|
309
|
+
and not session.unread_output.has_data()
|
|
310
|
+
and remaining_seconds > 0
|
|
311
|
+
):
|
|
312
|
+
session.output_event.clear()
|
|
313
|
+
try:
|
|
314
|
+
await asyncio.wait_for(session.output_event.wait(), timeout=remaining_seconds)
|
|
315
|
+
except asyncio.TimeoutError:
|
|
316
|
+
pass
|
|
302
317
|
|
|
303
318
|
if session.reader_task is not None and session.process.returncode is not None:
|
|
304
319
|
await session.reader_task
|
|
@@ -345,6 +360,8 @@ class UnifiedExecManager:
|
|
|
345
360
|
if not chunk:
|
|
346
361
|
break
|
|
347
362
|
session.unread_output.push_chunk(chunk)
|
|
363
|
+
session.output_event.set()
|
|
364
|
+
session.output_event.set()
|
|
348
365
|
|
|
349
366
|
def _resolve_workdir(self, workdir: str | None) -> Path:
|
|
350
367
|
if not workdir:
|
pycodex/utils/get_env.py
CHANGED
|
@@ -83,10 +83,15 @@ def get_package_version() -> str:
|
|
|
83
83
|
detected = _detect_upstream_codex_version()
|
|
84
84
|
if detected is not None:
|
|
85
85
|
return detected
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
86
|
+
for distribution_name in ("python-codex", "pycodex"):
|
|
87
|
+
try:
|
|
88
|
+
return importlib.metadata.version(distribution_name)
|
|
89
|
+
except importlib.metadata.PackageNotFoundError:
|
|
90
|
+
continue
|
|
91
|
+
local_version = _read_local_package_version()
|
|
92
|
+
if local_version is not None:
|
|
93
|
+
return local_version
|
|
94
|
+
return "0.1.0"
|
|
90
95
|
|
|
91
96
|
|
|
92
97
|
def get_os_info() -> tuple[str, str]:
|
|
@@ -178,6 +183,20 @@ def _normalize_os_version(version: str) -> str:
|
|
|
178
183
|
return version
|
|
179
184
|
|
|
180
185
|
|
|
186
|
+
def _read_local_package_version() -> str | None:
|
|
187
|
+
pyproject_path = Path(__file__).resolve().parents[2] / "pyproject.toml"
|
|
188
|
+
if not pyproject_path.is_file():
|
|
189
|
+
return None
|
|
190
|
+
match = re.search(
|
|
191
|
+
r'^\s*version\s*=\s*"([^"]+)"\s*$',
|
|
192
|
+
pyproject_path.read_text(encoding="utf-8"),
|
|
193
|
+
flags=re.MULTILINE,
|
|
194
|
+
)
|
|
195
|
+
if match is None:
|
|
196
|
+
return None
|
|
197
|
+
return match.group(1).strip() or None
|
|
198
|
+
|
|
199
|
+
|
|
181
200
|
def _tmux_display_message(fmt: str) -> str | None:
|
|
182
201
|
try:
|
|
183
202
|
output = subprocess.run(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
pycodex/__init__.py,sha256=T11JU1QHEk81TchhrTAOqVkvUUiQGlesk9PNaivjPrU,3052
|
|
2
2
|
pycodex/agent.py,sha256=ApIneWSqDxryf9hdmTRFL65AH4e-sn0MWuuR80951Ec,10069
|
|
3
|
-
pycodex/cli.py,sha256=
|
|
3
|
+
pycodex/cli.py,sha256=ju4aF_kwbraqZ-NfoymzB6CjvvMX22afeXDvse2ykf8,24676
|
|
4
4
|
pycodex/collaboration.py,sha256=XAM2enljzHMjzZVlLxbOQF0JhWgKW4qaaDfVcUdE47g,632
|
|
5
5
|
pycodex/context.py,sha256=8-Eg1TE4-GVbEfW0fNZjDWhjLypK3jBlKZY1haYYVPY,23143
|
|
6
6
|
pycodex/doctor.py,sha256=VN-qetM2qJCNRNTZXBMe44VSrEOu8kUXE01luLMF050,10357
|
|
@@ -9,7 +9,7 @@ pycodex/portable.py,sha256=Y2pY08pDiWITY0QYgH3F9YKpOe2EYtxE0qqSmrCkp_g,15260
|
|
|
9
9
|
pycodex/portable_server.py,sha256=xhEwySCJ41WnsowXM-Db6kkmCOVM02Lmd4pbN6hZzh0,7232
|
|
10
10
|
pycodex/protocol.py,sha256=8mQ7I-y9bxYueSr7d_yGj2Tw69t47OCgwvmxhwihdFw,10807
|
|
11
11
|
pycodex/runtime.py,sha256=tfEuyZmnTP625BQ0NMm-AGhjfQpXcv2EaZLtCJTnEmM,7757
|
|
12
|
-
pycodex/runtime_services.py,sha256=
|
|
12
|
+
pycodex/runtime_services.py,sha256=hmdwFiOZ1DPEJ5T8vfDSLfujgGQBPrzPQkn6uX_9vZ8,12503
|
|
13
13
|
pycodex/prompts/collaboration_default.md,sha256=MBTmPuMubeWfZgIeFVj49wwnwD4n_o3fVYAbgWKwu6Q,955
|
|
14
14
|
pycodex/prompts/collaboration_plan.md,sha256=IzjQAA5oHJz-3FmJdOjsJ4LHq6LW1tlEYMoy09n0HKk,8777
|
|
15
15
|
pycodex/prompts/default_base_instructions.md,sha256=D65mcj6bo4CDvVom-D9cbJRJVNquo0NghKt164_fRsg,20923
|
|
@@ -32,7 +32,7 @@ pycodex/tools/close_agent_tool.py,sha256=InKhe2gFWOcqE187J3XYrCckecsyAR48VeVmGdY
|
|
|
32
32
|
pycodex/tools/code_mode_manager.py,sha256=pEczPyCq-3DpJlTtfUEpl4JAGolz8cOpI8mBc7gdrn0,18603
|
|
33
33
|
pycodex/tools/exec_command_tool.py,sha256=_fWfkQLGeINb2-cniY9CWskkAPjC9hE8pfjcBKkWXAg,3459
|
|
34
34
|
pycodex/tools/exec_runtime.js,sha256=ZczdhrzpSZ-qNnJDDJOe8Ap86HpzHb2FZ_vSpHszgLs,3625
|
|
35
|
-
pycodex/tools/exec_tool.py,sha256=
|
|
35
|
+
pycodex/tools/exec_tool.py,sha256=xJfEpcQXpL3OX-ZzKxQ2sI781OuEqpeyPvVkkwhgZ1c,1415
|
|
36
36
|
pycodex/tools/grep_files_tool.py,sha256=twsx1KsvOWh8mi-lbycAtEyh6PeLxtNzl9LzdjwgAf4,4742
|
|
37
37
|
pycodex/tools/list_dir_tool.py,sha256=7S0RsE-NL04G47FmFZtzo-N-O3fPCYQFF0HrjEVuv3U,4749
|
|
38
38
|
pycodex/tools/read_file_tool.py,sha256=GVamhSNEZ1F1IU_og9GgSCzV12TL5t5b1fOUlzTOQBQ,8084
|
|
@@ -43,7 +43,7 @@ pycodex/tools/send_input_tool.py,sha256=z9PR5VoFd9SF4A-ol04Op8AXQF_3YLE74C6coiTX
|
|
|
43
43
|
pycodex/tools/shell_command_tool.py,sha256=Bbah_5HirG1BJOIiqzuMa8kNHNYVPCUvxCFa09eRU6A,3500
|
|
44
44
|
pycodex/tools/shell_tool.py,sha256=BWSaEJZwfQg9Ta-ld2wqeXqavrZC7Y8qgF_vBEOxfYA,3678
|
|
45
45
|
pycodex/tools/spawn_agent_tool.py,sha256=LfJlGI0Ecp9HWNLlTubyybFq-xeRNChILq9ozT7piA8,3556
|
|
46
|
-
pycodex/tools/unified_exec_manager.py,sha256=
|
|
46
|
+
pycodex/tools/unified_exec_manager.py,sha256=ZEaMXmO83Iu20V4dwxAuUjy4EF5IHqHmwnTvFJh8zGc,13330
|
|
47
47
|
pycodex/tools/update_plan_tool.py,sha256=l_EG39bEw5K9BIUKoSUsXYDb0W7aLn8SviKSb-bs7Os,2887
|
|
48
48
|
pycodex/tools/view_image_tool.py,sha256=yB915Jd3he4RjPANdm-dYdvio24OXKhBkAsp-9WVPBg,3924
|
|
49
49
|
pycodex/tools/wait_agent_tool.py,sha256=1tJ5spBtpZ_MjoMv5xmZz5WWKl7UwMqHIJ3SYKXEPZw,2596
|
|
@@ -52,11 +52,22 @@ pycodex/tools/web_search_tool.py,sha256=hq78XF6MRvmNyPFSIp5eI0eYn9ryKdKvvoIOFNU3
|
|
|
52
52
|
pycodex/tools/write_stdin_tool.py,sha256=DghlwPJnAqDoRBYyh1zeXRsfTXoQUdLJ8JQfrdE4RLs,2542
|
|
53
53
|
pycodex/utils/__init__.py,sha256=Hj_0a7RhkAblWkaHyFhpi0cs2nSjJ1NdavbkBgEHieY,1024
|
|
54
54
|
pycodex/utils/dotenv.py,sha256=sOpu6PA1VrsPZK13ynh3nZg3-u9pdiCXkW648v3pwZQ,1789
|
|
55
|
-
pycodex/utils/get_env.py,sha256=
|
|
55
|
+
pycodex/utils/get_env.py,sha256=Ehh0mVPhkDlPMd1WXhrz1UqjPCePg8YfZH7zrtu1EOQ,6894
|
|
56
56
|
pycodex/utils/random_ids.py,sha256=vOEVgkwKeQXaHoEVU7IfsPPjKUABkGIeQ7lu9MZctU8,413
|
|
57
57
|
pycodex/utils/visualize.py,sha256=fK79pTfOwMmRrQujAosGt0nGyyJjpz0GfpWY8BkK91c,35369
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
58
|
+
responses_server/__init__.py,sha256=3yPv_zeGT7P11tTnmj5kXktISLNsNW-02MUnnbiZcb0,394
|
|
59
|
+
responses_server/__main__.py,sha256=9SRp-Yw7ShGxc6DhSIXcDLKgGEdAVm3oBZ59rBOPjT0,62
|
|
60
|
+
responses_server/app.py,sha256=VJSceVaXxbPLu9KGafLIt5fiGvxrqVTkWeOg81O5-FQ,7016
|
|
61
|
+
responses_server/config.py,sha256=XAJmvvLiCYN5jCUcP_6uZyoW79OzxigMbik8X-ZTdKE,2174
|
|
62
|
+
responses_server/payload_processors.py,sha256=_3Sl7HLG00BgN_TKcvT3_3drCDAq1MAeK1HxbRXNta4,3019
|
|
63
|
+
responses_server/server.py,sha256=Q-gjtHzb7K1Guex10G38PgH9hxqJgdzYnBV7Ycy9L7Y,2049
|
|
64
|
+
responses_server/session_store.py,sha256=oP3aFHsGmEMoXuUcxNh6B4vzp6KwaeRdLzM-3AOwM98,1078
|
|
65
|
+
responses_server/stream_router.py,sha256=uYPjrlBUnNqiuQQ91eKmwDfNF6e5NYDtf5C8BdPRPRc,29536
|
|
66
|
+
responses_server/tools/__init__.py,sha256=ivsBSEy0SBUhY-Uea5v1XMLXShkwHdCVl0id-1FwdZg,150
|
|
67
|
+
responses_server/tools/custom_adapter.py,sha256=ivROeI8D9B1saS7skGLXnwF7fbsjAmEVSbeMceYno4E,8238
|
|
68
|
+
responses_server/tools/web_search.py,sha256=HR9E5uMxWU07khsaIO9zvdg1GCCrWNy73263zfMaxsw,8565
|
|
69
|
+
python_codex-0.1.2.dist-info/METADATA,sha256=wy8bEToZxyf9Cio-upoAS8XmC3MscN8wiKTVXp9MW9g,13969
|
|
70
|
+
python_codex-0.1.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
71
|
+
python_codex-0.1.2.dist-info/entry_points.txt,sha256=sNUVakoVuTrzJH505ZgRTQxmtRRPUHV_EH0i6EbYTyM,45
|
|
72
|
+
python_codex-0.1.2.dist-info/licenses/LICENSE,sha256=0X8ifk312hYAORM4hlzg8wVSEXYKNmiPgWlB1YIy2Nw,10926
|
|
73
|
+
python_codex-0.1.2.dist-info/RECORD,,
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from .app import (
|
|
2
|
+
ManagedResponseServer,
|
|
3
|
+
launch_chat_completion_compat_server,
|
|
4
|
+
run_server,
|
|
5
|
+
)
|
|
6
|
+
from .config import CompatServerConfig
|
|
7
|
+
from .server import ResponseServer
|
|
8
|
+
from .stream_router import StreamRouter
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"CompatServerConfig",
|
|
12
|
+
"ManagedResponseServer",
|
|
13
|
+
"ResponseServer",
|
|
14
|
+
"launch_chat_completion_compat_server",
|
|
15
|
+
"run_server",
|
|
16
|
+
"StreamRouter",
|
|
17
|
+
]
|
responses_server/app.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
from dataclasses import replace
|
|
5
|
+
import json
|
|
6
|
+
import socket
|
|
7
|
+
import threading
|
|
8
|
+
import time
|
|
9
|
+
from typing import Iterator
|
|
10
|
+
|
|
11
|
+
from fastapi import FastAPI, Request
|
|
12
|
+
from fastapi.responses import JSONResponse, StreamingResponse
|
|
13
|
+
import uvicorn
|
|
14
|
+
|
|
15
|
+
from .config import CompatServerConfig
|
|
16
|
+
from .server import ResponseServer
|
|
17
|
+
from .stream_router import OutcommingChatError, UnsupportedIncommingFeature
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _format_sse_event(event_name: str, payload: dict[str, object]) -> bytes:
|
|
21
|
+
data = json.dumps(payload, ensure_ascii=False)
|
|
22
|
+
return f"event: {event_name}\ndata: {data}\n\n".encode("utf-8")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _stream_events(response_server: ResponseServer, request_body: dict[str, object], request_headers: dict[str, str]) -> Iterator[bytes]:
    """Yield SSE frames for one response stream.

    Each ``(event_name, payload)`` pair produced by
    ``response_server.start_response_stream`` is encoded with
    ``_format_sse_event``. If an ``OutcommingChatError`` is raised while the
    stream is being opened or consumed, a single ``response.failed`` frame
    carrying the error message is emitted and the generator ends.
    """
    try:
        events = response_server.start_response_stream(request_body, request_headers)
        for name, data in events:
            yield _format_sse_event(name, data)
    except OutcommingChatError as exc:
        failure = {
            "type": "response.failed",
            "response": {"error": {"message": str(exc)}},
        }
        yield _format_sse_event("response.failed", failure)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for ``python -m responses_server``.

    Only ``--outcomming-base-url`` is required; every other option has a
    default.
    """
    description = (
        "Standalone localhost `/v1/responses` server that translates the "
        "Codex/Responses subset onto an outcomming `/v1/chat/completions` backend."
    )
    parser = argparse.ArgumentParser(
        prog="python -m responses_server",
        description=description,
    )
    # (flag, add_argument keyword options), registered in declaration order.
    option_specs = (
        ("--host", {"default": "127.0.0.1"}),
        ("--port", {"type": int, "default": 8001}),
        ("--outcomming-base-url", {"required": True}),
        ("--outcomming-api-key-env", {"default": None}),
        ("--model-provider", {"default": None}),
        ("--timeout-seconds", {"type": float, "default": 120.0}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def run_server(config: CompatServerConfig) -> None:
    """Build the compat app for *config* and hand it to ``uvicorn.run``.

    The app is served on ``config.host``/``config.port`` with info-level
    logging.
    """
    app = ManagedResponseServer.build_app(config)
    uvicorn.run(app, host=config.host, port=config.port, log_level="info")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def launch_chat_completion_compat_server(
    base_url: str,
    api_key_env: str | None = None,
    model_provider: str | None = None,
):
    """Start a managed compat server in the background and return it.

    Args:
        base_url: Base URL of the downstream chat-completions backend.
        api_key_env: Name of the environment variable holding the backend
            API key, if any.
        model_provider: Optional provider name forwarded to the config.

    Returns:
        The ``ManagedResponseServer`` after ``start()`` has returned.
    """
    managed = ManagedResponseServer(
        CompatServerConfig.from_base_url(
            base_url,
            api_key_env,
            model_provider=model_provider,
        )
    )
    managed.start()
    return managed
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ManagedResponseServer:
    """Own a uvicorn server for the compat app and run it on a daemon thread.

    ``build_app`` is a static factory that can be used on its own (as
    ``run_server`` does); instances add start/stop lifecycle management.
    """

    @staticmethod
    def build_app(
        config: CompatServerConfig,
        session_store=None,
        stream_router=None,
    ) -> FastAPI:
        """Create the FastAPI app exposing health, models and responses routes.

        Args:
            config: Server configuration passed to ``ResponseServer``.
            session_store: Optional store forwarded to ``ResponseServer``.
            stream_router: Optional router forwarded to ``ResponseServer``.

        Returns:
            A FastAPI app whose ``state.response_server`` holds the
            ``ResponseServer`` backing all routes.
        """
        response_server = ResponseServer(
            config,
            session_store=session_store,
            stream_router=stream_router,
        )
        app = FastAPI(title="ResponsesCompat", version="0.1.0")
        app.state.response_server = response_server

        @app.get("/health")
        @app.get("/healthz")
        async def health() -> dict[str, bool]:
            return {"ok": True}

        @app.get("/models")
        @app.get("/v1/models")
        async def list_models():
            try:
                return response_server.list_models()
            except OutcommingChatError as exc:
                # Downstream failure is reported as a gateway error.
                return JSONResponse(
                    {"error": {"message": str(exc)}},
                    status_code=502,
                )

        @app.post("/responses")
        @app.post("/v1/responses")
        async def responses(request: Request):
            try:
                request_body = await request.json()
            except Exception as exc:
                return JSONResponse(
                    {"error": {"message": f"invalid JSON body: {exc}"}},
                    status_code=400,
                )
            if not isinstance(request_body, dict):
                return JSONResponse(
                    {"error": {"message": "request body must be a JSON object"}},
                    status_code=400,
                )

            # Lower-case header names so later lookups are case-insensitive.
            request_headers = {
                str(key).lower(): str(value)
                for key, value in request.headers.items()
            }
            try:
                response_server.stream_router.validate_incomming_request(request_body)
            except UnsupportedIncommingFeature as exc:
                return JSONResponse(
                    {"error": {"message": str(exc)}},
                    status_code=501,
                )

            return StreamingResponse(
                _stream_events(response_server, request_body, request_headers),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "Connection": "close",
                },
            )

        return app

    def __init__(self, config: CompatServerConfig) -> None:
        """Prepare the app, uvicorn server and thread without starting them.

        A falsy ``config.port`` (the dataclass default is 0) is replaced by
        a port obtained from ``_reserve_free_port()``.
        """
        port = config.port or _reserve_free_port()
        self._config = replace(config, port=port)
        self._app = self.build_app(self._config)
        self._uvicorn_config = uvicorn.Config(
            self._app,
            host=self._config.host,
            port=self._config.port,
            log_level="error",
            access_log=False,
        )
        self._server = uvicorn.Server(self._uvicorn_config)
        self._thread = threading.Thread(target=self._server.run, daemon=True)

    @property
    def base_url(self) -> str:
        """Return the ``http://host:port/v1`` URL clients should target."""
        return f"http://{self._config.host}:{self._config.port}/v1"

    def start(self, timeout_seconds: float = 10.0) -> None:
        """Start the server thread and block until uvicorn reports started.

        Args:
            timeout_seconds: Maximum time to wait for startup.

        Raises:
            RuntimeError: If the server thread terminates before reporting
                started, or if startup does not finish within the timeout.
        """
        self._thread.start()
        # A monotonic clock keeps the deadline immune to wall-clock changes.
        deadline = time.monotonic() + timeout_seconds
        while not self._server.started:
            if not self._thread.is_alive():
                # The thread is gone and will never set `started`; waiting
                # out the full timeout would only delay and mislabel this.
                raise RuntimeError(
                    "managed responses server exited before it finished starting"
                )
            if time.monotonic() >= deadline:
                raise RuntimeError(
                    "timed out waiting for managed responses server to start"
                )
            time.sleep(0.01)

    def stop(self, timeout_seconds: float = 5.0) -> None:
        """Ask uvicorn to exit and wait for the server thread to finish.

        Raises:
            RuntimeError: If the thread is still alive after
                ``timeout_seconds``.
        """
        self._server.should_exit = True
        self._thread.join(timeout=timeout_seconds)
        if self._thread.is_alive():
            raise RuntimeError(
                "timed out waiting for managed responses server to stop"
            )
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def main() -> None:
    """Parse command-line arguments and run the server with them."""
    args = build_parser().parse_args()
    config = CompatServerConfig(
        host=args.host,
        port=args.port,
        outcomming_base_url=args.outcomming_base_url,
        outcomming_api_key_env=args.outcomming_api_key_env,
        model_provider=args.model_provider,
        timeout_seconds=args.timeout_seconds,
    )
    run_server(config)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# Script entry point: run the CLI only when this module is executed directly.
# NOTE(review): this guard sits above `_reserve_free_port`, so that helper is
# not yet defined while `main()` runs as a script; the `main()` path shown here
# (run_server -> build_app) does not call it, but confirm before reordering.
if __name__ == "__main__":
    main()
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _reserve_free_port() -> int:
|
|
211
|
+
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
212
|
+
try:
|
|
213
|
+
sock.bind(("127.0.0.1", 0))
|
|
214
|
+
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
|
215
|
+
return int(sock.getsockname()[1])
|
|
216
|
+
finally:
|
|
217
|
+
sock.close()
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
import urllib.parse
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True, slots=True)
|
|
9
|
+
class CompatServerConfig:
|
|
10
|
+
host: str = "127.0.0.1"
|
|
11
|
+
port: int = 0
|
|
12
|
+
outcomming_base_url: str = "http://127.0.0.1:8000/v1"
|
|
13
|
+
outcomming_api_key_env: str | None = None
|
|
14
|
+
model_provider: str | None = None
|
|
15
|
+
timeout_seconds: float = 120.0
|
|
16
|
+
|
|
17
|
+
def outcomming_api_key(self) -> str | None:
|
|
18
|
+
if self.outcomming_api_key_env is None:
|
|
19
|
+
return None
|
|
20
|
+
value = os.environ.get(self.outcomming_api_key_env, "").strip()
|
|
21
|
+
return value or None
|
|
22
|
+
|
|
23
|
+
def outcomming_chat_completions_url(self) -> str:
|
|
24
|
+
base = self.outcomming_base_url.rstrip("/")
|
|
25
|
+
return f"{base}/chat/completions"
|
|
26
|
+
|
|
27
|
+
def outcomming_models_url(self) -> str:
|
|
28
|
+
base = self.outcomming_base_url.rstrip("/")
|
|
29
|
+
return f"{base}/models"
|
|
30
|
+
|
|
31
|
+
def with_ephemeral_port(self) -> CompatServerConfig:
|
|
32
|
+
return CompatServerConfig(
|
|
33
|
+
host=self.host,
|
|
34
|
+
port=0,
|
|
35
|
+
outcomming_base_url=self.outcomming_base_url,
|
|
36
|
+
outcomming_api_key_env=self.outcomming_api_key_env,
|
|
37
|
+
model_provider=self.model_provider,
|
|
38
|
+
timeout_seconds=self.timeout_seconds,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
@classmethod
|
|
42
|
+
def from_base_url(
|
|
43
|
+
cls,
|
|
44
|
+
outcomming_base_url: str,
|
|
45
|
+
api_key_env: str | None = None,
|
|
46
|
+
model_provider: str | None = None,
|
|
47
|
+
) -> CompatServerConfig:
|
|
48
|
+
parsed = urllib.parse.urlparse(outcomming_base_url)
|
|
49
|
+
if not parsed.scheme or not parsed.netloc:
|
|
50
|
+
raise ValueError(f"invalid outcomming base url: {outcomming_base_url}")
|
|
51
|
+
normalized_path = parsed.path.rstrip("/")
|
|
52
|
+
if normalized_path in {"", "/"}:
|
|
53
|
+
parsed = parsed._replace(path="/v1")
|
|
54
|
+
outcomming_base_url = urllib.parse.urlunparse(parsed)
|
|
55
|
+
else:
|
|
56
|
+
outcomming_base_url = urllib.parse.urlunparse(
|
|
57
|
+
parsed._replace(path=normalized_path)
|
|
58
|
+
)
|
|
59
|
+
return cls(
|
|
60
|
+
outcomming_base_url=outcomming_base_url,
|
|
61
|
+
outcomming_api_key_env=api_key_env,
|
|
62
|
+
model_provider=model_provider,
|
|
63
|
+
)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Provider-specific post-process hooks for canonical outgoing chat requests.
|
|
4
|
+
|
|
5
|
+
Each downstream chat-completions provider may have its own payload quirks:
|
|
6
|
+
extra fields, removed fields, role normalization, tool-shape tweaks, etc.
|
|
7
|
+
Keep all of those provider-specific rewrites here so `StreamRouter` can keep
|
|
8
|
+
building one canonical `outcomming_request`, while `server.py` selects the
|
|
9
|
+
appropriate hook from `CompatServerConfig.model_provider`.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
from copy import deepcopy
|
|
14
|
+
from typing import Optional, TypedDict
|
|
15
|
+
|
|
16
|
+
ChatMessage = dict[str, object]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class OutgoingRequest(TypedDict):
    """Canonical downstream `/v1/chat/completions` request shape.

    `model`, `messages`, and `stream` are always populated by
    `StreamRouter.build_outcomming_request(...)`. Provider-specific fields that
    may be omitted use `Optional[...]` here so the schema stays simple and does
    not rely on TypedDict inheritance.

    NOTE(review): for a type checker, `Optional[...]` allows a `None` value but
    still treats the key as required; confirm whether omitted keys are meant to
    type-check as well.
    """

    # Core fields.
    model: str
    messages: list[ChatMessage]
    stream: bool
    # Tool-related fields; each may be None.
    tools: Optional[list[dict[str, object]]]
    tool_choice: Optional[object]
    parallel_tool_calls: Optional[bool]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
PayloadPostProcessor = Callable[[OutgoingRequest], OutgoingRequest]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _identity(outcomming_request: OutgoingRequest) -> OutgoingRequest:
|
|
40
|
+
"""Keep the canonical request unchanged."""
|
|
41
|
+
|
|
42
|
+
return outcomming_request
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _drop_developer_messages(outcomming_request: OutgoingRequest) -> OutgoingRequest:
|
|
46
|
+
"""Remove all developer-role messages for providers that reject them."""
|
|
47
|
+
|
|
48
|
+
outcomming_request["messages"] = [
|
|
49
|
+
message
|
|
50
|
+
for message in outcomming_request["messages"]
|
|
51
|
+
if message.get("role") != "developer"
|
|
52
|
+
]
|
|
53
|
+
return outcomming_request
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Registry of payload hooks keyed by lower-case provider name.
# `post_process_outcomming_request` looks providers up here and uses the
# "vllm" entry when the name is not registered.
PAYLOAD_POST_PROCESSORS: dict[str, PayloadPostProcessor] = {
    "stepfun": _drop_developer_messages,
    "vllm": _identity,
}
"""Mapping from normalized `model_provider` name to payload rewrite hook."""
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def post_process_outcomming_request(
    outcomming_request: OutgoingRequest,
    model_provider: str | None,
) -> OutgoingRequest:
    """Run the provider's payload hook on a deep copy of the request.

    The provider name is stripped and lower-cased before lookup in
    ``PAYLOAD_POST_PROCESSORS``; a missing or unregistered name uses the
    ``"vllm"`` entry. The caller's request object is never mutated because
    the hook only sees a deep copy.

    Raises:
        TypeError: If the selected hook returns something that is not a dict.
    """
    request_copy = deepcopy(outcomming_request)
    normalized = str(model_provider or "").strip().lower()
    fallback = PAYLOAD_POST_PROCESSORS.get("vllm")
    hook = PAYLOAD_POST_PROCESSORS.get(normalized, fallback)
    if hook is None:
        # Neither the provider nor the fallback is registered.
        return request_copy

    result = hook(request_copy)
    if isinstance(result, dict):
        return result
    raise TypeError("payload processor must return a dict outcomming_request")
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .config import CompatServerConfig
|
|
4
|
+
from .payload_processors import post_process_outcomming_request
|
|
5
|
+
from .session_store import SessionStore
|
|
6
|
+
from .stream_router import StreamRouter
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ResponseServer:
    """Glue between the HTTP layer, the session store and the stream router."""

    def __init__(
        self,
        config: CompatServerConfig,
        session_store: SessionStore | None = None,
        stream_router: StreamRouter | None = None,
    ) -> None:
        """Store the config and collaborators, creating defaults when omitted.

        Args:
            config: Server configuration; also passed to the default router.
            session_store: Store to use; a new ``SessionStore`` when ``None``.
            stream_router: Router to use; a new ``StreamRouter(config)`` when
                ``None``.
        """
        self._config = config
        # Explicit None checks: an injected collaborator that happens to be
        # falsy must not be silently replaced by a fresh default.
        self._session_store = SessionStore() if session_store is None else session_store
        self._stream_router = StreamRouter(config) if stream_router is None else stream_router

    @property
    def config(self) -> CompatServerConfig:
        """The configuration this server was built with."""
        return self._config

    @property
    def session_store(self) -> SessionStore:
        """The session store in use."""
        return self._session_store

    @property
    def stream_router(self) -> StreamRouter:
        """The stream router in use."""
        return self._stream_router

    def list_models(self) -> dict[str, object]:
        """Return the model listing obtained from the stream router."""
        return self._stream_router.list_models()

    @staticmethod
    def _resolve_session_id(
        request_body: dict[str, object],
        request_headers: dict[str, str],
    ) -> str | None:
        """Pick the session id from the request header or the cache key.

        A non-empty ``x-client-request-id`` header wins. Otherwise the
        stripped string form of ``prompt_cache_key`` is used. A missing,
        null or blank key yields ``None``.
        """
        header_value = request_headers.get("x-client-request-id")
        if header_value:
            return header_value
        cache_key = request_body.get("prompt_cache_key")
        if cache_key is None:
            # An explicit JSON null must not become the literal id "None".
            return None
        return str(cache_key).strip() or None

    def start_response_stream(
        self,
        request_body: dict[str, object],
        request_headers: dict[str, str],
    ):
        """Build the downstream request, open its stream and route the events.

        Args:
            request_body: Parsed JSON body of the incoming request.
            request_headers: Incoming headers; ``x-client-request-id`` is
                looked up in lower case.

        Returns:
            Whatever ``StreamRouter.route_stream`` returns for this request.
        """
        outcomming_request = self._stream_router.build_outcomming_request(request_body)
        outcomming_request = post_process_outcomming_request(
            outcomming_request,
            self._config.model_provider,
        )
        custom_tool_names = self._stream_router.collect_custom_tool_names(request_body)
        session_id = self._resolve_session_id(request_body, request_headers)
        stored_response = self._session_store.create_response(
            session_id=session_id,
            model=str(outcomming_request["model"]),
        )
        incomming_stream = self._stream_router.open_outcomming_stream(
            outcomming_request
        )
        return self._stream_router.route_stream(
            incomming_stream,
            stored_response,
            outcomming_request,
            custom_tool_names,
        )
|