devcopilot 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/__init__.py +17 -0
- api/admin_config.py +1303 -0
- api/admin_routes.py +287 -0
- api/admin_static/admin.css +459 -0
- api/admin_static/admin.js +497 -0
- api/admin_static/index.html +77 -0
- api/admin_urls.py +34 -0
- api/app.py +194 -0
- api/command_utils.py +164 -0
- api/dependencies.py +144 -0
- api/detection.py +152 -0
- api/gateway_model_ids.py +54 -0
- api/model_catalog.py +133 -0
- api/model_router.py +125 -0
- api/models/__init__.py +45 -0
- api/models/anthropic.py +234 -0
- api/models/openai_responses.py +28 -0
- api/models/responses.py +60 -0
- api/optimization_handlers.py +154 -0
- api/request_pipeline.py +424 -0
- api/routes.py +156 -0
- api/runtime.py +334 -0
- api/validation_log.py +48 -0
- api/web_server_tools.py +22 -0
- api/web_tools/__init__.py +17 -0
- api/web_tools/constants.py +15 -0
- api/web_tools/egress.py +99 -0
- api/web_tools/outbound.py +278 -0
- api/web_tools/parsers.py +104 -0
- api/web_tools/request.py +87 -0
- api/web_tools/streaming.py +206 -0
- cli/__init__.py +5 -0
- cli/claude_env.py +12 -0
- cli/entrypoints.py +166 -0
- cli/env.example +209 -0
- cli/launchers/__init__.py +1 -0
- cli/launchers/claude.py +84 -0
- cli/launchers/codex.py +204 -0
- cli/launchers/codex_model_catalog.py +186 -0
- cli/launchers/common.py +93 -0
- cli/managed/__init__.py +6 -0
- cli/managed/claude.py +215 -0
- cli/managed/manager.py +157 -0
- cli/managed/session.py +260 -0
- cli/process_registry.py +78 -0
- config/__init__.py +5 -0
- config/constants.py +13 -0
- config/logging_config.py +159 -0
- config/nim.py +118 -0
- config/paths.py +91 -0
- config/provider_catalog.py +259 -0
- config/provider_ids.py +7 -0
- config/settings.py +538 -0
- core/__init__.py +1 -0
- core/anthropic/__init__.py +46 -0
- core/anthropic/content.py +31 -0
- core/anthropic/conversion.py +587 -0
- core/anthropic/emitted_sse_tracker.py +346 -0
- core/anthropic/errors.py +70 -0
- core/anthropic/native_messages_request.py +280 -0
- core/anthropic/native_sse_block_policy.py +313 -0
- core/anthropic/provider_stream_error.py +34 -0
- core/anthropic/server_tool_sse.py +14 -0
- core/anthropic/sse.py +440 -0
- core/anthropic/stream_contracts.py +205 -0
- core/anthropic/stream_recovery.py +346 -0
- core/anthropic/stream_recovery_session.py +133 -0
- core/anthropic/thinking.py +140 -0
- core/anthropic/tokens.py +117 -0
- core/anthropic/tools.py +212 -0
- core/anthropic/utils.py +9 -0
- core/openai_responses/__init__.py +5 -0
- core/openai_responses/adapter.py +31 -0
- core/openai_responses/anthropic_sse.py +59 -0
- core/openai_responses/errors.py +22 -0
- core/openai_responses/events.py +19 -0
- core/openai_responses/ids.py +21 -0
- core/openai_responses/input.py +258 -0
- core/openai_responses/items.py +37 -0
- core/openai_responses/reasoning.py +52 -0
- core/openai_responses/stream.py +25 -0
- core/openai_responses/stream_state.py +654 -0
- core/openai_responses/tools.py +374 -0
- core/openai_responses/usage.py +37 -0
- core/rate_limit.py +60 -0
- core/trace.py +216 -0
- devcopilot-0.2.0.dist-info/METADATA +687 -0
- devcopilot-0.2.0.dist-info/RECORD +189 -0
- devcopilot-0.2.0.dist-info/WHEEL +4 -0
- devcopilot-0.2.0.dist-info/entry_points.txt +6 -0
- devcopilot-0.2.0.dist-info/licenses/LICENSE +21 -0
- messaging/__init__.py +26 -0
- messaging/cli_event_constants.py +67 -0
- messaging/command_context.py +66 -0
- messaging/command_dispatcher.py +37 -0
- messaging/commands.py +275 -0
- messaging/event_parser.py +181 -0
- messaging/limiter.py +300 -0
- messaging/models.py +36 -0
- messaging/node_event_pipeline.py +127 -0
- messaging/node_runner.py +342 -0
- messaging/platforms/__init__.py +15 -0
- messaging/platforms/base.py +228 -0
- messaging/platforms/discord.py +567 -0
- messaging/platforms/factory.py +103 -0
- messaging/platforms/outbox.py +144 -0
- messaging/platforms/telegram.py +688 -0
- messaging/platforms/voice_flow.py +295 -0
- messaging/rendering/__init__.py +3 -0
- messaging/rendering/discord_markdown.py +318 -0
- messaging/rendering/markdown_tables.py +49 -0
- messaging/rendering/profiles.py +55 -0
- messaging/rendering/telegram_markdown.py +327 -0
- messaging/safe_diagnostics.py +17 -0
- messaging/session.py +334 -0
- messaging/transcript.py +581 -0
- messaging/transcription.py +164 -0
- messaging/trees/__init__.py +15 -0
- messaging/trees/data.py +482 -0
- messaging/trees/manager.py +433 -0
- messaging/trees/processor.py +179 -0
- messaging/trees/repository.py +177 -0
- messaging/turn_intake.py +235 -0
- messaging/ui_updates.py +101 -0
- messaging/voice.py +76 -0
- messaging/workflow.py +200 -0
- providers/__init__.py +31 -0
- providers/base.py +152 -0
- providers/cerebras/__init__.py +7 -0
- providers/cerebras/client.py +31 -0
- providers/cerebras/request.py +55 -0
- providers/codestral/__init__.py +7 -0
- providers/codestral/client.py +34 -0
- providers/deepseek/__init__.py +11 -0
- providers/deepseek/client.py +51 -0
- providers/deepseek/request.py +475 -0
- providers/defaults.py +41 -0
- providers/error_mapping.py +309 -0
- providers/exceptions.py +113 -0
- providers/fireworks/__init__.py +5 -0
- providers/fireworks/client.py +45 -0
- providers/fireworks/request.py +48 -0
- providers/gemini/__init__.py +7 -0
- providers/gemini/client.py +49 -0
- providers/gemini/request.py +199 -0
- providers/groq/__init__.py +7 -0
- providers/groq/client.py +31 -0
- providers/groq/request.py +83 -0
- providers/kimi/__init__.py +10 -0
- providers/kimi/client.py +53 -0
- providers/kimi/request.py +42 -0
- providers/llamacpp/__init__.py +3 -0
- providers/llamacpp/client.py +16 -0
- providers/lmstudio/__init__.py +5 -0
- providers/lmstudio/client.py +16 -0
- providers/mistral/__init__.py +7 -0
- providers/mistral/client.py +31 -0
- providers/mistral/request.py +37 -0
- providers/model_listing.py +133 -0
- providers/nvidia_nim/__init__.py +7 -0
- providers/nvidia_nim/client.py +91 -0
- providers/nvidia_nim/request.py +430 -0
- providers/nvidia_nim/voice.py +95 -0
- providers/ollama/__init__.py +7 -0
- providers/ollama/client.py +39 -0
- providers/open_router/__init__.py +7 -0
- providers/open_router/client.py +124 -0
- providers/open_router/request.py +42 -0
- providers/opencode/__init__.py +11 -0
- providers/opencode/client.py +31 -0
- providers/opencode/request.py +35 -0
- providers/rate_limit.py +300 -0
- providers/registry.py +527 -0
- providers/transports/__init__.py +1 -0
- providers/transports/anthropic_messages/__init__.py +5 -0
- providers/transports/anthropic_messages/http.py +118 -0
- providers/transports/anthropic_messages/recovery.py +206 -0
- providers/transports/anthropic_messages/stream.py +295 -0
- providers/transports/anthropic_messages/transport.py +236 -0
- providers/transports/openai_chat/__init__.py +5 -0
- providers/transports/openai_chat/recovery.py +217 -0
- providers/transports/openai_chat/stream.py +384 -0
- providers/transports/openai_chat/tool_calls.py +293 -0
- providers/transports/openai_chat/transport.py +156 -0
- providers/wafer/__init__.py +10 -0
- providers/wafer/client.py +50 -0
- providers/zai/__init__.py +10 -0
- providers/zai/client.py +46 -0
- providers/zai/request.py +42 -0
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""Optimization handlers for fast-path API responses.
|
|
2
|
+
|
|
3
|
+
Each handler returns a MessagesResponse if the request matches and the
|
|
4
|
+
optimization is enabled, otherwise None.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import uuid
|
|
8
|
+
|
|
9
|
+
from loguru import logger
|
|
10
|
+
|
|
11
|
+
from config.settings import Settings
|
|
12
|
+
|
|
13
|
+
from .command_utils import extract_command_prefix, extract_filepaths_from_command
|
|
14
|
+
from .detection import (
|
|
15
|
+
is_filepath_extraction_request,
|
|
16
|
+
is_prefix_detection_request,
|
|
17
|
+
is_quota_check_request,
|
|
18
|
+
is_suggestion_mode_request,
|
|
19
|
+
is_title_generation_request,
|
|
20
|
+
)
|
|
21
|
+
from .models.anthropic import MessagesRequest
|
|
22
|
+
from .models.responses import MessagesResponse, Usage
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _text_response(
    request_data: MessagesRequest,
    text: str,
    *,
    input_tokens: int,
    output_tokens: int,
) -> MessagesResponse:
    """Build a single-text-block ``MessagesResponse`` for a locally answered request.

    The response carries a fresh ``msg_<uuid4>`` id, echoes the model named in
    ``request_data``, ends with ``stop_reason="end_turn"`` and reports the
    caller-supplied token counts as its usage.
    """
    message_id = f"msg_{uuid.uuid4()}"
    text_block = {"type": "text", "text": text}
    reported_usage = Usage(input_tokens=input_tokens, output_tokens=output_tokens)
    return MessagesResponse(
        id=message_id,
        model=request_data.model,
        content=[text_block],
        stop_reason="end_turn",
        usage=reported_usage,
    )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def try_prefix_detection(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer a command-prefix detection request locally.

    Returns ``None`` when ``settings.fast_prefix_detection`` is off or the
    request is not recognised as a prefix-detection request; otherwise returns
    a canned response whose text is the prefix extracted from the command.
    """
    if not settings.fast_prefix_detection:
        return None

    matched, command = is_prefix_detection_request(request_data)
    if matched:
        logger.info("Optimization: Fast prefix detection request")
        prefix = extract_command_prefix(command)
        return _text_response(
            request_data,
            prefix,
            input_tokens=100,
            output_tokens=5,
        )
    return None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def try_quota_mock(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer a quota probe request with a canned reply.

    Returns ``None`` unless ``settings.enable_network_probe_mock`` is on and
    the request is recognised as a quota check.
    """
    # ``and`` short-circuits, so detection only runs when the feature is on.
    applies = settings.enable_network_probe_mock and is_quota_check_request(
        request_data
    )
    if not applies:
        return None

    logger.info("Optimization: Intercepted and mocked quota probe")
    return _text_response(
        request_data,
        "Quota check passed.",
        input_tokens=10,
        output_tokens=5,
    )
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def try_title_skip(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer a title generation request with the fixed title ``"Conversation"``.

    Returns ``None`` unless ``settings.enable_title_generation_skip`` is on and
    the request is recognised as a title generation request.
    """
    # ``and`` short-circuits, so detection only runs when the feature is on.
    applies = settings.enable_title_generation_skip and is_title_generation_request(
        request_data
    )
    if not applies:
        return None

    logger.info("Optimization: Skipped title generation request")
    return _text_response(
        request_data,
        "Conversation",
        input_tokens=100,
        output_tokens=5,
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def try_suggestion_skip(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer a suggestion mode request with an empty text reply.

    Returns ``None`` unless ``settings.enable_suggestion_mode_skip`` is on and
    the request is recognised as a suggestion mode request.
    """
    # ``and`` short-circuits, so detection only runs when the feature is on.
    applies = settings.enable_suggestion_mode_skip and is_suggestion_mode_request(
        request_data
    )
    if not applies:
        return None

    logger.info("Optimization: Skipped suggestion mode request")
    return _text_response(
        request_data,
        "",
        input_tokens=100,
        output_tokens=1,
    )
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def try_filepath_mock(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer a filepath extraction request without calling a provider.

    Returns ``None`` when ``settings.enable_filepath_extraction_mock`` is off
    or the request is not recognised as a filepath extraction request.
    Otherwise the reply text is whatever ``extract_filepaths_from_command``
    produces for the detected command and its output.
    """
    if not settings.enable_filepath_extraction_mock:
        return None

    matched, command, command_output = is_filepath_extraction_request(request_data)
    if matched:
        extracted = extract_filepaths_from_command(command, command_output)
        logger.info("Optimization: Mocked filepath extraction")
        return _text_response(
            request_data,
            extracted,
            input_tokens=100,
            output_tokens=10,
        )
    return None
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# Handlers are tried in list order and the first match wins, so the
# cheapest/most common optimizations come first for a faster short-circuit.
OPTIMIZATION_HANDLERS = [
    try_quota_mock,
    try_prefix_detection,
    try_title_skip,
    try_suggestion_skip,
    try_filepath_mock,
]
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def try_optimizations(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Return the first non-``None`` handler result, or ``None`` if none match.

    Handlers from ``OPTIMIZATION_HANDLERS`` are invoked in list order; once one
    produces a response the remaining handlers are not called.
    """
    # The generator is lazy, so ``next`` stops calling handlers at the first hit.
    outcomes = (handler(request_data, settings) for handler in OPTIMIZATION_HANDLERS)
    return next((hit for hit in outcomes if hit is not None), None)
|
api/request_pipeline.py
ADDED
|
@@ -0,0 +1,424 @@
|
|
|
1
|
+
"""API request pipeline for routing, intercepts, and provider execution."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import traceback
|
|
6
|
+
import uuid
|
|
7
|
+
from collections.abc import AsyncIterator, Callable
|
|
8
|
+
from dataclasses import replace
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from fastapi import HTTPException
|
|
12
|
+
from fastapi.responses import JSONResponse, StreamingResponse
|
|
13
|
+
from loguru import logger
|
|
14
|
+
|
|
15
|
+
from config.provider_catalog import PROVIDER_CATALOG
|
|
16
|
+
from config.settings import Settings
|
|
17
|
+
from core.anthropic import get_token_count, get_user_facing_error_message
|
|
18
|
+
from core.anthropic.sse import ANTHROPIC_SSE_RESPONSE_HEADERS
|
|
19
|
+
from core.openai_responses import OpenAIResponsesAdapter
|
|
20
|
+
from core.trace import api_messages_request_snapshot, trace_event, traced_async_stream
|
|
21
|
+
from providers.base import BaseProvider
|
|
22
|
+
from providers.exceptions import InvalidRequestError, ProviderError
|
|
23
|
+
|
|
24
|
+
from .detection import is_safety_classifier_request
|
|
25
|
+
from .model_router import ModelRouter, RoutedMessagesRequest
|
|
26
|
+
from .models.anthropic import MessagesRequest, TokenCountRequest
|
|
27
|
+
from .models.openai_responses import OpenAIResponsesRequest
|
|
28
|
+
from .models.responses import TokenCountResponse
|
|
29
|
+
from .optimization_handlers import try_optimizations
|
|
30
|
+
from .web_tools.egress import WebFetchEgressPolicy
|
|
31
|
+
from .web_tools.request import (
|
|
32
|
+
is_web_server_tool_request,
|
|
33
|
+
openai_chat_upstream_server_tool_error,
|
|
34
|
+
)
|
|
35
|
+
from .web_tools.streaming import stream_web_server_tool_response
|
|
36
|
+
|
|
37
|
+
# Callable shapes for the collaborators injected into the pipeline.
# Token counter: called as (messages, system, tools) and returns a token count.
TokenCounter = Callable[
    [list[Any], str | list[Any] | None, list[Any] | None],
    int,
]
# Provider lookup keyed by provider id.
ProviderGetter = Callable[[str], BaseProvider]
# Intercept hook: a non-None result short-circuits the request.
MessageIntercept = Callable[[RoutedMessagesRequest], object | None]

# Providers that use ``/chat/completions`` + Anthropic-to-OpenAI conversion.
_OPENAI_CHAT_UPSTREAM_IDS = frozenset(
    {
        catalog_id
        for catalog_id, spec in PROVIDER_CATALOG.items()
        if spec.transport_type == "openai_chat"
    }
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def anthropic_sse_streaming_response(body: AsyncIterator[str]) -> StreamingResponse:
    """Wrap ``body`` in a ``text/event-stream`` response with the Anthropic SSE headers."""
    sse_response = StreamingResponse(
        content=body,
        headers=ANTHROPIC_SSE_RESPONSE_HEADERS,
        media_type="text/event-stream",
    )
    return sse_response
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def openai_responses_sse_streaming_response(
    body: AsyncIterator[str],
) -> StreamingResponse:
    """Wrap ``body`` in a ``text/event-stream`` response with the Responses-adapter SSE headers."""
    sse_response = StreamingResponse(
        content=body,
        headers=OpenAIResponsesAdapter.sse_headers,
        media_type="text/event-stream",
    )
    return sse_response
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _http_status_for_unexpected_pipeline_exception(_exc: BaseException) -> int:
|
|
70
|
+
"""HTTP status for uncaught non-provider failures."""
|
|
71
|
+
return 500
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _log_unexpected_pipeline_exception(
    settings: Settings,
    exc: BaseException,
    *,
    context: str,
    request_id: str | None = None,
) -> None:
    """Log an unexpected pipeline failure, hiding exception text unless opted in.

    Args:
        settings: Read for ``log_api_error_tracebacks``. When truthy, the
            exception text and its traceback are logged; otherwise only the
            exception type name is.
        exc: The exception being reported.
        context: Label placed at the start of each log line.
        request_id: Optional id appended to the log line when provided.
    """
    if not settings.log_api_error_tracebacks:
        # Redacted mode: never echo the exception message.
        if request_id is not None:
            logger.error(
                "{} request_id={} exc_type={}",
                context,
                request_id,
                type(exc).__name__,
            )
        else:
            logger.error("{} exc_type={}", context, type(exc).__name__)
        return

    if request_id is not None:
        logger.error("{} request_id={}: {}", context, request_id, exc)
    else:
        logger.error("{}: {}", context, exc)
    # Render the traceback from ``exc`` itself. ``traceback.format_exc()``
    # reports whatever exception is currently being handled, so it would log
    # "NoneType: None" (or an unrelated error) if this helper were ever called
    # outside the ``except`` block that caught ``exc``.
    logger.error(
        "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
    )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _require_non_empty_messages(messages: list[Any]) -> None:
|
|
101
|
+
if not messages:
|
|
102
|
+
raise InvalidRequestError("messages cannot be empty")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class ApiRequestPipeline:
    """Run API requests through validation, routing, intercepts and providers.

    The pipeline exposes three entry points: ``create_message`` (Anthropic
    messages), ``create_response`` (OpenAI Responses, streaming only) and
    ``count_tokens``. The remaining methods are private steps shared by them.
    """

    def __init__(
        self,
        settings: Settings,
        provider_getter: ProviderGetter,
        model_router: ModelRouter | None = None,
        token_counter: TokenCounter = get_token_count,
        responses_adapter: OpenAIResponsesAdapter | None = None,
    ) -> None:
        """Store collaborators, building a default router/adapter when omitted."""
        self._settings = settings
        self._provider_getter = provider_getter
        self._token_counter = token_counter
        self._model_router = model_router or ModelRouter(settings)
        self._responses_adapter = responses_adapter or OpenAIResponsesAdapter()
        # Tried in this order by ``_run_message_intercepts``; first non-None wins.
        self._message_intercepts: tuple[MessageIntercept, ...] = (
            self._intercept_web_server_tool,
            self._intercept_local_optimization,
        )

    def create_message(self, request_data: MessagesRequest) -> object:
        """Handle an Anthropic-compatible messages request.

        Returns the result of the first matching intercept, or otherwise an
        SSE ``StreamingResponse`` wrapping the provider stream.

        Raises:
            ProviderError: Propagated unchanged.
            HTTPException: Raised for any other exception after it is logged.
        """
        try:
            _require_non_empty_messages(request_data.messages)
            routed = self._apply_message_routing_policies(
                self._model_router.resolve_messages_request(request_data)
            )
            self._reject_unsupported_server_tools(routed)

            short_circuit = self._run_message_intercepts(routed)
            if short_circuit is not None:
                return short_circuit

            logger.debug("No optimization matched, routing to provider")
            provider_events = self._provider_stream(
                routed,
                wire_api="messages",
                raw_log_label="FULL_PAYLOAD",
                raw_log_payload=routed.request.model_dump(),
            )
            return anthropic_sse_streaming_response(provider_events)
        except ProviderError:
            raise
        except Exception as err:
            _log_unexpected_pipeline_exception(
                self._settings, err, context="CREATE_MESSAGE_ERROR"
            )
            raise HTTPException(
                status_code=_http_status_for_unexpected_pipeline_exception(err),
                detail=get_user_facing_error_message(err),
            ) from err

    def _responses_error_response(
        self, *, status_code: int, message: str, error_type: str
    ) -> JSONResponse:
        """Wrap an adapter-formatted error payload in a ``JSONResponse``."""
        return JSONResponse(
            status_code=status_code,
            content=self._responses_adapter.error_payload(
                message=message,
                error_type=error_type,
            ),
        )

    async def create_response(self, request_data: OpenAIResponsesRequest) -> object:
        """Handle an OpenAI Responses-compatible request (streaming only).

        The request is converted to an Anthropic messages payload, routed, and
        streamed from the provider; the stream is converted back to Responses
        SSE by the adapter. Failures are returned as JSON error responses
        rather than raised.
        """
        request_payload = request_data.model_dump(mode="json", exclude_none=True)
        if request_data.stream is False:
            # Only an explicit ``stream=false`` is rejected; an omitted value passes.
            rejection = InvalidRequestError(
                "DevCopilot /v1/responses supports streaming only; omit stream or set stream=true."
            )
            return self._responses_error_response(
                status_code=rejection.status_code,
                message=rejection.message,
                error_type=rejection.error_type,
            )

        try:
            anthropic_payload = self._responses_adapter.to_anthropic_payload(
                request_payload
            )
            messages_request = MessagesRequest(**anthropic_payload)
            _require_non_empty_messages(messages_request.messages)
            routed = self._model_router.resolve_messages_request(messages_request)
            self._reject_unsupported_server_tools(routed)

            anthropic_events = self._provider_stream(
                routed,
                wire_api="responses",
                raw_log_label="FULL_RESPONSES_PAYLOAD",
                raw_log_payload=request_payload,
            )
            responses_events = self._responses_adapter.iter_sse_from_anthropic(
                anthropic_events,
                request_payload,
            )
            return openai_responses_sse_streaming_response(responses_events)
        except OpenAIResponsesAdapter.ConversionError as conversion_err:
            # Conversion problems are reported as invalid-request errors.
            rejection = InvalidRequestError(str(conversion_err))
            return self._responses_error_response(
                status_code=rejection.status_code,
                message=rejection.message,
                error_type=rejection.error_type,
            )
        except ProviderError as provider_err:
            return self._responses_error_response(
                status_code=provider_err.status_code,
                message=provider_err.message,
                error_type=provider_err.error_type,
            )
        except Exception as err:
            _log_unexpected_pipeline_exception(
                self._settings,
                err,
                context="CREATE_RESPONSE_ERROR",
            )
            return self._responses_error_response(
                status_code=_http_status_for_unexpected_pipeline_exception(err),
                message=get_user_facing_error_message(err),
                error_type="api_error",
            )

    def count_tokens(self, request_data: TokenCountRequest) -> TokenCountResponse:
        """Count input tokens for a request after model routing.

        A fresh ``req_<12 hex>`` id is bound to the logger context for the
        duration of the call.

        Raises:
            ProviderError: Propagated unchanged.
            HTTPException: Raised for any other exception after it is logged.
        """
        request_id = f"req_{uuid.uuid4().hex[:12]}"
        with logger.contextualize(request_id=request_id):
            try:
                _require_non_empty_messages(request_data.messages)
                routed = self._model_router.resolve_token_count_request(request_data)
                request = routed.request
                resolved = routed.resolved
                token_total = self._token_counter(
                    request.messages, request.system, request.tools
                )
                trace_event(
                    stage="routing",
                    event="api.route.resolved",
                    source="api",
                    kind="count_tokens",
                    provider_id=resolved.provider_id,
                    provider_model=resolved.provider_model,
                    provider_model_ref=resolved.provider_model_ref,
                    gateway_model=request.model,
                )
                trace_event(
                    stage="ingress",
                    event="api.count_tokens.completed",
                    source="api",
                    message_count=len(request.messages),
                    input_tokens=token_total,
                    snapshot=api_messages_request_snapshot(request),
                )
                return TokenCountResponse(input_tokens=token_total)
            except ProviderError:
                raise
            except Exception as err:
                _log_unexpected_pipeline_exception(
                    self._settings,
                    err,
                    context="COUNT_TOKENS_ERROR",
                    request_id=request_id,
                )
                raise HTTPException(
                    status_code=_http_status_for_unexpected_pipeline_exception(err),
                    detail=get_user_facing_error_message(err),
                ) from err

    def _reject_unsupported_server_tools(self, routed: RoutedMessagesRequest) -> None:
        """Raise ``InvalidRequestError`` for server tools an OpenAI-chat upstream cannot serve.

        Requests routed to providers outside ``_OPENAI_CHAT_UPSTREAM_IDS`` are
        not checked.
        """
        if routed.resolved.provider_id in _OPENAI_CHAT_UPSTREAM_IDS:
            problem = openai_chat_upstream_server_tool_error(
                routed.request,
                web_tools_enabled=self._settings.enable_web_server_tools,
            )
            if problem is not None:
                raise InvalidRequestError(problem)

    def _apply_message_routing_policies(
        self, routed: RoutedMessagesRequest
    ) -> RoutedMessagesRequest:
        """Disable thinking for safety-classifier requests.

        Non-classifier requests are returned untouched. For classifier
        requests a trace event is emitted, and a copy with
        ``thinking_enabled=False`` is returned when thinking was enabled.
        """
        if not is_safety_classifier_request(routed.request):
            return routed

        thinking_was_on = routed.resolved.thinking_enabled
        trace_event(
            stage="routing",
            event="api.optimization.safety_classifier_no_thinking",
            source="api",
            model=routed.request.model,
            changed=thinking_was_on,
        )
        if thinking_was_on:
            return RoutedMessagesRequest(
                request=routed.request,
                resolved=replace(routed.resolved, thinking_enabled=False),
            )
        return routed

    def _run_message_intercepts(self, routed: RoutedMessagesRequest) -> object | None:
        """Return the first non-``None`` intercept result, or ``None``."""
        # Lazy generator: intercepts after the first hit are never invoked.
        outcomes = (intercept(routed) for intercept in self._message_intercepts)
        return next((hit for hit in outcomes if hit is not None), None)

    def _intercept_web_server_tool(
        self, routed: RoutedMessagesRequest
    ) -> object | None:
        """Serve web server tool requests locally when the feature is enabled.

        Returns ``None`` when web server tools are disabled or the request is
        not a web server tool request; otherwise an SSE ``StreamingResponse``.
        """
        applies = self._settings.enable_web_server_tools and is_web_server_tool_request(
            routed.request
        )
        if not applies:
            return None

        request = routed.request
        input_tokens = self._token_counter(
            request.messages, request.system, request.tools
        )
        trace_event(
            stage="routing",
            event="api.optimization.web_server_tool",
            source="api",
            model=request.model,
        )
        egress_policy = WebFetchEgressPolicy(
            allow_private_network_targets=self._settings.web_fetch_allow_private_networks,
            allowed_schemes=self._settings.web_fetch_allowed_scheme_set(),
        )
        tool_events = stream_web_server_tool_response(
            request,
            input_tokens=input_tokens,
            web_fetch_egress=egress_policy,
            verbose_client_errors=self._settings.log_api_error_tracebacks,
        )
        return anthropic_sse_streaming_response(tool_events)

    def _intercept_local_optimization(
        self, routed: RoutedMessagesRequest
    ) -> object | None:
        """Return a locally produced response when an optimization handler matches."""
        shortcut = try_optimizations(routed.request, self._settings)
        if shortcut is not None:
            trace_event(
                stage="routing",
                event="api.optimization.short_circuit",
                source="api",
                model=routed.request.model,
            )
        return shortcut

    def _provider_stream(
        self,
        routed: RoutedMessagesRequest,
        *,
        wire_api: str,
        raw_log_label: str,
        raw_log_payload: Any,
    ) -> AsyncIterator[str]:
        """Preflight the provider, emit trace events and return its traced stream.

        Args:
            routed: The routed request and its resolved provider details.
            wire_api: ``"responses"`` selects the Responses-specific trace
                event names; any other value uses the messages event names.
            raw_log_label: Prefix for the raw payload debug log line.
            raw_log_payload: Payload logged when ``log_raw_api_payloads`` is set.
        """
        request = routed.request
        resolved = routed.resolved
        for_responses = wire_api == "responses"
        if for_responses:
            received_event = "api.responses.request.received"
            completed_event = "api.responses.stream_completed"
            interrupted_event = "api.responses.stream_interrupted"
        else:
            received_event = "api.request.received"
            completed_event = "api.response.stream_completed"
            interrupted_event = "api.response.stream_interrupted"

        provider = self._provider_getter(resolved.provider_id)
        provider.preflight_stream(
            request,
            thinking_enabled=resolved.thinking_enabled,
        )

        route_trace: dict[str, Any] = {
            "stage": "routing",
            "event": "api.route.resolved",
            "source": "api",
            "provider_id": resolved.provider_id,
            "provider_model": resolved.provider_model,
            "provider_model_ref": resolved.provider_model_ref,
            "gateway_model": request.model,
            "thinking_enabled": resolved.thinking_enabled,
        }
        if for_responses:
            route_trace["wire_api"] = "responses"
        trace_event(**route_trace)

        request_id = f"req_{uuid.uuid4().hex[:12]}"
        trace_event(
            stage="ingress",
            event=received_event,
            source="api",
            message_count=len(request.messages),
            snapshot=api_messages_request_snapshot(request),
            request_id=request_id,
        )

        if self._settings.log_raw_api_payloads:
            # The f-string only injects the label; the doubled braces survive as
            # ``{}`` placeholders that the logger fills from the arguments.
            logger.debug(f"{raw_log_label} [{{}}]: {{}}", request_id, raw_log_payload)

        input_tokens = self._token_counter(
            request.messages,
            request.system,
            request.tools,
        )
        upstream = provider.stream_response(
            request,
            input_tokens=input_tokens,
            request_id=request_id,
            thinking_enabled=resolved.thinking_enabled,
        )
        return traced_async_stream(
            upstream,
            stage="egress",
            source="api",
            complete_event=completed_event,
            interrupted_event=interrupted_event,
            chunk_event=None,
            extra={
                "request_id": request_id,
                "provider_id": resolved.provider_id,
                "gateway_model": request.model,
            },
        )
|