devcopilot 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. api/__init__.py +17 -0
  2. api/admin_config.py +1303 -0
  3. api/admin_routes.py +287 -0
  4. api/admin_static/admin.css +459 -0
  5. api/admin_static/admin.js +497 -0
  6. api/admin_static/index.html +77 -0
  7. api/admin_urls.py +34 -0
  8. api/app.py +194 -0
  9. api/command_utils.py +164 -0
  10. api/dependencies.py +144 -0
  11. api/detection.py +152 -0
  12. api/gateway_model_ids.py +54 -0
  13. api/model_catalog.py +133 -0
  14. api/model_router.py +125 -0
  15. api/models/__init__.py +45 -0
  16. api/models/anthropic.py +234 -0
  17. api/models/openai_responses.py +28 -0
  18. api/models/responses.py +60 -0
  19. api/optimization_handlers.py +154 -0
  20. api/request_pipeline.py +424 -0
  21. api/routes.py +156 -0
  22. api/runtime.py +334 -0
  23. api/validation_log.py +48 -0
  24. api/web_server_tools.py +22 -0
  25. api/web_tools/__init__.py +17 -0
  26. api/web_tools/constants.py +15 -0
  27. api/web_tools/egress.py +99 -0
  28. api/web_tools/outbound.py +278 -0
  29. api/web_tools/parsers.py +104 -0
  30. api/web_tools/request.py +87 -0
  31. api/web_tools/streaming.py +206 -0
  32. cli/__init__.py +5 -0
  33. cli/claude_env.py +12 -0
  34. cli/entrypoints.py +166 -0
  35. cli/env.example +209 -0
  36. cli/launchers/__init__.py +1 -0
  37. cli/launchers/claude.py +84 -0
  38. cli/launchers/codex.py +204 -0
  39. cli/launchers/codex_model_catalog.py +186 -0
  40. cli/launchers/common.py +93 -0
  41. cli/managed/__init__.py +6 -0
  42. cli/managed/claude.py +215 -0
  43. cli/managed/manager.py +157 -0
  44. cli/managed/session.py +260 -0
  45. cli/process_registry.py +78 -0
  46. config/__init__.py +5 -0
  47. config/constants.py +13 -0
  48. config/logging_config.py +159 -0
  49. config/nim.py +118 -0
  50. config/paths.py +91 -0
  51. config/provider_catalog.py +259 -0
  52. config/provider_ids.py +7 -0
  53. config/settings.py +538 -0
  54. core/__init__.py +1 -0
  55. core/anthropic/__init__.py +46 -0
  56. core/anthropic/content.py +31 -0
  57. core/anthropic/conversion.py +587 -0
  58. core/anthropic/emitted_sse_tracker.py +346 -0
  59. core/anthropic/errors.py +70 -0
  60. core/anthropic/native_messages_request.py +280 -0
  61. core/anthropic/native_sse_block_policy.py +313 -0
  62. core/anthropic/provider_stream_error.py +34 -0
  63. core/anthropic/server_tool_sse.py +14 -0
  64. core/anthropic/sse.py +440 -0
  65. core/anthropic/stream_contracts.py +205 -0
  66. core/anthropic/stream_recovery.py +346 -0
  67. core/anthropic/stream_recovery_session.py +133 -0
  68. core/anthropic/thinking.py +140 -0
  69. core/anthropic/tokens.py +117 -0
  70. core/anthropic/tools.py +212 -0
  71. core/anthropic/utils.py +9 -0
  72. core/openai_responses/__init__.py +5 -0
  73. core/openai_responses/adapter.py +31 -0
  74. core/openai_responses/anthropic_sse.py +59 -0
  75. core/openai_responses/errors.py +22 -0
  76. core/openai_responses/events.py +19 -0
  77. core/openai_responses/ids.py +21 -0
  78. core/openai_responses/input.py +258 -0
  79. core/openai_responses/items.py +37 -0
  80. core/openai_responses/reasoning.py +52 -0
  81. core/openai_responses/stream.py +25 -0
  82. core/openai_responses/stream_state.py +654 -0
  83. core/openai_responses/tools.py +374 -0
  84. core/openai_responses/usage.py +37 -0
  85. core/rate_limit.py +60 -0
  86. core/trace.py +216 -0
  87. devcopilot-0.2.0.dist-info/METADATA +687 -0
  88. devcopilot-0.2.0.dist-info/RECORD +189 -0
  89. devcopilot-0.2.0.dist-info/WHEEL +4 -0
  90. devcopilot-0.2.0.dist-info/entry_points.txt +6 -0
  91. devcopilot-0.2.0.dist-info/licenses/LICENSE +21 -0
  92. messaging/__init__.py +26 -0
  93. messaging/cli_event_constants.py +67 -0
  94. messaging/command_context.py +66 -0
  95. messaging/command_dispatcher.py +37 -0
  96. messaging/commands.py +275 -0
  97. messaging/event_parser.py +181 -0
  98. messaging/limiter.py +300 -0
  99. messaging/models.py +36 -0
  100. messaging/node_event_pipeline.py +127 -0
  101. messaging/node_runner.py +342 -0
  102. messaging/platforms/__init__.py +15 -0
  103. messaging/platforms/base.py +228 -0
  104. messaging/platforms/discord.py +567 -0
  105. messaging/platforms/factory.py +103 -0
  106. messaging/platforms/outbox.py +144 -0
  107. messaging/platforms/telegram.py +688 -0
  108. messaging/platforms/voice_flow.py +295 -0
  109. messaging/rendering/__init__.py +3 -0
  110. messaging/rendering/discord_markdown.py +318 -0
  111. messaging/rendering/markdown_tables.py +49 -0
  112. messaging/rendering/profiles.py +55 -0
  113. messaging/rendering/telegram_markdown.py +327 -0
  114. messaging/safe_diagnostics.py +17 -0
  115. messaging/session.py +334 -0
  116. messaging/transcript.py +581 -0
  117. messaging/transcription.py +164 -0
  118. messaging/trees/__init__.py +15 -0
  119. messaging/trees/data.py +482 -0
  120. messaging/trees/manager.py +433 -0
  121. messaging/trees/processor.py +179 -0
  122. messaging/trees/repository.py +177 -0
  123. messaging/turn_intake.py +235 -0
  124. messaging/ui_updates.py +101 -0
  125. messaging/voice.py +76 -0
  126. messaging/workflow.py +200 -0
  127. providers/__init__.py +31 -0
  128. providers/base.py +152 -0
  129. providers/cerebras/__init__.py +7 -0
  130. providers/cerebras/client.py +31 -0
  131. providers/cerebras/request.py +55 -0
  132. providers/codestral/__init__.py +7 -0
  133. providers/codestral/client.py +34 -0
  134. providers/deepseek/__init__.py +11 -0
  135. providers/deepseek/client.py +51 -0
  136. providers/deepseek/request.py +475 -0
  137. providers/defaults.py +41 -0
  138. providers/error_mapping.py +309 -0
  139. providers/exceptions.py +113 -0
  140. providers/fireworks/__init__.py +5 -0
  141. providers/fireworks/client.py +45 -0
  142. providers/fireworks/request.py +48 -0
  143. providers/gemini/__init__.py +7 -0
  144. providers/gemini/client.py +49 -0
  145. providers/gemini/request.py +199 -0
  146. providers/groq/__init__.py +7 -0
  147. providers/groq/client.py +31 -0
  148. providers/groq/request.py +83 -0
  149. providers/kimi/__init__.py +10 -0
  150. providers/kimi/client.py +53 -0
  151. providers/kimi/request.py +42 -0
  152. providers/llamacpp/__init__.py +3 -0
  153. providers/llamacpp/client.py +16 -0
  154. providers/lmstudio/__init__.py +5 -0
  155. providers/lmstudio/client.py +16 -0
  156. providers/mistral/__init__.py +7 -0
  157. providers/mistral/client.py +31 -0
  158. providers/mistral/request.py +37 -0
  159. providers/model_listing.py +133 -0
  160. providers/nvidia_nim/__init__.py +7 -0
  161. providers/nvidia_nim/client.py +91 -0
  162. providers/nvidia_nim/request.py +430 -0
  163. providers/nvidia_nim/voice.py +95 -0
  164. providers/ollama/__init__.py +7 -0
  165. providers/ollama/client.py +39 -0
  166. providers/open_router/__init__.py +7 -0
  167. providers/open_router/client.py +124 -0
  168. providers/open_router/request.py +42 -0
  169. providers/opencode/__init__.py +11 -0
  170. providers/opencode/client.py +31 -0
  171. providers/opencode/request.py +35 -0
  172. providers/rate_limit.py +300 -0
  173. providers/registry.py +527 -0
  174. providers/transports/__init__.py +1 -0
  175. providers/transports/anthropic_messages/__init__.py +5 -0
  176. providers/transports/anthropic_messages/http.py +118 -0
  177. providers/transports/anthropic_messages/recovery.py +206 -0
  178. providers/transports/anthropic_messages/stream.py +295 -0
  179. providers/transports/anthropic_messages/transport.py +236 -0
  180. providers/transports/openai_chat/__init__.py +5 -0
  181. providers/transports/openai_chat/recovery.py +217 -0
  182. providers/transports/openai_chat/stream.py +384 -0
  183. providers/transports/openai_chat/tool_calls.py +293 -0
  184. providers/transports/openai_chat/transport.py +156 -0
  185. providers/wafer/__init__.py +10 -0
  186. providers/wafer/client.py +50 -0
  187. providers/zai/__init__.py +10 -0
  188. providers/zai/client.py +46 -0
  189. providers/zai/request.py +42 -0
@@ -0,0 +1,154 @@
1
+ """Optimization handlers for fast-path API responses.
2
+
3
+ Each handler returns a MessagesResponse if the request matches and the
4
+ optimization is enabled, otherwise None.
5
+ """
6
+
7
+ import uuid
8
+
9
+ from loguru import logger
10
+
11
+ from config.settings import Settings
12
+
13
+ from .command_utils import extract_command_prefix, extract_filepaths_from_command
14
+ from .detection import (
15
+ is_filepath_extraction_request,
16
+ is_prefix_detection_request,
17
+ is_quota_check_request,
18
+ is_suggestion_mode_request,
19
+ is_title_generation_request,
20
+ )
21
+ from .models.anthropic import MessagesRequest
22
+ from .models.responses import MessagesResponse, Usage
23
+
24
+
25
def _text_response(
    request_data: MessagesRequest,
    text: str,
    *,
    input_tokens: int,
    output_tokens: int,
) -> MessagesResponse:
    """Build a one-text-block ``MessagesResponse`` for a locally answered request.

    The response echoes ``request_data.model``, gets a fresh ``msg_<uuid4>`` id,
    always stops with ``end_turn``, and reports the caller-supplied token counts
    as its usage.
    """
    message_id = f"msg_{uuid.uuid4()}"
    text_block = {"type": "text", "text": text}
    reported_usage = Usage(input_tokens=input_tokens, output_tokens=output_tokens)
    return MessagesResponse(
        id=message_id,
        model=request_data.model,
        content=[text_block],
        stop_reason="end_turn",
        usage=reported_usage,
    )
39
+
40
+
41
def try_prefix_detection(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer command-prefix detection requests locally, without an API call.

    Returns ``None`` when ``settings.fast_prefix_detection`` is off or the
    request is not recognised by ``is_prefix_detection_request``.
    """
    if settings.fast_prefix_detection:
        matched, command = is_prefix_detection_request(request_data)
        if matched:
            # Log before extracting so the match is recorded even if extraction fails.
            logger.info("Optimization: Fast prefix detection request")
            prefix = extract_command_prefix(command)
            return _text_response(
                request_data,
                prefix,
                input_tokens=100,
                output_tokens=5,
            )
    return None
59
+
60
+
61
def try_quota_mock(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer quota probe requests locally with a canned success message.

    Returns ``None`` when ``settings.enable_network_probe_mock`` is off or the
    request is not recognised by ``is_quota_check_request``.
    """
    # ``and`` short-circuits, so detection only runs when the feature is enabled.
    should_mock = settings.enable_network_probe_mock and is_quota_check_request(
        request_data
    )
    if not should_mock:
        return None

    logger.info("Optimization: Intercepted and mocked quota probe")
    return _text_response(
        request_data,
        "Quota check passed.",
        input_tokens=10,
        output_tokens=5,
    )
77
+
78
+
79
def try_title_skip(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer title-generation requests locally with a fixed placeholder title.

    Returns ``None`` when ``settings.enable_title_generation_skip`` is off or
    the request is not recognised by ``is_title_generation_request``.
    """
    if settings.enable_title_generation_skip:
        if is_title_generation_request(request_data):
            logger.info("Optimization: Skipped title generation request")
            placeholder_title = "Conversation"
            return _text_response(
                request_data,
                placeholder_title,
                input_tokens=100,
                output_tokens=5,
            )
    return None
95
+
96
+
97
def try_suggestion_skip(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer suggestion-mode requests locally with an empty text block.

    Returns ``None`` when ``settings.enable_suggestion_mode_skip`` is off or
    the request is not recognised by ``is_suggestion_mode_request``.
    """
    feature_on = settings.enable_suggestion_mode_skip
    # ``or`` short-circuits, so detection is skipped entirely when the feature is off.
    if not feature_on or not is_suggestion_mode_request(request_data):
        return None

    logger.info("Optimization: Skipped suggestion mode request")
    empty_suggestion = ""
    return _text_response(
        request_data,
        empty_suggestion,
        input_tokens=100,
        output_tokens=1,
    )
113
+
114
+
115
def try_filepath_mock(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer filepath-extraction requests locally from the command and its output.

    Returns ``None`` when ``settings.enable_filepath_extraction_mock`` is off or
    the request is not recognised by ``is_filepath_extraction_request``.
    """
    if settings.enable_filepath_extraction_mock:
        matched, command, command_output = is_filepath_extraction_request(
            request_data
        )
        if matched:
            # Extraction runs before logging, so a failed extraction is never
            # reported as a successful mock.
            extracted = extract_filepaths_from_command(command, command_output)
            logger.info("Optimization: Mocked filepath extraction")
            return _text_response(
                request_data,
                extracted,
                input_tokens=100,
                output_tokens=10,
            )
    return None
134
+
135
+
136
# Handlers are tried in list order and the first one that returns a response
# wins, so the cheapest/most common optimizations come first for faster
# short-circuit. Every entry takes ``(request_data, settings)`` and returns a
# ``MessagesResponse`` or ``None``.
OPTIMIZATION_HANDLERS = [
    try_quota_mock,
    try_prefix_detection,
    try_title_skip,
    try_suggestion_skip,
    try_filepath_mock,
]
144
+
145
+
146
def try_optimizations(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Return the first non-``None`` handler result, or ``None`` if none match.

    Handlers from ``OPTIMIZATION_HANDLERS`` are invoked lazily and in order, so
    handlers after the first match are never called.
    """
    outcomes = (attempt(request_data, settings) for attempt in OPTIMIZATION_HANDLERS)
    return next((outcome for outcome in outcomes if outcome is not None), None)
@@ -0,0 +1,424 @@
1
+ """API request pipeline for routing, intercepts, and provider execution."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import traceback
6
+ import uuid
7
+ from collections.abc import AsyncIterator, Callable
8
+ from dataclasses import replace
9
+ from typing import Any
10
+
11
+ from fastapi import HTTPException
12
+ from fastapi.responses import JSONResponse, StreamingResponse
13
+ from loguru import logger
14
+
15
+ from config.provider_catalog import PROVIDER_CATALOG
16
+ from config.settings import Settings
17
+ from core.anthropic import get_token_count, get_user_facing_error_message
18
+ from core.anthropic.sse import ANTHROPIC_SSE_RESPONSE_HEADERS
19
+ from core.openai_responses import OpenAIResponsesAdapter
20
+ from core.trace import api_messages_request_snapshot, trace_event, traced_async_stream
21
+ from providers.base import BaseProvider
22
+ from providers.exceptions import InvalidRequestError, ProviderError
23
+
24
+ from .detection import is_safety_classifier_request
25
+ from .model_router import ModelRouter, RoutedMessagesRequest
26
+ from .models.anthropic import MessagesRequest, TokenCountRequest
27
+ from .models.openai_responses import OpenAIResponsesRequest
28
+ from .models.responses import TokenCountResponse
29
+ from .optimization_handlers import try_optimizations
30
+ from .web_tools.egress import WebFetchEgressPolicy
31
+ from .web_tools.request import (
32
+ is_web_server_tool_request,
33
+ openai_chat_upstream_server_tool_error,
34
+ )
35
+ from .web_tools.streaming import stream_web_server_tool_response
36
+
37
# Token-counting callable; this module calls it as (messages, system, tools)
# and uses the returned int as the input-token count.
TokenCounter = Callable[[list[Any], str | list[Any] | None, list[Any] | None], int]
# Looks up the provider instance for a resolved provider id.
ProviderGetter = Callable[[str], BaseProvider]
# Message intercept: returns a ready response object to short-circuit the
# request, or None to let the next intercept (or the provider) handle it.
MessageIntercept = Callable[[RoutedMessagesRequest], object | None]
40
+
41
# Ids of catalog providers whose descriptor declares the ``openai_chat``
# transport, i.e. ``/chat/completions`` + Anthropic-to-OpenAI conversion.
_OPENAI_CHAT_UPSTREAM_IDS = frozenset(
    {
        catalog_id
        for catalog_id, entry in PROVIDER_CATALOG.items()
        if entry.transport_type == "openai_chat"
    }
)
47
+
48
+
49
def anthropic_sse_streaming_response(body: AsyncIterator[str]) -> StreamingResponse:
    """Wrap *body* in a ``text/event-stream`` response with the Anthropic SSE headers."""
    sse_response = StreamingResponse(
        body,
        media_type="text/event-stream",
        headers=ANTHROPIC_SSE_RESPONSE_HEADERS,
    )
    return sse_response
56
+
57
+
58
def openai_responses_sse_streaming_response(
    body: AsyncIterator[str],
) -> StreamingResponse:
    """Wrap *body* in a ``text/event-stream`` response with the Responses adapter's SSE headers."""
    sse_response = StreamingResponse(
        body,
        media_type="text/event-stream",
        headers=OpenAIResponsesAdapter.sse_headers,
    )
    return sse_response
67
+
68
+
69
def _http_status_for_unexpected_pipeline_exception(_exc: BaseException) -> int:
    """Map an uncaught non-provider failure to an HTTP status.

    The exception is currently ignored: every such failure is reported as a
    500 Internal Server Error.
    """
    internal_server_error = 500
    return internal_server_error
72
+
73
+
74
def _log_unexpected_pipeline_exception(
    settings: Settings,
    exc: BaseException,
    *,
    context: str,
    request_id: str | None = None,
) -> None:
    """Log an API failure without echoing exception text unless opted in.

    Args:
        settings: Supplies ``log_api_error_tracebacks``. When truthy, the
            exception text and its full traceback are logged; otherwise only
            the exception type name is logged.
        exc: The exception being reported.
        context: Short label placed at the start of every log line.
        request_id: Optional request id; included in the log line when given.
    """
    if settings.log_api_error_tracebacks:
        if request_id is not None:
            logger.error("{} request_id={}: {}", context, request_id, exc)
        else:
            logger.error("{}: {}", context, exc)
        # Render the traceback from ``exc`` itself rather than the ambient
        # ``sys.exc_info()``. The output is the same when called from the
        # matching ``except`` block, and stays correct if this helper is ever
        # called outside one or while a different exception is being handled.
        # It is passed as the sole argument, so the logger does not run
        # ``str.format`` over braces in the traceback text.
        logger.error(
            "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
        )
        return
    if request_id is not None:
        logger.error(
            "{} request_id={} exc_type={}",
            context,
            request_id,
            type(exc).__name__,
        )
    else:
        logger.error("{} exc_type={}", context, type(exc).__name__)
98
+
99
+
100
def _require_non_empty_messages(messages: list[Any]) -> None:
    """Raise ``InvalidRequestError`` when *messages* is empty; otherwise do nothing."""
    if messages:
        return
    raise InvalidRequestError("messages cannot be empty")
103
+
104
+
105
class ApiRequestPipeline:
    """Coordinate API request intercepts, routing, and provider stream execution.

    Public entry points:

    * ``create_message`` - Anthropic-style messages endpoint. Runs routing
      policies and local intercepts before falling through to the provider.
    * ``create_response`` - OpenAI Responses-style endpoint (streaming only).
      Converts the payload to an Anthropic request and streams it back
      through the responses adapter.
    * ``count_tokens`` - counts input tokens for a routed request.
    """

    def __init__(
        self,
        settings: Settings,
        provider_getter: ProviderGetter,
        model_router: ModelRouter | None = None,
        token_counter: TokenCounter = get_token_count,
        responses_adapter: OpenAIResponsesAdapter | None = None,
    ) -> None:
        """Store collaborators, building defaults for any that are omitted.

        Args:
            settings: Application settings consulted for feature flags and logging.
            provider_getter: Returns the provider for a resolved provider id.
            model_router: Router used to resolve requests; defaults to
                ``ModelRouter(settings)``.
            token_counter: Callable used to count input tokens.
            responses_adapter: Adapter for the OpenAI Responses wire format;
                defaults to a new ``OpenAIResponsesAdapter``.
        """
        self._settings = settings
        self._provider_getter = provider_getter
        self._model_router = model_router or ModelRouter(settings)
        self._token_counter = token_counter
        self._responses_adapter = responses_adapter or OpenAIResponsesAdapter()
        # Tried in order by ``_run_message_intercepts``; the first non-None
        # result short-circuits the request.
        self._message_intercepts: tuple[MessageIntercept, ...] = (
            self._intercept_web_server_tool,
            self._intercept_local_optimization,
        )

    def create_message(self, request_data: MessagesRequest) -> object:
        """Create an Anthropic-compatible message response.

        Returns the first intercept result when one matches; otherwise a
        streaming SSE response backed by the routed provider.

        Raises:
            ProviderError: Propagated unchanged (this includes the
                ``InvalidRequestError`` raised for empty messages or
                unsupported server tools, provided it derives from
                ``ProviderError`` - TODO confirm in ``providers.exceptions``).
            HTTPException: For any other exception, after logging it.
        """
        try:
            _require_non_empty_messages(request_data.messages)
            routed = self._model_router.resolve_messages_request(request_data)
            routed = self._apply_message_routing_policies(routed)
            self._reject_unsupported_server_tools(routed)

            intercepted = self._run_message_intercepts(routed)
            if intercepted is not None:
                return intercepted

            logger.debug("No optimization matched, routing to provider")
            return anthropic_sse_streaming_response(
                self._provider_stream(
                    routed,
                    wire_api="messages",
                    raw_log_label="FULL_PAYLOAD",
                    raw_log_payload=routed.request.model_dump(),
                )
            )
        except ProviderError:
            raise
        except Exception as e:
            _log_unexpected_pipeline_exception(
                self._settings, e, context="CREATE_MESSAGE_ERROR"
            )
            raise HTTPException(
                status_code=_http_status_for_unexpected_pipeline_exception(e),
                detail=get_user_facing_error_message(e),
            ) from e

    async def create_response(self, request_data: OpenAIResponsesRequest) -> object:
        """Create a streaming OpenAI Responses-compatible response.

        Unlike ``create_message``, failures are not raised: conversion errors,
        provider errors, and unexpected exceptions are all returned as a
        ``JSONResponse`` whose body comes from the adapter's ``error_payload``.
        A request with ``stream=False`` is rejected the same way.
        """
        request_payload = request_data.model_dump(mode="json", exclude_none=True)
        if request_data.stream is False:
            invalid_request = InvalidRequestError(
                "DevCopilot /v1/responses supports streaming only; omit stream or set stream=true."
            )
            return JSONResponse(
                status_code=invalid_request.status_code,
                content=self._responses_adapter.error_payload(
                    message=invalid_request.message,
                    error_type=invalid_request.error_type,
                ),
            )

        try:
            anthropic_payload = self._responses_adapter.to_anthropic_payload(
                request_payload
            )
            response_request = MessagesRequest(**anthropic_payload)
            _require_non_empty_messages(response_request.messages)
            routed = self._model_router.resolve_messages_request(response_request)
            self._reject_unsupported_server_tools(routed)

            streamed = self._provider_stream(
                routed,
                wire_api="responses",
                raw_log_label="FULL_RESPONSES_PAYLOAD",
                raw_log_payload=request_payload,
            )
            return openai_responses_sse_streaming_response(
                self._responses_adapter.iter_sse_from_anthropic(
                    streamed,
                    request_payload,
                )
            )
        except OpenAIResponsesAdapter.ConversionError as exc:
            # Report adapter conversion failures with the same status and
            # error type as any other invalid request.
            invalid_request = InvalidRequestError(str(exc))
            return JSONResponse(
                status_code=invalid_request.status_code,
                content=self._responses_adapter.error_payload(
                    message=invalid_request.message,
                    error_type=invalid_request.error_type,
                ),
            )
        except ProviderError as exc:
            return JSONResponse(
                status_code=exc.status_code,
                content=self._responses_adapter.error_payload(
                    message=exc.message,
                    error_type=exc.error_type,
                ),
            )
        except Exception as e:
            _log_unexpected_pipeline_exception(
                self._settings,
                e,
                context="CREATE_RESPONSE_ERROR",
            )
            return JSONResponse(
                status_code=_http_status_for_unexpected_pipeline_exception(e),
                content=self._responses_adapter.error_payload(
                    message=get_user_facing_error_message(e),
                    error_type="api_error",
                ),
            )

    def count_tokens(self, request_data: TokenCountRequest) -> TokenCountResponse:
        """Count tokens for a request after applying configured model routing.

        A fresh ``req_<12 hex chars>`` id is bound to the logger context for
        the duration of the call.

        Raises:
            ProviderError: Propagated unchanged.
            HTTPException: For any other exception, after logging it.
        """
        request_id = f"req_{uuid.uuid4().hex[:12]}"
        with logger.contextualize(request_id=request_id):
            try:
                _require_non_empty_messages(request_data.messages)
                routed = self._model_router.resolve_token_count_request(request_data)
                tokens = self._token_counter(
                    routed.request.messages, routed.request.system, routed.request.tools
                )
                trace_event(
                    stage="routing",
                    event="api.route.resolved",
                    source="api",
                    kind="count_tokens",
                    provider_id=routed.resolved.provider_id,
                    provider_model=routed.resolved.provider_model,
                    provider_model_ref=routed.resolved.provider_model_ref,
                    gateway_model=routed.request.model,
                )
                trace_event(
                    stage="ingress",
                    event="api.count_tokens.completed",
                    source="api",
                    message_count=len(routed.request.messages),
                    input_tokens=tokens,
                    snapshot=api_messages_request_snapshot(routed.request),
                )
                return TokenCountResponse(input_tokens=tokens)
            except ProviderError:
                raise
            except Exception as e:
                _log_unexpected_pipeline_exception(
                    self._settings,
                    e,
                    context="COUNT_TOKENS_ERROR",
                    request_id=request_id,
                )
                raise HTTPException(
                    status_code=_http_status_for_unexpected_pipeline_exception(e),
                    detail=get_user_facing_error_message(e),
                ) from e

    def _reject_unsupported_server_tools(self, routed: RoutedMessagesRequest) -> None:
        """Raise ``InvalidRequestError`` for server tools an OpenAI-chat upstream cannot serve.

        Only providers listed in ``_OPENAI_CHAT_UPSTREAM_IDS`` are checked;
        requests routed to any other provider pass through untouched.
        """
        if routed.resolved.provider_id not in _OPENAI_CHAT_UPSTREAM_IDS:
            return
        tool_err = openai_chat_upstream_server_tool_error(
            routed.request,
            web_tools_enabled=self._settings.enable_web_server_tools,
        )
        if tool_err is not None:
            raise InvalidRequestError(tool_err)

    def _apply_message_routing_policies(
        self, routed: RoutedMessagesRequest
    ) -> RoutedMessagesRequest:
        """Turn thinking off for safety-classifier requests.

        Requests that are not safety-classifier requests are returned as-is.
        For classifier requests a trace event is always emitted; a new routed
        request is built only when thinking was actually enabled.
        """
        if not is_safety_classifier_request(routed.request):
            return routed
        # ``changed`` records whether disabling thinking alters the route.
        changed = routed.resolved.thinking_enabled
        trace_event(
            stage="routing",
            event="api.optimization.safety_classifier_no_thinking",
            source="api",
            model=routed.request.model,
            changed=changed,
        )
        if not changed:
            return routed
        return RoutedMessagesRequest(
            request=routed.request,
            resolved=replace(routed.resolved, thinking_enabled=False),
        )

    def _run_message_intercepts(self, routed: RoutedMessagesRequest) -> object | None:
        """Return the first non-``None`` intercept result, or ``None`` if none match."""
        for intercept in self._message_intercepts:
            result = intercept(routed)
            if result is not None:
                return result
        return None

    def _intercept_web_server_tool(
        self, routed: RoutedMessagesRequest
    ) -> object | None:
        """Serve web server-tool requests locally when the feature is enabled.

        Returns ``None`` when ``enable_web_server_tools`` is off or the request
        is not a web server-tool request; otherwise an Anthropic SSE streaming
        response produced by ``stream_web_server_tool_response``.
        """
        if not self._settings.enable_web_server_tools:
            return None
        if not is_web_server_tool_request(routed.request):
            return None

        input_tokens = self._token_counter(
            routed.request.messages, routed.request.system, routed.request.tools
        )
        trace_event(
            stage="routing",
            event="api.optimization.web_server_tool",
            source="api",
            model=routed.request.model,
        )
        # Egress policy for outbound fetches, taken from settings.
        egress = WebFetchEgressPolicy(
            allow_private_network_targets=self._settings.web_fetch_allow_private_networks,
            allowed_schemes=self._settings.web_fetch_allowed_scheme_set(),
        )
        return anthropic_sse_streaming_response(
            stream_web_server_tool_response(
                routed.request,
                input_tokens=input_tokens,
                web_fetch_egress=egress,
                verbose_client_errors=self._settings.log_api_error_tracebacks,
            ),
        )

    def _intercept_local_optimization(
        self, routed: RoutedMessagesRequest
    ) -> object | None:
        """Return a locally built response when an optimization handler matches.

        Emits an ``api.optimization.short_circuit`` trace event on a match and
        returns ``None`` otherwise.
        """
        optimized = try_optimizations(routed.request, self._settings)
        if optimized is None:
            return None
        trace_event(
            stage="routing",
            event="api.optimization.short_circuit",
            source="api",
            model=routed.request.model,
        )
        return optimized

    def _provider_stream(
        self,
        routed: RoutedMessagesRequest,
        *,
        wire_api: str,
        raw_log_label: str,
        raw_log_payload: Any,
    ) -> AsyncIterator[str]:
        """Start a traced provider stream for an already routed request.

        Args:
            routed: The routed request and its resolved provider/model.
            wire_api: ``"responses"`` selects the Responses-specific trace
                event names; any other value uses the messages event names.
            raw_log_label: Label for the optional raw payload debug log line.
            raw_log_payload: Payload logged when
                ``settings.log_raw_api_payloads`` is enabled.

        Returns:
            The provider's response stream wrapped by ``traced_async_stream``.
        """
        provider = self._provider_getter(routed.resolved.provider_id)
        # Preflight runs before any trace event is emitted.
        # NOTE(review): presumably it validates the request up front so errors
        # surface before streaming starts - confirm against BaseProvider.
        provider.preflight_stream(
            routed.request,
            thinking_enabled=routed.resolved.thinking_enabled,
        )

        route_trace: dict[str, Any] = {
            "stage": "routing",
            "event": "api.route.resolved",
            "source": "api",
            "provider_id": routed.resolved.provider_id,
            "provider_model": routed.resolved.provider_model,
            "provider_model_ref": routed.resolved.provider_model_ref,
            "gateway_model": routed.request.model,
            "thinking_enabled": routed.resolved.thinking_enabled,
        }
        if wire_api == "responses":
            route_trace["wire_api"] = "responses"
        trace_event(**route_trace)

        request_id = f"req_{uuid.uuid4().hex[:12]}"
        trace_event(
            stage="ingress",
            event=(
                "api.responses.request.received"
                if wire_api == "responses"
                else "api.request.received"
            ),
            source="api",
            message_count=len(routed.request.messages),
            snapshot=api_messages_request_snapshot(routed.request),
            request_id=request_id,
        )

        if self._settings.log_raw_api_payloads:
            # Pass the label as an argument rather than interpolating it into
            # the template, so a brace in the label cannot break formatting.
            logger.debug("{} [{}]: {}", raw_log_label, request_id, raw_log_payload)

        input_tokens = self._token_counter(
            routed.request.messages,
            routed.request.system,
            routed.request.tools,
        )
        return traced_async_stream(
            provider.stream_response(
                routed.request,
                input_tokens=input_tokens,
                request_id=request_id,
                thinking_enabled=routed.resolved.thinking_enabled,
            ),
            stage="egress",
            source="api",
            complete_event=(
                "api.responses.stream_completed"
                if wire_api == "responses"
                else "api.response.stream_completed"
            ),
            interrupted_event=(
                "api.responses.stream_interrupted"
                if wire_api == "responses"
                else "api.response.stream_interrupted"
            ),
            chunk_event=None,
            extra={
                "request_id": request_id,
                "provider_id": routed.resolved.provider_id,
                "gateway_model": routed.request.model,
            },
        )