router-maestro 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- router_maestro/__init__.py +1 -1
- router_maestro/cli/config.py +126 -0
- router_maestro/providers/__init__.py +8 -0
- router_maestro/providers/base.py +80 -0
- router_maestro/providers/copilot.py +324 -1
- router_maestro/routing/router.py +158 -2
- router_maestro/server/app.py +8 -1
- router_maestro/server/routes/__init__.py +8 -1
- router_maestro/server/routes/anthropic.py +79 -0
- router_maestro/server/routes/chat.py +1 -2
- router_maestro/server/routes/models.py +1 -2
- router_maestro/server/routes/responses.py +606 -0
- router_maestro/server/schemas/__init__.py +33 -0
- router_maestro/server/schemas/responses.py +214 -0
- {router_maestro-0.1.6.dist-info → router_maestro-0.1.8.dist-info}/METADATA +24 -3
- {router_maestro-0.1.6.dist-info → router_maestro-0.1.8.dist-info}/RECORD +19 -17
- {router_maestro-0.1.6.dist-info → router_maestro-0.1.8.dist-info}/WHEEL +0 -0
- {router_maestro-0.1.6.dist-info → router_maestro-0.1.8.dist-info}/entry_points.txt +0 -0
- {router_maestro-0.1.6.dist-info → router_maestro-0.1.8.dist-info}/licenses/LICENSE +0 -0
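
The headline change in 0.1.8 is a new OpenAI-style Responses API route, POST /api/openai/v1/responses, added in router_maestro/server/routes/responses.py together with the schemas that back it. Below is a minimal sketch of a non-streaming call to that route; the host, port, model name, and the use of the httpx client are illustrative assumptions, not taken from this diff.

# Sketch: non-streaming request against the new endpoint (assumes a locally
# running router on http://localhost:8000 and the httpx client; both are
# assumptions, as is the model name).
import httpx

payload = {
    "model": "gpt-5-codex",  # hypothetical model name; use one your router config exposes
    "input": "Write a one-line docstring for a function that reverses a string.",
    "stream": False,
}

resp = httpx.post("http://localhost:8000/api/openai/v1/responses", json=payload, timeout=60.0)
resp.raise_for_status()
data = resp.json()

# The route returns a ResponsesResponse: id, model, status, output[], usage.
for item in data["output"]:
    if item["type"] == "message":
        for part in item["content"]:
            if part["type"] == "output_text":
                print(part["text"])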

router_maestro/server/routes/responses.py
@@ -0,0 +1,606 @@
+"""Responses API route for Codex models."""
+
+import json
+import time
+import uuid
+from collections.abc import AsyncGenerator
+from typing import Any
+
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
+
+from router_maestro.providers import ProviderError
+from router_maestro.providers import ResponsesRequest as InternalResponsesRequest
+from router_maestro.routing import Router, get_router
+from router_maestro.server.schemas import (
+    ResponsesRequest,
+    ResponsesResponse,
+    ResponsesUsage,
+)
+from router_maestro.utils import get_logger
+
+logger = get_logger("server.routes.responses")
+
+router = APIRouter()
+
+
+def generate_id(prefix: str) -> str:
+    """Generate a unique ID with given prefix."""
+    return f"{prefix}-{uuid.uuid4().hex[:16]}"
+
+
+def sse_event(data: dict[str, Any]) -> str:
+    """Format data as SSE event with event type field."""
+    event_type = data.get("type", "")
+    return f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
+
+
+def extract_text_from_content(content: str | list[Any]) -> str:
+    """Extract text from content which can be a string or list of content blocks."""
+    if isinstance(content, str):
+        return content
+
+    texts = []
+    for block in content:
+        if isinstance(block, dict):
+            if block.get("type") in ("input_text", "output_text"):
+                texts.append(block.get("text", ""))
+            elif "text" in block:
+                texts.append(block.get("text", ""))
+        elif hasattr(block, "text"):
+            texts.append(block.text)
+    return "".join(texts)
+
+
+def convert_content_to_serializable(content: Any) -> Any:
+    """Convert content to JSON-serializable format.
+
+    Handles Pydantic models and nested structures.
+    """
+    if isinstance(content, str):
+        return content
+    if hasattr(content, "model_dump"):
+        return content.model_dump(exclude_none=True)
+    if isinstance(content, list):
+        return [convert_content_to_serializable(item) for item in content]
+    if isinstance(content, dict):
+        return {k: convert_content_to_serializable(v) for k, v in content.items()}
+    return content
+
+
+def convert_input_to_internal(
+    input_data: str | list[Any],
+) -> str | list[dict[str, Any]]:
+    """Convert the incoming input format to internal format.
+
+    Preserves the original content format (string or array) as the upstream
+    Copilot API accepts both formats. Converts Pydantic models to dicts.
+    """
+    if isinstance(input_data, str):
+        return input_data
+
+    items = []
+    for item in input_data:
+        if isinstance(item, dict):
+            item_type = item.get("type", "message")
+
+            if item_type == "message" or (item_type is None and "role" in item):
+                role = item.get("role", "user")
+                content = item.get("content", "")
+                # Convert content to serializable format
+                content = convert_content_to_serializable(content)
+                items.append({"type": "message", "role": role, "content": content})
+
+            elif item_type == "function_call":
+                items.append(
+                    {
+                        "type": "function_call",
+                        "id": item.get("id"),
+                        "call_id": item.get("call_id"),
+                        "name": item.get("name"),
+                        "arguments": item.get("arguments", "{}"),
+                        "status": item.get("status", "completed"),
+                    }
+                )
+
+            elif item_type == "function_call_output":
+                output = item.get("output", "")
+                if not isinstance(output, str):
+                    output = json.dumps(output)
+                items.append(
+                    {
+                        "type": "function_call_output",
+                        "call_id": item.get("call_id"),
+                        "output": output,
+                    }
+                )
+            else:
+                items.append(convert_content_to_serializable(item))
+
+        elif hasattr(item, "model_dump"):
+            # Pydantic model - convert to dict
+            items.append(item.model_dump(exclude_none=True))
+
+        elif hasattr(item, "role") and hasattr(item, "content"):
+            # Object with role and content attributes
+            content = convert_content_to_serializable(item.content)
+            items.append({"type": "message", "role": item.role, "content": content})
+
+    return items
+
+
+def convert_tools_to_internal(tools: list[Any] | None) -> list[dict[str, Any]] | None:
+    """Convert tools to internal format."""
+    if not tools:
+        return None
+    result = []
+    for tool in tools:
+        if isinstance(tool, dict):
+            result.append(tool)
+        elif hasattr(tool, "model_dump"):
+            result.append(tool.model_dump(exclude_none=True))
+        else:
+            result.append(dict(tool))
+    return result
+
+
+def convert_tool_choice_to_internal(
+    tool_choice: str | Any | None,
+) -> str | dict[str, Any] | None:
+    """Convert tool_choice to internal format."""
+    if tool_choice is None:
+        return None
+    if isinstance(tool_choice, str):
+        return tool_choice
+    if isinstance(tool_choice, dict):
+        return tool_choice
+    if hasattr(tool_choice, "model_dump"):
+        return tool_choice.model_dump(exclude_none=True)
+    return dict(tool_choice)
+
+
+def make_text_content(text: str) -> dict[str, Any]:
+    """Create output_text content block."""
+    return {"type": "output_text", "text": text, "annotations": []}
+
+
+def make_usage(raw_usage: dict[str, Any] | None) -> dict[str, Any] | None:
+    """Create properly structured usage object matching OpenAI spec."""
+    if not raw_usage:
+        return None
+
+    input_tokens = raw_usage.get("input_tokens", 0)
+    output_tokens = raw_usage.get("output_tokens", 0)
+
+    return {
+        "input_tokens": input_tokens,
+        "input_tokens_details": {"cached_tokens": 0},
+        "output_tokens": output_tokens,
+        "output_tokens_details": {"reasoning_tokens": 0},
+        "total_tokens": input_tokens + output_tokens,
+    }
+
+
+def make_message_item(msg_id: str, text: str, status: str = "completed") -> dict[str, Any]:
+    """Create message output item."""
+    return {
+        "type": "message",
+        "id": msg_id,
+        "role": "assistant",
+        "content": [make_text_content(text)],
+        "status": status,
+    }
+
+
+def make_function_call_item(
+    fc_id: str, call_id: str, name: str, arguments: str, status: str = "completed"
+) -> dict[str, Any]:
+    """Create function_call output item."""
+    return {
+        "type": "function_call",
+        "id": fc_id,
+        "call_id": call_id,
+        "name": name,
+        "arguments": arguments,
+        "status": status,
+    }
+
+
+@router.post("/api/openai/v1/responses")
+async def create_response(request: ResponsesRequest):
+    """Handle Responses API requests (for Codex models)."""
+    request_id = generate_id("req")
+    start_time = time.time()
+
+    logger.info(
+        "Received responses request: req_id=%s, model=%s, stream=%s, has_tools=%s",
+        request_id,
+        request.model,
+        request.stream,
+        request.tools is not None,
+    )
+
+    model_router = get_router()
+
+    input_value = convert_input_to_internal(request.input)
+
+    internal_request = InternalResponsesRequest(
+        model=request.model,
+        input=input_value,
+        stream=request.stream,
+        instructions=request.instructions,
+        temperature=request.temperature,
+        max_output_tokens=request.max_output_tokens,
+        tools=convert_tools_to_internal(request.tools),
+        tool_choice=convert_tool_choice_to_internal(request.tool_choice),
+        parallel_tool_calls=request.parallel_tool_calls,
+    )
+
+    if request.stream:
+        return StreamingResponse(
+            stream_response(model_router, internal_request, request_id, start_time),
+            media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+                "X-Accel-Buffering": "no",
+            },
+        )
+
+    try:
+        response, provider_name = await model_router.responses_completion(internal_request)
+
+        usage = None
+        if response.usage:
+            usage = ResponsesUsage(
+                input_tokens=response.usage.get("input_tokens", 0),
+                output_tokens=response.usage.get("output_tokens", 0),
+                total_tokens=response.usage.get("total_tokens", 0),
+            )
+
+        response_id = generate_id("resp")
+        output: list[dict[str, Any]] = []
+
+        if response.content:
+            message_id = generate_id("msg")
+            output.append(make_message_item(message_id, response.content))
+
+        if response.tool_calls:
+            for tc in response.tool_calls:
+                fc_id = generate_id("fc")
+                output.append(make_function_call_item(fc_id, tc.call_id, tc.name, tc.arguments))
+
+        return ResponsesResponse(
+            id=response_id,
+            model=response.model,
+            status="completed",
+            output=output,
+            usage=usage,
+        )
+    except ProviderError as e:
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.error(
+            "Responses request failed: req_id=%s, elapsed=%.1fms, error=%s",
+            request_id,
+            elapsed_ms,
+            e,
+        )
+        raise HTTPException(status_code=e.status_code, detail=str(e))
+
+
+async def stream_response(
+    model_router: Router,
+    request: InternalResponsesRequest,
+    request_id: str,
+    start_time: float,
+) -> AsyncGenerator[str, None]:
+    """Stream Responses API response."""
+    try:
+        stream, provider_name = await model_router.responses_completion_stream(request)
+        response_id = generate_id("resp")
+        created_at = int(time.time())
+
+        logger.debug(
+            "Stream started: req_id=%s, resp_id=%s, provider=%s",
+            request_id,
+            response_id,
+            provider_name,
+        )
+
+        # Base response object with all required fields (matching OpenAI spec)
+        base_response = {
+            "id": response_id,
+            "object": "response",
+            "created_at": created_at,
+            "model": request.model,
+            "error": None,
+            "incomplete_details": None,
+        }
+
+        output_items: list[dict[str, Any]] = []
+        output_index = 0
+        content_index = 0
+
+        current_message_id: str | None = None
+        accumulated_content = ""
+        message_started = False
+
+        final_usage = None
+        stream_completed = False
+
+        # response.created
+        yield sse_event(
+            {
+                "type": "response.created",
+                "response": {
+                    **base_response,
+                    "status": "in_progress",
+                    "output": [],
+                },
+            }
+        )
+
+        # response.in_progress
+        yield sse_event(
+            {
+                "type": "response.in_progress",
+                "response": {
+                    **base_response,
+                    "status": "in_progress",
+                    "output": [],
+                },
+            }
+        )
+
+        async for chunk in stream:
+            # Handle text content
+            if chunk.content:
+                if not message_started:
+                    current_message_id = generate_id("msg")
+                    message_started = True
+
+                    # Note: content starts as empty array, matching OpenAI spec
+                    yield sse_event(
+                        {
+                            "type": "response.output_item.added",
+                            "output_index": output_index,
+                            "item": {
+                                "type": "message",
+                                "id": current_message_id,
+                                "role": "assistant",
+                                "content": [],
+                                "status": "in_progress",
+                            },
+                        }
+                    )
+
+                    yield sse_event(
+                        {
+                            "type": "response.content_part.added",
+                            "item_id": current_message_id,
+                            "output_index": output_index,
+                            "content_index": content_index,
+                            "part": make_text_content(""),
+                        }
+                    )
+
+                accumulated_content += chunk.content
+
+                yield sse_event(
+                    {
+                        "type": "response.output_text.delta",
+                        "item_id": current_message_id,
+                        "output_index": output_index,
+                        "content_index": content_index,
+                        "delta": chunk.content,
+                    }
+                )
+
+            # Handle tool call delta
+            if chunk.tool_call_delta:
+                delta = chunk.tool_call_delta
+                if message_started and current_message_id:
+                    # Close current message
+                    for evt in _close_message_events(
+                        current_message_id,
+                        output_index,
+                        content_index,
+                        accumulated_content,
+                    ):
+                        yield evt
+                    output_items.append(make_message_item(current_message_id, accumulated_content))
+                    output_index += 1
+                    message_started = False
+                    current_message_id = None
+
+                yield sse_event(
+                    {
+                        "type": "response.function_call_arguments.delta",
+                        "item_id": delta.get("item_id", ""),
+                        "output_index": delta.get("output_index", output_index),
+                        "delta": delta.get("delta", ""),
+                    }
+                )
+
+            # Handle complete tool call
+            if chunk.tool_call:
+                tc = chunk.tool_call
+                if message_started and current_message_id:
+                    for evt in _close_message_events(
+                        current_message_id,
+                        output_index,
+                        content_index,
+                        accumulated_content,
+                    ):
+                        yield evt
+                    output_items.append(make_message_item(current_message_id, accumulated_content))
+                    output_index += 1
+                    message_started = False
+                    current_message_id = None
+
+                fc_id = generate_id("fc")
+                fc_item = make_function_call_item(fc_id, tc.call_id, tc.name, tc.arguments)
+
+                yield sse_event(
+                    {
+                        "type": "response.output_item.added",
+                        "output_index": output_index,
+                        "item": make_function_call_item(
+                            fc_id, tc.call_id, tc.name, "", "in_progress"
+                        ),
+                    }
+                )
+
+                yield sse_event(
+                    {
+                        "type": "response.function_call_arguments.delta",
+                        "item_id": fc_id,
+                        "output_index": output_index,
+                        "delta": tc.arguments,
+                    }
+                )
+
+                yield sse_event(
+                    {
+                        "type": "response.function_call_arguments.done",
+                        "item_id": fc_id,
+                        "output_index": output_index,
+                        "arguments": tc.arguments,
+                    }
+                )
+
+                yield sse_event(
+                    {
+                        "type": "response.output_item.done",
+                        "output_index": output_index,
+                        "item": fc_item,
+                    }
+                )
+
+                output_items.append(fc_item)
+                output_index += 1
+
+            if chunk.usage:
+                final_usage = chunk.usage
+
+            if chunk.finish_reason:
+                stream_completed = True
+
+                if message_started and current_message_id:
+                    for evt in _close_message_events(
+                        current_message_id,
+                        output_index,
+                        content_index,
+                        accumulated_content,
+                    ):
+                        yield evt
+                    output_items.append(make_message_item(current_message_id, accumulated_content))
+
+                yield sse_event(
+                    {
+                        "type": "response.completed",
+                        "response": {
+                            **base_response,
+                            "status": "completed",
+                            "output": output_items,
+                            "usage": make_usage(final_usage),
+                        },
+                    }
+                )
+
+        if not stream_completed:
+            logger.warning("Stream ended without finish_reason, sending completion events")
+
+            if message_started and current_message_id:
+                for evt in _close_message_events(
+                    current_message_id,
+                    output_index,
+                    content_index,
+                    accumulated_content,
+                ):
+                    yield evt
+                output_items.append(make_message_item(current_message_id, accumulated_content))
+
+            yield sse_event(
+                {
+                    "type": "response.completed",
+                    "response": {
+                        **base_response,
+                        "status": "completed",
+                        "output": output_items,
+                        "usage": make_usage(final_usage),
+                    },
+                }
+            )
+
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.info(
+            "Stream completed: req_id=%s, elapsed=%.1fms, output_items=%d",
+            request_id,
+            elapsed_ms,
+            len(output_items),
+        )
+
+        # NOTE: Do NOT send "data: [DONE]\n\n" - agent-maestro doesn't send it
+        # for Responses API
+
+    except ProviderError as e:
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.error(
+            "Stream failed: req_id=%s, elapsed=%.1fms, error=%s",
+            request_id,
+            elapsed_ms,
+            e,
+        )
+        # Send response.failed event matching OpenAI spec
+        yield sse_event(
+            {
+                "type": "response.failed",
+                "response": {
+                    "id": response_id,
+                    "object": "response",
+                    "status": "failed",
+                    "created_at": created_at,
+                    "model": request.model,
+                    "output": [],
+                    "error": {
+                        "code": "server_error",
+                        "message": str(e),
+                    },
+                    "incomplete_details": None,
+                },
+            }
+        )
+
+
+def _close_message_events(
+    msg_id: str, output_index: int, content_index: int, text: str
+) -> list[str]:
+    """Generate events to close a message output item."""
+    return [
+        sse_event(
+            {
+                "type": "response.output_text.done",
+                "item_id": msg_id,
+                "output_index": output_index,
+                "content_index": content_index,
+                "text": text,
+            }
+        ),
+        sse_event(
+            {
+                "type": "response.content_part.done",
+                "item_id": msg_id,
+                "output_index": output_index,
+                "content_index": content_index,
+                "part": make_text_content(text),
+            }
+        ),
+        sse_event(
+            {
+                "type": "response.output_item.done",
+                "output_index": output_index,
+                "item": make_message_item(msg_id, text),
+            }
+        ),
+    ]
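
For streaming requests the route returns text/event-stream and emits the OpenAI-style SSE events shown above, in order: response.created, response.in_progress, response.output_item.added, response.content_part.added, one or more response.output_text.delta events, the corresponding *.done events, and finally response.completed; per the NOTE in the code it never sends a trailing "data: [DONE]" sentinel. A minimal consumer sketch, again assuming a local server, a hypothetical model name, and the httpx client (none of which come from this diff):

import json

import httpx

payload = {"model": "gpt-5-codex", "input": "Say hello.", "stream": True}  # model name is hypothetical

with httpx.stream(
    "POST", "http://localhost:8000/api/openai/v1/responses", json=payload, timeout=None
) as resp:
    for line in resp.iter_lines():
        if not line.startswith("data: "):
            continue  # skip the "event: ..." lines and blank separators
        event = json.loads(line[len("data: "):])
        if event["type"] == "response.output_text.delta":
            print(event["delta"], end="", flush=True)
        elif event["type"] == "response.completed":
            print()  # done; token usage is in event["response"]["usage"]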

router_maestro/server/schemas/__init__.py
@@ -25,6 +25,23 @@ from router_maestro.server.schemas.openai import (
     ModelList,
     ModelObject,
 )
+from router_maestro.server.schemas.responses import (
+    ResponsesDeltaEvent,
+    ResponsesDoneEvent,
+    ResponsesFunctionCallInput,
+    ResponsesFunctionCallOutput,
+    ResponsesFunctionTool,
+    ResponsesInputMessage,
+    ResponsesInputTextContent,
+    ResponsesMessageOutput,
+    ResponsesOutputText,
+    ResponsesReasoningOutput,
+    ResponsesRequest,
+    ResponsesResponse,
+    ResponsesStreamEvent,
+    ResponsesToolChoiceFunction,
+    ResponsesUsage,
+)
 
 __all__ = [
     # Admin schemas
@@ -50,4 +67,20 @@ __all__ = [
     "ErrorResponse",
     "ModelList",
     "ModelObject",
+    # Responses API schemas
+    "ResponsesDeltaEvent",
+    "ResponsesDoneEvent",
+    "ResponsesFunctionCallInput",
+    "ResponsesFunctionCallOutput",
+    "ResponsesFunctionTool",
+    "ResponsesInputMessage",
+    "ResponsesInputTextContent",
+    "ResponsesMessageOutput",
+    "ResponsesOutputText",
+    "ResponsesReasoningOutput",
+    "ResponsesRequest",
+    "ResponsesResponse",
+    "ResponsesStreamEvent",
+    "ResponsesToolChoiceFunction",
+    "ResponsesUsage",
 ]