router-maestro 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,517 @@
1
+ """Responses API route for Codex models."""
2
+
3
+ import json
4
+ import uuid
5
+ from collections.abc import AsyncGenerator
6
+ from typing import Any
7
+
8
+ from fastapi import APIRouter, HTTPException
9
+ from fastapi.responses import StreamingResponse
10
+
11
+ from router_maestro.providers import ProviderError
12
+ from router_maestro.providers import ResponsesRequest as InternalResponsesRequest
13
+ from router_maestro.routing import Router, get_router
14
+ from router_maestro.server.schemas import (
15
+ ResponsesRequest,
16
+ ResponsesResponse,
17
+ ResponsesUsage,
18
+ )
19
+ from router_maestro.utils import get_logger
20
+
21
+ logger = get_logger("server.routes.responses")
22
+
23
+ router = APIRouter()
24
+
25
+
26
def generate_id(prefix: str) -> str:
    """Build a unique identifier of the form ``<prefix>-<16 hex chars>``."""
    suffix = uuid.uuid4().hex[:16]
    return "-".join((prefix, suffix))
29
+
30
+
31
def sse_event(data: dict[str, Any]) -> str:
    """Serialize *data* as one Server-Sent Events frame.

    The ``type`` key (empty string when absent) supplies the SSE event name;
    the whole dict is JSON-encoded into the ``data:`` field.
    """
    name = data.get("type", "")
    payload = json.dumps(data)
    return "event: " + name + "\ndata: " + payload + "\n\n"
35
+
36
+
37
+ def extract_text_from_content(content: str | list[Any]) -> str:
38
+ """Extract text from content which can be a string or list of content blocks."""
39
+ if isinstance(content, str):
40
+ return content
41
+
42
+ texts = []
43
+ for block in content:
44
+ if isinstance(block, dict):
45
+ if block.get("type") in ("input_text", "output_text"):
46
+ texts.append(block.get("text", ""))
47
+ elif "text" in block:
48
+ texts.append(block.get("text", ""))
49
+ elif hasattr(block, "text"):
50
+ texts.append(block.text)
51
+ return "".join(texts)
52
+
53
+
54
+ def convert_content_to_serializable(content: Any) -> Any:
55
+ """Convert content to JSON-serializable format.
56
+
57
+ Handles Pydantic models and nested structures.
58
+ """
59
+ if isinstance(content, str):
60
+ return content
61
+ if hasattr(content, "model_dump"):
62
+ return content.model_dump(exclude_none=True)
63
+ if isinstance(content, list):
64
+ return [convert_content_to_serializable(item) for item in content]
65
+ if isinstance(content, dict):
66
+ return {k: convert_content_to_serializable(v) for k, v in content.items()}
67
+ return content
68
+
69
+
70
+ def convert_input_to_internal(
71
+ input_data: str | list[Any],
72
+ ) -> str | list[dict[str, Any]]:
73
+ """Convert the incoming input format to internal format.
74
+
75
+ Preserves the original content format (string or array) as the upstream
76
+ Copilot API accepts both formats. Converts Pydantic models to dicts.
77
+ """
78
+ if isinstance(input_data, str):
79
+ return input_data
80
+
81
+ items = []
82
+ for item in input_data:
83
+ if isinstance(item, dict):
84
+ item_type = item.get("type", "message")
85
+
86
+ if item_type == "message" or (item_type is None and "role" in item):
87
+ role = item.get("role", "user")
88
+ content = item.get("content", "")
89
+ # Convert content to serializable format
90
+ content = convert_content_to_serializable(content)
91
+ items.append({"type": "message", "role": role, "content": content})
92
+
93
+ elif item_type == "function_call":
94
+ items.append(
95
+ {
96
+ "type": "function_call",
97
+ "id": item.get("id"),
98
+ "call_id": item.get("call_id"),
99
+ "name": item.get("name"),
100
+ "arguments": item.get("arguments", "{}"),
101
+ "status": item.get("status", "completed"),
102
+ }
103
+ )
104
+
105
+ elif item_type == "function_call_output":
106
+ output = item.get("output", "")
107
+ if not isinstance(output, str):
108
+ output = json.dumps(output)
109
+ items.append(
110
+ {
111
+ "type": "function_call_output",
112
+ "call_id": item.get("call_id"),
113
+ "output": output,
114
+ }
115
+ )
116
+ else:
117
+ items.append(convert_content_to_serializable(item))
118
+
119
+ elif hasattr(item, "model_dump"):
120
+ # Pydantic model - convert to dict
121
+ items.append(item.model_dump(exclude_none=True))
122
+
123
+ elif hasattr(item, "role") and hasattr(item, "content"):
124
+ # Object with role and content attributes
125
+ content = convert_content_to_serializable(item.content)
126
+ items.append({"type": "message", "role": item.role, "content": content})
127
+
128
+ return items
129
+
130
+
131
+ def convert_tools_to_internal(tools: list[Any] | None) -> list[dict[str, Any]] | None:
132
+ """Convert tools to internal format."""
133
+ if not tools:
134
+ return None
135
+ result = []
136
+ for tool in tools:
137
+ if isinstance(tool, dict):
138
+ result.append(tool)
139
+ elif hasattr(tool, "model_dump"):
140
+ result.append(tool.model_dump(exclude_none=True))
141
+ else:
142
+ result.append(dict(tool))
143
+ return result
144
+
145
+
146
+ def convert_tool_choice_to_internal(
147
+ tool_choice: str | Any | None,
148
+ ) -> str | dict[str, Any] | None:
149
+ """Convert tool_choice to internal format."""
150
+ if tool_choice is None:
151
+ return None
152
+ if isinstance(tool_choice, str):
153
+ return tool_choice
154
+ if isinstance(tool_choice, dict):
155
+ return tool_choice
156
+ if hasattr(tool_choice, "model_dump"):
157
+ return tool_choice.model_dump(exclude_none=True)
158
+ return dict(tool_choice)
159
+
160
+
161
def make_text_content(text: str) -> dict[str, Any]:
    """Build an ``output_text`` content block wrapping *text*."""
    return {"type": "output_text", "text": text, "annotations": []}


def make_message_item(msg_id: str, text: str, status: str = "completed") -> dict[str, Any]:
    """Build an assistant message output item containing a single text block."""
    item: dict[str, Any] = {
        "type": "message",
        "id": msg_id,
        "role": "assistant",
        "content": [make_text_content(text)],
        "status": status,
    }
    return item
175
+
176
+
177
def make_function_call_item(
    fc_id: str, call_id: str, name: str, arguments: str, status: str = "completed"
) -> dict[str, Any]:
    """Build a ``function_call`` output item for the Responses API."""
    item: dict[str, Any] = dict(
        type="function_call",
        id=fc_id,
        call_id=call_id,
        name=name,
        arguments=arguments,
        status=status,
    )
    return item
189
+
190
+
191
@router.post("/api/openai/v1/responses")
async def create_response(request: ResponsesRequest):
    """Handle Responses API requests (for Codex models).

    Converts the public schema into the internal request shape, then either
    returns a streaming SSE response (``stream=True``) or performs a single
    completion and wraps it in a ``ResponsesResponse``.

    Raises:
        HTTPException: with the provider's status code when the upstream
            provider fails on the non-streaming path.
    """
    logger.info(
        "Received responses request: model=%s, stream=%s, has_tools=%s",
        request.model,
        request.stream,
        request.tools is not None,
    )
    model_router = get_router()

    input_value = convert_input_to_internal(request.input)

    internal_request = InternalResponsesRequest(
        model=request.model,
        input=input_value,
        stream=request.stream,
        instructions=request.instructions,
        temperature=request.temperature,
        max_output_tokens=request.max_output_tokens,
        tools=convert_tools_to_internal(request.tools),
        tool_choice=convert_tool_choice_to_internal(request.tool_choice),
        parallel_tool_calls=request.parallel_tool_calls,
    )

    if request.stream:
        # Streaming errors are reported as SSE error events inside
        # stream_response, not as HTTP errors.
        return StreamingResponse(
            stream_response(model_router, internal_request),
            media_type="text/event-stream",
        )

    try:
        response, provider_name = await model_router.responses_completion(internal_request)

        # Usage is optional on the internal response; map it when present.
        usage = None
        if response.usage:
            usage = ResponsesUsage(
                input_tokens=response.usage.get("input_tokens", 0),
                output_tokens=response.usage.get("output_tokens", 0),
                total_tokens=response.usage.get("total_tokens", 0),
            )

        response_id = generate_id("resp")
        output: list[dict[str, Any]] = []

        # Text content becomes a single assistant message item.
        if response.content:
            message_id = generate_id("msg")
            output.append(make_message_item(message_id, response.content))

        # Each tool call becomes its own function_call item.
        if response.tool_calls:
            for tc in response.tool_calls:
                fc_id = generate_id("fc")
                output.append(make_function_call_item(fc_id, tc.call_id, tc.name, tc.arguments))

        return ResponsesResponse(
            id=response_id,
            model=response.model,
            status="completed",
            output=output,
            usage=usage,
        )
    except ProviderError as e:
        logger.error("Responses request failed: %s", e)
        # Fix: chain the original exception (`from e`, flake8 B904) so the
        # provider traceback is preserved instead of being reported as an
        # error raised "during handling of" the HTTPException.
        raise HTTPException(status_code=e.status_code, detail=str(e)) from e
255
+
256
+
257
async def stream_response(
    model_router: Router, request: InternalResponsesRequest
) -> AsyncGenerator[str, None]:
    """Stream Responses API response.

    Translates the internal streaming chunks into the Responses API SSE
    event sequence: response.created / response.in_progress, then per-chunk
    text and function-call events, then response.completed and a final
    ``data: [DONE]`` sentinel. Provider failures are emitted as a single
    SSE error payload rather than raised.
    """
    try:
        # provider_name is unused here; kept for tuple-unpacking symmetry
        # with the non-streaming path.
        stream, provider_name = await model_router.responses_completion_stream(request)
        response_id = generate_id("resp")

        # Items already finalized (closed messages, completed tool calls).
        output_items: list[dict[str, Any]] = []
        output_index = 0
        # NOTE(review): content_index is never incremented — assumes at most
        # one content part per message item; confirm against upstream chunks.
        content_index = 0

        # State of the currently-open assistant message, if any.
        current_message_id: str | None = None
        accumulated_content = ""
        message_started = False

        final_usage = None
        stream_completed = False

        # response.created
        yield sse_event(
            {
                "type": "response.created",
                "response": {
                    "id": response_id,
                    "object": "response",
                    "status": "in_progress",
                    "output": [],
                },
            }
        )

        # response.in_progress
        yield sse_event(
            {
                "type": "response.in_progress",
                "response": {
                    "id": response_id,
                    "object": "response",
                    "status": "in_progress",
                    "output": [],
                },
            }
        )

        async for chunk in stream:
            # Handle text content
            if chunk.content:
                if not message_started:
                    # First text delta: open a message item and its first
                    # (empty) content part before streaming deltas.
                    current_message_id = generate_id("msg")
                    message_started = True

                    yield sse_event(
                        {
                            "type": "response.output_item.added",
                            "output_index": output_index,
                            "item": make_message_item(current_message_id, "", "in_progress"),
                        }
                    )

                    yield sse_event(
                        {
                            "type": "response.content_part.added",
                            "item_id": current_message_id,
                            "output_index": output_index,
                            "content_index": content_index,
                            "part": make_text_content(""),
                        }
                    )

                accumulated_content += chunk.content

                yield sse_event(
                    {
                        "type": "response.output_text.delta",
                        "item_id": current_message_id,
                        "output_index": output_index,
                        "content_index": content_index,
                        "delta": chunk.content,
                    }
                )

            # Handle tool call delta
            if chunk.tool_call_delta:
                delta = chunk.tool_call_delta
                if message_started and current_message_id:
                    # Close current message: a tool call interrupts any
                    # in-progress text message.
                    for evt in _close_message_events(
                        current_message_id,
                        output_index,
                        content_index,
                        accumulated_content,
                    ):
                        yield evt
                    output_items.append(make_message_item(current_message_id, accumulated_content))
                    output_index += 1
                    message_started = False
                    current_message_id = None

                # Forward the raw delta; item_id/output_index come from the
                # provider chunk when present.
                yield sse_event(
                    {
                        "type": "response.function_call_arguments.delta",
                        "item_id": delta.get("item_id", ""),
                        "output_index": delta.get("output_index", output_index),
                        "delta": delta.get("delta", ""),
                    }
                )

            # Handle complete tool call
            if chunk.tool_call:
                tc = chunk.tool_call
                if message_started and current_message_id:
                    # Same message-closing dance as for tool_call_delta.
                    for evt in _close_message_events(
                        current_message_id,
                        output_index,
                        content_index,
                        accumulated_content,
                    ):
                        yield evt
                    output_items.append(make_message_item(current_message_id, accumulated_content))
                    output_index += 1
                    message_started = False
                    current_message_id = None

                fc_id = generate_id("fc")
                fc_item = make_function_call_item(fc_id, tc.call_id, tc.name, tc.arguments)

                # Emit the full added → delta → done → item.done sequence
                # for a tool call that arrived already complete.
                yield sse_event(
                    {
                        "type": "response.output_item.added",
                        "output_index": output_index,
                        "item": make_function_call_item(
                            fc_id, tc.call_id, tc.name, "", "in_progress"
                        ),
                    }
                )

                yield sse_event(
                    {
                        "type": "response.function_call_arguments.delta",
                        "item_id": fc_id,
                        "output_index": output_index,
                        "delta": tc.arguments,
                    }
                )

                yield sse_event(
                    {
                        "type": "response.function_call_arguments.done",
                        "item_id": fc_id,
                        "output_index": output_index,
                        "arguments": tc.arguments,
                    }
                )

                yield sse_event(
                    {
                        "type": "response.output_item.done",
                        "output_index": output_index,
                        "item": fc_item,
                    }
                )

                output_items.append(fc_item)
                output_index += 1

            if chunk.usage:
                # Keep the most recent usage payload for response.completed.
                final_usage = chunk.usage

            if chunk.finish_reason:
                stream_completed = True

                # NOTE(review): message_started/current_message_id are not
                # reset here, and the loop keeps consuming chunks — a second
                # finish_reason chunk would re-emit the close events and
                # append a duplicate completed response; confirm the provider
                # sends finish_reason at most once.
                if message_started and current_message_id:
                    for evt in _close_message_events(
                        current_message_id,
                        output_index,
                        content_index,
                        accumulated_content,
                    ):
                        yield evt
                    output_items.append(make_message_item(current_message_id, accumulated_content))

                yield sse_event(
                    {
                        "type": "response.completed",
                        "response": {
                            "id": response_id,
                            "object": "response",
                            "status": "completed",
                            "model": request.model,
                            "output": output_items,
                            "usage": final_usage,
                        },
                    }
                )

        if not stream_completed:
            # Defensive fallback: some providers end the stream without a
            # finish_reason; synthesize the completion events ourselves.
            logger.warning("Stream ended without finish_reason, sending completion events")

            if message_started and current_message_id:
                for evt in _close_message_events(
                    current_message_id,
                    output_index,
                    content_index,
                    accumulated_content,
                ):
                    yield evt
                output_items.append(make_message_item(current_message_id, accumulated_content))

            yield sse_event(
                {
                    "type": "response.completed",
                    "response": {
                        "id": response_id,
                        "object": "response",
                        "status": "completed",
                        "model": request.model,
                        "output": output_items,
                        "usage": final_usage,
                    },
                }
            )

        # Terminal sentinel expected by OpenAI-style SSE clients.
        yield "data: [DONE]\n\n"

    except ProviderError as e:
        # Streaming responses cannot change the HTTP status after headers
        # are sent, so surface the failure as an SSE error payload.
        error_data = {"error": {"message": str(e), "type": "provider_error"}}
        yield f"data: {json.dumps(error_data)}\n\n"
485
+
486
+
487
def _close_message_events(
    msg_id: str, output_index: int, content_index: int, text: str
) -> list[str]:
    """Emit the three SSE events that finalize an in-progress message item.

    Order matters: output_text.done, then content_part.done, then
    output_item.done, matching the Responses API streaming contract.
    """
    text_done = sse_event(
        {
            "type": "response.output_text.done",
            "item_id": msg_id,
            "output_index": output_index,
            "content_index": content_index,
            "text": text,
        }
    )
    part_done = sse_event(
        {
            "type": "response.content_part.done",
            "item_id": msg_id,
            "output_index": output_index,
            "content_index": content_index,
            "part": make_text_content(text),
        }
    )
    item_done = sse_event(
        {
            "type": "response.output_item.done",
            "output_index": output_index,
            "item": make_message_item(msg_id, text),
        }
    )
    return [text_done, part_done, item_done]
@@ -25,6 +25,23 @@ from router_maestro.server.schemas.openai import (
25
25
  ModelList,
26
26
  ModelObject,
27
27
  )
28
+ from router_maestro.server.schemas.responses import (
29
+ ResponsesDeltaEvent,
30
+ ResponsesDoneEvent,
31
+ ResponsesFunctionCallInput,
32
+ ResponsesFunctionCallOutput,
33
+ ResponsesFunctionTool,
34
+ ResponsesInputMessage,
35
+ ResponsesInputTextContent,
36
+ ResponsesMessageOutput,
37
+ ResponsesOutputText,
38
+ ResponsesReasoningOutput,
39
+ ResponsesRequest,
40
+ ResponsesResponse,
41
+ ResponsesStreamEvent,
42
+ ResponsesToolChoiceFunction,
43
+ ResponsesUsage,
44
+ )
28
45
 
29
46
  __all__ = [
30
47
  # Admin schemas
@@ -50,4 +67,20 @@ __all__ = [
50
67
  "ErrorResponse",
51
68
  "ModelList",
52
69
  "ModelObject",
70
+ # Responses API schemas
71
+ "ResponsesDeltaEvent",
72
+ "ResponsesDoneEvent",
73
+ "ResponsesFunctionCallInput",
74
+ "ResponsesFunctionCallOutput",
75
+ "ResponsesFunctionTool",
76
+ "ResponsesInputMessage",
77
+ "ResponsesInputTextContent",
78
+ "ResponsesMessageOutput",
79
+ "ResponsesOutputText",
80
+ "ResponsesReasoningOutput",
81
+ "ResponsesRequest",
82
+ "ResponsesResponse",
83
+ "ResponsesStreamEvent",
84
+ "ResponsesToolChoiceFunction",
85
+ "ResponsesUsage",
53
86
  ]