router-maestro 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -0,0 +1,606 @@
+"""Responses API route for Codex models."""
+
+import json
+import time
+import uuid
+from collections.abc import AsyncGenerator
+from typing import Any
+
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
+
+from router_maestro.providers import ProviderError
+from router_maestro.providers import ResponsesRequest as InternalResponsesRequest
+from router_maestro.routing import Router, get_router
+from router_maestro.server.schemas import (
+    ResponsesRequest,
+    ResponsesResponse,
+    ResponsesUsage,
+)
+from router_maestro.utils import get_logger
+
+logger = get_logger("server.routes.responses")
+
+router = APIRouter()
+
+
+def generate_id(prefix: str) -> str:
+    """Generate a unique ID with given prefix."""
+    return f"{prefix}-{uuid.uuid4().hex[:16]}"
+
+
+def sse_event(data: dict[str, Any]) -> str:
+    """Format data as SSE event with event type field."""
+    event_type = data.get("type", "")
+    return f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
+
+
+def extract_text_from_content(content: str | list[Any]) -> str:
+    """Extract text from content which can be a string or list of content blocks."""
+    if isinstance(content, str):
+        return content
+
+    texts = []
+    for block in content:
+        if isinstance(block, dict):
+            if block.get("type") in ("input_text", "output_text"):
+                texts.append(block.get("text", ""))
+            elif "text" in block:
+                texts.append(block.get("text", ""))
+        elif hasattr(block, "text"):
+            texts.append(block.text)
+    return "".join(texts)
+
+
+def convert_content_to_serializable(content: Any) -> Any:
+    """Convert content to JSON-serializable format.
+
+    Handles Pydantic models and nested structures.
+    """
+    if isinstance(content, str):
+        return content
+    if hasattr(content, "model_dump"):
+        return content.model_dump(exclude_none=True)
+    if isinstance(content, list):
+        return [convert_content_to_serializable(item) for item in content]
+    if isinstance(content, dict):
+        return {k: convert_content_to_serializable(v) for k, v in content.items()}
+    return content
+
+
+def convert_input_to_internal(
+    input_data: str | list[Any],
+) -> str | list[dict[str, Any]]:
+    """Convert the incoming input format to internal format.
+
+    Preserves the original content format (string or array) as the upstream
+    Copilot API accepts both formats. Converts Pydantic models to dicts.
+    """
+    if isinstance(input_data, str):
+        return input_data
+
+    items = []
+    for item in input_data:
+        if isinstance(item, dict):
+            item_type = item.get("type", "message")
+
+            if item_type == "message" or (item_type is None and "role" in item):
+                role = item.get("role", "user")
+                content = item.get("content", "")
+                # Convert content to serializable format
+                content = convert_content_to_serializable(content)
+                items.append({"type": "message", "role": role, "content": content})
+
+            elif item_type == "function_call":
+                items.append(
+                    {
+                        "type": "function_call",
+                        "id": item.get("id"),
+                        "call_id": item.get("call_id"),
+                        "name": item.get("name"),
+                        "arguments": item.get("arguments", "{}"),
+                        "status": item.get("status", "completed"),
+                    }
+                )
+
+            elif item_type == "function_call_output":
+                output = item.get("output", "")
+                if not isinstance(output, str):
+                    output = json.dumps(output)
+                items.append(
+                    {
+                        "type": "function_call_output",
+                        "call_id": item.get("call_id"),
+                        "output": output,
+                    }
+                )
+            else:
+                items.append(convert_content_to_serializable(item))
+
+        elif hasattr(item, "model_dump"):
+            # Pydantic model - convert to dict
+            items.append(item.model_dump(exclude_none=True))
+
+        elif hasattr(item, "role") and hasattr(item, "content"):
+            # Object with role and content attributes
+            content = convert_content_to_serializable(item.content)
+            items.append({"type": "message", "role": item.role, "content": content})
+
+    return items
+
+
+def convert_tools_to_internal(tools: list[Any] | None) -> list[dict[str, Any]] | None:
+    """Convert tools to internal format."""
+    if not tools:
+        return None
+    result = []
+    for tool in tools:
+        if isinstance(tool, dict):
+            result.append(tool)
+        elif hasattr(tool, "model_dump"):
+            result.append(tool.model_dump(exclude_none=True))
+        else:
+            result.append(dict(tool))
+    return result
+
+
+def convert_tool_choice_to_internal(
+    tool_choice: str | Any | None,
+) -> str | dict[str, Any] | None:
+    """Convert tool_choice to internal format."""
+    if tool_choice is None:
+        return None
+    if isinstance(tool_choice, str):
+        return tool_choice
+    if isinstance(tool_choice, dict):
+        return tool_choice
+    if hasattr(tool_choice, "model_dump"):
+        return tool_choice.model_dump(exclude_none=True)
+    return dict(tool_choice)
+
+
+def make_text_content(text: str) -> dict[str, Any]:
+    """Create output_text content block."""
+    return {"type": "output_text", "text": text, "annotations": []}
+
+
+def make_usage(raw_usage: dict[str, Any] | None) -> dict[str, Any] | None:
+    """Create properly structured usage object matching OpenAI spec."""
+    if not raw_usage:
+        return None
+
+    input_tokens = raw_usage.get("input_tokens", 0)
+    output_tokens = raw_usage.get("output_tokens", 0)
+
+    return {
+        "input_tokens": input_tokens,
+        "input_tokens_details": {"cached_tokens": 0},
+        "output_tokens": output_tokens,
+        "output_tokens_details": {"reasoning_tokens": 0},
+        "total_tokens": input_tokens + output_tokens,
+    }
+
+
+def make_message_item(msg_id: str, text: str, status: str = "completed") -> dict[str, Any]:
+    """Create message output item."""
+    return {
+        "type": "message",
+        "id": msg_id,
+        "role": "assistant",
+        "content": [make_text_content(text)],
+        "status": status,
+    }
+
+
+def make_function_call_item(
+    fc_id: str, call_id: str, name: str, arguments: str, status: str = "completed"
+) -> dict[str, Any]:
+    """Create function_call output item."""
+    return {
+        "type": "function_call",
+        "id": fc_id,
+        "call_id": call_id,
+        "name": name,
+        "arguments": arguments,
+        "status": status,
+    }
+
+
+@router.post("/api/openai/v1/responses")
+async def create_response(request: ResponsesRequest):
+    """Handle Responses API requests (for Codex models)."""
+    request_id = generate_id("req")
+    start_time = time.time()
+
+    logger.info(
+        "Received responses request: req_id=%s, model=%s, stream=%s, has_tools=%s",
+        request_id,
+        request.model,
+        request.stream,
+        request.tools is not None,
+    )
+
+    model_router = get_router()
+
+    input_value = convert_input_to_internal(request.input)
+
+    internal_request = InternalResponsesRequest(
+        model=request.model,
+        input=input_value,
+        stream=request.stream,
+        instructions=request.instructions,
+        temperature=request.temperature,
+        max_output_tokens=request.max_output_tokens,
+        tools=convert_tools_to_internal(request.tools),
+        tool_choice=convert_tool_choice_to_internal(request.tool_choice),
+        parallel_tool_calls=request.parallel_tool_calls,
+    )
+
+    if request.stream:
+        return StreamingResponse(
+            stream_response(model_router, internal_request, request_id, start_time),
+            media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+                "X-Accel-Buffering": "no",
+            },
+        )
+
+    try:
+        response, provider_name = await model_router.responses_completion(internal_request)
+
+        usage = None
+        if response.usage:
+            usage = ResponsesUsage(
+                input_tokens=response.usage.get("input_tokens", 0),
+                output_tokens=response.usage.get("output_tokens", 0),
+                total_tokens=response.usage.get("total_tokens", 0),
+            )
+
+        response_id = generate_id("resp")
+        output: list[dict[str, Any]] = []
+
+        if response.content:
+            message_id = generate_id("msg")
+            output.append(make_message_item(message_id, response.content))
+
+        if response.tool_calls:
+            for tc in response.tool_calls:
+                fc_id = generate_id("fc")
+                output.append(make_function_call_item(fc_id, tc.call_id, tc.name, tc.arguments))
+
+        return ResponsesResponse(
+            id=response_id,
+            model=response.model,
+            status="completed",
+            output=output,
+            usage=usage,
+        )
+    except ProviderError as e:
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.error(
+            "Responses request failed: req_id=%s, elapsed=%.1fms, error=%s",
+            request_id,
+            elapsed_ms,
+            e,
+        )
+        raise HTTPException(status_code=e.status_code, detail=str(e)) from e
+
+
+async def stream_response(
+    model_router: Router,
+    request: InternalResponsesRequest,
+    request_id: str,
+    start_time: float,
+) -> AsyncGenerator[str, None]:
+    """Stream Responses API response."""
+    response_id = generate_id("resp")  # created before the try so the error handler can use them
+    created_at = int(time.time())
+    try:
+        stream, provider_name = await model_router.responses_completion_stream(request)
+
+        logger.debug(
+            "Stream started: req_id=%s, resp_id=%s, provider=%s",
+            request_id,
+            response_id,
+            provider_name,
+        )
+
+        # Base response object with all required fields (matching OpenAI spec)
+        base_response = {
+            "id": response_id,
+            "object": "response",
+            "created_at": created_at,
+            "model": request.model,
+            "error": None,
+            "incomplete_details": None,
+        }
+
+        output_items: list[dict[str, Any]] = []
+        output_index = 0
+        content_index = 0
+
+        current_message_id: str | None = None
+        accumulated_content = ""
+        message_started = False
+
+        final_usage = None
+        stream_completed = False
+
+        # response.created
+        yield sse_event(
+            {
+                "type": "response.created",
+                "response": {
+                    **base_response,
+                    "status": "in_progress",
+                    "output": [],
+                },
+            }
+        )
+
+        # response.in_progress
+        yield sse_event(
+            {
+                "type": "response.in_progress",
+                "response": {
+                    **base_response,
+                    "status": "in_progress",
+                    "output": [],
+                },
+            }
+        )
+
+        async for chunk in stream:
+            # Handle text content
+            if chunk.content:
+                if not message_started:
+                    current_message_id = generate_id("msg")
+                    message_started = True
+
+                    # Note: content starts as empty array, matching OpenAI spec
+                    yield sse_event(
+                        {
+                            "type": "response.output_item.added",
+                            "output_index": output_index,
+                            "item": {
+                                "type": "message",
+                                "id": current_message_id,
+                                "role": "assistant",
+                                "content": [],
+                                "status": "in_progress",
+                            },
+                        }
+                    )
+
+                    yield sse_event(
+                        {
+                            "type": "response.content_part.added",
+                            "item_id": current_message_id,
+                            "output_index": output_index,
+                            "content_index": content_index,
+                            "part": make_text_content(""),
+                        }
+                    )
+
+                accumulated_content += chunk.content
+
+                yield sse_event(
+                    {
+                        "type": "response.output_text.delta",
+                        "item_id": current_message_id,
+                        "output_index": output_index,
+                        "content_index": content_index,
+                        "delta": chunk.content,
+                    }
+                )
+
+            # Handle tool call delta
+            if chunk.tool_call_delta:
+                delta = chunk.tool_call_delta
+                if message_started and current_message_id:
+                    # Close current message
+                    for evt in _close_message_events(
+                        current_message_id,
+                        output_index,
+                        content_index,
+                        accumulated_content,
+                    ):
+                        yield evt
+                    output_items.append(make_message_item(current_message_id, accumulated_content))
+                    output_index += 1
+                    message_started = False
+                    current_message_id = None
+
+                yield sse_event(
+                    {
+                        "type": "response.function_call_arguments.delta",
+                        "item_id": delta.get("item_id", ""),
+                        "output_index": delta.get("output_index", output_index),
+                        "delta": delta.get("delta", ""),
+                    }
+                )
+
+            # Handle complete tool call
+            if chunk.tool_call:
+                tc = chunk.tool_call
+                if message_started and current_message_id:
+                    for evt in _close_message_events(
+                        current_message_id,
+                        output_index,
+                        content_index,
+                        accumulated_content,
+                    ):
+                        yield evt
+                    output_items.append(make_message_item(current_message_id, accumulated_content))
+                    output_index += 1
+                    message_started = False
+                    current_message_id = None
+
+                fc_id = generate_id("fc")
+                fc_item = make_function_call_item(fc_id, tc.call_id, tc.name, tc.arguments)
+
+                yield sse_event(
+                    {
+                        "type": "response.output_item.added",
+                        "output_index": output_index,
+                        "item": make_function_call_item(
+                            fc_id, tc.call_id, tc.name, "", "in_progress"
+                        ),
+                    }
+                )
+
+                yield sse_event(
+                    {
+                        "type": "response.function_call_arguments.delta",
+                        "item_id": fc_id,
+                        "output_index": output_index,
+                        "delta": tc.arguments,
+                    }
+                )
+
+                yield sse_event(
+                    {
+                        "type": "response.function_call_arguments.done",
+                        "item_id": fc_id,
+                        "output_index": output_index,
+                        "arguments": tc.arguments,
+                    }
+                )
+
+                yield sse_event(
+                    {
+                        "type": "response.output_item.done",
+                        "output_index": output_index,
+                        "item": fc_item,
+                    }
+                )
+
+                output_items.append(fc_item)
+                output_index += 1
+
+            if chunk.usage:
+                final_usage = chunk.usage
+
+            if chunk.finish_reason:
+                stream_completed = True
+
+                if message_started and current_message_id:
+                    for evt in _close_message_events(
+                        current_message_id,
+                        output_index,
+                        content_index,
+                        accumulated_content,
+                    ):
+                        yield evt
+                    output_items.append(make_message_item(current_message_id, accumulated_content))
+
+                yield sse_event(
+                    {
+                        "type": "response.completed",
+                        "response": {
+                            **base_response,
+                            "status": "completed",
+                            "output": output_items,
+                            "usage": make_usage(final_usage),
+                        },
+                    }
+                )
+
+        if not stream_completed:
+            logger.warning("Stream ended without finish_reason, sending completion events")
+
+            if message_started and current_message_id:
+                for evt in _close_message_events(
+                    current_message_id,
+                    output_index,
+                    content_index,
+                    accumulated_content,
+                ):
+                    yield evt
+                output_items.append(make_message_item(current_message_id, accumulated_content))
+
+            yield sse_event(
+                {
+                    "type": "response.completed",
+                    "response": {
+                        **base_response,
+                        "status": "completed",
+                        "output": output_items,
+                        "usage": make_usage(final_usage),
+                    },
+                }
+            )
+
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.info(
+            "Stream completed: req_id=%s, elapsed=%.1fms, output_items=%d",
+            request_id,
+            elapsed_ms,
+            len(output_items),
+        )
+
+        # NOTE: Do NOT send "data: [DONE]\n\n" - agent-maestro doesn't send it
+        # for Responses API
+
+    except ProviderError as e:
+        elapsed_ms = (time.time() - start_time) * 1000
+        logger.error(
+            "Stream failed: req_id=%s, elapsed=%.1fms, error=%s",
+            request_id,
+            elapsed_ms,
+            e,
+        )
+        # Send response.failed event matching OpenAI spec
+        yield sse_event(
+            {
+                "type": "response.failed",
+                "response": {
+                    "id": response_id,
+                    "object": "response",
+                    "status": "failed",
+                    "created_at": created_at,
+                    "model": request.model,
+                    "output": [],
+                    "error": {
+                        "code": "server_error",
+                        "message": str(e),
+                    },
+                    "incomplete_details": None,
+                },
+            }
+        )
+
+
+def _close_message_events(
+    msg_id: str, output_index: int, content_index: int, text: str
+) -> list[str]:
+    """Generate events to close a message output item."""
+    return [
+        sse_event(
+            {
+                "type": "response.output_text.done",
+                "item_id": msg_id,
+                "output_index": output_index,
+                "content_index": content_index,
+                "text": text,
+            }
+        ),
+        sse_event(
+            {
+                "type": "response.content_part.done",
+                "item_id": msg_id,
+                "output_index": output_index,
+                "content_index": content_index,
+                "part": make_text_content(text),
+            }
+        ),
+        sse_event(
+            {
+                "type": "response.output_item.done",
+                "output_index": output_index,
+                "item": make_message_item(msg_id, text),
+            }
+        ),
+    ]
@@ -25,6 +25,23 @@ from router_maestro.server.schemas.openai import (
     ModelList,
     ModelObject,
 )
+from router_maestro.server.schemas.responses import (
+    ResponsesDeltaEvent,
+    ResponsesDoneEvent,
+    ResponsesFunctionCallInput,
+    ResponsesFunctionCallOutput,
+    ResponsesFunctionTool,
+    ResponsesInputMessage,
+    ResponsesInputTextContent,
+    ResponsesMessageOutput,
+    ResponsesOutputText,
+    ResponsesReasoningOutput,
+    ResponsesRequest,
+    ResponsesResponse,
+    ResponsesStreamEvent,
+    ResponsesToolChoiceFunction,
+    ResponsesUsage,
+)
 
 __all__ = [
     # Admin schemas
@@ -50,4 +67,20 @@ __all__ = [
     "ErrorResponse",
     "ModelList",
     "ModelObject",
+    # Responses API schemas
+    "ResponsesDeltaEvent",
+    "ResponsesDoneEvent",
+    "ResponsesFunctionCallInput",
+    "ResponsesFunctionCallOutput",
+    "ResponsesFunctionTool",
+    "ResponsesInputMessage",
+    "ResponsesInputTextContent",
+    "ResponsesMessageOutput",
+    "ResponsesOutputText",
+    "ResponsesReasoningOutput",
+    "ResponsesRequest",
+    "ResponsesResponse",
+    "ResponsesStreamEvent",
+    "ResponsesToolChoiceFunction",
+    "ResponsesUsage",
 ]
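
For reviewers who want to exercise the new streaming endpoint, here is a minimal client sketch. It is illustrative only and not part of the package: it assumes a router-maestro server listening on http://localhost:8000, the httpx library, and a placeholder model name. It prints only the response.output_text.delta events emitted by the streaming branch above, ignoring tool-call and lifecycle events for brevity.

import asyncio
import json

import httpx


async def main() -> None:
    # "codex-mini" is a placeholder; use whatever model the router exposes.
    payload = {"model": "codex-mini", "input": "Say hello", "stream": True}
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream(
            "POST", "http://localhost:8000/api/openai/v1/responses", json=payload
        ) as resp:
            async for line in resp.aiter_lines():
                # SSE frames arrive as "event: <type>" / "data: <json>" pairs.
                if not line.startswith("data: "):
                    continue
                event = json.loads(line[len("data: "):])
                if event.get("type") == "response.output_text.delta":
                    print(event["delta"], end="", flush=True)
                elif event.get("type") == "response.completed":
                    print()


asyncio.run(main())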