router-maestro 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,9 @@ from router_maestro.providers import (
     ModelInfo,
     OpenAICompatibleProvider,
     ProviderError,
+    ResponsesRequest,
+    ResponsesResponse,
+    ResponsesStreamChunk,
 )
 from router_maestro.utils import get_logger
 
@@ -382,7 +385,7 @@ class Router:
         try:
             await provider.ensure_token()
             if is_stream:
-                stream = await provider.chat_completion_stream(actual_request)
+                stream = provider.chat_completion_stream(actual_request)
                 logger.info("Stream request routed to %s", provider_name)
                 return stream, provider_name
             else:
@@ -417,7 +420,7 @@ class Router:
             try:
                 await other_provider.ensure_token()
                 if is_stream:
-                    stream = await other_provider.chat_completion_stream(fallback_request)
+                    stream = other_provider.chat_completion_stream(fallback_request)
                     logger.info("Stream fallback succeeded via %s", other_name)
                     return stream, other_name
                 else:
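Dropping the `await` in both hunks likely reflects that `chat_completion_stream` is (or now is) an async generator: calling an async-generator function returns the iterator directly, and awaiting the call would fail. A minimal, self-contained illustration under that assumption (hypothetical names, not router-maestro code):

```python
# Illustration only: an async-generator function is called, not awaited.
import asyncio
from collections.abc import AsyncIterator


async def chunk_stream() -> AsyncIterator[str]:
    """Async generator: yields chunks; the call itself returns an iterator."""
    for chunk in ("hel", "lo"):
        yield chunk


async def main() -> None:
    stream = chunk_stream()  # correct: no await, this is already an async iterator
    async for chunk in stream:
        print(chunk)
    # await chunk_stream()   # would raise TypeError: async_generator can't be awaited


asyncio.run(main())
```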
@@ -479,6 +482,159 @@ class Router:
         )
         return result, used_provider  # type: ignore
 
+    def _create_responses_request_with_model(
+        self, original_request: ResponsesRequest, model_id: str
+    ) -> ResponsesRequest:
+        """Create a new ResponsesRequest with a different model ID.
+
+        Args:
+            original_request: The original request
+            model_id: The new model ID to use
+
+        Returns:
+            New ResponsesRequest with updated model
+        """
+        return ResponsesRequest(
+            model=model_id,
+            input=original_request.input,
+            stream=original_request.stream,
+            instructions=original_request.instructions,
+            temperature=original_request.temperature,
+            max_output_tokens=original_request.max_output_tokens,
+            tools=original_request.tools,
+            tool_choice=original_request.tool_choice,
+            parallel_tool_calls=original_request.parallel_tool_calls,
+        )
+
+    async def _execute_responses_with_fallback(
+        self,
+        request: ResponsesRequest,
+        provider_name: str,
+        actual_model_id: str,
+        provider: BaseProvider,
+        fallback: bool,
+        is_stream: bool,
+    ) -> tuple[ResponsesResponse | AsyncIterator[ResponsesStreamChunk], str]:
+        """Execute Responses API request with fallback support.
+
+        Args:
+            request: Original responses request
+            provider_name: Name of the primary provider
+            actual_model_id: The actual model ID to use
+            provider: The primary provider instance
+            fallback: Whether to try fallback providers on error
+            is_stream: Whether this is a streaming request
+
+        Returns:
+            Tuple of (response or stream, provider_name)
+
+        Raises:
+            ProviderError: If all providers fail
+        """
+        actual_request = self._create_responses_request_with_model(request, actual_model_id)
+
+        try:
+            await provider.ensure_token()
+            if is_stream:
+                stream = provider.responses_completion_stream(actual_request)
+                logger.info("Responses stream request routed to %s", provider_name)
+                return stream, provider_name
+            else:
+                response = await provider.responses_completion(actual_request)
+                logger.info("Responses request completed via %s", provider_name)
+                return response, provider_name
+        except ProviderError as e:
+            logger.warning("Provider %s failed for responses: %s", provider_name, e)
+            if not fallback or not e.retryable:
+                raise
+
+            # Load fallback config
+            priorities_config = self._get_priorities_config()
+            fallback_config = priorities_config.fallback
+
+            if fallback_config.strategy == FallbackStrategy.NONE:
+                raise
+
+            # Get fallback candidates
+            candidates = self._get_fallback_candidates(
+                provider_name, actual_model_id, fallback_config.strategy
+            )
+
+            # Try fallback candidates up to maxRetries
+            for i, (other_name, other_model_id, other_provider) in enumerate(candidates):
+                if i >= fallback_config.maxRetries:
+                    break
+
+                logger.info("Trying responses fallback: %s/%s", other_name, other_model_id)
+                fallback_request = self._create_responses_request_with_model(
+                    request, other_model_id
+                )
+
+                try:
+                    await other_provider.ensure_token()
+                    if is_stream:
+                        stream = other_provider.responses_completion_stream(fallback_request)
+                        logger.info("Responses stream fallback succeeded via %s", other_name)
+                        return stream, other_name
+                    else:
+                        response = await other_provider.responses_completion(fallback_request)
+                        logger.info("Responses fallback succeeded via %s", other_name)
+                        return response, other_name
+                except ProviderError as fallback_error:
+                    logger.warning("Responses fallback %s failed: %s", other_name, fallback_error)
+                    continue
+            raise
+
+    async def responses_completion(
+        self,
+        request: ResponsesRequest,
+        fallback: bool = True,
+    ) -> tuple[ResponsesResponse, str]:
+        """Route a Responses API completion request.
+
+        Args:
+            request: Responses completion request
+            fallback: Whether to try fallback providers on error
+
+        Returns:
+            Tuple of (response, provider_name)
+
+        Raises:
+            ProviderError: If model not found or all providers fail
+        """
+        provider_name, actual_model_id, provider = await self._resolve_provider(request.model)
+        logger.info("Routing responses request to %s/%s", provider_name, actual_model_id)
+
+        result, used_provider = await self._execute_responses_with_fallback(
+            request, provider_name, actual_model_id, provider, fallback, is_stream=False
+        )
+        return result, used_provider  # type: ignore
+
+    async def responses_completion_stream(
+        self,
+        request: ResponsesRequest,
+        fallback: bool = True,
+    ) -> tuple[AsyncIterator[ResponsesStreamChunk], str]:
+        """Route a streaming Responses API completion request.
+
+        Args:
+            request: Responses completion request
+            fallback: Whether to try fallback providers on error
+
+        Returns:
+            Tuple of (stream iterator, provider_name)
+
+        Raises:
+            ProviderError: If model not found or all providers fail
+        """
+        provider_name, actual_model_id, provider = await self._resolve_provider(request.model)
+        logger.info("Routing responses stream request to %s/%s", provider_name, actual_model_id)
+
+        result, used_provider = await self._execute_responses_with_fallback(
+            request, provider_name, actual_model_id, provider, fallback, is_stream=True
+        )
+        return result, used_provider  # type: ignore
+
     async def list_models(self) -> list[ModelInfo]:
         """List all available models from all authenticated providers.
 
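A minimal usage sketch of the new Router methods (not part of the diff; the model ID is illustrative, the import paths are inferred from the other hunks, and it is assumed that `ResponsesRequest.input` accepts a plain string and a provider is already configured):

```python
# Hypothetical sketch of calling the Responses API routing added in 0.1.7.
import asyncio

from router_maestro.providers import ResponsesRequest
from router_maestro.routing import Router


async def main() -> None:
    router = Router()
    request = ResponsesRequest(
        model="github-copilot/claude-sonnet-4",  # illustrative model ID
        input="Say hello in one sentence.",
        stream=False,
    )
    # Resolves the provider, retries fallback candidates on retryable
    # ProviderError, and returns the response plus the provider that served it.
    response, provider_name = await router.responses_completion(request)
    print(provider_name, response)

    # Streaming variant: the returned stream is an async iterator of chunks.
    stream_request = ResponsesRequest(model=request.model, input=request.input, stream=True)
    stream, provider_name = await router.responses_completion_stream(stream_request)
    async for chunk in stream:
        print(chunk)


asyncio.run(main())
```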
@@ -9,7 +9,13 @@ from fastapi.middleware.cors import CORSMiddleware
 from router_maestro import __version__
 from router_maestro.routing import get_router
 from router_maestro.server.middleware import verify_api_key
-from router_maestro.server.routes import admin_router, anthropic_router, chat_router, models_router
+from router_maestro.server.routes import (
+    admin_router,
+    anthropic_router,
+    chat_router,
+    models_router,
+    responses_router,
+)
 from router_maestro.utils import get_logger, setup_logging
 
 logger = get_logger("server")
@@ -64,6 +70,7 @@ def create_app() -> FastAPI:
     # Include routers with API key verification
     app.include_router(chat_router, dependencies=[Depends(verify_api_key)])
     app.include_router(models_router, dependencies=[Depends(verify_api_key)])
+    app.include_router(responses_router, dependencies=[Depends(verify_api_key)])
    app.include_router(anthropic_router, dependencies=[Depends(verify_api_key)])
     app.include_router(admin_router, dependencies=[Depends(verify_api_key)])
 
@@ -4,5 +4,12 @@ from router_maestro.server.routes.admin import router as admin_router
 from router_maestro.server.routes.anthropic import router as anthropic_router
 from router_maestro.server.routes.chat import router as chat_router
 from router_maestro.server.routes.models import router as models_router
+from router_maestro.server.routes.responses import router as responses_router
 
-__all__ = ["admin_router", "anthropic_router", "chat_router", "models_router"]
+__all__ = [
+    "admin_router",
+    "anthropic_router",
+    "chat_router",
+    "models_router",
+    "responses_router",
+]
@@ -3,6 +3,7 @@
 import json
 import uuid
 from collections.abc import AsyncGenerator
+from datetime import UTC, datetime
 
 from fastapi import APIRouter, HTTPException
 from fastapi.responses import StreamingResponse
@@ -13,6 +14,8 @@ from router_maestro.server.schemas.anthropic import (
     AnthropicCountTokensRequest,
     AnthropicMessagesRequest,
     AnthropicMessagesResponse,
+    AnthropicModelInfo,
+    AnthropicModelList,
     AnthropicStreamState,
     AnthropicTextBlock,
     AnthropicUsage,
@@ -33,6 +36,75 @@ logger = get_logger("server.routes.anthropic")
 router = APIRouter()
 
 
+TEST_RESPONSE_TEXT = "This is a test response from Router-Maestro."
+
+
+def _create_test_response() -> AnthropicMessagesResponse:
+    """Create a mock response for test model."""
+    return AnthropicMessagesResponse(
+        id=f"msg_{uuid.uuid4().hex[:24]}",
+        type="message",
+        role="assistant",
+        content=[AnthropicTextBlock(type="text", text=TEST_RESPONSE_TEXT)],
+        model="test",
+        stop_reason="end_turn",
+        stop_sequence=None,
+        usage=AnthropicUsage(input_tokens=10, output_tokens=10),
+    )
+
+
+async def _stream_test_response() -> AsyncGenerator[str, None]:
+    """Stream a mock test response."""
+    response_id = f"msg_{uuid.uuid4().hex[:24]}"
+
+    # message_start event
+    message_start = {
+        "type": "message_start",
+        "message": {
+            "id": response_id,
+            "type": "message",
+            "role": "assistant",
+            "content": [],
+            "model": "test",
+            "stop_reason": None,
+            "stop_sequence": None,
+            "usage": {"input_tokens": 10, "output_tokens": 0},
+        },
+    }
+    yield f"event: message_start\ndata: {json.dumps(message_start)}\n\n"
+
+    # content_block_start event
+    block_start = {
+        "type": "content_block_start",
+        "index": 0,
+        "content_block": {"type": "text", "text": ""},
+    }
+    yield f"event: content_block_start\ndata: {json.dumps(block_start)}\n\n"
+
+    # content_block_delta event
+    block_delta = {
+        "type": "content_block_delta",
+        "index": 0,
+        "delta": {"type": "text_delta", "text": TEST_RESPONSE_TEXT},
+    }
+    yield f"event: content_block_delta\ndata: {json.dumps(block_delta)}\n\n"
+
+    # content_block_stop event
+    block_stop = {"type": "content_block_stop", "index": 0}
+    yield f"event: content_block_stop\ndata: {json.dumps(block_stop)}\n\n"
+
+    # message_delta event
+    message_delta = {
+        "type": "message_delta",
+        "delta": {"stop_reason": "end_turn", "stop_sequence": None},
+        "usage": {"output_tokens": 10},
+    }
+    yield f"event: message_delta\ndata: {json.dumps(message_delta)}\n\n"
+
+    # message_stop event
+    yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
+
+
 @router.post("/v1/messages")
 @router.post("/api/anthropic/v1/messages")
 async def messages(request: AnthropicMessagesRequest):
@@ -42,6 +114,16 @@ async def messages(request: AnthropicMessagesRequest):
         request.model,
         request.stream,
     )
+
+    # Handle test model
+    if request.model == "test":
+        if request.stream:
+            return StreamingResponse(
+                _stream_test_response(),
+                media_type="text/event-stream",
+            )
+        return _create_test_response()
+
     model_router = get_router()
 
     # Translate Anthropic request to OpenAI format
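A hedged smoke-test sketch for the new built-in "test" model (not in the diff; the host, port, auth header name, and API key are placeholders, and the `requests` package is assumed to be available):

```python
# Hypothetical smoke test: the "test" model is answered by the route itself,
# so no upstream provider needs to be configured.
import requests

BASE_URL = "http://localhost:8000"  # placeholder host/port
payload = {
    "model": "test",
    "max_tokens": 64,
    "messages": [{"role": "user", "content": "ping"}],
    "stream": False,
}
resp = requests.post(
    f"{BASE_URL}/v1/messages",
    json=payload,
    headers={"x-api-key": "placeholder-key"},  # header name is an assumption
    timeout=30,
)
# Per TEST_RESPONSE_TEXT above, the text block should read
# "This is a test response from Router-Maestro."
print(resp.json()["content"][0]["text"])
```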
@@ -227,3 +309,93 @@ async def stream_response(
             },
         }
         yield f"event: error\ndata: {json.dumps(error_event)}\n\n"
+
+
+def _generate_display_name(model_id: str) -> str:
+    """Generate a human-readable display name from model ID.
+
+    Transforms model IDs like 'github-copilot/claude-sonnet-4' into
+    'Claude Sonnet 4 (github-copilot)'.
+    """
+    if "/" in model_id:
+        provider, model_name = model_id.split("/", 1)
+    else:
+        provider = ""
+        model_name = model_id
+
+    # Capitalize words and handle common patterns
+    words = model_name.replace("-", " ").replace("_", " ").split()
+    display_words = []
+    for word in words:
+        # Keep version numbers as-is
+        if word.replace(".", "").isdigit():
+            display_words.append(word)
+        else:
+            display_words.append(word.capitalize())
+
+    display_name = " ".join(display_words)
+    if provider:
+        display_name = f"{display_name} ({provider})"
+
+    return display_name
+
+
+@router.get("/api/anthropic/v1/models")
+async def list_models(
+    limit: int = 20,
+    after_id: str | None = None,
+    before_id: str | None = None,
+) -> AnthropicModelList:
+    """List available models in Anthropic format.
+
+    Args:
+        limit: Maximum number of models to return (default 20)
+        after_id: Return models after this ID (for forward pagination)
+        before_id: Return models before this ID (for backward pagination)
+    """
+    model_router = get_router()
+    models = await model_router.list_models()
+
+    # Generate ISO 8601 timestamp for created_at
+    # Using current time since actual creation dates aren't tracked
+    created_at = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    # Convert to Anthropic format
+    anthropic_models = [
+        AnthropicModelInfo(
+            id=model.id,
+            created_at=created_at,
+            display_name=_generate_display_name(model.id),
+            type="model",
+        )
+        for model in models
+    ]
+
+    # Handle pagination
+    start_idx = 0
+    if after_id:
+        for i, model in enumerate(anthropic_models):
+            if model.id == after_id:
+                start_idx = i + 1
+                break
+
+    end_idx = len(anthropic_models)
+    if before_id:
+        for i, model in enumerate(anthropic_models):
+            if model.id == before_id:
+                end_idx = i
+                break
+
+    # Apply limit
+    paginated = anthropic_models[start_idx : min(start_idx + limit, end_idx)]
+
+    first_id = paginated[0].id if paginated else None
+    last_id = paginated[-1].id if paginated else None
+    has_more = (start_idx + limit) < end_idx
+
+    return AnthropicModelList(
+        data=paginated,
+        first_id=first_id,
+        last_id=last_id,
+        has_more=has_more,
+    )
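For reference, the display-name transform behaves as its docstring describes; a short sketch (the module path comes from the routes package shown above, and the second model ID is invented to show the no-provider branch):

```python
# Demonstrating _generate_display_name; outputs follow from its logic.
from router_maestro.server.routes.anthropic import _generate_display_name

print(_generate_display_name("github-copilot/claude-sonnet-4"))  # Claude Sonnet 4 (github-copilot)
print(_generate_display_name("gpt-4.1"))                         # Gpt 4.1 (illustrative ID)
```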
@@ -27,8 +27,7 @@ logger = get_logger("server.routes.chat")
 router = APIRouter()
 
 
-@router.post("/chat/completions")
-@router.post("/v1/chat/completions")
+@router.post("/api/openai/v1/chat/completions")
 async def chat_completions(request: ChatCompletionRequest):
     """Handle chat completion requests."""
     logger.info(
@@ -15,8 +15,7 @@ def get_router() -> Router:
     return Router()
 
 
-@router.get("/models")
-@router.get("/v1/models")
+@router.get("/api/openai/v1/models")
 async def list_models() -> ModelList:
     """List available models."""
     model_router = get_router()
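Since the OpenAI-compatible endpoints move from `/v1/...` (and the unprefixed `/chat/completions` and `/models`) to `/api/openai/v1/...`, clients need an updated base URL. A hedged configuration sketch using the official `openai` package (host, port, and key are placeholders):

```python
# Hypothetical client setup after the path change (placeholder host and key;
# assumes the openai Python package is installed).
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/api/openai/v1",  # previously .../v1
    api_key="placeholder-key",
)

print([m.id for m in client.models.list()])  # GET /api/openai/v1/models
reply = client.chat.completions.create(
    model="github-copilot/claude-sonnet-4",  # illustrative model ID
    messages=[{"role": "user", "content": "ping"}],
)
print(reply.choices[0].message.content)
```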