router-maestro 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- router_maestro/__init__.py +1 -1
- router_maestro/cli/config.py +126 -0
- router_maestro/providers/__init__.py +8 -0
- router_maestro/providers/base.py +80 -0
- router_maestro/providers/copilot.py +322 -1
- router_maestro/routing/router.py +158 -2
- router_maestro/server/app.py +8 -1
- router_maestro/server/routes/__init__.py +8 -1
- router_maestro/server/routes/anthropic.py +172 -0
- router_maestro/server/routes/chat.py +1 -2
- router_maestro/server/routes/models.py +1 -2
- router_maestro/server/routes/responses.py +517 -0
- router_maestro/server/schemas/__init__.py +33 -0
- router_maestro/server/schemas/anthropic.py +21 -0
- router_maestro/server/schemas/responses.py +214 -0
- {router_maestro-0.1.5.dist-info → router_maestro-0.1.7.dist-info}/METADATA +27 -3
- {router_maestro-0.1.5.dist-info → router_maestro-0.1.7.dist-info}/RECORD +20 -18
- {router_maestro-0.1.5.dist-info → router_maestro-0.1.7.dist-info}/WHEEL +0 -0
- {router_maestro-0.1.5.dist-info → router_maestro-0.1.7.dist-info}/entry_points.txt +0 -0
- {router_maestro-0.1.5.dist-info → router_maestro-0.1.7.dist-info}/licenses/LICENSE +0 -0
router_maestro/routing/router.py
CHANGED
@@ -19,6 +19,9 @@ from router_maestro.providers import (
     ModelInfo,
     OpenAICompatibleProvider,
     ProviderError,
+    ResponsesRequest,
+    ResponsesResponse,
+    ResponsesStreamChunk,
 )
 from router_maestro.utils import get_logger
 
@@ -382,7 +385,7 @@ class Router:
         try:
             await provider.ensure_token()
             if is_stream:
-                stream =
+                stream = provider.chat_completion_stream(actual_request)
                 logger.info("Stream request routed to %s", provider_name)
                 return stream, provider_name
             else:
@@ -417,7 +420,7 @@ class Router:
         try:
             await other_provider.ensure_token()
             if is_stream:
-                stream =
+                stream = other_provider.chat_completion_stream(fallback_request)
                 logger.info("Stream fallback succeeded via %s", other_name)
                 return stream, other_name
             else:
@@ -479,6 +482,159 @@ class Router:
         )
         return result, used_provider  # type: ignore
 
+    def _create_responses_request_with_model(
+        self, original_request: ResponsesRequest, model_id: str
+    ) -> ResponsesRequest:
+        """Create a new ResponsesRequest with a different model ID.
+
+        Args:
+            original_request: The original request
+            model_id: The new model ID to use
+
+        Returns:
+            New ResponsesRequest with updated model
+        """
+        return ResponsesRequest(
+            model=model_id,
+            input=original_request.input,
+            stream=original_request.stream,
+            instructions=original_request.instructions,
+            temperature=original_request.temperature,
+            max_output_tokens=original_request.max_output_tokens,
+            tools=original_request.tools,
+            tool_choice=original_request.tool_choice,
+            parallel_tool_calls=original_request.parallel_tool_calls,
+        )
+
+    async def _execute_responses_with_fallback(
+        self,
+        request: ResponsesRequest,
+        provider_name: str,
+        actual_model_id: str,
+        provider: BaseProvider,
+        fallback: bool,
+        is_stream: bool,
+    ) -> tuple[ResponsesResponse | AsyncIterator[ResponsesStreamChunk], str]:
+        """Execute Responses API request with fallback support.
+
+        Args:
+            request: Original responses request
+            provider_name: Name of the primary provider
+            actual_model_id: The actual model ID to use
+            provider: The primary provider instance
+            fallback: Whether to try fallback providers on error
+            is_stream: Whether this is a streaming request
+
+        Returns:
+            Tuple of (response or stream, provider_name)
+
+        Raises:
+            ProviderError: If all providers fail
+        """
+        actual_request = self._create_responses_request_with_model(request, actual_model_id)
+
+        try:
+            await provider.ensure_token()
+            if is_stream:
+                stream = provider.responses_completion_stream(actual_request)
+                logger.info("Responses stream request routed to %s", provider_name)
+                return stream, provider_name
+            else:
+                response = await provider.responses_completion(actual_request)
+                logger.info("Responses request completed via %s", provider_name)
+                return response, provider_name
+        except ProviderError as e:
+            logger.warning("Provider %s failed for responses: %s", provider_name, e)
+            if not fallback or not e.retryable:
+                raise
+
+            # Load fallback config
+            priorities_config = self._get_priorities_config()
+            fallback_config = priorities_config.fallback
+
+            if fallback_config.strategy == FallbackStrategy.NONE:
+                raise
+
+            # Get fallback candidates
+            candidates = self._get_fallback_candidates(
+                provider_name, actual_model_id, fallback_config.strategy
+            )
+
+            # Try fallback candidates up to maxRetries
+            for i, (other_name, other_model_id, other_provider) in enumerate(candidates):
+                if i >= fallback_config.maxRetries:
+                    break
+
+                logger.info("Trying responses fallback: %s/%s", other_name, other_model_id)
+                fallback_request = self._create_responses_request_with_model(
+                    request, other_model_id
+                )
+
+                try:
+                    await other_provider.ensure_token()
+                    if is_stream:
+                        stream = other_provider.responses_completion_stream(fallback_request)
+                        logger.info("Responses stream fallback succeeded via %s", other_name)
+                        return stream, other_name
+                    else:
+                        response = await other_provider.responses_completion(fallback_request)
+                        logger.info("Responses fallback succeeded via %s", other_name)
+                        return response, other_name
+                except ProviderError as fallback_error:
+                    logger.warning("Responses fallback %s failed: %s", other_name, fallback_error)
+                    continue
+            raise
+
+    async def responses_completion(
+        self,
+        request: ResponsesRequest,
+        fallback: bool = True,
+    ) -> tuple[ResponsesResponse, str]:
+        """Route a Responses API completion request.
+
+        Args:
+            request: Responses completion request
+            fallback: Whether to try fallback providers on error
+
+        Returns:
+            Tuple of (response, provider_name)
+
+        Raises:
+            ProviderError: If model not found or all providers fail
+        """
+        provider_name, actual_model_id, provider = await self._resolve_provider(request.model)
+        logger.info("Routing responses request to %s/%s", provider_name, actual_model_id)
+
+        result, used_provider = await self._execute_responses_with_fallback(
+            request, provider_name, actual_model_id, provider, fallback, is_stream=False
+        )
+        return result, used_provider  # type: ignore
+
+    async def responses_completion_stream(
+        self,
+        request: ResponsesRequest,
+        fallback: bool = True,
+    ) -> tuple[AsyncIterator[ResponsesStreamChunk], str]:
+        """Route a streaming Responses API completion request.
+
+        Args:
+            request: Responses completion request
+            fallback: Whether to try fallback providers on error
+
+        Returns:
+            Tuple of (stream iterator, provider_name)
+
+        Raises:
+            ProviderError: If model not found or all providers fail
+        """
+        provider_name, actual_model_id, provider = await self._resolve_provider(request.model)
+        logger.info("Routing responses stream request to %s/%s", provider_name, actual_model_id)
+
+        result, used_provider = await self._execute_responses_with_fallback(
+            request, provider_name, actual_model_id, provider, fallback, is_stream=True
+        )
+        return result, used_provider  # type: ignore
+
     async def list_models(self) -> list[ModelInfo]:
         """List all available models from all authenticated providers.
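Usage note: the diff above adds Responses API routing to `Router` alongside the existing chat-completion path, reusing the same retryable-error fallback loop. A minimal sketch of how the new methods might be called is below; it is not part of the package, `ResponsesRequest` is assumed to accept just `model` and `input` here, and the model ID is illustrative.

```python
# Hypothetical sketch, not from the diff: driving the new Responses API routing.
# Assumes ResponsesRequest can be built from only `model` and `input`; the other
# fields copied in _create_responses_request_with_model() are treated as optional.
from router_maestro.providers import ResponsesRequest
from router_maestro.routing import get_router


async def run_responses_example() -> None:
    router = get_router()
    request = ResponsesRequest(model="github-copilot/claude-sonnet-4", input="Say hello")

    # Non-streaming: falls back to other providers on retryable ProviderError.
    response, provider_name = await router.responses_completion(request, fallback=True)
    print("served by", provider_name)

    # Streaming variant returns an async iterator of ResponsesStreamChunk.
    stream, provider_name = await router.responses_completion_stream(request)
    async for chunk in stream:
        ...  # forward each chunk to the client
```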
|
router_maestro/server/app.py
CHANGED
@@ -9,7 +9,13 @@ from fastapi.middleware.cors import CORSMiddleware
 from router_maestro import __version__
 from router_maestro.routing import get_router
 from router_maestro.server.middleware import verify_api_key
-from router_maestro.server.routes import
+from router_maestro.server.routes import (
+    admin_router,
+    anthropic_router,
+    chat_router,
+    models_router,
+    responses_router,
+)
 from router_maestro.utils import get_logger, setup_logging
 
 logger = get_logger("server")
@@ -64,6 +70,7 @@ def create_app() -> FastAPI:
     # Include routers with API key verification
     app.include_router(chat_router, dependencies=[Depends(verify_api_key)])
     app.include_router(models_router, dependencies=[Depends(verify_api_key)])
+    app.include_router(responses_router, dependencies=[Depends(verify_api_key)])
    app.include_router(anthropic_router, dependencies=[Depends(verify_api_key)])
     app.include_router(admin_router, dependencies=[Depends(verify_api_key)])
 
router_maestro/server/routes/__init__.py
CHANGED
@@ -4,5 +4,12 @@ from router_maestro.server.routes.admin import router as admin_router
 from router_maestro.server.routes.anthropic import router as anthropic_router
 from router_maestro.server.routes.chat import router as chat_router
 from router_maestro.server.routes.models import router as models_router
+from router_maestro.server.routes.responses import router as responses_router
 
-__all__ = [
+__all__ = [
+    "admin_router",
+    "anthropic_router",
+    "chat_router",
+    "models_router",
+    "responses_router",
+]
router_maestro/server/routes/anthropic.py
CHANGED
@@ -3,6 +3,7 @@
 import json
 import uuid
 from collections.abc import AsyncGenerator
+from datetime import UTC, datetime
 
 from fastapi import APIRouter, HTTPException
 from fastapi.responses import StreamingResponse
@@ -13,6 +14,8 @@ from router_maestro.server.schemas.anthropic import (
     AnthropicCountTokensRequest,
     AnthropicMessagesRequest,
     AnthropicMessagesResponse,
+    AnthropicModelInfo,
+    AnthropicModelList,
     AnthropicStreamState,
     AnthropicTextBlock,
     AnthropicUsage,
@@ -33,6 +36,75 @@ logger = get_logger("server.routes.anthropic")
 router = APIRouter()
 
 
+TEST_RESPONSE_TEXT = "This is a test response from Router-Maestro."
+
+
+def _create_test_response() -> AnthropicMessagesResponse:
+    """Create a mock response for test model."""
+    return AnthropicMessagesResponse(
+        id=f"msg_{uuid.uuid4().hex[:24]}",
+        type="message",
+        role="assistant",
+        content=[AnthropicTextBlock(type="text", text=TEST_RESPONSE_TEXT)],
+        model="test",
+        stop_reason="end_turn",
+        stop_sequence=None,
+        usage=AnthropicUsage(input_tokens=10, output_tokens=10),
+    )
+
+
+async def _stream_test_response() -> AsyncGenerator[str, None]:
+    """Stream a mock test response."""
+    response_id = f"msg_{uuid.uuid4().hex[:24]}"
+
+    # message_start event
+    message_start = {
+        "type": "message_start",
+        "message": {
+            "id": response_id,
+            "type": "message",
+            "role": "assistant",
+            "content": [],
+            "model": "test",
+            "stop_reason": None,
+            "stop_sequence": None,
+            "usage": {"input_tokens": 10, "output_tokens": 0},
+        },
+    }
+    yield f"event: message_start\ndata: {json.dumps(message_start)}\n\n"
+
+    # content_block_start event
+    block_start = {
+        "type": "content_block_start",
+        "index": 0,
+        "content_block": {"type": "text", "text": ""},
+    }
+    yield f"event: content_block_start\ndata: {json.dumps(block_start)}\n\n"
+
+    # content_block_delta event
+    block_delta = {
+        "type": "content_block_delta",
+        "index": 0,
+        "delta": {"type": "text_delta", "text": TEST_RESPONSE_TEXT},
+    }
+    yield f"event: content_block_delta\ndata: {json.dumps(block_delta)}\n\n"
+
+    # content_block_stop event
+    block_stop = {"type": "content_block_stop", "index": 0}
+    yield f"event: content_block_stop\ndata: {json.dumps(block_stop)}\n\n"
+
+    # message_delta event
+    message_delta = {
+        "type": "message_delta",
+        "delta": {"stop_reason": "end_turn", "stop_sequence": None},
+        "usage": {"output_tokens": 10},
+    }
+    yield f"event: message_delta\ndata: {json.dumps(message_delta)}\n\n"
+
+    # message_stop event
+    yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
+
+
 @router.post("/v1/messages")
 @router.post("/api/anthropic/v1/messages")
 async def messages(request: AnthropicMessagesRequest):
@@ -42,6 +114,16 @@ async def messages(request: AnthropicMessagesRequest):
         request.model,
         request.stream,
     )
+
+    # Handle test model
+    if request.model == "test":
+        if request.stream:
+            return StreamingResponse(
+                _stream_test_response(),
+                media_type="text/event-stream",
+            )
+        return _create_test_response()
+
     model_router = get_router()
 
     # Translate Anthropic request to OpenAI format
@@ -227,3 +309,93 @@ async def stream_response(
         },
     }
     yield f"event: error\ndata: {json.dumps(error_event)}\n\n"
+
+
+def _generate_display_name(model_id: str) -> str:
+    """Generate a human-readable display name from model ID.
+
+    Transforms model IDs like 'github-copilot/claude-sonnet-4' into
+    'Claude Sonnet 4 (github-copilot)'.
+    """
+    if "/" in model_id:
+        provider, model_name = model_id.split("/", 1)
+    else:
+        provider = ""
+        model_name = model_id
+
+    # Capitalize words and handle common patterns
+    words = model_name.replace("-", " ").replace("_", " ").split()
+    display_words = []
+    for word in words:
+        # Keep version numbers as-is
+        if word.replace(".", "").isdigit():
+            display_words.append(word)
+        else:
+            display_words.append(word.capitalize())
+
+    display_name = " ".join(display_words)
+    if provider:
+        display_name = f"{display_name} ({provider})"
+
+    return display_name
+
+
+@router.get("/api/anthropic/v1/models")
+async def list_models(
+    limit: int = 20,
+    after_id: str | None = None,
+    before_id: str | None = None,
+) -> AnthropicModelList:
+    """List available models in Anthropic format.
+
+    Args:
+        limit: Maximum number of models to return (default 20)
+        after_id: Return models after this ID (for forward pagination)
+        before_id: Return models before this ID (for backward pagination)
+    """
+    model_router = get_router()
+    models = await model_router.list_models()
+
+    # Generate ISO 8601 timestamp for created_at
+    # Using current time since actual creation dates aren't tracked
+    created_at = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    # Convert to Anthropic format
+    anthropic_models = [
+        AnthropicModelInfo(
+            id=model.id,
+            created_at=created_at,
+            display_name=_generate_display_name(model.id),
+            type="model",
+        )
+        for model in models
+    ]
+
+    # Handle pagination
+    start_idx = 0
+    if after_id:
+        for i, model in enumerate(anthropic_models):
+            if model.id == after_id:
+                start_idx = i + 1
+                break
+
+    end_idx = len(anthropic_models)
+    if before_id:
+        for i, model in enumerate(anthropic_models):
+            if model.id == before_id:
+                end_idx = i
+                break
+
+    # Apply limit
+    paginated = anthropic_models[start_idx : min(start_idx + limit, end_idx)]
+
+    first_id = paginated[0].id if paginated else None
+    last_id = paginated[-1].id if paginated else None
+    has_more = (start_idx + limit) < end_idx
+
+    return AnthropicModelList(
+        data=paginated,
+        first_id=first_id,
+        last_id=last_id,
+        has_more=has_more,
+    )
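Usage note: the diff above adds a `/api/anthropic/v1/models` endpoint that returns the routed models as an `AnthropicModelList` (`data`, `first_id`, `last_id`, `has_more`), with display names built by `_generate_display_name`. A hedged client-side sketch follows; the base URL and the auth header expected by `verify_api_key` are assumptions, not shown in this diff.

```python
# Hypothetical client sketch, not from the package. Base URL and auth header are
# assumptions; the response shape follows AnthropicModelList from the diff above.
import httpx

resp = httpx.get(
    "http://localhost:8000/api/anthropic/v1/models",
    params={"limit": 20},
    headers={"Authorization": "Bearer <api-key>"},  # assumed header format
)
resp.raise_for_status()
payload = resp.json()
for model in payload["data"]:
    # e.g. "github-copilot/claude-sonnet-4" -> "Claude Sonnet 4 (github-copilot)"
    print(model["id"], "->", model["display_name"])
print("has_more:", payload["has_more"])
```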
router_maestro/server/routes/chat.py
CHANGED
@@ -27,8 +27,7 @@ logger = get_logger("server.routes.chat")
 router = APIRouter()
 
 
-@router.post("/chat/completions")
-@router.post("/v1/chat/completions")
+@router.post("/api/openai/v1/chat/completions")
 async def chat_completions(request: ChatCompletionRequest):
     """Handle chat completion requests."""
     logger.info(