router-maestro 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,3 @@
 """Router-Maestro: Multi-model routing and load balancing system."""

-__version__ = "0.1.5"
+__version__ = "0.1.7"
@@ -3,9 +3,11 @@
 import asyncio
 import json
 import shutil
+import tomllib
 from datetime import datetime
 from pathlib import Path

+import tomlkit
 import typer
 from rich.console import Console
 from rich.panel import Panel
@@ -24,6 +26,10 @@ CLI_TOOLS = {
         "name": "Claude Code",
         "description": "Generate settings.json for Claude Code CLI",
     },
+    "codex": {
+        "name": "OpenAI Codex",
+        "description": "Generate config.toml for OpenAI Codex CLI",
+    },
 }


@@ -35,6 +41,14 @@ def get_claude_code_paths() -> dict[str, Path]:
     }


+def get_codex_paths() -> dict[str, Path]:
+    """Get Codex config paths."""
+    return {
+        "user": Path.home() / ".codex" / "config.toml",
+        "project": Path.cwd() / ".codex" / "config.toml",
+    }
+
+
 @app.callback(invoke_without_command=True)
 def config_callback(ctx: typer.Context) -> None:
     """Generate configuration for CLI tools (interactive selection if not specified)."""
@@ -60,6 +74,8 @@ def config_callback(ctx: typer.Context) -> None:
     # Dispatch to the appropriate command
     if tool_key == "claude-code":
         claude_code_config()
+    elif tool_key == "codex":
+        codex_config()


 @app.command(name="claude-code")
@@ -175,3 +191,113 @@ def claude_code_config() -> None:
             border_style="green",
         )
     )
+
+
+@app.command(name="codex")
+def codex_config() -> None:
+    """Generate OpenAI Codex CLI config.toml for router-maestro."""
+    # Step 1: Select level
+    console.print("\n[bold]Step 1: Select configuration level[/bold]")
+    console.print(" 1. User-level (~/.codex/config.toml)")
+    console.print(" 2. Project-level (./.codex/config.toml)")
+    choice = Prompt.ask("Select", choices=["1", "2"], default="1")
+
+    paths = get_codex_paths()
+    level = "user" if choice == "1" else "project"
+    config_path = paths[level]
+
+    # Step 2: Backup if exists
+    if config_path.exists():
+        console.print(f"\n[yellow]config.toml already exists at {config_path}[/yellow]")
+        if Confirm.ask("Backup existing file?", default=True):
+            backup_path = config_path.with_suffix(
+                f".toml.backup.{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+            )
+            shutil.copy(config_path, backup_path)
+            console.print(f"[green]Backed up to {backup_path}[/green]")
+
+    # Step 3: Get models from server
+    try:
+        client = get_admin_client()
+        models = asyncio.run(client.list_models())
+    except ServerNotRunningError as e:
+        console.print(f"[red]{e}[/red]")
+        console.print("[dim]Tip: Start router-maestro server first.[/dim]")
+        raise typer.Exit(1)
+    except Exception as e:
+        console.print(f"[red]Error: {e}[/red]")
+        raise typer.Exit(1)
+
+    if not models:
+        console.print("[red]No models available. Please authenticate first.[/red]")
+        raise typer.Exit(1)
+
+    # Display models
+    console.print("\n[bold]Available models:[/bold]")
+    table = Table()
+    table.add_column("#", style="dim")
+    table.add_column("Model Key", style="green")
+    table.add_column("Name", style="white")
+    for i, model in enumerate(models, 1):
+        table.add_row(str(i), f"{model['provider']}/{model['id']}", model["name"])
+    console.print(table)
+
+    # Select model
+    console.print("\n[bold]Step 2: Select model[/bold]")
+    model_choice = Prompt.ask("Enter number (or 0 for auto-routing)", default="0")
+    selected_model = "router-maestro"
+    if model_choice != "0" and model_choice.isdigit():
+        idx = int(model_choice) - 1
+        if 0 <= idx < len(models):
+            m = models[idx]
+            selected_model = f"{m['provider']}/{m['id']}"
+
+    # Step 4: Generate config
+    client = get_admin_client()
+    base_url = (
+        client.endpoint.rstrip("/") if hasattr(client, "endpoint") else "http://localhost:8080"
+    )
+    openai_url = f"{base_url}/api/openai/v1"
+
+    # Load existing config to preserve other sections
+    existing_config: tomlkit.TOMLDocument = tomlkit.document()
+    if config_path.exists():
+        try:
+            with open(config_path, "rb") as f:
+                existing_config = tomlkit.load(f)
+        except (tomllib.TOMLDecodeError, OSError):
+            pass  # If file is corrupted, start fresh
+
+    # Update configuration
+    existing_config["model"] = selected_model
+    existing_config["model_provider"] = "router-maestro"
+
+    # Create or update model_providers section
+    if "model_providers" not in existing_config:
+        existing_config["model_providers"] = tomlkit.table()
+
+    provider_config = tomlkit.table()
+    provider_config["name"] = "Router Maestro"
+    provider_config["base_url"] = openai_url
+    provider_config["env_key"] = "ROUTER_MAESTRO_API_KEY"
+    provider_config["wire_api"] = "responses"
+    existing_config["model_providers"]["router-maestro"] = provider_config
+
+    # Write config
+    config_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(config_path, "w", encoding="utf-8") as f:
+        f.write(tomlkit.dumps(existing_config))
+
+    console.print(
+        Panel(
+            f"[green]Created {config_path}[/green]\n\n"
+            f"Model: {selected_model}\n\n"
+            f"Endpoint: {openai_url}\n\n"
+            "[dim]Start router-maestro server before using Codex:[/dim]\n"
+            " router-maestro server start\n\n"
+            "[dim]Set API key environment variable (optional):[/dim]\n"
+            " export ROUTER_MAESTRO_API_KEY=your-key",
+            title="Success",
+            border_style="green",
+        )
+    )
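
For reference, the config.toml written by this command looks roughly like the following. The values are illustrative: `model` is either `router-maestro` (auto-routing) or a `provider/id` key chosen interactively, and `base_url` depends on the admin client's endpoint.

```toml
model = "router-maestro"          # or a selected "provider/id" key
model_provider = "router-maestro"

[model_providers.router-maestro]
name = "Router Maestro"
base_url = "http://localhost:8080/api/openai/v1"
env_key = "ROUTER_MAESTRO_API_KEY"
wire_api = "responses"
```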
@@ -9,6 +9,10 @@ from router_maestro.providers.base import (
     Message,
     ModelInfo,
     ProviderError,
+    ResponsesRequest,
+    ResponsesResponse,
+    ResponsesStreamChunk,
+    ResponsesToolCall,
 )
 from router_maestro.providers.copilot import CopilotProvider
 from router_maestro.providers.openai import OpenAIProvider
@@ -23,6 +27,10 @@ __all__ = [
     "ChatResponse",
     "ChatStreamChunk",
     "ModelInfo",
+    "ResponsesRequest",
+    "ResponsesResponse",
+    "ResponsesStreamChunk",
+    "ResponsesToolCall",
     # Providers
     "CopilotProvider",
     "OpenAIProvider",
@@ -59,6 +59,53 @@ class ModelInfo:
     provider: str


+@dataclass
+class ResponsesToolCall:
+    """A tool/function call from the Responses API."""
+
+    call_id: str
+    name: str
+    arguments: str
+
+
+@dataclass
+class ResponsesRequest:
+    """Request for the Responses API (used by Codex models)."""
+
+    model: str
+    input: str | list  # Can be string or list of message dicts
+    stream: bool = False
+    instructions: str | None = None
+    temperature: float = 1.0
+    max_output_tokens: int | None = None
+    # Tool support
+    tools: list[dict] | None = None
+    tool_choice: str | dict | None = None
+    parallel_tool_calls: bool | None = None
+
+
+@dataclass
+class ResponsesResponse:
+    """Response from the Responses API."""
+
+    content: str
+    model: str
+    usage: dict | None = None
+    tool_calls: list[ResponsesToolCall] | None = None
+
+
+@dataclass
+class ResponsesStreamChunk:
+    """A chunk from streaming Responses API completion."""
+
+    content: str
+    finish_reason: str | None = None
+    usage: dict | None = None
+    # Tool call support
+    tool_call: ResponsesToolCall | None = None  # A complete tool call
+    tool_call_delta: dict | None = None  # Partial tool call for streaming
+
+
 class ProviderError(Exception):
     """Error from a provider."""

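
A quick sketch of how these dataclasses fit together. The model id and tool definition are invented for illustration, and the flattened tool shape (type/name/parameters at the top level) is an assumption based on how the Copilot changes below read tool fields:

```python
from router_maestro.providers.base import (
    ResponsesRequest,
    ResponsesResponse,
    ResponsesToolCall,
)

request = ResponsesRequest(
    model="example-codex-model",  # illustrative id
    input=[{"role": "user", "content": "What's the weather in Paris?"}],
    instructions="Answer concisely.",
    tools=[
        {
            "type": "function",  # only "function" tools survive Copilot's filtering
            "name": "get_weather",
            "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
        }
    ],
)

# A non-streaming call could then produce a response that carries a tool
# call instead of text content:
response = ResponsesResponse(
    content="",
    model=request.model,
    tool_calls=[
        ResponsesToolCall(call_id="call_1", name="get_weather", arguments='{"city": "Paris"}')
    ],
)
```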
@@ -121,3 +168,36 @@ class BaseProvider(ABC):
         Override this for providers that need token refresh.
         """
         pass
+
+    async def responses_completion(self, request: ResponsesRequest) -> ResponsesResponse:
+        """Generate a Responses API completion (for Codex models).
+
+        Args:
+            request: Responses completion request
+
+        Returns:
+            Responses completion response
+
+        Raises:
+            NotImplementedError: If provider does not support Responses API
+        """
+        raise NotImplementedError("Provider does not support Responses API")
+
+    async def responses_completion_stream(
+        self, request: ResponsesRequest
+    ) -> AsyncIterator[ResponsesStreamChunk]:
+        """Generate a streaming Responses API completion (for Codex models).
+
+        Args:
+            request: Responses completion request
+
+        Yields:
+            Responses completion chunks
+
+        Raises:
+            NotImplementedError: If provider does not support Responses API
+        """
+        raise NotImplementedError("Provider does not support Responses API")
+        # Make this a generator (required for type checking)
+        if False:
+            yield ResponsesStreamChunk(content="")
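
Note the unreachable `yield` after the `raise`: it makes `responses_completion_stream` an async generator, so the method's signature satisfies `AsyncIterator[ResponsesStreamChunk]` for type checkers, and the `NotImplementedError` surfaces on first iteration rather than at call time. A provider that supports the Responses API overrides these defaults roughly as follows (a minimal hypothetical sketch; the other abstract provider methods are omitted):

```python
from collections.abc import AsyncIterator

from router_maestro.providers.base import (
    BaseProvider,
    ResponsesRequest,
    ResponsesStreamChunk,
)


class EchoProvider(BaseProvider):  # hypothetical, for illustration only
    async def responses_completion_stream(
        self, request: ResponsesRequest
    ) -> AsyncIterator[ResponsesStreamChunk]:
        # A real provider would call its upstream API here and translate
        # server-sent events into chunks as they arrive.
        yield ResponsesStreamChunk(content=f"echo: {request.input}")
        yield ResponsesStreamChunk(content="", finish_reason="stop")
```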
@@ -7,6 +7,7 @@ import httpx

 from router_maestro.auth import AuthManager, AuthType
 from router_maestro.auth.github_oauth import get_copilot_token
+from router_maestro.auth.storage import OAuthCredential
 from router_maestro.providers.base import (
     BaseProvider,
     ChatRequest,
@@ -14,6 +15,10 @@ from router_maestro.providers.base import (
     ChatStreamChunk,
     ModelInfo,
     ProviderError,
+    ResponsesRequest,
+    ResponsesResponse,
+    ResponsesStreamChunk,
+    ResponsesToolCall,
 )
 from router_maestro.utils import get_logger

@@ -22,6 +27,7 @@ logger = get_logger("providers.copilot")
 COPILOT_BASE_URL = "https://api.githubcopilot.com"
 COPILOT_CHAT_URL = f"{COPILOT_BASE_URL}/chat/completions"
 COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
+COPILOT_RESPONSES_URL = f"{COPILOT_BASE_URL}/responses"

 # Model cache TTL in seconds (5 minutes)
 MODELS_CACHE_TTL = 300
@@ -50,7 +56,7 @@ class CopilotProvider(BaseProvider):
     async def ensure_token(self) -> None:
         """Ensure we have a valid Copilot token, refreshing if needed."""
         cred = self.auth_manager.get_credential("github-copilot")
-        if not cred or cred.type != AuthType.OAUTH:
+        if not cred or not isinstance(cred, OAuthCredential):
             logger.error("Not authenticated with GitHub Copilot")
             raise ProviderError("Not authenticated with GitHub Copilot", status_code=401)

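The auth check now tests the credential's class instead of its `type` tag. The two are equivalent at runtime for OAuth credentials, but `isinstance` also lets static type checkers narrow the credential's type, so OAuth-specific attributes type-check without casts. In sketch form (any field names beyond `OAuthCredential` itself are hypothetical):

```python
def require_oauth(cred: object) -> OAuthCredential:
    # isinstance() both validates at runtime and narrows the static type;
    # comparing cred.type to AuthType.OAUTH would only do the former.
    if not isinstance(cred, OAuthCredential):
        raise ProviderError("Not authenticated with GitHub Copilot", status_code=401)
    return cred  # type checkers see OAuthCredential here
```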
@@ -344,3 +350,318 @@
                 return self._models_cache
             logger.error("Failed to list Copilot models: %s", e)
             raise ProviderError(f"Failed to list models: {e}", retryable=True)
+
+    # Tools that are not supported by Copilot Responses API
+    UNSUPPORTED_TOOL_TYPES = {"web_search", "web_search_preview", "code_interpreter"}
+
+    def _filter_unsupported_tools(self, tools: list[dict] | None) -> list[dict] | None:
+        """Filter out tools that are not supported by Copilot API.
+
+        Args:
+            tools: List of tool definitions
+
+        Returns:
+            Filtered list of tools, or None if empty
+        """
+        if not tools:
+            return None
+
+        filtered = []
+        for tool in tools:
+            tool_type = tool.get("type", "function")
+            # Only include function tools, filter out unsupported built-in tools
+            if tool_type == "function":
+                filtered.append(tool)
+            elif tool_type not in self.UNSUPPORTED_TOOL_TYPES:
+                filtered.append(tool)
+            else:
+                logger.debug("Filtering out unsupported tool type: %s", tool_type)
+
+        return filtered if filtered else None
+
+    def _build_responses_payload(self, request: ResponsesRequest) -> dict:
+        """Build payload for Responses API request.
+
+        Args:
+            request: The responses request
+
+        Returns:
+            Payload dictionary for the API
+        """
+        payload: dict = {
+            "model": request.model,
+            "input": request.input,
+            "stream": request.stream,
+        }
+        if request.instructions:
+            payload["instructions"] = request.instructions
+        if request.temperature != 1.0:
+            payload["temperature"] = request.temperature
+        if request.max_output_tokens:
+            payload["max_output_tokens"] = request.max_output_tokens
+        # Tool support - filter out unsupported tools
+        filtered_tools = self._filter_unsupported_tools(request.tools)
+        if filtered_tools:
+            payload["tools"] = filtered_tools
+        if request.tool_choice:
+            payload["tool_choice"] = request.tool_choice
+        if request.parallel_tool_calls is not None:
+            payload["parallel_tool_calls"] = request.parallel_tool_calls
+        return payload
+
+    def _extract_response_content(self, data: dict) -> str:
+        """Extract text content from Responses API response.
+
+        Args:
+            data: The response JSON data
+
+        Returns:
+            The extracted text content
+        """
+        content = ""
+        for output in data.get("output", []):
+            if output.get("type") == "message":
+                for content_item in output.get("content", []):
+                    if content_item.get("type") == "output_text":
+                        content += content_item.get("text", "")
+        return content
+
+    def _extract_tool_calls(self, data: dict) -> list[ResponsesToolCall]:
+        """Extract tool calls from Responses API response.
+
+        Args:
+            data: The response JSON data
+
+        Returns:
+            List of tool calls
+        """
+        tool_calls = []
+        for output in data.get("output", []):
+            if output.get("type") == "function_call":
+                tool_calls.append(
+                    ResponsesToolCall(
+                        call_id=output.get("call_id", ""),
+                        name=output.get("name", ""),
+                        arguments=output.get("arguments", "{}"),
+                    )
+                )
+        return tool_calls
+
+    async def responses_completion(self, request: ResponsesRequest) -> ResponsesResponse:
+        """Generate a Responses API completion via Copilot (for Codex models)."""
+        await self.ensure_token()
+
+        payload = self._build_responses_payload(request)
+
+        logger.debug("Copilot responses completion: model=%s", request.model)
+        client = self._get_client()
+        try:
+            response = await client.post(
+                COPILOT_RESPONSES_URL,
+                json=payload,
+                headers=self._get_headers(),
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            content = self._extract_response_content(data)
+            tool_calls = self._extract_tool_calls(data)
+
+            usage = None
+            if "usage" in data:
+                usage = data["usage"]
+
+            logger.debug("Copilot responses completion successful")
+            return ResponsesResponse(
+                content=content,
+                model=data.get("model", request.model),
+                usage=usage,
+                tool_calls=tool_calls if tool_calls else None,
+            )
+        except httpx.HTTPStatusError as e:
+            retryable = e.response.status_code in (429, 500, 502, 503, 504)
+            try:
+                error_body = e.response.text
+            except Exception:
+                error_body = ""
+            logger.error(
+                "Copilot responses API error: %d - %s",
+                e.response.status_code,
+                error_body[:200],
+            )
+            raise ProviderError(
+                f"Copilot API error: {e.response.status_code} - {error_body}",
+                status_code=e.response.status_code,
+                retryable=retryable,
+            )
+        except httpx.HTTPError as e:
+            logger.error("Copilot responses HTTP error: %s", e)
+            raise ProviderError(f"HTTP error: {e}", retryable=True)
+
+    async def responses_completion_stream(
+        self, request: ResponsesRequest
+    ) -> AsyncIterator[ResponsesStreamChunk]:
+        """Generate a streaming Responses API completion via Copilot (for Codex models)."""
+        await self.ensure_token()
+
+        payload = self._build_responses_payload(request)
+        payload["stream"] = True
+
+        logger.debug("Copilot streaming responses: model=%s", request.model)
+        logger.debug("Copilot responses payload: %s", payload)
+        client = self._get_client()
+        try:
+            async with client.stream(
+                "POST",
+                COPILOT_RESPONSES_URL,
+                json=payload,
+                headers=self._get_headers(),
+            ) as response:
+                # Check for errors before processing stream
+                if response.status_code >= 400:
+                    # Read the error body before the context closes
+                    error_body = await response.aread()
+                    error_text = error_body.decode("utf-8", errors="replace")
+                    logger.error(
+                        "Copilot responses stream API error: %d - %s",
+                        response.status_code,
+                        error_text,
+                    )
+                    retryable = response.status_code in (429, 500, 502, 503, 504)
+                    raise ProviderError(
+                        f"Copilot API error: {response.status_code} - {error_text}",
+                        status_code=response.status_code,
+                        retryable=retryable,
+                    )
+
+                stream_finished = False
+                final_usage = None
+                # Track current function call being streamed
+                current_fc: dict | None = None
+
+                async for line in response.aiter_lines():
+                    if stream_finished:
+                        break
+
+                    if not line or not line.startswith("data: "):
+                        continue
+
+                    data_str = line[6:]  # Remove "data: " prefix
+                    if data_str == "[DONE]":
+                        # Stream ended, emit final chunk if we haven't already
+                        if not stream_finished:
+                            yield ResponsesStreamChunk(
+                                content="",
+                                finish_reason="stop",
+                                usage=final_usage,
+                            )
+                        stream_finished = True
+                        break
+
+                    import json
+
+                    data = json.loads(data_str)
+                    event_type = data.get("type", "")
+
+                    # Handle text delta events
+                    if event_type == "response.output_text.delta":
+                        delta_text = data.get("delta", "")
+                        if delta_text:
+                            yield ResponsesStreamChunk(content=delta_text)
+
+                    # Handle function call output_item.added - start of a new function call
+                    elif event_type == "response.output_item.added":
+                        item = data.get("item", {})
+                        if item.get("type") == "function_call":
+                            current_fc = {
+                                "id": item.get("id", ""),
+                                "call_id": item.get("call_id", ""),
+                                "name": item.get("name", ""),
+                                "arguments": "",
+                                "output_index": data.get("output_index", 0),
+                            }
+
+                    # Handle function call arguments delta
+                    elif event_type == "response.function_call_arguments.delta":
+                        delta = data.get("delta", "")
+                        if current_fc and delta:
+                            current_fc["arguments"] += delta
+                            # Emit delta event for streaming
+                            yield ResponsesStreamChunk(
+                                content="",
+                                tool_call_delta={
+                                    "type": "function_call_arguments_delta",
+                                    "item_id": current_fc["id"],
+                                    "call_id": current_fc["call_id"],
+                                    "name": current_fc["name"],
+                                    "output_index": current_fc["output_index"],
+                                    "delta": delta,
+                                },
+                            )
+
+                    # Handle function call arguments done
+                    elif event_type == "response.function_call_arguments.done":
+                        if current_fc:
+                            current_fc["arguments"] = data.get("arguments", current_fc["arguments"])
+                            # Emit complete tool call
+                            yield ResponsesStreamChunk(
+                                content="",
+                                tool_call=ResponsesToolCall(
+                                    call_id=current_fc["call_id"],
+                                    name=current_fc["name"],
+                                    arguments=current_fc["arguments"],
+                                ),
+                            )
+                            current_fc = None
+
+                    # Handle output_item.done for function calls
+                    elif event_type == "response.output_item.done":
+                        item = data.get("item", {})
+                        if item.get("type") == "function_call":
+                            # Emit complete tool call if not already done
+                            yield ResponsesStreamChunk(
+                                content="",
+                                tool_call=ResponsesToolCall(
+                                    call_id=item.get("call_id", ""),
+                                    name=item.get("name", ""),
+                                    arguments=item.get("arguments", "{}"),
+                                ),
+                            )
+                            current_fc = None
+
+                    # Handle done event to get final usage
+                    elif event_type == "response.done":
+                        resp = data.get("response", {})
+                        final_usage = resp.get("usage")
+                        yield ResponsesStreamChunk(
+                            content="",
+                            finish_reason="stop",
+                            usage=final_usage,
+                        )
+                        stream_finished = True
+
+                    # Handle completed events
+                    elif event_type == "response.completed":
+                        # Final response received - emit finish chunk
+                        resp = data.get("response", {})
+                        if not final_usage:
+                            final_usage = resp.get("usage")
+                        yield ResponsesStreamChunk(
+                            content="",
+                            finish_reason="stop",
+                            usage=final_usage,
+                        )
+                        stream_finished = True
+
+                # If stream ended without explicit completion event, emit final chunk
+                if not stream_finished:
+                    logger.debug("Stream ended without completion event, emitting final chunk")
+                    yield ResponsesStreamChunk(
+                        content="",
+                        finish_reason="stop",
+                        usage=final_usage,
+                    )
+
+        except httpx.HTTPError as e:
+            logger.error("Copilot responses stream HTTP error: %s", e)
+            raise ProviderError(f"HTTP error: {e}", retryable=True)
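
To make the streaming contract concrete, a caller might consume this method roughly as follows (provider construction is elided because it depends on AuthManager setup; the model id is illustrative):

```python
from router_maestro.providers.base import ResponsesRequest
from router_maestro.providers.copilot import CopilotProvider


async def stream_demo(provider: CopilotProvider) -> None:
    request = ResponsesRequest(model="example-codex-model", input="Say hello", stream=True)
    async for chunk in provider.responses_completion_stream(request):
        if chunk.content:
            print(chunk.content, end="", flush=True)  # text deltas
        if chunk.tool_call is not None:  # a completed function call
            print(f"\n[tool] {chunk.tool_call.name}({chunk.tool_call.arguments})")
        if chunk.finish_reason == "stop":
            print(f"\nusage: {chunk.usage}")  # final chunk carries usage, when reported
```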