claude-code-llm-router 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_code_llm_router-0.2.0.dist-info/METADATA +516 -0
- claude_code_llm_router-0.2.0.dist-info/RECORD +22 -0
- claude_code_llm_router-0.2.0.dist-info/WHEEL +4 -0
- claude_code_llm_router-0.2.0.dist-info/entry_points.txt +3 -0
- claude_code_llm_router-0.2.0.dist-info/licenses/LICENSE +21 -0
- llm_router/__init__.py +3 -0
- llm_router/classifier.py +145 -0
- llm_router/claude_usage.py +303 -0
- llm_router/codex_agent.py +117 -0
- llm_router/config.py +111 -0
- llm_router/cost.py +300 -0
- llm_router/health.py +84 -0
- llm_router/media.py +375 -0
- llm_router/model_selector.py +98 -0
- llm_router/onboard.py +124 -0
- llm_router/orchestrator.py +210 -0
- llm_router/profiles.py +184 -0
- llm_router/provider_budget.py +168 -0
- llm_router/providers.py +80 -0
- llm_router/router.py +177 -0
- llm_router/server.py +1389 -0
- llm_router/types.py +242 -0
|
@@ -0,0 +1,516 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: claude-code-llm-router
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Multi-LLM router MCP server for Claude Code — smart complexity routing, Claude subscription monitoring, Codex integration, 20+ providers
|
|
5
|
+
Project-URL: Homepage, https://github.com/ypollak2/llm-router
|
|
6
|
+
Project-URL: Repository, https://github.com/ypollak2/llm-router
|
|
7
|
+
Project-URL: Issues, https://github.com/ypollak2/llm-router/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/ypollak2/llm-router/blob/main/CHANGELOG.md
|
|
9
|
+
Author-email: ypollak2 <ypollak2@users.noreply.github.com>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: ai,claude,claude-code,gemini,litellm,llm,mcp,openai,router
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
23
|
+
Requires-Python: <3.14,>=3.10
|
|
24
|
+
Requires-Dist: aiosqlite>=0.20.0
|
|
25
|
+
Requires-Dist: litellm>=1.50.0
|
|
26
|
+
Requires-Dist: mcp>=1.0.0
|
|
27
|
+
Requires-Dist: pydantic-settings>=2.0
|
|
28
|
+
Requires-Dist: pydantic>=2.0
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
<p align="center">
|
|
36
|
+
<img src="docs/logo.svg" alt="LLM Router" width="120" />
|
|
37
|
+
</p>
|
|
38
|
+
|
|
39
|
+
<h1 align="center">LLM Router</h1>
|
|
40
|
+
|
|
41
|
+
<p align="center">
|
|
42
|
+
<strong>One MCP server. Every AI model. Smart routing.</strong>
|
|
43
|
+
</p>
|
|
44
|
+
|
|
45
|
+
<p align="center">
|
|
46
|
+
Route text, image, video, and audio tasks to 20+ AI providers — automatically picking the best model for the job based on your budget and active profile.
|
|
47
|
+
</p>
|
|
48
|
+
|
|
49
|
+
<p align="center">
|
|
50
|
+
<a href="#quick-start">Quick Start</a> •
|
|
51
|
+
<a href="#how-it-works">How It Works</a> •
|
|
52
|
+
<a href="#providers">Providers</a> •
|
|
53
|
+
<a href="#routing-profiles">Profiles</a> •
|
|
54
|
+
<a href="#budget-control">Budget Control</a> •
|
|
55
|
+
<a href="docs/PROVIDERS.md">Provider Setup</a>
|
|
56
|
+
</p>
|
|
57
|
+
|
|
58
|
+
<p align="center">
|
|
59
|
+
<a href="https://github.com/ypollak2/llm-router/actions"><img src="https://img.shields.io/github/actions/workflow/status/ypollak2/llm-router/ci.yml?style=flat-square&label=tests" alt="Tests"></a>
|
|
60
|
+
<a href="https://github.com/ypollak2/llm-router/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green?style=flat-square" alt="MIT License"></a>
|
|
61
|
+
<img src="https://img.shields.io/badge/python-3.10%E2%80%933.13-blue?style=flat-square" alt="Python">
|
|
62
|
+
<img src="https://img.shields.io/badge/MCP-1.0+-purple?style=flat-square" alt="MCP">
|
|
63
|
+
<img src="https://img.shields.io/badge/providers-20+-orange?style=flat-square" alt="Providers">
|
|
64
|
+
</p>
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## The Problem
|
|
69
|
+
|
|
70
|
+
You use Claude Code (or any MCP client). You also have access to GPT-4o, Gemini, Perplexity, DALL-E, Runway, ElevenLabs — but switching between them is manual, slow, and expensive.
|
|
71
|
+
|
|
72
|
+
**LLM Router** gives your AI assistant one unified interface to all of them — and it automatically picks the right one based on what you're doing and what you can afford.
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
You: "Research the latest AI funding rounds"
|
|
76
|
+
Router: → Perplexity Sonar Pro (search-augmented, best for current facts)
|
|
77
|
+
|
|
78
|
+
You: "Generate a hero image for the landing page"
|
|
79
|
+
Router: → Flux Pro via fal.ai (best quality/cost for images)
|
|
80
|
+
|
|
81
|
+
You: "Write unit tests for the auth module"
|
|
82
|
+
Router: → Claude Sonnet (top coding model, within budget)
|
|
83
|
+
|
|
84
|
+
You: "Create a 5-second product demo clip"
|
|
85
|
+
Router: → Kling 2.0 via fal.ai (best value for short video)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Why It Saves 40–70%
|
|
89
|
+
|
|
90
|
+
Most AI tasks don't need the most powerful model. The router matches complexity to capability automatically:
|
|
91
|
+
|
|
92
|
+
<p align="center">
|
|
93
|
+
<img src="docs/images/savings.svg" alt="Task Distribution" width="400" />
|
|
94
|
+
</p>
|
|
95
|
+
|
|
96
|
+
| | Without Router | With Router |
|
|
97
|
+
|--|---------------|-------------|
|
|
98
|
+
| Simple tasks (60%) | Opus $$$$ | Haiku $ |
|
|
99
|
+
| Moderate tasks (30%) | Opus $$$$ | Sonnet $$ |
|
|
100
|
+
| Complex tasks (10%) | Opus $$$$ | Opus $$$$ |
|
|
101
|
+
| **Monthly estimate** | **~$50** | **~$15–20** |
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## Quick Start
|
|
106
|
+
|
|
107
|
+
### Option A: Claude Code Plugin (Recommended)
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
claude plugin add ypollak2/llm-router
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Option B: Manual Install
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
git clone https://github.com/ypollak2/llm-router.git
|
|
117
|
+
cd llm-router
|
|
118
|
+
uv sync
|
|
119
|
+
./scripts/install.sh # registers as MCP server in Claude Code
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Get Running in 3 Steps
|
|
123
|
+
|
|
124
|
+
<p align="center">
|
|
125
|
+
<img src="docs/images/quickstart.svg" alt="Quick Start" width="700" />
|
|
126
|
+
</p>
|
|
127
|
+
|
|
128
|
+
> **Start for free**: Google's Gemini API has a [free tier](https://aistudio.google.com/apikey) with 1M tokens/day — no credit card needed. [Groq](https://console.groq.com/keys) also offers a generous free tier with ultra-fast inference.
|
|
129
|
+
|
|
130
|
+
### What You Get
|
|
131
|
+
|
|
132
|
+
- **21 MCP tools** — Smart routing, text, image, video, audio, setup, usage monitoring
|
|
133
|
+
- **`/route` skill** — Smart task classification and routing in one command
|
|
134
|
+
- **Smart classifier** — Auto-picks Claude Haiku/Sonnet/Opus based on complexity
|
|
135
|
+
- **Claude subscription monitoring** — Live session/weekly usage from claude.ai
|
|
136
|
+
- **Codex desktop integration** — Route tasks to local OpenAI Codex (free)
|
|
137
|
+
- **LLM Orchestrator agent** — Autonomous multi-step task decomposition across models
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## How It Works
|
|
142
|
+
|
|
143
|
+
### Architecture
|
|
144
|
+
|
|
145
|
+
<p align="center">
|
|
146
|
+
<img src="docs/images/architecture.svg" alt="Architecture" width="700" />
|
|
147
|
+
</p>
|
|
148
|
+
|
|
149
|
+
### Routing Decision Flow
|
|
150
|
+
|
|
151
|
+
<p align="center">
|
|
152
|
+
<img src="docs/images/routing-flow.svg" alt="Routing Flow" width="600" />
|
|
153
|
+
</p>
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Smart Routing (Claude Code Models)
|
|
158
|
+
|
|
159
|
+
Use Claude Code's own models (Haiku/Sonnet/Opus) **without extra API keys** via the smart classifier:
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
llm_classify("What is the capital of France?")
|
|
163
|
+
→ [S] simple (99%) → haiku
|
|
164
|
+
|
|
165
|
+
llm_classify("Write a REST API with auth and pagination")
|
|
166
|
+
→ [M] moderate (98%) → sonnet
|
|
167
|
+
|
|
168
|
+
llm_classify("Design a distributed CQRS architecture")
|
|
169
|
+
→ [C] complex (85%) → opus
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Complexity-First Routing
|
|
173
|
+
|
|
174
|
+
Complexity drives model selection — this is the real savings mechanism. You don't need Opus for "what time is it?" and you don't want Haiku for architecture design. Budget pressure is a late safety net, not the primary router.
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
# In .env
|
|
178
|
+
QUALITY_MODE=balanced # best | balanced | conserve
|
|
179
|
+
MIN_MODEL=haiku # floor: never route below this
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
| Claude Usage | Effect |
|
|
183
|
+
|-------------|--------|
|
|
184
|
+
| 0-85% | No downshift — complexity routing handles efficiency |
|
|
185
|
+
| 85-95% | Downshift by 1 tier + suggest external fallback |
|
|
186
|
+
| 95%+ | Downshift by 2 tiers + recommend external (Codex, OpenAI, Gemini) |
|
|
187
|
+
|
|
188
|
+
Budget pressure comes from **real Claude subscription data** (session %, weekly %) fetched live from claude.ai. The router also factors in **time until session reset** — if you're at 90% but the session resets in 5 minutes, no downshift is needed.
|
|
189
|
+
|
|
190
|
+
### External Fallback
|
|
191
|
+
|
|
192
|
+
When Claude quota is tight (85%+), the router ranks available external models:
|
|
193
|
+
|
|
194
|
+
```
|
|
195
|
+
llm_classify("Design auth architecture")
|
|
196
|
+
# -> complex -> sonnet (downshifted from opus)
|
|
197
|
+
# pressure: [========..] 90%
|
|
198
|
+
# >> fallback: codex/gpt-5.4 (free, preserves Claude quota)
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
- **Codex (local)**: Free — uses your OpenAI desktop subscription
|
|
202
|
+
- **OpenAI API**: GPT-4o, o3 (ranked by quality, filtered by budget)
|
|
203
|
+
- **Gemini API**: gemini-2.5-pro, gemini-2.5-flash
|
|
204
|
+
|
|
205
|
+
Per-provider budgets via `LLM_ROUTER_BUDGET_OPENAI=10.00`, `LLM_ROUTER_BUDGET_GEMINI=5.00`.
|
|
206
|
+
|
|
207
|
+
### Claude Subscription Monitoring
|
|
208
|
+
|
|
209
|
+
Live usage data from your claude.ai account — no guessing:
|
|
210
|
+
|
|
211
|
+
```
|
|
212
|
+
+----------------------------------------------------------+
|
|
213
|
+
| Claude Subscription (Live) |
|
|
214
|
+
+----------------------------------------------------------+
|
|
215
|
+
| Session [====........] 35% resets in 3h 7m |
|
|
216
|
+
| Weekly (all) [===.........] 23% resets Fri 01:00 PM |
|
|
217
|
+
| Sonnet only [===.........] 26% resets Wed 10:00 AM |
|
|
218
|
+
+----------------------------------------------------------+
|
|
219
|
+
| OK 35% pressure -- full model selection |
|
|
220
|
+
+----------------------------------------------------------+
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
Usage data is fetched via Playwright from claude.ai's internal JSON API (the same data the settings page uses). It takes one `browser_evaluate` call, and the result is cached in memory for routing decisions.
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
## Providers
|
|
228
|
+
|
|
229
|
+
### Text & Code LLMs
|
|
230
|
+
|
|
231
|
+
| Provider | Models | Free Tier | Best For |
|
|
232
|
+
|----------|--------|-----------|----------|
|
|
233
|
+
| **Google Gemini** | 2.5 Pro, 2.5 Flash | **Yes** (1M tokens/day) | Generation, long context |
|
|
234
|
+
| **Groq** | Llama 3.3, Mixtral | **Yes** | Ultra-fast inference |
|
|
235
|
+
| **OpenAI** | GPT-4o, GPT-4o-mini, o3 | No | Code, analysis, reasoning |
|
|
236
|
+
| **Perplexity** | Sonar, Sonar Pro | No | Research, current events |
|
|
237
|
+
| **Anthropic** | Claude Sonnet, Haiku | No | Nuanced writing, safety |
|
|
238
|
+
| **Deepseek** | V3, Reasoner | Yes (limited) | Cost-effective reasoning |
|
|
239
|
+
| **Mistral** | Large, Small | Yes (limited) | Multilingual |
|
|
240
|
+
| **Together** | Llama 3, CodeLlama | Yes (limited) | Open-source models |
|
|
241
|
+
| **xAI** | Grok 3 | No | Real-time information |
|
|
242
|
+
| **Cohere** | Command R+ | Yes (trial) | RAG, enterprise search |
|
|
243
|
+
|
|
244
|
+
### Image Generation
|
|
245
|
+
|
|
246
|
+
| Provider | Models | Best For |
|
|
247
|
+
|----------|--------|----------|
|
|
248
|
+
| **Google Gemini** | Imagen 3 | High quality, integrated with text models |
|
|
249
|
+
| **fal.ai** | Flux Pro, Flux Dev | Quality/cost ratio, fast generation |
|
|
250
|
+
| **OpenAI** | DALL-E 3, DALL-E 2 | Prompt adherence, text in images |
|
|
251
|
+
| **Stability AI** | Stable Diffusion 3 | Fine control, open weights |
|
|
252
|
+
|
|
253
|
+
### Video Generation
|
|
254
|
+
|
|
255
|
+
| Provider | Models | Best For |
|
|
256
|
+
|----------|--------|----------|
|
|
257
|
+
| **Google Gemini** | Veo 2 | Integrated with Gemini ecosystem |
|
|
258
|
+
| **Runway** | Gen-3 Alpha | Professional quality, motion control |
|
|
259
|
+
| **fal.ai** | Kling, minimax | Value, fast generation |
|
|
260
|
+
| **Replicate** | Various | Open-source video models |
|
|
261
|
+
|
|
262
|
+
### Audio & Voice
|
|
263
|
+
|
|
264
|
+
| Provider | Models | Best For |
|
|
265
|
+
|----------|--------|----------|
|
|
266
|
+
| **ElevenLabs** | Multilingual v2 | Voice cloning, highest quality |
|
|
267
|
+
| **OpenAI** | TTS-1, TTS-1-HD | Cost-effective text-to-speech |
|
|
268
|
+
|
|
269
|
+
> **20+ providers and growing.** See [docs/PROVIDERS.md](docs/PROVIDERS.md) for full setup guides with API key links.
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## MCP Tools
|
|
274
|
+
|
|
275
|
+
Once installed, Claude Code gets these 21 tools:
|
|
276
|
+
|
|
277
|
+
| Tool | What It Does |
|
|
278
|
+
|------|-------------|
|
|
279
|
+
| **Smart Routing** | |
|
|
280
|
+
| `llm_classify` | Classify complexity + recommend model with time-aware budget pressure |
|
|
281
|
+
| `llm_route` | Auto-classify, then route to the best external LLM |
|
|
282
|
+
| `llm_track_usage` | Report Claude Code token usage for budget tracking |
|
|
283
|
+
| **Text & Code** | |
|
|
284
|
+
| `llm_query` | General questions — auto-routed to the best text LLM |
|
|
285
|
+
| `llm_research` | Search-augmented answers via Perplexity |
|
|
286
|
+
| `llm_generate` | Creative content — writing, summaries, brainstorming |
|
|
287
|
+
| `llm_analyze` | Deep reasoning — analysis, debugging, problem decomposition |
|
|
288
|
+
| `llm_code` | Coding tasks — generation, refactoring, algorithms |
|
|
289
|
+
| **Media** | |
|
|
290
|
+
| `llm_image` | Image generation — Gemini Imagen, DALL-E, Flux, or SD |
|
|
291
|
+
| `llm_video` | Video generation — Gemini Veo, Runway, Kling, etc. |
|
|
292
|
+
| `llm_audio` | Voice/audio — TTS via ElevenLabs or OpenAI |
|
|
293
|
+
| **Orchestration** | |
|
|
294
|
+
| `llm_orchestrate` | Multi-step pipelines across multiple models |
|
|
295
|
+
| `llm_pipeline_templates` | List available orchestration templates |
|
|
296
|
+
| **Monitoring & Setup** | |
|
|
297
|
+
| `llm_check_usage` | Check live Claude subscription usage (session %, weekly %) |
|
|
298
|
+
| `llm_update_usage` | Feed live usage data from claude.ai into the router |
|
|
299
|
+
| `llm_codex` | Route tasks to local Codex desktop agent (free, uses OpenAI sub) |
|
|
300
|
+
| `llm_setup` | Discover API keys, add providers, get setup guides |
|
|
301
|
+
| `llm_set_profile` | Switch routing profile (budget / balanced / premium) |
|
|
302
|
+
| `llm_usage` | Unified dashboard — Claude sub, Codex, APIs, savings in one view |
|
|
303
|
+
| `llm_health` | Check provider availability and circuit breaker status |
|
|
304
|
+
| `llm_providers` | List all supported and configured providers |
|
|
305
|
+
|
|
306
|
+
---
|
|
307
|
+
|
|
308
|
+
## Routing Profiles
|
|
309
|
+
|
|
310
|
+
<p align="center">
|
|
311
|
+
<img src="docs/images/profiles.svg" alt="Routing Profiles" width="700" />
|
|
312
|
+
</p>
|
|
313
|
+
|
|
314
|
+
Three built-in profiles control the cost/quality tradeoff:
|
|
315
|
+
|
|
316
|
+
| | Budget | Balanced | Premium |
|
|
317
|
+
|--|--------|----------|---------|
|
|
318
|
+
| **Text** | Gemini Flash, GPT-4o-mini | GPT-4o, Claude Sonnet | o3, Claude Opus |
|
|
319
|
+
| **Research** | Perplexity Sonar | Sonar Pro | Sonar Pro |
|
|
320
|
+
| **Code** | Deepseek, Gemini Flash | Claude Sonnet, GPT-4o | Claude Opus, o3 |
|
|
321
|
+
| **Image** | Flux Dev, Imagen 3 Fast | Flux Pro, Imagen 3, DALL-E 3 | Imagen 3, DALL-E 3 |
|
|
322
|
+
| **Video** | minimax, Veo 2 | Kling, Veo 2, Runway Turbo | Veo 2, Runway Gen-3 |
|
|
323
|
+
| **Audio** | OpenAI TTS | ElevenLabs | ElevenLabs |
|
|
324
|
+
|
|
325
|
+
Switch anytime:
|
|
326
|
+
```
|
|
327
|
+
llm_set_profile("budget") # Development, drafts, exploration
|
|
328
|
+
llm_set_profile("balanced") # Production work, client deliverables
|
|
329
|
+
llm_set_profile("premium") # Critical tasks, maximum quality
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
---
|
|
333
|
+
|
|
334
|
+
## Budget Control
|
|
335
|
+
|
|
336
|
+
Set a monthly budget to prevent overspending:
|
|
337
|
+
|
|
338
|
+
```bash
|
|
339
|
+
# In .env
|
|
340
|
+
LLM_ROUTER_MONTHLY_BUDGET=50 # USD, 0 = unlimited
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
The router:
|
|
344
|
+
- **Tracks real-time spend** across all providers in SQLite
|
|
345
|
+
- **Blocks requests** when the monthly budget is reached
|
|
346
|
+
- **Shows budget status** in `llm_usage`
|
|
347
|
+
|
|
348
|
+
```
|
|
349
|
+
llm_usage("month")
|
|
350
|
+
|
|
351
|
+
## Usage Summary (month)
|
|
352
|
+
Calls: 142
|
|
353
|
+
Tokens: 240,000 in + 80,000 out = 320,000 total
|
|
354
|
+
Cost: $3.4200
|
|
355
|
+
Avg latency: 1200ms
|
|
356
|
+
|
|
357
|
+
### Budget Status
|
|
358
|
+
Monthly budget: $50.00
|
|
359
|
+
Spent this month: $3.4200 (6.8%)
|
|
360
|
+
Remaining: $46.5800
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
---
|
|
364
|
+
|
|
365
|
+
## Multi-Step Orchestration
|
|
366
|
+
|
|
367
|
+
Chain tasks across different models in a pipeline:
|
|
368
|
+
|
|
369
|
+
<p align="center">
|
|
370
|
+
<img src="docs/images/orchestration.svg" alt="Orchestration Pipeline" width="600" />
|
|
371
|
+
</p>
|
|
372
|
+
|
|
373
|
+
```
|
|
374
|
+
llm_orchestrate("Research AI trends and write a report", template="research_report")
|
|
375
|
+
```
|
|
376
|
+
|
|
377
|
+
Built-in templates:
|
|
378
|
+
|
|
379
|
+
| Template | Steps | Pipeline |
|
|
380
|
+
|----------|-------|----------|
|
|
381
|
+
| `research_report` | 3 | Research → Analyze → Write |
|
|
382
|
+
| `competitive_analysis` | 4 | Multi-source research → SWOT → Report |
|
|
383
|
+
| `content_pipeline` | 4 | Research → Draft → Review → Polish |
|
|
384
|
+
| `code_review_fix` | 3 | Review → Fix → Test |
|
|
385
|
+
|
|
386
|
+
---
|
|
387
|
+
|
|
388
|
+
## Configuration
|
|
389
|
+
|
|
390
|
+
### Environment Variables
|
|
391
|
+
|
|
392
|
+
```bash
|
|
393
|
+
# Required: at least one provider
|
|
394
|
+
GEMINI_API_KEY=AIza... # Free tier! https://aistudio.google.com/apikey
|
|
395
|
+
OPENAI_API_KEY=sk-proj-...
|
|
396
|
+
PERPLEXITY_API_KEY=pplx-...
|
|
397
|
+
|
|
398
|
+
# Optional: more providers (add as many as you want)
|
|
399
|
+
ANTHROPIC_API_KEY=sk-ant-...
|
|
400
|
+
DEEPSEEK_API_KEY=...
|
|
401
|
+
GROQ_API_KEY=gsk_...
|
|
402
|
+
FAL_KEY=...
|
|
403
|
+
ELEVENLABS_API_KEY=...
|
|
404
|
+
|
|
405
|
+
# Router config
|
|
406
|
+
LLM_ROUTER_PROFILE=balanced # budget | balanced | premium
|
|
407
|
+
LLM_ROUTER_MONTHLY_BUDGET=0 # USD, 0 = unlimited
|
|
408
|
+
|
|
409
|
+
# Smart routing (Claude Code model selection)
|
|
410
|
+
DAILY_TOKEN_BUDGET=0 # tokens/day, 0 = unlimited
|
|
411
|
+
QUALITY_MODE=balanced # best | balanced | conserve
|
|
412
|
+
MIN_MODEL=haiku # floor: haiku | sonnet | opus
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
See [.env.example](.env.example) for the full list of supported providers.
|
|
416
|
+
|
|
417
|
+
### Claude Code Integration
|
|
418
|
+
|
|
419
|
+
After running `./scripts/install.sh`, your `~/.claude.json` will include:
|
|
420
|
+
|
|
421
|
+
```json
|
|
422
|
+
{
|
|
423
|
+
"mcpServers": {
|
|
424
|
+
"llm-router": {
|
|
425
|
+
"command": "uv",
|
|
426
|
+
"args": ["run", "--directory", "/path/to/llm-router", "llm-router"]
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
---
|
|
433
|
+
|
|
434
|
+
## Development
|
|
435
|
+
|
|
436
|
+
```bash
|
|
437
|
+
# Install with dev dependencies
|
|
438
|
+
uv sync --extra dev
|
|
439
|
+
|
|
440
|
+
# Run tests
|
|
441
|
+
uv run pytest -v
|
|
442
|
+
|
|
443
|
+
# Run integration tests (requires real API keys)
|
|
444
|
+
uv run pytest tests/test_integration.py -v
|
|
445
|
+
|
|
446
|
+
# Lint
|
|
447
|
+
uv run ruff check src/
|
|
448
|
+
```
|
|
449
|
+
|
|
450
|
+
---
|
|
451
|
+
|
|
452
|
+
## Roadmap
|
|
453
|
+
|
|
454
|
+
See [ROADMAP.md](ROADMAP.md) for the detailed roadmap with phases and priorities.
|
|
455
|
+
|
|
456
|
+
### Completed (v0.1 + v0.2)
|
|
457
|
+
|
|
458
|
+
- [x] Core text LLM routing (10+ providers)
|
|
459
|
+
- [x] Configurable profiles (budget / balanced / premium)
|
|
460
|
+
- [x] Cost tracking with SQLite
|
|
461
|
+
- [x] Health checks with circuit breaker
|
|
462
|
+
- [x] Image generation (Gemini Imagen 3, DALL-E, Flux, SD)
|
|
463
|
+
- [x] Video generation (Gemini Veo 2, Runway, Kling, minimax)
|
|
464
|
+
- [x] Audio/voice routing (ElevenLabs, OpenAI TTS)
|
|
465
|
+
- [x] Monthly budget enforcement
|
|
466
|
+
- [x] Multi-step orchestration with pipeline templates
|
|
467
|
+
- [x] Claude Code plugin with orchestrator agent and /route skill
|
|
468
|
+
- [x] Freemium tier gating
|
|
469
|
+
- [x] CI with GitHub Actions
|
|
470
|
+
- [x] Smart complexity-first routing (simple->haiku, moderate->sonnet, complex->opus)
|
|
471
|
+
- [x] Live Claude subscription monitoring (session %, weekly %, Sonnet %)
|
|
472
|
+
- [x] Time-aware budget pressure (factors in session reset proximity)
|
|
473
|
+
- [x] External fallback ranking when Claude is tight (Codex, OpenAI, Gemini)
|
|
474
|
+
- [x] Codex desktop integration (local agent, free via OpenAI subscription)
|
|
475
|
+
- [x] Unified usage dashboard (Claude sub + Codex + APIs + savings)
|
|
476
|
+
- [x] `llm_setup` tool for API discovery and secure key management
|
|
477
|
+
- [x] Per-provider budget limits
|
|
478
|
+
- [x] ASCII box-drawing dashboard (terminal-friendly, no Unicode issues)
|
|
479
|
+
|
|
480
|
+
### Next Up (v0.3 — Caching & Automation)
|
|
481
|
+
|
|
482
|
+
- [ ] Prompt caching (exact-match hash + semantic similarity)
|
|
483
|
+
- [ ] Periodic usage pulse (auto-refresh during sessions)
|
|
484
|
+
- [ ] Streaming responses
|
|
485
|
+
- [ ] Auto-refresh Claude usage via Playwright hook
|
|
486
|
+
|
|
487
|
+
### Planned (v0.4 — Smart Classification)
|
|
488
|
+
|
|
489
|
+
- [ ] Embedding-based classifier (`all-MiniLM-L6-v2` + LogisticRegression, <15ms local inference)
|
|
490
|
+
- [ ] Context compaction (structural + opt-in LLM summarization)
|
|
491
|
+
- [ ] Classification quality framework (decision logging, outcome tracking, A/B testing)
|
|
492
|
+
- [ ] `llm_quality_report` tool — routing accuracy, savings metrics, downshift harm rate
|
|
493
|
+
|
|
494
|
+
---
|
|
495
|
+
|
|
496
|
+
## Contributing
|
|
497
|
+
|
|
498
|
+
We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
499
|
+
|
|
500
|
+
Key areas where help is needed:
|
|
501
|
+
- Adding new provider integrations
|
|
502
|
+
- Improving routing intelligence
|
|
503
|
+
- Testing across different MCP clients
|
|
504
|
+
- Documentation and examples
|
|
505
|
+
|
|
506
|
+
---
|
|
507
|
+
|
|
508
|
+
## License
|
|
509
|
+
|
|
510
|
+
[MIT](LICENSE) — use it however you want.
|
|
511
|
+
|
|
512
|
+
---
|
|
513
|
+
|
|
514
|
+
<p align="center">
|
|
515
|
+
<sub>Built with <a href="https://litellm.ai">LiteLLM</a> and <a href="https://modelcontextprotocol.io">MCP</a></sub>
|
|
516
|
+
</p>
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
llm_router/__init__.py,sha256=fy_TBL5DigkqQP5QpxZ_4bUw3-18dinDEjV-lPB_kL4,74
|
|
2
|
+
llm_router/classifier.py,sha256=YN7lrW-3HhXfnHOPSRoRh18FGkLsv3oAkxFYh2YFL-U,5179
|
|
3
|
+
llm_router/claude_usage.py,sha256=G3JHZVLNFaltVq1NYeiCmqQRZB5nqTQN0U1vgiWrd2w,11676
|
|
4
|
+
llm_router/codex_agent.py,sha256=_Hvzh5atmqqsWlGJCd5GlyjY5WYZfZor4vPJPZDUCoc,3536
|
|
5
|
+
llm_router/config.py,sha256=ZSSRsy77auIZ1NxVYi_moP4hV_8HoA4PlN0ygqV07nU,4066
|
|
6
|
+
llm_router/cost.py,sha256=Zb186LAULhDJxnmGR5ehJ7Wh7QEIVUnit0jwX_pELAQ,9921
|
|
7
|
+
llm_router/health.py,sha256=sr45Dy8XdHjTNbHeqxi4Efl_sQF0-fe5ciUBuLt3xPY,2477
|
|
8
|
+
llm_router/media.py,sha256=8yHXMz89LLNbPl4jHCjUNTT4L0AalY3PiF2poL0d61c,12263
|
|
9
|
+
llm_router/model_selector.py,sha256=5C9V_z5W9ONEX6hsiMbn9ADdczQVyOXM_Sn0s-Zl_6I,3664
|
|
10
|
+
llm_router/onboard.py,sha256=dHjJPdFaM5UtlVKf0rXPAOCsikeoKBzVx7D7-wyidMU,4555
|
|
11
|
+
llm_router/orchestrator.py,sha256=OJbPO16HoVurU9X4tXZntaHMYDlHIkGQMNwyv0GC3M0,8263
|
|
12
|
+
llm_router/profiles.py,sha256=jYruoNw_la-qc667MzWNAMCxwQJGQdIvo0HnsilmFjc,7218
|
|
13
|
+
llm_router/provider_budget.py,sha256=V6tfiqPZdpFsdETGoc7i6m9zPQFZ8NFp_wRUUTb5eHw,5455
|
|
14
|
+
llm_router/providers.py,sha256=WbiZLHGzat8y1MUawjT3o5f2wnxCiZUVp6nRcx5BYsI,2400
|
|
15
|
+
llm_router/router.py,sha256=IjlnbD54P5nh-xLD_ByPvdqumkNiii_kKuhecTrhqFo,6151
|
|
16
|
+
llm_router/server.py,sha256=jH0N53Eqr6IFTD9Favdb0QZMX5DGbOR3epI0fIOzuM0,52413
|
|
17
|
+
llm_router/types.py,sha256=eDNp8Hi72ShoQyeJ0Jt2VBiDOtrO6zsjBK6jzxm9sE0,8220
|
|
18
|
+
claude_code_llm_router-0.2.0.dist-info/METADATA,sha256=6Y5PzdjERjWRREHgCsvBcBeu3kcQBD8FVya74wgLqks,18074
|
|
19
|
+
claude_code_llm_router-0.2.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
20
|
+
claude_code_llm_router-0.2.0.dist-info/entry_points.txt,sha256=Djz2Dqf5T2BqRlfiwT8opyrQp6t130J8PbZYjwJ999I,99
|
|
21
|
+
claude_code_llm_router-0.2.0.dist-info/licenses/LICENSE,sha256=rgnFZDuEWiga_4AmZ3Aus0KG6y4PU6wc6i3y1cTjW9g,1080
|
|
22
|
+
claude_code_llm_router-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 LLM Router Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
llm_router/__init__.py
ADDED