modelswitch 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modelswitch-0.1.1/.github/workflows/ci.yml +63 -0
- modelswitch-0.1.1/.gitignore +16 -0
- modelswitch-0.1.1/.zedrules +77 -0
- modelswitch-0.1.1/CLAUDE.md +123 -0
- modelswitch-0.1.1/DEVWORKFLOW.md +527 -0
- modelswitch-0.1.1/Dockerfile +16 -0
- modelswitch-0.1.1/PKG-INFO +293 -0
- modelswitch-0.1.1/README.md +270 -0
- modelswitch-0.1.1/TESTGUIDE.md +503 -0
- modelswitch-0.1.1/app/__init__.py +11 -0
- modelswitch-0.1.1/app/adapters/__init__.py +0 -0
- modelswitch-0.1.1/app/adapters/anthropic_adapter.py +427 -0
- modelswitch-0.1.1/app/adapters/base.py +63 -0
- modelswitch-0.1.1/app/adapters/litellm_adapter.py +21 -0
- modelswitch-0.1.1/app/adapters/openai_adapter.py +280 -0
- modelswitch-0.1.1/app/api/__init__.py +0 -0
- modelswitch-0.1.1/app/api/anthropic_routes.py +266 -0
- modelswitch-0.1.1/app/api/api_key_routes.py +142 -0
- modelswitch-0.1.1/app/api/config_routes.py +405 -0
- modelswitch-0.1.1/app/api/conversation_routes.py +128 -0
- modelswitch-0.1.1/app/api/log_routes.py +23 -0
- modelswitch-0.1.1/app/api/openai_routes.py +243 -0
- modelswitch-0.1.1/app/api/usage_routes.py +68 -0
- modelswitch-0.1.1/app/cli.py +283 -0
- modelswitch-0.1.1/app/cli_inner.py +37 -0
- modelswitch-0.1.1/app/config.yaml.example +85 -0
- modelswitch-0.1.1/app/core/__init__.py +0 -0
- modelswitch-0.1.1/app/core/chain_router.py +339 -0
- modelswitch-0.1.1/app/core/circuit_breaker.py +74 -0
- modelswitch-0.1.1/app/core/config.py +49 -0
- modelswitch-0.1.1/app/core/config_watcher.py +73 -0
- modelswitch-0.1.1/app/core/exceptions.py +56 -0
- modelswitch-0.1.1/app/core/middleware.py +356 -0
- modelswitch-0.1.1/app/core/request_queue.py +303 -0
- modelswitch-0.1.1/app/main.py +310 -0
- modelswitch-0.1.1/app/models/__init__.py +0 -0
- modelswitch-0.1.1/app/models/config_models.py +108 -0
- modelswitch-0.1.1/app/services/__init__.py +0 -0
- modelswitch-0.1.1/app/services/api_key_service.py +106 -0
- modelswitch-0.1.1/app/services/conv_indexer.py +396 -0
- modelswitch-0.1.1/app/services/usage_tracker.py +260 -0
- modelswitch-0.1.1/app/utils/__init__.py +0 -0
- modelswitch-0.1.1/app/utils/logging.py +205 -0
- modelswitch-0.1.1/app/utils/message_converter.py +313 -0
- modelswitch-0.1.1/app/utils/metrics.py +52 -0
- modelswitch-0.1.1/app/utils/tracking.py +141 -0
- modelswitch-0.1.1/app/web/css/style.css +1472 -0
- modelswitch-0.1.1/app/web/index.html +321 -0
- modelswitch-0.1.1/app/web/js/app.js +1643 -0
- modelswitch-0.1.1/app/web/js/i18n.js +476 -0
- modelswitch-0.1.1/app/workspace.py +81 -0
- modelswitch-0.1.1/docker-compose.yaml +20 -0
- modelswitch-0.1.1/modelswitch.egg-info/PKG-INFO +293 -0
- modelswitch-0.1.1/modelswitch.egg-info/SOURCES.txt +76 -0
- modelswitch-0.1.1/modelswitch.egg-info/dependency_links.txt +1 -0
- modelswitch-0.1.1/modelswitch.egg-info/entry_points.txt +2 -0
- modelswitch-0.1.1/modelswitch.egg-info/requires.txt +17 -0
- modelswitch-0.1.1/modelswitch.egg-info/top_level.txt +1 -0
- modelswitch-0.1.1/pyproject.toml +47 -0
- modelswitch-0.1.1/requirements.txt +10 -0
- modelswitch-0.1.1/scripts/smoketest.py +551 -0
- modelswitch-0.1.1/scripts/smoketest.sh +344 -0
- modelswitch-0.1.1/setup.cfg +4 -0
- modelswitch-0.1.1/tests/conftest.py +372 -0
- modelswitch-0.1.1/tests/test_anthropic_adapter.py +1572 -0
- modelswitch-0.1.1/tests/test_api_key_service.py +181 -0
- modelswitch-0.1.1/tests/test_api_routes.py +1821 -0
- modelswitch-0.1.1/tests/test_chain_router.py +866 -0
- modelswitch-0.1.1/tests/test_circuit_breaker.py +165 -0
- modelswitch-0.1.1/tests/test_client_compat.py +577 -0
- modelswitch-0.1.1/tests/test_config_models.py +187 -0
- modelswitch-0.1.1/tests/test_conversation_routes.py +917 -0
- modelswitch-0.1.1/tests/test_e2e.py +588 -0
- modelswitch-0.1.1/tests/test_message_converter.py +552 -0
- modelswitch-0.1.1/tests/test_openai_adapter.py +996 -0
- modelswitch-0.1.1/tests/test_request_queue.py +1230 -0
- modelswitch-0.1.1/tests/test_usage_tracker.py +166 -0
- modelswitch-0.1.1/todo_test.md +335 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
tags: ["v*"]
|
|
7
|
+
pull_request:
|
|
8
|
+
branches: [main]
|
|
9
|
+
|
|
10
|
+
permissions:
|
|
11
|
+
contents: write
|
|
12
|
+
id-token: write
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
test:
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
strategy:
|
|
18
|
+
fail-fast: false
|
|
19
|
+
matrix:
|
|
20
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
21
|
+
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
26
|
+
uses: actions/setup-python@v5
|
|
27
|
+
with:
|
|
28
|
+
python-version: ${{ matrix.python-version }}
|
|
29
|
+
|
|
30
|
+
- name: Install dependencies
|
|
31
|
+
run: |
|
|
32
|
+
python -m pip install --upgrade pip
|
|
33
|
+
pip install -r requirements.txt
|
|
34
|
+
pip install pytest pytest-asyncio pytest-timeout litellm==1.82.6
|
|
35
|
+
|
|
36
|
+
- name: Run tests
|
|
37
|
+
run: python -m pytest tests/ -v --timeout=60
|
|
38
|
+
|
|
39
|
+
publish:
|
|
40
|
+
name: Publish to PyPI
|
|
41
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
42
|
+
needs: test
|
|
43
|
+
runs-on: ubuntu-latest
|
|
44
|
+
environment: pypi
|
|
45
|
+
|
|
46
|
+
steps:
|
|
47
|
+
- uses: actions/checkout@v4
|
|
48
|
+
with:
|
|
49
|
+
fetch-depth: 0
|
|
50
|
+
|
|
51
|
+
- name: Set up Python
|
|
52
|
+
uses: actions/setup-python@v5
|
|
53
|
+
with:
|
|
54
|
+
python-version: "3.12"
|
|
55
|
+
|
|
56
|
+
- name: Install build tools
|
|
57
|
+
run: python -m pip install --upgrade pip build
|
|
58
|
+
|
|
59
|
+
- name: Build package
|
|
60
|
+
run: python -m build
|
|
61
|
+
|
|
62
|
+
- name: Publish to PyPI
|
|
63
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# ModelSwitch Development Rules
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
|
|
5
|
+
ModelSwitch is an LLM gateway proxy exposing OpenAI-compatible and Anthropic-compatible APIs with automatic fallback chains, rate limiting, and usage tracking.
|
|
6
|
+
|
|
7
|
+
## Key Constraints
|
|
8
|
+
|
|
9
|
+
### Dependencies
|
|
10
|
+
- `litellm` is pinned to `1.82.6` — **NEVER upgrade** (supply chain attack on 1.82.7/1.82.8)
|
|
11
|
+
|
|
12
|
+
### Middleware
|
|
13
|
+
- `GatewayMiddleware` is pure ASGI (`__call__(scope, receive, send)`)
|
|
14
|
+
- **NEVER** use `BaseHTTPMiddleware` or `@app.middleware("http")` — causes infinite recursion
|
|
15
|
+
|
|
16
|
+
### Streaming
|
|
17
|
+
- Use `resp_ref` closure pattern to capture usage from async stream generators
|
|
18
|
+
- Adapters return `AdapterResponse` with `body` (non-stream) or `stream` (async generator)
|
|
19
|
+
- Route handlers read `_stream_adapter_info` dict after stream completes
|
|
20
|
+
|
|
21
|
+
## Development Workflow
|
|
22
|
+
|
|
23
|
+
### Before Starting
|
|
24
|
+
1. Run `git status` — commit any uncommitted changes first
|
|
25
|
+
2. Create feature branch: `git checkout -b feature/[name]`
|
|
26
|
+
|
|
27
|
+
### During Development
|
|
28
|
+
1. Follow existing code patterns
|
|
29
|
+
2. Check `CLAUDE.md` for architecture details
|
|
30
|
+
3. Make small, focused commits
|
|
31
|
+
|
|
32
|
+
### Before Committing
|
|
33
|
+
1. Run tests: `python -m pytest tests/ -v --timeout=60`
|
|
34
|
+
2. Update documentation if architecture changed
|
|
35
|
+
3. Add test cases for new functionality
|
|
36
|
+
|
|
37
|
+
### Commit Format
|
|
38
|
+
```
|
|
39
|
+
<type>: <description>
|
|
40
|
+
|
|
41
|
+
- Change 1
|
|
42
|
+
- Change 2
|
|
43
|
+
|
|
44
|
+
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Types: `feat`, `fix`, `refactor`, `docs`, `test`, `chore`, `ci`
|
|
48
|
+
|
|
49
|
+
### After Pushing
|
|
50
|
+
- CI runs automatically on Python 3.10–3.13
|
|
51
|
+
- Check: https://github.com/ddmonster/modelswitch/actions
|
|
52
|
+
|
|
53
|
+
## Quick Commands
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# Start server
|
|
57
|
+
python -m uvicorn app.main:app --host 0.0.0.0 --port 8000
|
|
58
|
+
|
|
59
|
+
# Run tests
|
|
60
|
+
python -m pytest tests/ -v --timeout=60
|
|
61
|
+
|
|
62
|
+
# Run specific test file
|
|
63
|
+
python -m pytest tests/test_message_converter.py -v
|
|
64
|
+
|
|
65
|
+
# Health check
|
|
66
|
+
curl -s http://localhost:8000/api/config/health
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Key Files
|
|
70
|
+
|
|
71
|
+
| File | Purpose |
|
|
72
|
+
|------|---------|
|
|
73
|
+
| `config.yaml` | Providers, models, API keys (hot-reloaded) |
|
|
74
|
+
| `app/core/chain_router.py` | Routes requests with fallback |
|
|
75
|
+
| `app/adapters/*.py` | Provider-specific adapters |
|
|
76
|
+
| `app/utils/message_converter.py` | OpenAI ↔ Anthropic conversion |
|
|
77
|
+
| `CLAUDE.md` | Full architecture documentation |
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
ModelSwitch is an LLM gateway proxy that exposes OpenAI-compatible and Anthropic-compatible APIs. It routes requests to upstream providers (DashScope, GLM Plan, OpenAI, Anthropic) with automatic fallback chains, per-key rate limiting, usage tracking, tool use conversion, and a web management UI.
|
|
8
|
+
|
|
9
|
+
## Running the Server
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Start server (default port 8000)
|
|
13
|
+
python -m uvicorn app.main:app --host 0.0.0.0 --port 8000
|
|
14
|
+
|
|
15
|
+
# Docker
|
|
16
|
+
docker-compose up --build
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
The server takes ~10s to start due to provider connection warmup. Health check: `GET /api/config/health`.
|
|
20
|
+
|
|
21
|
+
## Key Commands
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# Install dependencies (litellm is pinned to 1.82.6 — do not upgrade due to supply chain attack on 1.82.7/1.82.8)
|
|
25
|
+
pip install -r requirements.txt
|
|
26
|
+
|
|
27
|
+
# Run all tests (pytest-asyncio with strict mode)
|
|
28
|
+
python -m pytest tests/ -v
|
|
29
|
+
|
|
30
|
+
# Run a single test file
|
|
31
|
+
python -m pytest tests/test_message_converter.py -v
|
|
32
|
+
|
|
33
|
+
# Run with coverage
|
|
34
|
+
python -m pytest tests/ --cov=app --cov-report=term-missing
|
|
35
|
+
|
|
36
|
+
# Test an endpoint
|
|
37
|
+
curl -s http://localhost:8000/v1/chat/completions \
|
|
38
|
+
-H "Authorization: Bearer sk-gateway-admin" \
|
|
39
|
+
-H "Content-Type: application/json" \
|
|
40
|
+
-d '{"model":"glm5","messages":[{"role":"user","content":"hi"}],"max_tokens":100}'
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Architecture
|
|
44
|
+
|
|
45
|
+
### Request Flow
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
Client → GatewayMiddleware (auth/rate-limit) → Route Handler → ChainRouter → LiteLLMAdapter → Upstream Provider
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Config-Driven Design (`config.yaml`)
|
|
52
|
+
|
|
53
|
+
- **Providers** are top-level connection definitions (name, base_url, api_key, protocol type). API keys support `${ENV_VAR}` substitution.
|
|
54
|
+
- **Models** reference providers by name with priority/timeout. Two modes:
|
|
55
|
+
- `chain`: tries adapters by priority, falls back on failure (with circuit breaker + 1 retry per adapter)
|
|
56
|
+
- `adapter`: direct call to a single provider, no fallback
|
|
57
|
+
- **API Keys** are configured in the YAML, not a database. They have a `roles` field (`admin` | `user`, default `["user"]`). Admin keys can manage providers/models/keys via management API.
|
|
58
|
+
|
|
59
|
+
### Core Layer (`app/core/`)
|
|
60
|
+
|
|
61
|
+
| Component | Purpose |
|
|
62
|
+
|---|---|
|
|
63
|
+
| `chain_router.py` | Routes requests to adapters. Chain mode does first-chunk probe for streaming fallback. Uses `_adapter_info` dict to pass adapter name/latency/usage from stream generator to caller. |
|
|
64
|
+
| `circuit_breaker.py` | Per-provider circuit breaker: 5 failures → 30s open → half-open probe |
|
|
65
|
+
| `middleware.py` | Pure ASGI middleware (NOT BaseHTTPMiddleware — that causes infinite recursion). Three-tier auth: public paths (no auth), API key auth (any valid key), admin auth (role check). Plus rate limiting and CORS. |
|
|
66
|
+
| `config_watcher.py` | watchdog-based hot reload with 2s debounce |
|
|
67
|
+
|
|
68
|
+
### Middleware Constraint
|
|
69
|
+
|
|
70
|
+
`GatewayMiddleware` is a **pure ASGI middleware** (raw `__call__(scope, receive, send)`), registered via `app.add_middleware()`. It does **NOT** extend `BaseHTTPMiddleware` and must not be wrapped with `@app.middleware("http")`. Mixing these two patterns causes infinite recursion because `call_next` re-triggers the decorator wrapper.
|
|
71
|
+
|
|
72
|
+
### Adapter Layer (`app/adapters/`)
|
|
73
|
+
|
|
74
|
+
`LiteLLMAdapter` wraps `litellm.acompletion()`. Model names are prefixed with the provider type: `openai/glm-5`, `anthropic/claude-sonnet-4-20250514`. Returns `AdapterResponse` dataclass with `body` (non-stream) or `stream` (async generator).
|
|
75
|
+
|
|
76
|
+
Streaming usage: OpenAI adapter sets `stream_options: {include_usage: true}` to capture token counts from the final chunk. Usage is stored in `AdapterResponse.usage` after stream completes.
|
|
77
|
+
|
|
78
|
+
### Protocol Conversion (`app/utils/message_converter.py`)
|
|
79
|
+
|
|
80
|
+
Bidirectional conversion between Anthropic and OpenAI formats:
|
|
81
|
+
- `anthropic_to_openai_messages()`: System, messages, tools/tool_choice, tool_use blocks → tool_calls, tool_result blocks → role:"tool"
|
|
82
|
+
- `openai_stream_to_anthropic()`: OpenAI SSE chunks → Anthropic SSE events, including tool_calls deltas → input_json_delta events
|
|
83
|
+
|
|
84
|
+
### API Routes (`app/api/`)
|
|
85
|
+
|
|
86
|
+
- `openai_routes.py`: `GET /openai/models` (also `/v1/models`), `POST /openai/chat/completions` (also `/v1/chat/completions`)
|
|
87
|
+
- `anthropic_routes.py`: `POST /anthropic/messages` (also `/v1/messages`) — converts to/from OpenAI internally, including full tool_use conversion
|
|
88
|
+
- `config_routes.py`, `api_key_routes.py`: CRUD for providers/models/keys, writes back to `config.yaml`
|
|
89
|
+
- `usage_routes.py`: Aggregated stats with `group_by` (provider/model/api_key) and drill-down
|
|
90
|
+
- `log_routes.py`: Queries in-memory ring buffer (max 1000 entries)
|
|
91
|
+
- `conversation_routes.py`: Queries conversation log files (multi-file discovery with metadata streaming, on-demand full record fetch)
|
|
92
|
+
|
|
93
|
+
### Protocol Conversion — Full details in `app/utils/message_converter.py`
|
|
94
|
+
|
|
95
|
+
- **Request**: `anthropic_to_openai_messages()` — converts system, messages, tools, tool_choice, tool_use/tool_result blocks
|
|
96
|
+
- **Non-stream response**: `_convert_openai_to_anthropic_response()` in `anthropic_routes.py` — converts tool_calls to tool_use content blocks
|
|
97
|
+
- **Stream response**: `openai_stream_to_anthropic()` — handles text deltas and tool_calls deltas with multi-tool index tracking
|
|
98
|
+
|
|
99
|
+
### Request Tracking (`app/utils/tracking.py`)
|
|
100
|
+
|
|
101
|
+
Centralized `track_request()` called from both route files after chain_router returns. Records usage stats (via `usage_tracker.record()`) and debug logs (via `add_log_to_buffer()`).
|
|
102
|
+
|
|
103
|
+
For streaming: `chain_router._execute_chat_stream` populates `_adapter_info` dict with adapter name, latency, and usage. Route handlers read this in the `finally` block after stream consumption and set on `result` before tracking.
|
|
104
|
+
|
|
105
|
+
### Persistence
|
|
106
|
+
|
|
107
|
+
- Config: `config.yaml` (hot-reloaded via watchdog)
|
|
108
|
+
- Usage stats: SQLite at `data/usage.db`, batch-flushed every 10s
|
|
109
|
+
- Logs: Rotating file at `logs/gateway.log` + in-memory deque buffer
|
|
110
|
+
|
|
111
|
+
### Frontend
|
|
112
|
+
|
|
113
|
+
Single-page app in `app/web/` (HTML/CSS/JS, no build step). 7 tabs: Providers, Models, API Keys, Queue Monitor, Usage Stats, Debug Logs, Conversations. Login modal requires admin API key. Token persisted in localStorage. Served at `/` and `/web/`.
|
|
114
|
+
|
|
115
|
+
## Key Patterns
|
|
116
|
+
|
|
117
|
+
- Route handlers access shared state via `request.app.state` (chain_router, usage_tracker, api_key_service, config)
|
|
118
|
+
- The middleware injects auth info into `scope["state"]` (api_key, api_key_name, api_key_config, api_key_roles), which maps to `request.state` in route handlers
|
|
119
|
+
- Public paths (no auth required): `/`, `/health`, `/metrics`, `/docs`, `/web/*`, `/static/*`
|
|
120
|
+
- Auth-required paths (any valid key): `/v1/*`, `/openai/*`, `/anthropic/*`, `/api/usage`, `/api/logs`, `/api/conversations` — accepts `Authorization: Bearer <key>`, `x-api-key: <key>`, or bare `sk-*` header
|
|
121
|
+
- Admin-required paths (`roles: ["admin"]`): `/api/config/*`, `/api/keys/*`
|
|
122
|
+
- Error format adapts to route: OpenAI-style `{"error": {...}}` for `/openai/*` and `/v1/chat/completions`, Anthropic-style `{"type": "error", "error": {...}}` for `/anthropic/*` and `/v1/messages`
|
|
123
|
+
- Anthropic routes skip conversion when the first adapter is an Anthropic provider (passthrough mode)
|