modelswitch 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. modelswitch-0.1.1/.github/workflows/ci.yml +63 -0
  2. modelswitch-0.1.1/.gitignore +16 -0
  3. modelswitch-0.1.1/.zedrules +77 -0
  4. modelswitch-0.1.1/CLAUDE.md +123 -0
  5. modelswitch-0.1.1/DEVWORKFLOW.md +527 -0
  6. modelswitch-0.1.1/Dockerfile +16 -0
  7. modelswitch-0.1.1/PKG-INFO +293 -0
  8. modelswitch-0.1.1/README.md +270 -0
  9. modelswitch-0.1.1/TESTGUIDE.md +503 -0
  10. modelswitch-0.1.1/app/__init__.py +11 -0
  11. modelswitch-0.1.1/app/adapters/__init__.py +0 -0
  12. modelswitch-0.1.1/app/adapters/anthropic_adapter.py +427 -0
  13. modelswitch-0.1.1/app/adapters/base.py +63 -0
  14. modelswitch-0.1.1/app/adapters/litellm_adapter.py +21 -0
  15. modelswitch-0.1.1/app/adapters/openai_adapter.py +280 -0
  16. modelswitch-0.1.1/app/api/__init__.py +0 -0
  17. modelswitch-0.1.1/app/api/anthropic_routes.py +266 -0
  18. modelswitch-0.1.1/app/api/api_key_routes.py +142 -0
  19. modelswitch-0.1.1/app/api/config_routes.py +405 -0
  20. modelswitch-0.1.1/app/api/conversation_routes.py +128 -0
  21. modelswitch-0.1.1/app/api/log_routes.py +23 -0
  22. modelswitch-0.1.1/app/api/openai_routes.py +243 -0
  23. modelswitch-0.1.1/app/api/usage_routes.py +68 -0
  24. modelswitch-0.1.1/app/cli.py +283 -0
  25. modelswitch-0.1.1/app/cli_inner.py +37 -0
  26. modelswitch-0.1.1/app/config.yaml.example +85 -0
  27. modelswitch-0.1.1/app/core/__init__.py +0 -0
  28. modelswitch-0.1.1/app/core/chain_router.py +339 -0
  29. modelswitch-0.1.1/app/core/circuit_breaker.py +74 -0
  30. modelswitch-0.1.1/app/core/config.py +49 -0
  31. modelswitch-0.1.1/app/core/config_watcher.py +73 -0
  32. modelswitch-0.1.1/app/core/exceptions.py +56 -0
  33. modelswitch-0.1.1/app/core/middleware.py +356 -0
  34. modelswitch-0.1.1/app/core/request_queue.py +303 -0
  35. modelswitch-0.1.1/app/main.py +310 -0
  36. modelswitch-0.1.1/app/models/__init__.py +0 -0
  37. modelswitch-0.1.1/app/models/config_models.py +108 -0
  38. modelswitch-0.1.1/app/services/__init__.py +0 -0
  39. modelswitch-0.1.1/app/services/api_key_service.py +106 -0
  40. modelswitch-0.1.1/app/services/conv_indexer.py +396 -0
  41. modelswitch-0.1.1/app/services/usage_tracker.py +260 -0
  42. modelswitch-0.1.1/app/utils/__init__.py +0 -0
  43. modelswitch-0.1.1/app/utils/logging.py +205 -0
  44. modelswitch-0.1.1/app/utils/message_converter.py +313 -0
  45. modelswitch-0.1.1/app/utils/metrics.py +52 -0
  46. modelswitch-0.1.1/app/utils/tracking.py +141 -0
  47. modelswitch-0.1.1/app/web/css/style.css +1472 -0
  48. modelswitch-0.1.1/app/web/index.html +321 -0
  49. modelswitch-0.1.1/app/web/js/app.js +1643 -0
  50. modelswitch-0.1.1/app/web/js/i18n.js +476 -0
  51. modelswitch-0.1.1/app/workspace.py +81 -0
  52. modelswitch-0.1.1/docker-compose.yaml +20 -0
  53. modelswitch-0.1.1/modelswitch.egg-info/PKG-INFO +293 -0
  54. modelswitch-0.1.1/modelswitch.egg-info/SOURCES.txt +76 -0
  55. modelswitch-0.1.1/modelswitch.egg-info/dependency_links.txt +1 -0
  56. modelswitch-0.1.1/modelswitch.egg-info/entry_points.txt +2 -0
  57. modelswitch-0.1.1/modelswitch.egg-info/requires.txt +17 -0
  58. modelswitch-0.1.1/modelswitch.egg-info/top_level.txt +1 -0
  59. modelswitch-0.1.1/pyproject.toml +47 -0
  60. modelswitch-0.1.1/requirements.txt +10 -0
  61. modelswitch-0.1.1/scripts/smoketest.py +551 -0
  62. modelswitch-0.1.1/scripts/smoketest.sh +344 -0
  63. modelswitch-0.1.1/setup.cfg +4 -0
  64. modelswitch-0.1.1/tests/conftest.py +372 -0
  65. modelswitch-0.1.1/tests/test_anthropic_adapter.py +1572 -0
  66. modelswitch-0.1.1/tests/test_api_key_service.py +181 -0
  67. modelswitch-0.1.1/tests/test_api_routes.py +1821 -0
  68. modelswitch-0.1.1/tests/test_chain_router.py +866 -0
  69. modelswitch-0.1.1/tests/test_circuit_breaker.py +165 -0
  70. modelswitch-0.1.1/tests/test_client_compat.py +577 -0
  71. modelswitch-0.1.1/tests/test_config_models.py +187 -0
  72. modelswitch-0.1.1/tests/test_conversation_routes.py +917 -0
  73. modelswitch-0.1.1/tests/test_e2e.py +588 -0
  74. modelswitch-0.1.1/tests/test_message_converter.py +552 -0
  75. modelswitch-0.1.1/tests/test_openai_adapter.py +996 -0
  76. modelswitch-0.1.1/tests/test_request_queue.py +1230 -0
  77. modelswitch-0.1.1/tests/test_usage_tracker.py +166 -0
  78. modelswitch-0.1.1/todo_test.md +335 -0
@@ -0,0 +1,63 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ tags: ["v*"]
7
+ pull_request:
8
+ branches: [main]
9
+
10
+ permissions:
11
+ contents: write
12
+ id-token: write
13
+
14
+ jobs:
15
+ test:
16
+ runs-on: ubuntu-latest
17
+ strategy:
18
+ fail-fast: false
19
+ matrix:
20
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
21
+
22
+ steps:
23
+ - uses: actions/checkout@v4
24
+
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v5
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+
30
+ - name: Install dependencies
31
+ run: |
32
+ python -m pip install --upgrade pip
33
+ pip install -r requirements.txt
34
+ pip install pytest pytest-asyncio pytest-timeout litellm==1.82.6
35
+
36
+ - name: Run tests
37
+ run: python -m pytest tests/ -v --timeout=60
38
+
39
+ publish:
40
+ name: Publish to PyPI
41
+ if: startsWith(github.ref, 'refs/tags/v')
42
+ needs: test
43
+ runs-on: ubuntu-latest
44
+ environment: pypi
45
+
46
+ steps:
47
+ - uses: actions/checkout@v4
48
+ with:
49
+ fetch-depth: 0
50
+
51
+ - name: Set up Python
52
+ uses: actions/setup-python@v5
53
+ with:
54
+ python-version: "3.12"
55
+
56
+ - name: Install build tools
57
+ run: python -m pip install --upgrade pip build
58
+
59
+ - name: Build package
60
+ run: python -m build
61
+
62
+ - name: Publish to PyPI
63
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,16 @@
1
+ logs/
2
+ data/
3
+ __pycache__/
4
+ *.pyc
5
+ .env
6
+ .venv
7
+ .coverage
8
+ config.yaml
9
+ .DS_Store
10
+ .claude/
11
+ .pytest_cache/
12
+ .qoder/
13
+ .vscode/
14
+ dist/
15
+ *.egg-info/
16
+ build/
@@ -0,0 +1,77 @@
1
+ # ModelSwitch Development Rules
2
+
3
+ ## Project Overview
4
+
5
+ ModelSwitch is an LLM gateway proxy exposing OpenAI-compatible and Anthropic-compatible APIs with automatic fallback chains, rate limiting, and usage tracking.
6
+
7
+ ## Key Constraints
8
+
9
+ ### Dependencies
10
+ - `litellm` is pinned to `1.82.6` — **NEVER upgrade** (supply chain attack on 1.82.7/1.82.8)
11
+
12
+ ### Middleware
13
+ - `GatewayMiddleware` is pure ASGI (`__call__(scope, receive, send)`)
14
+ - **NEVER** use `BaseHTTPMiddleware` or `@app.middleware("http")` — causes infinite recursion
15
+
16
+ ### Streaming
17
+ - Use `resp_ref` closure pattern to capture usage from async stream generators
18
+ - Adapters return `AdapterResponse` with `body` (non-stream) or `stream` (async generator)
19
+ - Route handlers read `_stream_adapter_info` dict after stream completes
20
+
21
+ ## Development Workflow
22
+
23
+ ### Before Starting
24
+ 1. Run `git status` — commit any uncommitted changes first
25
+ 2. Create feature branch: `git checkout -b feature/[name]`
26
+
27
+ ### During Development
28
+ 1. Follow existing code patterns
29
+ 2. Check `CLAUDE.md` for architecture details
30
+ 3. Make small, focused commits
31
+
32
+ ### Before Committing
33
+ 1. Run tests: `python -m pytest tests/ -v --timeout=60`
34
+ 2. Update documentation if architecture changed
35
+ 3. Add test cases for new functionality
36
+
37
+ ### Commit Format
38
+ ```
39
+ <type>: <description>
40
+
41
+ - Change 1
42
+ - Change 2
43
+
44
+ Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
45
+ ```
46
+
47
+ Types: `feat`, `fix`, `refactor`, `docs`, `test`, `chore`, `ci`
48
+
49
+ ### After Pushing
50
+ - CI runs automatically on Python 3.10–3.13
51
+ - Check: https://github.com/ddmonster/modelswitch/actions
52
+
53
+ ## Quick Commands
54
+
55
+ ```bash
56
+ # Start server
57
+ python -m uvicorn app.main:app --host 0.0.0.0 --port 8000
58
+
59
+ # Run tests
60
+ python -m pytest tests/ -v --timeout=60
61
+
62
+ # Run specific test file
63
+ python -m pytest tests/test_message_converter.py -v
64
+
65
+ # Health check
66
+ curl -s http://localhost:8000/api/config/health
67
+ ```
68
+
69
+ ## Key Files
70
+
71
+ | File | Purpose |
72
+ |------|---------|
73
+ | `config.yaml` | Providers, models, API keys (hot-reloaded) |
74
+ | `app/core/chain_router.py` | Routes requests with fallback |
75
+ | `app/adapters/*.py` | Provider-specific adapters |
76
+ | `app/utils/message_converter.py` | OpenAI ↔ Anthropic conversion |
77
+ | `CLAUDE.md` | Full architecture documentation |
@@ -0,0 +1,123 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ ModelSwitch is an LLM gateway proxy that exposes OpenAI-compatible and Anthropic-compatible APIs. It routes requests to upstream providers (DashScope, GLM Plan, OpenAI, Anthropic) with automatic fallback chains, per-key rate limiting, usage tracking, tool use conversion, and a web management UI.
8
+
9
+ ## Running the Server
10
+
11
+ ```bash
12
+ # Start server (default port 8000)
13
+ python -m uvicorn app.main:app --host 0.0.0.0 --port 8000
14
+
15
+ # Docker
16
+ docker-compose up --build
17
+ ```
18
+
19
+ The server takes ~10s to start due to provider connection warmup. Health check: `GET /api/config/health`.
20
+
21
+ ## Key Commands
22
+
23
+ ```bash
24
+ # Install dependencies (litellm is pinned to 1.82.6 — do not upgrade due to supply chain attack on 1.82.7/1.82.8)
25
+ pip install -r requirements.txt
26
+
27
+ # Run all tests (pytest-asyncio with strict mode)
28
+ python -m pytest tests/ -v
29
+
30
+ # Run a single test file
31
+ python -m pytest tests/test_message_converter.py -v
32
+
33
+ # Run with coverage
34
+ python -m pytest tests/ --cov=app --cov-report=term-missing
35
+
36
+ # Test an endpoint
37
+ curl -s http://localhost:8000/v1/chat/completions \
38
+ -H "Authorization: Bearer sk-gateway-admin" \
39
+ -H "Content-Type: application/json" \
40
+ -d '{"model":"glm5","messages":[{"role":"user","content":"hi"}],"max_tokens":100}'
41
+ ```
42
+
43
+ ## Architecture
44
+
45
+ ### Request Flow
46
+
47
+ ```
48
+ Client → GatewayMiddleware (auth/rate-limit) → Route Handler → ChainRouter → LiteLLMAdapter → Upstream Provider
49
+ ```
50
+
51
+ ### Config-Driven Design (`config.yaml`)
52
+
53
+ - **Providers** are top-level connection definitions (name, base_url, api_key, protocol type). API keys support `${ENV_VAR}` substitution.
54
+ - **Models** reference providers by name with priority/timeout. Two modes:
55
+ - `chain`: tries adapters by priority, falls back on failure (with circuit breaker + 1 retry per adapter)
56
+ - `adapter`: direct call to a single provider, no fallback
57
+ - **API Keys** are configured in the YAML, not a database. They have a `roles` field (`admin` | `user`, default `["user"]`). Admin keys can manage providers/models/keys via management API.
58
+
59
+ ### Core Layer (`app/core/`)
60
+
61
+ | Component | Purpose |
62
+ |---|---|
63
+ | `chain_router.py` | Routes requests to adapters. Chain mode does first-chunk probe for streaming fallback. Uses `_adapter_info` dict to pass adapter name/latency/usage from stream generator to caller. |
64
+ | `circuit_breaker.py` | Per-provider circuit breaker: 5 failures → 30s open → half-open probe |
65
+ | `middleware.py` | Pure ASGI middleware (NOT BaseHTTPMiddleware — that causes infinite recursion). Three-tier auth: public paths (no auth), API key auth (any valid key), admin auth (role check). Plus rate limiting and CORS. |
66
+ | `config_watcher.py` | watchdog-based hot reload with 2s debounce |
67
+
68
+ ### Middleware Constraint
69
+
70
+ `GatewayMiddleware` is a **pure ASGI middleware** (raw `__call__(scope, receive, send)`), registered via `app.add_middleware()`. It does **NOT** extend `BaseHTTPMiddleware` and must not be wrapped with `@app.middleware("http")`. Mixing these two patterns causes infinite recursion because `call_next` re-triggers the decorator wrapper.
71
+
72
+ ### Adapter Layer (`app/adapters/`)
73
+
74
+ `LiteLLMAdapter` wraps `litellm.acompletion()`. Model names are prefixed with the provider type: `openai/glm-5`, `anthropic/claude-sonnet-4-20250514`. Returns `AdapterResponse` dataclass with `body` (non-stream) or `stream` (async generator).
75
+
76
+ Streaming usage: OpenAI adapter sets `stream_options: {include_usage: true}` to capture token counts from the final chunk. Usage is stored in `AdapterResponse.usage` after stream completes.
77
+
78
+ ### Protocol Conversion (`app/utils/message_converter.py`)
79
+
80
+ Bidirectional conversion between Anthropic and OpenAI formats:
81
+ - `anthropic_to_openai_messages()`: System, messages, tools/tool_choice, tool_use blocks → tool_calls, tool_result blocks → role:"tool"
82
+ - `openai_stream_to_anthropic()`: OpenAI SSE chunks → Anthropic SSE events, including tool_calls deltas → input_json_delta events
83
+
84
+ ### API Routes (`app/api/`)
85
+
86
+ - `openai_routes.py`: `GET /openai/models` (also `/v1/models`), `POST /openai/chat/completions` (also `/v1/chat/completions`)
87
+ - `anthropic_routes.py`: `POST /anthropic/messages` (also `/v1/messages`) — converts to/from OpenAI internally, including full tool_use conversion
88
+ - `config_routes.py`, `api_key_routes.py`: CRUD for providers/models/keys, writes back to `config.yaml`
89
+ - `usage_routes.py`: Aggregated stats with `group_by` (provider/model/api_key) and drill-down
90
+ - `log_routes.py`: Queries in-memory ring buffer (max 1000 entries)
91
+ - `conversation_routes.py`: Queries conversation log files (multi-file discovery with metadata streaming, on-demand full record fetch)
92
+
93
+ ### Protocol Conversion — Full details in `app/utils/message_converter.py`
94
+
95
+ - **Request**: `anthropic_to_openai_messages()` — converts system, messages, tools, tool_choice, tool_use/tool_result blocks
96
+ - **Non-stream response**: `_convert_openai_to_anthropic_response()` in `anthropic_routes.py` — converts tool_calls to tool_use content blocks
97
+ - **Stream response**: `openai_stream_to_anthropic()` — handles text deltas and tool_calls deltas with multi-tool index tracking
98
+
99
+ ### Request Tracking (`app/utils/tracking.py`)
100
+
101
+ Centralized `track_request()` called from both route files after chain_router returns. Records usage stats (via `usage_tracker.record()`) and debug logs (via `add_log_to_buffer()`).
102
+
103
+ For streaming: `chain_router._execute_chat_stream` populates `_adapter_info` dict with adapter name, latency, and usage. Route handlers read this in the `finally` block after stream consumption and set on `result` before tracking.
104
+
105
+ ### Persistence
106
+
107
+ - Config: `config.yaml` (hot-reloaded via watchdog)
108
+ - Usage stats: SQLite at `data/usage.db`, batch-flushed every 10s
109
+ - Logs: Rotating file at `logs/gateway.log` + in-memory deque buffer
110
+
111
+ ### Frontend
112
+
113
+ Single-page app in `app/web/` (HTML/CSS/JS, no build step). 7 tabs: Providers, Models, API Keys, Queue Monitor, Usage Stats, Debug Logs, Conversations. Login modal requires admin API key. Token persisted in localStorage. Served at `/` and `/web/`.
114
+
115
+ ## Key Patterns
116
+
117
+ - Route handlers access shared state via `request.app.state` (chain_router, usage_tracker, api_key_service, config)
118
+ - The middleware injects auth info into `scope["state"]` (api_key, api_key_name, api_key_config, api_key_roles), which maps to `request.state` in route handlers
119
+ - Public paths (no auth required): `/`, `/health`, `/metrics`, `/docs`, `/web/*`, `/static/*`
120
+ - Auth-required paths (any valid key): `/v1/*`, `/openai/*`, `/anthropic/*`, `/api/usage`, `/api/logs`, `/api/conversations` — accepts `Authorization: Bearer <key>`, `x-api-key: <key>`, or bare `sk-*` header
121
+ - Admin-required paths (`roles: ["admin"]`): `/api/config/*`, `/api/keys/*`
122
+ - Error format adapts to route: OpenAI-style `{"error": {...}}` for `/openai/*` and `/v1/chat/completions`, Anthropic-style `{"type": "error", "error": {...}}` for `/anthropic/*` and `/v1/messages`
123
+ - Anthropic routes skip conversion when the first adapter is an Anthropic provider (passthrough mode)