codex-proxy 4.0.0__tar.gz → 5.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/CHANGELOG.md +74 -0
- codex_proxy-5.0.0/PKG-INFO +483 -0
- codex_proxy-5.0.0/README.md +445 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/pyproject.toml +8 -2
- codex_proxy-5.0.0/src/codex_proxy/__init__.py +3 -0
- codex_proxy-5.0.0/src/codex_proxy/auth.py +252 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/config.py +174 -17
- codex_proxy-5.0.0/src/codex_proxy/cost.py +146 -0
- codex_proxy-5.0.0/src/codex_proxy/dashboard.py +282 -0
- codex_proxy-5.0.0/src/codex_proxy/db/__init__.py +73 -0
- codex_proxy-5.0.0/src/codex_proxy/db/crud_analytics.py +127 -0
- codex_proxy-5.0.0/src/codex_proxy/db/crud_budgets.py +140 -0
- codex_proxy-5.0.0/src/codex_proxy/db/crud_keys.py +109 -0
- codex_proxy-5.0.0/src/codex_proxy/db/crud_logs.py +80 -0
- codex_proxy-5.0.0/src/codex_proxy/db/crud_plugins.py +119 -0
- codex_proxy-5.0.0/src/codex_proxy/db/crud_providers.py +206 -0
- codex_proxy-5.0.0/src/codex_proxy/db/crud_users.py +76 -0
- codex_proxy-5.0.0/src/codex_proxy/db/engine.py +48 -0
- codex_proxy-5.0.0/src/codex_proxy/db/migrations.py +51 -0
- codex_proxy-5.0.0/src/codex_proxy/db/models.py +210 -0
- codex_proxy-5.0.0/src/codex_proxy/router.py +199 -0
- codex_proxy-5.0.0/src/codex_proxy/server.py +1476 -0
- codex_proxy-5.0.0/tests/test_auth_cost_router.py +387 -0
- codex_proxy-5.0.0/tests/test_db.py +332 -0
- codex_proxy-4.0.0/PKG-INFO +0 -567
- codex_proxy-4.0.0/README.md +0 -540
- codex_proxy-4.0.0/src/codex_proxy/__init__.py +0 -3
- codex_proxy-4.0.0/src/codex_proxy/server.py +0 -707
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.dockerignore +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.github/workflows/ci.yml +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.github/workflows/release.yml +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.gitignore +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.pre-commit-config.yaml +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/CODE_OF_CONDUCT.md +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/CONTRIBUTING.md +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/Dockerfile +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/LICENSE +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/assets/tui-dashboard.png +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/docker-compose.yml +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/__main__.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/circuit_breaker.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/compaction.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/key_rotation.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/plugins.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/plugins_builtin.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/providers.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/rate_limiter.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/store.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/translator.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/tui.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/conftest.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_circuit_breaker.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_compaction.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_config.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_edge_cases.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_key_rotation.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_plugins.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_providers.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_rate_limiter.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_server.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_server_features.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_store.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_translator.py +0 -0
- {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_tui.py +0 -0
|
@@ -5,6 +5,80 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [5.0.0] - 2026-06-07
|
|
9
|
+
|
|
10
|
+
### Added — v5 Gateway Platform
|
|
11
|
+
|
|
12
|
+
**Database Layer**
|
|
13
|
+
- Async SQLAlchemy database with 13 tables (users, api_keys, providers, provider_keys, models, routing_rules, request_logs, budgets, cost_alerts, plugin_registry, plugin_instances, sessions, _schema_version)
|
|
14
|
+
- SQLite (default) or PostgreSQL (via asyncpg) backend
|
|
15
|
+
- Version-based migrations system
|
|
16
|
+
- 7 CRUD modules for all entities
|
|
17
|
+
|
|
18
|
+
**Multi-Provider Support**
|
|
19
|
+
- `[[providers]]` TOML array — configure multiple providers in one instance
|
|
20
|
+
- Per-provider clients, adapters, key rotators
|
|
21
|
+
- Model-to-provider resolution across all providers
|
|
22
|
+
- `/models` endpoint aggregates models from all providers
|
|
23
|
+
|
|
24
|
+
**JWT Authentication**
|
|
25
|
+
- POST `/auth/login`, `/auth/signup`, `/auth/refresh`, GET `/auth/me`
|
|
26
|
+
- Password hashing with bcrypt (SHA-256 fallback)
|
|
27
|
+
- JWT access + refresh tokens (PyJWT with stdlib HMAC-SHA256 fallback)
|
|
28
|
+
- Admin user auto-seeded on first startup
|
|
29
|
+
- Budget endpoints: GET/PUT `/auth/budget`
|
|
30
|
+
|
|
31
|
+
**Smart Router**
|
|
32
|
+
- 4 routing strategies: `fallback` (config order), `cost` (cheapest), `latency` (fastest), `weighted` (load balanced)
|
|
33
|
+
- Rolling latency tracker with per-provider health detection
|
|
34
|
+
- Unhealthy providers automatically skipped
|
|
35
|
+
- GET `/api/router/status` for detailed metrics
|
|
36
|
+
|
|
37
|
+
**Cost Tracking**
|
|
38
|
+
- Per-model pricing: input_price_per_million, output_price_per_million
|
|
39
|
+
- 25+ built-in model prices (GLM, GPT, Claude, Gemini, DeepSeek, Llama, etc.)
|
|
40
|
+
- Automatic cost calculation on every request (DB lookup → KNOWN_PRICING fallback → $0)
|
|
41
|
+
- Prices auto-seeded to DB on startup (idempotent)
|
|
42
|
+
- Cost aggregation analytics: GET `/api/stats`, `/api/usage`
|
|
43
|
+
|
|
44
|
+
**Budget Enforcement**
|
|
45
|
+
- Daily and monthly spend limits per user
|
|
46
|
+
- Requests blocked with 429 when budget exceeded
|
|
47
|
+
- Alert threshold notifications
|
|
48
|
+
- Budget status and alerts via API
|
|
49
|
+
|
|
50
|
+
**Web Dashboard**
|
|
51
|
+
- Embedded HTML+CSS+JS dashboard at GET `/dashboard`
|
|
52
|
+
- Dark theme, auto-refresh every 10 seconds
|
|
53
|
+
- Stats cards (requests, success rate, uptime, total cost)
|
|
54
|
+
- Cost breakdown table with visual bars
|
|
55
|
+
- Provider cards with model tags
|
|
56
|
+
- Router status with latency and health info
|
|
57
|
+
|
|
58
|
+
**v5 Config Sections** (all disabled by default for v4 compat)
|
|
59
|
+
- `[database]` — persistent storage settings
|
|
60
|
+
- `[auth]` — JWT authentication settings
|
|
61
|
+
- `[router]` — smart routing settings
|
|
62
|
+
- `[dashboard]` — web dashboard settings
|
|
63
|
+
|
|
64
|
+
**New Dependencies**
|
|
65
|
+
- `sqlalchemy>=2.0`, `aiosqlite>=0.20` (core)
|
|
66
|
+
- Optional: `asyncpg>=0.29` (postgres), `bcrypt>=4.0`, `PyJWT>=2.8`, `cryptography>=42.0` (enterprise)
|
|
67
|
+
|
|
68
|
+
**Testing**
|
|
69
|
+
- 270+ tests (up from 217): 20 DB tests, 33 auth/cost/router tests
|
|
70
|
+
- All existing v4 tests pass unchanged — zero regressions
|
|
71
|
+
|
|
72
|
+
### Changed
|
|
73
|
+
- Description updated: "LLM Gateway Platform — multi-provider proxy with smart routing, cost analytics, and web dashboard"
|
|
74
|
+
- `_log_request()` now calculates real costs and resolves provider_id from DB
|
|
75
|
+
- `/status` endpoint includes v5 features, auth, and router status
|
|
76
|
+
- Startup output shows all providers and v5 mode
|
|
77
|
+
|
|
78
|
+
### Backward Compatibility
|
|
79
|
+
- All v5 features **disabled by default** — v4 config works as-is with zero changes
|
|
80
|
+
- No breaking changes to existing API endpoints or behavior
|
|
81
|
+
|
|
8
82
|
## [4.0.0] - 2026-06-07
|
|
9
83
|
|
|
10
84
|
### Fixed
|
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codex-proxy
|
|
3
|
+
Version: 5.0.0
|
|
4
|
+
Summary: LLM Gateway Platform — multi-provider proxy with smart routing, cost analytics, and web dashboard
|
|
5
|
+
Project-URL: Repository, https://github.com/ZiryaNoov/codex-proxy
|
|
6
|
+
Author-email: ZakPro <zakarinoo@gmail.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: chat-completions,codex,glm,openai,proxy,responses-api,z.ai
|
|
10
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Requires-Dist: aiosqlite>=0.20
|
|
17
|
+
Requires-Dist: fastapi>=0.115
|
|
18
|
+
Requires-Dist: httpx>=0.27
|
|
19
|
+
Requires-Dist: sqlalchemy>=2.0
|
|
20
|
+
Requires-Dist: tomli>=2.0; python_version < '3.11'
|
|
21
|
+
Requires-Dist: uvicorn>=0.30
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: aiosqlite>=0.20; extra == 'dev'
|
|
24
|
+
Requires-Dist: mypy>=1.13; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: ruff>=0.8; extra == 'dev'
|
|
28
|
+
Provides-Extra: enterprise
|
|
29
|
+
Requires-Dist: asyncpg>=0.29; extra == 'enterprise'
|
|
30
|
+
Requires-Dist: bcrypt>=4.0; extra == 'enterprise'
|
|
31
|
+
Requires-Dist: cryptography>=42.0; extra == 'enterprise'
|
|
32
|
+
Requires-Dist: pyjwt>=2.8; extra == 'enterprise'
|
|
33
|
+
Provides-Extra: postgres
|
|
34
|
+
Requires-Dist: asyncpg>=0.29; extra == 'postgres'
|
|
35
|
+
Provides-Extra: tui
|
|
36
|
+
Requires-Dist: rich>=13.0; extra == 'tui'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# codex-proxy
|
|
40
|
+
|
|
41
|
+
[CI](https://github.com/ZiryaNoov/codex-proxy/actions/workflows/ci.yml)
|
|
42
|
+
[PyPI version](https://pypi.org/project/codex-proxy/)
|
|
43
|
+
[Python versions](https://pypi.org/project/codex-proxy/)
|
|
44
|
+
[License: MIT](https://github.com/ZiryaNoov/codex-proxy/blob/main/LICENSE)
|
|
45
|
+
|
|
46
|
+
**Lightweight LLM Gateway Platform — multi-provider proxy with smart routing, JWT auth, cost analytics, and web dashboard.**
|
|
47
|
+
|
|
48
|
+
Use Codex CLI with **any** Chat Completions-compatible provider -- Z.AI, Groq,
|
|
49
|
+
Together AI, OpenRouter, Ollama, Fireworks, Anthropic, Gemini, DeepSeek, Mistral,
|
|
50
|
+
Cohere, NVIDIA NIM, and more.
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Why codex-proxy?
|
|
55
|
+
|
|
56
|
+
| | codex-proxy | LiteLLM |
|
|
57
|
+
|---|---|---|
|
|
58
|
+
| **Install** | `pip install codex-proxy` | `pip install litellm[proxy]` |
|
|
59
|
+
| **Dependencies** | 6 (FastAPI, uvicorn, httpx, tomli, sqlalchemy, aiosqlite) | 50+ |
|
|
60
|
+
| **Config** | Single TOML file | YAML + env vars + DB |
|
|
61
|
+
| **Start time** | <1s | 3-5s |
|
|
62
|
+
| **Memory** | ~30MB | ~200MB+ |
|
|
63
|
+
| **Dashboard** | TUI + Web UI | Separate Admin UI |
|
|
64
|
+
| **Auth** | Built-in JWT | External |
|
|
65
|
+
| **Smart Routing** | 4 strategies | Basic |
|
|
66
|
+
| **Cost Tracking** | Per-model pricing + analytics | Via logging |
|
|
67
|
+
| **Circuit Breaker** | Per-key + global | Basic |
|
|
68
|
+
| **Plugins** | Built-in hook system | Callbacks only |
|
|
69
|
+
|
|
70
|
+
## Architecture
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
codex-proxy v5.0.0
|
|
74
|
+
┌────────────┐ ┌──────────────────────────────────┐ ┌──────────────────┐
|
|
75
|
+
│ │ │ │ │ │
|
|
76
|
+
│ Codex CLI │─────>│ FastAPI server │─────>│ LLM Provider │
|
|
77
|
+
│ Cursor │ │ localhost:4242 │ │ (CC endpoint) │
|
|
78
|
+
│ Any IDE │ │ │ │ │
|
|
79
|
+
│ │<─────│ Core: │<─────│ Z.AI / Groq / │
|
|
80
|
+
└────────────┘ │ . Translator │ │ Ollama / etc. │
|
|
81
|
+
│ . Response Store │ └──────────────────┘
|
|
82
|
+
Responses API │ . Circuit Breaker │
|
|
83
|
+
protocol │ . Key Rotator │ Chat Completions
|
|
84
|
+
│ . Compaction Engine │ protocol
|
|
85
|
+
│ . Plugin Registry │
|
|
86
|
+
│ . Rate Limiter │ ┌──────────────────┐
|
|
87
|
+
│ . Provider Adapters (12+) │ │ SQLite / PG DB │
|
|
88
|
+
│ │─────>│ │
|
|
89
|
+
│ v5 Gateway Features: │ │ . Users │
|
|
90
|
+
│ . Smart Router (4 strategies) │ │ . API Keys │
|
|
91
|
+
│ . JWT Auth (bcrypt + tokens) │ │ . Providers │
|
|
92
|
+
│ . Cost Tracking (25+ models) │ │ . Request Logs │
|
|
93
|
+
│ . Budget Enforcement │ │ . Budgets │
|
|
94
|
+
│ . Web Dashboard (/dashboard) │ │ . Analytics │
|
|
95
|
+
│ . Multi-Provider Routing │ └──────────────────┘
|
|
96
|
+
└──────────────────────────────────┘
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Features
|
|
100
|
+
|
|
101
|
+
### Core
|
|
102
|
+
|
|
103
|
+
- **Protocol translation** -- Responses API to Chat Completions in real time
|
|
104
|
+
- **Streaming SSE** -- token-by-token delivery with full protocol mapping
|
|
105
|
+
- **WebSocket support** -- full Realtime API envelope handling
|
|
106
|
+
- **Reasoning passthrough** -- forwards thinking/reasoning tokens
|
|
107
|
+
- **Tool calls** -- full function calling support (definitions + results)
|
|
108
|
+
- **Multi-turn** -- `previous_response_id` via in-memory response store
|
|
109
|
+
- **Auto-retry** -- configurable retries on 5xx/transport errors
|
|
110
|
+
- **Rate limiting** -- per-client sliding window rate limiter
|
|
111
|
+
- **Admin auth** -- optional Bearer token on `/reload` and `/status` endpoints
|
|
112
|
+
- **CORS support** -- configurable allowed origins
|
|
113
|
+
- **Request size limits** -- configurable max body size (default 10MB)
|
|
114
|
+
|
|
115
|
+
### v5 Gateway Features
|
|
116
|
+
|
|
117
|
+
- **Multi-provider support** -- route to multiple providers via `[[providers]]` config
|
|
118
|
+
- **Smart routing** -- 4 strategies: `fallback`, `cost` (cheapest), `latency` (fastest), `weighted` (load balanced)
|
|
119
|
+
- **JWT authentication** -- login/signup/refresh tokens with bcrypt password hashing (SHA-256 fallback when bcrypt is not installed)
|
|
120
|
+
- **Cost tracking** -- per-model pricing with automatic cost calculation on every request
|
|
121
|
+
- **Budget enforcement** -- set daily/monthly spend limits per user; requests blocked when exceeded
|
|
122
|
+
- **Web dashboard** -- dark-themed HTML dashboard at `/dashboard` with live stats, cost charts, provider cards
|
|
123
|
+
- **Database layer** -- async SQLAlchemy (SQLite or PostgreSQL) with 13 tables, migrations, and CRUD
|
|
124
|
+
- **25+ model prices** -- built-in pricing data for GLM, GPT, Claude, Gemini, DeepSeek, Llama, and more
|
|
125
|
+
|
|
126
|
+
### Reliability
|
|
127
|
+
|
|
128
|
+
- **Circuit breaker** -- global fail-fast when upstream is down (configurable threshold + recovery)
|
|
129
|
+
- **Multi-key rotation** -- round-robin across API keys with **per-key circuit breakers**; auth/rate-limit errors (401/403/429) trip individual keys
|
|
130
|
+
- **Context compaction** -- auto-trims long conversations to stay within model limits
|
|
131
|
+
|
|
132
|
+
### Observability
|
|
133
|
+
|
|
134
|
+
- **Live TUI dashboard** -- real-time metrics, circuit breaker state, key pool status, log tail, hotkeys
|
|
135
|
+
- **Web dashboard** -- browser-based dashboard with auto-refresh, cost breakdown, provider status, router metrics
|
|
136
|
+
- **Plugin system** -- hook-based middleware (`on_request`, `on_response`, `on_error`, `on_startup`, `on_shutdown`)
|
|
137
|
+
- **Config hot-reload** -- reload config without restart via TUI hotkey or `POST /reload`
|
|
138
|
+
|
|
139
|
+
### Ecosystem
|
|
140
|
+
|
|
141
|
+
- **12+ providers** -- Z.AI, Groq, Together, OpenRouter, Ollama, Fireworks, Anthropic, Gemini, DeepSeek, Mistral, Cohere, NVIDIA NIM
|
|
142
|
+
- **Provider adapters** -- per-provider header/request normalization
|
|
143
|
+
- **Docker-ready** -- Dockerfile and Compose file included
|
|
144
|
+
- **pip-installable** -- `pip install codex-proxy`, run with `codex-proxy` CLI
|
|
145
|
+
- **270+ tests** -- comprehensive test suite covering all modules
|
|
146
|
+
|
|
147
|
+
## Quick Start
|
|
148
|
+
|
|
149
|
+
### Install
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
pip install codex-proxy
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
With extras:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
pip install "codex-proxy[tui]" # Terminal dashboard
|
|
159
|
+
pip install "codex-proxy[postgres]" # PostgreSQL backend
|
|
160
|
+
pip install "codex-proxy[enterprise]" # bcrypt + JWT + crypto + PG
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Configure
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
codex-proxy --init
|
|
167
|
+
# Edit ~/.codex-proxy/config.toml with your provider details
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Run
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
# Standard mode (v4 compatible)
|
|
174
|
+
codex-proxy
|
|
175
|
+
|
|
176
|
+
# With live TUI dashboard
|
|
177
|
+
codex-proxy --tui
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### Enable v5 Features
|
|
181
|
+
|
|
182
|
+
Add to your `~/.codex-proxy/config.toml`:
|
|
183
|
+
|
|
184
|
+
```toml
|
|
185
|
+
# Enable persistent database
|
|
186
|
+
[database]
|
|
187
|
+
enabled = true
|
|
188
|
+
# url = "" # empty = SQLite at ~/.codex-proxy/proxy.db
|
|
189
|
+
|
|
190
|
+
# Enable JWT authentication
|
|
191
|
+
[auth]
|
|
192
|
+
enabled = true
|
|
193
|
+
secret_key = "your-secret-key-here" # auto-generated if empty
|
|
194
|
+
admin_username = "admin"
|
|
195
|
+
admin_password = "changeme" # hashed on first startup
|
|
196
|
+
|
|
197
|
+
# Enable smart routing (use with [[providers]])
|
|
198
|
+
[router]
|
|
199
|
+
enabled = true
|
|
200
|
+
default_strategy = "fallback" # cost|latency|fallback|weighted
|
|
201
|
+
|
|
202
|
+
# Enable web dashboard
|
|
203
|
+
[dashboard]
|
|
204
|
+
enabled = true
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Multi-Provider Setup
|
|
208
|
+
|
|
209
|
+
```toml
|
|
210
|
+
[[providers]]
|
|
211
|
+
name = "zai"
|
|
212
|
+
display_name = "Z.AI"
|
|
213
|
+
base_url = "https://api.z.ai/api/paas/v4"
|
|
214
|
+
api_key_env = "OPENAI_API_KEY"
|
|
215
|
+
models = ["glm-5.1", "glm-5", "glm-4.7"]
|
|
216
|
+
default_model = "glm-5.1"
|
|
217
|
+
|
|
218
|
+
[[providers]]
|
|
219
|
+
name = "groq"
|
|
220
|
+
display_name = "Groq"
|
|
221
|
+
base_url = "https://api.groq.com/openai/v1"
|
|
222
|
+
api_key_env = "GROQ_API_KEY"
|
|
223
|
+
models = ["llama-4-maverick-17b"]
|
|
224
|
+
default_model = "llama-4-maverick-17b"
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
### Connect Codex CLI
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
export OPENAI_BASE_URL=http://127.0.0.1:4242
|
|
231
|
+
codex --model glm-5.1 "say hello"
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## v5 API Endpoints
|
|
235
|
+
|
|
236
|
+
### Auth
|
|
237
|
+
|
|
238
|
+
| Endpoint | Method | Description |
|
|
239
|
+
|---|---|---|
|
|
240
|
+
| `/auth/login` | POST | Authenticate user, returns JWT tokens |
|
|
241
|
+
| `/auth/signup` | POST | Register a new user (admin-only; the first user to sign up automatically becomes admin) |
|
|
242
|
+
| `/auth/refresh` | POST | Refresh access token |
|
|
243
|
+
| `/auth/me` | GET | Get current user info |
|
|
244
|
+
| `/auth/budget` | GET | Get current user's budget status |
|
|
245
|
+
| `/auth/budget` | PUT | Set or update budget limits |
|
|
246
|
+
|
|
247
|
+
### Dashboard & Analytics
|
|
248
|
+
|
|
249
|
+
| Endpoint | Method | Description |
|
|
250
|
+
|---|---|---|
|
|
251
|
+
| `/dashboard` | GET | Web dashboard (HTML) |
|
|
252
|
+
| `/api/stats` | GET | Aggregated stats: requests, costs, per-model breakdown |
|
|
253
|
+
| `/api/usage` | GET | Cost/token usage (`?model=` filter, `?hours=` period) |
|
|
254
|
+
| `/api/providers` | GET | Provider status with routing info |
|
|
255
|
+
| `/api/router/status` | GET | Detailed smart router metrics |
|
|
256
|
+
|
|
257
|
+
### Core
|
|
258
|
+
|
|
259
|
+
| Endpoint | Method | Description |
|
|
260
|
+
|---|---|---|
|
|
261
|
+
| `/responses` | POST | Responses API (HTTP, streaming + non-streaming) |
|
|
262
|
+
| `/responses` | WS | Responses API (WebSocket) |
|
|
263
|
+
| `/responses/{id}` | GET | Retrieve a stored response |
|
|
264
|
+
| `/models` | GET | List all models across providers |
|
|
265
|
+
| `/v1/models` | GET | List models (v1 prefix alias) |
|
|
266
|
+
| `/health` | GET | Health check (`?check_backend=true` pings upstream) |
|
|
267
|
+
| `/status` | GET | Detailed server status |
|
|
268
|
+
| `/reload` | POST | Reload config from disk |
|
|
269
|
+
|
|
270
|
+
## Provider Examples
|
|
271
|
+
|
|
272
|
+
### Z.AI (GLM Models)
|
|
273
|
+
|
|
274
|
+
```toml
|
|
275
|
+
[provider]
|
|
276
|
+
name = "zai"
|
|
277
|
+
display_name = "Z.AI"
|
|
278
|
+
base_url = "https://api.z.ai/api/paas/v4"
|
|
279
|
+
api_key_env = "OPENAI_API_KEY"
|
|
280
|
+
models = ["glm-5.1", "glm-5", "glm-4.7"]
|
|
281
|
+
default_model = "glm-5.1"
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
### Groq
|
|
285
|
+
|
|
286
|
+
```toml
|
|
287
|
+
[provider]
|
|
288
|
+
name = "groq"
|
|
289
|
+
display_name = "Groq"
|
|
290
|
+
base_url = "https://api.groq.com/openai/v1"
|
|
291
|
+
api_key_env = "GROQ_API_KEY"
|
|
292
|
+
models = ["llama-4-maverick-17b", "mixtral-8x7b-32768"]
|
|
293
|
+
default_model = "llama-4-maverick-17b"
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### Ollama (Local)
|
|
297
|
+
|
|
298
|
+
```toml
|
|
299
|
+
[provider]
|
|
300
|
+
name = "ollama"
|
|
301
|
+
display_name = "Ollama (local)"
|
|
302
|
+
base_url = "http://localhost:11434/v1"
|
|
303
|
+
api_key = "ollama"
|
|
304
|
+
models = ["qwen3:32b", "codellama:34b"]
|
|
305
|
+
default_model = "qwen3:32b"
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
### Anthropic
|
|
309
|
+
|
|
310
|
+
```toml
|
|
311
|
+
[provider]
|
|
312
|
+
name = "anthropic"
|
|
313
|
+
display_name = "Anthropic"
|
|
314
|
+
base_url = "https://api.anthropic.com/v1"
|
|
315
|
+
api_key_env = "ANTHROPIC_API_KEY"
|
|
316
|
+
models = ["claude-sonnet-4-20250514"]
|
|
317
|
+
default_model = "claude-sonnet-4-20250514"
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
### Google Gemini
|
|
321
|
+
|
|
322
|
+
```toml
|
|
323
|
+
[provider]
|
|
324
|
+
name = "gemini"
|
|
325
|
+
display_name = "Google Gemini"
|
|
326
|
+
base_url = "https://generativelanguage.googleapis.com/v1beta/openai"
|
|
327
|
+
api_key_env = "GEMINI_API_KEY"
|
|
328
|
+
models = ["gemini-2.5-flash"]
|
|
329
|
+
default_model = "gemini-2.5-flash"
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
### DeepSeek
|
|
333
|
+
|
|
334
|
+
```toml
|
|
335
|
+
[provider]
|
|
336
|
+
name = "deepseek"
|
|
337
|
+
display_name = "DeepSeek"
|
|
338
|
+
base_url = "https://api.deepseek.com/v1"
|
|
339
|
+
api_key_env = "DEEPSEEK_API_KEY"
|
|
340
|
+
models = ["deepseek-chat", "deepseek-reasoner"]
|
|
341
|
+
default_model = "deepseek-chat"
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
## Multi-Key Rotation
|
|
345
|
+
|
|
346
|
+
```toml
|
|
347
|
+
[provider]
|
|
348
|
+
name = "zai"
|
|
349
|
+
base_url = "https://api.z.ai/api/paas/v4"
|
|
350
|
+
api_keys = ["sk-key1", "sk-key2", "sk-key3"]
|
|
351
|
+
models = ["glm-5.1"]
|
|
352
|
+
default_model = "glm-5.1"
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
Each key gets its own circuit breaker. Auth and rate-limit errors (401/403/429) trip the
|
|
356
|
+
individual key; server errors (5xx) are handled by the global circuit breaker.
|
|
357
|
+
|
|
358
|
+
## Plugin System
|
|
359
|
+
|
|
360
|
+
```toml
|
|
361
|
+
[plugins]
|
|
362
|
+
enabled = true
|
|
363
|
+
plugins = [
|
|
364
|
+
"codex_proxy.plugins_builtin.LoggingPlugin",
|
|
365
|
+
]
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
```python
|
|
369
|
+
from codex_proxy.plugins import Plugin, PluginContext
|
|
370
|
+
|
|
371
|
+
class MyPlugin(Plugin):
|
|
372
|
+
    async def on_request(self, ctx: PluginContext) -> None:
|
|
373
|
+
        pass
|
|
374
|
+
|
|
375
|
+
    async def on_response(self, ctx: PluginContext) -> None:
|
|
376
|
+
        pass
|
|
377
|
+
|
|
378
|
+
    async def on_error(self, ctx: PluginContext) -> None:
|
|
379
|
+
        pass
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
## Configuration Reference
|
|
383
|
+
|
|
384
|
+
Config file: `~/.codex-proxy/config.toml`
|
|
385
|
+
|
|
386
|
+
### `[server]`
|
|
387
|
+
|
|
388
|
+
| Field | Type | Default | Description |
|
|
389
|
+
|---|---|---|---|
|
|
390
|
+
| `host` | string | `"127.0.0.1"` | Bind address |
|
|
391
|
+
| `port` | int | `4242` | Bind port |
|
|
392
|
+
| `log_level` | string | `"warning"` | Log verbosity |
|
|
393
|
+
| `max_retries` | int | `1` | Retries on 5xx/transport errors |
|
|
394
|
+
| `connect_timeout` | float | `10.0` | Seconds to connect to upstream |
|
|
395
|
+
| `read_timeout` | float | `180.0` | Seconds to wait for upstream response |
|
|
396
|
+
| `admin_token` | string | `""` | Bearer token for admin endpoints |
|
|
397
|
+
| `max_request_body_bytes` | int | `10485760` | Max request body size (10MB) |
|
|
398
|
+
| `cors_origins` | list | `[]` | Allowed CORS origins |
|
|
399
|
+
|
|
400
|
+
### `[database]` (v5)
|
|
401
|
+
|
|
402
|
+
| Field | Type | Default | Description |
|
|
403
|
+
|---|---|---|---|
|
|
404
|
+
| `enabled` | bool | `false` | Enable persistent storage |
|
|
405
|
+
| `url` | string | `""` | DB URL (empty = SQLite at `~/.codex-proxy/proxy.db`) |
|
|
406
|
+
| `echo` | bool | `false` | SQL debug logging |
|
|
407
|
+
|
|
408
|
+
### `[auth]` (v5)
|
|
409
|
+
|
|
410
|
+
| Field | Type | Default | Description |
|
|
411
|
+
|---|---|---|---|
|
|
412
|
+
| `enabled` | bool | `false` | Enable JWT authentication |
|
|
413
|
+
| `secret_key` | string | `""` | JWT signing key (auto-generated if empty) |
|
|
414
|
+
| `access_token_expire_minutes` | int | `15` | Access token lifetime |
|
|
415
|
+
| `refresh_token_expire_days` | int | `7` | Refresh token lifetime |
|
|
416
|
+
| `admin_username` | string | `"admin"` | Admin username (seeded on first startup) |
|
|
417
|
+
| `admin_password` | string | `""` | Admin password (falls back to `changeme` when left empty) |
|
|
418
|
+
|
|
419
|
+
### `[router]` (v5)
|
|
420
|
+
|
|
421
|
+
| Field | Type | Default | Description |
|
|
422
|
+
|---|---|---|---|
|
|
423
|
+
| `enabled` | bool | `false` | Enable smart routing |
|
|
424
|
+
| `default_strategy` | string | `"fallback"` | Strategy: `cost`, `latency`, `fallback`, `weighted` |
|
|
425
|
+
|
|
426
|
+
### `[dashboard]` (v5)
|
|
427
|
+
|
|
428
|
+
| Field | Type | Default | Description |
|
|
429
|
+
|---|---|---|---|
|
|
430
|
+
| `enabled` | bool | `false` | Serve web dashboard at `/dashboard` |
|
|
431
|
+
| `open_browser` | bool | `false` | Auto-open browser on startup |
|
|
432
|
+
|
|
433
|
+
### `[provider]`
|
|
434
|
+
|
|
435
|
+
| Field | Type | Default | Description |
|
|
436
|
+
|---|---|---|---|
|
|
437
|
+
| `name` | string | `"zai"` | Provider identifier |
|
|
438
|
+
| `display_name` | string | `"Z.AI"` | Human-readable name |
|
|
439
|
+
| `base_url` | string | Provider endpoint | Chat Completions URL |
|
|
440
|
+
| `api_key` | string | `""` | API key (inline) |
|
|
441
|
+
| `api_key_env` | string | `""` | Env var for API key |
|
|
442
|
+
| `api_keys` | list | `[]` | Multiple keys for rotation |
|
|
443
|
+
| `models` | list | `["glm-5.1", ...]` | Available model IDs |
|
|
444
|
+
| `default_model` | string | `"glm-5.1"` | Default model |
|
|
445
|
+
|
|
446
|
+
### `[circuit_breaker]`
|
|
447
|
+
|
|
448
|
+
| Field | Type | Default | Description |
|
|
449
|
+
|---|---|---|---|
|
|
450
|
+
| `enabled` | bool | `true` | Enable/disable |
|
|
451
|
+
| `failure_threshold` | int | `5` | Failures before opening |
|
|
452
|
+
| `recovery_timeout` | float | `30.0` | Seconds before half-open |
|
|
453
|
+
|
|
454
|
+
### `[compaction]`
|
|
455
|
+
|
|
456
|
+
| Field | Type | Default | Description |
|
|
457
|
+
|---|---|---|---|
|
|
458
|
+
| `enabled` | bool | `true` | Enable/disable |
|
|
459
|
+
| `max_messages` | int | `50` | Threshold to trigger |
|
|
460
|
+
| `keep_last` | int | `20` | Recent messages to keep |
|
|
461
|
+
|
|
462
|
+
## Docker
|
|
463
|
+
|
|
464
|
+
```bash
|
|
465
|
+
docker build -t codex-proxy .
|
|
466
|
+
docker run -d -p 4242:4242 \
|
|
467
|
+
-e CODEX_PROXY_API_KEY=your-key \
|
|
468
|
+
-e CODEX_PROXY_BASE_URL=https://api.z.ai/api/paas/v4 \
|
|
469
|
+
codex-proxy
|
|
470
|
+
```
|
|
471
|
+
|
|
472
|
+
## Development
|
|
473
|
+
|
|
474
|
+
```bash
|
|
475
|
+
git clone https://github.com/ZiryaNoov/codex-proxy.git
|
|
476
|
+
cd codex-proxy
|
|
477
|
+
pip install -e ".[dev,tui]"
|
|
478
|
+
pytest tests/ -v # 270+ tests
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
## License
|
|
482
|
+
|
|
483
|
+
[MIT](https://github.com/ZiryaNoov/codex-proxy/blob/main/LICENSE) -- ZakPro
|