codex-proxy 4.0.0__tar.gz → 5.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/CHANGELOG.md +74 -0
  2. codex_proxy-5.0.0/PKG-INFO +483 -0
  3. codex_proxy-5.0.0/README.md +445 -0
  4. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/pyproject.toml +8 -2
  5. codex_proxy-5.0.0/src/codex_proxy/__init__.py +3 -0
  6. codex_proxy-5.0.0/src/codex_proxy/auth.py +252 -0
  7. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/config.py +174 -17
  8. codex_proxy-5.0.0/src/codex_proxy/cost.py +146 -0
  9. codex_proxy-5.0.0/src/codex_proxy/dashboard.py +282 -0
  10. codex_proxy-5.0.0/src/codex_proxy/db/__init__.py +73 -0
  11. codex_proxy-5.0.0/src/codex_proxy/db/crud_analytics.py +127 -0
  12. codex_proxy-5.0.0/src/codex_proxy/db/crud_budgets.py +140 -0
  13. codex_proxy-5.0.0/src/codex_proxy/db/crud_keys.py +109 -0
  14. codex_proxy-5.0.0/src/codex_proxy/db/crud_logs.py +80 -0
  15. codex_proxy-5.0.0/src/codex_proxy/db/crud_plugins.py +119 -0
  16. codex_proxy-5.0.0/src/codex_proxy/db/crud_providers.py +206 -0
  17. codex_proxy-5.0.0/src/codex_proxy/db/crud_users.py +76 -0
  18. codex_proxy-5.0.0/src/codex_proxy/db/engine.py +48 -0
  19. codex_proxy-5.0.0/src/codex_proxy/db/migrations.py +51 -0
  20. codex_proxy-5.0.0/src/codex_proxy/db/models.py +210 -0
  21. codex_proxy-5.0.0/src/codex_proxy/router.py +199 -0
  22. codex_proxy-5.0.0/src/codex_proxy/server.py +1476 -0
  23. codex_proxy-5.0.0/tests/test_auth_cost_router.py +387 -0
  24. codex_proxy-5.0.0/tests/test_db.py +332 -0
  25. codex_proxy-4.0.0/PKG-INFO +0 -567
  26. codex_proxy-4.0.0/README.md +0 -540
  27. codex_proxy-4.0.0/src/codex_proxy/__init__.py +0 -3
  28. codex_proxy-4.0.0/src/codex_proxy/server.py +0 -707
  29. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.dockerignore +0 -0
  30. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  31. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  32. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  33. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.github/workflows/ci.yml +0 -0
  34. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.github/workflows/release.yml +0 -0
  35. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.gitignore +0 -0
  36. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/.pre-commit-config.yaml +0 -0
  37. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/CODE_OF_CONDUCT.md +0 -0
  38. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/CONTRIBUTING.md +0 -0
  39. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/Dockerfile +0 -0
  40. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/LICENSE +0 -0
  41. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/assets/tui-dashboard.png +0 -0
  42. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/docker-compose.yml +0 -0
  43. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/__main__.py +0 -0
  44. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/circuit_breaker.py +0 -0
  45. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/compaction.py +0 -0
  46. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/key_rotation.py +0 -0
  47. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/plugins.py +0 -0
  48. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/plugins_builtin.py +0 -0
  49. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/providers.py +0 -0
  50. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/rate_limiter.py +0 -0
  51. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/store.py +0 -0
  52. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/translator.py +0 -0
  53. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/src/codex_proxy/tui.py +0 -0
  54. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/conftest.py +0 -0
  55. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_circuit_breaker.py +0 -0
  56. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_compaction.py +0 -0
  57. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_config.py +0 -0
  58. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_edge_cases.py +0 -0
  59. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_key_rotation.py +0 -0
  60. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_plugins.py +0 -0
  61. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_providers.py +0 -0
  62. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_rate_limiter.py +0 -0
  63. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_server.py +0 -0
  64. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_server_features.py +0 -0
  65. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_store.py +0 -0
  66. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_translator.py +0 -0
  67. {codex_proxy-4.0.0 → codex_proxy-5.0.0}/tests/test_tui.py +0 -0
@@ -5,6 +5,80 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [5.0.0] - 2026-06-07
9
+
10
+ ### Added — v5 Gateway Platform
11
+
12
+ **Database Layer**
13
+ - Async SQLAlchemy database with 13 tables (users, api_keys, providers, provider_keys, models, routing_rules, request_logs, budgets, cost_alerts, plugin_registry, plugin_instances, sessions, _schema_version)
14
+ - SQLite (default) or PostgreSQL (via asyncpg) backend
15
+ - Version-based migrations system
16
+ - 7 CRUD modules for all entities
17
+
18
+ **Multi-Provider Support**
19
+ - `[[providers]]` TOML array — configure multiple providers in one instance
20
+ - Per-provider clients, adapters, key rotators
21
+ - Model-to-provider resolution across all providers
22
+ - `/models` endpoint aggregates models from all providers
23
+
24
+ **JWT Authentication**
25
+ - POST `/auth/login`, `/auth/signup`, `/auth/refresh`, GET `/auth/me`
26
+ - Password hashing with bcrypt (SHA-256 fallback)
27
+ - JWT access + refresh tokens (PyJWT with stdlib HMAC-SHA256 fallback)
28
+ - Admin user auto-seeded on first startup
29
+ - Budget endpoints: GET/PUT `/auth/budget`
30
+
31
+ **Smart Router**
32
+ - 4 routing strategies: `fallback` (config order), `cost` (cheapest), `latency` (fastest), `weighted` (load balanced)
33
+ - Rolling latency tracker with per-provider health detection
34
+ - Unhealthy providers automatically skipped
35
+ - GET `/api/router/status` for detailed metrics
36
+
37
+ **Cost Tracking**
38
+ - Per-model pricing: input_price_per_million, output_price_per_million
39
+ - 25+ built-in model prices (GLM, GPT, Claude, Gemini, DeepSeek, Llama, etc.)
40
+ - Automatic cost calculation on every request (DB lookup → KNOWN_PRICING fallback → $0)
41
+ - Prices auto-seeded to DB on startup (idempotent)
42
+ - Cost aggregation analytics: GET `/api/stats`, `/api/usage`
43
+
44
+ **Budget Enforcement**
45
+ - Daily and monthly spend limits per user
46
+ - Requests blocked with 429 when budget exceeded
47
+ - Alert threshold notifications
48
+ - Budget status and alerts via API
49
+
50
+ **Web Dashboard**
51
+ - Embedded HTML+CSS+JS dashboard at GET `/dashboard`
52
+ - Dark theme, auto-refresh every 10 seconds
53
+ - Stats cards (requests, success rate, uptime, total cost)
54
+ - Cost breakdown table with visual bars
55
+ - Provider cards with model tags
56
+ - Router status with latency and health info
57
+
58
+ **v5 Config Sections** (all disabled by default for v4 compatibility)
59
+ - `[database]` — persistent storage settings
60
+ - `[auth]` — JWT authentication settings
61
+ - `[router]` — smart routing settings
62
+ - `[dashboard]` — web dashboard settings
63
+
64
+ **New Dependencies**
65
+ - `sqlalchemy>=2.0`, `aiosqlite>=0.20` (core)
66
+ - Optional: `asyncpg>=0.29` (postgres), `bcrypt>=4.0`, `PyJWT>=2.8`, `cryptography>=42.0` (enterprise)
67
+
68
+ **Testing**
69
+ - 270+ tests (up from 217): 20 DB tests, 33 auth/cost/router tests
70
+ - All existing v4 tests pass unchanged — zero regressions
71
+
72
+ ### Changed
73
+ - Description updated: "LLM Gateway Platform — multi-provider proxy with smart routing, cost analytics, and web dashboard"
74
+ - `_log_request()` now calculates real costs and resolves provider_id from DB
75
+ - `/status` endpoint includes v5 features, auth, and router status
76
+ - Startup output shows all providers and v5 mode
77
+
78
+ ### Backward Compatibility
79
+ - All v5 features **disabled by default** — v4 config works as-is with zero changes
80
+ - No breaking changes to existing API endpoints or behavior
81
+
8
82
  ## [4.0.0] - 2026-06-07
9
83
 
10
84
  ### Fixed
@@ -0,0 +1,483 @@
1
+ Metadata-Version: 2.4
2
+ Name: codex-proxy
3
+ Version: 5.0.0
4
+ Summary: LLM Gateway Platform — multi-provider proxy with smart routing, cost analytics, and web dashboard
5
+ Project-URL: Repository, https://github.com/ZiryaNoov/codex-proxy
6
+ Author-email: ZakPro <zakarinoo@gmail.com>
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ Keywords: chat-completions,codex,glm,openai,proxy,responses-api,z.ai
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Topic :: Software Development :: Libraries
15
+ Requires-Python: >=3.10
16
+ Requires-Dist: aiosqlite>=0.20
17
+ Requires-Dist: fastapi>=0.115
18
+ Requires-Dist: httpx>=0.27
19
+ Requires-Dist: sqlalchemy>=2.0
20
+ Requires-Dist: tomli>=2.0; python_version < '3.11'
21
+ Requires-Dist: uvicorn>=0.30
22
+ Provides-Extra: dev
23
+ Requires-Dist: aiosqlite>=0.20; extra == 'dev'
24
+ Requires-Dist: mypy>=1.13; extra == 'dev'
25
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
26
+ Requires-Dist: pytest>=8.0; extra == 'dev'
27
+ Requires-Dist: ruff>=0.8; extra == 'dev'
28
+ Provides-Extra: enterprise
29
+ Requires-Dist: asyncpg>=0.29; extra == 'enterprise'
30
+ Requires-Dist: bcrypt>=4.0; extra == 'enterprise'
31
+ Requires-Dist: cryptography>=42.0; extra == 'enterprise'
32
+ Requires-Dist: pyjwt>=2.8; extra == 'enterprise'
33
+ Provides-Extra: postgres
34
+ Requires-Dist: asyncpg>=0.29; extra == 'postgres'
35
+ Provides-Extra: tui
36
+ Requires-Dist: rich>=13.0; extra == 'tui'
37
+ Description-Content-Type: text/markdown
38
+
39
+ # codex-proxy
40
+
41
+ [![CI](https://github.com/ZiryaNoov/codex-proxy/actions/workflows/ci.yml/badge.svg)](https://github.com/ZiryaNoov/codex-proxy/actions/workflows/ci.yml)
42
+ [![PyPI version](https://img.shields.io/pypi/v/codex-proxy.svg)](https://pypi.org/project/codex-proxy/)
43
+ [![Python version](https://img.shields.io/pypi/pyversions/codex-proxy.svg)](https://pypi.org/project/codex-proxy/)
44
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/ZiryaNoov/codex-proxy/blob/main/LICENSE)
45
+
46
+ **Lightweight LLM Gateway Platform — multi-provider proxy with smart routing, JWT auth, cost analytics, and web dashboard.**
47
+
48
+ Use Codex CLI with **any** Chat Completions-compatible provider -- Z.AI, Groq,
49
+ Together AI, OpenRouter, Ollama, Fireworks, Anthropic, Gemini, DeepSeek, Mistral,
50
+ Cohere, NVIDIA NIM, and more.
51
+
52
+ ---
53
+
54
+ ## Why codex-proxy?
55
+
56
+ | | codex-proxy | LiteLLM |
57
+ |---|---|---|
58
+ | **Install** | `pip install codex-proxy` | `pip install litellm[proxy]` |
59
+ | **Dependencies** | 6 (FastAPI, uvicorn, httpx, tomli, sqlalchemy, aiosqlite) | 50+ |
60
+ | **Config** | Single TOML file | YAML + env vars + DB |
61
+ | **Start time** | <1s | 3-5s |
62
+ | **Memory** | ~30MB | ~200MB+ |
63
+ | **Dashboard** | TUI + Web UI | Separate Admin UI |
64
+ | **Auth** | Built-in JWT | External |
65
+ | **Smart Routing** | 4 strategies | Basic |
66
+ | **Cost Tracking** | Per-model pricing + analytics | Via logging |
67
+ | **Circuit Breaker** | Per-key + global | Basic |
68
+ | **Plugins** | Built-in hook system | Callbacks only |
69
+
70
+ ## Architecture
71
+
72
+ ```
73
+ codex-proxy v5.0.0
74
+ ┌────────────┐ ┌──────────────────────────────────┐ ┌──────────────────┐
75
+ │ │ │ │ │ │
76
+ │ Codex CLI │─────>│ FastAPI server │─────>│ LLM Provider │
77
+ │ Cursor │ │ localhost:4242 │ │ (CC endpoint) │
78
+ │ Any IDE │ │ │ │ │
79
+ │ │<─────│ Core: │<─────│ Z.AI / Groq / │
80
+ └────────────┘ │ . Translator │ │ Ollama / etc. │
81
+ │ . Response Store │ └──────────────────┘
82
+ Responses API │ . Circuit Breaker │
83
+ protocol │ . Key Rotator │ Chat Completions
84
+ │ . Compaction Engine │ protocol
85
+ │ . Plugin Registry │
86
+ │ . Rate Limiter │ ┌──────────────────┐
87
+ │ . Provider Adapters (12+) │ │ SQLite / PG DB │
88
+ │ │─────>│ │
89
+ │ v5 Gateway Features: │ │ . Users │
90
+ │ . Smart Router (4 strategies) │ │ . API Keys │
91
+ │ . JWT Auth (bcrypt + tokens) │ │ . Providers │
92
+ │ . Cost Tracking (25+ models) │ │ . Request Logs │
93
+ │ . Budget Enforcement │ │ . Budgets │
94
+ │ . Web Dashboard (/dashboard) │ │ . Analytics │
95
+ │ . Multi-Provider Routing │ └──────────────────┘
96
+ └──────────────────────────────────┘
97
+ ```
98
+
99
+ ## Features
100
+
101
+ ### Core
102
+
103
+ - **Protocol translation** -- Responses API to Chat Completions in real time
104
+ - **Streaming SSE** -- token-by-token delivery with full protocol mapping
105
+ - **WebSocket support** -- full Realtime API envelope handling
106
+ - **Reasoning passthrough** -- forwards thinking/reasoning tokens
107
+ - **Tool calls** -- full function calling support (definitions + results)
108
+ - **Multi-turn** -- `previous_response_id` via in-memory response store
109
+ - **Auto-retry** -- configurable retries on 5xx/transport errors
110
+ - **Rate limiting** -- per-client sliding window rate limiter
111
+ - **Admin auth** -- optional Bearer token on `/reload` and `/status` endpoints
112
+ - **CORS support** -- configurable allowed origins
113
+ - **Request size limits** -- configurable max body size (default 10MB)
114
+
115
+ ### v5 Gateway Features
116
+
117
+ - **Multi-provider support** -- route to multiple providers via `[[providers]]` config
118
+ - **Smart routing** -- 4 strategies: `fallback`, `cost` (cheapest), `latency` (fastest), `weighted` (load balanced)
119
+ - **JWT authentication** -- login/signup/refresh tokens with bcrypt password hashing
120
+ - **Cost tracking** -- per-model pricing with automatic cost calculation on every request
121
+ - **Budget enforcement** -- set daily/monthly spend limits per user; requests blocked when exceeded
122
+ - **Web dashboard** -- dark-themed HTML dashboard at `/dashboard` with live stats, cost charts, provider cards
123
+ - **Database layer** -- async SQLAlchemy (SQLite or PostgreSQL) with 13 tables, migrations, and CRUD
124
+ - **25+ model prices** -- built-in pricing data for GLM, GPT, Claude, Gemini, DeepSeek, Llama, and more
125
+
126
+ ### Reliability
127
+
128
+ - **Circuit breaker** -- global fail-fast when upstream is down (configurable threshold + recovery)
129
+ - **Multi-key rotation** -- round-robin across API keys with **per-key circuit breakers**; auth/rate-limit errors (401/403/429) trip individual keys
130
+ - **Context compaction** -- auto-trims long conversations to stay within model limits
131
+
132
+ ### Observability
133
+
134
+ - **Live TUI dashboard** -- real-time metrics, circuit breaker state, key pool status, log tail, hotkeys
135
+ - **Web dashboard** -- browser-based dashboard with auto-refresh, cost breakdown, provider status, router metrics
136
+ - **Plugin system** -- hook-based middleware (`on_request`, `on_response`, `on_error`, `on_startup`, `on_shutdown`)
137
+ - **Config hot-reload** -- reload config without restart via TUI hotkey or `POST /reload`
138
+
139
+ ### Ecosystem
140
+
141
+ - **12+ providers** -- Z.AI, Groq, Together, OpenRouter, Ollama, Fireworks, Anthropic, Gemini, DeepSeek, Mistral, Cohere, NVIDIA NIM
142
+ - **Provider adapters** -- per-provider header/request normalization
143
+ - **Docker-ready** -- Dockerfile and Compose file included
144
+ - **pip-installable** -- `pip install codex-proxy`, run with `codex-proxy` CLI
145
+ - **270+ tests** -- comprehensive test suite covering all modules
146
+
147
+ ## Quick Start
148
+
149
+ ### Install
150
+
151
+ ```bash
152
+ pip install codex-proxy
153
+ ```
154
+
155
+ With extras:
156
+
157
+ ```bash
158
+ pip install "codex-proxy[tui]" # Terminal dashboard
159
+ pip install "codex-proxy[postgres]" # PostgreSQL backend
160
+ pip install "codex-proxy[enterprise]" # bcrypt + PyJWT + cryptography + asyncpg (PostgreSQL)
161
+ ```
162
+
163
+ ### Configure
164
+
165
+ ```bash
166
+ codex-proxy --init
167
+ # Edit ~/.codex-proxy/config.toml with your provider details
168
+ ```
169
+
170
+ ### Run
171
+
172
+ ```bash
173
+ # Standard mode (v4 compatible)
174
+ codex-proxy
175
+
176
+ # With live TUI dashboard
177
+ codex-proxy --tui
178
+ ```
179
+
180
+ ### Enable v5 Features
181
+
182
+ Add to your `~/.codex-proxy/config.toml`:
183
+
184
+ ```toml
185
+ # Enable persistent database
186
+ [database]
187
+ enabled = true
188
+ # url = "" # empty = SQLite at ~/.codex-proxy/proxy.db
189
+
190
+ # Enable JWT authentication
191
+ [auth]
192
+ enabled = true
193
+ secret_key = "your-secret-key-here" # auto-generated if empty
194
+ admin_username = "admin"
195
+ admin_password = "changeme" # hashed on first startup
196
+
197
+ # Enable smart routing (use with [[providers]])
198
+ [router]
199
+ enabled = true
200
+ default_strategy = "fallback" # cost|latency|fallback|weighted
201
+
202
+ # Enable web dashboard
203
+ [dashboard]
204
+ enabled = true
205
+ ```
206
+
207
+ ### Multi-Provider Setup
208
+
209
+ ```toml
210
+ [[providers]]
211
+ name = "zai"
212
+ display_name = "Z.AI"
213
+ base_url = "https://api.z.ai/api/paas/v4"
214
+ api_key_env = "OPENAI_API_KEY"
215
+ models = ["glm-5.1", "glm-5", "glm-4.7"]
216
+ default_model = "glm-5.1"
217
+
218
+ [[providers]]
219
+ name = "groq"
220
+ display_name = "Groq"
221
+ base_url = "https://api.groq.com/openai/v1"
222
+ api_key_env = "GROQ_API_KEY"
223
+ models = ["llama-4-maverick-17b"]
224
+ default_model = "llama-4-maverick-17b"
225
+ ```
226
+
227
+ ### Connect Codex CLI
228
+
229
+ ```bash
230
+ export OPENAI_BASE_URL=http://127.0.0.1:4242
231
+ codex --model glm-5.1 "say hello"
232
+ ```
233
+
234
+ ## v5 API Endpoints
235
+
236
+ ### Auth
237
+
238
+ | Endpoint | Method | Description |
239
+ |---|---|---|
240
+ | `/auth/login` | POST | Authenticate user, returns JWT tokens |
241
+ | `/auth/signup` | POST | Register a new user (admin-only; the first user to sign up is automatically made admin) |
242
+ | `/auth/refresh` | POST | Refresh access token |
243
+ | `/auth/me` | GET | Get current user info |
244
+ | `/auth/budget` | GET | Get current user's budget status |
245
+ | `/auth/budget` | PUT | Set or update budget limits |
246
+
247
+ ### Dashboard & Analytics
248
+
249
+ | Endpoint | Method | Description |
250
+ |---|---|---|
251
+ | `/dashboard` | GET | Web dashboard (HTML) |
252
+ | `/api/stats` | GET | Aggregated stats: requests, costs, per-model breakdown |
253
+ | `/api/usage` | GET | Cost/token usage (`?model=` filter, `?hours=` period) |
254
+ | `/api/providers` | GET | Provider status with routing info |
255
+ | `/api/router/status` | GET | Detailed smart router metrics |
256
+
257
+ ### Core
258
+
259
+ | Endpoint | Method | Description |
260
+ |---|---|---|
261
+ | `/responses` | POST | Responses API (HTTP, streaming + non-streaming) |
262
+ | `/responses` | WS | Responses API (WebSocket) |
263
+ | `/responses/{id}` | GET | Retrieve a stored response |
264
+ | `/models` | GET | List all models across providers |
265
+ | `/v1/models` | GET | List models (v1 prefix alias) |
266
+ | `/health` | GET | Health check (`?check_backend=true` pings upstream) |
267
+ | `/status` | GET | Detailed server status |
268
+ | `/reload` | POST | Reload config from disk |
269
+
270
+ ## Provider Examples
271
+
272
+ ### Z.AI (GLM Models)
273
+
274
+ ```toml
275
+ [provider]
276
+ name = "zai"
277
+ display_name = "Z.AI"
278
+ base_url = "https://api.z.ai/api/paas/v4"
279
+ api_key_env = "OPENAI_API_KEY"
280
+ models = ["glm-5.1", "glm-5", "glm-4.7"]
281
+ default_model = "glm-5.1"
282
+ ```
283
+
284
+ ### Groq
285
+
286
+ ```toml
287
+ [provider]
288
+ name = "groq"
289
+ display_name = "Groq"
290
+ base_url = "https://api.groq.com/openai/v1"
291
+ api_key_env = "GROQ_API_KEY"
292
+ models = ["llama-4-maverick-17b", "mixtral-8x7b-32768"]
293
+ default_model = "llama-4-maverick-17b"
294
+ ```
295
+
296
+ ### Ollama (Local)
297
+
298
+ ```toml
299
+ [provider]
300
+ name = "ollama"
301
+ display_name = "Ollama (local)"
302
+ base_url = "http://localhost:11434/v1"
303
+ api_key = "ollama"
304
+ models = ["qwen3:32b", "codellama:34b"]
305
+ default_model = "qwen3:32b"
306
+ ```
307
+
308
+ ### Anthropic
309
+
310
+ ```toml
311
+ [provider]
312
+ name = "anthropic"
313
+ display_name = "Anthropic"
314
+ base_url = "https://api.anthropic.com/v1"
315
+ api_key_env = "ANTHROPIC_API_KEY"
316
+ models = ["claude-sonnet-4-20250514"]
317
+ default_model = "claude-sonnet-4-20250514"
318
+ ```
319
+
320
+ ### Google Gemini
321
+
322
+ ```toml
323
+ [provider]
324
+ name = "gemini"
325
+ display_name = "Google Gemini"
326
+ base_url = "https://generativelanguage.googleapis.com/v1beta/openai"
327
+ api_key_env = "GEMINI_API_KEY"
328
+ models = ["gemini-2.5-flash"]
329
+ default_model = "gemini-2.5-flash"
330
+ ```
331
+
332
+ ### DeepSeek
333
+
334
+ ```toml
335
+ [provider]
336
+ name = "deepseek"
337
+ display_name = "DeepSeek"
338
+ base_url = "https://api.deepseek.com/v1"
339
+ api_key_env = "DEEPSEEK_API_KEY"
340
+ models = ["deepseek-chat", "deepseek-reasoner"]
341
+ default_model = "deepseek-chat"
342
+ ```
343
+
344
+ ## Multi-Key Rotation
345
+
346
+ ```toml
347
+ [provider]
348
+ name = "zai"
349
+ base_url = "https://api.z.ai/api/paas/v4"
350
+ api_keys = ["sk-key1", "sk-key2", "sk-key3"]
351
+ models = ["glm-5.1"]
352
+ default_model = "glm-5.1"
353
+ ```
354
+
355
+ Each key gets its own circuit breaker. Auth and rate-limit errors (401/403/429) trip the
356
+ individual key; server errors (5xx) are handled by the global circuit breaker.
357
+
358
+ ## Plugin System
359
+
360
+ ```toml
361
+ [plugins]
362
+ enabled = true
363
+ plugins = [
364
+ "codex_proxy.plugins_builtin.LoggingPlugin",
365
+ ]
366
+ ```
367
+
368
+ ```python
369
+ from codex_proxy.plugins import Plugin, PluginContext
370
+
371
+ class MyPlugin(Plugin):
372
+ async def on_request(self, ctx: PluginContext) -> None:
373
+ pass
374
+
375
+ async def on_response(self, ctx: PluginContext) -> None:
376
+ pass
377
+
378
+ async def on_error(self, ctx: PluginContext) -> None:
379
+ pass
380
+ ```
381
+
382
+ ## Configuration Reference
383
+
384
+ Config file: `~/.codex-proxy/config.toml`
385
+
386
+ ### `[server]`
387
+
388
+ | Field | Type | Default | Description |
389
+ |---|---|---|---|
390
+ | `host` | string | `"127.0.0.1"` | Bind address |
391
+ | `port` | int | `4242` | Bind port |
392
+ | `log_level` | string | `"warning"` | Log verbosity |
393
+ | `max_retries` | int | `1` | Retries on 5xx/transport errors |
394
+ | `connect_timeout` | float | `10.0` | Seconds to connect to upstream |
395
+ | `read_timeout` | float | `180.0` | Seconds to wait for upstream response |
396
+ | `admin_token` | string | `""` | Bearer token for admin endpoints |
397
+ | `max_request_body_bytes` | int | `10485760` | Max request body size (10MB) |
398
+ | `cors_origins` | list | `[]` | Allowed CORS origins |
399
+
400
+ ### `[database]` (v5)
401
+
402
+ | Field | Type | Default | Description |
403
+ |---|---|---|---|
404
+ | `enabled` | bool | `false` | Enable persistent storage |
405
+ | `url` | string | `""` | DB URL (empty = SQLite at `~/.codex-proxy/proxy.db`) |
406
+ | `echo` | bool | `false` | SQL debug logging |
407
+
408
+ ### `[auth]` (v5)
409
+
410
+ | Field | Type | Default | Description |
411
+ |---|---|---|---|
412
+ | `enabled` | bool | `false` | Enable JWT authentication |
413
+ | `secret_key` | string | `""` | JWT signing key (auto-generated if empty) |
414
+ | `access_token_expire_minutes` | int | `15` | Access token lifetime |
415
+ | `refresh_token_expire_days` | int | `7` | Refresh token lifetime |
416
+ | `admin_username` | string | `"admin"` | Admin username (seeded on first startup) |
417
+ | `admin_password` | string | `""` | Admin password (falls back to `changeme` when left empty) |
418
+
419
+ ### `[router]` (v5)
420
+
421
+ | Field | Type | Default | Description |
422
+ |---|---|---|---|
423
+ | `enabled` | bool | `false` | Enable smart routing |
424
+ | `default_strategy` | string | `"fallback"` | Strategy: `cost`, `latency`, `fallback`, `weighted` |
425
+
426
+ ### `[dashboard]` (v5)
427
+
428
+ | Field | Type | Default | Description |
429
+ |---|---|---|---|
430
+ | `enabled` | bool | `false` | Serve web dashboard at `/dashboard` |
431
+ | `open_browser` | bool | `false` | Auto-open browser on startup |
432
+
433
+ ### `[provider]`
434
+
435
+ | Field | Type | Default | Description |
436
+ |---|---|---|---|
437
+ | `name` | string | `"zai"` | Provider identifier |
438
+ | `display_name` | string | `"Z.AI"` | Human-readable name |
439
+ | `base_url` | string | Provider endpoint | Chat Completions URL |
440
+ | `api_key` | string | `""` | API key (inline) |
441
+ | `api_key_env` | string | `""` | Env var for API key |
442
+ | `api_keys` | list | `[]` | Multiple keys for rotation |
443
+ | `models` | list | `["glm-5.1", ...]` | Available model IDs |
444
+ | `default_model` | string | `"glm-5.1"` | Default model |
445
+
446
+ ### `[circuit_breaker]`
447
+
448
+ | Field | Type | Default | Description |
449
+ |---|---|---|---|
450
+ | `enabled` | bool | `true` | Enable/disable |
451
+ | `failure_threshold` | int | `5` | Failures before opening |
452
+ | `recovery_timeout` | float | `30.0` | Seconds before half-open |
453
+
454
+ ### `[compaction]`
455
+
456
+ | Field | Type | Default | Description |
457
+ |---|---|---|---|
458
+ | `enabled` | bool | `true` | Enable/disable |
459
+ | `max_messages` | int | `50` | Threshold to trigger |
460
+ | `keep_last` | int | `20` | Recent messages to keep |
461
+
462
+ ## Docker
463
+
464
+ ```bash
465
+ docker build -t codex-proxy .
466
+ docker run -d -p 4242:4242 \
467
+ -e CODEX_PROXY_API_KEY=your-key \
468
+ -e CODEX_PROXY_BASE_URL=https://api.z.ai/api/paas/v4 \
469
+ codex-proxy
470
+ ```
471
+
472
+ ## Development
473
+
474
+ ```bash
475
+ git clone https://github.com/ZiryaNoov/codex-proxy.git
476
+ cd codex-proxy
477
+ pip install -e ".[dev,tui]"
478
+ pytest tests/ -v # 270+ tests
479
+ ```
480
+
481
+ ## License
482
+
483
+ [MIT](https://github.com/ZiryaNoov/codex-proxy/blob/main/LICENSE) -- ZakPro