codex-proxy 3.1.0__tar.gz → 4.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. codex_proxy-4.0.0/.dockerignore +14 -0
  2. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/CHANGELOG.md +38 -1
  3. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/CONTRIBUTING.md +9 -4
  4. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/Dockerfile +2 -1
  5. codex_proxy-4.0.0/PKG-INFO +567 -0
  6. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/README.md +24 -4
  7. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/pyproject.toml +2 -1
  8. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/__init__.py +1 -1
  9. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/__main__.py +7 -2
  10. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/circuit_breaker.py +1 -0
  11. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/compaction.py +1 -1
  12. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/config.py +43 -2
  13. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/key_rotation.py +5 -0
  14. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/providers.py +20 -0
  15. codex_proxy-4.0.0/src/codex_proxy/rate_limiter.py +45 -0
  16. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/server.py +81 -21
  17. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/store.py +8 -1
  18. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/tui.py +6 -7
  19. codex_proxy-4.0.0/tests/test_edge_cases.py +142 -0
  20. codex_proxy-4.0.0/tests/test_rate_limiter.py +66 -0
  21. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/tests/test_server.py +1 -1
  22. codex_proxy-4.0.0/tests/test_server_features.py +153 -0
  23. codex_proxy-3.1.0/.dockerignore +0 -6
  24. codex_proxy-3.1.0/PKG-INFO +0 -25
  25. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  26. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  27. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  28. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/.github/workflows/ci.yml +0 -0
  29. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/.github/workflows/release.yml +0 -0
  30. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/.gitignore +0 -0
  31. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/.pre-commit-config.yaml +0 -0
  32. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/CODE_OF_CONDUCT.md +0 -0
  33. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/LICENSE +0 -0
  34. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/assets/tui-dashboard.png +0 -0
  35. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/docker-compose.yml +0 -0
  36. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/plugins.py +0 -0
  37. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/plugins_builtin.py +0 -0
  38. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/src/codex_proxy/translator.py +0 -0
  39. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/tests/conftest.py +0 -0
  40. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/tests/test_circuit_breaker.py +0 -0
  41. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/tests/test_compaction.py +0 -0
  42. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/tests/test_config.py +0 -0
  43. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/tests/test_key_rotation.py +0 -0
  44. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/tests/test_plugins.py +0 -0
  45. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/tests/test_providers.py +0 -0
  46. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/tests/test_store.py +0 -0
  47. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/tests/test_translator.py +0 -0
  48. {codex_proxy-3.1.0 → codex_proxy-4.0.0}/tests/test_tui.py +0 -0
@@ -0,0 +1,14 @@
1
+ __pycache__
2
+ *.pyc
3
+ *.pyo
4
+ .git
5
+ .github
6
+ .venv
7
+ .env
8
+ tests/
9
+ dist/
10
+ build/
11
+ *.egg-info
12
+ .ruff_cache
13
+ .mypy_cache
14
+ .pytest_cache
@@ -3,6 +3,44 @@
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [4.0.0] - 2026-06-07
9
+
10
+ ### Fixed
11
+
12
+ - **`__main__.py`**: `--port 0` and `--host ""` no longer silently ignored (`is not None` check)
13
+ - **`store.py`**: Removed production `assert`, which Python strips under the `-O` flag, so the check was silently skipped
14
+ - **`compaction.py`**: Compaction notice now says "dropped" instead of misleading "summarized"
15
+ - **`tui.py`**: Success rate now correct after first request (off-by-one fix)
16
+ - **`circuit_breaker.py`**: `get_status()` now includes `last_failure_time` field
17
+ - **`Dockerfile`**: Binds to `0.0.0.0` for container networking
18
+
19
+ ### Changed
20
+
21
+ - Removed duplicate `_mask_key()` from `server.py` — single source in `key_rotation.py`
22
+ - `KeyRotator` now exposes `key_count` property (no more `_keys` access)
23
+ - `ResponseStore` now exposes `clear()` method (no more `_store` access)
24
+ - `server.py` no longer accesses private `_keys` on `KeyRotator`
25
+ - `tui.py` no longer accesses private `_store` on `ResponseStore`
26
+ - `tui.py` no longer shadows `state_colors` variable name
27
+ - Hardcoded `MAX_RETRIES`, `RETRY_DELAY`, httpx timeouts are now configurable via config
28
+ - Example config URL corrected to `ZiryaNoov/codex-proxy`
29
+
30
+ ### Added
31
+
32
+ - **CORS middleware** — configurable via `cors_origins` in `[server]` config
33
+ - **Admin authentication** — optional Bearer token on `/reload` and `/status` endpoints
34
+ - **Rate limiting** — per-client sliding window rate limiter (`[rate_limit]` config section)
35
+ - **Request size limits** — configurable `max_request_body_bytes` (default 10MB)
36
+ - **Configurable timeouts** — `connect_timeout`, `read_timeout`, `max_retries`, `retry_delay`
37
+ - **Provider adapters** for Together AI and Fireworks AI (11 adapters total)
38
+ - **`rate_limiter.py`** — new module with `RateLimiter` class
39
+ - **217 tests** (up from 184) with new test files:
40
+ - `test_rate_limiter.py` — rate limiter unit tests
41
+ - `test_server_features.py` — rate limiting, admin auth, request size integration tests
42
+ - `test_edge_cases.py` — compaction edge cases, config TOML, provider adapter registry
43
+ - `pyproject.toml` now includes `readme = "README.md"` for PyPI
6
44
 
7
45
  ## [3.1.0] - 2026-06-01
8
46
 
@@ -28,7 +66,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
28
66
 
29
67
  - `--print-config` now shows circuit_breaker and compaction settings
30
68
  - MIT `LICENSE` file
31
- and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
32
69
 
33
70
  ## [2.0.0] - 2026-05-31
34
71
 
@@ -54,7 +54,7 @@ pytest tests/test_translator.py -v
54
54
  pytest tests/ -v --tb=short
55
55
  ```
56
56
 
57
- The test suite uses **pytest** with 112+ tests covering the translator, config, store, server, providers, circuit breaker, and compaction modules.
57
+ The test suite uses **pytest** with 217+ tests covering the translator, config, store, server, providers, circuit breaker, compaction, and rate limiter modules, as well as admin authentication.
58
58
 
59
59
  ## Project Structure
60
60
 
@@ -67,10 +67,15 @@ codex-proxy/
67
67
  translator.py # Responses API <-> Chat Completions
68
68
  config.py # TOML config loading
69
69
  store.py # In-memory response store
70
- providers.py # Provider-specific adapters
70
+ providers.py # Provider-specific adapters (11 providers)
71
71
  circuit_breaker.py # Upstream resilience
72
72
  compaction.py # Context compaction
73
- tests/ # Test suite
73
+ key_rotation.py # Multi-key round-robin pool
74
+ plugins.py # Hook-based plugin system
75
+ plugins_builtin.py # Built-in plugins (LoggingPlugin)
76
+ rate_limiter.py # Per-client sliding window rate limiter
77
+ tui.py # Rich TUI dashboard
78
+ tests/ # Test suite (217+ tests)
74
79
  .github/workflows/ # CI/CD pipelines
75
80
  ```
76
81
 
@@ -118,7 +123,7 @@ Common types: `feat`, `fix`, `refactor`, `test`, `docs`, `chore`, `ci`, `perf`
118
123
 
119
124
  ## Reporting Issues
120
125
 
121
- - Use [GitHub Issues](https://github.com/ZakPro/codex-proxy/issues) for bugs and feature requests
126
+ - Use [GitHub Issues](https://github.com/ZiryaNoov/codex-proxy/issues) for bugs and feature requests
122
127
  - Include your Python version, OS, and relevant config (redact API keys)
123
128
  - For bugs, provide steps to reproduce and any error output
124
129
 
@@ -6,4 +6,5 @@ RUN pip install --no-cache-dir .
6
6
  EXPOSE 4242
7
7
  ENV CODEX_PROXY_API_KEY=""
8
8
  ENV CODEX_PROXY_BASE_URL=""
9
- CMD ["codex-proxy"]
9
+ ENV CODEX_PROXY_HOST="0.0.0.0"
10
+ CMD ["codex-proxy", "--host", "0.0.0.0"]
@@ -0,0 +1,567 @@
1
+ Metadata-Version: 2.4
2
+ Name: codex-proxy
3
+ Version: 4.0.0
4
+ Summary: Responses API to Chat Completions bridge for OpenAI Codex CLI
5
+ Project-URL: Repository, https://github.com/ZiryaNoov/codex-proxy
6
+ Author-email: ZakPro <zakarinoo@gmail.com>
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ Keywords: chat-completions,codex,glm,openai,proxy,responses-api,z.ai
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Topic :: Software Development :: Libraries
15
+ Requires-Python: >=3.10
16
+ Requires-Dist: fastapi>=0.115
17
+ Requires-Dist: httpx>=0.27
18
+ Requires-Dist: tomli>=2.0; python_version < '3.11'
19
+ Requires-Dist: uvicorn>=0.30
20
+ Provides-Extra: dev
21
+ Requires-Dist: mypy>=1.13; extra == 'dev'
22
+ Requires-Dist: pytest>=8.0; extra == 'dev'
23
+ Requires-Dist: ruff>=0.8; extra == 'dev'
24
+ Provides-Extra: tui
25
+ Requires-Dist: rich>=13.0; extra == 'tui'
26
+ Description-Content-Type: text/markdown
27
+
28
+ # codex-proxy
29
+
30
+ [![CI](https://github.com/ZiryaNoov/codex-proxy/actions/workflows/ci.yml/badge.svg)](https://github.com/ZiryaNoov/codex-proxy/actions/workflows/ci.yml)
31
+ [![PyPI version](https://img.shields.io/pypi/v/codex-proxy.svg)](https://pypi.org/project/codex-proxy/)
32
+ [![Python version](https://img.shields.io/pypi/pyversions/codex-proxy.svg)](https://pypi.org/project/codex-proxy/)
33
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/ZiryaNoov/codex-proxy/blob/main/LICENSE)
34
+
35
+ **Responses API to Chat Completions bridge for OpenAI Codex CLI.**
36
+
37
+ Use Codex CLI with **any** Chat Completions-compatible provider -- Z.AI, Groq,
38
+ Together AI, OpenRouter, Ollama, Fireworks, Anthropic, Gemini, DeepSeek, Mistral,
39
+ Cohere, NVIDIA NIM, and more.
40
+
41
+ ---
42
+
43
+ ## Why codex-proxy?
44
+
45
+ | | codex-proxy | LiteLLM |
46
+ |---|---|---|
47
+ | **Install** | `pip install codex-proxy` | `pip install litellm[proxy]` |
48
+ | **Dependencies** | 4 (FastAPI, uvicorn, httpx; tomli only on Python < 3.11) | 50+ |
49
+ | **Config** | Single TOML file | YAML + env vars + DB |
50
+ | **Start time** | <1s | 3-5s |
51
+ | **Memory** | ~30MB | ~200MB+ |
52
+ | **Dashboard** | Built-in TUI (terminal) | Separate Admin UI |
53
+ | **Circuit Breaker** | Per-key + global | Basic |
54
+ | **Plugins** | Built-in hook system | Callbacks only |
55
+
56
+ If you need 100+ providers and enterprise features, use LiteLLM.
57
+ If you need a **lightweight, reliable proxy** with advanced resilience features
58
+ and a live dashboard, codex-proxy is for you.
59
+
60
+ ## Architecture
61
+
62
+ ```
63
+ codex-proxy v4.0.0
64
+ ┌────────────┐ ┌──────────────────────────┐ ┌──────────────────┐
65
+ │ │ │ │ │ │
66
+ │ Codex CLI │─────>│ FastAPI server │─────>│ LLM Provider │
67
+ │ Cursor │ │ localhost:4242 │ │ (CC endpoint) │
68
+ │ Any IDE │ │ │ │ │
69
+ │ │<─────│ . Translator │<─────│ Z.AI / Groq / │
70
+ └────────────┘ │ . Response Store │ │ Ollama / etc. │
71
+ │ . Circuit Breaker │ └──────────────────┘
72
+ Responses API │ . Key Rotator │
73
+ protocol │ . Compaction Engine │ Chat Completions
74
+ │ . Plugin Registry │ protocol
75
+ │ . Rate Limiter │
76
+ │ . Provider Adapters │
77
+ └──────────────────────────┘
78
+ ```
79
+
80
+ ## Features
81
+
82
+ ### Core
83
+
84
+ - **Protocol translation** -- Responses API to Chat Completions in real time
85
+ - **Streaming SSE** -- token-by-token delivery with full protocol mapping
86
+ - **WebSocket support** -- full Realtime API envelope handling
87
+ - **Reasoning passthrough** -- forwards thinking/reasoning tokens
88
+ - **Tool calls** -- full function calling support (definitions + results)
89
+ - **Multi-turn** -- `previous_response_id` via in-memory response store
90
+ - **Auto-retry** -- configurable retries on 5xx/transport errors
91
+ - **Rate limiting** -- per-client sliding window rate limiter
92
+ - **Admin auth** -- optional Bearer token on `/reload` and `/status` endpoints
93
+ - **CORS support** -- configurable allowed origins
94
+ - **Request size limits** -- configurable max body size (default 10MB)
95
+
96
+ ### Reliability
97
+
98
+ - **Circuit breaker** -- global fail-fast when upstream is down (configurable threshold + recovery)
99
+ - **Multi-key rotation** -- round-robin across API keys with **per-key circuit breakers**; auth/rate-limit errors (401/403/429) trip individual keys; 5xx handled by global breaker
100
+ - **Context compaction** -- auto-trims long conversations to stay within model limits
101
+
102
+ ### Observability
103
+
104
+ - **Live TUI dashboard** -- real-time metrics, circuit breaker state, key pool status, log tail, hotkeys (`r` reload, `c` clear store, `t` compact, `q` quit)
105
+ - **Plugin system** -- hook-based middleware (`on_request`, `on_response`, `on_error`, `on_startup`, `on_shutdown`) with built-in `LoggingPlugin`
106
+ - **Config hot-reload** -- reload config without restart via TUI hotkey or `POST /reload`
107
+
108
+ ### Ecosystem
109
+
110
+ - **12+ providers** -- Z.AI, Groq, Together, OpenRouter, Ollama, Fireworks, Anthropic, Gemini, DeepSeek, Mistral, Cohere, NVIDIA NIM
111
+ - **Provider adapters** -- per-provider header/request normalization
112
+ - **Docker-ready** -- Dockerfile and Compose file included
113
+ - **pip-installable** -- `pip install codex-proxy`, run with `codex-proxy` CLI
114
+ - **217+ tests** -- comprehensive test suite covering all modules
115
+
116
+ ## Quick Start
117
+
118
+ ### Install
119
+
120
+ ```bash
121
+ pip install codex-proxy
122
+ ```
123
+
124
+ For the TUI dashboard:
125
+
126
+ ```bash
127
+ pip install "codex-proxy[tui]"
128
+ ```
129
+
130
+ ### Configure
131
+
132
+ ```bash
133
+ codex-proxy --init
134
+ # Edit ~/.codex-proxy/config.toml with your provider details
135
+ ```
136
+
137
+ ### Run
138
+
139
+ ```bash
140
+ # Standard mode
141
+ codex-proxy
142
+
143
+ # With live dashboard
144
+ codex-proxy --tui
145
+ ```
146
+
147
+ ### Connect Codex CLI
148
+
149
+ Set the environment variable:
150
+
151
+ ```bash
152
+ export OPENAI_BASE_URL=http://127.0.0.1:4242
153
+ ```
154
+
155
+ Or edit `~/.codex/config.toml`:
156
+
157
+ ```toml
158
+ model = "glm-5.1"
159
+ ```
160
+
161
+ And set your API key in `~/.codex/auth.json`:
162
+
163
+ ```json
164
+ {
165
+ "auth_mode": "apikey",
166
+ "OPENAI_API_KEY": "your-provider-api-key"
167
+ }
168
+ ```
169
+
170
+ Then run:
171
+
172
+ ```bash
173
+ codex --model glm-5.1 "say hello"
174
+ ```
175
+
176
+ ## TUI Dashboard
177
+
178
+ Launch with `codex-proxy --tui` to see a live dashboard:
179
+
180
+ ![codex-proxy TUI Dashboard](assets/tui-dashboard.png)
181
+
182
+ Hotkeys: `r` reload config, `c` clear store, `t` show compaction info, `q` quit.
183
+
184
+ ## Provider Examples
185
+
186
+ ### Z.AI (GLM Models)
187
+
188
+ ```toml
189
+ [provider]
190
+ name = "zai"
191
+ display_name = "Z.AI"
192
+ base_url = "https://api.z.ai/api/paas/v4"
193
+ api_key_env = "OPENAI_API_KEY"
194
+ models = ["glm-5.1", "glm-5", "glm-4.7"]
195
+ default_model = "glm-5.1"
196
+ ```
197
+
198
+ ### Groq
199
+
200
+ ```toml
201
+ [provider]
202
+ name = "groq"
203
+ display_name = "Groq"
204
+ base_url = "https://api.groq.com/openai/v1"
205
+ api_key_env = "GROQ_API_KEY"
206
+ models = ["llama-4-maverick-17b", "mixtral-8x7b-32768"]
207
+ default_model = "llama-4-maverick-17b"
208
+ ```
209
+
210
+ ### Together AI
211
+
212
+ ```toml
213
+ [provider]
214
+ name = "together"
215
+ display_name = "Together AI"
216
+ base_url = "https://api.together.xyz/v1"
217
+ api_key_env = "TOGETHER_API_KEY"
218
+ models = ["meta-llama/Llama-3.3-70B-Instruct-Turbo"]
219
+ default_model = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
220
+ ```
221
+
222
+ ### OpenRouter
223
+
224
+ ```toml
225
+ [provider]
226
+ name = "openrouter"
227
+ display_name = "OpenRouter"
228
+ base_url = "https://openrouter.ai/api/v1"
229
+ api_key_env = "OPENROUTER_API_KEY"
230
+ models = ["deepseek/deepseek-chat-v3-0324"]
231
+ default_model = "deepseek/deepseek-chat-v3-0324"
232
+ ```
233
+
234
+ ### Ollama (Local)
235
+
236
+ ```toml
237
+ [provider]
238
+ name = "ollama"
239
+ display_name = "Ollama (local)"
240
+ base_url = "http://localhost:11434/v1"
241
+ api_key = "ollama"
242
+ models = ["qwen3:32b", "codellama:34b"]
243
+ default_model = "qwen3:32b"
244
+ ```
245
+
246
+ ### Anthropic
247
+
248
+ ```toml
249
+ [provider]
250
+ name = "anthropic"
251
+ display_name = "Anthropic"
252
+ base_url = "https://api.anthropic.com/v1"
253
+ api_key_env = "ANTHROPIC_API_KEY"
254
+ models = ["claude-sonnet-4-20250514"]
255
+ default_model = "claude-sonnet-4-20250514"
256
+ ```
257
+
258
+ ### Google Gemini
259
+
260
+ ```toml
261
+ [provider]
262
+ name = "gemini"
263
+ display_name = "Google Gemini"
264
+ base_url = "https://generativelanguage.googleapis.com/v1beta/openai"
265
+ api_key_env = "GEMINI_API_KEY"
266
+ models = ["gemini-2.5-flash"]
267
+ default_model = "gemini-2.5-flash"
268
+ ```
269
+
270
+ ### DeepSeek
271
+
272
+ ```toml
273
+ [provider]
274
+ name = "deepseek"
275
+ display_name = "DeepSeek"
276
+ base_url = "https://api.deepseek.com/v1"
277
+ api_key_env = "DEEPSEEK_API_KEY"
278
+ models = ["deepseek-chat", "deepseek-reasoner"]
279
+ default_model = "deepseek-chat"
280
+ ```
281
+
282
+ ### Mistral AI
283
+
284
+ ```toml
285
+ [provider]
286
+ name = "mistral"
287
+ display_name = "Mistral AI"
288
+ base_url = "https://api.mistral.ai/v1"
289
+ api_key_env = "MISTRAL_API_KEY"
290
+ models = ["mistral-large-latest"]
291
+ default_model = "mistral-large-latest"
292
+ ```
293
+
294
+ ### Cohere
295
+
296
+ ```toml
297
+ [provider]
298
+ name = "cohere"
299
+ display_name = "Cohere"
300
+ base_url = "https://api.cohere.com/compatibility/v1"
301
+ api_key_env = "CO_API_KEY"
302
+ models = ["command-a-03-2025"]
303
+ default_model = "command-a-03-2025"
304
+ ```
305
+
306
+ ### NVIDIA NIM
307
+
308
+ ```toml
309
+ [provider]
310
+ name = "nvidia"
311
+ display_name = "NVIDIA NIM"
312
+ base_url = "https://integrate.api.nvidia.com/v1"
313
+ api_key_env = "NVIDIA_API_KEY"
314
+ models = ["nvidia/llama-3.1-nemotron-ultra-253b-v1"]
315
+ default_model = "nvidia/llama-3.1-nemotron-ultra-253b-v1"
316
+ ```
317
+
318
+ ### Fireworks AI
319
+
320
+ ```toml
321
+ [provider]
322
+ name = "fireworks"
323
+ display_name = "Fireworks AI"
324
+ base_url = "https://api.fireworks.ai/inference/v1"
325
+ api_key_env = "FIREWORKS_API_KEY"
326
+ models = ["accounts/fireworks/models/llama4-maverick-instruct-basic"]
327
+ default_model = "accounts/fireworks/models/llama4-maverick-instruct-basic"
328
+ ```
329
+
330
+ ## Multi-Key Rotation
331
+
332
+ Distribute load across multiple API keys with automatic failover:
333
+
334
+ ```toml
335
+ [provider]
336
+ name = "zai"
337
+ base_url = "https://api.z.ai/api/paas/v4"
338
+ api_keys = ["sk-key1", "sk-key2", "sk-key3"]
339
+ # or load from env vars:
340
+ # api_keys_env = ["OPENAI_API_KEY_1", "OPENAI_API_KEY_2"]
341
+ models = ["glm-5.1"]
342
+ default_model = "glm-5.1"
343
+ ```
344
+
345
+ Each key gets its own circuit breaker. Auth and rate-limit errors (401/403/429) trip the
346
+ individual key; server errors (5xx) are handled by the global circuit breaker.
347
+ When a key's circuit opens, it's skipped until recovery. If all keys are open,
348
+ the first key is used as fallback.
349
+
350
+ ## Plugin System
351
+
352
+ Extend codex-proxy with custom middleware:
353
+
354
+ ```toml
355
+ [plugins]
356
+ enabled = true
357
+ plugins = [
358
+ "codex_proxy.plugins_builtin.LoggingPlugin",
359
+ ]
360
+ ```
361
+
362
+ Plugins implement async hooks:
363
+
364
+ ```python
365
+ from codex_proxy.plugins import Plugin, PluginContext
366
+
367
+ class MyPlugin(Plugin):
368
+ async def on_request(self, ctx: PluginContext) -> None:
369
+ # Called before forwarding to provider
370
+ pass
371
+
372
+ async def on_response(self, ctx: PluginContext) -> None:
373
+ # Called after successful response
374
+ pass
375
+
376
+ async def on_error(self, ctx: PluginContext) -> None:
377
+ # Called on failure
378
+ pass
379
+ ```
380
+
381
+ Broken plugins are isolated -- they cannot crash the proxy.
382
+
383
+ ## Configuration Reference
384
+
385
+ Config file: `~/.codex-proxy/config.toml`
386
+
387
+ ### `[server]`
388
+
389
+ | Field | Type | Default | Description |
390
+ |---|---|---|---|
391
+ | `host` | string | `"127.0.0.1"` | Bind address |
392
+ | `port` | int | `4242` | Bind port |
393
+ | `log_level` | string | `"warning"` | Log verbosity: `debug`, `info`, `warning`, `error` |
394
+ | `log_dir` | string | `~/.codex-proxy/logs` | Directory for debug log files |
395
+ | `max_retries` | int | `1` | Retries on 5xx/transport errors |
396
+ | `retry_delay` | float | `0.5` | Seconds between retries |
397
+ | `connect_timeout` | float | `10.0` | Seconds to connect to upstream |
398
+ | `read_timeout` | float | `180.0` | Seconds to wait for upstream response |
399
+ | `admin_token` | string | `""` | Bearer token for `/reload` and `/status` (empty = no auth) |
400
+ | `max_request_body_bytes` | int | `10485760` | Max request body size (10MB) |
401
+ | `cors_origins` | list | `[]` | Allowed CORS origins (empty = no CORS headers) |
402
+
403
+ ### `[store]`
404
+
405
+ | Field | Type | Default | Description |
406
+ |---|---|---|---|
407
+ | `ttl_seconds` | int | `600` | Response cache TTL in seconds (10 min) |
408
+ | `max_entries` | int | `100` | Maximum cached responses for `previous_response_id` |
409
+
410
+ ### `[provider]`
411
+
412
+ | Field | Type | Default | Description |
413
+ |---|---|---|---|
414
+ | `name` | string | `"zai"` | Provider identifier (used for adapter selection) |
415
+ | `display_name` | string | `"Z.AI"` | Human-readable provider name |
416
+ | `base_url` | string | `"https://api.z.ai/api/paas/v4"` | Provider Chat Completions endpoint |
417
+ | `api_key` | string | `""` | API key (inline) |
418
+ | `api_key_env` | string | `""` | Environment variable name for the API key |
419
+ | `api_keys` | list | `[]` | Multiple API keys for rotation |
420
+ | `api_keys_env` | list | `[]` | Env var names for multiple API keys |
421
+ | `models` | list | `["glm-5.1", ...]` | Available model IDs |
422
+ | `default_model` | string | `"glm-5.1"` | Default model when none specified |
423
+ | `stream` | bool | `true` | Enable streaming by default |
424
+ | `extra_headers` | dict | `{}` | Additional HTTP headers per request |
425
+
426
+ ### `[circuit_breaker]`
427
+
428
+ | Field | Type | Default | Description |
429
+ |---|---|---|---|
430
+ | `enabled` | bool | `true` | Enable/disable circuit breaker |
431
+ | `failure_threshold` | int | `5` | Consecutive failures before opening |
432
+ | `recovery_timeout` | float | `30.0` | Seconds before half-open retry |
433
+
434
+ ### `[compaction]`
435
+
436
+ | Field | Type | Default | Description |
437
+ |---|---|---|---|
438
+ | `enabled` | bool | `true` | Enable/disable context compaction |
439
+ | `max_messages` | int | `50` | Message count threshold to trigger compaction |
440
+ | `keep_last` | int | `20` | Number of recent messages to preserve |
441
+
442
+ ### `[plugins]`
443
+
444
+ | Field | Type | Default | Description |
445
+ |---|---|---|---|
446
+ | `enabled` | bool | `false` | Enable/disable plugin system |
447
+ | `plugins` | list | `[]` | Dotted paths to plugin classes |
448
+
449
+ ### `[rate_limit]`
450
+
451
+ | Field | Type | Default | Description |
452
+ |---|---|---|---|
453
+ | `enabled` | bool | `false` | Enable/disable per-client rate limiting |
454
+ | `max_requests` | int | `60` | Max requests per client per window |
455
+ | `window_seconds` | int | `60` | Sliding window duration in seconds |
456
+
457
+ ## Environment Variables
458
+
459
+ | Variable | Description |
460
+ |---|---|
461
+ | `CODEX_PROXY_API_KEY` | API key for the provider (highest priority) |
462
+ | `CODEX_PROXY_BASE_URL` | Override provider base URL |
463
+ | `CODEX_PROXY_MODEL` | Override default model name |
464
+ | `OPENAI_API_KEY` | Fallback API key when no config file exists |
465
+ | `OPENAI_BASE_URL` | Point Codex CLI to the proxy (`http://127.0.0.1:4242`) |
466
+
467
+ Environment variables are used when `~/.codex-proxy/config.toml` does not exist,
468
+ enabling zero-config deployment via env vars alone.
469
+
470
+ ## Docker
471
+
472
+ ### Build and Run
473
+
474
+ ```bash
475
+ docker build -t codex-proxy .
476
+ docker run -d \
477
+ -p 4242:4242 \
478
+ -e CODEX_PROXY_API_KEY=your-key \
479
+ -e CODEX_PROXY_BASE_URL=https://api.z.ai/api/paas/v4 \
480
+ -e CODEX_PROXY_MODEL=glm-5.1 \
481
+ codex-proxy
482
+ ```
483
+
484
+ ### Docker Compose
485
+
486
+ ```bash
487
+ # Set your API key
488
+ export CODEX_PROXY_API_KEY=your-key
489
+
490
+ # Start the proxy
491
+ docker compose up -d
492
+
493
+ # Check health
494
+ curl http://localhost:4242/health
495
+ ```
496
+
497
+ The Compose file includes a health check that polls `/health` every 30 seconds.
498
+
499
+ ## API Endpoints
500
+
501
+ | Endpoint | Method | Description |
502
+ |---|---|---|
503
+ | `/responses` | POST | Responses API (HTTP, streaming + non-streaming) |
504
+ | `/responses` | WS | Responses API (WebSocket, full envelope handling) |
505
+ | `/responses/{id}` | GET | Retrieve a stored response by ID |
506
+ | `/models` | GET | List available models |
507
+ | `/v1/models` | GET | List available models (v1 prefix alias) |
508
+ | `/health` | GET | Health check (`?check_backend=true` pings upstream) |
509
+ | `/status` | GET | Detailed server status (uptime, requests, provider info) |
510
+ | `/reload` | POST | Reload configuration from disk without restart |
511
+
512
+ ## CLI Options
513
+
514
+ ```
515
+ codex-proxy Start the proxy server
516
+ codex-proxy --tui Start with live TUI dashboard
517
+ codex-proxy --port 8080 Override bind port
518
+ codex-proxy --host 0.0.0.0 Override bind address
519
+ codex-proxy --config PATH Use custom config file
520
+ codex-proxy --init Write example config and exit
521
+ codex-proxy --print-config Print resolved config and exit
522
+ ```
523
+
524
+ ## Development
525
+
526
+ ### Setup
527
+
528
+ ```bash
529
+ git clone https://github.com/ZiryaNoov/codex-proxy.git
530
+ cd codex-proxy
531
+ pip install -e ".[dev,tui]"
532
+ ```
533
+
534
+ ### Testing
535
+
536
+ ```bash
537
+ pytest tests/ -v # 217+ tests
538
+ ```
539
+
540
+ ### Linting & Type Checking
541
+
542
+ ```bash
543
+ ruff check src/ tests/
544
+ mypy src/
545
+ ```
546
+
547
+ ### Pre-commit Hooks
548
+
549
+ ```bash
550
+ pre-commit install
551
+ ```
552
+
553
+ ## Contributing
554
+
555
+ Contributions are welcome! Please read the
556
+ [Contributing Guide](CONTRIBUTING.md) and
557
+ [Code of Conduct](CODE_OF_CONDUCT.md).
558
+
559
+ 1. Fork the repository
560
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
561
+ 3. Commit your changes (`git commit -m 'Add amazing feature'`)
562
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
563
+ 5. Open a Pull Request
564
+
565
+ ## License
566
+
567
+ [MIT](https://github.com/ZiryaNoov/codex-proxy/blob/main/LICENSE) -- ZakPro