codexcomp 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codexcomp-0.3.0/.github/workflows/release.yml +40 -0
- codexcomp-0.3.0/.gitignore +5 -0
- codexcomp-0.3.0/LICENSE +27 -0
- codexcomp-0.3.0/PKG-INFO +259 -0
- codexcomp-0.3.0/README.md +233 -0
- codexcomp-0.3.0/README.zh-CN.md +208 -0
- codexcomp-0.3.0/codexcomp/__init__.py +0 -0
- codexcomp-0.3.0/codexcomp/cli.py +108 -0
- codexcomp-0.3.0/codexcomp/fold.py +361 -0
- codexcomp-0.3.0/codexcomp/server.py +248 -0
- codexcomp-0.3.0/codexcomp/service.py +231 -0
- codexcomp-0.3.0/pyproject.toml +46 -0
- codexcomp-0.3.0/systemd/codexcomp.service.example +18 -0
- codexcomp-0.3.0/test_fold.py +113 -0
- codexcomp-0.3.0/uv.lock +468 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
name: release
|
|
2
|
+
|
|
3
|
+
# Build and publish to PyPI on a version tag, using PyPI Trusted Publishing
|
|
4
|
+
# (OIDC) — no API token stored anywhere. Configure the trusted publisher once
|
|
5
|
+
# at https://pypi.org/manage/project/codexcomp/settings/publishing/
|
|
6
|
+
# (or as a pending publisher before the first release):
|
|
7
|
+
# owner: dzshzx repo: codexcomp workflow: release.yml environment: pypi
|
|
8
|
+
|
|
9
|
+
on:
|
|
10
|
+
push:
|
|
11
|
+
tags:
|
|
12
|
+
- "v*"
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
build:
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v5
|
|
21
|
+
- name: Build sdist + wheel
|
|
22
|
+
run: uv build
|
|
23
|
+
- uses: actions/upload-artifact@v4
|
|
24
|
+
with:
|
|
25
|
+
name: dist
|
|
26
|
+
path: dist/
|
|
27
|
+
|
|
28
|
+
publish:
|
|
29
|
+
needs: build
|
|
30
|
+
runs-on: ubuntu-latest
|
|
31
|
+
environment: pypi
|
|
32
|
+
permissions:
|
|
33
|
+
id-token: write # required for Trusted Publishing (OIDC)
|
|
34
|
+
steps:
|
|
35
|
+
- uses: actions/download-artifact@v4
|
|
36
|
+
with:
|
|
37
|
+
name: dist
|
|
38
|
+
path: dist/
|
|
39
|
+
- name: Publish to PyPI
|
|
40
|
+
uses: pypa/gh-action-pypi-publish@v1.14.0
|
codexcomp-0.3.0/LICENSE
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 dzshzx
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
Mechanism inspiration: the 518n-2 truncation detection + fold-continuation
|
|
26
|
+
approach originates from neteroster/CodexCont (MIT). This project is an
|
|
27
|
+
independent, from-scratch implementation.
|
codexcomp-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codexcomp
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Local Responses proxy for OpenAI Codex CLI: folds gpt-5.5 518n-2 reasoning truncation (516 degradation) via the official openai_base_url wiring — no provider change, WebSocket-first, no fallback noise.
|
|
5
|
+
Project-URL: Homepage, https://github.com/dzshzx/codexcomp
|
|
6
|
+
Project-URL: Repository, https://github.com/dzshzx/codexcomp
|
|
7
|
+
Project-URL: Issues, https://github.com/dzshzx/codexcomp/issues
|
|
8
|
+
Author: dzshzx
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: 516,codex,gpt-5.5,openai,proxy,reasoning
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Topic :: Internet :: Proxy Servers
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Requires-Dist: httpx>=0.27
|
|
22
|
+
Requires-Dist: starlette>=0.41
|
|
23
|
+
Requires-Dist: uvicorn[standard]>=0.32
|
|
24
|
+
Requires-Dist: zstandard>=0.23
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# codexcomp
|
|
28
|
+
|
|
29
|
+
[![PyPI version](https://img.shields.io/pypi/v/codexcomp)](https://pypi.org/project/codexcomp/)
|
|
30
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/codexcomp)](https://pypi.org/project/codexcomp/)
|
|
31
|
+
[![License: MIT](https://img.shields.io/pypi/l/codexcomp)](https://github.com/dzshzx/codexcomp/blob/main/LICENSE)
|
|
32
|
+
|
|
33
|
+
**English** · [简体中文](README.zh-CN.md)
|
|
34
|
+
|
|
35
|
+
A tiny local Responses proxy for the **OpenAI Codex CLI** that cures the gpt-5.5
|
|
36
|
+
**"516" reasoning-truncation degradation** — while leaving your `model_provider`
|
|
37
|
+
untouched, so session grouping, remote compaction and remote-control keep working.
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
uv tool install codexcomp # install
|
|
41
|
+
codexcomp # run (127.0.0.1:8787)
|
|
42
|
+
# then add one line to ~/.codex/config.toml: openai_base_url = "http://127.0.0.1:8787/v1"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
> **Credits.** The detection-and-continue idea comes from
|
|
46
|
+
> [**neteroster/CodexCont**](https://github.com/neteroster/CodexCont) (MIT) — thank you.
|
|
47
|
+
> This project is an independent, from-scratch implementation that keeps the built-in
|
|
48
|
+
> provider intact; see [Differences](#differences-from-codexcont).
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## The problem: gpt-5.5 "516" degradation
|
|
53
|
+
|
|
54
|
+
On the OpenAI Codex CLI, gpt-5.5's reasoning sometimes gets cut short at a very
|
|
55
|
+
specific token count — `reasoning_tokens == 518 * n − 2` (i.e. **516, 1034, 1552, …**).
|
|
56
|
+
When a turn lands on that fingerprint, the model stops thinking early and the answer
|
|
57
|
+
quality drops sharply. It is an upstream issue with no official fix
|
|
58
|
+
([openai/codex#30364](https://github.com/openai/codex/issues/30364)).
|
|
59
|
+
|
|
60
|
+
`codexcomp` sits on `127.0.0.1` between Codex and the upstream Responses API.
|
|
61
|
+
When it sees a turn truncate on the `518n−2` fingerprint, it **makes the model keep
|
|
62
|
+
thinking** and **folds the extra rounds into a single downstream response**, so Codex
|
|
63
|
+
sees one clean, complete answer.
|
|
64
|
+
|
|
65
|
+
## How it works
|
|
66
|
+
|
|
67
|
+
The proxy streams every upstream round and runs a small state machine (`codexcomp/fold.py`):
|
|
68
|
+
|
|
69
|
+
1. **Detect.** At the end of each round it reads
|
|
70
|
+
`usage.output_tokens_details.reasoning_tokens`. If it equals `518n − 2` (with
|
|
71
|
+
`1 ≤ n ≤ 6`), the round is treated as truncated; at most 3 continuation rounds are attempted.
|
|
72
|
+
2. **Continue.** It discards that round's *tentative* output (the message / tool calls —
|
|
73
|
+
they were produced on truncated thinking), then replays the round's reasoning items
|
|
74
|
+
(including `encrypted_content`) plus a single `phase:"commentary"` assistant message
|
|
75
|
+
(`"Continue thinking..."`) as the next round's input. That nudges the model to resume
|
|
76
|
+
reasoning where it left off.
|
|
77
|
+
3. **Fold.** Reasoning is streamed live to Codex the whole time; only the *clean* final
|
|
78
|
+
round's output is flushed. The terminal event is rebuilt as if the whole thing were
|
|
79
|
+
one response — `input`/`cached` come from round 1 (so it never looks like a blown
|
|
80
|
+
context window), reasoning is summed, and the true cumulative cost is recorded under
|
|
81
|
+
`metadata.proxy_billed_usage`.
|
|
82
|
+
|
|
83
|
+
### Wiring: why the built-in provider stays intact
|
|
84
|
+
|
|
85
|
+
Codex is pointed at the proxy with **one top-level config key**, not a new provider:
|
|
86
|
+
|
|
87
|
+
```toml
|
|
88
|
+
# ~/.codex/config.toml (top level, before the first [table])
|
|
89
|
+
openai_base_url = "http://127.0.0.1:8787/v1"
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
`openai_base_url` overrides the base URL of the **built-in `openai` provider** in place.
|
|
93
|
+
This is the officially supported key
|
|
94
|
+
([openai/codex#16719](https://github.com/openai/codex/issues/16719); the same-name
|
|
95
|
+
`[model_providers.openai]` override is rejected by the maintainers, and the
|
|
96
|
+
`OPENAI_BASE_URL` env var was removed). Because the provider id stays `openai`:
|
|
97
|
+
|
|
98
|
+
- your conversation history is **not** re-bucketed/hidden by provider,
|
|
99
|
+
- **remote compaction** keeps working (`supports_remote_compaction` stays true),
|
|
100
|
+
- **remote-control** is unaffected (it uses the separate `chatgpt_base_url`).
|
|
101
|
+
|
|
102
|
+
### Differences from CodexCont
|
|
103
|
+
|
|
104
|
+
The 518n−2 detection + fold-continuation mechanism is [CodexCont]'s idea; the
|
|
105
|
+
implementation here is new and diverges on a few deliberate points:
|
|
106
|
+
|
|
107
|
+
| | codexcomp | CodexCont |
|
|
108
|
+
| --- | --- | --- |
|
|
109
|
+
| **Codex wiring** | top-level `openai_base_url` (**built-in provider unchanged**) | a new `[model_providers]` entry (history hidden per-provider, remote-control unusable, remote compaction lost) |
|
|
110
|
+
| **Downstream transport** | **WebSocket-first** — full `responses_websockets` protocol, plus SSE fallback | SSE only (Codex tries ws → 405 → ~5 reconnect warnings per session, then falls back) |
|
|
111
|
+
| **zstd request bodies** (0.142.x built-in provider) | decompressed natively, no Codex config change | needs `[features] enable_request_compression = false` |
|
|
112
|
+
| **`GET /v1/models`** (model-catalog refresh) | passed through (`/v1/*`) | not proxied (silently fails, relies on cache) |
|
|
113
|
+
| **Continuation** | commentary method only | commentary + legacy tool-pair + cross-turn repair, more knobs |
|
|
114
|
+
|
|
115
|
+
[CodexCont]: https://github.com/neteroster/CodexCont
|
|
116
|
+
|
|
117
|
+
## Install
|
|
118
|
+
|
|
119
|
+
Requires [uv](https://docs.astral.sh/uv/) (which manages Python for you) and the Codex
|
|
120
|
+
CLI (ChatGPT OAuth login; tested on 0.142.x).
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
uv tool install codexcomp # from PyPI
|
|
124
|
+
# or straight from source:
|
|
125
|
+
# uv tool install git+https://github.com/dzshzx/codexcomp
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
uv puts the executable in its bin dir (`~/.local/bin` on Unix/macOS; on Windows run
|
|
129
|
+
`where.exe codexcomp`; `uv tool update-shell` adds it to PATH). Then:
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
codexcomp # run in foreground (default 127.0.0.1:8787)
|
|
133
|
+
codexcomp --port 8790 --log-level debug
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Wire Codex to it (one line in `~/.codex/config.toml`, see above), and you're done.
|
|
137
|
+
**Disable** by commenting out the `openai_base_url` line and stopping the proxy. (If the
|
|
138
|
+
key stays but the proxy is down, Codex errors on an unreachable upstream.)
|
|
139
|
+
|
|
140
|
+
Upgrade / uninstall: `uv tool upgrade codexcomp` / `uv tool uninstall codexcomp`.
|
|
141
|
+
|
|
142
|
+
### Ports
|
|
143
|
+
|
|
144
|
+
The proxy's port **must match** the port in Codex's `openai_base_url`. If the default
|
|
145
|
+
port (8787) is busy, the proxy **exits with a clear message** rather than drifting — a
|
|
146
|
+
wired proxy that silently binds another port would just be unreachable. To use a
|
|
147
|
+
different port, pass `--port N` and set `openai_base_url` to the same `N`.
|
|
148
|
+
|
|
149
|
+
`--auto-port` is for interactive one-off runs only: on a conflict it scans for the next
|
|
150
|
+
free port and prints which `openai_base_url` to use. Don't use it for a wired service.
|
|
151
|
+
|
|
152
|
+
## Autostart (optional, off by default)
|
|
153
|
+
|
|
154
|
+
Installing registers **no** autostart — it's entirely your choice.
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
codexcomp install-service # register + start (current platform)
|
|
158
|
+
codexcomp uninstall-service # remove
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
`install-service` picks the per-user, runs-in-your-session mechanism (a system service
|
|
162
|
+
runs in a session with no user environment and can't reach the uv executable or your
|
|
163
|
+
proxy settings under your profile):
|
|
164
|
+
|
|
165
|
+
- **Linux / WSL** → a systemd **user** unit (`~/.config/systemd/user/`). Run
|
|
166
|
+
`loginctl enable-linger` once to start it at boot without logging in. Manual equivalent:
|
|
167
|
+
see `systemd/codexcomp.service.example`.
|
|
168
|
+
- **macOS** → a launchd **LaunchAgent** in `~/Library/LaunchAgents/` (starts at login, in
|
|
169
|
+
your GUI session). Load with `launchctl bootstrap gui/$(id -u) <plist>` /
|
|
170
|
+
`launchctl kickstart -k …`; remove with `launchctl bootout …`.
|
|
171
|
+
- **Windows** → **prints manual steps, registers nothing** (see below).
|
|
172
|
+
|
|
173
|
+
### Windows autostart is manual — on purpose
|
|
174
|
+
|
|
175
|
+
A program that writes an autostart entry (Startup VBS / Run key / scheduled task) and
|
|
176
|
+
launches a hidden process trips behavioral antivirus as trojan-like persistence —
|
|
177
|
+
Kaspersky's proactive-defense module flags the launching `python.exe` as
|
|
178
|
+
`PDM:Trojan.Win32.Generic`. A **user-created** Startup shortcut is trusted by the same AV.
|
|
179
|
+
|
|
180
|
+
So this package ships a windowless launcher, `codexcompw` (a Windows GUI-subsystem
|
|
181
|
+
exe — no console window at logon), and `install-service` just tells you how to point a
|
|
182
|
+
shortcut at it:
|
|
183
|
+
|
|
184
|
+
1. `Win+R` → `shell:startup` (opens the Startup folder).
|
|
185
|
+
2. New → Shortcut → target = the path from `where.exe codexcompw` (append
|
|
186
|
+
`--port N` if you use a custom port).
|
|
187
|
+
|
|
188
|
+
Delete the shortcut to disable it.
|
|
189
|
+
|
|
190
|
+
### Mirrored-networking shortcut (WSL ↔ Windows)
|
|
191
|
+
|
|
192
|
+
If your WSL2 uses `networkingMode=mirrored`, Windows and WSL **share `127.0.0.1`**. Then
|
|
193
|
+
you only need **one** proxy in total, on one side — run it in WSL (as a systemd service), and on
|
|
194
|
+
the Windows side just add the `openai_base_url` line to `~/.codex/config.toml` pointing at
|
|
195
|
+
the same `127.0.0.1:8787`. No second proxy or Windows autostart needed (the only cost is
|
|
196
|
+
that Windows Codex depends on the WSL proxy being up).
|
|
197
|
+
|
|
198
|
+
## Verify
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
curl -sS http://127.0.0.1:8787/healthz # {"ok":true,...}
|
|
202
|
+
journalctl --user -u codexcomp -f | grep -E 'round|done' # Linux/WSL
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
A live fold looks like this (two chained 516s beaten, answer correct):
|
|
206
|
+
|
|
207
|
+
```
|
|
208
|
+
round 1: in=21550 out=664 reason=516 total=22214 | n=1 buffered=['function_call'] -> continue
|
|
209
|
+
round 2: in=22078 out=652 reason=516 total=22730 | n=1 buffered=['function_call'] -> continue
|
|
210
|
+
round 3: in=22606 out=566 reason=291 total=23172 | n=None buffered=[...] -> clean
|
|
211
|
+
done: 3 round(s) | ... | status=completed stop=natural
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Develop
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
git clone https://github.com/dzshzx/codexcomp && cd codexcomp
|
|
218
|
+
uv sync
|
|
219
|
+
uv run python test_fold.py # fold state-machine self-test → ALL PASS
|
|
220
|
+
uv run codexcomp # run locally
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
Releases go out via PyPI Trusted Publishing (`.github/workflows/release.yml`, OIDC, no
|
|
224
|
+
stored token): push a `v*` tag and it builds + publishes automatically.
|
|
225
|
+
|
|
226
|
+
Layout:
|
|
227
|
+
|
|
228
|
+
- `codexcomp/fold.py` — fingerprint detection + fold state machine (transport-agnostic;
|
|
229
|
+
covered by `test_fold.py`).
|
|
230
|
+
- `codexcomp/server.py` — starlette transport: ws / SSE downstream, SSE upstream,
|
|
231
|
+
zstd/gzip request decompression, `/v1/*` passthrough.
|
|
232
|
+
- `codexcomp/cli.py` — CLI entry (`codexcomp`; loopback only; auth passthrough, stores
|
|
233
|
+
no credentials).
|
|
234
|
+
|
|
235
|
+
## Security & disclaimer
|
|
236
|
+
|
|
237
|
+
- The proxy is **auth passthrough** only: it forwards Codex's `Authorization` header and
|
|
238
|
+
never reads, stores, or logs any credential.
|
|
239
|
+
- It listens on the **loopback** address only — do not expose it on a non-loopback interface.
|
|
240
|
+
- **Unofficial**: it depends on upstream behavior that isn't a public contract (the
|
|
241
|
+
truncation fingerprint, the ws frame format). An OpenAI-side change may break it. Use at
|
|
242
|
+
your own risk.
|
|
243
|
+
- Continuation spends **extra real tokens** (see `metadata.proxy_billed_usage`); codexcomp
|
|
244
|
+
bounds this with an `n` window and a 3-round cap.
|
|
245
|
+
|
|
246
|
+
## Community
|
|
247
|
+
|
|
248
|
+
Built for and shared with the [**LINUX DO**](https://linux.do) community, where the
|
|
249
|
+
gpt-5.5 "516" degradation was diagnosed and discussed. Feedback and issues welcome there
|
|
250
|
+
and on [GitHub Issues](https://github.com/dzshzx/codexcomp/issues).
|
|
251
|
+
|
|
252
|
+
## License
|
|
253
|
+
|
|
254
|
+
[MIT](LICENSE). Fully open source, no closed parts.
|
|
255
|
+
|
|
256
|
+
Mechanism credit: [**neteroster/CodexCont**](https://github.com/neteroster/CodexCont) (MIT) —
|
|
257
|
+
this project reuses its 518n−2 detect-and-continue *idea* with an independent, from-scratch
|
|
258
|
+
implementation, and keeps the built-in provider intact (see [Differences](#differences-from-codexcont)).
|
|
259
|
+
The [LICENSE](LICENSE) file carries an attribution note for CodexCont (no CodexCont code is included).
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
# codexcomp
|
|
2
|
+
|
|
3
|
+
[![PyPI version](https://img.shields.io/pypi/v/codexcomp)](https://pypi.org/project/codexcomp/)
|
|
4
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/codexcomp)](https://pypi.org/project/codexcomp/)
|
|
5
|
+
[![License: MIT](https://img.shields.io/pypi/l/codexcomp)](https://github.com/dzshzx/codexcomp/blob/main/LICENSE)
|
|
6
|
+
|
|
7
|
+
**English** · [简体中文](README.zh-CN.md)
|
|
8
|
+
|
|
9
|
+
A tiny local Responses proxy for the **OpenAI Codex CLI** that cures the gpt-5.5
|
|
10
|
+
**"516" reasoning-truncation degradation** — while leaving your `model_provider`
|
|
11
|
+
untouched, so session grouping, remote compaction and remote-control keep working.
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
uv tool install codexcomp # install
|
|
15
|
+
codexcomp # run (127.0.0.1:8787)
|
|
16
|
+
# then add one line to ~/.codex/config.toml: openai_base_url = "http://127.0.0.1:8787/v1"
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
> **Credits.** The detection-and-continue idea comes from
|
|
20
|
+
> [**neteroster/CodexCont**](https://github.com/neteroster/CodexCont) (MIT) — thank you.
|
|
21
|
+
> This project is an independent, from-scratch implementation that keeps the built-in
|
|
22
|
+
> provider intact; see [Differences](#differences-from-codexcont).
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## The problem: gpt-5.5 "516" degradation
|
|
27
|
+
|
|
28
|
+
On the OpenAI Codex CLI, gpt-5.5's reasoning sometimes gets cut short at a very
|
|
29
|
+
specific token count — `reasoning_tokens == 518 * n − 2` (i.e. **516, 1034, 1552, …**).
|
|
30
|
+
When a turn lands on that fingerprint, the model stops thinking early and the answer
|
|
31
|
+
quality drops sharply. It is an upstream issue with no official fix
|
|
32
|
+
([openai/codex#30364](https://github.com/openai/codex/issues/30364)).
|
|
33
|
+
|
|
34
|
+
`codexcomp` sits on `127.0.0.1` between Codex and the upstream Responses API.
|
|
35
|
+
When it sees a turn truncate on the `518n−2` fingerprint, it **makes the model keep
|
|
36
|
+
thinking** and **folds the extra rounds into a single downstream response**, so Codex
|
|
37
|
+
sees one clean, complete answer.
|
|
38
|
+
|
|
39
|
+
## How it works
|
|
40
|
+
|
|
41
|
+
The proxy streams every upstream round and runs a small state machine (`codexcomp/fold.py`):
|
|
42
|
+
|
|
43
|
+
1. **Detect.** At the end of each round it reads
|
|
44
|
+
`usage.output_tokens_details.reasoning_tokens`. If it equals `518n − 2` (with
|
|
45
|
+
`1 ≤ n ≤ 6`), the round is treated as truncated; at most 3 continuation rounds are attempted.
|
|
46
|
+
2. **Continue.** It discards that round's *tentative* output (the message / tool calls —
|
|
47
|
+
they were produced on truncated thinking), then replays the round's reasoning items
|
|
48
|
+
(including `encrypted_content`) plus a single `phase:"commentary"` assistant message
|
|
49
|
+
(`"Continue thinking..."`) as the next round's input. That nudges the model to resume
|
|
50
|
+
reasoning where it left off.
|
|
51
|
+
3. **Fold.** Reasoning is streamed live to Codex the whole time; only the *clean* final
|
|
52
|
+
round's output is flushed. The terminal event is rebuilt as if the whole thing were
|
|
53
|
+
one response — `input`/`cached` come from round 1 (so it never looks like a blown
|
|
54
|
+
context window), reasoning is summed, and the true cumulative cost is recorded under
|
|
55
|
+
`metadata.proxy_billed_usage`.
|
|
56
|
+
|
|
57
|
+
### Wiring: why the built-in provider stays intact
|
|
58
|
+
|
|
59
|
+
Codex is pointed at the proxy with **one top-level config key**, not a new provider:
|
|
60
|
+
|
|
61
|
+
```toml
|
|
62
|
+
# ~/.codex/config.toml (top level, before the first [table])
|
|
63
|
+
openai_base_url = "http://127.0.0.1:8787/v1"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
`openai_base_url` overrides the base URL of the **built-in `openai` provider** in place.
|
|
67
|
+
This is the officially supported key
|
|
68
|
+
([openai/codex#16719](https://github.com/openai/codex/issues/16719); the same-name
|
|
69
|
+
`[model_providers.openai]` override is rejected by the maintainers, and the
|
|
70
|
+
`OPENAI_BASE_URL` env var was removed). Because the provider id stays `openai`:
|
|
71
|
+
|
|
72
|
+
- your conversation history is **not** re-bucketed/hidden by provider,
|
|
73
|
+
- **remote compaction** keeps working (`supports_remote_compaction` stays true),
|
|
74
|
+
- **remote-control** is unaffected (it uses the separate `chatgpt_base_url`).
|
|
75
|
+
|
|
76
|
+
### Differences from CodexCont
|
|
77
|
+
|
|
78
|
+
The 518n−2 detection + fold-continuation mechanism is [CodexCont]'s idea; the
|
|
79
|
+
implementation here is new and diverges on a few deliberate points:
|
|
80
|
+
|
|
81
|
+
| | codexcomp | CodexCont |
|
|
82
|
+
| --- | --- | --- |
|
|
83
|
+
| **Codex wiring** | top-level `openai_base_url` (**built-in provider unchanged**) | a new `[model_providers]` entry (history hidden per-provider, remote-control unusable, remote compaction lost) |
|
|
84
|
+
| **Downstream transport** | **WebSocket-first** — full `responses_websockets` protocol, plus SSE fallback | SSE only (Codex tries ws → 405 → ~5 reconnect warnings per session, then falls back) |
|
|
85
|
+
| **zstd request bodies** (0.142.x built-in provider) | decompressed natively, no Codex config change | needs `[features] enable_request_compression = false` |
|
|
86
|
+
| **`GET /v1/models`** (model-catalog refresh) | passed through (`/v1/*`) | not proxied (silently fails, relies on cache) |
|
|
87
|
+
| **Continuation** | commentary method only | commentary + legacy tool-pair + cross-turn repair, more knobs |
|
|
88
|
+
|
|
89
|
+
[CodexCont]: https://github.com/neteroster/CodexCont
|
|
90
|
+
|
|
91
|
+
## Install
|
|
92
|
+
|
|
93
|
+
Requires [uv](https://docs.astral.sh/uv/) (which manages Python for you) and the Codex
|
|
94
|
+
CLI (ChatGPT OAuth login; tested on 0.142.x).
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
uv tool install codexcomp # from PyPI
|
|
98
|
+
# or straight from source:
|
|
99
|
+
# uv tool install git+https://github.com/dzshzx/codexcomp
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
uv puts the executable in its bin dir (`~/.local/bin` on Unix/macOS; on Windows run
|
|
103
|
+
`where.exe codexcomp`; `uv tool update-shell` adds it to PATH). Then:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
codexcomp # run in foreground (default 127.0.0.1:8787)
|
|
107
|
+
codexcomp --port 8790 --log-level debug
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Wire Codex to it (one line in `~/.codex/config.toml`, see above), and you're done.
|
|
111
|
+
**Disable** by commenting out the `openai_base_url` line and stopping the proxy. (If the
|
|
112
|
+
key stays but the proxy is down, Codex errors on an unreachable upstream.)
|
|
113
|
+
|
|
114
|
+
Upgrade / uninstall: `uv tool upgrade codexcomp` / `uv tool uninstall codexcomp`.
|
|
115
|
+
|
|
116
|
+
### Ports
|
|
117
|
+
|
|
118
|
+
The proxy's port **must match** the port in Codex's `openai_base_url`. If the default
|
|
119
|
+
port (8787) is busy, the proxy **exits with a clear message** rather than drifting — a
|
|
120
|
+
wired proxy that silently binds another port would just be unreachable. To use a
|
|
121
|
+
different port, pass `--port N` and set `openai_base_url` to the same `N`.
|
|
122
|
+
|
|
123
|
+
`--auto-port` is for interactive one-off runs only: on a conflict it scans for the next
|
|
124
|
+
free port and prints which `openai_base_url` to use. Don't use it for a wired service.
|
|
125
|
+
|
|
126
|
+
## Autostart (optional, off by default)
|
|
127
|
+
|
|
128
|
+
Installing registers **no** autostart — it's entirely your choice.
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
codexcomp install-service # register + start (current platform)
|
|
132
|
+
codexcomp uninstall-service # remove
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
`install-service` picks the per-user, runs-in-your-session mechanism (a system service
|
|
136
|
+
runs in a session with no user environment and can't reach the uv executable or your
|
|
137
|
+
proxy settings under your profile):
|
|
138
|
+
|
|
139
|
+
- **Linux / WSL** → a systemd **user** unit (`~/.config/systemd/user/`). Run
|
|
140
|
+
`loginctl enable-linger` once to start it at boot without logging in. Manual equivalent:
|
|
141
|
+
see `systemd/codexcomp.service.example`.
|
|
142
|
+
- **macOS** → a launchd **LaunchAgent** in `~/Library/LaunchAgents/` (starts at login, in
|
|
143
|
+
your GUI session). Load with `launchctl bootstrap gui/$(id -u) <plist>` /
|
|
144
|
+
`launchctl kickstart -k …`; remove with `launchctl bootout …`.
|
|
145
|
+
- **Windows** → **prints manual steps, registers nothing** (see below).
|
|
146
|
+
|
|
147
|
+
### Windows autostart is manual — on purpose
|
|
148
|
+
|
|
149
|
+
A program that writes an autostart entry (Startup VBS / Run key / scheduled task) and
|
|
150
|
+
launches a hidden process trips behavioral antivirus as trojan-like persistence —
|
|
151
|
+
Kaspersky's proactive-defense module flags the launching `python.exe` as
|
|
152
|
+
`PDM:Trojan.Win32.Generic`. A **user-created** Startup shortcut is trusted by the same AV.
|
|
153
|
+
|
|
154
|
+
So this package ships a windowless launcher, `codexcompw` (a Windows GUI-subsystem
|
|
155
|
+
exe — no console window at logon), and `install-service` just tells you how to point a
|
|
156
|
+
shortcut at it:
|
|
157
|
+
|
|
158
|
+
1. `Win+R` → `shell:startup` (opens the Startup folder).
|
|
159
|
+
2. New → Shortcut → target = the path from `where.exe codexcompw` (append
|
|
160
|
+
`--port N` if you use a custom port).
|
|
161
|
+
|
|
162
|
+
Delete the shortcut to disable it.
|
|
163
|
+
|
|
164
|
+
### Mirrored-networking shortcut (WSL ↔ Windows)
|
|
165
|
+
|
|
166
|
+
If your WSL2 uses `networkingMode=mirrored`, Windows and WSL **share `127.0.0.1`**. Then
|
|
167
|
+
you only need **one** proxy in total, on one side — run it in WSL (as a systemd service), and on
|
|
168
|
+
the Windows side just add the `openai_base_url` line to `~/.codex/config.toml` pointing at
|
|
169
|
+
the same `127.0.0.1:8787`. No second proxy or Windows autostart needed (the only cost is
|
|
170
|
+
that Windows Codex depends on the WSL proxy being up).
|
|
171
|
+
|
|
172
|
+
## Verify
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
curl -sS http://127.0.0.1:8787/healthz # {"ok":true,...}
|
|
176
|
+
journalctl --user -u codexcomp -f | grep -E 'round|done' # Linux/WSL
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
A live fold looks like this (two chained 516s beaten, answer correct):
|
|
180
|
+
|
|
181
|
+
```
|
|
182
|
+
round 1: in=21550 out=664 reason=516 total=22214 | n=1 buffered=['function_call'] -> continue
|
|
183
|
+
round 2: in=22078 out=652 reason=516 total=22730 | n=1 buffered=['function_call'] -> continue
|
|
184
|
+
round 3: in=22606 out=566 reason=291 total=23172 | n=None buffered=[...] -> clean
|
|
185
|
+
done: 3 round(s) | ... | status=completed stop=natural
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Develop
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
git clone https://github.com/dzshzx/codexcomp && cd codexcomp
|
|
192
|
+
uv sync
|
|
193
|
+
uv run python test_fold.py # fold state-machine self-test → ALL PASS
|
|
194
|
+
uv run codexcomp # run locally
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
Releases go out via PyPI Trusted Publishing (`.github/workflows/release.yml`, OIDC, no
|
|
198
|
+
stored token): push a `v*` tag and it builds + publishes automatically.
|
|
199
|
+
|
|
200
|
+
Layout:
|
|
201
|
+
|
|
202
|
+
- `codexcomp/fold.py` — fingerprint detection + fold state machine (transport-agnostic;
|
|
203
|
+
covered by `test_fold.py`).
|
|
204
|
+
- `codexcomp/server.py` — starlette transport: ws / SSE downstream, SSE upstream,
|
|
205
|
+
zstd/gzip request decompression, `/v1/*` passthrough.
|
|
206
|
+
- `codexcomp/cli.py` — CLI entry (`codexcomp`; loopback only; auth passthrough, stores
|
|
207
|
+
no credentials).
|
|
208
|
+
|
|
209
|
+
## Security & disclaimer
|
|
210
|
+
|
|
211
|
+
- The proxy is **auth passthrough** only: it forwards Codex's `Authorization` header and
|
|
212
|
+
never reads, stores, or logs any credential.
|
|
213
|
+
- It listens on the **loopback** address only — do not expose it on a non-loopback interface.
|
|
214
|
+
- **Unofficial**: it depends on upstream behavior that isn't a public contract (the
|
|
215
|
+
truncation fingerprint, the ws frame format). An OpenAI-side change may break it. Use at
|
|
216
|
+
your own risk.
|
|
217
|
+
- Continuation spends **extra real tokens** (see `metadata.proxy_billed_usage`); codexcomp
|
|
218
|
+
bounds this with an `n` window and a 3-round cap.
|
|
219
|
+
|
|
220
|
+
## Community
|
|
221
|
+
|
|
222
|
+
Built for and shared with the [**LINUX DO**](https://linux.do) community, where the
|
|
223
|
+
gpt-5.5 "516" degradation was diagnosed and discussed. Feedback and issues welcome there
|
|
224
|
+
and on [GitHub Issues](https://github.com/dzshzx/codexcomp/issues).
|
|
225
|
+
|
|
226
|
+
## License
|
|
227
|
+
|
|
228
|
+
[MIT](LICENSE). Fully open source, no closed parts.
|
|
229
|
+
|
|
230
|
+
Mechanism credit: [**neteroster/CodexCont**](https://github.com/neteroster/CodexCont) (MIT) —
|
|
231
|
+
this project reuses its 518n−2 detect-and-continue *idea* with an independent, from-scratch
|
|
232
|
+
implementation, and keeps the built-in provider intact (see [Differences](#differences-from-codexcont)).
|
|
233
|
+
The [LICENSE](LICENSE) file carries an attribution note for CodexCont (no CodexCont code is included).
|