codexcomp 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codexcomp/__init__.py ADDED
File without changes
codexcomp/cli.py ADDED
@@ -0,0 +1,108 @@
1
+ """codexcomp CLI entry point (installed via [project.scripts]).
2
+
3
+ Usage:
4
+ codexcomp [--host H] [--port P] [--upstream U] [--log-level L] run the proxy
5
+ codexcomp install-service [same flags] opt-in autostart for this platform
6
+ codexcomp uninstall-service remove the autostart entry
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import logging
12
+ import os
13
+ import socket
14
+
15
+ from . import service
16
+
17
# Number of consecutive ports (starting at the requested one) that _pick_port
# probes before giving up.
PORT_SCAN_TRIES = 20
18
+
19
+
20
+ def _add_run_flags(p: argparse.ArgumentParser) -> None:
21
+ p.add_argument("--host", default="127.0.0.1",
22
+ help="bind address (default: 127.0.0.1; keep it loopback)")
23
+ p.add_argument("--port", type=int, default=8787,
24
+ help="bind port (default: 8787). Must match Codex's openai_base_url; "
25
+ "if busy the proxy exits (a wired proxy must own its exact port).")
26
+ p.add_argument("--auto-port", action="store_true",
27
+ help="if --port is busy, scan for the next free port and print it. "
28
+ "Only for interactive one-off runs — you must then wire Codex to "
29
+ "the printed port. Not for a wired background service.")
30
+ p.add_argument("--upstream", default=None,
31
+ help="upstream base URL (default: https://chatgpt.com/backend-api/codex)")
32
+ p.add_argument("--log-level", default="info",
33
+ choices=["critical", "error", "warning", "info", "debug"])
34
+
35
+
36
+ def _port_in_use(host: str, port: int) -> bool:
37
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
38
+ s.settimeout(0.3)
39
+ return s.connect_ex((host, port)) == 0
40
+
41
+
42
def _pick_port(host: str, start: int) -> int:
    """Return the first free port at or after ``start``.

    At most PORT_SCAN_TRIES consecutive ports are probed, and the scan never
    goes past 65535 (probing a larger number raises OverflowError in the
    socket layer instead of reporting "busy"). When nothing free is found,
    ``start`` is returned so the subsequent bind fails loudly.
    """
    stop = min(start + PORT_SCAN_TRIES, 65536)  # 65535 is the highest valid TCP port
    for port in range(start, stop):
        if not _port_in_use(host, port):
            return port
    return start
49
+
50
+
51
def _serve(args) -> int:
    """Run the proxy under uvicorn and return a process exit code.

    Returns 1 without starting the server when the requested port is busy and
    --auto-port was not given; returns 0 after uvicorn exits.
    """
    import uvicorn

    bind_host = args.host
    requested = args.port

    # The server module is loaded by uvicorn from its import string below, so
    # the override has to be in the environment before that happens.
    if args.upstream:
        os.environ["CODEXCOMP_UPSTREAM_BASE"] = args.upstream

    if not args.auto_port:
        if _port_in_use(bind_host, requested):
            # A wired proxy must own its exact port — fail loudly, don't drift.
            message = (
                f"error: port {requested} is already in use. Free it, or pick another "
                "port with --port N (and set Codex's openai_base_url to match). "
                "Use --auto-port only for interactive one-off runs."
            )
            print(message, flush=True)
            return 1
        chosen = requested
    else:
        chosen = _pick_port(bind_host, requested)
        if chosen != requested:
            notice = (
                f"port {requested} in use; bound {chosen} instead — "
                f'wire Codex to openai_base_url = "http://{bind_host}:{chosen}/v1"'
            )
            print(notice, flush=True)

    logging.basicConfig(
        level=args.log_level.upper(),
        format="%(levelname)s:%(name)s:%(message)s",
    )
    uvicorn.run(
        "codexcomp.server:app",
        host=bind_host,
        port=chosen,
        log_level=args.log_level,
    )
    return 0
73
+
74
+
75
def main() -> None:
    """Parse the command line and dispatch; always exits via SystemExit.

    With no sub-command (or ``run``) the proxy is started; ``install-service``
    and ``uninstall-service`` delegate to the ``service`` module. The exit code
    is whatever the selected handler returns.
    """
    parser = argparse.ArgumentParser(
        prog="codexcomp",
        description=(
            "Local Responses proxy for Codex CLI: detects the gpt-5.5 518n-2 "
            "reasoning-truncation fingerprint, auto-continues thinking, and folds "
            "all rounds into one response. Wire Codex to it with the top-level "
            'config key: openai_base_url = "http://127.0.0.1:8787/v1". '
            "Run with no subcommand to start the proxy."
        ),
    )
    _add_run_flags(parser)
    sub = parser.add_subparsers(dest="cmd")

    p_install = sub.add_parser(
        "install-service",
        help="opt-in: register autostart (systemd user / launchd / scheduled task)")
    sub.add_parser("uninstall-service", help="remove the autostart entry")
    p_run = sub.add_parser("run", help="start the proxy (default when no subcommand)")

    for sub_parser in (p_install, p_run):
        _add_run_flags(sub_parser)
        # argparse parses a sub-command into a fresh namespace and copies every
        # attribute back, so sub-parser defaults would overwrite flags given
        # before the sub-command (``codexcomp --port 9000 run`` -> 8787).
        # Suppressing the sub-parser defaults means only options actually
        # present after the sub-command override the top-level values.
        for action in sub_parser._actions:
            action.default = argparse.SUPPRESS

    args = parser.parse_args()

    if args.cmd == "install-service":
        raise SystemExit(service.install(args.host, args.port, args.upstream, args.log_level))
    if args.cmd == "uninstall-service":
        raise SystemExit(service.uninstall())
    raise SystemExit(_serve(args))
105
+
106
+
107
# Allow running the module directly, in addition to the installed console script.
if __name__ == "__main__":
    main()
codexcomp/fold.py ADDED
@@ -0,0 +1,361 @@
1
+ """518n-2 truncation detection + round folding for the Codex Responses event stream.
2
+
3
+ gpt-5.5 reasoning gets cut at reasoning_tokens == 518*n - 2 (openai/codex#30364).
4
+ When a round ends on that fingerprint we replay the conversation plus the round's
5
+ reasoning items and a phase:"commentary" nudge, then fold every round into ONE
6
+ downstream response: reasoning streams live, each round's tentative final output
7
+ (message / tool calls) is buffered and only the clean round's output is flushed.
8
+
9
+ Transport-agnostic: `fold()` consumes upstream events as dicts and yields
10
+ downstream events as dicts; serialization (SSE / WebSocket) lives in server.py.
11
+
12
+ Mechanism credit: neteroster/CodexCont (MIT). Implementation is original.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import logging
17
+ from typing import Any, AsyncIterator, Awaitable, Callable
18
+
19
+ log = logging.getLogger("codexcomp.fold")
20
+
21
# Truncation period: a cut-off round ends at reasoning_tokens == STEP*n - 2.
STEP = 518
MIN_N = 1  # continue only when truncation tier n >= MIN_N
MAX_N = 6  # stop forcing once n > MAX_N (0 = no cap)
MAX_CONTINUE = 3  # continuation rounds after round 1 (runaway guard)
# Text of the phase:"commentary" nudge message replayed in continuation rounds.
MARKER_TEXT = "Continue thinking..."
# "include" entry added to every round's request body by next_round_body.
ENC_INCLUDE = "reasoning.encrypted_content"

# Event types that end a round's upstream stream.
TERMINAL_TYPES = ("response.completed", "response.failed", "response.incomplete")

# An opener returns the upstream event iterator for one round's body.
RoundOpener = Callable[[dict[str, Any]], Awaitable[AsyncIterator[dict[str, Any]]]]
32
+
33
+
34
class RoundOpenError(Exception):
    """Continuation round could not be opened (upstream HTTP >= 400).

    The exception message carries the status and at most the first 200
    characters of the upstream error body; the status is also kept on
    ``self.status``.
    """

    def __init__(self, status: int, detail: str):
        preview = detail[:200]
        message = f"upstream {status}: {preview}"
        super().__init__(message)
        self.status = status
40
+
41
+
42
# Sentinel an opener may yield to signal upstream sent [DONE]. It is compared
# by identity (``ev is DONE``), so it must stay a single shared object.
DONE = object()
43
+
44
+
45
+ # --- fingerprint -------------------------------------------------------------
46
+
47
+
48
def reasoning_tokens(usage: dict[str, Any] | None) -> int | None:
    """Return ``usage["output_tokens_details"]["reasoning_tokens"]`` as an int.

    Missing or None levels (usage itself, the details dict, or the counter)
    yield None rather than raising.
    """
    details = (usage or {}).get("output_tokens_details") or {}
    raw = details.get("reasoning_tokens")
    if raw is None:
        return None
    return int(raw)
51
+
52
+
53
def tier_n(tokens: int | None) -> int | None:
    """n for reasoning_tokens == STEP*n - 2 (516, 1034, ...), else None."""
    if tokens is None:
        return None
    if tokens < STEP - 2:
        return None
    tier, remainder = divmod(tokens + 2, STEP)
    return tier if remainder == 0 else None
58
+
59
+
60
def in_continue_window(n: int | None) -> bool:
    """True when tier ``n`` lies in [MIN_N, MAX_N]; MAX_N == 0 disables the upper bound."""
    if n is None or n < MIN_N:
        return False
    return MAX_N == 0 or n <= MAX_N
62
+
63
+
64
+ # --- continuation payload ----------------------------------------------------
65
+
66
+
67
def commentary_nudge() -> dict[str, Any]:
    """Build the phase:"commentary" assistant message used as the resume nudge.

    It is replayed together with the encrypted reasoning items to provoke the
    model into continuing its reasoning.
    """
    text_part = {"type": "output_text", "text": MARKER_TEXT}
    return dict(
        type="message",
        role="assistant",
        content=[text_part],
        phase="commentary",
    )
76
+
77
+
78
def next_round_body(base_body: dict[str, Any], input_items: list[Any]) -> dict[str, Any]:
    """Derive one round's request body from the agent's original request.

    The result is a shallow copy of ``base_body`` with: ``stream`` forced on,
    ``input`` replaced by ``input_items``, ``include`` stringified and extended
    with ENC_INCLUDE when absent, and ``previous_response_id`` removed (state
    is carried in the replayed items). ``base_body`` itself is not modified.
    """
    include = [str(entry) for entry in (base_body.get("include") or [])]
    if ENC_INCLUDE not in include:
        include.append(ENC_INCLUDE)

    body = {key: value for key, value in base_body.items() if key != "previous_response_id"}
    body.update(stream=True, input=input_items, include=include)
    return body
91
+
92
+
93
+ # --- usage accounting --------------------------------------------------------
94
+
95
+
96
def _sum_usage(acc: dict[str, Any], usage: dict[str, Any] | None) -> None:
    """Add one round's ``usage`` counters into the running total ``acc`` (in place).

    Only counters that are present (not None) are added: the three top-level
    token counts, ``input_tokens_details.cached_tokens`` and
    ``output_tokens_details.reasoning_tokens``. A falsy ``usage`` is a no-op.
    """
    if not usage:
        return

    for key in ("input_tokens", "output_tokens", "total_tokens"):
        value = usage.get(key)
        if value is not None:
            acc[key] = acc.get(key, 0) + int(value)

    cached = (usage.get("input_tokens_details") or {}).get("cached_tokens")
    if cached is not None:
        in_details = acc.setdefault("input_tokens_details", {})
        in_details["cached_tokens"] = in_details.get("cached_tokens", 0) + int(cached)

    reasoning = reasoning_tokens(usage)
    if reasoning is not None:
        out_details = acc.setdefault("output_tokens_details", {})
        out_details["reasoning_tokens"] = out_details.get("reasoning_tokens", 0) + reasoning
114
+
115
+
116
def agent_usage(
    first: dict[str, Any] | None,
    summed: dict[str, Any],
    final_round: dict[str, Any] | None,
    flushed_final: bool,
) -> dict[str, Any]:
    """Build the usage block reported downstream, as if the fold were one response.

    * input / cached tokens are taken from round 1 only (summing hidden rounds
      would fake a blown context window);
    * reasoning tokens are the sum over all rounds, since every round's
      reasoning reached the agent;
    * output adds the non-reasoning part of the final round, and only when
      that round's output was actually flushed.
    """
    round_one = first or {}
    input_tokens = round_one.get("input_tokens") or 0
    cached_tokens = (round_one.get("input_tokens_details") or {}).get("cached_tokens")
    reasoning_total = (summed.get("output_tokens_details") or {}).get("reasoning_tokens") or 0

    visible_output = 0
    if flushed_final and final_round:
        final_out = final_round.get("output_tokens") or 0
        final_reasoning = reasoning_tokens(final_round) or 0
        visible_output = max(0, final_out - final_reasoning)

    result: dict[str, Any] = {
        "input_tokens": input_tokens,
        "output_tokens": reasoning_total + visible_output,
        "total_tokens": input_tokens + reasoning_total + visible_output,
        "output_tokens_details": {"reasoning_tokens": reasoning_total},
    }
    if cached_tokens is not None:
        result["input_tokens_details"] = {"cached_tokens": cached_tokens}
    return result
143
+
144
+
145
def _fmt(usage: dict[str, Any] | None) -> str:
    """One-line ``in=… out=… reason=… total=…`` summary of a usage dict for logs."""
    u = usage or {}
    fields = [
        f"in={u.get('input_tokens')}",
        f"out={u.get('output_tokens')}",
        f"reason={reasoning_tokens(u)}",
        f"total={u.get('total_tokens')}",
    ]
    return " ".join(fields)
151
+
152
+
153
+ # --- terminal reconstruction ---------------------------------------------------
154
+
155
+
156
+ def _terminal_event(
157
+ upstream_terminal: dict[str, Any] | None,
158
+ base_response: dict[str, Any] | None,
159
+ output: list[dict[str, Any]],
160
+ usage: dict[str, Any],
161
+ rounds: list[dict[str, Any]],
162
+ billed: dict[str, Any],
163
+ stopped_reason: str | None,
164
+ *,
165
+ incomplete_reason: str | None = None,
166
+ ) -> dict[str, Any]:
167
+ """Downstream terminal: round-1 response identity, upstream status (or a
168
+ synthetic incomplete), our reconstructed output + single-response usage,
169
+ true billed cost and per-round breakdown in metadata."""
170
+ tresp = (upstream_terminal or {}).get("response") or {}
171
+ resp = dict(base_response or tresp)
172
+ resp["output"] = output
173
+ resp["usage"] = usage
174
+ metadata = dict(resp.get("metadata") or {})
175
+ metadata["proxy_rounds"] = rounds
176
+ metadata["proxy_billed_usage"] = billed
177
+ if stopped_reason:
178
+ metadata["proxy_stopped_reason"] = stopped_reason
179
+ resp["metadata"] = metadata
180
+ if incomplete_reason is not None:
181
+ resp["status"] = "incomplete"
182
+ resp["incomplete_details"] = {"reason": incomplete_reason}
183
+ return {"type": "response.incomplete", "response": resp}
184
+ resp["status"] = tresp.get("status", "completed")
185
+ if "incomplete_details" in tresp:
186
+ resp["incomplete_details"] = tresp["incomplete_details"]
187
+ return {"type": (upstream_terminal or {}).get("type", "response.completed"), "response": resp}
188
+
189
+
190
+ # --- the fold ----------------------------------------------------------------
191
+
192
+
193
async def fold(
    base_body: dict[str, Any],
    open_round: RoundOpener,
) -> AsyncIterator[dict[str, Any] | object]:
    """Run one downstream request as one or more upstream rounds, folded together.

    Yields downstream events (dicts, plus the DONE sentinel when upstream sent
    one). Every yielded event gets a proxy-owned sequence_number; output_index
    is renumbered into one downstream space across rounds.

    Per round: reasoning items are forwarded live, every other output item is
    buffered. When the round ends on the truncation fingerprint (and the other
    continue conditions hold) the buffered output is dropped and a new round is
    opened with the reasoning replayed; otherwise the buffered output is
    flushed and a single terminal event closes the response.

    Args:
        base_body: the agent's request body; read, never modified here.
        open_round: coroutine that opens an upstream round for a request body
            and returns its event iterator.

    Raises:
        RoundOpenError: propagated when the first round cannot be opened. A
            failed continuation round is reported as a synthetic
            ``response.incomplete`` event instead.
    """
    # NOTE(review): list() would split a plain-string "input" into characters —
    # confirm callers always send a list of items.
    orig_input = list(base_body.get("input") or [])
    seq = 0          # next downstream sequence_number
    ds_oi = 0        # next downstream output_index
    base_response: dict[str, Any] | None = None   # round 1 "response.created" payload
    saw_done = False
    final_output: list[dict[str, Any]] = []       # items reported in the terminal event
    replay_tail: list[Any] = []                   # reasoning + nudges appended to the input
    summed_usage: dict[str, Any] = {}             # usage summed over all rounds
    first_usage: dict[str, Any] | None = None     # usage of round 1 only
    rounds_info: list[dict[str, Any]] = []        # per-round breakdown for metadata

    def stamp(ev: dict[str, Any]) -> dict[str, Any]:
        # Overwrite the event's sequence_number with the proxy's own counter.
        nonlocal seq
        ev["sequence_number"] = seq
        seq += 1
        return ev

    round_no = 0
    # Not wrapped in try/except: a RoundOpenError here reaches the caller.
    events = await open_round(next_round_body(base_body, orig_input))

    while True:
        round_no += 1
        oi_to_ds: dict[Any, int] = {}   # upstream output_index -> downstream output_index
        kind: dict[Any, str] = {}       # upstream output_index -> "reasoning" | "buffered"
        buffered: list[dict[str, Any]] = []  # {oi, item, events}
        round_reasoning: list[dict[str, Any]] = []
        terminal: dict[str, Any] | None = None
        usage: dict[str, Any] | None = None

        try:
            async for ev in events:
                if ev is DONE:
                    saw_done = True
                    continue
                etype = ev.get("type", "")

                # Lifecycle events are forwarded for round 1 only, so the
                # downstream client sees a single response being created.
                if etype in ("response.created", "response.in_progress"):
                    if round_no == 1:
                        if etype == "response.created":
                            base_response = ev.get("response") or {}
                        yield stamp(ev)
                    continue
                # A terminal event ends the round; it is rebuilt below, never forwarded as-is.
                if etype in TERMINAL_TYPES:
                    terminal = ev
                    usage = (ev.get("response") or {}).get("usage")
                    break

                oi = ev.get("output_index")
                if etype == "response.output_item.added":
                    item = ev.get("item") or {}
                    if item.get("type") == "reasoning":
                        # Reasoning streams live under a fresh downstream index.
                        kind[oi] = "reasoning"
                        oi_to_ds[oi] = ds_oi
                        ev["output_index"] = ds_oi
                        ds_oi += 1
                        yield stamp(ev)
                    else:
                        # Anything else is tentative until the round proves clean.
                        kind[oi] = "buffered"
                        buffered.append({"oi": oi, "item": item, "events": [ev]})
                    continue

                k = kind.get(oi)
                if k == "reasoning":
                    if oi in oi_to_ds:
                        ev["output_index"] = oi_to_ds[oi]
                    if etype == "response.output_item.done":
                        # The finished item is kept for replay and for the final output.
                        item = ev.get("item") or {}
                        round_reasoning.append(item)
                        final_output.append(item)
                    yield stamp(ev)
                elif k == "buffered":
                    entry = next(e for e in buffered if e["oi"] == oi)
                    entry["events"].append(ev)
                    if etype == "response.output_item.done":
                        # Prefer the completed item over the one from the "added" event.
                        entry["item"] = ev.get("item") or entry["item"]
                else:
                    yield stamp(ev)  # unknown scope: forward best-effort
        except RoundOpenError:
            raise  # only raised before any event; handled by caller for round 1
        except Exception as exc:  # upstream died mid-stream
            log.warning("round %d: upstream error mid-stream: %r", round_no, exc)
            _sum_usage(summed_usage, usage)
            yield stamp(_terminal_event(
                None, base_response, final_output,
                agent_usage(first_usage, summed_usage, usage, flushed_final=False),
                rounds_info, summed_usage, "upstream_error",
                incomplete_reason="upstream_error"))
            return

        # ---- round ended: decide continue / stop ----------------------------
        _sum_usage(summed_usage, usage)
        if round_no == 1:
            first_usage = usage
        rt = reasoning_tokens(usage)
        n = tier_n(rt)
        rounds_info.append({"round": round_no, "reasoning_tokens": rt, "n": n})
        # Only the last reasoning item of the round is checked for encrypted content.
        has_enc = bool(round_reasoning and round_reasoning[-1].get("encrypted_content"))

        do_continue = (
            terminal is not None
            and in_continue_window(n)
            and has_enc
            and round_no <= MAX_CONTINUE
        )
        # A stop reason is recorded only when the fingerprint matched (n is not
        # None) but the round is not continued.
        stopped_reason = None
        if not do_continue and n is not None:
            stopped_reason = (
                "no_encrypted_content" if not has_enc
                else "max_continue" if round_no > MAX_CONTINUE
                else "tier_out_of_window"
            )

        log.info(
            "round %d: %s | n=%s buffered=%s -> %s",
            round_no, _fmt(usage), n,
            [e["item"].get("type") for e in buffered],
            "continue" if do_continue else
            "upstream_eof" if terminal is None else stopped_reason or "clean",
        )

        if do_continue:
            # This round's buffered output is discarded; its reasoning plus a
            # nudge is appended to the replayed input for the next round.
            replay_tail.extend([*round_reasoning, commentary_nudge()])
            try:
                events = await open_round(next_round_body(base_body, orig_input + replay_tail))
            except RoundOpenError as exc:
                log.warning("continuation round %d failed to open: %s", round_no + 1, exc)
                yield stamp(_terminal_event(
                    None, base_response, final_output,
                    agent_usage(first_usage, summed_usage, usage, flushed_final=False),
                    rounds_info, summed_usage, "upstream_error",
                    incomplete_reason="upstream_error"))
                return
            continue

        if terminal is None:  # EOF with no terminal: tentative output is NOT an answer
            log.warning("round %d: upstream EOF with no terminal event", round_no)
            yield stamp(_terminal_event(
                None, base_response, final_output,
                agent_usage(first_usage, summed_usage, usage, flushed_final=False),
                rounds_info, summed_usage, "upstream_eof",
                incomplete_reason="upstream_eof"))
            return

        # Clean stop: flush this round's buffered output as the real answer.
        for entry in buffered:
            for ev in entry["events"]:
                if "output_index" in ev:
                    ev["output_index"] = ds_oi
                yield stamp(ev)
            ds_oi += 1  # one downstream index per buffered item
            final_output.append(entry["item"])

        status = (terminal.get("response") or {}).get("status", "completed")
        log.info("done: %d round(s) | %s | status=%s stop=%s",
                 round_no, _fmt(summed_usage), status, stopped_reason or "natural")
        yield stamp(_terminal_event(
            terminal, base_response, final_output,
            agent_usage(first_usage, summed_usage, usage, flushed_final=True),
            rounds_info, summed_usage, stopped_reason))
        if saw_done:
            yield DONE
        return
codexcomp/server.py ADDED
@@ -0,0 +1,248 @@
1
+ """codexcomp transport layer.
2
+
3
+ Downstream (Codex, wired via top-level `openai_base_url`):
4
+ * WebSocket /v1/responses — Codex's preferred transport (openai-beta
5
+ responses_websockets): client sends {"type":"response.create", ...body...}
6
+ frames, we answer with response.* event frames; the connection is reused
7
+ for sequential requests (prewarm + turns).
8
+ * POST /v1/responses — SSE fallback; request body may be zstd/gzip
9
+ compressed (built-in provider sends zstd when request compression is on).
10
+ * anything else under /v1/ — transparent passthrough to the upstream base
11
+ (Codex refreshes its model catalog via GET /v1/models).
12
+
13
+ Upstream is always the SSE POST endpoint; the fold state machine (fold.py) is
14
+ transport-agnostic.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import gzip
19
+ import json
20
+ import logging
21
+ import os
22
+ import zlib
23
+ from typing import Any, AsyncIterator
24
+
25
+ import httpx
26
+ import zstandard
27
+ from starlette.applications import Starlette
28
+ from starlette.requests import Request
29
+ from starlette.responses import JSONResponse, Response, StreamingResponse
30
+ from starlette.routing import Route, WebSocketRoute
31
+ from starlette.websockets import WebSocket, WebSocketDisconnect
32
+
33
+ from .fold import DONE, RoundOpenError, fold
34
+
35
+ log = logging.getLogger("codexcomp.server")
36
+
37
# Upstream base URL, read once at import time from CODEXCOMP_UPSTREAM_BASE.
# Trailing slashes are stripped so path suffixes can be joined with one "/".
UPSTREAM_BASE = os.environ.get(
    "CODEXCOMP_UPSTREAM_BASE", "https://chatgpt.com/backend-api/codex"
).rstrip("/")
# Endpoint every upstream round is POSTed to.
RESPONSES_URL = UPSTREAM_BASE + "/responses"

# hop-by-hop / transport-specific headers never forwarded upstream
# (names are lower-case; lookups lower-case the incoming name first)
_DROP_HEADERS = {
    "host", "connection", "upgrade", "keep-alive", "te", "trailer",
    "transfer-encoding", "proxy-authorization", "proxy-connection",
    "content-length", "content-encoding", "accept-encoding",
    "sec-websocket-key", "sec-websocket-version", "sec-websocket-extensions",
    "sec-websocket-protocol",
    "openai-beta",  # advertises the ws protocol; upstream round is plain SSE
}
51
+
52
+
53
def upstream_headers(raw: Any) -> dict[str, str]:
    """Turn raw downstream header pairs into the header dict sent upstream.

    ``raw`` is an iterable of (name, value) pairs, each bytes or str. Headers
    listed in _DROP_HEADERS (case-insensitive) are left out, and ``accept`` is
    always set to ``text/event-stream``.
    """
    def as_text(part):
        return part.decode() if isinstance(part, bytes) else part

    forwarded = {}
    for raw_name, raw_value in raw:
        name = as_text(raw_name)
        if name.lower() not in _DROP_HEADERS:
            forwarded[name] = as_text(raw_value)
    forwarded["accept"] = "text/event-stream"
    return forwarded
62
+
63
+
64
def decompress_body(data: bytes, encoding: str | None) -> bytes:
    """Decode a request body according to its Content-Encoding.

    Args:
        data: the raw body bytes.
        encoding: the Content-Encoding header value; None, empty or
            ``identity`` returns ``data`` unchanged.

    Returns:
        The decompressed bytes.

    Raises:
        ValueError: for an unsupported encoding, and also for a corrupt or
            truncated payload. The decoders' own exception types
            (OSError/EOFError/zlib.error/ZstdError) are wrapped so callers that
            answer ValueError with HTTP 400 also cover malformed bodies.
    """
    enc = (encoding or "").lower().strip()
    if not enc or enc == "identity":
        return data
    if enc == "zstd":
        try:
            return zstandard.ZstdDecompressor().decompressobj().decompress(data)
        except zstandard.ZstdError as exc:
            raise ValueError(f"invalid zstd body: {exc}") from exc
    if enc == "gzip":
        try:
            return gzip.decompress(data)
        except (OSError, EOFError, zlib.error) as exc:
            raise ValueError(f"invalid gzip body: {exc}") from exc
    if enc == "deflate":
        try:
            return zlib.decompress(data)
        except zlib.error as exc:
            raise ValueError(f"invalid deflate body: {exc}") from exc
    raise ValueError(f"unsupported content-encoding: {enc}")
75
+
76
+
77
+ # --- upstream SSE rounds ------------------------------------------------------
78
+
79
+
80
def parse_sse(text_chunks: AsyncIterator[str]) -> AsyncIterator[dict | object]:
    """Incremental SSE parser.

    Consumes decoded text chunks and yields one parsed JSON value per event
    (built from its ``data:`` lines), or the DONE sentinel for ``data: [DONE]``.
    Events without data lines are skipped; unparseable data is logged and
    dropped. Both LF and CRLF line endings are accepted, including a CRLF pair
    split across two chunks. Text left in the buffer at end of stream without
    a terminating blank line is discarded.
    """

    async def gen():
        buf = ""
        async for chunk in text_chunks:
            buf += chunk
            # Normalise CRLF so "\r\n\r\n" is seen as an event boundary. A lone
            # trailing "\r" stays in the buffer and is folded once the matching
            # "\n" arrives with the next chunk.
            if "\r" in buf:
                buf = buf.replace("\r\n", "\n")
            while "\n\n" in buf:
                block, buf = buf.split("\n\n", 1)
                data_lines = [
                    line[5:].lstrip()
                    for line in block.splitlines()
                    if line.startswith("data:")
                ]
                if not data_lines:
                    continue
                data = "\n".join(data_lines)
                if data == "[DONE]":
                    yield DONE
                    continue
                try:
                    yield json.loads(data)
                except json.JSONDecodeError:
                    log.warning("unparseable SSE data (len=%d), dropped", len(data))

    return gen()
107
+
108
+
109
class UpstreamRounds:
    """RoundOpener bound to one downstream request's headers.

    At most one upstream streaming response is held open at a time: ``open``
    closes the previous round's response before starting the next, and
    ``aclose`` releases whatever is still open.
    """

    def __init__(self, client: httpx.AsyncClient, headers: dict[str, str]):
        self.client = client
        self.headers = headers
        # Streaming response of the round currently being read, if any.
        self._resp: httpx.Response | None = None

    async def open(self, body: dict[str, Any]) -> AsyncIterator[dict | object]:
        """POST ``body`` to the upstream responses endpoint and return its event iterator.

        Raises:
            RoundOpenError: when upstream answers with status >= 400; the error
                body is read for the message and the response is closed.
        """
        await self.aclose()
        req = self.client.build_request(
            "POST", RESPONSES_URL,
            content=json.dumps(body, ensure_ascii=False).encode(),
            headers={**self.headers, "content-type": "application/json"},
            timeout=httpx.Timeout(connect=30, read=600, write=60, pool=30),
        )
        resp = await self.client.send(req, stream=True)
        if resp.status_code >= 400:
            # Close the response even when reading the error body fails,
            # otherwise the streamed connection would never be released.
            try:
                detail = (await resp.aread()).decode(errors="replace")
            finally:
                await resp.aclose()
            raise RoundOpenError(resp.status_code, detail)
        self._resp = resp
        return parse_sse(resp.aiter_text())

    async def aclose(self) -> None:
        """Close the currently open upstream response, if any (best-effort)."""
        resp, self._resp = self._resp, None
        if resp is None:
            return
        try:
            await resp.aclose()
        except Exception as exc:
            # Closing is cleanup only; a failure must not mask the real outcome,
            # but leave a trace for debugging.
            log.debug("ignoring error while closing upstream response: %r", exc)
141
+
142
+
143
+ # --- downstream endpoints -----------------------------------------------------
144
+
145
+
146
def sse_bytes(ev: dict | object) -> bytes:
    """Serialize one downstream event as an encoded SSE frame.

    The DONE sentinel becomes ``data: [DONE]``; any other event is written as
    an ``event:`` line (its ``type``, default ``message``) plus a JSON ``data:``
    line.
    """
    if ev is DONE:
        return b"data: [DONE]\n\n"
    event_name = ev.get("type", "message")  # type: ignore[union-attr]
    payload = json.dumps(ev, ensure_ascii=False)
    frame = "event: {}\ndata: {}\n\n".format(event_name, payload)
    return frame.encode()
151
+
152
+
153
async def responses_post(request: Request) -> Response:
    """POST /v1/responses: run the fold and stream the result as SSE.

    The request body may be compressed (see decompress_body). A body that
    cannot be decoded, is not valid JSON, or is not a JSON object is rejected
    with HTTP 400 before any upstream call. When upstream rejects the first
    round, a single ``response.failed`` event is streamed instead.
    """
    raw = await request.body()
    try:
        raw = decompress_body(raw, request.headers.get("content-encoding"))
        body = json.loads(raw)
    # ValueError covers json.JSONDecodeError, bad text encodings and
    # unsupported content-encodings; the remaining types are what the
    # gzip/zlib/zstd decoders raise for corrupt or truncated payloads.
    except (ValueError, OSError, EOFError, zlib.error, zstandard.ZstdError) as exc:
        return JSONResponse({"error": f"bad request body: {exc}"}, status_code=400)
    if not isinstance(body, dict):
        # fold() reads the body as a mapping; anything else would crash mid-stream.
        return JSONResponse(
            {"error": "bad request body: expected a JSON object"}, status_code=400)

    rounds = UpstreamRounds(request.app.state.client, upstream_headers(request.headers.raw))

    async def stream() -> AsyncIterator[bytes]:
        try:
            async for ev in fold(body, rounds.open):
                yield sse_bytes(ev)
        except RoundOpenError as exc:  # round 1 rejected: surface upstream error
            yield sse_bytes({
                "type": "response.failed",
                "response": {"status": "failed",
                             "error": {"message": str(exc), "code": exc.status}},
            })
        finally:
            await rounds.aclose()

    return StreamingResponse(stream(), media_type="text/event-stream")
177
+
178
+
179
async def responses_ws(ws: WebSocket) -> None:
    """WebSocket /v1/responses: serve sequential requests on one connection.

    Each ``{"type": "response.create", ...body...}`` frame is folded and
    answered with one text frame per downstream event (the DONE sentinel is
    not sent). Frames of another type, and JSON frames that are not objects,
    are ignored. The handler returns on disconnect or on a frame that is not
    valid JSON; the upstream response is always closed on exit.
    """
    await ws.accept()
    headers = upstream_headers(ws.headers.raw)
    rounds = UpstreamRounds(ws.app.state.client, headers)
    try:
        while True:
            try:
                envelope = json.loads(await ws.receive_text())
            except (WebSocketDisconnect, json.JSONDecodeError):
                return
            if not isinstance(envelope, dict):
                # Valid JSON but not an object (list, string, number, ...):
                # calling .get() on it would raise and tear down the connection.
                log.info("ws: ignoring non-object frame (%s)", type(envelope).__name__)
                continue
            if envelope.get("type") != "response.create":
                log.info("ws: ignoring frame type %s", envelope.get("type"))
                continue
            body = {k: v for k, v in envelope.items() if k != "type"}
            try:
                async for ev in fold(body, rounds.open):
                    if ev is DONE:
                        continue
                    await ws.send_text(json.dumps(ev, ensure_ascii=False))
            except RoundOpenError as exc:
                await ws.send_text(json.dumps({
                    "type": "response.failed",
                    "response": {"status": "failed",
                                 "error": {"message": str(exc), "code": exc.status}},
                }))
    except WebSocketDisconnect:
        pass
    finally:
        await rounds.aclose()
208
+
209
+
210
async def passthrough(request: Request) -> Response:
    """Transparent proxy for every other /v1/* call (e.g. GET /v1/models).

    The request is replayed against UPSTREAM_BASE with the same method, path
    suffix, query string and (decompressed) body; the upstream status, body
    and headers are returned, minus the framing headers that no longer apply
    to the re-sent body.

    Returns HTTP 400 when the request body cannot be decoded and HTTP 502 when
    the upstream request fails at the transport level.
    """
    suffix = request.path_params["path"]
    url = f"{UPSTREAM_BASE}/{suffix}"
    if request.url.query:
        url += "?" + request.url.query
    content = await request.body()
    if content:
        try:
            content = decompress_body(content, request.headers.get("content-encoding"))
        except (ValueError, OSError, EOFError, zlib.error, zstandard.ZstdError) as exc:
            return JSONResponse({"error": f"bad request body: {exc}"}, status_code=400)
    headers = upstream_headers(request.headers.raw)
    headers.pop("accept", None)  # let upstream pick the type; this is not an SSE round
    try:
        upstream = await request.app.state.client.request(
            request.method, url, content=content or None, headers=headers,
            timeout=httpx.Timeout(60),
        )
    except httpx.HTTPError as exc:
        # Connection refused, DNS failure, timeout, ...: answer as a gateway
        # error instead of an unhandled 500.
        log.warning("passthrough %s /v1/%s failed: %r", request.method, suffix, exc)
        return JSONResponse({"error": f"upstream request failed: {exc!r}"}, status_code=502)
    drop = {"content-encoding", "transfer-encoding", "connection", "content-length"}
    return Response(
        upstream.content, status_code=upstream.status_code,
        headers={k: v for k, v in upstream.headers.items() if k.lower() not in drop},
    )
230
+
231
+
232
async def health(_: Request) -> JSONResponse:
    """Liveness endpoint: reports ``ok`` and the configured upstream base URL."""
    payload = {"ok": True, "upstream": UPSTREAM_BASE}
    return JSONResponse(payload)
234
+
235
+
236
def build_app() -> Starlette:
    """Create the Starlette application and attach the shared HTTP client.

    Route order matters: the specific /v1/responses handlers are listed before
    the catch-all /v1/{path} passthrough.
    """
    passthrough_methods = ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD"]
    routes = [
        Route("/healthz", health),
        Route("/v1/responses", responses_post, methods=["POST"]),
        WebSocketRoute("/v1/responses", responses_ws),
        Route("/v1/{path:path}", passthrough, methods=passthrough_methods),
    ]
    application = Starlette(routes=routes)
    # One client for the whole app, reached by handlers via app.state.client.
    application.state.client = httpx.AsyncClient(trust_env=True, http2=False)
    return application
246
+
247
+
248
# Module-level ASGI application, built once when this module is imported.
app = build_app()
codexcomp/service.py ADDED
@@ -0,0 +1,231 @@
1
+ """Optional autostart registration — strictly opt-in.
2
+
3
+ `codexcomp install-service` writes and activates a per-user autostart entry
4
+ for the current platform; `uninstall-service` removes it. Plain `uv tool install`
5
+ never touches any of this — autostart is always the user's explicit choice.
6
+
7
+ Per-user (not system-wide) on every platform: the proxy is a loopback service
8
+ used by Codex inside the user's own login session, needs the user's proxy
9
+ environment to reach upstream, and requires no root.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import os
14
+ import platform
15
+ import shutil
16
+ import subprocess
17
+ import sys
18
+ from pathlib import Path
19
+
20
# Name used for the PATH lookup of the executable, the systemd unit file name
# and the macOS log file name.
LABEL = "codexcomp"
# launchd job label; also the base name of the LaunchAgent plist file.
MAC_LABEL = "com.dzshzx.codexcomp"
22
+
23
+
24
def _resolve_exe() -> str:
    """Return the absolute path of the installed console-script executable.

    A PATH hit for the command name wins (the stable ~/.local/bin entry).
    Otherwise the absolute form of sys.argv[0] is used; on Windows, where
    sys.argv[0] drops the ``.exe`` suffix of console-script launchers, the
    suffix is re-attached when that file exists.
    """
    on_path = shutil.which(LABEL)
    if on_path:
        return on_path

    candidate = os.path.abspath(sys.argv[0])
    needs_suffix = (
        os.name == "nt"
        and not candidate.lower().endswith(".exe")
        and os.path.exists(candidate + ".exe")
    )
    return candidate + ".exe" if needs_suffix else candidate
38
+
39
+
40
def _exe_and_args(host: str | None, port: int | None,
                  upstream: str | None, log_level: str | None) -> list[str]:
    """Build the service command line: resolved executable plus non-default flags.

    A flag is emitted only when its value is set and differs from the CLI
    default, which keeps the registered command line minimal.
    """
    argv = [_resolve_exe()]
    # (flag, value, CLI default); --upstream has no default to compare against.
    candidates = (
        ("--host", host, "127.0.0.1"),
        ("--port", port, 8787),
        ("--upstream", upstream, None),
        ("--log-level", log_level, "info"),
    )
    for flag, value, default in candidates:
        if value and value != default:
            argv.extend([flag, str(value)])
    return argv
53
+
54
+
55
+ def _run(cmd: list[str], *, check: bool = True) -> subprocess.CompletedProcess:
56
+ # errors="replace": Windows tools (taskkill/schtasks) emit localized,
57
+ # non-UTF-8 output that would otherwise raise UnicodeDecodeError.
58
+ return subprocess.run(cmd, check=check, capture_output=True,
59
+ text=True, errors="replace")
60
+
61
+
62
+ # --- Linux (systemd user unit) ----------------------------------------------
63
+
64
+
65
def _systemd_unit_path() -> Path:
    """Return the path of the per-user systemd unit file.

    The base is $XDG_CONFIG_HOME when it is set to an absolute path, else
    ~/.config. An empty or relative value is treated as unset (as the XDG Base
    Directory specification requires); otherwise the unit would be written
    relative to the current working directory.
    """
    configured = os.environ.get("XDG_CONFIG_HOME")
    if configured and os.path.isabs(configured):
        base = Path(configured)
    else:
        base = Path.home() / ".config"
    return base / "systemd" / "user" / f"{LABEL}.service"
68
+
69
+
70
def _systemd_quote(arg: str) -> str:
    """Render one argument for an ExecStart= line.

    ``%`` and ``$`` are doubled so systemd does not expand them as specifiers
    or variables. An argument that is empty or contains whitespace, quotes or
    backslashes is wrapped in double quotes with ``\\`` and ``"`` escaped.
    """
    escaped = arg.replace("%", "%%").replace("$", "$$")
    if escaped and not any(ch.isspace() or ch in "\"'\\" for ch in escaped):
        return escaped
    return '"' + escaped.replace("\\", "\\\\").replace('"', '\\"') + '"'


def _install_linux(argv: list[str]) -> None:
    """Write, enable and start the systemd user unit that runs ``argv``.

    Raises:
        RuntimeError: when systemctl is not available on PATH.
        subprocess.CalledProcessError: when daemon-reload or enable fails.
    """
    if not shutil.which("systemctl"):
        raise RuntimeError(
            "systemctl not found. Write a unit manually or use your init system; "
            "see the systemd example in the project README.")
    # Quote each argument: a plain space-join breaks on paths with spaces and
    # lets systemd expand "%" / "$" inside arguments such as the upstream URL.
    exec_start = " ".join(_systemd_quote(arg) for arg in argv)
    unit = f"""[Unit]
Description=codexcomp: local Responses proxy folding gpt-5.5 518n-2 reasoning truncation
After=network-online.target

[Service]
ExecStart={exec_start}
Restart=on-failure
RestartSec=2
# Drop a possible SOCKS proxy from the user manager env; httpx honors HTTP(S)_PROXY.
UnsetEnvironment=ALL_PROXY all_proxy SOCKS_PROXY socks_proxy

[Install]
WantedBy=default.target
"""
    path = _systemd_unit_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding: the executable path may contain non-ASCII characters
    # and the locale default is not guaranteed to be UTF-8.
    path.write_text(unit, encoding="utf-8")
    _run(["systemctl", "--user", "daemon-reload"])
    _run(["systemctl", "--user", "enable", "--now", f"{LABEL}.service"])
    print(f"installed + started systemd user service: {path}")
    print(" tip: run 'loginctl enable-linger' once to start it at boot without login")
    print(f" disable: codexcomp uninstall-service (or systemctl --user disable --now {LABEL})")
98
+
99
+
100
def _uninstall_linux() -> None:
    """Disable and stop the systemd user unit, then delete its unit file.

    Every step is best-effort: systemctl failures are ignored and a missing
    unit file is reported rather than treated as an error.
    """
    unit_name = f"{LABEL}.service"
    if shutil.which("systemctl"):
        _run(["systemctl", "--user", "disable", "--now", unit_name], check=False)

    path = _systemd_unit_path()
    existed = path.exists()
    path.unlink(missing_ok=True)

    if shutil.which("systemctl"):
        _run(["systemctl", "--user", "daemon-reload"], check=False)

    note = "" if existed else " (was not present)"
    print(f"removed systemd user service{note}: {path}")
109
+
110
+
111
+ # --- macOS (launchd LaunchAgent) --------------------------------------------
112
+
113
+
114
def _plist_path() -> Path:
    """Return the path of the per-user LaunchAgent plist for this tool."""
    agents_dir = Path.home().joinpath("Library", "LaunchAgents")
    return agents_dir / f"{MAC_LABEL}.plist"
116
+
117
+
118
def _install_macos(argv: list[str]) -> None:
    """Write the LaunchAgent plist that runs ``argv`` and (re)load it.

    The plist is produced with plistlib, so arguments containing XML
    metacharacters (for example ``&`` in an upstream URL) are escaped
    correctly. launchctl steps are best-effort (failures are ignored).
    """
    import plistlib

    log_path = f"/tmp/{LABEL}.log"
    payload = {
        "Label": MAC_LABEL,
        "ProgramArguments": list(argv),
        "RunAtLoad": True,
        "KeepAlive": True,
        "StandardOutPath": log_path,
        "StandardErrorPath": log_path,
    }
    path = _plist_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(plistlib.dumps(payload, sort_keys=False))

    uid = os.getuid()
    # Unload a previously installed agent first: bootstrap fails on an
    # already-loaded label, which would leave the old arguments in effect.
    _run(["launchctl", "bootout", f"gui/{uid}/{MAC_LABEL}"], check=False)
    _run(["launchctl", "bootstrap", f"gui/{uid}", str(path)], check=False)
    _run(["launchctl", "enable", f"gui/{uid}/{MAC_LABEL}"], check=False)
    _run(["launchctl", "kickstart", "-k", f"gui/{uid}/{MAC_LABEL}"], check=False)
    print(f"installed + started launchd LaunchAgent: {path}")
    print(f" logs: {log_path}")
    print(" disable: codexcomp uninstall-service")
147
+
148
+
149
def _uninstall_macos() -> None:
    """Boot the LaunchAgent out of the user's GUI domain and delete its plist.

    ``launchctl bootout`` is issued with ``check=False``; the plist is then
    unlinked whether or not it existed.
    """
    plist_file = _plist_path()
    service_target = f"gui/{os.getuid()}/{MAC_LABEL}"
    _run(["launchctl", "bootout", service_target], check=False)

    was_present = plist_file.exists()
    plist_file.unlink(missing_ok=True)

    note = "" if was_present else " (was not present)"
    print(f"removed launchd LaunchAgent{note}: {plist_file}")
156
+
157
+
158
+ # --- Windows (manual autostart, by design) ----------------------------------
159
+ #
160
+ # We intentionally do NOT register Windows autostart programmatically. Writing an
161
+ # autostart entry (Startup VBS / Run key / task) and launching a hidden process
162
+ # is exactly the persistence pattern behavioral antivirus flags as a trojan
163
+ # (observed: Kaspersky PDM:Trojan.Win32.Generic on the launching python.exe).
164
+ # A user-created Startup shortcut is trusted by the same AV. So print the steps,
165
+ # pointing at the windowless launcher, and register nothing.
166
+
167
+
168
def _guiw_exe() -> str:
    """Locate the windowless launcher (codexcompw.exe).

    Lookup order: an existing ``<console>w.exe`` beside the console
    executable, then ``LABEL + "w"`` on PATH, then the derived sibling path
    even if it does not exist. If the console path does not end in ``.exe``
    and nothing is found on PATH, the console path itself is returned.
    """
    console_exe = _resolve_exe()

    sibling = None
    if console_exe.lower().endswith(".exe"):
        sibling = console_exe[:-4] + "w.exe"
        if os.path.exists(sibling):
            return sibling

    on_path = shutil.which(LABEL + "w")
    if on_path:
        return on_path
    return sibling if sibling is not None else console_exe
177
+
178
+
179
def _install_windows(argv: list[str]) -> None:
    """Print manual Startup-shortcut instructions; nothing is registered.

    Args:
        argv: Full command line; everything after the executable is shown as
            the arguments to append to the shortcut target.
    """
    launcher = _guiw_exe()
    extra = subprocess.list2cmdline(argv[1:])

    lines = [
        "Windows autostart is not registered automatically — behavioral antivirus",
        "flags programmatic startup persistence as trojan-like. Set it up by hand",
        "(a user-created shortcut is AV-trusted):",
        " 1. press Win+R, run: shell:startup",
        f" 2. create a shortcut whose target is: {launcher}",
    ]
    if extra:
        lines.append(f" append these arguments: {extra}")
    lines.append(" (…codexcompw.exe is windowless — no console window at logon)")

    for line in lines:
        print(line)
190
+
191
+
192
def _uninstall_windows() -> None:
    """Tell the user how to remove the manually created Startup shortcut."""
    message = (
        "Windows autostart is manual: delete your codexcomp shortcut from\n"
        "the Startup folder (Win+R -> shell:startup)."
    )
    print(message)
195
+
196
+
197
+ # --- dispatch ----------------------------------------------------------------
198
+
199
+
200
def install(host=None, port=None, upstream=None, log_level=None) -> int:
    """Set up autostart for the current platform.

    The four options are passed to ``_exe_and_args`` to build the service
    command line.

    Returns:
        0 on success, 1 if the installer raised ``RuntimeError`` or
        ``subprocess.CalledProcessError``, 2 on an unsupported platform.
    """
    argv = _exe_and_args(host, port, upstream, log_level)
    system = platform.system()

    installers = {
        "Linux": _install_linux,
        "Darwin": _install_macos,
        "Windows": _install_windows,
    }
    installer = installers.get(system)
    if installer is None:
        print(f"unsupported platform: {system}", file=sys.stderr)
        return 2

    try:
        installer(argv)
    except (RuntimeError, subprocess.CalledProcessError) as exc:
        # Prefer the failed command's captured stderr when the exception has one.
        detail = getattr(exc, "stderr", "") or str(exc)
        print(f"install-service failed: {detail}", file=sys.stderr)
        return 1
    return 0
218
+
219
+
220
+ def uninstall() -> int:
221
+ system = platform.system()
222
+ if system == "Linux":
223
+ _uninstall_linux()
224
+ elif system == "Darwin":
225
+ _uninstall_macos()
226
+ elif system == "Windows":
227
+ _uninstall_windows()
228
+ else:
229
+ print(f"unsupported platform: {system}", file=sys.stderr)
230
+ return 2
231
+ return 0
@@ -0,0 +1,259 @@
1
+ Metadata-Version: 2.4
2
+ Name: codexcomp
3
+ Version: 0.3.0
4
+ Summary: Local Responses proxy for OpenAI Codex CLI: folds gpt-5.5 518n-2 reasoning truncation (516 degradation) via the official openai_base_url wiring — no provider change, WebSocket-first, no fallback noise.
5
+ Project-URL: Homepage, https://github.com/dzshzx/codexcomp
6
+ Project-URL: Repository, https://github.com/dzshzx/codexcomp
7
+ Project-URL: Issues, https://github.com/dzshzx/codexcomp/issues
8
+ Author: dzshzx
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: 516,codex,gpt-5.5,openai,proxy,reasoning
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
19
+ Classifier: Topic :: Internet :: Proxy Servers
20
+ Requires-Python: >=3.12
21
+ Requires-Dist: httpx>=0.27
22
+ Requires-Dist: starlette>=0.41
23
+ Requires-Dist: uvicorn[standard]>=0.32
24
+ Requires-Dist: zstandard>=0.23
25
+ Description-Content-Type: text/markdown
26
+
27
+ # codexcomp
28
+
29
+ [![PyPI](https://img.shields.io/pypi/v/codexcomp.svg)](https://pypi.org/project/codexcomp/)
30
+ [![Python](https://img.shields.io/pypi/pyversions/codexcomp.svg)](https://pypi.org/project/codexcomp/)
31
+ [![License: MIT](https://img.shields.io/pypi/l/codexcomp.svg)](https://github.com/dzshzx/codexcomp/blob/main/LICENSE)
32
+
33
+ **English** · [简体中文](README.zh-CN.md)
34
+
35
+ A tiny local Responses proxy for the **OpenAI Codex CLI** that cures the gpt-5.5
36
+ **"516" reasoning-truncation degradation** — while leaving your `model_provider`
37
+ untouched, so session grouping, remote compaction and remote-control keep working.
38
+
39
+ ```bash
40
+ uv tool install codexcomp # install
41
+ codexcomp # run (127.0.0.1:8787)
42
+ # then add one line to ~/.codex/config.toml: openai_base_url = "http://127.0.0.1:8787/v1"
43
+ ```
44
+
45
+ > **Credits.** The detection-and-continue idea comes from
46
+ > [**neteroster/CodexCont**](https://github.com/neteroster/CodexCont) (MIT) — thank you.
47
+ > This project is an independent, from-scratch implementation that keeps the built-in
48
+ > provider intact; see [Differences](#differences-from-codexcont).
49
+
50
+ ---
51
+
52
+ ## The problem: gpt-5.5 "516" degradation
53
+
54
+ On the OpenAI Codex CLI, gpt-5.5's reasoning sometimes gets cut short at a very
55
+ specific token count — `reasoning_tokens == 518 * n − 2` (i.e. **516, 1034, 1552, …**).
56
+ When a turn lands on that fingerprint, the model stops thinking early and the answer
57
+ quality drops sharply. It is an upstream issue with no official fix
58
+ ([openai/codex#30364](https://github.com/openai/codex/issues/30364)).
59
+
60
+ `codexcomp` sits on `127.0.0.1` between Codex and the upstream Responses API.
61
+ When it sees a turn truncate on the `518n−2` fingerprint, it **makes the model keep
62
+ thinking** and **folds the extra rounds into a single downstream response**, so Codex
63
+ sees one clean, complete answer.
64
+
65
+ ## How it works
66
+
67
+ The proxy streams every upstream round and runs a small state machine (`codexcomp/fold.py`):
68
+
69
+ 1. **Detect.** At the end of each round it reads
70
+ `usage.output_tokens_details.reasoning_tokens`. If it equals `518n − 2` (with
71
+ `1 ≤ n ≤ 6`, and at most 3 continuation rounds), the round was truncated.
72
+ 2. **Continue.** It discards that round's *tentative* output (the message / tool calls —
73
+ they were produced on truncated thinking), then replays the round's reasoning items
74
+ (including `encrypted_content`) plus a single `phase:"commentary"` assistant message
75
+ (`"Continue thinking..."`) as the next round's input. That nudges the model to resume
76
+ reasoning where it left off.
77
+ 3. **Fold.** Reasoning is streamed live to Codex the whole time; only the *clean* final
78
+ round's output is flushed. The terminal event is rebuilt as if the whole thing were
79
+ one response — `input`/`cached` come from round 1 (so it never looks like a blown
80
+ context window), reasoning is summed, and the true cumulative cost is recorded under
81
+ `metadata.proxy_billed_usage`.
82
+
83
+ ### Wiring: why the built-in provider stays intact
84
+
85
+ Codex is pointed at the proxy with **one top-level config key**, not a new provider:
86
+
87
+ ```toml
88
+ # ~/.codex/config.toml (top level, before the first [table])
89
+ openai_base_url = "http://127.0.0.1:8787/v1"
90
+ ```
91
+
92
+ `openai_base_url` overrides the base URL of the **built-in `openai` provider** in place.
93
+ This is the officially supported key
94
+ ([openai/codex#16719](https://github.com/openai/codex/issues/16719); the same-name
95
+ `[model_providers.openai]` override is rejected by the maintainers, and the
96
+ `OPENAI_BASE_URL` env var was removed). Because the provider id stays `openai`:
97
+
98
+ - your conversation history is **not** re-bucketed/hidden by provider,
99
+ - **remote compaction** keeps working (`supports_remote_compaction` stays true),
100
+ - **remote-control** is unaffected (it uses the separate `chatgpt_base_url`).
101
+
102
+ ### Differences from CodexCont
103
+
104
+ The 518n−2 detection + fold-continuation mechanism is [CodexCont]'s idea; the
105
+ implementation here is new and diverges on a few deliberate points:
106
+
107
+ | | codexcomp | CodexCont |
108
+ | --- | --- | --- |
109
+ | **Codex wiring** | top-level `openai_base_url` (**built-in provider unchanged**) | a new `[model_providers]` entry (history hidden per-provider, remote-control unusable, remote compaction lost) |
110
+ | **Downstream transport** | **WebSocket-first** — full `responses_websockets` protocol, plus SSE fallback | SSE only (Codex tries ws → 405 → ~5 reconnect warnings per session, then falls back) |
111
+ | **zstd request bodies** (0.142.x built-in provider) | decompressed natively, no Codex config change | needs `[features] enable_request_compression = false` |
112
+ | **`GET /v1/models`** (model-catalog refresh) | passed through (`/v1/*`) | not proxied (silently fails, relies on cache) |
113
+ | **Continuation** | commentary method only | commentary + legacy tool-pair + cross-turn repair, more knobs |
114
+
115
+ [CodexCont]: https://github.com/neteroster/CodexCont
116
+
117
+ ## Install
118
+
119
+ Requires [uv](https://docs.astral.sh/uv/) (which manages Python for you) and the Codex
120
+ CLI (ChatGPT OAuth login; tested on 0.142.x).
121
+
122
+ ```bash
123
+ uv tool install codexcomp # from PyPI
124
+ # or straight from source:
125
+ # uv tool install git+https://github.com/dzshzx/codexcomp
126
+ ```
127
+
128
+ uv puts the executable in its bin dir (`~/.local/bin` on Unix/macOS; on Windows, run
129
+ `where.exe codexcomp` to find it); `uv tool update-shell` adds that dir to PATH. Then:
130
+
131
+ ```bash
132
+ codexcomp # run in foreground (default 127.0.0.1:8787)
133
+ codexcomp --port 8790 --log-level debug
134
+ ```
135
+
136
+ Wire Codex to it (one line in `~/.codex/config.toml`, see above), and you're done.
137
+ **Disable** by commenting out the `openai_base_url` line and stopping the proxy. (If the
138
+ key stays but the proxy is down, Codex errors on an unreachable upstream.)
139
+
140
+ Upgrade / uninstall: `uv tool upgrade codexcomp` / `uv tool uninstall codexcomp`.
141
+
142
+ ### Ports
143
+
144
+ The proxy's port **must match** the port in Codex's `openai_base_url`. If the default
145
+ port (8787) is busy, the proxy **exits with a clear message** rather than drifting — a
146
+ wired proxy that silently binds another port would just be unreachable. To use a
147
+ different port, pass `--port N` and set `openai_base_url` to the same `N`.
148
+
149
+ `--auto-port` is for interactive one-off runs only: on a conflict it scans for the next
150
+ free port and prints which `openai_base_url` to use. Don't use it for a wired service.
151
+
152
+ ## Autostart (optional, off by default)
153
+
154
+ Installing registers **no** autostart — it's entirely your choice.
155
+
156
+ ```bash
157
+ codexcomp install-service # register + start (current platform)
158
+ codexcomp uninstall-service # remove
159
+ ```
160
+
161
+ `install-service` uses each platform's per-user mechanism, which runs inside your login
162
+ session (a system-wide service runs without your user environment, so it cannot reach the
163
+ uv-installed executable or the proxy settings stored under your profile):
164
+
165
+ - **Linux / WSL** → a systemd **user** unit (`~/.config/systemd/user/`). Run
166
+ `loginctl enable-linger` once to start it at boot without logging in. Manual equivalent:
167
+ see `systemd/codexcomp.service.example`.
168
+ - **macOS** → a launchd **LaunchAgent** in `~/Library/LaunchAgents/` (starts at login, in
169
+ your GUI session). Load with `launchctl bootstrap gui/$(id -u) <plist>` /
170
+ `launchctl kickstart -k …`; remove with `launchctl bootout …`.
171
+ - **Windows** → **prints manual steps, registers nothing** (see below).
172
+
173
+ ### Windows autostart is manual — on purpose
174
+
175
+ A program that writes an autostart entry (Startup VBS / Run key / scheduled task) and
176
+ launches a hidden process trips behavioral antivirus as trojan-like persistence —
177
+ Kaspersky's proactive-defense module flags the launching `python.exe` as
178
+ `PDM:Trojan.Win32.Generic`. A **user-created** Startup shortcut is trusted by the same AV.
179
+
180
+ So this package ships a windowless launcher, `codexcompw` (a Windows GUI-subsystem
181
+ exe — no console window at logon), and `install-service` just tells you how to point a
182
+ shortcut at it:
183
+
184
+ 1. `Win+R` → `shell:startup` (opens the Startup folder).
185
+ 2. New → Shortcut → target = the path from `where.exe codexcompw` (append
186
+ `--port N` if you use a custom port).
187
+
188
+ Delete the shortcut to disable it.
189
+
190
+ ### Mirrored-networking shortcut (WSL ↔ Windows)
191
+
192
+ If your WSL2 uses `networkingMode=mirrored`, Windows and WSL **share `127.0.0.1`**. Then
193
+ you only need **one** proxy on either side — run it in WSL (as a systemd service), and on
194
+ the Windows side just add the `openai_base_url` line to `~/.codex/config.toml` pointing at
195
+ the same `127.0.0.1:8787`. No second proxy or Windows autostart needed (the only cost is
196
+ that Windows Codex depends on the WSL proxy being up).
197
+
198
+ ## Verify
199
+
200
+ ```bash
201
+ curl -sS http://127.0.0.1:8787/healthz # {"ok":true,...}
202
+ journalctl --user -u codexcomp -f | grep -E 'round|done' # Linux/WSL
203
+ ```
204
+
205
+ A live fold looks like this (two chained 516s beaten, answer correct):
206
+
207
+ ```
208
+ round 1: in=21550 out=664 reason=516 total=22214 | n=1 buffered=['function_call'] -> continue
209
+ round 2: in=22078 out=652 reason=516 total=22730 | n=1 buffered=['function_call'] -> continue
210
+ round 3: in=22606 out=566 reason=291 total=23172 | n=None buffered=[...] -> clean
211
+ done: 3 round(s) | ... | status=completed stop=natural
212
+ ```
213
+
214
+ ## Develop
215
+
216
+ ```bash
217
+ git clone https://github.com/dzshzx/codexcomp && cd codexcomp
218
+ uv sync
219
+ uv run python test_fold.py # fold state-machine self-test → ALL PASS
220
+ uv run codexcomp # run locally
221
+ ```
222
+
223
+ Releases go out via PyPI Trusted Publishing (`.github/workflows/release.yml`, OIDC, no
224
+ stored token): push a `v*` tag and it builds + publishes automatically.
225
+
226
+ Layout:
227
+
228
+ - `codexcomp/fold.py` — fingerprint detection + fold state machine (transport-agnostic;
229
+ covered by `test_fold.py`).
230
+ - `codexcomp/server.py` — starlette transport: ws / SSE downstream, SSE upstream,
231
+ zstd/gzip request decompression, `/v1/*` passthrough.
232
+ - `codexcomp/cli.py` — CLI entry (`codexcomp`; loopback only; auth passthrough, stores
233
+ no credentials).
234
+
235
+ ## Security & disclaimer
236
+
237
+ - The proxy is **auth passthrough** only: it forwards Codex's `Authorization` header and
238
+ never reads, stores, or logs any credential.
239
+ - It listens on the **loopback** address only — do not expose it on a non-loopback interface.
240
+ - **Unofficial**: it depends on upstream behavior that isn't a public contract (the
241
+ truncation fingerprint, the ws frame format). An OpenAI-side change may break it. Use at
242
+ your own risk.
243
+ - Continuation spends **extra real tokens** (see `metadata.proxy_billed_usage`); codexcomp
244
+ bounds this with an `n` window and a 3-round cap.
245
+
246
+ ## Community
247
+
248
+ Built for and shared with the [**LINUX DO**](https://linux.do) community, where the
249
+ gpt-5.5 "516" degradation was diagnosed and discussed. Feedback and issues welcome there
250
+ and on [GitHub Issues](https://github.com/dzshzx/codexcomp/issues).
251
+
252
+ ## License
253
+
254
+ [MIT](LICENSE). Fully open source, no closed parts.
255
+
256
+ Mechanism credit: [**neteroster/CodexCont**](https://github.com/neteroster/CodexCont) (MIT) —
257
+ this project reuses its 518n−2 detect-and-continue *idea* with an independent, from-scratch
258
+ implementation, and keeps the built-in provider intact (see [Differences](#differences-from-codexcont)).
259
+ CodexCont's MIT copyright notice is retained in [LICENSE](LICENSE).
@@ -0,0 +1,10 @@
1
+ codexcomp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ codexcomp/cli.py,sha256=-g4fgzpaiIkVEagcFmCIOyxhZQ0Uju1sS0EcC3mTThE,4317
3
+ codexcomp/fold.py,sha256=yhsqYj8PYxeLcyeO5Nrg9-HYf3gPvaS3SWPb6OC9va4,14475
4
+ codexcomp/server.py,sha256=TpgoDGjjWve1bpn3UoEhIY2tk9vC8JawIoKM4TvjKSs,9105
5
+ codexcomp/service.py,sha256=lLxdlmOoejGJDfpg7xoPex_okfnAIGw0sfYdAxNPprY,8500
6
+ codexcomp-0.3.0.dist-info/METADATA,sha256=n7V-xkCmqcBng1_AMMstAyWp5kyWzX_0G68b8LA18UY,12259
7
+ codexcomp-0.3.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
8
+ codexcomp-0.3.0.dist-info/entry_points.txt,sha256=HT7pEwUVjp7ijkwsmI9qWHJR_v-6m0lOf9kvFFHI-20,96
9
+ codexcomp-0.3.0.dist-info/licenses/LICENSE,sha256=xUs31HROJwQ3ywBsM36wSEgCHZbFGQLJXm0iiR06iJY,1258
10
+ codexcomp-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,5 @@
1
+ [console_scripts]
2
+ codexcomp = codexcomp.cli:main
3
+
4
+ [gui_scripts]
5
+ codexcompw = codexcomp.cli:main
@@ -0,0 +1,27 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 dzshzx
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+ ---
24
+
25
+ Mechanism inspiration: the 518n-2 truncation detection + fold-continuation
26
+ approach originates from neteroster/CodexCont (MIT). This project is an
27
+ independent, from-scratch implementation.