trace-to-skill 0.1.78 → 0.1.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -120,6 +120,14 @@ Common signals include `user cancelled MCP tool call`, `request_user_input is no
120
120
 
121
121
  The fix is to capture the Codex version, MCP server name and transport, tool name, exposed callable name, whether `tools/list` and manual `tools/call` succeed, `approval_policy`, sandbox mode, exec or interactive mode, elicitation setting, namespace or `serverName` metadata, exact `item.started` / `item.completed` JSONL, stderr or backpressure evidence, and whether restarting or reinitializing the transport changes the result.
122
122
 
123
+ ## Codex Streamable HTTP MCP
124
+
125
+ Streamable HTTP and SSE MCP servers can be reachable and still fail inside Codex before or during tool calls. This is different from discovery mismatch because the server may initialize or expose tools, and different from stdio runtime failure because the failure sits in HTTP framing, JSON-RPC parsing, session reuse, auth expectations, or reconnect behavior.
126
+
127
+ Common signals include Penpot response parse or `JsonRpcMessage deserialize` errors, `Content-Type: text/event-stream` SSE frames that Codex cannot parse, n8n `initialize` success followed by `Transport closed`, DingTalk OAuth/login gating that contradicts local config, stale `streamable-http` session ids after a remote server restart, missing header or User-Agent requirements, and recovery only after restarting Codex.
128
+
129
+ The fix is to capture the Codex version, MCP server name, transport URL without secrets, `initialize` / `tools/list` / `tools/call` results, HTTP status, `Content-Type`, SSE event framing, JSON-RPC message shape, session id before and after reconnect or server restart, auth/OAuth expectations, User-Agent and header requirements, exact parse/deserialize error, whether curl or another MCP client succeeds, and whether restarting Codex or reinitializing the transport recovers the connection.
130
+
123
131
  ## Codex MCP Discovery Mismatch
124
132
 
125
133
  Codex MCP servers can work in CLI or one config scope but disappear in another surface before any tool call is possible. This is different from runtime failure: the user may have no `mcp__*` tools exposed in VS Code, Desktop, WSL, a remote session, project-local `.codex/config.toml`, or an older conversation even though CLI `/mcp` works.
@@ -146,11 +154,11 @@ The fix is to capture app and CLI versions, OS, session or thread id, rollout JS
146
154
 
147
155
  ## Codex Token Burn
148
156
 
149
- Codex usage can drain unexpectedly even when users cannot tell whether the cause is useful model work, background process polling, idle app activity, compaction/replay overhead, retry loops, subagent fan-out, fast-mode drift, large context, long `AGENTS.md`, MCP/skill overhead, or cached-token-heavy turns.
157
+ Codex usage can drain unexpectedly, and users often cannot tell whether the cause is useful model work, backend prompt-cache collapse, background process polling, idle app activity, compaction/replay overhead, retry loops, subagent fan-out, fast-mode drift, large context, long `AGENTS.md`, MCP/skill overhead, or cached-token-heavy turns.
150
158
 
151
- Common signals include tokens `burning very fast`, usage dropping by visible percentages after one or two prompts, weekly allowance depletion under normal usage, 5-hour usage reaching 0%, large `input` plus `cached input` totals, `write_stdin` empty polling, background commands repeatedly checking for no new output, Codex using daily usage while idle or only open, compaction tax, retry/tool loops, and requests for usage attribution across normal turns, compaction, retries, subagents, and background polling.
159
+ Common signals include tokens `burning very fast`, usage dropping by visible percentages after one or two prompts, weekly allowance depletion under normal usage, 5-hour usage reaching 0%, large `input` plus `cached input` totals, `input_tokens` plus `cached_input_tokens` / `cached_tokens` rows, `prompt_cache_key` staying stable while cached tokens drop, websocket reconnect notes, `write_stdin` empty polling, background commands repeatedly checking for no new output, Codex using daily usage while idle or only open, compaction tax, retry/tool loops, and requests for usage attribution across normal turns, compaction, retries, subagents, prompt cache, and background polling.
152
160
 
153
- The fix is to capture plan/workspace, client and version, model and reasoning/speed settings, fast-mode/large-context/subagent/review flags, recent `/status` and usage-dashboard deltas, local token totals including cached input/output/reasoning if available, background process ids and `write_stdin` poll cadence, compaction attempts and failures, retry/tool-loop counts, whether the app was idle, and a minimal reproduction with before/after usage percentages.
161
+ The fix is to capture plan/workspace, client and version, model and reasoning/speed settings, fast-mode/large-context/subagent/review flags, recent `/status` and usage-dashboard deltas, local token totals including cached input/output/reasoning if available, adjacent prompt-cache rows with `input_tokens`, `cached_input_tokens` / `cached_tokens`, `prompt_cache_key`, response ids, transport and reconnect timing, background process ids and `write_stdin` poll cadence, compaction attempts and failures, retry/tool-loop counts, whether the app was idle, and a minimal reproduction with before/after usage percentages.
154
162
 
155
163
  ## Codex Resource Leak
156
164
 
@@ -3,18 +3,18 @@
3
3
  | Field | Value |
4
4
  | --- | --- |
5
5
  | Repository | https://github.com/grnbtqdbyx-create/trace-to-skill |
6
- | Package | trace-to-skill@0.1.78 |
6
+ | Package | trace-to-skill@0.1.80 |
7
7
  | License | Apache-2.0 |
8
8
  | Codex readiness | ready (100/100) |
9
- | Benchmark | pass, 33 cases |
9
+ | Benchmark | pass, 34 cases |
10
10
 
11
11
  ## Why This Repository Qualifies
12
12
 
13
- trace-to-skill helps open-source maintainers adopt Codex safely by turning failed coding-agent runs into evidence-backed rules, reusable workflows, and CI gates. It supports real maintenance work: PR review, issue triage, release quality, MCP risk, prompt-injection defense, privacy-preserving trace sharing, and repeat failure reduction. The repository is ready, scores 100/100 on the local Codex readiness doctor, and ships a deterministic benchmark with 33 public fixture cases.
13
+ trace-to-skill helps open-source maintainers adopt Codex safely by turning failed coding-agent runs into evidence-backed rules, reusable workflows, and CI gates. It supports real maintenance work: PR review, issue triage, release quality, MCP risk, prompt-injection defense, privacy-preserving trace sharing, and repeat failure reduction. The repository is ready, scores 100/100 on the local Codex readiness doctor, and ships a deterministic benchmark with 34 public fixture cases.
14
14
 
15
15
  ### 500-Character Version
16
16
 
17
- > trace-to-skill helps open-source maintainers adopt Codex safely by turning failed coding-agent runs into evidence-backed rules, reusable workflows, and CI gates. It supports real maintenance work: PR review, issue triage, release quality, MCP risk, prompt-injection defense, privacy-preserving trace sharing, and repeat failure reduction. The repository is ready, scores 100/100 on the local Codex readiness doctor, and ships a deterministic benchmark with 33 public fixture cases.
17
+ > trace-to-skill helps open-source maintainers adopt Codex safely by turning failed coding-agent runs into evidence-backed rules, reusable workflows, and CI gates. It supports real maintenance work: PR review, issue triage, release quality, MCP risk, prompt-injection defense, privacy-preserving trace sharing, and repeat failure reduction. The repository is ready, scores 100/100 on the local Codex readiness doctor, and ships a deterministic benchmark with 34 public fixture cases.
18
18
 
19
19
  ## How API Credits Would Be Used
20
20
 
@@ -27,10 +27,10 @@ API credits would power optional maintainer workflows on top of the local determ
27
27
  ## Evidence
28
28
 
29
29
  - Public repository: https://github.com/grnbtqdbyx-create/trace-to-skill
30
- - One-command package: npx trace-to-skill@0.1.78
30
+ - One-command package: npx trace-to-skill@0.1.80
31
31
  - Open-source license: Apache-2.0
32
32
  - Codex readiness doctor: ready, 100/100, 0 failed checks.
33
- - Public fixture benchmark: pass, 33 cases.
33
+ - Public fixture benchmark: pass, 34 cases.
34
34
  - Maintainer control: generated rules are suggestions, evidence is line-linked, and secrets can be redacted before sharing.
35
35
 
36
36
  ## Next Steps Before Submitting
package/docs/SCORECARD.md CHANGED
@@ -9,7 +9,7 @@ Status: **pass**
9
9
  | Failed doctor checks | 0 |
10
10
  | Critical findings | 0 |
11
11
  | Built-in benchmark | pass |
12
- | Benchmark cases | 33 |
12
+ | Benchmark cases | 34 |
13
13
 
14
14
  ## Doctor Summary
15
15
 
@@ -45,11 +45,12 @@ This benchmark runs the public fixture pack that ships with the repository and p
45
45
  | MCP config with secret exposure | `fixtures/mcp-risk.json` | 59 | 2 | 1 | `mcp_risk`, `secret_exposure` | pass |
46
46
  | Sensitive file access in agent context | `fixtures/sensitive-file-access.md` | 75 | 2 | 0 | `sensitive_file_access`, `weak_evidence` | pass |
47
47
  | Codex MCP runtime failure | `fixtures/codex-mcp-runtime.md` | 75 | 2 | 0 | `codex_mcp_runtime`, `weak_evidence` | pass |
48
+ | Codex Streamable HTTP MCP parse and handshake failure | `fixtures/codex-mcp-streamable-http.md` | 75 | 2 | 0 | `codex_mcp_streamable_http`, `weak_evidence` | pass |
48
49
  | Codex MCP discovery and config-scope mismatch | `fixtures/codex-mcp-discovery-mismatch.md` | 75 | 2 | 0 | `codex_mcp_discovery_mismatch`, `weak_evidence` | pass |
49
50
  | Codex plugin runtime and bundled capability failure | `fixtures/codex-plugin-runtime.md` | 59 | 3 | 0 | `codex_plugin_runtime`, `codex_windows_helper_path`, `weak_evidence` | pass |
50
51
  | Codex file tree and workspace navigation UI failure | `fixtures/codex-file-tree-ui.md` | 75 | 2 | 0 | `codex_file_tree_ui`, `weak_evidence` | pass |
51
52
  | Codex session resume and state failure | `fixtures/codex-session-state.md` | 59 | 3 | 0 | `codex_resource_leak`, `codex_session_state`, `weak_evidence` | pass |
52
- | Codex token burn and usage-drain loop | `fixtures/codex-token-burn.md` | 59 | 3 | 0 | `codex_resource_leak`, `codex_token_burn`, `weak_evidence` | pass |
53
+ | Codex token burn and usage-drain loop | `fixtures/codex-token-burn.md` | 75 | 2 | 0 | `codex_token_burn`, `weak_evidence` | pass |
53
54
  | Codex resource leak and runaway process | `fixtures/codex-resource-leak.md` | 75 | 2 | 0 | `codex_resource_leak`, `weak_evidence` | pass |
54
55
  | Codex tool-call integrity and rollback failure | `fixtures/codex-tool-call-integrity.md` | 43 | 4 | 0 | `codex_resource_leak`, `codex_subagent_lifecycle`, `codex_tool_call_integrity`, `weak_evidence` | pass |
55
56
  | Codex apply_patch Add File overwrite safety | `fixtures/codex-apply-patch-overwrite.md` | 75 | 2 | 0 | `codex_tool_call_integrity`, `weak_evidence` | pass |
package/docs/USE_CASES.md CHANGED
@@ -18,6 +18,7 @@ npx trace-to-skill demo clipboard-attachment
18
18
  npx trace-to-skill demo deeplink-launch
19
19
  npx trace-to-skill demo connector-auth-cache
20
20
  npx trace-to-skill demo mcp-discovery-mismatch
21
+ npx trace-to-skill demo mcp-streamable-http
21
22
  npx trace-to-skill demo terminal-output-integrity
22
23
  npx trace-to-skill demo subagent-lifecycle
23
24
  npx trace-to-skill sensitive-audit .
@@ -29,7 +30,7 @@ What it proves:
29
30
 
30
31
  - packaged fixtures can produce a real Codex issue report immediately
31
32
  - maintainers can inspect the output shape before sharing any private log
32
- - demos cover remote compact failures, Windows helper path failures, patch overwrite safety, approval friction, latency, Thinking hangs, clipboard/attachment regressions, deeplink/OAuth launch regressions, connector auth-cache regressions, MCP discovery/config-scope mismatches, terminal output/scrollback integrity, subagent lifecycle drift, token burn, sensitive files, and prompt injection
33
+ - demos cover remote compact failures, Windows helper path failures, patch overwrite safety, approval friction, latency, Thinking hangs, clipboard/attachment regressions, deeplink/OAuth launch regressions, connector auth-cache regressions, MCP discovery/config-scope mismatches, Streamable HTTP MCP parse/handshake failures, terminal output/scrollback integrity, subagent lifecycle drift, token burn, sensitive files, and prompt injection
33
34
  - `sensitive-audit` scans filenames and paths before an agent run, without reading file contents, so teams can build `.agentignore`, `.aiexclude`, `.codexignore`, `.gitignore`, or sandbox permission profiles from a concrete repo report
34
35
  - `lsp-audit` scans repo language signals and PATH availability so teams know which language servers are ready before asking Codex for symbol-aware edits
35
36
 
@@ -54,7 +55,7 @@ What it proves:
54
55
  Recommended CI surface:
55
56
 
56
57
  ```yaml
57
- - uses: grnbtqdbyx-create/trace-to-skill@v0.1.78
58
+ - uses: grnbtqdbyx-create/trace-to-skill@v0.1.80
58
59
  with:
59
60
  mode: all
60
61
  doctor-threshold: "85"
@@ -145,19 +146,20 @@ This catches signals such as `Error running remote compact task`, `timeout waiti
145
146
 
146
147
  ## 8. Codex Usage Evidence Packaging
147
148
 
148
- Use this when a Codex usage issue has scattered evidence across `/status`, dashboard notes, reset tables, token totals, cached input, and local overhead clues.
149
+ Use this when a Codex usage issue has scattered evidence across `/status`, dashboard notes, reset tables, token totals, prompt-cache rows, cached input, and local overhead clues.
149
150
 
150
151
  ```bash
151
152
  npx trace-to-skill usage-evidence ./usage-notes.md --output usage-evidence.md
152
153
  npx trace-to-skill usage-evidence ./usage-notes.md --format json
153
154
  ```
154
155
 
155
- This turns Markdown polling tables, CSV-like rows, JSON/JSONL snapshots, `reset_at` values, usage-limit errors, rapid drain experiment notes like `1% in 4 minutes`, `22 credits`, or `70% weekly in a day`, `Token usage: total=... cached` lines, `write_stdin` polling, compaction loops, retry/tool loops, subagent fan-out, and idle-drain notes into a single report with a usage receipt.
156
+ This turns Markdown polling tables, CSV-like rows, JSON/JSONL snapshots, `reset_at` values, usage-limit errors, rapid drain experiment notes like `1% in 4 minutes`, `22 credits`, or `70% weekly in a day`, prompt-cache rows with `input_tokens`, `cached_input_tokens` / `cached_tokens`, `prompt_cache_key`, response ids, websocket/reconnect notes, `Token usage: total=... cached` lines, `write_stdin` polling, compaction loops, retry/tool loops, subagent fan-out, and idle-drain notes into a single report with a usage receipt.
156
157
 
157
158
  The receipt separates:
158
159
 
159
160
  - backend quota-window percentage evidence
160
161
  - local token totals, including cached input and reasoning
162
+ - prompt-cache records and adjacent cache-collapse events
161
163
  - bounded rapid-drain experiment rows with model, plan, prompt count, elapsed time, percent, and credits when present
162
164
  - orchestration-overhead signals that may burn usage without accepted work
163
165
  - suspected cause buckets to keep public reports comparable
@@ -258,7 +260,7 @@ npx trace-to-skill analyze ./runs --format json
258
260
  npx trace-to-skill codex-report ./runs --output openai-codex-issue.md
259
261
  ```
260
262
 
261
- This catches signals such as tokens `burning very fast`, usage dropping by visible percentages after one or two prompts, weekly allowance depletion, 5-hour usage reaching 0%, large `input` plus `cached input` totals, `write_stdin` empty polling, background commands repeatedly reporting no new output, idle app usage, compaction tax, retry/tool loops, and missing attribution between normal turns, compaction, background polling, subagents, and retries.
263
+ This catches signals such as tokens `burning very fast`, usage dropping by visible percentages after one or two prompts, weekly allowance depletion, 5-hour usage reaching 0%, large `input` plus `cached input` totals, `input_tokens` / `cached_input_tokens` / `prompt_cache_key` rows that show cache collapse, `write_stdin` empty polling, background commands repeatedly reporting no new output, idle app usage, compaction tax, retry/tool loops, and missing attribution across normal turns, compaction, background polling, subagents, and retries.
262
264
 
263
265
  For public reports, prefer `usage-evidence` first so the quota-window, local-token, and orchestration-overhead layers are visible separately.
264
266
 
@@ -375,7 +377,21 @@ This catches signals such as `MCP servers not detected in Codex VS Code extensio
375
377
 
376
378
  Include app/CLI/extension version, OS, IDE, remote/WSL/SSH state, workspace root, effective `CODEX_HOME`, all config files considered (`~/.codex/config.toml`, project `.codex/config.toml`, `.vscode/mcp.json`, `.mcp.json`), redacted MCP sections, trust/profile/default-permissions state, `codex mcp list`, `codex mcp get <server>`, CLI-versus-Desktop/VS Code comparison, loaded config path/log lines, whether moving the same server to user-global config fixes it, and whether the current session exposes `mcp__*` tools.
377
379
 
378
- ## 25. Patch Overwrite Guard
380
+ ## 25. Codex Streamable HTTP MCP Evidence
381
+
382
+ Use this when a Streamable HTTP or SSE MCP server is reachable but Codex fails during JSON-RPC parsing, handshake, auth gating, stale session reuse, or reconnect.
383
+
384
+ ```bash
385
+ npx trace-to-skill demo mcp-streamable-http
386
+ npx trace-to-skill analyze ./runs --format json
387
+ npx trace-to-skill codex-report ./runs --output openai-codex-mcp-streamable-http.md
388
+ ```
389
+
390
+ This catches signals such as Penpot `JsonRpcMessage deserialize` or response-parse failures, `Content-Type: text/event-stream` framing problems, n8n `initialize` followed by `Transport closed`, DingTalk OAuth/login gates that do not match config expectations, stale `streamable-http` session ids after server restart, missing header/User-Agent requirements, and recovery only after restarting Codex.
391
+
392
+ Include the Codex version, MCP server name, transport URL without secrets, `initialize` / `tools/list` / `tools/call` results, HTTP status, `Content-Type`, SSE event framing, JSON-RPC message shape, session id before and after reconnect or server restart, auth/OAuth expectations, User-Agent/header requirements, exact parse/deserialize error, whether curl or another MCP client succeeds, and whether restarting Codex or reinitializing the transport recovers the connection.
393
+
394
+ ## 26. Patch Overwrite Guard
379
395
 
380
396
  Use this before applying a generated patch when you want create/update/delete semantics checked against the actual workspace.
381
397
 
@@ -392,7 +408,7 @@ For a public demo report:
392
408
  npx trace-to-skill demo patch-overwrite
393
409
  ```
394
410
 
395
- ## 26. Sensitive Path Preflight Before Agent Runs
411
+ ## 27. Sensitive Path Preflight Before Agent Runs
396
412
 
397
413
  Use this before giving an AI coding agent a repository.
398
414
 
@@ -407,7 +423,7 @@ This finds sensitive-looking paths such as `.env`, `.env.*`, `.npmrc`, `.pypirc`
407
423
 
408
424
  The output includes a stable JSON schema plus recommended exclude globs that can seed `.agentignore`, `.aiexclude`, `.codexignore`, `.gitignore`, local sandbox permission profiles, or team security review checklists. `--format ignore` renders a reviewable generated file candidate and still does not mutate the repo. It is a preflight report, not a sandbox boundary.
409
425
 
410
- ## 27. Workspace Checkpoint Before Agent Runs
426
+ ## 28. Workspace Checkpoint Before Agent Runs
411
427
 
412
428
  Use this before giving Codex, Claude, Cursor, or another coding agent a dirty repository where untracked local work matters.
413
429
 
@@ -420,7 +436,7 @@ This writes a local checkpoint bundle with `status.txt`, staged and unstaged bin
420
436
 
421
437
  This is useful for OpenAI/Codex `/undo` and `/rewind` discussions where users need workspace protection beyond conversation rewind, especially when untracked files are outside normal commit history.
422
438
 
423
- ## 28. OpenAI Codex Issue Report
439
+ ## 29. OpenAI Codex Issue Report
424
440
 
425
441
  Use this when you want to file or update an OpenAI/Codex issue with a concise, evidence-backed report instead of pasting a full transcript.
426
442
 
@@ -433,7 +449,7 @@ The report includes the likely Codex failure class, line-linked evidence, diagno
433
449
 
434
450
  For a cluster-to-command map of current Codex issue patterns, see [CODEX_ISSUE_MAP.md](CODEX_ISSUE_MAP.md).
435
451
 
436
- ## 29. Sensitive File Access Evidence
452
+ ## 30. Sensitive File Access Evidence
437
453
 
438
454
  Use this when a trace suggests an agent read, attached, uploaded, diffed, or indexed credential-bearing files.
439
455
 
@@ -446,7 +462,7 @@ This catches signals such as `.env`, `.env.production`, `.npmrc`, `.pypirc`, `.n
446
462
 
447
463
  Before publishing evidence, run `trace-to-skill redact` and attach only redacted excerpts plus the file path/class.
448
464
 
449
- ## 30. GitHub Context Guard
465
+ ## 31. GitHub Context Guard
450
466
 
451
467
  Use this before an agent reads untrusted GitHub text.
452
468
 
@@ -463,7 +479,7 @@ Use it when:
463
479
  - a bot asks Codex to triage untrusted user reports
464
480
  - logs or comments might contain instructions like "ignore previous instructions" or "print secrets"
465
481
 
466
- ## 30. Failed Agent Run To Reviewable Rule
482
+ ## 32. Failed Agent Run To Reviewable Rule
467
483
 
468
484
  Use this when a coding agent made a repeated workflow mistake.
469
485
 
@@ -481,7 +497,7 @@ Recommended maintainer loop:
481
497
  4. Copy only evidence-backed rules into the real policy file.
482
498
  5. Run `eval` or `scorecard` in CI so the same failure does not silently return.
483
499
 
484
- ## 31. Privacy-Preserving Adoption
500
+ ## 33. Privacy-Preserving Adoption
485
501
 
486
502
  Use this when you want public evidence without leaking private traces.
487
503
 
@@ -0,0 +1,29 @@
1
+ # Codex Streamable HTTP MCP Fixture
2
+
3
+ ## Penpot parse failure
4
+
5
+ Codex CLI 0.136.0 is configured with the Streamable HTTP MCP server `penpot`.
6
+ The server is reachable and `tools/list` succeeds, but the streamable HTTP client fails to parse Penpot MCP responses before `tools/call`.
7
+
8
+ stderr:
9
+
10
+ ```text
11
+ JsonRpcMessage deserialize error while reading streamable-http response
12
+ response parse failed after Content-Type: text/event-stream
13
+ ```
14
+
15
+ The server sends SSE event frames with JSON-RPC payloads, and another MCP client can read the same endpoint successfully.
16
+
17
+ ## n8n initialize handshake
18
+
19
+ MCP Streamable HTTP handshake fails with `n8n` after initialize.
20
+ `initialize` returns 200, then the next `tools/list` reports Transport closed and the session id is not accepted.
21
+
22
+ ## DingTalk auth gate
23
+
24
+ The DingTalk Streamable HTTP MCP server is incorrectly gated behind OAuth/login even though the server is configured as unauthenticated in the local server config.
25
+
26
+ ## Stale session after restart
27
+
28
+ After a remote MCP server restart, Codex reuses a stale streamable-http session id.
29
+ Subsequent tool calls fail until restarting Codex instead of reconnecting or reinitializing the transport.
@@ -29,6 +29,14 @@ During background waits, the cadence is about one poll every 5-10 seconds.
29
29
  Cached tokens are still charged by some API/proxy billing paths.
30
30
  ```
31
31
 
32
+ ## Prompt cache collapse
33
+
34
+ ```text
35
+ Previous WebSocket request: input_tokens=183,426 cached_input_tokens=152,448 prompt_cache_key="019e74ff-6cf1-7d40-80ce-0c8baa3ad6cf" id="resp_0442f7dc" outcome=incremental
36
+ Reconnect request: input_tokens=184,739 cached_tokens=91,520 prompt_cache_key="019e74ff-6cf1-7d40-80ce-0c8baa3ad6cf" id="resp_0fd6c965" outcome=incremental websocket reconnect
37
+ Recovered request: input_tokens=185,010 cached_input_tokens=153,000 prompt_cache_key="019e74ff-6cf1-7d40-80ce-0c8baa3ad6cf" id="resp_1a2b3c" outcome=incremental
38
+ ```
39
+
32
40
  ## Compaction and replay cost
33
41
 
34
42
  Another report said:
@@ -39,4 +47,4 @@ The weekly usage limit depletes unusually fast on 5.5, worsened by unstable cont
39
47
  Failed context compaction forces users to restart tasks and re-explain project state, creating compaction tax.
40
48
  ```
41
49
 
42
- The report should include the plan/workspace, app or CLI version, model, reasoning effort, speed mode, large context setting, subagent and /review usage, recent `/status` and dashboard deltas, token totals including cached input/output/reasoning, background process ids, write_stdin poll cadence, compaction attempts, retry/tool-loop counts, whether the app was idle, and before/after usage percentages.
50
+ The report should include the plan/workspace, app or CLI version, model, reasoning effort, speed mode, large context setting, subagent and /review usage, recent `/status` and dashboard deltas, token totals including cached input/output/reasoning, adjacent prompt-cache rows with response ids and prompt_cache_key, background process ids, write_stdin poll cadence, compaction attempts, retry/tool-loop counts, whether the app was idle, and before/after usage percentages.
package/llms.txt CHANGED
@@ -23,6 +23,7 @@ Runtime: Node.js 20+
23
23
  - Codex auth and connectivity failures such as `token_exchange_failed`, `auth.openai.com/oauth/token`, missing `ca-certificates`, proxy or MITM TLS behavior, IPv6 fallback problems, Cloudflare challenge responses, and ChatGPT stream disconnects
24
24
  - Codex mobile and remote-control route health failures such as `Waiting for desktop`, `Directory Unavailable`, stale listeners on `127.0.0.1:14567`, stale `server_name` enrollment, empty backend environments, and incomplete helper bundles
25
25
  - Codex MCP runtime failures such as cancelled non-interactive approvals, `request_user_input is not supported in exec mode`, dropped namespace or `serverName` metadata, `unsupported call: mcp__...__...`, and closed `StdioServerTransport` sessions
26
+ - Codex Streamable HTTP MCP failures where Penpot, n8n, DingTalk, or another HTTP/SSE MCP server initializes but fails JSON-RPC parsing, `Content-Type: text/event-stream` handling, handshakes, OAuth gating, stale session ids, missing headers, or reconnects
26
27
  - Codex MCP discovery and config-scope mismatches where CLI `/mcp` works but VS Code, Desktop, WSL, project `.codex/config.toml`, `CODEX_HOME`, or an older conversation exposes no `mcp__*` tools
27
28
  - Codex terminal output and scrollback integrity failures where streamed lines disappear, get overwritten, truncate, duplicate, misalign, snap to the bottom, or only survive in raw logs/transcripts
28
29
  - Codex subagent lifecycle failures where completed or closed agents remain visible, `thread_spawn_edges` drift, child threads crowd the recent list, `agent thread limit reached` blocks spawns, or compaction loses prior subagent IDs; `session-audit` can summarize local subagent signal counts without exposing transcripts
@@ -32,10 +33,10 @@ Runtime: Node.js 20+
32
33
  - Codex clipboard, copy/export, long paste, and generated `Pasted text.txt` attachment regressions that break prompt, `/goal`, preview/edit, or support-report workflows
33
34
  - Codex deeplink, OAuth callback, notification click, browser-extension activation, mobile pairing, and `codex app <path>` launch regressions
34
35
  - Codex app connector stale auth/cache regressions such as `401 Reauthentication required`, unchanged `link_*`, `isAccessible: false`, and broken `codex_apps_tools` metadata
35
- - Codex token-burn and usage-drain failures such as rapid drain experiments (`1% in 4 minutes`, `22 credits`, `70% weekly in a day`), background `write_stdin` polling, idle app usage, compaction tax, retry/tool loops, cached-token-heavy turns, fast-mode drift, subagent fan-out, and unclear usage attribution
36
+ - Codex token-burn, prompt-cache collapse, and usage-drain failures such as rapid drain experiments (`1% in 4 minutes`, `22 credits`, `70% weekly in a day`), `input_tokens` / `cached_input_tokens` / `prompt_cache_key` rows, websocket reconnect cache drops, background `write_stdin` polling, idle app usage, compaction tax, retry/tool loops, cached-token-heavy turns, fast-mode drift, subagent fan-out, and unclear usage attribution
36
37
  - Codex process evidence packaging for Windows PowerShell/pwsh CIM polling, high-CPU helpers, stale process-manager entries, and renderer runaways
37
38
  - Codex usage reset schedule drift such as weekly reset dates moving, `reset_at` jumping, saved usage disappearing, outage compensation resets changing the anchor, and `/status` disagreeing with enforcement
38
- - Codex usage evidence packaging for scattered `/status`, reset-table, usage-limit, token-total, cached-input, and orchestration-overhead snippets
39
+ - Codex usage evidence packaging for scattered `/status`, reset-table, usage-limit, token-total, prompt-cache, cached-input, and orchestration-overhead snippets
39
40
  - Codex usage receipts that separate backend quota-window percentages, bounded drain experiments, local token totals, and overhead signals such as background polling, compaction loops, retry/tool loops, subagent fan-out, or idle drain
40
41
  - Codex undo/rewind support workflows where maintainers need a local pre-agent workspace checkpoint with git diffs plus copied changed/untracked files before agent edits
41
42
  - Codex resource leaks and runaway local processes such as high CPU/GPU, `Code Helper`, `Codex Helper Renderer`, orphaned shell snapshots, log floods, thinking animation GPU loops, and non-Git workspace CPU loops
@@ -81,6 +82,7 @@ npx trace-to-skill demo clipboard-attachment
81
82
  npx trace-to-skill demo deeplink-launch
82
83
  npx trace-to-skill demo connector-auth-cache
83
84
  npx trace-to-skill demo mcp-discovery-mismatch
85
+ npx trace-to-skill demo mcp-streamable-http
84
86
  npx trace-to-skill demo terminal-output-integrity
85
87
  npx trace-to-skill demo subagent-lifecycle
86
88
  npx trace-to-skill lint-agents .
@@ -109,7 +111,7 @@ npx trace-to-skill init --comment --sarif
109
111
  ## GitHub Action
110
112
 
111
113
  ```yaml
112
- - uses: grnbtqdbyx-create/trace-to-skill@v0.1.78
114
+ - uses: grnbtqdbyx-create/trace-to-skill@v0.1.80
113
115
  with:
114
116
  mode: all
115
117
  doctor-threshold: "85"
@@ -216,6 +218,7 @@ npx trace-to-skill init --comment --sarif
216
218
  - Codex usage evidence report
217
219
  - Codex rate-limit evidence report
218
220
  - Codex cached input token usage drain
221
+ - Codex prompt cache collapse evidence with prompt_cache_key, cached_input_tokens, cached_tokens, response ids, websocket reconnect, and low cache hit rate
219
222
  - Codex weekly reset date changed
220
223
  - Codex usage reset_at jumping
221
224
  - Codex deterministic reset schedule
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "trace-to-skill",
3
- "version": "0.1.78",
3
+ "version": "0.1.80",
4
4
  "description": "Turn failed AI coding-agent runs into reusable AGENTS.md rules, SKILL.md files, and eval evidence.",
5
5
  "type": "module",
6
6
  "main": "dist/src/index.js",
@@ -119,6 +119,10 @@
119
119
  "mcp-config",
120
120
  "codex-vscode",
121
121
  "mcp-runtime",
122
+ "codex-mcp-streamable-http",
123
+ "streamable-http-mcp",
124
+ "mcp-http",
125
+ "mcp-sse",
122
126
  "codex-plugins",
123
127
  "codex-file-tree",
124
128
  "codex-navigation",
@@ -151,6 +155,9 @@
151
155
  "codex-rate-limits",
152
156
  "codex-usage-evidence",
153
157
  "codex-rate-limit-evidence",
158
+ "codex-prompt-cache",
159
+ "prompt-cache-collapse",
160
+ "cache-hit-rate",
154
161
  "usage-receipt",
155
162
  "codex-usage-receipt",
156
163
  "codex-usage-spike",
@@ -85,6 +85,7 @@
85
85
  "codex_subagent_lifecycle",
86
86
  "codex_mcp_discovery_mismatch",
87
87
  "codex_mcp_runtime",
88
+ "codex_mcp_streamable_http",
88
89
  "codex_plugin_runtime",
89
90
  "codex_file_tree_ui",
90
91
  "codex_session_state",
@@ -3,7 +3,7 @@
3
3
  "$id": "https://github.com/grnbtqdbyx-create/trace-to-skill/schemas/usage-evidence-result.schema.json",
4
4
  "title": "trace-to-skill Codex usage evidence result",
5
5
  "type": "object",
6
- "required": ["generatedAt", "status", "inputs", "summary", "snapshots", "tokenUsage", "drainExperiments", "receipt", "findings", "checklist"],
6
+ "required": ["generatedAt", "status", "inputs", "summary", "snapshots", "tokenUsage", "cacheRecords", "cacheCollapseEvents", "drainExperiments", "receipt", "findings", "checklist"],
7
7
  "additionalProperties": false,
8
8
  "properties": {
9
9
  "generatedAt": {
@@ -20,7 +20,7 @@
20
20
  },
21
21
  "summary": {
22
22
  "type": "object",
23
- "required": ["snapshots", "tokenUsageRecords", "usageLimitSignals", "resetDriftWindows", "highCachedInputRecords", "drainExperiments", "overheadSignals"],
23
+ "required": ["snapshots", "tokenUsageRecords", "usageLimitSignals", "resetDriftWindows", "highCachedInputRecords", "cacheRecords", "cacheCollapseEvents", "drainExperiments", "overheadSignals"],
24
24
  "additionalProperties": false,
25
25
  "properties": {
26
26
  "snapshots": {
@@ -43,6 +43,14 @@
43
43
  "type": "integer",
44
44
  "minimum": 0
45
45
  },
46
+ "cacheRecords": {
47
+ "type": "integer",
48
+ "minimum": 0
49
+ },
50
+ "cacheCollapseEvents": {
51
+ "type": "integer",
52
+ "minimum": 0
53
+ },
46
54
  "drainExperiments": {
47
55
  "type": "integer",
48
56
  "minimum": 0
@@ -65,6 +73,18 @@
65
73
  "$ref": "#/$defs/tokenUsage"
66
74
  }
67
75
  },
76
+ "cacheRecords": {
77
+ "type": "array",
78
+ "items": {
79
+ "$ref": "#/$defs/cacheRecord"
80
+ }
81
+ },
82
+ "cacheCollapseEvents": {
83
+ "type": "array",
84
+ "items": {
85
+ "$ref": "#/$defs/cacheCollapseEvent"
86
+ }
87
+ },
68
88
  "drainExperiments": {
69
89
  "type": "array",
70
90
  "items": {
@@ -160,6 +180,7 @@
160
180
  "quota_percentage_jump",
161
181
  "usage_limit_with_remaining_quota",
162
182
  "high_cached_input",
183
+ "prompt_cache_collapse",
163
184
  "high_total_tokens",
164
185
  "orchestration_overhead_signal",
165
186
  "rapid_quota_drain_experiment"
@@ -204,7 +225,7 @@
204
225
  },
205
226
  "receipt": {
206
227
  "type": "object",
207
- "required": ["quotaWindows", "localTokenTotals", "drainExperiments", "overheadSignals", "suspectedCauses"],
228
+ "required": ["quotaWindows", "localTokenTotals", "drainExperiments", "overheadSignals", "cacheCollapseEvents", "suspectedCauses"],
208
229
  "additionalProperties": false,
209
230
  "properties": {
210
231
  "quotaWindows": {
@@ -228,6 +249,12 @@
228
249
  "$ref": "#/$defs/overheadSignal"
229
250
  }
230
251
  },
252
+ "cacheCollapseEvents": {
253
+ "type": "array",
254
+ "items": {
255
+ "$ref": "#/$defs/cacheCollapseEvent"
256
+ }
257
+ },
231
258
  "suspectedCauses": {
232
259
  "type": "array",
233
260
  "items": {
@@ -321,6 +348,86 @@
321
348
  }
322
349
  }
323
350
  },
351
+ "cacheRecord": {
352
+ "type": "object",
353
+ "required": ["source", "line", "excerpt"],
354
+ "additionalProperties": false,
355
+ "properties": {
356
+ "source": {
357
+ "type": "string"
358
+ },
359
+ "line": {
360
+ "type": "integer",
361
+ "minimum": 1
362
+ },
363
+ "inputTokens": {
364
+ "type": "number"
365
+ },
366
+ "cachedInputTokens": {
367
+ "type": "number"
368
+ },
369
+ "cacheHitPercent": {
370
+ "type": "number"
371
+ },
372
+ "promptCacheKey": {
373
+ "type": "string"
374
+ },
375
+ "responseId": {
376
+ "type": "string"
377
+ },
378
+ "transport": {
379
+ "type": "string"
380
+ },
381
+ "outcome": {
382
+ "type": "string"
383
+ },
384
+ "excerpt": {
385
+ "type": "string"
386
+ }
387
+ }
388
+ },
389
+ "cacheCollapseEvent": {
390
+ "type": "object",
391
+ "required": ["source", "line", "previousLine", "cachedInputTokens", "previousCachedInputTokens", "dropPercent", "excerpt"],
392
+ "additionalProperties": false,
393
+ "properties": {
394
+ "source": {
395
+ "type": "string"
396
+ },
397
+ "line": {
398
+ "type": "integer",
399
+ "minimum": 1
400
+ },
401
+ "previousLine": {
402
+ "type": "integer",
403
+ "minimum": 1
404
+ },
405
+ "inputTokens": {
406
+ "type": "number"
407
+ },
408
+ "cachedInputTokens": {
409
+ "type": "number"
410
+ },
411
+ "previousCachedInputTokens": {
412
+ "type": "number"
413
+ },
414
+ "dropPercent": {
415
+ "type": "number"
416
+ },
417
+ "promptCacheKey": {
418
+ "type": "string"
419
+ },
420
+ "responseId": {
421
+ "type": "string"
422
+ },
423
+ "previousResponseId": {
424
+ "type": "string"
425
+ },
426
+ "excerpt": {
427
+ "type": "string"
428
+ }
429
+ }
430
+ },
324
431
  "overheadSignal": {
325
432
  "type": "object",
326
433
  "required": ["kind", "source", "line", "excerpt"],