trace-to-skill 0.1.83 → 0.1.85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -4
- package/dist/src/benchmark.js +6 -0
- package/dist/src/benchmark.js.map +1 -1
- package/dist/src/cli.js +13 -0
- package/dist/src/cli.js.map +1 -1
- package/dist/src/demo.js +8 -0
- package/dist/src/demo.js.map +1 -1
- package/dist/src/index.d.ts +2 -0
- package/dist/src/index.js +1 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/issueMap.d.ts +48 -0
- package/dist/src/issueMap.js +311 -0
- package/dist/src/issueMap.js.map +1 -0
- package/dist/src/ossBrief.js +3 -2
- package/dist/src/ossBrief.js.map +1 -1
- package/dist/src/rules.js +17 -0
- package/dist/src/rules.js.map +1 -1
- package/dist/src/types.d.ts +1 -1
- package/docs/BENCHMARK.md +1 -0
- package/docs/CODEX_GITHUB_ISSUE_PAIN_MAP.md +115 -0
- package/docs/CODEX_ISSUE_MAP.md +3 -0
- package/docs/DEMO.md +2 -0
- package/docs/DISCOVERY.md +6 -1
- package/docs/FAILURE_TAXONOMY.md +8 -0
- package/docs/OPENAI_OSS_BRIEF.md +6 -6
- package/docs/SCORECARD.md +2 -1
- package/docs/USE_CASES.md +68 -36
- package/fixtures/codex-usage-bucket-confusion.md +33 -0
- package/fixtures/github-codex-issues-export.json +71 -0
- package/llms.txt +4 -1
- package/package.json +10 -2
- package/schemas/analysis-result.schema.json +1 -0
package/docs/USE_CASES.md
CHANGED
|
@@ -24,6 +24,7 @@ npx trace-to-skill demo mcp-streamable-http
|
|
|
24
24
|
npx trace-to-skill demo hooks-runtime
|
|
25
25
|
npx trace-to-skill demo terminal-output-integrity
|
|
26
26
|
npx trace-to-skill demo subagent-lifecycle
|
|
27
|
+
npx trace-to-skill demo usage-bucket-confusion
|
|
27
28
|
npx trace-to-skill sensitive-audit .
|
|
28
29
|
npx trace-to-skill sensitive-audit . --format ignore --ignore-target codexignore --output .codexignore.generated
|
|
29
30
|
npx trace-to-skill lsp-audit .
|
|
@@ -33,7 +34,7 @@ What it proves:
|
|
|
33
34
|
|
|
34
35
|
- packaged fixtures can produce a real Codex issue report immediately
|
|
35
36
|
- maintainers can inspect the output shape before sharing any private log
|
|
36
|
-
- demos cover remote compact failures, context fork bloat, subagent prompt leakage, Windows helper path failures, patch overwrite safety, approval friction, latency, Thinking hangs, clipboard/attachment regressions, deeplink/OAuth launch regressions, connector auth-cache regressions, MCP discovery/config-scope mismatches, Streamable HTTP MCP parse/handshake failures, hooks runtime failures, terminal output/scrollback integrity, subagent lifecycle drift, token burn, sensitive files, and prompt injection
|
|
37
|
+
- demos cover remote compact failures, context fork bloat, subagent prompt leakage, usage bucket confusion, Windows helper path failures, patch overwrite safety, approval friction, latency, Thinking hangs, clipboard/attachment regressions, deeplink/OAuth launch regressions, connector auth-cache regressions, MCP discovery/config-scope mismatches, Streamable HTTP MCP parse/handshake failures, hooks runtime failures, terminal output/scrollback integrity, subagent lifecycle drift, token burn, sensitive files, and prompt injection
|
|
37
38
|
- `sensitive-audit` scans filenames and paths before an agent run, without reading file contents, so teams can build `.agentignore`, `.aiexclude`, `.codexignore`, `.gitignore`, or sandbox permission profiles from a concrete repo report
|
|
38
39
|
- `lsp-audit` scans repo language signals and PATH availability so teams know which language servers are ready before asking Codex for symbol-aware edits
|
|
39
40
|
|
|
@@ -58,7 +59,7 @@ What it proves:
|
|
|
58
59
|
Recommended CI surface:
|
|
59
60
|
|
|
60
61
|
```yaml
|
|
61
|
-
- uses: grnbtqdbyx-create/trace-to-skill@v0.1.83
|
|
62
|
+
- uses: grnbtqdbyx-create/trace-to-skill@v0.1.85
|
|
62
63
|
with:
|
|
63
64
|
mode: all
|
|
64
65
|
doctor-threshold: "85"
|
|
@@ -68,7 +69,24 @@ Recommended CI surface:
|
|
|
68
69
|
github-token: ${{ github.token }}
|
|
69
70
|
```
|
|
70
71
|
|
|
71
|
-
## 3. AGENTS.md And MCP Hygiene
|
|
72
|
+
## 3. GitHub Issue Demand Mining
|
|
73
|
+
|
|
74
|
+
Use this when you want to see what Codex users are actually complaining about on GitHub before choosing the next fixture, report template, or diagnostic helper.
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
gh issue list --repo openai/codex --state open --limit 100 --json number,title,body,url,labels,comments,createdAt,updatedAt > codex-issues.json
|
|
78
|
+
npx trace-to-skill issue-map codex-issues.json --output codex-issue-map.md
|
|
79
|
+
npx trace-to-skill issue-map codex-issues.json --format json
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
What it proves:
|
|
83
|
+
|
|
84
|
+
- public issue clusters can be ranked without private telemetry
|
|
85
|
+
- high-comment pain points are mapped to deterministic failure classes such as token burn, remote compact, MCP discovery, usage buckets, context drift, sandbox, and resource leaks
|
|
86
|
+
- maintainers get example issue links and evidence-rule prompts for the next support artifact
|
|
87
|
+
- OSS builders can show that new work is grounded in real GitHub demand instead of generic agent demos
|
|
88
|
+
|
|
89
|
+
## 4. AGENTS.md And MCP Hygiene
|
|
72
90
|
|
|
73
91
|
Use this before giving Codex broad repository access.
|
|
74
92
|
|
|
@@ -88,7 +106,7 @@ This checks:
|
|
|
88
106
|
|
|
89
107
|
The goal is not to ban powerful tools. The goal is to make trust boundaries visible before an agent acts.
|
|
90
108
|
|
|
91
|
-
## 4. Language-Server Readiness Before Agent Edits
|
|
109
|
+
## 5. Language-Server Readiness Before Agent Edits
|
|
92
110
|
|
|
93
111
|
Use this when a repo wants Codex to navigate definitions, references, diagnostics, or rename/refactor flows, but language-server setup differs across machines.
|
|
94
112
|
|
|
@@ -106,7 +124,7 @@ What it proves:
|
|
|
106
124
|
|
|
107
125
|
It does not auto-install anything or grant new permissions; it is a readiness report.
|
|
108
126
|
|
|
109
|
-
## 5. Sandbox And Permission Failure Triage
|
|
127
|
+
## 6. Sandbox And Permission Failure Triage
|
|
110
128
|
|
|
111
129
|
Use this when Codex cannot start tools, apply patches, or write to the workspace because sandbox setup or permissions fail.
|
|
112
130
|
|
|
@@ -127,7 +145,7 @@ For Codex App reports where Speed resets from Fast to Standard after restart, in
|
|
|
127
145
|
|
|
128
146
|
`diagnostics-bundle` combines the config, plugin, and session summaries into a metadata-only support folder with a manifest and README. Use it when OpenAI asks for more evidence but raw `config.toml`, SQLite state, rollout JSONL, and local logs should not be posted publicly.
|
|
129
147
|
|
|
130
|
-
## 6. Codex Auth And Connectivity Triage
|
|
148
|
+
## 7. Codex Auth And Connectivity Triage
|
|
131
149
|
|
|
132
150
|
Use this when Codex cannot log in, exchange an auth token, stream a response, or connect through a container, proxy, VPN, corporate CA, IPv6 network, or Cloudflare challenge.
|
|
133
151
|
|
|
@@ -137,7 +155,7 @@ npx trace-to-skill analyze ./runs --format json
|
|
|
137
155
|
|
|
138
156
|
This catches signals such as `token_exchange_failed`, `auth.openai.com/oauth/token`, `codex_login::server`, `cf-mitigated: challenge`, missing `ca-certificates`, `update-ca-certificates`, `CODEX_CA_CERTIFICATE`, IPv6 fallback evidence, proxy/MITM TLS failures, and `stream disconnected before completion` on `chatgpt.com/backend-api/codex/responses`.
|
|
139
157
|
|
|
140
|
-
## 7. Codex Remote Compact Failure Triage
|
|
158
|
+
## 8. Codex Remote Compact Failure Triage
|
|
141
159
|
|
|
142
160
|
Use this when `/compact` or auto-compaction fails during a long Codex session and the user cannot continue without recreating context.
|
|
143
161
|
|
|
@@ -147,7 +165,7 @@ npx trace-to-skill codex-report ./runs --output openai-codex-compact-issue.md
|
|
|
147
165
|
|
|
148
166
|
This catches signals such as `Error running remote compact task`, `timeout waiting for child process to exit`, `stream disconnected before completion`, `responses/compact`, `tcp_user_timeout`, `stream_idle_timeout_ms`, provider-id timeout workarounds, Azure provider config drift, and long-running tasks broken by failed compaction.
|
|
149
167
|
|
|
150
|
-
## 8. Codex Context Fork Bloat Evidence
|
|
168
|
+
## 9. Codex Context Fork Bloat Evidence
|
|
151
169
|
|
|
152
170
|
Use this when a conversation fork carries duplicate parent context, inflates token counts, or breaks prompt-cache lineage before new work happens.
|
|
153
171
|
|
|
@@ -161,7 +179,7 @@ This catches signals such as forked conversations carrying the full parent trans
|
|
|
161
179
|
|
|
162
180
|
Include Codex app/CLI/extension version, surface, model, fork source thread id, forked thread id, fork action timestamp, fork boundary marker, `input_tokens` and `cached_input_tokens` before and after the fork, `prompt_cache_key` before and after, cache hit rate, duplicated parent-turn or tool-transcript examples with line ids, whether new files were read before the token jump, compaction state, subagent or `fork_context` history, minimal reproduction steps, and whether a fresh thread or non-fork continuation avoids the bloat.
|
|
163
181
|
|
|
164
|
-
## 9. Codex Subagent Prompt Leakage Evidence
|
|
182
|
+
## 10. Codex Subagent Prompt Leakage Evidence
|
|
165
183
|
|
|
166
184
|
Use this when `spawn_agent` child tasks are not isolated, especially with `fork_turns: "none"` or same-turn parallel subagent spawning.
|
|
167
185
|
|
|
@@ -175,7 +193,7 @@ This catches signals such as delegated `spawn_agent` messages recorded as assist
|
|
|
175
193
|
|
|
176
194
|
Include Codex Desktop/app/CLI version, MultiAgentV2 state, OS, model, parent thread id, child thread ids, exact `spawn_agent` arguments, `fork_turns`, role/profile, whether `multi_tool_use.parallel` or same-turn parallel spawning was used, redacted child rollout line order, first user/task message, assistant/commentary envelope lines, sibling prompt excerpts, `wait_agent` and `close_agent` results, unexpected child tool calls, and sequential single-child versus parallel-child controls.
|
|
177
195
|
|
|
178
|
-
## 10. Codex Usage Evidence Packaging
|
|
196
|
+
## 11. Codex Usage Evidence Packaging
|
|
179
197
|
|
|
180
198
|
Use this when a Codex usage issue has scattered evidence across `/status`, dashboard notes, reset tables, token totals, prompt-cache rows, cached input, and local overhead clues.
|
|
181
199
|
|
|
@@ -195,7 +213,21 @@ The receipt separates:
|
|
|
195
213
|
- orchestration-overhead signals that may burn usage without accepted work
|
|
196
214
|
- suspected cause buckets to keep public reports comparable
|
|
197
215
|
|
|
198
|
-
## 11. Codex Windows Helper Path Triage
|
|
216
|
+
## 12. Codex Usage Bucket Scope Evidence
|
|
217
|
+
|
|
218
|
+
Use this when Codex usage UI shows 5h and weekly percentages but does not explain the accounting scope.
|
|
219
|
+
|
|
220
|
+
```bash
|
|
221
|
+
npx trace-to-skill demo usage-bucket-confusion
|
|
222
|
+
npx trace-to-skill analyze ./runs --format json
|
|
223
|
+
npx trace-to-skill codex-report ./runs --output openai-codex-usage-bucket-confusion.md
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
This catches signals such as `Usage remaining`, `5h 97%`, `Weekly 95%`, reset dates without scope labels, percent remaining versus percent used ambiguity, rolling 7-day versus natural-week ambiguity, and uncertainty about whether app, CLI, cloud tasks, reviews, other devices, or other workspaces share the weekly pool.
|
|
227
|
+
|
|
228
|
+
Include subscription plan, account/workspace, app/CLI version, surface, timestamp, screenshot or redacted popover text, 5h percentage, weekly percentage, reset time/date, whether values are used or remaining, whether weekly is rolling or calendar-based, whether weekly includes app/CLI/cloud/review usage, `/status` output, usage dashboard state, and whether other devices or workspaces show the same values.
|
|
229
|
+
|
|
230
|
+
## 13. Codex Windows Helper Path Triage
|
|
199
231
|
|
|
200
232
|
Use this when Codex Desktop on Windows discovers bundled tools or plugin helpers but cannot execute them from the integrated terminal, tool runner, Browser, Chrome, Computer Use, or node_repl path.
|
|
201
233
|
|
|
@@ -205,7 +237,7 @@ npx trace-to-skill codex-report ./runs --output openai-codex-windows-helper-issu
|
|
|
205
237
|
|
|
206
238
|
This catches signals such as `Program 'rg.exe' failed to run`, `Access is denied`, `WindowsApps\OpenAI.Codex...\app\resources`, missing `%LOCALAPPDATA%\OpenAI\Codex\bin`, missing MSIX LocalCache helper bins, `CodexSandboxUsers` ACL/RX problems, `copyfile` failures from WindowsApps bundled plugin manifests, EFS/Application Protected attributes, `windows sandbox failed: spawn setup refresh`, `missing-helper-path`, and unavailable Browser/Chrome/Computer Use plugin helpers.
|
|
207
239
|
|
|
208
|
-
## 12. Codex Mobile And Remote-Control Route Health
|
|
240
|
+
## 14. Codex Mobile And Remote-Control Route Health
|
|
209
241
|
|
|
210
242
|
Use this when Codex mobile, SSH remote, or desktop remote-control says it is connected but commands do not reach the expected host, workspace, or app-server.
|
|
211
243
|
|
|
@@ -215,7 +247,7 @@ npx trace-to-skill analyze ./runs --format json
|
|
|
215
247
|
|
|
216
248
|
This catches signals such as `Waiting for desktop`, `Directory: Unavailable`, stale `server_name` enrollment, stale remote-control listener, `127.0.0.1:14567`, missing cached helper files such as `codex-windows-sandbox-setup.exe` or `codex-command-runner.exe`, empty backend environments, stale Android session lists, and temporary recovery after re-pairing or listener restart.
|
|
217
249
|
|
|
218
|
-
## 13. Codex Terminal Output And Scrollback Integrity
|
|
250
|
+
## 15. Codex Terminal Output And Scrollback Integrity
|
|
219
251
|
|
|
220
252
|
Use this when Codex terminal output, streamed assistant text, or scrollback becomes untrustworthy even though raw logs, transcripts, or transaction views still contain the missing lines.
|
|
221
253
|
|
|
@@ -229,7 +261,7 @@ This catches signals such as Windows Terminal scrollback lines disappearing, str
|
|
|
229
261
|
|
|
230
262
|
Include the Codex CLI/app/extension version, OS, shell, terminal emulator and version, WSL/SSH/tmux/Zellij state, model, whether streaming was active, exact scroll action, terminal dimensions and scrollback settings, first missing or duplicated line id, raw log/transcript proof, terminal capture, numbered-line harness output, control run, `/resume` or transcript recovery behavior, and whether another terminal or downgrade changes the result.
|
|
231
263
|
|
|
232
|
-
## 14. Codex Subagent Lifecycle And State Reconciliation
|
|
264
|
+
## 16. Codex Subagent Lifecycle And State Reconciliation
|
|
233
265
|
|
|
234
266
|
Use this when subagents appear completed, closed, stale, or interrupted but Codex cannot reconcile UI state, live handles, persisted spawn edges, parent discoverability, and active spawn quota.
|
|
235
267
|
|
|
@@ -243,7 +275,7 @@ This catches signals such as completed subagents remaining visible in the Subage
|
|
|
243
275
|
|
|
244
276
|
Include Codex app/CLI/extension version, OS, surface, model, subscription/workspace, root thread id, subagent ids/nicknames/roles, spawn/close/list commands, `close_agent` results, `list_agents` or `/agents` output, `thread_spawn_edges` status counts, `agents.max_threads` or registry quota evidence, recent-list/sidebar behavior, child-thread archive/top-level status, last-progress or halt reason, MCP server state, compaction/resume timing, redacted UI evidence, restart/reload behavior, and whether stale agents are UI-only or still block spawns.
|
|
245
277
|
|
|
246
|
-
## 15. Codex MCP Runtime Triage
|
|
278
|
+
## 17. Codex MCP Runtime Triage
|
|
247
279
|
|
|
248
280
|
Use this when MCP tools are configured and visible, but Codex cannot actually call them at runtime.
|
|
249
281
|
|
|
@@ -254,7 +286,7 @@ npx trace-to-skill config-audit ~/.codex --format json
|
|
|
254
286
|
|
|
255
287
|
This catches signals such as `user cancelled MCP tool call`, `request_user_input is not supported in exec mode`, `Approve app tool call?`, `tool_call_mcp_elicitation`, routed callable names like `mcp__node_repl__js` becoming `unsupported call`, deferred discovery dropping namespace or `serverName`, `tools/list` succeeding while Codex routing fails, and stdio transport lifecycle failures such as `Transport closed`, `stdin_end`, `stdin_close`, `transport_close`, or stderr backpressure.
|
|
256
288
|
|
|
257
|
-
## 16. Codex Resume And Session State Triage
|
|
289
|
+
## 18. Codex Resume And Session State Triage
|
|
258
290
|
|
|
259
291
|
Use this when long Codex sessions become difficult to resume, Desktop history rendering gets sluggish, or local state migrations break goals/projects/history.
|
|
260
292
|
|
|
@@ -271,7 +303,7 @@ This catches signals such as `codex resume` picker hangs, `codex resume <id>` wo
|
|
|
271
303
|
|
|
272
304
|
For mixed resume, crash, config, plugin, or history issues, `diagnostics-bundle` writes the session, config, and plugin reports together with a checklist of files not to attach publicly.
|
|
273
305
|
|
|
274
|
-
## 17. Codex File Tree UI Evidence
|
|
306
|
+
## 19. Codex File Tree UI Evidence
|
|
275
307
|
|
|
276
308
|
Use this when Codex Desktop cannot reveal project files through the native file tree, folder icon, floating file panel, or built-in preview.
|
|
277
309
|
|
|
@@ -282,7 +314,7 @@ npx trace-to-skill codex-report ./runs --output openai-codex-issue.md
|
|
|
282
314
|
|
|
283
315
|
This catches signals such as `View > Toggle File Tree` doing nothing, `Cmd+Shift+E` or `Ctrl+Shift+E` having no visible effect, the folder icon disappearing, the floating file panel showing stale or unclickable entries after add/rename/delete operations, and `.doc`, `.pdf`, or `.ppt` previews failing until restart.
|
|
284
316
|
|
|
285
|
-
## 18. Codex Token Burn Attribution
|
|
317
|
+
## 20. Codex Token Burn Attribution
|
|
286
318
|
|
|
287
319
|
Use this when Codex usage drains faster than expected and the trace needs to separate useful model work from orchestration overhead.
|
|
288
320
|
|
|
@@ -295,7 +327,7 @@ This catches signals such as tokens `burning very fast`, usage dropping by visib
|
|
|
295
327
|
|
|
296
328
|
For public reports, prefer `usage-evidence` first so the quota-window, local-token, and orchestration-overhead layers are visible separately.
|
|
297
329
|
|
|
298
|
-
## 19. Usage Reset Drift Evidence
|
|
330
|
+
## 21. Usage Reset Drift Evidence
|
|
299
331
|
|
|
300
332
|
Use this when Codex reset timing changes unexpectedly or users lose the ability to plan paid usage.
|
|
301
333
|
|
|
@@ -306,7 +338,7 @@ npx trace-to-skill codex-report ./runs --output openai-codex-issue.md
|
|
|
306
338
|
|
|
307
339
|
This catches signals such as weekly reset dates moving from one date to another, `reset_at` jumping after the first prompt, saved weekly usage being wiped or pushed into the next window, outage compensation resets changing the anchor, `/status` and dashboard disagreement, and requests for deterministic reset schedules or rollover of unused prior-window usage.
|
|
308
340
|
|
|
309
|
-
## 20. Quota And Usage-Limit Evidence
|
|
341
|
+
## 22. Quota And Usage-Limit Evidence
|
|
310
342
|
|
|
311
343
|
Use this when Codex blocks a prompt with a usage-limit message but another surface still shows remaining quota.
|
|
312
344
|
|
|
@@ -316,7 +348,7 @@ npx trace-to-skill analyze ./runs --format json
|
|
|
316
348
|
|
|
317
349
|
This catches traces where `/status` or the usage page shows remaining 5h or weekly quota, accounts appear to share limits unexpectedly, a Team account inherits a Plus account's limit state, or quota reset times jump after logout/login.
|
|
318
350
|
|
|
319
|
-
## 21. Codex Resource Leak Evidence
|
|
351
|
+
## 23. Codex Resource Leak Evidence
|
|
320
352
|
|
|
321
353
|
Use this when Codex Desktop, the VS Code extension, renderer, app-server, GPU process, shell snapshot, or helper process keeps burning local resources after the useful work should be idle.
|
|
322
354
|
|
|
@@ -338,7 +370,7 @@ npx trace-to-skill process-audit ./process-notes.md --format json
|
|
|
338
370
|
|
|
339
371
|
`process-audit` packages Task Manager, System Informer, `Get-CimInstance`, `ps`, `top`, or handwritten process measurement snippets into a smaller public report. It detects PowerShell/pwsh CIM polling such as `Get-CimInstance Win32_Process`, high-CPU Codex/helper/renderer samples, stale `process_manager/chat_processes.json` mentions, and runaway helper signals without inspecting live processes or asking users to post full raw process dumps.
|
|
340
372
|
|
|
341
|
-
## 22. Codex Thinking Hang Evidence
|
|
373
|
+
## 24. Codex Thinking Hang Evidence
|
|
342
374
|
|
|
343
375
|
Use this when Codex accepts a prompt, finishes a local tool call, or keeps a Responses stream open but the UI/CLI remains on Thinking or Working with no visible assistant follow-up.
|
|
344
376
|
|
|
@@ -352,7 +384,7 @@ This catches signals such as `turn/start`, `task_started`, a completed local too
|
|
|
352
384
|
|
|
353
385
|
Include the Codex version, OS, model and reasoning/speed settings, turn or thread id, prompt timestamp, last successful tool output, first `response_item` timestamp, `responses_http` or websocket transport evidence, `time.busy` / `time.idle`, MCP/subagent state, stop/interrupt behavior, and whether a new thread or minimal config recovers.
|
|
354
386
|
|
|
355
|
-
## 23. Codex Clipboard And Pasted-Text Attachment Evidence
|
|
387
|
+
## 25. Codex Clipboard And Pasted-Text Attachment Evidence
|
|
356
388
|
|
|
357
389
|
Use this when copy/export, long pasted prompts, or generated `Pasted text.txt` attachments break Codex prompt, `/goal`, or support-report workflows.
|
|
358
390
|
|
|
@@ -366,7 +398,7 @@ This catches signals such as `Copy as Markdown` disappearing from the Copy menu,
|
|
|
366
398
|
|
|
367
399
|
Include app version, OS, surface, exact copy menu items, source text size, paste action, visible editor text, generated attachment name/path/size, `pasted-text-attachments.json` or fileAttachments metadata, command path such as `/goal`, preview/edit/revert actions tried, clipboard payload format, and whether paste-as-text, opt-out, explicit file reference, or downgrade changes behavior.
|
|
368
400
|
|
|
369
|
-
## 24. Codex Deeplink And External Launch Evidence
|
|
401
|
+
## 26. Codex Deeplink And External Launch Evidence
|
|
370
402
|
|
|
371
403
|
Use this when OAuth callbacks, notification clicks, browser extension activation, mobile pairing, or CLI app-open commands fail to route back into Codex.
|
|
372
404
|
|
|
@@ -380,7 +412,7 @@ This catches signals such as `codex://oauth_callback?code=...` opening an Electr
|
|
|
380
412
|
|
|
381
413
|
Include app/CLI/extension version, OS/build, install source, package id/path, affected surface, exact redacted URI shape, browser and connector/plugin name, error dialog text, whether the app was already running, AppX/MSIX evidence such as AppUserModelID and DelegateExecute, HKCU/HKCR `codex` keys, command-line arguments, repair/reinstall/re-register attempts, and whether manual `codex://test` or `Start-Process` reproduces.
|
|
382
414
|
|
|
383
|
-
## 25. Codex App Connector Auth Cache Evidence
|
|
415
|
+
## 27. Codex App Connector Auth Cache Evidence
|
|
384
416
|
|
|
385
417
|
Use this when Codex app connectors appear installed but keep stale auth or discovery metadata after a reauth-required response.
|
|
386
418
|
|
|
@@ -394,7 +426,7 @@ This catches signals such as `401: "Server returned 401: 'Reauthentication requi
|
|
|
394
426
|
|
|
395
427
|
Include app/CLI version, OS, connector/plugin name and id, installed plugin root, exact tool name, redacted `codex_apps_tools` and `codex_app_directory` metadata, `link_*` id before/after reconnect, `isAccessible` state, restart/remove/re-add/cache-clear/sign-in attempts, ChatGPT app page state, and whether an external MCP workaround succeeds.
|
|
396
428
|
|
|
397
|
-
## 26. Codex MCP Discovery And Config Scope Evidence
|
|
429
|
+
## 28. Codex MCP Discovery And Config Scope Evidence
|
|
398
430
|
|
|
399
431
|
Use this when MCP servers work in Codex CLI or one config scope but are missing in VS Code, Desktop, WSL, remote sessions, project-local config, or an older conversation.
|
|
400
432
|
|
|
@@ -408,7 +440,7 @@ This catches signals such as `MCP servers not detected in Codex VS Code extensio
|
|
|
408
440
|
|
|
409
441
|
Include app/CLI/extension version, OS, IDE, remote/WSL/SSH state, workspace root, effective `CODEX_HOME`, all config files considered (`~/.codex/config.toml`, project `.codex/config.toml`, `.vscode/mcp.json`, `.mcp.json`), redacted MCP sections, trust/profile/default-permissions state, `codex mcp list`, `codex mcp get <server>`, CLI-versus-Desktop/VS Code comparison, loaded config path/log lines, whether moving the same server to user-global config fixes it, and whether the current session exposes `mcp__*` tools.
|
|
410
442
|
|
|
411
|
-
## 27. Codex Streamable HTTP MCP Evidence
|
|
443
|
+
## 29. Codex Streamable HTTP MCP Evidence
|
|
412
444
|
|
|
413
445
|
Use this when a Streamable HTTP or SSE MCP server is reachable but Codex fails during JSON-RPC parsing, handshake, auth gating, stale session reuse, or reconnect.
|
|
414
446
|
|
|
@@ -422,7 +454,7 @@ This catches signals such as Penpot `JsonRpcMessage deserialize` or response-par
|
|
|
422
454
|
|
|
423
455
|
Include Codex version, MCP server name, transport URL without secrets, initialize/tools/list/tools/call results, HTTP status, `Content-Type`, SSE event framing, JSON-RPC message shape, session id before and after reconnect or server restart, auth/OAuth expectations, User-Agent/header requirements, exact parse/deserialize error, whether curl or another MCP client succeeds, and whether restarting Codex or reinitializing the transport recovers.
|
|
424
456
|
|
|
425
|
-
## 28. Codex Hooks Runtime Evidence
|
|
457
|
+
## 30. Codex Hooks Runtime Evidence
|
|
426
458
|
|
|
427
459
|
Use this when Codex hooks duplicate, stop firing, emit stale deprecation warnings, behave differently across CLI/Desktop/Code Mode/Windows, or become hard to inspect in settings.
|
|
428
460
|
|
|
@@ -436,7 +468,7 @@ This catches signals such as duplicate Hooks entries for one tool call, `PostToo
|
|
|
436
468
|
|
|
437
469
|
Include Codex app/CLI/extension version, OS, surface, shell or Desktop route, `[features].hooks` and `hooks.json` snippets without secrets, hook event type, matcher, handler command/name, expected versus observed fire count, duplicate event ids, exact deprecation warning, trust state, live-edit/rate-limit/auto-restore timing, Code Mode `exec` versus normal CLI comparison, linked-worktree cwd, Hooks settings UI screenshot if relevant, and whether restart/reload/new session restores behavior.
|
|
438
470
|
|
|
439
|
-
## 29. Patch Overwrite Guard
|
|
471
|
+
## 31. Patch Overwrite Guard
|
|
440
472
|
|
|
441
473
|
Use this before applying a generated patch when you want create/update/delete semantics checked against the actual workspace.
|
|
442
474
|
|
|
@@ -453,7 +485,7 @@ For a public demo report:
|
|
|
453
485
|
npx trace-to-skill demo patch-overwrite
|
|
454
486
|
```
|
|
455
487
|
|
|
456
|
-
## 30. Sensitive Path Preflight Before Agent Runs
|
|
488
|
+
## 32. Sensitive Path Preflight Before Agent Runs
|
|
457
489
|
|
|
458
490
|
Use this before giving an AI coding agent a repository.
|
|
459
491
|
|
|
@@ -468,7 +500,7 @@ This finds sensitive-looking paths such as `.env`, `.env.*`, `.npmrc`, `.pypirc`
|
|
|
468
500
|
|
|
469
501
|
The output includes a stable JSON schema plus recommended exclude globs that can seed `.agentignore`, `.aiexclude`, `.codexignore`, `.gitignore`, local sandbox permission profiles, or team security review checklists. `--format ignore` renders a reviewable generated file candidate and still does not mutate the repo. It is a preflight report, not a sandbox boundary.
|
|
470
502
|
|
|
471
|
-
## 31. Workspace Checkpoint Before Agent Runs
|
|
503
|
+
## 33. Workspace Checkpoint Before Agent Runs
|
|
472
504
|
|
|
473
505
|
Use this before giving Codex, Claude, Cursor, or another coding agent a dirty repository where untracked local work matters.
|
|
474
506
|
|
|
@@ -481,7 +513,7 @@ This writes a local checkpoint bundle with `status.txt`, staged and unstaged bin
|
|
|
481
513
|
|
|
482
514
|
This is useful for OpenAI/Codex `/undo` and `/rewind` discussions where users need workspace protection beyond conversation rewind, especially when untracked files are outside normal commit history.
|
|
483
515
|
|
|
484
|
-
## 32. OpenAI Codex Issue Report
|
|
516
|
+
## 34. OpenAI Codex Issue Report
|
|
485
517
|
|
|
486
518
|
Use this when you want to file or update an OpenAI/Codex issue with a concise, evidence-backed report instead of pasting a full transcript.
|
|
487
519
|
|
|
@@ -494,7 +526,7 @@ The report includes the likely Codex failure class, line-linked evidence, diagno
|
|
|
494
526
|
|
|
495
527
|
For a cluster-to-command map of current Codex issue patterns, see [CODEX_ISSUE_MAP.md](CODEX_ISSUE_MAP.md).
|
|
496
528
|
|
|
497
|
-
## 33. Sensitive File Access Evidence
|
|
529
|
+
## 35. Sensitive File Access Evidence
|
|
498
530
|
|
|
499
531
|
Use this when a trace suggests an agent read, attached, uploaded, diffed, or indexed credential-bearing files.
|
|
500
532
|
|
|
@@ -507,7 +539,7 @@ This catches signals such as `.env`, `.env.production`, `.npmrc`, `.pypirc`, `.n
|
|
|
507
539
|
|
|
508
540
|
Before publishing evidence, run `trace-to-skill redact` and attach only redacted excerpts plus the file path/class.
|
|
509
541
|
|
|
510
|
-
## 34. GitHub Context Guard
|
|
542
|
+
## 36. GitHub Context Guard
|
|
511
543
|
|
|
512
544
|
Use this before an agent reads untrusted GitHub text.
|
|
513
545
|
|
|
@@ -524,7 +556,7 @@ Use it when:
|
|
|
524
556
|
- a bot asks Codex to triage untrusted user reports
|
|
525
557
|
- logs or comments might contain instructions like "ignore previous instructions" or "print secrets"
|
|
526
558
|
|
|
527
|
-
## 35. Failed Agent Run To Reviewable Rule
|
|
559
|
+
## 37. Failed Agent Run To Reviewable Rule
|
|
528
560
|
|
|
529
561
|
Use this when a coding agent made a repeated workflow mistake.
|
|
530
562
|
|
|
@@ -542,7 +574,7 @@ Recommended maintainer loop:
|
|
|
542
574
|
4. Copy only evidence-backed rules into the real policy file.
|
|
543
575
|
5. Run `eval` or `scorecard` in CI so the same failure does not silently return.
|
|
544
576
|
|
|
545
|
-
## 36. Privacy-Preserving Adoption
|
|
577
|
+
## 38. Privacy-Preserving Adoption
|
|
546
578
|
|
|
547
579
|
Use this when you want public evidence without leaking private traces.
|
|
548
580
|
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Codex Usage Bucket Confusion
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
Codex Desktop usage popover shows short-term and weekly buckets as compact percentages, but the scope and semantics are unclear.
|
|
6
|
+
|
|
7
|
+
## Evidence
|
|
8
|
+
|
|
9
|
+
- The in-app usage popover shows `Usage remaining`.
|
|
10
|
+
- The 5h row shows `5h 97% 6:23 PM`.
|
|
11
|
+
- The Weekly row shows `Weekly 95% Jun 7`.
|
|
12
|
+
- The user believes this is the first 5-hour usage window of the week, so `5h: 97% remaining` and `Weekly: 95% remaining` look contradictory.
|
|
13
|
+
- The popover does not say whether percentages are percent remaining or percent used.
|
|
14
|
+
- The popover does not say whether the weekly bucket is a natural week, rolling 7-day window, or account-wide pool.
|
|
15
|
+
- It is unclear whether weekly usage includes Codex Desktop, CLI, cloud tasks, reviews, other devices, or other workspaces.
|
|
16
|
+
- Without scope labels, the usage popover looks like a metering bug or contradictory quota display.
|
|
17
|
+
|
|
18
|
+
## Expected
|
|
19
|
+
|
|
20
|
+
The UI should label the accounting scope explicitly:
|
|
21
|
+
|
|
22
|
+
```text
|
|
23
|
+
Current 5h window: 97% remaining
|
|
24
|
+
Weekly pool: 95% remaining
|
|
25
|
+
Includes all Codex usage across app, CLI, cloud tasks, reviews, and devices
|
|
26
|
+
Resets Jun 7
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
If the weekly bucket is rolling rather than calendar-week based, the popover should say `Rolling 7-day window`.
|
|
30
|
+
|
|
31
|
+
## Diagnostics To Attach
|
|
32
|
+
|
|
33
|
+
Include subscription plan, account/workspace, app/CLI version, surface, timestamp, screenshot or redacted text of the popover, 5h percentage, weekly percentage, reset time/date, whether the percentages mean used or remaining, whether weekly is rolling or calendar-based, whether the weekly pool includes app/CLI/cloud/review usage, `/status` output, usage dashboard state, and whether other devices or workspaces show the same values.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"number": 14593,
|
|
4
|
+
"title": "Burning tokens very fast",
|
|
5
|
+
"body": "Codex is burning tokens very fast while idle. Weekly usage drops 70% in one day with repeated compaction loops, retries, background polling, and no accepted edits. /status disagrees with the dashboard and cached input tokens keep replaying.",
|
|
6
|
+
"url": "https://github.com/openai/codex/issues/14593",
|
|
7
|
+
"labels": [
|
|
8
|
+
{ "name": "bug" },
|
|
9
|
+
{ "name": "rate-limits" }
|
|
10
|
+
],
|
|
11
|
+
"commentsCount": 593,
|
|
12
|
+
"reactions": { "totalCount": 41 },
|
|
13
|
+
"updatedAt": "2026-06-01T00:00:00Z"
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
"number": 14860,
|
|
17
|
+
"title": "Error running remote compact task",
|
|
18
|
+
"body": "The long Codex session fails during /compact. The client prints Error running remote compact task, responses/compact, stream disconnected before completion, and timeout waiting for child process to exit. A provider config workaround only changes the failure timing.",
|
|
19
|
+
"url": "https://github.com/openai/codex/issues/14860",
|
|
20
|
+
"labels": [
|
|
21
|
+
{ "name": "bug" },
|
|
22
|
+
{ "name": "context" }
|
|
23
|
+
],
|
|
24
|
+
"commentsCount": 90,
|
|
25
|
+
"reactions": { "+1": 15 },
|
|
26
|
+
"updatedAt": "2026-05-31T22:00:00Z"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"number": 6465,
|
|
30
|
+
"title": "MCP servers not detected in Codex VS Code extension but working in Codex CLI",
|
|
31
|
+
"body": "MCP servers not detected in the VS Code extension or Desktop, but working in Codex CLI from ~/.codex/config.toml. tools/list is empty in one surface, project .codex/config.toml is ignored, and serverName metadata is missing.",
|
|
32
|
+
"url": "https://github.com/openai/codex/issues/6465",
|
|
33
|
+
"labels": [
|
|
34
|
+
{ "name": "bug" },
|
|
35
|
+
{ "name": "extension" },
|
|
36
|
+
{ "name": "mcp" }
|
|
37
|
+
],
|
|
38
|
+
"comments": [
|
|
39
|
+
{ "body": "I can reproduce this across WSL and Desktop with the same config scope mismatch." }
|
|
40
|
+
],
|
|
41
|
+
"commentsCount": 55,
|
|
42
|
+
"reactions": { "totalCount": 8 },
|
|
43
|
+
"updatedAt": "2026-05-30T20:00:00Z"
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"number": 25471,
|
|
47
|
+
"title": "Codex usage popover shows confusing remaining percentages for 5h vs weekly buckets",
|
|
48
|
+
"body": "Usage remaining shows 5h 97% and Weekly 95% Jun 7. The popover does not say whether the percentages are percent remaining or percent used, whether weekly is rolling 7-day or calendar week, or whether app, CLI, cloud tasks, reviews, devices, and workspaces share the same pool.",
|
|
49
|
+
"url": "https://github.com/openai/codex/issues/25471",
|
|
50
|
+
"labels": [
|
|
51
|
+
{ "name": "enhancement" },
|
|
52
|
+
{ "name": "rate-limits" },
|
|
53
|
+
{ "name": "app" }
|
|
54
|
+
],
|
|
55
|
+
"commentsCount": 1,
|
|
56
|
+
"reactions": 2,
|
|
57
|
+
"updatedAt": "2026-06-01T01:00:00Z"
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"number": 99999,
|
|
61
|
+
"title": "Add a fun launch animation",
|
|
62
|
+
"body": "This is a cosmetic request with no evidence of a failed agent run.",
|
|
63
|
+
"url": "https://github.com/openai/codex/issues/99999",
|
|
64
|
+
"labels": [
|
|
65
|
+
{ "name": "enhancement" }
|
|
66
|
+
],
|
|
67
|
+
"commentsCount": 0,
|
|
68
|
+
"reactions": 0,
|
|
69
|
+
"updatedAt": "2026-05-01T00:00:00Z"
|
|
70
|
+
}
|
|
71
|
+
]
|
package/llms.txt
CHANGED
|
@@ -36,6 +36,7 @@ Runtime: Node.js 20+
|
|
|
36
36
|
- Codex app connector stale auth/cache regressions such as `401 Reauthentication required`, unchanged `link_*`, `isAccessible: false`, and broken `codex_apps_tools` metadata
|
|
37
37
|
- Codex context fork bloat and prompt-cache lineage failures where conversation forks duplicate parent transcript blocks, inflate `input_tokens`, change `prompt_cache_key`, drop cache hit rate, or leak `fork_context` subagent history into child context before new work happens
|
|
38
38
|
- Codex subagent prompt leakage where `spawn_agent` with `fork_turns: "none"` delivers assistant/commentary prompt envelopes, same-turn parallel children see sibling prompts, or `wait_agent`/`close_agent` completes despite the wrong child task
|
|
39
|
+
- Codex usage popover bucket confusion where `Usage remaining`, `5h`, weekly percentages, reset dates, percent remaining vs percent used, rolling 7-day vs calendar-week, or account/workspace/device scope are unclear
|
|
39
40
|
- Codex token-burn, prompt-cache collapse, and usage-drain failures such as rapid drain experiments (`1% in 4 minutes`, `22 credits`, `70% weekly in a day`), `input_tokens` / `cached_input_tokens` / `prompt_cache_key` rows, websocket reconnect cache drops, background `write_stdin` polling, idle app usage, compaction tax, retry/tool loops, cached-token-heavy turns, fast-mode drift, subagent fan-out, and unclear usage attribution
|
|
40
41
|
- Codex process evidence packaging for Windows PowerShell/pwsh CIM polling, high-CPU helpers, stale process-manager entries, and renderer runaways
|
|
41
42
|
- Codex usage reset schedule drift such as weekly reset dates moving, `reset_at` jumping, saved usage disappearing, outage compensation resets changing the anchor, and `/status` disagreeing with enforcement
|
|
@@ -86,6 +87,7 @@ npx trace-to-skill demo deeplink-launch
|
|
|
86
87
|
npx trace-to-skill demo connector-auth-cache
|
|
87
88
|
npx trace-to-skill demo context-fork-bloat
|
|
88
89
|
npx trace-to-skill demo subagent-prompt-leakage
|
|
90
|
+
npx trace-to-skill demo usage-bucket-confusion
|
|
89
91
|
npx trace-to-skill demo mcp-discovery-mismatch
|
|
90
92
|
npx trace-to-skill demo mcp-streamable-http
|
|
91
93
|
npx trace-to-skill demo hooks-runtime
|
|
@@ -100,6 +102,7 @@ npx trace-to-skill plugin-audit ~/.codex --app /Applications/Codex.app --format
|
|
|
100
102
|
npx trace-to-skill diagnostics-bundle ~/.codex --output codex-diagnostics
|
|
101
103
|
npx trace-to-skill usage-evidence ./usage-notes.md --output usage-evidence.md
|
|
102
104
|
npx trace-to-skill process-audit ./process-notes.md --output process-audit.md
|
|
105
|
+
npx trace-to-skill issue-map codex-issues.json --output codex-issue-map.md
|
|
103
106
|
npx trace-to-skill checkpoint . --output .trace-to-skill/checkpoints/before-codex
|
|
104
107
|
npx trace-to-skill redact ./runs --output redacted-runs
|
|
105
108
|
npx trace-to-skill sensitive-audit . --format json
|
|
@@ -117,7 +120,7 @@ npx trace-to-skill init --comment --sarif
|
|
|
117
120
|
## GitHub Action
|
|
118
121
|
|
|
119
122
|
```yaml
|
|
120
|
-
- uses: grnbtqdbyx-create/trace-to-skill@v0.1.83
|
|
123
|
+
- uses: grnbtqdbyx-create/trace-to-skill@v0.1.85
|
|
121
124
|
with:
|
|
122
125
|
mode: all
|
|
123
126
|
doctor-threshold: "85"
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "trace-to-skill",
|
|
3
|
-
"version": "0.1.83",
|
|
3
|
+
"version": "0.1.85",
|
|
4
4
|
"description": "Turn failed AI coding-agent runs into reusable AGENTS.md rules, SKILL.md files, and eval evidence.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/src/index.js",
|
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
"docs/ADOPTION_GUIDE.md",
|
|
22
22
|
"docs/AGENTS_LINT.md",
|
|
23
23
|
"docs/BENCHMARK.md",
|
|
24
|
+
"docs/CODEX_GITHUB_ISSUE_PAIN_MAP.md",
|
|
24
25
|
"docs/CODEX_ISSUE_MAP.md",
|
|
25
26
|
"docs/DEMO.md",
|
|
26
27
|
"docs/DISCOVERY.md",
|
|
@@ -41,7 +42,7 @@
|
|
|
41
42
|
"build": "tsc -p tsconfig.json",
|
|
42
43
|
"clean": "rm -rf dist coverage",
|
|
43
44
|
"test": "npm run build && node --test dist/tests/*.test.js",
|
|
44
|
-
"check": "npm run test && node dist/src/cli.js doctor . --format json > /tmp/trace-to-skill-doctor.json && node dist/src/cli.js lint-agents . --format json > /tmp/trace-to-skill-agents-lint.json && node dist/src/cli.js analyze fixtures --format json > /tmp/trace-to-skill-smoke.json && node dist/src/cli.js usage-evidence fixtures --format json > /tmp/trace-to-skill-usage-evidence.json && node dist/src/cli.js process-audit fixtures/safe-run.md --format json > /tmp/trace-to-skill-process-audit.json && node dist/src/cli.js checkpoint . --output /tmp/trace-to-skill-checkpoint --format json > /tmp/trace-to-skill-checkpoint.json && node dist/src/cli.js sensitive-audit . --format json > /tmp/trace-to-skill-sensitive-audit.json && node dist/src/cli.js lsp-audit . --format json > /tmp/trace-to-skill-lsp-audit.json && node dist/src/cli.js suggest fixtures --target agents-md > /tmp/trace-to-skill-suggest.md && node dist/src/cli.js demo --format json > /tmp/trace-to-skill-demo.json && node dist/src/cli.js benchmark --format json > /tmp/trace-to-skill-benchmark.json && node dist/src/cli.js scorecard . --format json > /tmp/trace-to-skill-scorecard.json && node dist/src/cli.js oss-brief . --format json > /tmp/trace-to-skill-oss-brief.json",
|
|
45
|
+
"check": "npm run test && node dist/src/cli.js doctor . --format json > /tmp/trace-to-skill-doctor.json && node dist/src/cli.js lint-agents . --format json > /tmp/trace-to-skill-agents-lint.json && node dist/src/cli.js analyze fixtures --format json > /tmp/trace-to-skill-smoke.json && node dist/src/cli.js usage-evidence fixtures --format json > /tmp/trace-to-skill-usage-evidence.json && node dist/src/cli.js issue-map fixtures/github-codex-issues-export.json --format json > /tmp/trace-to-skill-issue-map.json && node dist/src/cli.js process-audit fixtures/safe-run.md --format json > /tmp/trace-to-skill-process-audit.json && node dist/src/cli.js checkpoint . --output /tmp/trace-to-skill-checkpoint --format json > /tmp/trace-to-skill-checkpoint.json && node dist/src/cli.js sensitive-audit . --format json > /tmp/trace-to-skill-sensitive-audit.json && node dist/src/cli.js lsp-audit . --format json > /tmp/trace-to-skill-lsp-audit.json && node dist/src/cli.js suggest fixtures --target agents-md > /tmp/trace-to-skill-suggest.md && node dist/src/cli.js demo --format json > /tmp/trace-to-skill-demo.json && node dist/src/cli.js benchmark --format json > /tmp/trace-to-skill-benchmark.json && node dist/src/cli.js scorecard . --format json > /tmp/trace-to-skill-scorecard.json && node dist/src/cli.js oss-brief . --format json > /tmp/trace-to-skill-oss-brief.json",
|
|
45
46
|
"prepack": "npm run build",
|
|
46
47
|
"prepare": "npm run build"
|
|
47
48
|
},
|
|
@@ -140,12 +141,19 @@
|
|
|
140
141
|
"codex-session-index",
|
|
141
142
|
"codex-project-history",
|
|
142
143
|
"codex-issue-report",
|
|
144
|
+
"codex-issue-map",
|
|
145
|
+
"github-issue-map",
|
|
146
|
+
"openai-issue-mining",
|
|
147
|
+
"maintainer-pain-map",
|
|
143
148
|
"openai-triage",
|
|
144
149
|
"openai-oss",
|
|
145
150
|
"oss-maintainers",
|
|
146
151
|
"codex-demo",
|
|
147
152
|
"codex-token-burn",
|
|
148
153
|
"codex-usage",
|
|
154
|
+
"codex-usage-bucket",
|
|
155
|
+
"usage-popover",
|
|
156
|
+
"rate-limit-ui",
|
|
149
157
|
"codex-reset",
|
|
150
158
|
"codex-usage-reset",
|
|
151
159
|
"codex-resource-leak",
|