@biggora/claude-plugins 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +3 -1
- package/README.md +7 -22
- package/codex-cli-workspace/iteration-1/benchmark.json +122 -0
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/eval_metadata.json +13 -0
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/with_skill/grading.json +52 -0
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/with_skill/outputs/response.md +163 -0
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/with_skill/timing.json +5 -0
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/without_skill/grading.json +58 -0
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/without_skill/outputs/response.md +151 -0
- package/codex-cli-workspace/iteration-1/eval-1-ci-integration/without_skill/timing.json +5 -0
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/eval_metadata.json +13 -0
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/grading.json +52 -0
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/outputs/response.md +86 -0
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/timing.json +5 -0
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/grading.json +58 -0
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/outputs/response.md +164 -0
- package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/timing.json +5 -0
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/eval_metadata.json +13 -0
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/grading.json +52 -0
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/outputs/response.md +130 -0
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/timing.json +5 -0
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/grading.json +64 -0
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/outputs/response.md +209 -0
- package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/timing.json +5 -0
- package/codex-cli-workspace/iteration-1/review.html +1325 -0
- package/gemini-cli-workspace/iteration-1/benchmark.json +86 -0
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/eval_metadata.json +37 -0
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/with_skill/grading.json +37 -0
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/with_skill/outputs/response.md +401 -0
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/with_skill/timing.json +5 -0
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/without_skill/grading.json +37 -0
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/without_skill/outputs/response.md +405 -0
- package/gemini-cli-workspace/iteration-1/eval-1-cicd-setup/without_skill/timing.json +5 -0
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/eval_metadata.json +37 -0
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/grading.json +37 -0
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/outputs/response.md +212 -0
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/timing.json +5 -0
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/grading.json +37 -0
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/outputs/response.md +427 -0
- package/gemini-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/timing.json +5 -0
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/eval_metadata.json +32 -0
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/with_skill/grading.json +32 -0
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/with_skill/outputs/response.md +171 -0
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/with_skill/timing.json +5 -0
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/without_skill/grading.json +32 -0
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/without_skill/outputs/response.md +199 -0
- package/gemini-cli-workspace/iteration-1/eval-3-custom-slash-command/without_skill/timing.json +5 -0
- package/gemini-cli-workspace/iteration-1/review.html +1325 -0
- package/gemini-cli-workspace/iteration-2/benchmark.json +173 -0
- package/gemini-cli-workspace/iteration-2/benchmark.md +28 -0
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/eval_metadata.json +37 -0
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/with_skill/grading.json +37 -0
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/with_skill/outputs/response.md +195 -0
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/with_skill/timing.json +5 -0
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/without_skill/grading.json +37 -0
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/without_skill/outputs/response.md +377 -0
- package/gemini-cli-workspace/iteration-2/eval-1-cicd-setup/without_skill/timing.json +5 -0
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/eval_metadata.json +37 -0
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/with_skill/grading.json +37 -0
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/with_skill/outputs/response.md +127 -0
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/with_skill/timing.json +5 -0
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/without_skill/grading.json +37 -0
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/without_skill/outputs/response.md +164 -0
- package/gemini-cli-workspace/iteration-2/eval-2-mcp-server-config/without_skill/timing.json +5 -0
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/eval_metadata.json +32 -0
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/with_skill/grading.json +32 -0
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/with_skill/outputs/response.md +91 -0
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/with_skill/timing.json +5 -0
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/without_skill/grading.json +32 -0
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/without_skill/outputs/response.md +112 -0
- package/gemini-cli-workspace/iteration-2/eval-3-custom-slash-command/without_skill/timing.json +5 -0
- package/gemini-cli-workspace/iteration-2/eval-viewer.html +1325 -0
- package/package.json +1 -1
- package/registry/registry.json +31 -1
- package/screen-recording-workspace/evals.json +41 -0
- package/screen-recording-workspace/iteration-1/benchmark.json +102 -0
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/eval_metadata.json +31 -0
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/with_skill/grading.json +11 -0
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/with_skill/outputs/demo.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/with_skill/timing.json +5 -0
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/without_skill/grading.json +11 -0
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/without_skill/outputs/demo.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-0-fullscreen/without_skill/timing.json +5 -0
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/eval_metadata.json +31 -0
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/with_skill/grading.json +11 -0
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/with_skill/outputs/region_capture.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/with_skill/timing.json +5 -0
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/without_skill/grading.json +11 -0
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/without_skill/outputs/region_capture.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-1-region-audio/without_skill/timing.json +5 -0
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/eval_metadata.json +31 -0
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/with_skill/grading.json +11 -0
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/with_skill/outputs/fallback_recording.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/with_skill/timing.json +5 -0
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/grading.json +11 -0
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/outputs/fallback_recording.mp4 +0 -0
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/outputs/record_screen.py +67 -0
- package/screen-recording-workspace/iteration-1/eval-2-python-fallback/without_skill/timing.json +5 -0
- package/screen-recording-workspace/iteration-1/review.html +1325 -0
- package/src/skills/codex-cli/SKILL.md +21 -11
- package/src/skills/codex-cli/evals/evals.json +47 -0
- package/src/skills/gemini-cli/SKILL.md +27 -13
- package/src/skills/gemini-cli/evals/evals.json +46 -0
- package/src/skills/gemini-cli/references/commands.md +21 -14
- package/src/skills/gemini-cli/references/configuration.md +23 -18
- package/src/skills/gemini-cli/references/headless-and-scripting.md +7 -17
- package/src/skills/gemini-cli/references/mcp-and-extensions.md +12 -6
- package/src/skills/notebook-lm/SKILL.md +1 -1
- package/src/skills/screen-recording/SKILL.md +243 -213
- package/src/skills/screen-recording/references/design-patterns.md +4 -2
- package/src/skills/screen-recording/references/ffmpeg-recording.md +473 -0
- package/src/skills/screen-recording/references/{approach1-programmatic.md → programmatic-generation.md} +45 -22
- package/src/skills/screen-recording/references/python-fallback.md +222 -0
- package/src/skills/tm-search/SKILL.md +242 -106
- package/src/skills/tm-search/evals/evals.json +23 -0
- package/src/skills/tm-search/references/scraping-fallback.md +60 -95
- package/src/skills/tm-search/scripts/tm_search.py +453 -375
- package/src/skills/youtube-thumbnail/SKILL.md +11 -1
- package/src/skills/screen-recording/references/approach2-xvfb.md +0 -232
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"expectations": [
|
|
3
|
+
{
|
|
4
|
+
"text": "Points to ~/.codex/config.toml for MCP server configuration",
|
|
5
|
+
"passed": true,
|
|
6
|
+
"evidence": "Line 5: 'Add the following to your Codex configuration file at ~/.codex/config.toml'"
|
|
7
|
+
},
|
|
8
|
+
{
|
|
9
|
+
"text": "Shows [mcp_servers.name] TOML section with command, args fields",
|
|
10
|
+
"passed": true,
|
|
11
|
+
"evidence": "Line 8: '[mcp_servers.my-python-server]' with command and args fields"
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"text": "Shows $GITHUB_TOKEN or $ENV_VAR pattern for environment variable references",
|
|
15
|
+
"passed": true,
|
|
16
|
+
"evidence": "Line 11: 'env = { GITHUB_TOKEN = \"$GITHUB_TOKEN\" }'"
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"text": "Shows 'codex mcp' command for running codex as an MCP server",
|
|
20
|
+
"passed": true,
|
|
21
|
+
"evidence": "Line 53: 'codex mcp'"
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"text": "Uses correct Python command (python/python3) in the MCP server command field",
|
|
25
|
+
"passed": true,
|
|
26
|
+
"evidence": "Line 9: 'command = \"python\"'"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"text": "Does not invent non-existent config keys or MCP server fields",
|
|
30
|
+
"passed": true,
|
|
31
|
+
"evidence": "All config keys (command, args, env, url, model, approval_policy, network_access) match the skill documentation"
|
|
32
|
+
}
|
|
33
|
+
],
|
|
34
|
+
"summary": {
|
|
35
|
+
"passed": 6,
|
|
36
|
+
"failed": 0,
|
|
37
|
+
"total": 6,
|
|
38
|
+
"pass_rate": 1.0
|
|
39
|
+
},
|
|
40
|
+
"claims": [
|
|
41
|
+
{
|
|
42
|
+
"claim": "MCP servers can use url form for HTTP endpoints",
|
|
43
|
+
"type": "factual",
|
|
44
|
+
"verified": true,
|
|
45
|
+
"evidence": "Matches skill doc: '[mcp_servers.my-api] url = \"https://my-server.example.com/mcp\"'"
|
|
46
|
+
}
|
|
47
|
+
],
|
|
48
|
+
"eval_feedback": {
|
|
49
|
+
"suggestions": [],
|
|
50
|
+
"overall": "No suggestions, evals look solid"
|
|
51
|
+
}
|
|
52
|
+
}
|
package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/with_skill/outputs/response.md
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# Adding a Custom MCP Server to Codex CLI and Running Codex as an MCP Server
|
|
2
|
+
|
|
3
|
+
## Adding Your Python-based MCP Server with GITHUB_TOKEN
|
|
4
|
+
|
|
5
|
+
Add the following to your Codex configuration file at `~/.codex/config.toml`:
|
|
6
|
+
|
|
7
|
+
```toml
|
|
8
|
+
[mcp_servers.my-python-server]
|
|
9
|
+
command = "python"
|
|
10
|
+
args = ["/path/to/your/mcp_server.py"]
|
|
11
|
+
env = { GITHUB_TOKEN = "$GITHUB_TOKEN" }
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Key details:
|
|
15
|
+
|
|
16
|
+
- **`command`** -- the executable to launch your server (e.g., `python`, `python3`, or an absolute path to your Python interpreter).
|
|
17
|
+
- **`args`** -- an array of arguments passed to the command. Point this at your MCP server script or module entry point.
|
|
18
|
+
- **`env`** -- a table of environment variables forwarded to the server process. Using `"$GITHUB_TOKEN"` tells Codex to pull the value from your shell environment at runtime, so you do not hardcode the token in the config file. Make sure `GITHUB_TOKEN` is exported in your shell (e.g., via `export GITHUB_TOKEN="ghp_..."` in your `.bashrc`/`.zshrc` or a `.env` loader).
|
|
19
|
+
|
|
20
|
+
If your Python server is packaged as a module you run with `python -m`, the config would look like:
|
|
21
|
+
|
|
22
|
+
```toml
|
|
23
|
+
[mcp_servers.my-python-server]
|
|
24
|
+
command = "python"
|
|
25
|
+
args = ["-m", "my_mcp_server"]
|
|
26
|
+
env = { GITHUB_TOKEN = "$GITHUB_TOKEN" }
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
If your MCP server exposes an HTTP endpoint instead of running as a stdio subprocess, use the `url` form:
|
|
30
|
+
|
|
31
|
+
```toml
|
|
32
|
+
[mcp_servers.my-python-server]
|
|
33
|
+
url = "https://localhost:8080/mcp"
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
In that case you would manage the `GITHUB_TOKEN` env var in whatever process starts the HTTP server, not in the Codex config.
|
|
37
|
+
|
|
38
|
+
### Verifying it works
|
|
39
|
+
|
|
40
|
+
After adding the config, launch Codex and use the `/mcp` slash command in the TUI to list all available MCP tools. Your server's tools should appear in the list.
|
|
41
|
+
|
|
42
|
+
### Per-project config
|
|
43
|
+
|
|
44
|
+
You can also place MCP server config in a project-level `.codex/config.toml` (in your project root) if the server is only relevant to a specific project. Project config is only loaded for trusted projects.
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Running Codex Itself as an MCP Server
|
|
49
|
+
|
|
50
|
+
To expose Codex CLI as an MCP server that other agents can consume, run:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
codex mcp
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
This starts Codex in MCP server mode, making its capabilities (code reading, editing, execution) available as MCP tools to any MCP-compatible client or agent. Other agents can then connect to Codex the same way Codex connects to third-party MCP servers -- typically by registering `codex mcp` as a stdio server command in their own MCP configuration, so the client launches it as a subprocess.
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Full Config Example Combining Both
|
|
61
|
+
|
|
62
|
+
Here is a complete `~/.codex/config.toml` that sets up your Python MCP server, a GitHub MCP server (for reference), and general Codex settings:
|
|
63
|
+
|
|
64
|
+
```toml
|
|
65
|
+
model = "gpt-5-codex"
|
|
66
|
+
approval_policy = "on-request"
|
|
67
|
+
|
|
68
|
+
[sandbox_workspace_write]
|
|
69
|
+
network_access = false
|
|
70
|
+
|
|
71
|
+
[mcp_servers.github]
|
|
72
|
+
command = "npx"
|
|
73
|
+
args = ["-y", "@modelcontextprotocol/server-github"]
|
|
74
|
+
env = { GITHUB_TOKEN = "$GITHUB_TOKEN" }
|
|
75
|
+
|
|
76
|
+
[mcp_servers.my-python-server]
|
|
77
|
+
command = "python"
|
|
78
|
+
args = ["/path/to/your/mcp_server.py"]
|
|
79
|
+
env = { GITHUB_TOKEN = "$GITHUB_TOKEN" }
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
If you need network access for your MCP server to make API calls, set `network_access = true` under `[sandbox_workspace_write]`, or pass it as a one-off flag:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
codex -c 'sandbox_workspace_write.network_access=true' "Use my-python-server to fetch data"
|
|
86
|
+
```
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
{
|
|
2
|
+
"expectations": [
|
|
3
|
+
{
|
|
4
|
+
"text": "Points to ~/.codex/config.toml for MCP server configuration",
|
|
5
|
+
"passed": false,
|
|
6
|
+
"evidence": "Uses '~/.codex/config.json' (line 11) — wrong format. Codex CLI uses TOML (config.toml), not JSON."
|
|
7
|
+
},
|
|
8
|
+
{
|
|
9
|
+
"text": "Shows [mcp_servers.name] TOML section with command, args fields",
|
|
10
|
+
"passed": false,
|
|
11
|
+
"evidence": "Uses JSON 'mcpServers' key (line 20) instead of TOML [mcp_servers.name] sections. Entire config is in JSON format."
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"text": "Shows $GITHUB_TOKEN or $ENV_VAR pattern for environment variable references",
|
|
15
|
+
"passed": false,
|
|
16
|
+
"evidence": "Hardcodes 'your-github-token-here' (line 25) instead of using the $GITHUB_TOKEN reference pattern that Codex supports for pulling from shell environment."
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"text": "Shows 'codex mcp' command for running codex as an MCP server",
|
|
20
|
+
"passed": false,
|
|
21
|
+
"evidence": "Uses hallucinated 'codex --mcp-server' flag (line 98). The correct command is 'codex mcp' (a subcommand, not a flag)."
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"text": "Uses correct Python command (python/python3) in the MCP server command field",
|
|
25
|
+
"passed": true,
|
|
26
|
+
"evidence": "Line 22: '\"command\": \"python\"'"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"text": "Does not invent non-existent config keys or MCP server fields",
|
|
30
|
+
"passed": false,
|
|
31
|
+
"evidence": "Hallucinated: JSON config format (mcpServers), --mcp-server flag, config.json file. Real format is TOML with [mcp_servers.name] sections."
|
|
32
|
+
}
|
|
33
|
+
],
|
|
34
|
+
"summary": {
|
|
35
|
+
"passed": 1,
|
|
36
|
+
"failed": 5,
|
|
37
|
+
"total": 6,
|
|
38
|
+
"pass_rate": 0.167
|
|
39
|
+
},
|
|
40
|
+
"claims": [
|
|
41
|
+
{
|
|
42
|
+
"claim": "Codex uses config.json for configuration",
|
|
43
|
+
"type": "factual",
|
|
44
|
+
"verified": false,
|
|
45
|
+
"evidence": "Codex CLI uses ~/.codex/config.toml (TOML format)"
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"claim": "codex --mcp-server starts MCP server mode",
|
|
49
|
+
"type": "factual",
|
|
50
|
+
"verified": false,
|
|
51
|
+
"evidence": "Correct command is 'codex mcp' (subcommand)"
|
|
52
|
+
}
|
|
53
|
+
],
|
|
54
|
+
"eval_feedback": {
|
|
55
|
+
"suggestions": [],
|
|
56
|
+
"overall": "Baseline heavily hallucinated — wrong config format, wrong MCP command, hardcoded secrets"
|
|
57
|
+
}
|
|
58
|
+
}
|
package/codex-cli-workspace/iteration-1/eval-2-mcp-server-config/without_skill/outputs/response.md
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# Adding a Custom MCP Server to Codex CLI & Running Codex as an MCP Server
|
|
2
|
+
|
|
3
|
+
## Part 1: Adding a Custom Python MCP Server with GITHUB_TOKEN
|
|
4
|
+
|
|
5
|
+
Codex CLI supports MCP (Model Context Protocol) servers that extend its capabilities with custom tools. You configure them in your Codex CLI configuration file.
|
|
6
|
+
|
|
7
|
+
### Configuration File Location
|
|
8
|
+
|
|
9
|
+
Codex CLI reads its configuration from a JSON file. The typical location is:
|
|
10
|
+
|
|
11
|
+
- **Linux/macOS**: `~/.codex/config.json` (or potentially `~/.config/codex/config.json`)
|
|
12
|
+
- You can also use a project-local configuration
|
|
13
|
+
|
|
14
|
+
### Adding Your Python MCP Server
|
|
15
|
+
|
|
16
|
+
In your Codex CLI configuration file, add an `mcpServers` section. For a Python-based MCP server that needs the `GITHUB_TOKEN` environment variable:
|
|
17
|
+
|
|
18
|
+
```json
|
|
19
|
+
{
|
|
20
|
+
"mcpServers": {
|
|
21
|
+
"my-github-server": {
|
|
22
|
+
"command": "python",
|
|
23
|
+
"args": ["/path/to/your/mcp_server.py"],
|
|
24
|
+
"env": {
|
|
25
|
+
"GITHUB_TOKEN": "your-github-token-here"
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Configuration Fields
|
|
33
|
+
|
|
34
|
+
| Field | Description |
|
|
35
|
+
|-------|-------------|
|
|
36
|
+
| `command` | The executable to run (e.g., `python`, `python3`, `node`) |
|
|
37
|
+
| `args` | Array of arguments passed to the command |
|
|
38
|
+
| `env` | Object of environment variables to set for the server process |
|
|
39
|
+
|
|
40
|
+
### Using a Virtual Environment
|
|
41
|
+
|
|
42
|
+
If your Python MCP server has dependencies managed via a virtual environment, point to the Python binary inside the venv:
|
|
43
|
+
|
|
44
|
+
```json
|
|
45
|
+
{
|
|
46
|
+
"mcpServers": {
|
|
47
|
+
"my-github-server": {
|
|
48
|
+
"command": "/path/to/venv/bin/python",
|
|
49
|
+
"args": ["/path/to/your/mcp_server.py"],
|
|
50
|
+
"env": {
|
|
51
|
+
"GITHUB_TOKEN": "your-github-token-here"
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Using uvx or pipx
|
|
59
|
+
|
|
60
|
+
If your MCP server is distributed as a Python package, you can use `uvx` (from the `uv` tool) or `pipx`:
|
|
61
|
+
|
|
62
|
+
```json
|
|
63
|
+
{
|
|
64
|
+
"mcpServers": {
|
|
65
|
+
"my-github-server": {
|
|
66
|
+
"command": "uvx",
|
|
67
|
+
"args": ["my-github-mcp-server"],
|
|
68
|
+
"env": {
|
|
69
|
+
"GITHUB_TOKEN": "your-github-token-here"
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Security Tip for the Token
|
|
77
|
+
|
|
78
|
+
Rather than hardcoding the token in the config file, you can reference an environment variable that is already set in your shell session. However, the `env` field in the MCP config explicitly sets environment variables for the child process. To avoid storing secrets in config files, consider:
|
|
79
|
+
|
|
80
|
+
1. Setting `GITHUB_TOKEN` in your shell profile (`.bashrc`, `.zshrc`) so it is inherited by child processes automatically. In that case you may not need the `env` field at all.
|
|
81
|
+
2. Using a secrets manager or `.env` file that your MCP server reads directly.
|
|
82
|
+
|
|
83
|
+
### Verifying the Server is Connected
|
|
84
|
+
|
|
85
|
+
After configuring the MCP server, start Codex CLI. It should automatically launch and connect to your MCP server via stdio. The tools provided by your server will be available in the Codex session. You can verify by asking Codex to list available tools or by using one of the tools your server exposes.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Part 2: Running Codex CLI as an MCP Server
|
|
90
|
+
|
|
91
|
+
Codex CLI can itself be run as an MCP server, allowing other agents or tools that support MCP to consume it as a tool provider.
|
|
92
|
+
|
|
93
|
+
### Starting Codex as an MCP Server
|
|
94
|
+
|
|
95
|
+
Use the `--mcp-server` flag (or similar) to start Codex in MCP server mode:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
codex --mcp-server
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
This starts Codex CLI listening on **stdio** (stdin/stdout) using the MCP protocol, making it available as a tool for other MCP-compatible clients.
|
|
102
|
+
|
|
103
|
+
### Consuming Codex from Another Agent
|
|
104
|
+
|
|
105
|
+
In the consuming agent's MCP configuration, add Codex as a server:
|
|
106
|
+
|
|
107
|
+
```json
|
|
108
|
+
{
|
|
109
|
+
"mcpServers": {
|
|
110
|
+
"codex": {
|
|
111
|
+
"command": "codex",
|
|
112
|
+
"args": ["--mcp-server"],
|
|
113
|
+
"env": {
|
|
114
|
+
"OPENAI_API_KEY": "your-openai-api-key"
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Example: Adding Codex as an MCP Server in Claude Desktop
|
|
122
|
+
|
|
123
|
+
If you are using Claude Desktop (which also supports MCP), you can add Codex to the Claude Desktop MCP config (typically `claude_desktop_config.json`):
|
|
124
|
+
|
|
125
|
+
```json
|
|
126
|
+
{
|
|
127
|
+
"mcpServers": {
|
|
128
|
+
"codex": {
|
|
129
|
+
"command": "npx",
|
|
130
|
+
"args": ["@openai/codex", "--mcp-server"],
|
|
131
|
+
"env": {
|
|
132
|
+
"OPENAI_API_KEY": "your-openai-api-key"
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### What Codex Exposes as an MCP Server
|
|
140
|
+
|
|
141
|
+
When running as an MCP server, Codex typically exposes tools that allow the consuming agent to:
|
|
142
|
+
|
|
143
|
+
- Execute coding tasks via Codex's AI capabilities
|
|
144
|
+
- Run shell commands in a sandboxed environment
|
|
145
|
+
- Perform file operations (read, write, edit)
|
|
146
|
+
- Leverage Codex's code generation and reasoning abilities
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## Summary
|
|
151
|
+
|
|
152
|
+
| Goal | Configuration |
|
|
153
|
+
|------|---------------|
|
|
154
|
+
| Add custom MCP server to Codex | Add entry to `mcpServers` in Codex config with `command`, `args`, and `env` |
|
|
155
|
+
| Pass `GITHUB_TOKEN` to MCP server | Use the `env` field in the server config |
|
|
156
|
+
| Run Codex as an MCP server | Start with `codex --mcp-server` flag |
|
|
157
|
+
| Consume Codex from another agent | Add Codex as an MCP server entry in the consuming agent's config |
|
|
158
|
+
|
|
159
|
+
### Key Points
|
|
160
|
+
|
|
161
|
+
- MCP servers communicate over **stdio** by default (stdin/stdout JSON-RPC messages).
|
|
162
|
+
- The `env` field lets you pass environment variables like `GITHUB_TOKEN` to the server process.
|
|
163
|
+
- Codex CLI can both **consume** MCP servers (as a client) and **be consumed** as an MCP server (acting as a server for other agents).
|
|
164
|
+
- Make sure your Python MCP server implements the MCP protocol correctly (handles `initialize`, `tools/list`, `tools/call` methods over stdio).
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"eval_id": 3,
|
|
3
|
+
"eval_name": "profiles-troubleshooting",
|
|
4
|
+
"prompt": "I'm getting Landlock errors on WSL2 when running codex. Also, I need to set up two profiles in config.toml — one for safe daily dev and one for CI with no approvals. How do I fix the Landlock issue and configure the profiles?",
|
|
5
|
+
"assertions": [
|
|
6
|
+
{"id": "landlock-wsl-fix", "text": "Mentions updating WSL2 kernel or using --dangerously-bypass-approvals-and-sandbox (--yolo) as workarounds"},
|
|
7
|
+
{"id": "profiles-toml-syntax", "text": "Shows correct [profiles.name] TOML syntax for defining profiles"},
|
|
8
|
+
{"id": "ci-profile-no-approval", "text": "CI profile uses approval_policy = 'never' for fully automated mode"},
|
|
9
|
+
{"id": "dev-profile-safe", "text": "Dev profile uses a safe approval_policy like 'on-request' or 'untrusted'"},
|
|
10
|
+
{"id": "profile-usage-flag", "text": "Shows --profile flag for selecting a profile at runtime"},
|
|
11
|
+
{"id": "no-hallucinated-fixes", "text": "Does not invent non-existent troubleshooting steps or config options"}
|
|
12
|
+
]
|
|
13
|
+
}
|
package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/with_skill/grading.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"expectations": [
|
|
3
|
+
{
|
|
4
|
+
"text": "Mentions updating WSL2 kernel or using --dangerously-bypass-approvals-and-sandbox (--yolo) as workarounds",
|
|
5
|
+
"passed": true,
|
|
6
|
+
"evidence": "Option 1: 'wsl --update' to update kernel. Option 2: '--dangerously-bypass-approvals-and-sandbox' with '(aliased as --yolo)'"
|
|
7
|
+
},
|
|
8
|
+
{
|
|
9
|
+
"text": "Shows correct [profiles.name] TOML syntax for defining profiles",
|
|
10
|
+
"passed": true,
|
|
11
|
+
"evidence": "Lines 66-79: '[profiles.dev]' and '[profiles.ci]' sections in config.toml"
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"text": "CI profile uses approval_policy = 'never' for fully automated mode",
|
|
15
|
+
"passed": true,
|
|
16
|
+
"evidence": "Line 77: 'approval_policy = \"never\"'"
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"text": "Dev profile uses a safe approval_policy like 'on-request' or 'untrusted'",
|
|
20
|
+
"passed": true,
|
|
21
|
+
"evidence": "Line 68: 'approval_policy = \"on-request\"'"
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"text": "Shows --profile flag for selecting a profile at runtime",
|
|
25
|
+
"passed": true,
|
|
26
|
+
"evidence": "Lines 85-93: 'codex --profile dev' and 'codex -p dev'"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"text": "Does not invent non-existent troubleshooting steps or config options",
|
|
30
|
+
"passed": true,
|
|
31
|
+
"evidence": "All flags (--dangerously-bypass-approvals-and-sandbox, --yolo, -s danger-full-access, --profile, -p) and config keys (approval_policy, model, profile) match the skill documentation"
|
|
32
|
+
}
|
|
33
|
+
],
|
|
34
|
+
"summary": {
|
|
35
|
+
"passed": 6,
|
|
36
|
+
"failed": 0,
|
|
37
|
+
"total": 6,
|
|
38
|
+
"pass_rate": 1.0
|
|
39
|
+
},
|
|
40
|
+
"claims": [
|
|
41
|
+
{
|
|
42
|
+
"claim": "danger-full-access sandbox mode avoids Landlock syscall",
|
|
43
|
+
"type": "factual",
|
|
44
|
+
"verified": true,
|
|
45
|
+
"evidence": "Reasonable inference — danger-full-access provides unrestricted access per the skill doc"
|
|
46
|
+
}
|
|
47
|
+
],
|
|
48
|
+
"eval_feedback": {
|
|
49
|
+
"suggestions": [],
|
|
50
|
+
"overall": "No suggestions, evals look solid"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Fixing Landlock Errors on WSL2 and Setting Up Codex Profiles
|
|
2
|
+
|
|
3
|
+
## Fixing the Landlock Error
|
|
4
|
+
|
|
5
|
+
Landlock is a Linux kernel security feature that Codex uses for filesystem sandboxing. WSL2 kernels often ship with an older version that does not fully support Landlock, which causes Codex to fail when it tries to enforce sandbox restrictions.
|
|
6
|
+
|
|
7
|
+
### Options to fix it
|
|
8
|
+
|
|
9
|
+
**Option 1: Update your WSL2 kernel** (recommended long-term fix)
|
|
10
|
+
|
|
11
|
+
Update WSL2 to get a kernel version that supports Landlock (5.13+, ideally 6.1+):
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# From Windows PowerShell (run as administrator):
|
|
15
|
+
wsl --update
|
|
16
|
+
|
|
17
|
+
# Then restart WSL:
|
|
18
|
+
wsl --shutdown
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
After restarting, verify the kernel version inside WSL:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
uname -r
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
If the version is 5.13 or higher and `landlock` appears in the output of `cat /sys/kernel/security/lsm`, Landlock should work and the error should be resolved.
|
|
28
|
+
|
|
29
|
+
**Option 2: Bypass the sandbox** (for isolated/trusted environments only)
|
|
30
|
+
|
|
31
|
+
If updating the kernel is not an option, you can bypass the sandbox entirely using the `--dangerously-bypass-approvals-and-sandbox` flag (aliased as `--yolo`):
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
codex --dangerously-bypass-approvals-and-sandbox "your task here"
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Only use this in isolated environments (containers, throwaway VMs, CI runners) since it removes all filesystem and approval restrictions.
|
|
38
|
+
|
|
39
|
+
**Option 3: Use `danger-full-access` sandbox mode**
|
|
40
|
+
|
|
41
|
+
A slightly less drastic alternative is to use the unrestricted sandbox mode while keeping approval prompts active:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
codex -s danger-full-access "your task here"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
This avoids the Landlock syscall entirely while still requiring approval for sensitive actions.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Setting Up Two Profiles in config.toml
|
|
52
|
+
|
|
53
|
+
Edit (or create) `~/.codex/config.toml` and add the following:
|
|
54
|
+
|
|
55
|
+
```toml
|
|
56
|
+
# Default settings (used when no profile is specified)
|
|
57
|
+
model = "gpt-5-codex"
|
|
58
|
+
approval_policy = "on-request"
|
|
59
|
+
|
|
60
|
+
[sandbox_workspace_write]
|
|
61
|
+
network_access = false
|
|
62
|
+
|
|
63
|
+
# ----------------------------------------------------------
|
|
64
|
+
# Profile: safe daily development
|
|
65
|
+
# ----------------------------------------------------------
|
|
66
|
+
[profiles.dev]
|
|
67
|
+
model = "gpt-5-codex"
|
|
68
|
+
approval_policy = "on-request"
|
|
69
|
+
# Same approval policy that --full-auto applies: it will ask before
|
|
70
|
+
# sensitive actions but auto-approve reads and workspace writes.
|
|
71
|
+
|
|
72
|
+
# ----------------------------------------------------------
|
|
73
|
+
# Profile: CI with no approvals
|
|
74
|
+
# ----------------------------------------------------------
|
|
75
|
+
[profiles.ci]
|
|
76
|
+
model = "gpt-5-codex"
|
|
77
|
+
approval_policy = "never"
|
|
78
|
+
# "never" means Codex will not pause for any approval prompts,
|
|
79
|
+
# making it suitable for fully automated CI pipelines.
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Using the profiles
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# Daily development (safe, asks before sensitive actions)
|
|
86
|
+
codex --profile dev "Refactor the auth module"
|
|
87
|
+
|
|
88
|
+
# CI pipeline (no approvals, fully automated)
|
|
89
|
+
codex --profile ci "Run tests and fix failures"
|
|
90
|
+
|
|
91
|
+
# Short form
|
|
92
|
+
codex -p dev "task"
|
|
93
|
+
codex -p ci "task"
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Setting a default profile
|
|
97
|
+
|
|
98
|
+
If you want `dev` to be your default without typing `--profile` every time, add this at the top level of your config:
|
|
99
|
+
|
|
100
|
+
```toml
|
|
101
|
+
profile = "dev"
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Then you only need to explicitly pass `--profile ci` when running in CI.
|
|
105
|
+
|
|
106
|
+
### GitHub Actions example using the CI profile
|
|
107
|
+
|
|
108
|
+
```yaml
|
|
109
|
+
- name: Run Codex
|
|
110
|
+
run: |
|
|
111
|
+
npm i -g @openai/codex
|
|
112
|
+
codex exec --profile ci "Update CHANGELOG for this release"
|
|
113
|
+
env:
|
|
114
|
+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Config precedence reminder
|
|
118
|
+
|
|
119
|
+
CLI flags always override profile settings, which override user config defaults:
|
|
120
|
+
|
|
121
|
+
1. CLI flags (`--model`, `-a`, `-c key=value`)
|
|
122
|
+
2. Project config (`.codex/config.toml` in project root)
|
|
123
|
+
3. User config (`~/.codex/config.toml`, including profiles)
|
|
124
|
+
4. Built-in defaults
|
|
125
|
+
|
|
126
|
+
So even with a profile active, you can override individual settings on the command line, for example:
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
codex --profile ci --model gpt-5-pro "task"
|
|
130
|
+
```
|
package/codex-cli-workspace/iteration-1/eval-3-profiles-troubleshooting/without_skill/grading.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{
|
|
2
|
+
"expectations": [
|
|
3
|
+
{
|
|
4
|
+
"text": "Mentions updating WSL2 kernel or using --dangerously-bypass-approvals-and-sandbox (--yolo) as workarounds",
|
|
5
|
+
"passed": false,
|
|
6
|
+
"evidence": "Mentions 'wsl --update' (correct) but does NOT mention --dangerously-bypass-approvals-and-sandbox or --yolo. Instead invents '--no-sandbox' flag and 'CODEX_SANDBOX_TYPE=none' env var, neither of which exist."
|
|
7
|
+
},
|
|
8
|
+
{
|
|
9
|
+
"text": "Shows correct [profiles.name] TOML syntax for defining profiles",
|
|
10
|
+
"passed": false,
|
|
11
|
+
"evidence": "Uses '[profile.dev]' and '[profile.ci]' (singular 'profile') instead of the correct '[profiles.dev]' and '[profiles.ci]' (plural 'profiles')."
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"text": "CI profile uses approval_policy = 'never' for fully automated mode",
|
|
15
|
+
"passed": false,
|
|
16
|
+
"evidence": "Uses 'approval_mode = \"full-auto\"' — wrong key name (should be approval_policy) and wrong value (should be 'never'). 'full-auto' is a CLI shortcut flag, not an approval_policy value."
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"text": "Dev profile uses a safe approval_policy like 'on-request' or 'untrusted'",
|
|
20
|
+
"passed": false,
|
|
21
|
+
"evidence": "Uses 'approval_mode = \"suggest\"' — wrong key name (should be approval_policy) and 'suggest' is not a valid approval policy value. Valid values are: untrusted, on-request, never, reject."
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"text": "Shows --profile flag for selecting a profile at runtime",
|
|
25
|
+
"passed": true,
|
|
26
|
+
"evidence": "Line 157: 'codex --profile dev' and 'codex --profile ci'"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"text": "Does not invent non-existent troubleshooting steps or config options",
|
|
30
|
+
"passed": false,
|
|
31
|
+
"evidence": "Hallucinated: '--no-sandbox' flag, 'CODEX_SANDBOX_TYPE=none' env var, 'sandbox = none/permissive' config keys, 'approval_mode' key (should be approval_policy), 'suggest'/'auto-edit'/'full-auto' as config values, '[profile.name]' syntax (should be [profiles.name])."
|
|
32
|
+
}
|
|
33
|
+
],
|
|
34
|
+
"summary": {
|
|
35
|
+
"passed": 1,
|
|
36
|
+
"failed": 5,
|
|
37
|
+
"total": 6,
|
|
38
|
+
"pass_rate": 0.167
|
|
39
|
+
},
|
|
40
|
+
"claims": [
|
|
41
|
+
{
|
|
42
|
+
"claim": "Approval modes are suggest, auto-edit, full-auto",
|
|
43
|
+
"type": "factual",
|
|
44
|
+
"verified": false,
|
|
45
|
+
"evidence": "Real approval_policy values: untrusted, on-request, never, reject"
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"claim": "--no-sandbox disables sandboxing",
|
|
49
|
+
"type": "factual",
|
|
50
|
+
"verified": false,
|
|
51
|
+
"evidence": "No --no-sandbox flag exists. The bypass flag is --dangerously-bypass-approvals-and-sandbox (--yolo)"
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"claim": "Profile sections use [profile.name] syntax",
|
|
55
|
+
"type": "factual",
|
|
56
|
+
"verified": false,
|
|
57
|
+
"evidence": "Correct syntax is [profiles.name] (plural)"
|
|
58
|
+
}
|
|
59
|
+
],
|
|
60
|
+
"eval_feedback": {
|
|
61
|
+
"suggestions": [],
|
|
62
|
+
"overall": "Baseline massively hallucinated — wrong config keys, wrong values, invented flags and env vars"
|
|
63
|
+
}
|
|
64
|
+
}
|