@sabaiway/agent-workflow-kit 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,59 @@
1
+ import { describe, it } from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import { mkdtempSync, mkdirSync, writeFileSync, chmodSync, rmSync } from 'node:fs';
4
+ import { tmpdir } from 'node:os';
5
+ import { join, dirname } from 'node:path';
6
+ import { fileURLToPath } from 'node:url';
7
+ import { spawnSync } from 'node:child_process';
8
+
9
+ const HERE = dirname(fileURLToPath(import.meta.url));
10
+ const WRAPPER = join(HERE, 'agy.sh');
11
+
12
+ // Build a sandbox HOME whose ~/.local/bin holds a STUB `agy`. The wrapper prepends
13
+ // "$HOME/.local/bin" to PATH, so it resolves our stub instead of the real binary — no network,
14
+ // no real subscription CLI, fully hermetic.
15
/**
 * Build a throwaway HOME whose `~/.local/bin` holds a stub `agy` executable.
 *
 * @param {string} stubBody - full script text (shebang included) written as the stub.
 * @returns {string} path of the sandbox HOME; the caller is responsible for removing it.
 */
const makeSandbox = (stubBody) => {
  const home = mkdtempSync(join(tmpdir(), 'agy-wrapper-test-'));
  const binDir = join(home, '.local', 'bin');
  mkdirSync(binDir, { recursive: true });
  const stubPath = join(binDir, 'agy');
  writeFileSync(stubPath, stubBody, { mode: 0o755 });
  // The create-time mode is masked by the process umask; chmod pins the exact bits.
  chmodSync(stubPath, 0o755);
  return home;
};
24
+
25
/**
 * Run the `agy.sh` wrapper under bash inside a sandbox HOME.
 *
 * The child receives a minimal environment: HOME, a PATH whose first entry is the
 * sandbox's `.local/bin`, and the caller's overrides (which win on key collisions).
 *
 * @param {string} home - sandbox HOME, as returned by makeSandbox.
 * @param {Record<string, string>} env - extra environment variables for the wrapper.
 * @param {string} [prompt='hello'] - prompt passed as the wrapper's only argument.
 * @returns the spawnSync result, with stdout/stderr decoded as UTF-8 strings.
 */
const runWrapper = (home, env, prompt = 'hello') => {
  const stubBin = join(home, '.local', 'bin');
  // Skip an unset parent PATH instead of interpolating the literal string "undefined".
  const searchPath = [stubBin, process.env.PATH].filter(Boolean).join(':');
  return spawnSync('bash', [WRAPPER, prompt], {
    env: { HOME: home, PATH: searchPath, ...env },
    encoding: 'utf8',
    // Outer bound on the child so a wrapper that never returns cannot stall the suite.
    timeout: 20000,
  });
};
31
+
32
// Suite for the wrapper's hard wall-clock cap. Each case swaps in a stub `agy` with a
// different behaviour (hangs, succeeds, fails) and inspects what the wrapper reports.
describe('agy.sh — hard wall-clock cap (timeout(1))', () => {
  /**
   * Create a sandbox around `stubBody`, run the wrapper once, and always remove the
   * sandbox afterwards — even if the run throws.
   *
   * @param {string} stubBody - script text for the stub `agy`.
   * @param {Record<string, string>} env - environment overrides for the wrapper.
   * @returns {{ result: object, elapsed: number }} spawn result and run duration in ms.
   */
  const runInSandbox = (stubBody, env) => {
    const home = makeSandbox(stubBody);
    try {
      const started = Date.now();
      const result = runWrapper(home, env);
      // Measured before cleanup so removing the temp dir never counts against the cap.
      return { result, elapsed: Date.now() - started };
    } finally {
      rmSync(home, { recursive: true, force: true });
    }
  };

  it('kills a hung agy at AGY_HARD_TIMEOUT and reports it (non-zero + actionable guidance)', () => {
    const { result, elapsed } = runInSandbox('#!/usr/bin/env bash\nsleep 30\n', {
      AGY_HARD_TIMEOUT: '2s',
      AGY_TIMEOUT: '2s',
      AGY_MODEL: '',
    });
    assert.ok(elapsed < 13000, `wrapper must return well under the kill-after window, took ${elapsed}ms`);
    assert.notEqual(result.status, 0, 'a timed-out run must exit non-zero');
    assert.match(result.stderr, /exceeded the hard cap/, 'must explain the hard-cap kill');
  });

  it('passes a fast agy run through unchanged (exit 0, stdout preserved)', () => {
    const { result } = runInSandbox('#!/usr/bin/env bash\necho "OK reply"\nexit 0\n', {
      AGY_HARD_TIMEOUT: '10s',
      AGY_TIMEOUT: '10s',
      AGY_MODEL: '',
    });
    assert.equal(result.status, 0, `expected clean exit, got ${result.status}; stderr=${result.stderr}`);
    assert.match(result.stdout, /OK reply/);
  });

  it('propagates a non-timeout agy failure code verbatim (no false hard-cap message)', () => {
    const { result } = runInSandbox('#!/usr/bin/env bash\necho "boom" >&2\nexit 3\n', {
      AGY_HARD_TIMEOUT: '10s',
      AGY_TIMEOUT: '10s',
      AGY_MODEL: '',
    });
    assert.equal(result.status, 3, 'a genuine agy failure code must pass through');
    assert.doesNotMatch(result.stderr, /exceeded the hard cap/, 'must not mislabel a non-timeout failure');
  });
});
@@ -0,0 +1,22 @@
1
+ {
2
+ "family": "agent-workflow",
3
+ "schema": 1,
4
+ "name": "antigravity-cli-bridge",
5
+ "kind": "execution-backend",
6
+ "version": "1.0.0",
7
+ "provides": ["review", "probe"],
8
+ "roles": {
9
+ "review": { "cmd": "agy-run", "source": "bin/agy.sh", "template": "references/review-prompt.md", "output": "advisory" },
10
+ "probe": { "cmd": "agy-run", "source": "bin/agy.sh", "output": "advisory" }
11
+ },
12
+ "detect": {
13
+ "installed": {
14
+ "env": "ANTIGRAVITY_CLI_BRIDGE_DIR",
15
+ "default": "~/.claude/skills/antigravity-cli-bridge",
16
+ "file": "SKILL.md"
17
+ }
18
+ },
19
+ "cost": "subscription",
20
+ "quota": { "kind": "subscription", "finite": true },
21
+ "provenance": { "author": "sabaiway", "source": "github:sabaiway/agent-workflow" }
22
+ }
@@ -0,0 +1,108 @@
1
+ # How the main agent drives `agy`
2
+
3
+ `agy` is a **delegated-execution backend**: the main agent stays the orchestrator and hands `agy` a
4
+ bounded, self-contained sub-task. `agy` answers from the **subscription** quota, so the goal is
5
+ maximum useful output per token of that quota. Treat its output as **advisory** — the main agent owns
6
+ edits, verification, and final judgment.
7
+
8
+ ## Delegation checklist
9
+
10
+ 1. Pick the narrowest useful question.
11
+ 2. Choose the cheapest model that can answer it.
12
+ 3. Include only the relevant excerpts, paths, constraints, and the expected output shape.
13
+ 4. State permission boundaries in the prompt (no edits, no git writes).
14
+ 5. Run `agy-run` headlessly.
15
+ 6. Treat the response as advisory and verify before acting.
16
+
17
+ ## Model selection
18
+
19
+ | Task | Model |
20
+ |---|---|
21
+ | Reachability / smoke / "is it wired?" | `Gemini 3.5 Flash (Low)` |
22
+ | Cheap probes, summaries | `Gemini 3.5 Flash (Medium)` |
23
+ | Quick review with a little more effort | `Gemini 3.5 Flash (High)` |
24
+ | Reasoning, plan critique, careful drafting | `Gemini 3.1 Pro (High)` (wrapper default) |
25
+ | Same reasoning, lower quota cost | `Gemini 3.1 Pro (Low)` |
26
+ | A different engine's opinion | `Claude Sonnet 4.6 (Thinking)`, `Claude Opus 4.6 (Thinking)`, or `GPT-OSS 120B (Medium)` |
27
+
28
+ Don't reach for Pro by reflex — Flash answers most reachability/probe questions for a fraction of the
29
+ quota.
30
+
31
+ ## Quota economy
32
+
33
+ Subscription quota is finite. Prefer:
34
+
35
+ - A short probe on Flash before a large Pro run.
36
+ - One sharp question over broad "review everything" prompts.
37
+ - Prompt files with trimmed excerpts instead of whole repositories.
38
+ - `AGY_TIMEOUT=2m` for probes, longer timeouts only for deep reviews.
39
+ - Reusing a conversation with `--continue` when the context is already loaded.
40
+
41
+ ## Continue vs. fresh
42
+
43
+ ```bash
44
+ # Continue the most recent conversation (cheaper than re-sending context):
45
+ agy-run "Given your previous review, list only the top three risks." -- --continue
46
+
47
+ # Resume a specific conversation by id:
48
+ agy-run "Continue from the prior architecture critique; focus on test gaps." -- --conversation <id>
49
+ ```
50
+
51
+ Use conversation state only when it saves quota or preserves useful context. For auditable decisions,
52
+ prefer self-contained prompts.
53
+
54
+ ## Escalation policy (edits, network, git)
55
+
56
+ The wrapper passes no `--add-dir`, no `--dangerously-skip-permissions`, and no `--sandbox`. Treat this
57
+ as a **policy boundary you enforce in the prompt, not an enforced sandbox** — so prompt `agy` as a
58
+ read-only reviewer, and reach for `-- --sandbox` for anything that might trigger terminal/tool work:
59
+
60
+ ```text
61
+ Do not edit files. Do not run git write commands. Do not branch, add, commit, stash, reset, or
62
+ rewrite history. Return findings and suggested changes only.
63
+ ```
64
+
65
+ - **Repo edits** stay with the orchestrator. If a flow truly needs `agy` to write files, opt in
66
+ explicitly — `agy-run "..." -- --add-dir . --dangerously-skip-permissions` — and review the diff.
67
+ - **New dependencies / network installs** are done by hand, not by `agy`.
68
+ - **Git writes** (branch/commit) are never delegated — the orchestrator commits after review.
69
+ - Prefer `-- --sandbox` for any prompt that might trigger terminal work.
70
+
71
+ ## Project-context prompts
72
+
73
+ Probe reachability from a project root (cheap model):
74
+
75
+ ```bash
76
+ AGY_MODEL="Gemini 3.5 Flash (Low)" agy-run \
77
+ "Read the cwd context file and report the dialogue language plus one Hard Constraint."
78
+ AGY_MODEL="Gemini 3.5 Flash (Low)" agy-run \
79
+ "Without using a file pointer, is there a project-specific planning skill in this repo? Name it and cite its path."
80
+ ```
81
+
82
+ Plan-review prompt shape:
83
+
84
+ ```text
85
+ You are reviewing the plan below from the current repository root.
86
+ Use the root context file and per-workspace skills if they are reachable.
87
+ Do not edit files. Do not run git write commands.
88
+ Return: 1) blocking issues 2) non-blocking risks 3) missing verification 4) a concise recommendation.
89
+ The implementation plan text follows in this same prompt.
90
+ ```
91
+
92
+ Diff/code-review prompt shape (provide the diff as text):
93
+
94
+ ```text
95
+ Review this diff against the stated constraints.
96
+ Focus on bugs, behavioural regressions, missing tests, and violations of the project rules.
97
+ Cite file paths and line hints from the diff where possible. Do not summarise unless there are no findings.
98
+ The project constraints and diff text follow in this same prompt.
99
+ ```
100
+
101
+ ## Handling output
102
+
103
+ `agy` returns plain text. Do not assume it is complete, current, or machine-valid. Before acting:
104
+
105
+ - Check claims against local files or primary sources available to the main agent.
106
+ - Re-run local tests and linters yourself.
107
+ - Reject advice that conflicts with user instructions, repository rules, or security boundaries.
108
+ - Summarise uncertainty clearly when reporting back to the user.
@@ -0,0 +1,93 @@
1
+ # `agy` models & flags (reference)
2
+
3
+ The source of truth is the live binary: `agy --version`, `agy --help`, `agy models`. The tables below
4
+ were captured from **v1.0.10**; if the binary disagrees, the binary wins. The wrapper command is
5
+ `agy-run`, backed by `bin/agy.sh`.
6
+
7
+ ## Headless behaviour
8
+
9
+ Use `-p`, `--print`, or `--prompt` to run one non-interactive prompt and print the text response. The
10
+ wrapper always uses headless `-p`. **There is no JSON output mode in v1.0.10** — ask for Markdown,
11
+ bullets, tables, or fenced blocks when the caller needs structure, then validate the text yourself.
12
+
13
+ ## Wrapper contract
14
+
15
+ ```bash
16
+ agy-run <prompt | - | @file> [-- extra agy flags...]
17
+ ```
18
+
19
+ Inputs:
20
+
21
+ - Prompt text: `agy-run "say OK"`.
22
+ - Stdin: `echo "say OK" | agy-run -`.
23
+ - Prompt file: `agy-run @prompt.md`.
24
+ - Extra `agy` flags after `--`: `agy-run @prompt.md -- --add-dir . --continue`. Extra args **without**
25
+ the `--` separator are rejected with a usage error (they are never silently dropped).
26
+ - A literal prompt that **begins with `@`** is read as a file path. Pass such prompts via stdin
27
+ instead: `printf '%s' '@handle, review this' | agy-run -`.
28
+
29
+ Environment:
30
+
31
+ | Var | Default | Effect |
32
+ |---|---|---|
33
+ | `AGY_MODEL` | `Gemini 3.1 Pro (High)` | model display string; set empty (`AGY_MODEL=`) to drop `--model` and let `agy` use `settings.json` |
34
+ | `AGY_TIMEOUT` | `5m` | value passed to `--print-timeout` |
35
+
36
+ Subscription invariant: the wrapper prepends `$HOME/.local/bin` to `PATH` and clears
37
+ `ANTIGRAVITY_API_KEY` / `GEMINI_API_KEY` / `GOOGLE_API_KEY` / `GOOGLE_GENAI_API_KEY` before execution.
38
+ Auth comes from the user's cached OAuth token, never from bundled credentials.
39
+
40
+ ## Models
41
+
42
+ Pass the **exact display string** from `agy models`, or set `AGY_MODEL`.
43
+
44
+ | Model string | Practical use |
45
+ |---|---|
46
+ | `Gemini 3.5 Flash (Low)` | lowest-cost smoke tests and simple rewrites |
47
+ | `Gemini 3.5 Flash (Medium)` | cheap probes, fast summaries, context-reachability checks |
48
+ | `Gemini 3.5 Flash (High)` | fast review when a little more reasoning effort is useful |
49
+ | `Gemini 3.1 Pro (Low)` | cheaper Pro pass for medium reasoning |
50
+ | `Gemini 3.1 Pro (High)` | wrapper default; hard reasoning, plan critique, architecture review |
51
+ | `Claude Sonnet 4.6 (Thinking)` | cross-vendor reasoning comparison |
52
+ | `Claude Opus 4.6 (Thinking)` | expensive deep critique when the user wants another high-end pass |
53
+ | `GPT-OSS 120B (Medium)` | open-weights-style comparison / diversity pass |
54
+
55
+ Examples:
56
+
57
+ ```bash
58
+ AGY_MODEL="Gemini 3.5 Flash (Medium)" agy-run "Read AGENTS.md and report one Hard Constraint."
59
+ AGY_MODEL="Claude Sonnet 4.6 (Thinking)" AGY_TIMEOUT=10m agy-run @review-prompt.md
60
+ ```
61
+
62
+ ## Flags (from `agy --help`, v1.0.10)
63
+
64
+ | Flag | Meaning | Notes |
65
+ |---|---|---|
66
+ | `-p`, `--print`, `--prompt` | run one headless prompt and print the text response | the wrapper uses `-p` |
67
+ | `--print-timeout <dur>` | cap headless wait time | CLI default `5m0s`; wrapper default `5m` via `AGY_TIMEOUT` |
68
+ | `--model <string>` | select a model | must match an `agy models` display string exactly |
69
+ | `-i`, `--prompt-interactive` | run an initial prompt, then continue interactively | not used by the wrapper |
70
+ | `-c`, `--continue` | continue the most recent conversation | pass after the wrapper's `--` |
71
+ | `--conversation <id>` | resume a specific conversation by id | use only when the user provides/records the id |
72
+ | `--add-dir <dir>` | add a directory to the workspace | repeatable; for explicit extra context |
73
+ | `--dangerously-skip-permissions` | auto-approve all tool permissions | avoid by default; use only with explicit user approval |
74
+ | `--sandbox` | run with terminal restrictions enabled | prefer when delegating a prompt that might trigger tool/terminal work |
75
+ | `--log-file <path>` | override the CLI log-file path | keep logs secret-free and out of committed artifacts |
76
+
77
+ ## Subcommands (v1.0.10)
78
+
79
+ `changelog`, `help`, `install`, `models`, `plugin` / `plugins`, `update`.
80
+
81
+ **Not available in v1.0.10:** any JSON output mode, and an `agy inspect` subcommand. Output is plain text.
82
+
83
+ ## Project-context flags
84
+
85
+ `agy` reads context from its current working directory:
86
+
87
+ ```text
88
+ .antigravity.md > GEMINI.md > AGENTS.md
89
+ .agents/skills/
90
+ ```
91
+
92
+ Use `--add-dir` for extra directories not already reachable from cwd. Subdirectory `CLAUDE.md` files
93
+ are **not** auto-loaded — include those local rules manually in the prompt when they matter.
@@ -0,0 +1,51 @@
1
+ # Review prompt template — `agy-run` (review role)
2
+
3
+ The `review` role of `antigravity-cli-bridge` delegates a **read-only second opinion** to `agy`.
4
+ `agy` cannot see the conversation and (in v1.0.10) has no JSON output, so the prompt must be
5
+ **self-contained** and ask for **plain-Markdown findings only** — no repo edits, no git writes.
6
+ Fill the `{{…}}` slots (removing the trailing `# e.g. …` hints), pass it to `agy-run`, then verify every finding locally before acting.
7
+
8
+ ```text
9
+ You are a meticulous staff-level reviewer giving a SECOND OPINION. You are read-only:
10
+ do not propose to edit files, run commands, or make git changes — return findings only.
11
+
12
+ ## What to review
13
+ {{TARGET}} # e.g. "the implementation plan below" or "the working-tree diff below"
14
+
15
+ ## Project rules
16
+ Read the repo's root AGENTS.md (your cwd) and obey its Hard Constraints and conventions.
17
+ If AGENTS.md declares a verification/gate set, judge the change against it; if it declares
18
+ none, say so — do NOT invent checks.
19
+
20
+ ## Material
21
+ {{CONTENT}} # paste the plan text, or the unified diff, or the file excerpts under review
22
+
23
+ ## Focus (optional)
24
+ {{FOCUS}} # e.g. "correctness of the new reducer", "backward-compat of the stamp takeover"
25
+
26
+ ## Output — Markdown, this exact shape, nothing else
27
+ ### Verdict
28
+ One line: SHIP / SHIP WITH NITS / REWORK, plus a one-sentence reason.
29
+ ### Blocking
30
+ Numbered. Correctness bugs, contract violations, data loss, security. Cite file:line.
31
+ Empty? write "none".
32
+ ### Non-blocking
33
+ Numbered. Simplifications, reuse, naming, missing tests. Cite file:line.
34
+ ### Questions
35
+ Anything ambiguous that changes your verdict if answered.
36
+ ```
37
+
38
+ ## Usage
39
+
40
+ ```bash
41
+ # critique a plan
42
+ AGY_MODEL="Gemini 3.1 Pro (High)" agy-run @/tmp/review-prompt.filled.md
43
+
44
+ # critique the current diff (build the prompt with the diff pasted into {{CONTENT}})
45
+ git diff | ... # assemble the filled prompt, then:
46
+ agy-run @/tmp/review-prompt.filled.md
47
+ ```
48
+
49
+ Treat the result as **advisory** — `agy` output may be incomplete or out of date. The orchestrator
50
+ re-runs the project's real gates and owns every accepted change. See
51
+ [`driving-agy.md`](./driving-agy.md).
@@ -0,0 +1,65 @@
1
+ # Setting up Antigravity CLI (`agy`) on a clean machine
2
+
3
+ This setup is **secret-free**. `agy` itself is **not** bundled — it requires a binary install and a
4
+ one-time interactive sign-in with your own subscription. Do this once per machine, then the skill
5
+ works in any project.
6
+
7
+ ## 1. Install the binary
8
+
9
+ ```bash
10
+ curl -fsSL https://antigravity.google/cli/install.sh | bash
11
+ export PATH="$HOME/.local/bin:$PATH" # add to ~/.bashrc / ~/.zshrc to persist
12
+ agy --version # expect 1.0.10 or newer
13
+ ```
14
+
15
+ - The binary is **`agy`** (not `antigravity`); it installs to `~/.local/bin/agy`.
16
+ - Keep `$HOME/.local/bin` on `PATH` (the wrapper also prepends it defensively).
17
+
18
+ ## 2. Sign in once (subscription only)
19
+
20
+ Run `agy` once interactively and complete the **OAuth** sign-in with a **Google AI Pro/Ultra**
21
+ account:
22
+
23
+ ```bash
24
+ agy
25
+ ```
26
+
27
+ This caches an OAuth token under `~/.gemini/antigravity-cli/` (`antigravity-oauth-token`). That token
28
+ is **personal** — never copy, commit, package, print, or share that directory or token. This skill
29
+ needs no API keys and must not be configured with API-key billing; the wrapper unsets every
30
+ `*_API_KEY` so billing can never silently fall back to pay-as-you-go.
31
+
32
+ ## 3. Put the wrapper on `PATH` as `agy-run`
33
+
34
+ The skill ships the wrapper at `bin/agy.sh`. Expose it on `PATH` under the stable name `agy-run`
35
+ (idempotent; refuses to clobber a non-symlink):
36
+
37
+ ```bash
38
+ mkdir -p "$HOME/.local/bin"
39
+ skill_dir="$HOME/.claude/skills/antigravity-cli-bridge" # adjust if installed elsewhere
40
+ dst="$HOME/.local/bin/agy-run"
41
+ if [ -e "$dst" ] && [ ! -L "$dst" ]; then
42
+ echo "STOP: $dst exists and is not a symlink"; exit 1
43
+ fi
44
+ chmod +x "$skill_dir/bin/agy.sh"
45
+ ln -sfn "$skill_dir/bin/agy.sh" "$dst"
46
+ export PATH="$HOME/.local/bin:$PATH"
47
+ command -v agy-run
48
+ ```
49
+
50
+ ## 4. Smoke test
51
+
52
+ ```bash
53
+ agy --version
54
+ echo "say OK" | agy-run -
55
+ ```
56
+
57
+ Expected: the version prints (`1.0.10` or newer), then a short reply containing `OK`. If `agy-run`
58
+ reports `'agy' not found`, fix your `PATH` (step 1). If it asks you to sign in, complete step 2.
59
+
60
+ ## Notes
61
+
62
+ - `agy-run` is headless and plain-text only; there is no JSON output mode.
63
+ - `AGY_MODEL` selects the exact model display string; `AGY_TIMEOUT` controls `--print-timeout`.
64
+ - Extra `agy` flags go after `--`, e.g. `agy-run @prompt.md -- --add-dir .`.
65
+ - Re-run interactive `agy` only when the OAuth token expires or the account changes.
@@ -0,0 +1,148 @@
1
+ ---
2
+ name: codex-cli-bridge
3
+ description: Delegate work to the OpenAI Codex CLI (`codex`) under a ChatGPT subscription — run plan/instruction EXECUTION in a sandboxed workspace, or get a read-only ADVISORY review of a plan or working-tree diff — as a second delegated-execution backend beside Antigravity. Use when the user wants to hand a bounded coding task or plan to `codex exec`, get a second-opinion review from codex, install or authenticate Codex CLI, understand its sandbox/network/approval policy, drive codex efficiently from the main agent (exec vs review, resume, the commit boundary), bridge project context (`AGENTS.md`) into codex, or troubleshoot codex flags, models, auth, or its no-TTY headless behaviour.
4
+ metadata:
5
+ version: '1.0.0'
6
+ ---
7
+
8
+ # codex-cli-bridge
9
+
10
+ Bridges the main agent to the **OpenAI Codex CLI** (`codex`) as a **delegated-execution backend**
11
+ beside Antigravity. The main agent stays the orchestrator — owning decisions, the edits it accepts,
12
+ verification, and user-facing claims — and hands `codex` a bounded sub-task answered from a **ChatGPT
13
+ subscription** (no pay-as-you-go billing). Codex has two roles here: a **sandboxed executor** that
14
+ edits a repo under a fixed policy (`codex-exec`), and a **read-only reviewer** that critiques a plan
15
+ or a working-tree diff and only emits findings (`codex-review`).
16
+
17
+ ## Overview / when to use
18
+
19
+ Use this skill when the user wants to:
20
+
21
+ - Delegate plan or instruction EXECUTION to `codex` in a workspace-write sandbox (network OFF).
22
+ - Get a second-opinion ADVISORY review of an implementation plan or the current diff.
23
+ - Install, authenticate, smoke-test, or troubleshoot `codex`, or understand its sandbox/flags/models.
24
+ - Drive codex efficiently from the main agent (exec vs review, `resume`, the commit boundary).
25
+
26
+ Do **not** use it to bundle secrets, bypass subscription auth, use api-key billing, or let codex
27
+ commit / push on its own.
28
+
29
+ ## Install
30
+
31
+ Clean-machine setup is in [`setup/README.md`](setup/README.md). In short: install the `codex`
32
+ binary, run `codex login` once under a ChatGPT subscription, then expose this skill's two wrappers on
33
+ `PATH` as `codex-exec` ([`bin/codex-exec.sh`](bin/codex-exec.sh)) and `codex-review`
34
+ ([`bin/codex-review.sh`](bin/codex-review.sh)).
35
+
36
+ ## Auth — subscription only (invariant)
37
+
38
+ `codex` authenticates with the cached **ChatGPT login** under `CODEX_HOME` (`~/.codex`). Never read,
39
+ print, copy, commit, or package `~/.codex/auth.json` — it is personal and is **never bundled** with
40
+ this skill. Both wrappers enforce the subscription path before invoking codex:
41
+
42
+ - they **unset every `*_API_KEY`** (incl. `OPENAI_API_KEY` / `CODEX_API_KEY`) and `OPENAI_BASE_URL` so a
43
+ stray key can never silently switch you to paid api-key billing;
44
+ - they pass **`--ignore-user-config`** so a personal `~/.codex/config.toml` cannot change model,
45
+ sandbox, or approval behaviour (auth still works — codex reads the login from `CODEX_HOME`
46
+ regardless of that flag);
47
+ - they **preflight `codex login status`** and refuse to run unless it reports `Logged in using ChatGPT`.
48
+
49
+ ## Models
50
+
51
+ The wrappers default to `gpt-5.5` at reasoning effort `xhigh` (the strongest setting verified in this
52
+ environment), both overridable per call. `codex --version` reports the CLI version, **not** the model
53
+ list — check your Codex CLI / ChatGPT account for the model slugs available to you, or let a wrong
54
+ `-m` surface the error.
55
+
56
+ | Variable | Default | Effect |
57
+ |---|---|---|
58
+ | `CODEX_MODEL` | `gpt-5.5` | model passed to `-m` |
59
+ | `CODEX_EFFORT` | `xhigh` | reasoning effort passed to `-c model_reasoning_effort=…` |
60
+
61
+ ```bash
62
+ CODEX_MODEL=<slug> CODEX_EFFORT=<low|medium|high|xhigh> codex-exec <file>
63
+ ```
64
+
65
+ ## Usage
66
+
67
+ Drive codex only through the two wrappers (installed on `PATH`), run from the target project root:
68
+
69
+ ```bash
70
+ # EXECUTION (workspace-write sandbox, network OFF, never prompts):
71
+ codex-exec docs/plans/<slug>.md # drive a plan file
72
+ echo "apply review fix: ..." | codex-exec - # ad-hoc instruction from stdin
73
+ CODEX_MODEL=<slug> codex-exec <file> # override the model
74
+ codex-exec <file|-> -- <extra codex flags...> # passthrough codex flags after `--`
75
+
76
+ # REVIEW (read-only sandbox — codex cannot edit anything, only emits findings):
77
+ codex-review plan docs/plans/<slug>.md # critique a plan
78
+ codex-review code # review the current working-tree diff
79
+ codex-review code "focus on the new reducer" # review with extra focus
80
+ ```
81
+
82
+ `codex exec` is headless: there is **no TTY**, so `approval_policy=never` — anything needing
83
+ escalation is refused and reported, never interactively approved. Extra `codex` flags go after a
84
+ literal `--`; args without the separator are rejected (never silently dropped). Full flag/policy
85
+ detail: [`references/sandbox-and-flags.md`](references/sandbox-and-flags.md).
86
+
87
+ ## Project context (how `codex` sees the repo)
88
+
89
+ From its **current working directory** `codex` auto-reads the root **`AGENTS.md`** — so when you run a
90
+ wrapper from a project root, the project's Hard Constraints are available to codex with no wiring (a
91
+ probe confirmed codex returned a repo's declared dialogue language from `AGENTS.md`). The wrappers
92
+ therefore **hardcode no project rules**: the orchestrator contract tells codex to read the target
93
+ `AGENTS.md` and obey it.
94
+
95
+ **Fallback is strict.** Both wrappers preflight that they run inside a git work tree and that a root
96
+ `AGENTS.md` exists — if either is missing they **STOP and report** (a wasted subscription run is
97
+ avoided). And the execution contract tells codex: if the project declares **no** verification/gate
98
+ set, **STOP and report** rather than invent checks. Pass `--skip-git-repo-check` to codex only when
99
+ you truly mean it.
100
+
101
+ ## How the main agent drives `codex` efficiently
102
+
103
+ See [`references/driving-codex.md`](references/driving-codex.md) for the full playbook. Essentials:
104
+
105
+ - **`codex-exec` for doing, `codex-review` for judging.** Use exec to implement a plan/fix under the
106
+ sandbox; use review to get advisory findings on a plan or diff without any edits.
107
+ - **The orchestrator commits — codex never does.** The execution contract forbids every git write
108
+ (branch/add/commit/stash/reset/checkout/tag/rewrite); you review codex's diff, then commit yourself.
109
+ - **Treat output as advisory** and verify before acting — re-run the project's gates yourself, reject
110
+ advice that conflicts with user instructions or repo rules.
111
+ - **Hand codex a self-contained task.** It cannot see your conversation — for an ad-hoc instruction,
112
+ embed the goal, the relevant paths, and the expected result; codex reads `AGENTS.md` for the rules.
113
+ - **Re-dispatch with `codex exec resume`** (run codex directly — the wrapper's flag/stdin shape can't
114
+ host the `resume` subcommand) instead of re-sending context. **Caveat:** resume runs outside the
115
+ wrapper and may not re-accept `--sandbox` / policy flags — restate the policy, or start a fresh
116
+ `codex-exec` run when a guaranteed sandbox/network posture matters.
117
+ - **Network is OFF in exec.** New dependencies and any network step are installed by hand, then codex
118
+ is re-dispatched.
119
+
120
+ ## Complementary skills (optional, standalone-first)
121
+
122
+ The wrappers work in any git repo where `codex` is installed and authenticated. The skills below are
123
+ **not required** — surface them only when they actually help.
124
+
125
+ - **`antigravity-cli-bridge`** (sibling backend, Google `agy`) — recommend **by actual presence**: if
126
+ `~/.claude/skills/antigravity-cli-bridge/` exists you have a **second delegated engine** (codex for
127
+ sandboxed repo edits with gates; `agy` for subscription-quota Gemini/Claude/GPT-OSS reasoning). If
128
+ it is **not** installed, treat it as an optional sibling — don't assume it exists.
129
+ - **`agent-workflow-memory`** (family **context provider**) — if the target project has **no**
130
+ `AGENTS.md` + `docs/ai/`, codex has no root context to read (and the wrappers' preflight will
131
+ STOP). The memory substrate is what creates that context. Soft-recommend it (only when the user
132
+ wants the memory workflow): `npx @sabaiway/agent-workflow-memory init`, or bootstrap the whole
133
+ family via the **`agent-workflow-kit`** orchestrator (`npx @sabaiway/agent-workflow-kit init`),
134
+ which delegates substrate deployment to memory and injects the workflow methodology. Never a
135
+ prerequisite.
136
+
137
+ ## Known limitations
138
+
139
+ - **Network is OFF** in `codex-exec` (`sandbox_workspace_write.network_access=false`): codex cannot
140
+ install dependencies or reach the network — do that by hand, then re-dispatch.
141
+ - **No live approvals** — `codex exec` has no TTY, so `approval_policy=never`; an action that would
142
+ need escalation is reported, not approved interactively.
143
+ - **`resume` may drop sandbox/policy flags** — restate the policy or start a fresh run when the
144
+ posture matters (see the driving reference).
145
+ - **bubblewrap** — on Linux, if `bubblewrap` is not on `PATH` codex prints a warning and uses a
146
+ bundled copy; install it via your package manager to silence the warning.
147
+ - codex output is advisory and may be incomplete or out of date — the main agent verifies before
148
+ acting.