@sabaiway/agent-workflow-kit 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -0
- package/README.md +12 -5
- package/SKILL.md +23 -2
- package/bin/install.mjs +33 -50
- package/bin/install.test.mjs +30 -1
- package/bridges/antigravity-cli-bridge/SKILL.md +178 -0
- package/bridges/antigravity-cli-bridge/bin/agy.sh +133 -0
- package/bridges/antigravity-cli-bridge/bin/agy.test.mjs +59 -0
- package/bridges/antigravity-cli-bridge/capability.json +22 -0
- package/bridges/antigravity-cli-bridge/references/driving-agy.md +108 -0
- package/bridges/antigravity-cli-bridge/references/models-and-flags.md +93 -0
- package/bridges/antigravity-cli-bridge/references/review-prompt.md +51 -0
- package/bridges/antigravity-cli-bridge/setup/README.md +65 -0
- package/bridges/codex-cli-bridge/SKILL.md +148 -0
- package/bridges/codex-cli-bridge/bin/codex-exec.sh +143 -0
- package/bridges/codex-cli-bridge/bin/codex-review.sh +84 -0
- package/bridges/codex-cli-bridge/capability.json +22 -0
- package/bridges/codex-cli-bridge/references/driving-codex.md +97 -0
- package/bridges/codex-cli-bridge/references/sandbox-and-flags.md +105 -0
- package/bridges/codex-cli-bridge/setup/README.md +78 -0
- package/capability.json +1 -1
- package/package.json +3 -2
- package/tools/detect-backends.mjs +36 -0
- package/tools/detect-backends.test.mjs +102 -0
- package/tools/fs-safe.mjs +129 -0
- package/tools/fs-safe.test.mjs +200 -0
- package/tools/setup-backends.mjs +468 -0
- package/tools/setup-backends.test.mjs +500 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { describe, it } from 'node:test';
|
|
2
|
+
import assert from 'node:assert/strict';
|
|
3
|
+
import { mkdtempSync, mkdirSync, writeFileSync, chmodSync, rmSync } from 'node:fs';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import { join, dirname } from 'node:path';
|
|
6
|
+
import { fileURLToPath } from 'node:url';
|
|
7
|
+
import { spawnSync } from 'node:child_process';
|
|
8
|
+
|
|
9
|
+
const HERE = dirname(fileURLToPath(import.meta.url));
|
|
10
|
+
const WRAPPER = join(HERE, 'agy.sh');
|
|
11
|
+
|
|
12
|
+
// Hermetic fixture: creates a throwaway HOME whose ~/.local/bin contains a fake `agy`.
// The wrapper puts "$HOME/.local/bin" at the front of PATH, so it runs this stub rather than
// the real subscription CLI — no network access and no real binary are involved.
//
// @param {string} stubBody  full script text (shebang included) written as the stub `agy`
// @returns {string}         path of the temporary HOME directory; the caller removes it
const makeSandbox = (stubBody) => {
  const sandboxHome = mkdtempSync(join(tmpdir(), 'agy-wrapper-test-'));
  const stubPath = join(sandboxHome, '.local', 'bin', 'agy');
  mkdirSync(dirname(stubPath), { recursive: true });
  writeFileSync(stubPath, stubBody, { mode: 0o755 });
  // Set the mode again explicitly so the stub is executable whatever the creation mode ended up as.
  chmodSync(stubPath, 0o755);
  return sandboxHome;
};
|
|
24
|
+
|
|
25
|
+
// Runs the wrapper script under bash with a minimal, explicit environment.
//
// The child does NOT inherit process.env: it receives only HOME, a PATH that starts with the
// sandbox's stub directory, and whatever `env` supplies (which may override either of those).
//
// @param {string} home    sandbox HOME directory (see makeSandbox)
// @param {object} env     extra environment variables for the child process
// @param {string} prompt  prompt argument handed to the wrapper (defaults to 'hello')
// @returns the spawnSync result (status, stdout and stderr as utf8 strings)
const runWrapper = (home, env, prompt = 'hello') => {
  const stubDir = join(home, '.local', 'bin');
  const childEnv = {
    HOME: home,
    PATH: `${stubDir}:${process.env.PATH}`,
    ...env,
  };
  // 20 s is the outer safety net so a broken wrapper cannot hang the test run.
  return spawnSync('bash', [WRAPPER, prompt], {
    env: childEnv,
    encoding: 'utf8',
    timeout: 20000,
  });
};
|
|
31
|
+
|
|
32
|
+
// End-to-end checks of the wrapper's hard wall-clock cap, driven through stub `agy` scripts.
// Each case builds its own sandbox HOME, runs the wrapper once, removes the sandbox, and only
// then asserts — so the temporary directory is cleaned up even when an assertion fails.
describe('agy.sh — hard wall-clock cap (timeout(1))', () => {
  // Message the wrapper is expected to print on stderr when the hard cap fires.
  const HARD_CAP_NOTICE = /exceeded the hard cap/;
  // Caps used by the two quick stubs below; each stub exits immediately.
  const relaxedCaps = { AGY_HARD_TIMEOUT: '10s', AGY_TIMEOUT: '10s', AGY_MODEL: '' };
  const discard = (sandbox) => rmSync(sandbox, { recursive: true, force: true });

  it('kills a hung agy at AGY_HARD_TIMEOUT and reports it (non-zero + actionable guidance)', () => {
    // The stub sleeps for 30 s while the hard cap is 2 s.
    const sandbox = makeSandbox('#!/usr/bin/env bash\nsleep 30\n');
    const t0 = Date.now();
    const { status, stderr } = runWrapper(sandbox, {
      AGY_HARD_TIMEOUT: '2s',
      AGY_TIMEOUT: '2s',
      AGY_MODEL: '',
    });
    const elapsed = Date.now() - t0;
    discard(sandbox);

    assert.ok(elapsed < 13000, `wrapper must return well under the kill-after window, took ${elapsed}ms`);
    assert.notEqual(status, 0, 'a timed-out run must exit non-zero');
    assert.match(stderr, HARD_CAP_NOTICE, 'must explain the hard-cap kill');
  });

  it('passes a fast agy run through unchanged (exit 0, stdout preserved)', () => {
    const sandbox = makeSandbox('#!/usr/bin/env bash\necho "OK reply"\nexit 0\n');
    const outcome = runWrapper(sandbox, relaxedCaps);
    discard(sandbox);

    assert.equal(outcome.status, 0, `expected clean exit, got ${outcome.status}; stderr=${outcome.stderr}`);
    assert.match(outcome.stdout, /OK reply/);
  });

  it('propagates a non-timeout agy failure code verbatim (no false hard-cap message)', () => {
    // The stub fails on its own with exit code 3.
    const sandbox = makeSandbox('#!/usr/bin/env bash\necho "boom" >&2\nexit 3\n');
    const outcome = runWrapper(sandbox, relaxedCaps);
    discard(sandbox);

    assert.equal(outcome.status, 3, 'a genuine agy failure code must pass through');
    assert.doesNotMatch(outcome.stderr, HARD_CAP_NOTICE, 'must not mislabel a non-timeout failure');
  });
});
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"family": "agent-workflow",
|
|
3
|
+
"schema": 1,
|
|
4
|
+
"name": "antigravity-cli-bridge",
|
|
5
|
+
"kind": "execution-backend",
|
|
6
|
+
"version": "1.0.0",
|
|
7
|
+
"provides": ["review", "probe"],
|
|
8
|
+
"roles": {
|
|
9
|
+
"review": { "cmd": "agy-run", "source": "bin/agy.sh", "template": "references/review-prompt.md", "output": "advisory" },
|
|
10
|
+
"probe": { "cmd": "agy-run", "source": "bin/agy.sh", "output": "advisory" }
|
|
11
|
+
},
|
|
12
|
+
"detect": {
|
|
13
|
+
"installed": {
|
|
14
|
+
"env": "ANTIGRAVITY_CLI_BRIDGE_DIR",
|
|
15
|
+
"default": "~/.claude/skills/antigravity-cli-bridge",
|
|
16
|
+
"file": "SKILL.md"
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"cost": "subscription",
|
|
20
|
+
"quota": { "kind": "subscription", "finite": true },
|
|
21
|
+
"provenance": { "author": "sabaiway", "source": "github:sabaiway/agent-workflow" }
|
|
22
|
+
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# How the main agent drives `agy`
|
|
2
|
+
|
|
3
|
+
`agy` is a **delegated-execution backend**: the main agent stays the orchestrator and hands `agy` a
|
|
4
|
+
bounded, self-contained sub-task. `agy` answers from the **subscription** quota, so the goal is
|
|
5
|
+
maximum useful output per token of that quota. Treat its output as **advisory** — the main agent owns
|
|
6
|
+
edits, verification, and final judgment.
|
|
7
|
+
|
|
8
|
+
## Delegation checklist
|
|
9
|
+
|
|
10
|
+
1. Pick the narrowest useful question.
|
|
11
|
+
2. Choose the cheapest model that can answer it.
|
|
12
|
+
3. Include only the relevant excerpts, paths, constraints, and the expected output shape.
|
|
13
|
+
4. State permission boundaries in the prompt (no edits, no git writes).
|
|
14
|
+
5. Run `agy-run` headlessly.
|
|
15
|
+
6. Treat the response as advisory and verify before acting.
|
|
16
|
+
|
|
17
|
+
## Model selection
|
|
18
|
+
|
|
19
|
+
| Task | Model |
|
|
20
|
+
|---|---|
|
|
21
|
+
| Reachability / smoke / "is it wired?" | `Gemini 3.5 Flash (Low)` |
|
|
22
|
+
| Cheap probes, summaries | `Gemini 3.5 Flash (Medium)` |
|
|
23
|
+
| Quick review with a little more effort | `Gemini 3.5 Flash (High)` |
|
|
24
|
+
| Reasoning, plan critique, careful drafting | `Gemini 3.1 Pro (High)` (wrapper default) |
|
|
25
|
+
| Medium reasoning on Pro, lower quota cost | `Gemini 3.1 Pro (Low)` |
|
|
26
|
+
| A different engine's opinion | `Claude Sonnet 4.6 (Thinking)`, `Claude Opus 4.6 (Thinking)`, or `GPT-OSS 120B (Medium)` |
|
|
27
|
+
|
|
28
|
+
Don't reach for Pro by reflex — Flash answers most reachability/probe questions for a fraction of the
|
|
29
|
+
quota.
|
|
30
|
+
|
|
31
|
+
## Quota economy
|
|
32
|
+
|
|
33
|
+
Subscription quota is finite. Prefer:
|
|
34
|
+
|
|
35
|
+
- A short probe on Flash before a large Pro run.
|
|
36
|
+
- One sharp question over broad "review everything" prompts.
|
|
37
|
+
- Prompt files with trimmed excerpts instead of whole repositories.
|
|
38
|
+
- `AGY_TIMEOUT=2m` for probes, longer timeouts only for deep reviews.
|
|
39
|
+
- Reusing a conversation with `--continue` when the context is already loaded.
|
|
40
|
+
|
|
41
|
+
## Continue vs. fresh
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# Continue the most recent conversation (cheaper than re-sending context):
|
|
45
|
+
agy-run "Given your previous review, list only the top three risks." -- --continue
|
|
46
|
+
|
|
47
|
+
# Resume a specific conversation by id:
|
|
48
|
+
agy-run "Continue from the prior architecture critique; focus on test gaps." -- --conversation <id>
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Use conversation state only when it saves quota or preserves useful context. For auditable decisions,
|
|
52
|
+
prefer self-contained prompts.
|
|
53
|
+
|
|
54
|
+
## Escalation policy (edits, network, git)
|
|
55
|
+
|
|
56
|
+
The wrapper passes no `--add-dir`, no `--dangerously-skip-permissions`, and no `--sandbox`. Treat this
|
|
57
|
+
as a **policy boundary you enforce in the prompt, not an enforced sandbox** — so prompt `agy` as a
|
|
58
|
+
read-only reviewer, and reach for `-- --sandbox` for anything that might trigger terminal/tool work:
|
|
59
|
+
|
|
60
|
+
```text
|
|
61
|
+
Do not edit files. Do not run git write commands. Do not branch, add, commit, stash, reset, or
|
|
62
|
+
rewrite history. Return findings and suggested changes only.
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
- **Repo edits** stay with the orchestrator. If a flow truly needs `agy` to write files, opt in
|
|
66
|
+
explicitly — `agy-run "..." -- --add-dir . --dangerously-skip-permissions` — and review the diff.
|
|
67
|
+
- **New dependencies / network installs** are done by hand, not by `agy`.
|
|
68
|
+
- **Git writes** (branch/commit) are never delegated — the orchestrator commits after review.
|
|
69
|
+
- Prefer `-- --sandbox` for any prompt that might trigger terminal work.
|
|
70
|
+
|
|
71
|
+
## Project-context prompts
|
|
72
|
+
|
|
73
|
+
Probe reachability from a project root (cheap model):
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
AGY_MODEL="Gemini 3.5 Flash (Low)" agy-run \
|
|
77
|
+
"Read the cwd context file and report the dialogue language plus one Hard Constraint."
|
|
78
|
+
AGY_MODEL="Gemini 3.5 Flash (Low)" agy-run \
|
|
79
|
+
"Without using a file pointer, is there a project-specific planning skill in this repo? Name it and cite its path."
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Plan-review prompt shape:
|
|
83
|
+
|
|
84
|
+
```text
|
|
85
|
+
You are reviewing the plan below from the current repository root.
|
|
86
|
+
Use the root context file and per-workspace skills if they are reachable.
|
|
87
|
+
Do not edit files. Do not run git write commands.
|
|
88
|
+
Return: 1) blocking issues 2) non-blocking risks 3) missing verification 4) a concise recommendation.
|
|
89
|
+
The implementation plan text follows in this same prompt.
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Diff/code-review prompt shape (provide the diff as text):
|
|
93
|
+
|
|
94
|
+
```text
|
|
95
|
+
Review this diff against the stated constraints.
|
|
96
|
+
Focus on bugs, behavioural regressions, missing tests, and violations of the project rules.
|
|
97
|
+
Cite file paths and line hints from the diff where possible. Do not summarise unless there are no findings.
|
|
98
|
+
The project constraints and diff text follow in this same prompt.
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Handling output
|
|
102
|
+
|
|
103
|
+
`agy` returns plain text. Do not assume it is complete, current, or machine-valid. Before acting:
|
|
104
|
+
|
|
105
|
+
- Check claims against local files or primary sources available to the main agent.
|
|
106
|
+
- Re-run local tests and linters yourself.
|
|
107
|
+
- Reject advice that conflicts with user instructions, repository rules, or security boundaries.
|
|
108
|
+
- Summarise uncertainty clearly when reporting back to the user.
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# `agy` models & flags (reference)
|
|
2
|
+
|
|
3
|
+
The source of truth is the live binary: `agy --version`, `agy --help`, `agy models`. The tables below
|
|
4
|
+
were captured from **v1.0.10**; if the binary disagrees, the binary wins. The wrapper command is
|
|
5
|
+
`agy-run`, backed by `bin/agy.sh`.
|
|
6
|
+
|
|
7
|
+
## Headless behaviour
|
|
8
|
+
|
|
9
|
+
Use `-p`, `--print`, or `--prompt` to run one non-interactive prompt and print the text response. The
|
|
10
|
+
wrapper always uses headless `-p`. **There is no JSON output mode in v1.0.10** — ask for Markdown,
|
|
11
|
+
bullets, tables, or fenced blocks when the caller needs structure, then validate the text yourself.
|
|
12
|
+
|
|
13
|
+
## Wrapper contract
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
agy-run <prompt | - | @file> [-- extra agy flags...]
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Inputs:
|
|
20
|
+
|
|
21
|
+
- Prompt text: `agy-run "say OK"`.
|
|
22
|
+
- Stdin: `echo "say OK" | agy-run -`.
|
|
23
|
+
- Prompt file: `agy-run @prompt.md`.
|
|
24
|
+
- Extra `agy` flags after `--`: `agy-run @prompt.md -- --add-dir . --continue`. Extra args **without**
|
|
25
|
+
the `--` separator are rejected with a usage error (they are never silently dropped).
|
|
26
|
+
- A literal prompt that **begins with `@`** is read as a file path. Pass such prompts via stdin
|
|
27
|
+
instead: `printf '%s' '@handle, review this' | agy-run -`.
|
|
28
|
+
|
|
29
|
+
Environment:
|
|
30
|
+
|
|
31
|
+
| Var | Default | Effect |
|
|
32
|
+
|---|---|---|
|
|
33
|
+
| `AGY_MODEL` | `Gemini 3.1 Pro (High)` | model display string; set empty (`AGY_MODEL=`) to drop `--model` and let `agy` use `settings.json` |
|
|
34
|
+
| `AGY_TIMEOUT` | `5m` | value passed to `--print-timeout` |
|
|
35
|
+
|
|
36
|
+
Subscription invariant: the wrapper prepends `$HOME/.local/bin` to `PATH` and clears
|
|
37
|
+
`ANTIGRAVITY_API_KEY` / `GEMINI_API_KEY` / `GOOGLE_API_KEY` / `GOOGLE_GENAI_API_KEY` before execution.
|
|
38
|
+
Auth comes from the user's cached OAuth token, never from bundled credentials.
|
|
39
|
+
|
|
40
|
+
## Models
|
|
41
|
+
|
|
42
|
+
Pass the **exact display string** from `agy models`, or set `AGY_MODEL`.
|
|
43
|
+
|
|
44
|
+
| Model string | Practical use |
|
|
45
|
+
|---|---|
|
|
46
|
+
| `Gemini 3.5 Flash (Low)` | lowest-cost smoke tests and simple rewrites |
|
|
47
|
+
| `Gemini 3.5 Flash (Medium)` | cheap probes, fast summaries, context-reachability checks |
|
|
48
|
+
| `Gemini 3.5 Flash (High)` | fast review when a little more reasoning effort is useful |
|
|
49
|
+
| `Gemini 3.1 Pro (Low)` | cheaper Pro pass for medium reasoning |
|
|
50
|
+
| `Gemini 3.1 Pro (High)` | wrapper default; hard reasoning, plan critique, architecture review |
|
|
51
|
+
| `Claude Sonnet 4.6 (Thinking)` | cross-vendor reasoning comparison |
|
|
52
|
+
| `Claude Opus 4.6 (Thinking)` | expensive deep critique when the user wants another high-end pass |
|
|
53
|
+
| `GPT-OSS 120B (Medium)` | open-weights-style comparison / diversity pass |
|
|
54
|
+
|
|
55
|
+
Examples:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
AGY_MODEL="Gemini 3.5 Flash (Medium)" agy-run "Read AGENTS.md and report one Hard Constraint."
|
|
59
|
+
AGY_MODEL="Claude Sonnet 4.6 (Thinking)" AGY_TIMEOUT=10m agy-run @review-prompt.md
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Flags (from `agy --help`, v1.0.10)
|
|
63
|
+
|
|
64
|
+
| Flag | Meaning | Notes |
|
|
65
|
+
|---|---|---|
|
|
66
|
+
| `-p`, `--print`, `--prompt` | run one headless prompt and print the text response | the wrapper uses `-p` |
|
|
67
|
+
| `--print-timeout <dur>` | cap headless wait time | CLI default `5m0s`; wrapper default `5m` via `AGY_TIMEOUT` |
|
|
68
|
+
| `--model <string>` | select a model | must match an `agy models` display string exactly |
|
|
69
|
+
| `-i`, `--prompt-interactive` | run an initial prompt, then continue interactively | not used by the wrapper |
|
|
70
|
+
| `-c`, `--continue` | continue the most recent conversation | pass after the wrapper's `--` |
|
|
71
|
+
| `--conversation <id>` | resume a specific conversation by id | use only when the user provides/records the id |
|
|
72
|
+
| `--add-dir <dir>` | add a directory to the workspace | repeatable; for explicit extra context |
|
|
73
|
+
| `--dangerously-skip-permissions` | auto-approve all tool permissions | avoid by default; use only with explicit user approval |
|
|
74
|
+
| `--sandbox` | run with terminal restrictions enabled | prefer when delegating a prompt that might trigger tool/terminal work |
|
|
75
|
+
| `--log-file <path>` | override the CLI log-file path | keep logs secret-free and out of committed artifacts |
|
|
76
|
+
|
|
77
|
+
## Subcommands (v1.0.10)
|
|
78
|
+
|
|
79
|
+
`changelog`, `help`, `install`, `models`, `plugin` / `plugins`, `update`.
|
|
80
|
+
|
|
81
|
+
**Not available in v1.0.10:** a JSON output mode and an `agy inspect` subcommand. Output is plain text.
|
|
82
|
+
|
|
83
|
+
## Project-context flags
|
|
84
|
+
|
|
85
|
+
`agy` reads context from its current working directory:
|
|
86
|
+
|
|
87
|
+
```text
|
|
88
|
+
.antigravity.md > GEMINI.md > AGENTS.md
|
|
89
|
+
.agents/skills/
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Use `--add-dir` for extra directories not already reachable from cwd. Subdirectory `CLAUDE.md` files
|
|
93
|
+
are **not** auto-loaded — include those local rules manually in the prompt when they matter.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Review prompt template — `agy-run` (review role)
|
|
2
|
+
|
|
3
|
+
The `review` role of `antigravity-cli-bridge` delegates a **read-only second opinion** to `agy`.
|
|
4
|
+
`agy` cannot see the conversation and (in v1.0.10) has no JSON output, so the prompt must be
|
|
5
|
+
**self-contained** and ask for **plain-Markdown findings only** — no repo edits, no git writes.
|
|
6
|
+
Fill the `{{…}}` slots, pass the filled prompt to `agy-run` (as `@file`, or on stdin via `-`), then verify every finding locally before acting.
|
|
7
|
+
|
|
8
|
+
```text
|
|
9
|
+
You are a meticulous staff-level reviewer giving a SECOND OPINION. You are read-only:
|
|
10
|
+
do not propose to edit files, run commands, or make git changes — return findings only.
|
|
11
|
+
|
|
12
|
+
## What to review
|
|
13
|
+
{{TARGET}} # e.g. "the implementation plan below" or "the working-tree diff below"
|
|
14
|
+
|
|
15
|
+
## Project rules
|
|
16
|
+
Read the repo's root AGENTS.md (your cwd) and obey its Hard Constraints and conventions.
|
|
17
|
+
If AGENTS.md declares a verification/gate set, judge the change against it; if it declares
|
|
18
|
+
none, say so — do NOT invent checks.
|
|
19
|
+
|
|
20
|
+
## Material
|
|
21
|
+
{{CONTENT}} # paste the plan text, or the unified diff, or the file excerpts under review
|
|
22
|
+
|
|
23
|
+
## Focus (optional)
|
|
24
|
+
{{FOCUS}} # e.g. "correctness of the new reducer", "backward-compat of the stamp takeover"
|
|
25
|
+
|
|
26
|
+
## Output — Markdown, this exact shape, nothing else
|
|
27
|
+
### Verdict
|
|
28
|
+
One line: SHIP / SHIP WITH NITS / REWORK, plus a one-sentence reason.
|
|
29
|
+
### Blocking
|
|
30
|
+
Numbered. Correctness bugs, contract violations, data loss, security. Cite file:line.
|
|
31
|
+
Empty? write "none".
|
|
32
|
+
### Non-blocking
|
|
33
|
+
Numbered. Simplifications, reuse, naming, missing tests. Cite file:line.
|
|
34
|
+
### Questions
|
|
35
|
+
Anything ambiguous that changes your verdict if answered.
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Usage
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# critique a plan
|
|
42
|
+
AGY_MODEL="Gemini 3.1 Pro (High)" agy-run @/tmp/review-prompt.filled.md
|
|
43
|
+
|
|
44
|
+
# critique the current diff (build the prompt with the diff pasted into {{CONTENT}})
|
|
45
|
+
git diff | ... # assemble the filled prompt, then:
|
|
46
|
+
agy-run @/tmp/review-prompt.filled.md
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Treat the result as **advisory** — `agy` output may be incomplete or out of date. The orchestrator
|
|
50
|
+
re-runs the project's real gates and owns every accepted change. See
|
|
51
|
+
[`driving-agy.md`](./driving-agy.md).
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Setting up Antigravity CLI (`agy`) on a clean machine
|
|
2
|
+
|
|
3
|
+
This setup is **secret-free**. `agy` itself is **not** bundled — it requires a binary install and a
|
|
4
|
+
one-time interactive sign-in with your own subscription. Do this once per machine, then the skill
|
|
5
|
+
works in any project.
|
|
6
|
+
|
|
7
|
+
## 1. Install the binary
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
curl -fsSL https://antigravity.google/cli/install.sh | bash
|
|
11
|
+
export PATH="$HOME/.local/bin:$PATH" # add to ~/.bashrc / ~/.zshrc to persist
|
|
12
|
+
agy --version # expect 1.0.10 or newer
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
- The binary is **`agy`** (not `antigravity`); it installs to `~/.local/bin/agy`.
|
|
16
|
+
- Keep `$HOME/.local/bin` on `PATH` (the wrapper also prepends it defensively).
|
|
17
|
+
|
|
18
|
+
## 2. Sign in once (subscription only)
|
|
19
|
+
|
|
20
|
+
Run `agy` once interactively and complete the **OAuth** sign-in with a **Google AI Pro/Ultra**
|
|
21
|
+
account:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
agy
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
This caches an OAuth token under `~/.gemini/antigravity-cli/` (`antigravity-oauth-token`). That token
|
|
28
|
+
is **personal** — never copy, commit, package, print, or share that directory or token. This skill
|
|
29
|
+
needs no API keys and must not be configured with API-key billing; the wrapper unsets every
|
|
30
|
+
`*_API_KEY` so billing can never silently fall back to pay-as-you-go.
|
|
31
|
+
|
|
32
|
+
## 3. Put the wrapper on `PATH` as `agy-run`
|
|
33
|
+
|
|
34
|
+
The skill ships the wrapper at `bin/agy.sh`. Expose it on `PATH` under the stable name `agy-run`
|
|
35
|
+
(idempotent; refuses to clobber a non-symlink):
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
mkdir -p "$HOME/.local/bin"
|
|
39
|
+
skill_dir="$HOME/.claude/skills/antigravity-cli-bridge" # adjust if installed elsewhere
|
|
40
|
+
dst="$HOME/.local/bin/agy-run"
|
|
41
|
+
if [ -e "$dst" ] && [ ! -L "$dst" ]; then
|
|
42
|
+
echo "STOP: $dst exists and is not a symlink"; exit 1
|
|
43
|
+
fi
|
|
44
|
+
chmod +x "$skill_dir/bin/agy.sh"
|
|
45
|
+
ln -sfn "$skill_dir/bin/agy.sh" "$dst"
|
|
46
|
+
export PATH="$HOME/.local/bin:$PATH"
|
|
47
|
+
command -v agy-run
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## 4. Smoke test
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
agy --version
|
|
54
|
+
echo "say OK" | agy-run -
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Expected: the version prints (`1.0.10` or newer), then a short reply containing `OK`. If `agy-run`
|
|
58
|
+
reports `'agy' not found`, fix your `PATH` (step 1). If it asks you to sign in, complete step 2.
|
|
59
|
+
|
|
60
|
+
## Notes
|
|
61
|
+
|
|
62
|
+
- `agy-run` is headless and plain-text only; there is no JSON output mode.
|
|
63
|
+
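- `AGY_MODEL` selects the exact model display string; `AGY_TIMEOUT` controls `--print-timeout`; `AGY_HARD_TIMEOUT` sets the wrapper's hard wall-clock cap, after which a hung `agy` is killed and the run exits non-zero.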
|
|
64
|
+
- Extra `agy` flags go after `--`, e.g. `agy-run @prompt.md -- --add-dir .`.
|
|
65
|
+
- Re-run interactive `agy` only when the OAuth token expires or the account changes.
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: codex-cli-bridge
|
|
3
|
+
description: Delegate work to the OpenAI Codex CLI (`codex`) under a ChatGPT subscription — run plan/instruction EXECUTION in a sandboxed workspace, or get a read-only ADVISORY review of a plan or working-tree diff — as a second delegated-execution backend beside Antigravity. Use when the user wants to hand a bounded coding task or plan to `codex exec`, get a second-opinion review from codex, install or authenticate Codex CLI, understand its sandbox/network/approval policy, drive codex efficiently from the main agent (exec vs review, resume, the commit boundary), bridge project context (`AGENTS.md`) into codex, or troubleshoot codex flags, models, auth, or its no-TTY headless behaviour.
|
|
4
|
+
metadata:
|
|
5
|
+
version: '1.0.0'
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# codex-cli-bridge
|
|
9
|
+
|
|
10
|
+
Bridges the main agent to the **OpenAI Codex CLI** (`codex`) as a **delegated-execution backend**
|
|
11
|
+
beside Antigravity. The main agent stays the orchestrator — owning decisions, the edits it accepts,
|
|
12
|
+
verification, and user-facing claims — and hands `codex` a bounded sub-task answered from a **ChatGPT
|
|
13
|
+
subscription** (no pay-as-you-go billing). Codex has two roles here: a **sandboxed executor** that
|
|
14
|
+
edits a repo under a fixed policy (`codex-exec`), and a **read-only reviewer** that critiques a plan
|
|
15
|
+
or a working-tree diff and only emits findings (`codex-review`).
|
|
16
|
+
|
|
17
|
+
## Overview / when to use
|
|
18
|
+
|
|
19
|
+
Use this skill when the user wants to:
|
|
20
|
+
|
|
21
|
+
- Delegate plan or instruction EXECUTION to `codex` in a workspace-write sandbox (network OFF).
|
|
22
|
+
- Get a second-opinion ADVISORY review of an implementation plan or the current diff.
|
|
23
|
+
- Install, authenticate, smoke-test, or troubleshoot `codex`, or understand its sandbox/flags/models.
|
|
24
|
+
- Drive codex efficiently from the main agent (exec vs review, `resume`, the commit boundary).
|
|
25
|
+
|
|
26
|
+
Do **not** use it to bundle secrets, bypass subscription auth, use api-key billing, or let codex
|
|
27
|
+
commit / push on its own.
|
|
28
|
+
|
|
29
|
+
## Install
|
|
30
|
+
|
|
31
|
+
Clean-machine setup is in [`setup/README.md`](setup/README.md). In short: install the `codex`
|
|
32
|
+
binary, run `codex login` once under a ChatGPT subscription, then expose this skill's two wrappers on
|
|
33
|
+
`PATH` as `codex-exec` ([`bin/codex-exec.sh`](bin/codex-exec.sh)) and `codex-review`
|
|
34
|
+
([`bin/codex-review.sh`](bin/codex-review.sh)).
|
|
35
|
+
|
|
36
|
+
## Auth — subscription only (invariant)
|
|
37
|
+
|
|
38
|
+
`codex` authenticates with the cached **ChatGPT login** under `CODEX_HOME` (`~/.codex`). Never read,
|
|
39
|
+
print, copy, commit, or package `~/.codex/auth.json` — it is personal and is **never bundled** with
|
|
40
|
+
this skill. Both wrappers enforce the subscription path before invoking codex:
|
|
41
|
+
|
|
42
|
+
- they **unset every `*_API_KEY`** (including `OPENAI_API_KEY` / `CODEX_API_KEY`) plus `OPENAI_BASE_URL`, so a
|
|
43
|
+
stray key can never silently switch you to paid api-key billing;
|
|
44
|
+
- they pass **`--ignore-user-config`** so a personal `~/.codex/config.toml` cannot change model,
|
|
45
|
+
sandbox, or approval behaviour (auth still works — codex reads the login from `CODEX_HOME`
|
|
46
|
+
regardless of that flag);
|
|
47
|
+
- they **preflight `codex login status`** and refuse to run unless it reports `Logged in using ChatGPT`.
|
|
48
|
+
|
|
49
|
+
## Models
|
|
50
|
+
|
|
51
|
+
The wrappers default to `gpt-5.5` at reasoning effort `xhigh` (the strongest setting verified in this
|
|
52
|
+
environment), both overridable per call. `codex --version` reports the CLI version, **not** the model
|
|
53
|
+
list — check your Codex CLI / ChatGPT account for the model slugs available to you, or let a wrong
|
|
54
|
+
`-m` surface the error.
|
|
55
|
+
|
|
56
|
+
| Variable | Default | Effect |
|
|
57
|
+
|---|---|---|
|
|
58
|
+
| `CODEX_MODEL` | `gpt-5.5` | model passed to `-m` |
|
|
59
|
+
| `CODEX_EFFORT` | `xhigh` | reasoning effort passed to `-c model_reasoning_effort=…` |
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
CODEX_MODEL=<slug> CODEX_EFFORT=<low|medium|high|xhigh> codex-exec <file>
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Usage
|
|
66
|
+
|
|
67
|
+
Drive codex only through the two wrappers (installed on `PATH`), run from the target project root:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
# EXECUTION (workspace-write sandbox, network OFF, never prompts):
|
|
71
|
+
codex-exec docs/plans/<slug>.md # drive a plan file
|
|
72
|
+
echo "apply review fix: ..." | codex-exec - # ad-hoc instruction from stdin
|
|
73
|
+
CODEX_MODEL=<slug> codex-exec <file> # override the model
|
|
74
|
+
codex-exec <file|-> -- <extra codex flags...> # passthrough codex flags after `--`
|
|
75
|
+
|
|
76
|
+
# REVIEW (read-only sandbox — codex cannot edit anything, only emits findings):
|
|
77
|
+
codex-review plan docs/plans/<slug>.md # critique a plan
|
|
78
|
+
codex-review code # review the current working-tree diff
|
|
79
|
+
codex-review code "focus on the new reducer" # review with extra focus
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
`codex exec` is headless: there is **no TTY**, so `approval_policy=never` — anything needing
|
|
83
|
+
escalation is refused and reported, never interactively approved. Extra `codex` flags go after a
|
|
84
|
+
literal `--`; args without the separator are rejected (never silently dropped). Full flag/policy
|
|
85
|
+
detail: [`references/sandbox-and-flags.md`](references/sandbox-and-flags.md).
|
|
86
|
+
|
|
87
|
+
## Project context (how `codex` sees the repo)
|
|
88
|
+
|
|
89
|
+
From its **current working directory** `codex` auto-reads the root **`AGENTS.md`** — so when you run a
|
|
90
|
+
wrapper from a project root, the project's Hard Constraints are available to codex with no wiring (a
|
|
91
|
+
probe confirmed codex returned a repo's declared dialogue language from `AGENTS.md`). The wrappers
|
|
92
|
+
therefore **hardcode no project rules**: the orchestrator contract tells codex to read the target
|
|
93
|
+
`AGENTS.md` and obey it.
|
|
94
|
+
|
|
95
|
+
**Fallback is strict.** Both wrappers preflight that they run inside a git work tree and that a root
|
|
96
|
+
`AGENTS.md` exists — if either is missing they **STOP and report** (a wasted subscription run is
|
|
97
|
+
avoided). And the execution contract tells codex: if the project declares **no** verification/gate
|
|
98
|
+
set, **STOP and report** rather than invent checks. Pass `--skip-git-repo-check` to codex only when
|
|
99
|
+
you truly mean it.
|
|
100
|
+
|
|
101
|
+
## How the main agent drives `codex` efficiently
|
|
102
|
+
|
|
103
|
+
See [`references/driving-codex.md`](references/driving-codex.md) for the full playbook. Essentials:
|
|
104
|
+
|
|
105
|
+
- **`codex-exec` for doing, `codex-review` for judging.** Use exec to implement a plan/fix under the
|
|
106
|
+
sandbox; use review to get advisory findings on a plan or diff without any edits.
|
|
107
|
+
- **The orchestrator commits — codex never does.** The execution contract forbids every git write
|
|
108
|
+
(branch/add/commit/stash/reset/checkout/tag/rewrite); you review codex's diff, then commit yourself.
|
|
109
|
+
- **Treat output as advisory** and verify before acting — re-run the project's gates yourself, reject
|
|
110
|
+
advice that conflicts with user instructions or repo rules.
|
|
111
|
+
- **Hand codex a self-contained task.** It cannot see your conversation — for an ad-hoc instruction,
|
|
112
|
+
embed the goal, the relevant paths, and the expected result; codex reads `AGENTS.md` for the rules.
|
|
113
|
+
- **Re-dispatch with `codex exec resume`** (run codex directly — the wrapper's flag/stdin shape can't
|
|
114
|
+
host the `resume` subcommand) instead of re-sending context. **Caveat:** resume runs outside the
|
|
115
|
+
wrapper and may not re-accept `--sandbox` / policy flags — restate the policy, or start a fresh
|
|
116
|
+
`codex-exec` run when a guaranteed sandbox/network posture matters.
|
|
117
|
+
- **Network is OFF in exec.** New dependencies and any network step are installed by hand, then codex
|
|
118
|
+
is re-dispatched.
|
|
119
|
+
|
|
120
|
+
## Complementary skills (optional, standalone-first)
|
|
121
|
+
|
|
122
|
+
The wrappers work in any git repo where `codex` is installed and authenticated. The skills below are
|
|
123
|
+
**not required** — surface them only when they actually help.
|
|
124
|
+
|
|
125
|
+
- **`antigravity-cli-bridge`** (sibling backend, Google `agy`) — recommend **by actual presence**: if
|
|
126
|
+
`~/.claude/skills/antigravity-cli-bridge/` exists you have a **second delegated engine** (codex for
|
|
127
|
+
sandboxed repo edits with gates; `agy` for subscription-quota Gemini/Claude/GPT-OSS reasoning). If
|
|
128
|
+
it is **not** installed, treat it as a planned sibling — don't assume it exists.
|
|
129
|
+
- **`agent-workflow-memory`** (family **context provider**) — if the target project has **no**
|
|
130
|
+
`AGENTS.md` + `docs/ai/`, codex has no root context to read (and the wrappers' preflight will
|
|
131
|
+
STOP). The memory substrate is what creates that context. Soft-recommend it (only when the user
|
|
132
|
+
wants the memory workflow): `npx @sabaiway/agent-workflow-memory init`, or bootstrap the whole
|
|
133
|
+
family via the **`agent-workflow-kit`** orchestrator (`npx @sabaiway/agent-workflow-kit init`),
|
|
134
|
+
which delegates substrate deployment to memory and injects the workflow methodology. Never a
|
|
135
|
+
prerequisite.
|
|
136
|
+
|
|
137
|
+
## Known limitations
|
|
138
|
+
|
|
139
|
+
- **Network is OFF** in `codex-exec` (`sandbox_workspace_write.network_access=false`): codex cannot
|
|
140
|
+
install dependencies or reach the network — do that by hand, then re-dispatch.
|
|
141
|
+
- **No live approvals** — `codex exec` has no TTY, so `approval_policy=never`; an action that would
|
|
142
|
+
need escalation is reported, not approved interactively.
|
|
143
|
+
- **`resume` may drop sandbox/policy flags** — restate the policy or start a fresh run when the
|
|
144
|
+
posture matters (see the driving reference).
|
|
145
|
+
- **bubblewrap** — on Linux, if `bubblewrap` is not on `PATH` codex prints a warning and uses a
|
|
146
|
+
bundled copy; install it via your package manager to silence the warning.
|
|
147
|
+
- codex output is advisory and may be incomplete or out of date — the main agent verifies before
|
|
148
|
+
acting.
|