@a5c-ai/babysitter-codex 0.1.6-staging.c7c4cba5 → 0.1.6-staging.cbfb13a1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codex-plugin/plugin.json +6 -0
- package/README.md +6 -18
- package/hooks/babysitter-session-start.sh +14 -7
- package/hooks/babysitter-stop-hook.sh +14 -7
- package/hooks/user-prompt-submit.sh +2 -8
- package/package.json +2 -2
- package/skills/babysit/SKILL.md +17 -377
- package/skills/call/SKILL.md +1 -0
- package/skills/retrospect/SKILL.md +42 -2
- package/skills/yolo/SKILL.md +3 -0
|
@@ -18,6 +18,12 @@
|
|
|
18
18
|
"skills"
|
|
19
19
|
],
|
|
20
20
|
"skills": "./skills/",
|
|
21
|
+
"skillEntries": [
|
|
22
|
+
{
|
|
23
|
+
"name": "babysitter:retrospect",
|
|
24
|
+
"argumentHint": "[run-id...] [--all] Run IDs or --all for all runs"
|
|
25
|
+
}
|
|
26
|
+
],
|
|
21
27
|
"hooks": "./hooks.json",
|
|
22
28
|
"apps": "./.app.json",
|
|
23
29
|
"interface": {
|
package/README.md
CHANGED
|
@@ -22,29 +22,17 @@ Install the SDK CLI first:
|
|
|
22
22
|
npm install -g @a5c-ai/babysitter-sdk
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
Clone the repo and install the plugin globally:
|
|
26
26
|
|
|
27
27
|
```bash
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
This copies the plugin into `~/.codex/plugins/babysitter-codex`, registers it
|
|
32
|
-
in `~/.agents/plugins/marketplace.json`, merges the required global Codex
|
|
33
|
-
config into `~/.codex/config.toml`, installs the active global Codex
|
|
34
|
-
`skills/`, `hooks/`, and `hooks.json` surface under `~/.codex/`, and ensures
|
|
35
|
-
the Babysitter process library is active in `~/.a5c`.
|
|
28
|
+
git clone https://github.com/a5c-ai/babysitter.git
|
|
29
|
+
cd babysitter
|
|
30
|
+
codex
|
|
36
31
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
```bash
|
|
40
|
-
npx @a5c-ai/babysitter-codex install --workspace /path/to/repo
|
|
32
|
+
> /plugins
|
|
41
33
|
```
|
|
42
34
|
|
|
43
|
-
|
|
44
|
-
it in `<workspace>/.agents/plugins/marketplace.json`, merges
|
|
45
|
-
`<workspace>/.codex/config.toml`, installs the active workspace Codex
|
|
46
|
-
`skills/`, `hooks/`, and `hooks.json` surface under `<workspace>/.codex/`, and
|
|
47
|
-
records install metadata under `<workspace>/.a5c/team/`.
|
|
35
|
+
Then navigate to the 'babysitter' entry and select 'Install'.
|
|
48
36
|
|
|
49
37
|
## Integration Model
|
|
50
38
|
|
|
@@ -11,16 +11,23 @@ export CODEX_PLUGIN_ROOT="${CODEX_PLUGIN_ROOT:-${PLUGIN_ROOT}}"
|
|
|
11
11
|
export BABYSITTER_STATE_DIR="${STATE_DIR}"
|
|
12
12
|
|
|
13
13
|
mkdir -p "$LOG_DIR" 2>/dev/null
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
14
|
+
|
|
15
|
+
blog() {
|
|
16
|
+
local msg="$1"
|
|
17
|
+
local ts
|
|
18
|
+
ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
|
19
|
+
echo "[INFO] $ts $msg" >> "$LOG_FILE" 2>/dev/null
|
|
20
|
+
babysitter log --type hook --label "hook:session-start" --message "$msg" --source shell-hook 2>/dev/null || true
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
blog "Hook script invoked"
|
|
24
|
+
blog "PLUGIN_ROOT=$PLUGIN_ROOT"
|
|
25
|
+
blog "STATE_DIR=$STATE_DIR"
|
|
19
26
|
|
|
20
27
|
INPUT_FILE=$(mktemp 2>/dev/null || echo "/tmp/codex-session-start-hook-$$.json")
|
|
21
28
|
cat > "$INPUT_FILE"
|
|
22
29
|
|
|
23
|
-
|
|
30
|
+
blog "Hook input received ($(wc -c < "$INPUT_FILE") bytes)"
|
|
24
31
|
|
|
25
32
|
RESULT=$(babysitter hook:run \
|
|
26
33
|
--hook-type session-start \
|
|
@@ -30,7 +37,7 @@ RESULT=$(babysitter hook:run \
|
|
|
30
37
|
< "$INPUT_FILE" 2>"$LOG_DIR/babysitter-session-start-hook-stderr.log")
|
|
31
38
|
EXIT_CODE=$?
|
|
32
39
|
|
|
33
|
-
|
|
40
|
+
blog "CLI exit code=$EXIT_CODE"
|
|
34
41
|
|
|
35
42
|
rm -f "$INPUT_FILE" 2>/dev/null
|
|
36
43
|
printf '%s\n' "$RESULT"
|
|
@@ -11,16 +11,23 @@ export CODEX_PLUGIN_ROOT="${CODEX_PLUGIN_ROOT:-${PLUGIN_ROOT}}"
|
|
|
11
11
|
export BABYSITTER_STATE_DIR="${STATE_DIR}"
|
|
12
12
|
|
|
13
13
|
mkdir -p "$LOG_DIR" 2>/dev/null
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
14
|
+
|
|
15
|
+
blog() {
|
|
16
|
+
local msg="$1"
|
|
17
|
+
local ts
|
|
18
|
+
ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
|
19
|
+
echo "[INFO] $ts $msg" >> "$LOG_FILE" 2>/dev/null
|
|
20
|
+
babysitter log --type hook --label "hook:stop" --message "$msg" --source shell-hook 2>/dev/null || true
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
blog "Hook script invoked"
|
|
24
|
+
blog "PLUGIN_ROOT=$PLUGIN_ROOT"
|
|
25
|
+
blog "STATE_DIR=$STATE_DIR"
|
|
19
26
|
|
|
20
27
|
INPUT_FILE=$(mktemp 2>/dev/null || echo "/tmp/codex-stop-hook-$$.json")
|
|
21
28
|
cat > "$INPUT_FILE"
|
|
22
29
|
|
|
23
|
-
|
|
30
|
+
blog "Hook input received ($(wc -c < "$INPUT_FILE") bytes)"
|
|
24
31
|
|
|
25
32
|
RESULT=$(babysitter hook:run \
|
|
26
33
|
--hook-type stop \
|
|
@@ -30,7 +37,7 @@ RESULT=$(babysitter hook:run \
|
|
|
30
37
|
< "$INPUT_FILE" 2>"$LOG_DIR/babysitter-stop-hook-stderr.log")
|
|
31
38
|
EXIT_CODE=$?
|
|
32
39
|
|
|
33
|
-
|
|
40
|
+
blog "CLI exit code=$EXIT_CODE"
|
|
34
41
|
|
|
35
42
|
rm -f "$INPUT_FILE" 2>/dev/null
|
|
36
43
|
printf '%s\n' "$RESULT"
|
|
@@ -5,22 +5,16 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
|
5
5
|
PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
|
6
6
|
STATE_DIR="${BABYSITTER_STATE_DIR:-${PWD}/.a5c}"
|
|
7
7
|
LOG_DIR="${BABYSITTER_LOG_DIR:-$PLUGIN_ROOT/.a5c/logs}"
|
|
8
|
-
LOG_FILE="$LOG_DIR/babysitter-user-prompt-submit-hook.log"
|
|
9
8
|
|
|
10
9
|
export CODEX_PLUGIN_ROOT="${CODEX_PLUGIN_ROOT:-${PLUGIN_ROOT}}"
|
|
11
10
|
export BABYSITTER_STATE_DIR="${STATE_DIR}"
|
|
12
11
|
|
|
13
12
|
mkdir -p "$LOG_DIR" 2>/dev/null
|
|
14
|
-
{
|
|
15
|
-
echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook script invoked"
|
|
16
|
-
echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) PLUGIN_ROOT=$PLUGIN_ROOT"
|
|
17
|
-
echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) STATE_DIR=$STATE_DIR"
|
|
18
|
-
} >> "$LOG_FILE" 2>/dev/null
|
|
19
13
|
|
|
20
14
|
INPUT_FILE=$(mktemp 2>/dev/null || echo "/tmp/codex-user-prompt-submit-hook-$$.json")
|
|
21
15
|
cat > "$INPUT_FILE"
|
|
22
16
|
|
|
23
|
-
|
|
17
|
+
babysitter log --type hook --label "hook:user-prompt-submit" --message "Hook invoked" --source shell-hook 2>/dev/null || true
|
|
24
18
|
|
|
25
19
|
RESULT=$(babysitter hook:run \
|
|
26
20
|
--hook-type user-prompt-submit \
|
|
@@ -30,7 +24,7 @@ RESULT=$(babysitter hook:run \
|
|
|
30
24
|
< "$INPUT_FILE" 2>"$LOG_DIR/babysitter-user-prompt-submit-hook-stderr.log")
|
|
31
25
|
EXIT_CODE=$?
|
|
32
26
|
|
|
33
|
-
|
|
27
|
+
babysitter log --type hook --label "hook:user-prompt-submit" --message "CLI exit code=$EXIT_CODE" --source shell-hook 2>/dev/null || true
|
|
34
28
|
|
|
35
29
|
rm -f "$INPUT_FILE" 2>/dev/null
|
|
36
30
|
if [ -n "$RESULT" ]; then
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@a5c-ai/babysitter-codex",
|
|
3
|
-
"version": "0.1.6-staging.c7c4cba5",
|
|
3
|
+
"version": "0.1.6-staging.cbfb13a1",
|
|
4
4
|
"description": "Babysitter Codex skill bundle and integration package for OpenAI Codex CLI with SDK-managed process-library bootstrapping, 15 orchestration modes, and BOM-safe SKILL installation",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"test": "node test/integration.test.js && node test/packaged-install.test.js",
|
|
@@ -43,6 +43,6 @@
|
|
|
43
43
|
},
|
|
44
44
|
"homepage": "https://github.com/a5c-ai/babysitter/tree/main/plugins/babysitter-codex#readme",
|
|
45
45
|
"dependencies": {
|
|
46
|
-
"@a5c-ai/babysitter-sdk": "0.0.183-staging.c7c4cba5"
|
|
46
|
+
"@a5c-ai/babysitter-sdk": "0.0.183-staging.cbfb13a1"
|
|
47
47
|
}
|
|
48
48
|
}
|
package/skills/babysit/SKILL.md
CHANGED
|
@@ -1,407 +1,47 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: babysit
|
|
3
3
|
description: >-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
diagnose run health, install Codex integration, or assimilate a methodology.
|
|
4
|
+
Orchestrate via @babysitter. Use this skill when asked to babysit a run,
|
|
5
|
+
orchestrate a process or whenever it is called explicitly. (babysit,
|
|
6
|
+
babysitter, orchestrate, orchestrate a run, workflow, etc.)
|
|
8
7
|
---
|
|
9
8
|
|
|
10
9
|
# babysit
|
|
11
10
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
- the installed plugin under `~/.codex/plugins/babysitter-codex` or `<workspace>/plugins/babysitter-codex`
|
|
15
|
-
- the plugin skill tree under `skills/babysit` and `skills/<mode>`
|
|
16
|
-
- the plugin hook registry at `hooks.json`
|
|
17
|
-
- the plugin hook scripts under `hooks/`
|
|
18
|
-
- global `~/.codex/config.toml`
|
|
19
|
-
- optional workspace `.codex/config.toml`
|
|
20
|
-
- workspace `.a5c/`
|
|
21
|
-
- shared global `.a5c/` process-library state
|
|
22
|
-
- the Babysitter SDK CLI for `run:create`, `run:iterate`, `run:status`,
|
|
23
|
-
`task:list`, `task:post`, and process-library binding
|
|
24
|
-
|
|
25
|
-
Native Windows caveat: Codex does not execute hooks on Windows yet. When the
|
|
26
|
-
current Codex session is running on native Windows, do not yield the turn and
|
|
27
|
-
wait for the `Stop` hook. Keep driving the Babysitter loop in the current turn
|
|
28
|
-
until the run completes or you hit a real user breakpoint that requires chat
|
|
29
|
-
input. Only rely on the `SessionStart`/`Stop` hook handoff on platforms where
|
|
30
|
-
Codex hook execution is actually available.
|
|
31
|
-
|
|
32
|
-
## Choosing a Mode
|
|
33
|
-
|
|
34
|
-
Use this skill whenever it is invoked directly, and whenever one of the
|
|
35
|
-
installed mode-wrapper skills such as `$call`, `$plan`, `$resume`, or `$yolo`
|
|
36
|
-
loads it.
|
|
37
|
-
|
|
38
|
-
Choose the mode from either:
|
|
39
|
-
|
|
40
|
-
1. the direct user intent when the skill is invoked as `$babysit`
|
|
41
|
-
2. the installed wrapper skill name when the user invoked `$call`, `$plan`,
|
|
42
|
-
`$resume`, `$yolo`, and the rest
|
|
43
|
-
|
|
44
|
-
| User intent | Mode |
|
|
45
|
-
|-------------|------|
|
|
46
|
-
| Start an orchestration run | `call` |
|
|
47
|
-
| Work an issue-centric flow | `issue` |
|
|
48
|
-
| Run autonomously | `yolo` |
|
|
49
|
-
| Run continuously / recurring workflow | `forever` |
|
|
50
|
-
| Resume an existing run | `resume` |
|
|
51
|
-
| Plan without executing | `plan` |
|
|
52
|
-
| Observe or inspect a run | `observe` |
|
|
53
|
-
| Summarize a completed run | `retrospect` |
|
|
54
|
-
| Diagnose run health | `doctor` |
|
|
55
|
-
| Change or inspect model routing | `model` |
|
|
56
|
-
| Help and documentation | `help` |
|
|
57
|
-
| Install into a project | `project-install` |
|
|
58
|
-
| Install user profile/setup | `user-install` |
|
|
59
|
-
| Install team-pinned setup | `team-install` |
|
|
60
|
-
| Assimilate external methodology | `assimilate` |
|
|
61
|
-
|
|
62
|
-
Deprecated prompt aliases are not the Codex command surface anymore. Do not
|
|
63
|
-
depend on `.codex/prompts` for normal operation.
|
|
11
|
+
Orchestrate `.a5c/runs/<runId>/` through iterative execution.
|
|
64
12
|
|
|
65
13
|
## Dependencies
|
|
66
14
|
|
|
67
15
|
### Babysitter SDK and CLI
|
|
68
16
|
|
|
69
|
-
|
|
17
|
+
Read the SDK version from `versions.json` to ensure version compatibility:
|
|
70
18
|
|
|
71
19
|
```bash
|
|
72
|
-
|
|
20
|
+
SDK_VERSION=$(node -e "try{console.log(JSON.parse(require('fs').readFileSync('${CODEX_PLUGIN_ROOT}/versions.json','utf8')).sdkVersion||'latest')}catch{console.log('latest')}")
|
|
21
|
+
sudo npm i -g @a5c-ai/babysitter-sdk@$SDK_VERSION
|
|
22
|
+
# whether sudo is required depends on your environment and system
|
|
73
23
|
```
|
|
74
24
|
|
|
75
|
-
|
|
25
|
+
Then use the CLI alias: `CLI="babysitter"`
|
|
76
26
|
|
|
77
|
-
|
|
78
|
-
CLI="npx -y @a5c-ai/babysitter-sdk"
|
|
79
|
-
```
|
|
27
|
+
**Alternatively, use the CLI alias:** `CLI="npx -y @a5c-ai/babysitter-sdk@$SDK_VERSION"`
|
|
80
28
|
|
|
81
29
|
### jq
|
|
82
30
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
## Core Iteration Workflow
|
|
86
|
-
|
|
87
|
-
The Babysitter workflow has 8 steps:
|
|
88
|
-
|
|
89
|
-
1. **Create or find the process** - interview the user or parse the prompt,
|
|
90
|
-
research the repo and process library, and build a process definition
|
|
91
|
-
2. **Create run and bind session** - create the run via the Babysitter CLI and
|
|
92
|
-
bind it to the current Codex session honestly
|
|
93
|
-
3. **Run iteration** - execute one orchestration step
|
|
94
|
-
4. **Get effects** - inspect pending effects
|
|
95
|
-
5. **Perform effects** - execute the requested tasks through skills, agents, or
|
|
96
|
-
shell work
|
|
97
|
-
6. **Post results** - commit results back through `task:post`
|
|
98
|
-
7. **Stop and yield** - on platforms with working Codex hooks, the Codex stop
|
|
99
|
-
hook decides whether to continue; BUT if running on Windows, stay in-turn and
|
|
100
|
-
continue the loop yourself instead of waiting for a hook that will not run
|
|
101
|
-
8. **Completion proof** - finish only when the emitted proof is returned
|
|
102
|
-
|
|
103
|
-
### 1. Create or find the process for the run
|
|
104
|
-
|
|
105
|
-
#### Interview phase
|
|
106
|
-
|
|
107
|
-
##### Interactive mode (default)
|
|
108
|
-
|
|
109
|
-
Interview the user for intent, requirements, goals, scope, and constraints
|
|
110
|
-
before entering the hook-driven loop.
|
|
111
|
-
|
|
112
|
-
This phase should be iterative and adaptive:
|
|
113
|
-
|
|
114
|
-
- inspect the current repo state first
|
|
115
|
-
- resolve the active process-library root with
|
|
116
|
-
`babysitter process-library:active --json`
|
|
117
|
-
- conduct an actual search against that active process library before writing a
|
|
118
|
-
process
|
|
119
|
-
- research the repo, online references, methodologies, specializations, skills,
|
|
120
|
-
agents, and related processes as needed
|
|
121
|
-
- ask the user follow-up questions when the intent or constraints are still not
|
|
122
|
-
clear
|
|
123
|
-
|
|
124
|
-
Do not plan more than one step ahead during the interview phase. After each
|
|
125
|
-
step, decide the next best step from the current evidence.
|
|
126
|
-
|
|
127
|
-
The `process-library:active` command bootstraps the shared global SDK process
|
|
128
|
-
library automatically if no binding exists yet. Read:
|
|
129
|
-
|
|
130
|
-
- `binding.dir` as the active process-library root that must be searched
|
|
131
|
-
- `defaultSpec.cloneDir` as the cloned repo root when adjacent repo-level
|
|
132
|
-
material is needed
|
|
133
|
-
|
|
134
|
-
After that, treat `specializations/**/**/**`, `methodologies/`, `contrib/`, and
|
|
135
|
-
`reference/` as paths relative to `binding.dir`.
|
|
136
|
-
|
|
137
|
-
##### Non-interactive mode
|
|
138
|
-
|
|
139
|
-
When running non-interactively:
|
|
140
|
-
|
|
141
|
-
1. parse the initial prompt to extract intent, scope, and constraints
|
|
142
|
-
2. inspect the repo structure
|
|
143
|
-
3. resolve the active process-library root with
|
|
144
|
-
`babysitter process-library:active --json`
|
|
145
|
-
4. search that active library for the most relevant specialization,
|
|
146
|
-
methodology, process, skill, or agent
|
|
147
|
-
5. proceed directly to process creation
|
|
148
|
-
|
|
149
|
-
Do not skip the active-library search step.
|
|
150
|
-
|
|
151
|
-
#### User Profile Integration
|
|
152
|
-
|
|
153
|
-
Before building the process, check for an existing user profile:
|
|
154
|
-
|
|
155
|
-
1. run `babysitter profile:read --user --json`
|
|
156
|
-
2. use the profile to pre-fill user preferences, expertise, and communication
|
|
157
|
-
style
|
|
158
|
-
3. calibrate breakpoint density from `breakpointTolerance`
|
|
159
|
-
4. prefer tools, skills, and agents the user already uses
|
|
160
|
-
5. adapt explanations and breakpoint text to the user's communication style
|
|
161
|
-
6. if no profile exists, proceed normally and consider suggesting `$user-install`
|
|
162
|
-
|
|
163
|
-
All profile read/write/merge/render operations must go through the Babysitter
|
|
164
|
-
CLI, never direct SDK imports.
|
|
165
|
-
|
|
166
|
-
#### Process creation phase
|
|
167
|
-
|
|
168
|
-
After the interview phase, create the full custom process files for the run
|
|
169
|
-
according to the process-library patterns and the process-creation guidelines
|
|
170
|
-
below.
|
|
171
|
-
|
|
172
|
-
Install `@a5c-ai/babysitter-sdk` into `.a5c/` if it is missing. When doing so,
|
|
173
|
-
run the install from the project root and use either `npm i --prefix .a5c ...`
|
|
174
|
-
or a subshell so the working directory does not stay inside `.a5c/`.
|
|
175
|
-
|
|
176
|
-
Always use an **absolute path** for `--entry` when calling `run:create`.
|
|
177
|
-
|
|
178
|
-
After the process is created and before creating the run:
|
|
179
|
-
|
|
180
|
-
- in interactive mode, describe the process at a high level, generate
|
|
181
|
-
`[process-name].diagram.md` and `[process-name].process.md`, and get user
|
|
182
|
-
confirmation before proceeding
|
|
183
|
-
- in non-interactive mode, proceed directly to `run:create`
|
|
184
|
-
|
|
185
|
-
Common mistakes to avoid:
|
|
186
|
-
|
|
187
|
-
- wrong: skipping repo/process-library research before writing the process
|
|
188
|
-
- wrong: bypassing the orchestration model with helper scripts or inline logic
|
|
189
|
-
- wrong: using `kind: 'node'` in generated tasks
|
|
190
|
-
- correct: use `agent` or `skill` tasks for reasoning work, with `shell` only
|
|
191
|
-
for existing CLIs, tests, linters, git, or builds
|
|
192
|
-
- correct: include verification loops, refinement loops, quality gates, and
|
|
193
|
-
breakpoints where appropriate
|
|
194
|
-
|
|
195
|
-
### 2. Create run and bind session
|
|
196
|
-
|
|
197
|
-
For new runs:
|
|
198
|
-
|
|
199
|
-
```bash
|
|
200
|
-
$CLI run:create \
|
|
201
|
-
--process-id <id> \
|
|
202
|
-
--entry <absolute-path>#<export> \
|
|
203
|
-
--inputs <file> \
|
|
204
|
-
--prompt "$PROMPT" \
|
|
205
|
-
--harness codex \
|
|
206
|
-
--state-dir .a5c \
|
|
207
|
-
--plugin-root "${CODEX_PLUGIN_ROOT}" \
|
|
208
|
-
--json
|
|
209
|
-
```
|
|
210
|
-
|
|
211
|
-
Required flags:
|
|
212
|
-
|
|
213
|
-
- `--process-id <id>` - unique identifier for the process definition
|
|
214
|
-
- `--entry <absolute-path>#<export>` - process JS file plus named export
|
|
215
|
-
- `--prompt "$PROMPT"` - the user's initial request
|
|
216
|
-
- `--harness codex` - activates Codex session binding
|
|
217
|
-
- `--state-dir .a5c` - required for honest workspace-local Codex session state
|
|
218
|
-
- `--plugin-root "${CODEX_PLUGIN_ROOT}"` - plugin root used for session/state
|
|
219
|
-
resolution
|
|
220
|
-
|
|
221
|
-
Optional flags:
|
|
222
|
-
|
|
223
|
-
- `--inputs <file>` - process input JSON
|
|
224
|
-
- `--run-id <id>` - override the generated run id
|
|
225
|
-
- `--runs-dir <dir>` - override the default runs directory
|
|
31
|
+
Make sure `jq` is installed and available on your `PATH`. If it is not, install it.
|
|
226
32
|
|
|
227
|
-
|
|
228
|
-
explicitly. The Codex adapter auto-resolves the session/thread id from
|
|
229
|
-
`CODEX_THREAD_ID`, `CODEX_SESSION_ID`, or `CODEX_ENV_FILE`. Only pass
|
|
230
|
-
`--session-id` in out-of-band recovery flows where no ambient Codex session
|
|
231
|
-
identity exists.
|
|
33
|
+
## Instructions
|
|
232
34
|
|
|
233
|
-
|
|
234
|
-
workspace `.a5c`, not the global `~/.a5c`, so the Stop hook can find the same
|
|
235
|
-
session state file in later turns.
|
|
236
|
-
|
|
237
|
-
On Windows, still bind the session honestly with `run:create`, but do
|
|
238
|
-
not assume that later turns will be resumed by Codex hooks. After `run:create`
|
|
239
|
-
you must keep executing the orchestration loop yourself in the current turn
|
|
240
|
-
until completion or a real user-facing breakpoint. (calling `run:iterate` in the same turn is fine on Windows because the hooks won't run there, but do not do that in the normal Codex plugin path where the hooks are expected to drive the loop).
|
|
241
|
-
|
|
242
|
-
For resuming existing runs in a manual recovery flow:
|
|
243
|
-
|
|
244
|
-
```bash
|
|
245
|
-
$CLI session:resume \
|
|
246
|
-
--session-id <id> \
|
|
247
|
-
--state-dir .a5c \
|
|
248
|
-
--run-id <runId> \
|
|
249
|
-
--runs-dir .a5c/runs \
|
|
250
|
-
--json
|
|
251
|
-
```
|
|
252
|
-
|
|
253
|
-
### 3. Run iteration
|
|
35
|
+
Run the following command to get full orchestration instructions:
|
|
254
36
|
|
|
255
37
|
```bash
|
|
256
|
-
|
|
38
|
+
babysitter instructions:babysit-skill --harness codex --interactive
|
|
257
39
|
```
|
|
258
40
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
- `"executed"` - tasks executed, continue looping
|
|
262
|
-
- `"waiting"` - breakpoint or sleep is pending
|
|
263
|
-
- `"completed"` - run finished successfully
|
|
264
|
-
- `"failed"` - run failed
|
|
265
|
-
- `"none"` - no runnable effects exist
|
|
266
|
-
|
|
267
|
-
### 4. Get effects
|
|
41
|
+
For non-interactive runs (e.g., with `-p` flag or no question tool):
|
|
268
42
|
|
|
269
43
|
```bash
|
|
270
|
-
|
|
44
|
+
babysitter instructions:babysit-skill --harness codex --no-interactive
|
|
271
45
|
```
|
|
272
46
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
Run the effect externally to the SDK, then post the outcome summary with
|
|
276
|
-
`task:post`.
|
|
277
|
-
|
|
278
|
-
Important:
|
|
279
|
-
|
|
280
|
-
- delegate using Codex skills or agent tooling when possible
|
|
281
|
-
- make sure the requested change actually happened
|
|
282
|
-
- do not describe or imply success without verifying the requested effect
|
|
283
|
-
- do not use the `babysit` skill itself inside delegated task execution
|
|
284
|
-
|
|
285
|
-
#### 5.1 Breakpoint handling
|
|
286
|
-
|
|
287
|
-
##### Interactive mode
|
|
288
|
-
|
|
289
|
-
Ask the user explicitly for approval. If the Codex environment provides a
|
|
290
|
-
structured question UI, include explicit approve/reject options. If not, ask in
|
|
291
|
-
chat and require an explicit approval response.
|
|
292
|
-
|
|
293
|
-
Never infer approval from silence, ambiguity, or dismissal.
|
|
294
|
-
|
|
295
|
-
Breakpoint rejections must still be posted with `--status ok` and a value such
|
|
296
|
-
as `{"approved": false, "response": "..."}`.
|
|
297
|
-
|
|
298
|
-
##### Non-interactive mode
|
|
299
|
-
|
|
300
|
-
Choose the best option from context and post the result. Rejections still use
|
|
301
|
-
`--status ok` with `{"approved": false}`.
|
|
302
|
-
|
|
303
|
-
### 6. Results posting
|
|
304
|
-
|
|
305
|
-
Never write `result.json` directly.
|
|
306
|
-
|
|
307
|
-
Workflow:
|
|
308
|
-
|
|
309
|
-
1. write the result value to `tasks/<effectId>/output.json`
|
|
310
|
-
2. call `task:post` with `--value tasks/<effectId>/output.json`
|
|
311
|
-
3. let the SDK write `result.json`, append the journal event, and update state
|
|
312
|
-
|
|
313
|
-
### 7. Stop after every phase after run-session association
|
|
314
|
-
|
|
315
|
-
After `run:create` or any posted effect result, end the current assistant turn
|
|
316
|
-
and yield back to the Codex hook loop. Do not run multiple `run:iterate` steps
|
|
317
|
-
in the same turn.
|
|
318
|
-
|
|
319
|
-
Native Windows exception: because Codex does not execute hooks on Windows yet,
|
|
320
|
-
do not end the turn and wait for the hook loop there. Continue with the next
|
|
321
|
-
`run:iterate`, inspect tasks, perform effects, and post results in the same
|
|
322
|
-
turn until the run completes or you need explicit user input for a breakpoint.
|
|
323
|
-
Do not tell the user you are waiting for the stop hook on Windows.
|
|
324
|
-
|
|
325
|
-
### 8. Completion proof
|
|
326
|
-
|
|
327
|
-
When `run:iterate` or `run:status` returns `completionProof`, return that exact
|
|
328
|
-
value wrapped in `<promise>...</promise>`.
|
|
329
|
-
|
|
330
|
-
## Hook Loop
|
|
331
|
-
|
|
332
|
-
Global install must register the plugin in `~/.agents/plugins/marketplace.json`
|
|
333
|
-
with the plugin bundle at `~/.codex/plugins/babysitter-codex`, and must merge
|
|
334
|
-
`~/.codex/config.toml`.
|
|
335
|
-
|
|
336
|
-
Workspace onboarding may also register the plugin in
|
|
337
|
-
`<workspace>/.agents/plugins/marketplace.json` with the plugin bundle at
|
|
338
|
-
`<workspace>/plugins/babysitter-codex`, and may merge `.codex/config.toml` for
|
|
339
|
-
repo-local pinning.
|
|
340
|
-
|
|
341
|
-
Both levels must provide:
|
|
342
|
-
|
|
343
|
-
1. `SessionStart` seeds `.a5c` session state
|
|
344
|
-
2. `UserPromptSubmit` performs prompt-time transformations when needed
|
|
345
|
-
3. `Stop` decides whether the run is complete or Codex should receive the next
|
|
346
|
-
Babysitter iteration context
|
|
347
|
-
|
|
348
|
-
On native Windows, treat these hook registrations as installation/configuration
|
|
349
|
-
state only. Codex currently does not execute them there, so the skill must keep
|
|
350
|
-
the orchestration loop moving in-turn instead of waiting for hook callbacks.
|
|
351
|
-
|
|
352
|
-
## Task Kinds
|
|
353
|
-
|
|
354
|
-
Never generate `kind: 'node'` effects.
|
|
355
|
-
|
|
356
|
-
| Kind | When to use |
|
|
357
|
-
|------|-------------|
|
|
358
|
-
| `agent` | default for planning, implementation, analysis, debugging, scoring, research |
|
|
359
|
-
| `skill` | when a matching installed skill exists |
|
|
360
|
-
| `shell` | existing CLI tools, tests, git, linters, builds |
|
|
361
|
-
| `breakpoint` | human approval gates |
|
|
362
|
-
| `sleep` | time gates |
|
|
363
|
-
|
|
364
|
-
## Process Creation Guidelines
|
|
365
|
-
|
|
366
|
-
- always research the repo and the active process library before writing the
|
|
367
|
-
process
|
|
368
|
-
- prefer composing multiple relevant library processes rather than copying just
|
|
369
|
-
one template blindly
|
|
370
|
-
- include verification and refinement loops
|
|
371
|
-
- prefer processes that close the widest practical quality loop
|
|
372
|
-
- add `@skill` and `@agent` discovery markers to generated process files for
|
|
373
|
-
the dependencies you actually selected
|
|
374
|
-
- prefer incremental work that can be tested as you go
|
|
375
|
-
|
|
376
|
-
Search for relevant processes, skills, agents, methodologies, and references
|
|
377
|
-
in:
|
|
378
|
-
|
|
379
|
-
1. `.a5c/processes/`
|
|
380
|
-
2. the active process-library root from `binding.dir`
|
|
381
|
-
3. the cloned repo root from `defaultSpec.cloneDir` when adjacent material is
|
|
382
|
-
needed
|
|
383
|
-
|
|
384
|
-
## Codex-Specific Rules
|
|
385
|
-
|
|
386
|
-
- `$babysit` is the core skill
|
|
387
|
-
- `$call`, `$plan`, `$resume`, `$yolo`, and the other mode skills are thin
|
|
388
|
-
wrappers that must only load `babysit` for the matching mode
|
|
389
|
-
- do not fabricate a session id
|
|
390
|
-
- on Windows env, never claim that you are yielding to or waiting for the
|
|
391
|
-
Codex stop hook; continue the Babysitter loop in the current turn instead
|
|
392
|
-
|
|
393
|
-
## Critical Rules
|
|
394
|
-
|
|
395
|
-
CRITICAL RULE: The completion proof is emitted only when the run is truly
|
|
396
|
-
completed. Output `<promise>SECRET</promise>` only when the orchestration status
|
|
397
|
-
is completed.
|
|
398
|
-
|
|
399
|
-
CRITICAL RULE: Never bypass the Babysitter orchestration model when this skill
|
|
400
|
-
is active. Do not replace it with ad-hoc direct execution.
|
|
401
|
-
|
|
402
|
-
CRITICAL RULE: Never build helper scripts or wrapper programs to drive the run.
|
|
403
|
-
Use the CLI and the hook loop directly.
|
|
404
|
-
|
|
405
|
-
CRITICAL RULE: In interactive mode, never auto-approve breakpoints.
|
|
406
|
-
|
|
407
|
-
CRITICAL RULE: Do not use `kind: 'node'` in generated process files.
|
|
47
|
+
Follow the instructions returned by the command above to orchestrate the run.
|
package/skills/call/SKILL.md
CHANGED
|
@@ -12,5 +12,6 @@ Resolve the request in `call` mode:
|
|
|
12
12
|
- treat everything after `$call` as the initial Babysitter request for a new
|
|
13
13
|
orchestration run
|
|
14
14
|
- create the process, create the run, and enter the Babysitter loop
|
|
15
|
+
- invoking this skill always means the user wants an interactive run
|
|
15
16
|
- do not create a separate command surface here; this skill only forwards into
|
|
16
17
|
`babysit`
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: retrospect
|
|
3
|
-
description: Summarize or retrospect on
|
|
3
|
+
description: Summarize or retrospect on one or more completed Babysitter runs.
|
|
4
|
+
argument-hint: "[run-id...] [--all] Optional run IDs or --all for all runs"
|
|
4
5
|
---
|
|
5
6
|
|
|
6
7
|
# retrospect
|
|
@@ -9,7 +10,46 @@ Load and use the installed `babysit` skill.
|
|
|
9
10
|
|
|
10
11
|
Resolve the request in `retrospect` mode:
|
|
11
12
|
|
|
12
|
-
- treat everything after `$retrospect` as the run selector to summarize
|
|
13
|
+
- treat everything after `$retrospect` as the run selector(s) to summarize
|
|
13
14
|
- focus on the run history, outcomes, lessons, and gaps
|
|
14
15
|
- do not create a separate command surface here; this skill only forwards into
|
|
15
16
|
`babysit`
|
|
17
|
+
|
|
18
|
+
## Phase 1: Resolve Target Run(s)
|
|
19
|
+
|
|
20
|
+
- If `--all` or "all" is present in args: list all runs via `ls -lt .a5c/runs/` and collect all completed/failed run IDs
|
|
21
|
+
- If multiple run IDs are provided: use all of them
|
|
22
|
+
- Otherwise: existing behavior (resolve the latest single run)
|
|
23
|
+
- Use `ask_user` to confirm run selection in interactive mode
|
|
24
|
+
|
|
25
|
+
## Phase 2: Load Run Data
|
|
26
|
+
|
|
27
|
+
For each selected run, load:
|
|
28
|
+
- `run.json` metadata
|
|
29
|
+
- Journal events
|
|
30
|
+
- Task definitions and results
|
|
31
|
+
- State snapshots
|
|
32
|
+
|
|
33
|
+
## Phase 3: Analysis
|
|
34
|
+
|
|
35
|
+
Perform standard per-run analysis (outcomes, process effectiveness, suggestions).
|
|
36
|
+
|
|
37
|
+
### Cross-Run Pattern Analysis (multi-run mode)
|
|
38
|
+
|
|
39
|
+
When analyzing multiple runs, additionally cover:
|
|
40
|
+
- **Common failure modes** across runs
|
|
41
|
+
- **Velocity trends** (tasks/time across runs)
|
|
42
|
+
- **Process evolution** (how processes changed over time)
|
|
43
|
+
- **Repeated breakpoint patterns**
|
|
44
|
+
|
|
45
|
+
## Phase 4: Suggestions
|
|
46
|
+
|
|
47
|
+
Provide actionable suggestions for process improvements, optimizations, and fixes.
|
|
48
|
+
|
|
49
|
+
## Phase 5: Implementation
|
|
50
|
+
|
|
51
|
+
If the user agrees, implement improvements to processes, skills, or configuration.
|
|
52
|
+
|
|
53
|
+
## Phase 6: Cleanup Suggestion
|
|
54
|
+
|
|
55
|
+
After analysis, suggest: "Consider running `babysitter cleanup` (or `/babysitter:cleanup`) to clean up old run data and reclaim disk space."
|
package/skills/yolo/SKILL.md
CHANGED
|
@@ -12,5 +12,8 @@ Resolve the request in `yolo` mode:
|
|
|
12
12
|
- treat everything after `$yolo` as the autonomous execution request
|
|
13
13
|
- follow the `babysit` skill contract while optimizing for minimal manual
|
|
14
14
|
interruption
|
|
15
|
+
- using this means the user wants to run autonomously with minimal manual
|
|
16
|
+
interruption, so optimize for that by skipping or minimizing any steps that
|
|
17
|
+
would require user input or decision-making during the run
|
|
15
18
|
- do not create a separate command surface here; this skill only forwards into
|
|
16
19
|
`babysit`
|