@a5c-ai/babysitter-codex 0.1.6-staging.c7c4cba5 → 0.1.6-staging.cbfb13a1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,12 @@
18
18
  "skills"
19
19
  ],
20
20
  "skills": "./skills/",
21
+ "skillEntries": [
22
+ {
23
+ "name": "babysitter:retrospect",
24
+ "argumentHint": "[run-id...] [--all] Run IDs or --all for all runs"
25
+ }
26
+ ],
21
27
  "hooks": "./hooks.json",
22
28
  "apps": "./.app.json",
23
29
  "interface": {
package/README.md CHANGED
@@ -22,29 +22,17 @@ Install the SDK CLI first:
22
22
  npm install -g @a5c-ai/babysitter-sdk
23
23
  ```
24
24
 
25
- Install the Codex plugin globally:
25
+ Clone the repo and install the plugin globally:
26
26
 
27
27
  ```bash
28
- npx @a5c-ai/babysitter-codex install
29
- ```
30
-
31
- This copies the plugin into `~/.codex/plugins/babysitter-codex`, registers it
32
- in `~/.agents/plugins/marketplace.json`, merges the required global Codex
33
- config into `~/.codex/config.toml`, installs the active global Codex
34
- `skills/`, `hooks/`, and `hooks.json` surface under `~/.codex/`, and ensures
35
- the Babysitter process library is active in `~/.a5c`.
28
+ git clone https://github.com/a5c-ai/babysitter.git
29
+ cd babysitter
30
+ codex
36
31
 
37
- Install the plugin into a specific workspace:
38
-
39
- ```bash
40
- npx @a5c-ai/babysitter-codex install --workspace /path/to/repo
32
+ > /plugins
41
33
  ```
42
34
 
43
- This copies the plugin into `<workspace>/plugins/babysitter-codex`, registers
44
- it in `<workspace>/.agents/plugins/marketplace.json`, merges
45
- `<workspace>/.codex/config.toml`, installs the active workspace Codex
46
- `skills/`, `hooks/`, and `hooks.json` surface under `<workspace>/.codex/`, and
47
- records install metadata under `<workspace>/.a5c/team/`.
35
+ Then navigate to the 'babysitter' entry and select 'Install'.
48
36
 
49
37
  ## Integration Model
50
38
 
@@ -11,16 +11,23 @@ export CODEX_PLUGIN_ROOT="${CODEX_PLUGIN_ROOT:-${PLUGIN_ROOT}}"
11
11
  export BABYSITTER_STATE_DIR="${STATE_DIR}"
12
12
 
13
13
  mkdir -p "$LOG_DIR" 2>/dev/null
14
- {
15
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook script invoked"
16
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) PLUGIN_ROOT=$PLUGIN_ROOT"
17
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) STATE_DIR=$STATE_DIR"
18
- } >> "$LOG_FILE" 2>/dev/null
14
+
15
+ blog() {
16
+ local msg="$1"
17
+ local ts
18
+ ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
19
+ echo "[INFO] $ts $msg" >> "$LOG_FILE" 2>/dev/null
20
+ babysitter log --type hook --label "hook:session-start" --message "$msg" --source shell-hook 2>/dev/null || true
21
+ }
22
+
23
+ blog "Hook script invoked"
24
+ blog "PLUGIN_ROOT=$PLUGIN_ROOT"
25
+ blog "STATE_DIR=$STATE_DIR"
19
26
 
20
27
  INPUT_FILE=$(mktemp 2>/dev/null || echo "/tmp/codex-session-start-hook-$$.json")
21
28
  cat > "$INPUT_FILE"
22
29
 
23
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook input received ($(wc -c < "$INPUT_FILE") bytes)" >> "$LOG_FILE" 2>/dev/null
30
+ blog "Hook input received ($(wc -c < "$INPUT_FILE") bytes)"
24
31
 
25
32
  RESULT=$(babysitter hook:run \
26
33
  --hook-type session-start \
@@ -30,7 +37,7 @@ RESULT=$(babysitter hook:run \
30
37
  < "$INPUT_FILE" 2>"$LOG_DIR/babysitter-session-start-hook-stderr.log")
31
38
  EXIT_CODE=$?
32
39
 
33
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) CLI exit code=$EXIT_CODE" >> "$LOG_FILE" 2>/dev/null
40
+ blog "CLI exit code=$EXIT_CODE"
34
41
 
35
42
  rm -f "$INPUT_FILE" 2>/dev/null
36
43
  printf '%s\n' "$RESULT"
@@ -11,16 +11,23 @@ export CODEX_PLUGIN_ROOT="${CODEX_PLUGIN_ROOT:-${PLUGIN_ROOT}}"
11
11
  export BABYSITTER_STATE_DIR="${STATE_DIR}"
12
12
 
13
13
  mkdir -p "$LOG_DIR" 2>/dev/null
14
- {
15
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook script invoked"
16
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) PLUGIN_ROOT=$PLUGIN_ROOT"
17
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) STATE_DIR=$STATE_DIR"
18
- } >> "$LOG_FILE" 2>/dev/null
14
+
15
+ blog() {
16
+ local msg="$1"
17
+ local ts
18
+ ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
19
+ echo "[INFO] $ts $msg" >> "$LOG_FILE" 2>/dev/null
20
+ babysitter log --type hook --label "hook:stop" --message "$msg" --source shell-hook 2>/dev/null || true
21
+ }
22
+
23
+ blog "Hook script invoked"
24
+ blog "PLUGIN_ROOT=$PLUGIN_ROOT"
25
+ blog "STATE_DIR=$STATE_DIR"
19
26
 
20
27
  INPUT_FILE=$(mktemp 2>/dev/null || echo "/tmp/codex-stop-hook-$$.json")
21
28
  cat > "$INPUT_FILE"
22
29
 
23
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook input received ($(wc -c < "$INPUT_FILE") bytes)" >> "$LOG_FILE" 2>/dev/null
30
+ blog "Hook input received ($(wc -c < "$INPUT_FILE") bytes)"
24
31
 
25
32
  RESULT=$(babysitter hook:run \
26
33
  --hook-type stop \
@@ -30,7 +37,7 @@ RESULT=$(babysitter hook:run \
30
37
  < "$INPUT_FILE" 2>"$LOG_DIR/babysitter-stop-hook-stderr.log")
31
38
  EXIT_CODE=$?
32
39
 
33
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) CLI exit code=$EXIT_CODE" >> "$LOG_FILE" 2>/dev/null
40
+ blog "CLI exit code=$EXIT_CODE"
34
41
 
35
42
  rm -f "$INPUT_FILE" 2>/dev/null
36
43
  printf '%s\n' "$RESULT"
@@ -5,22 +5,16 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
5
5
  PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
6
6
  STATE_DIR="${BABYSITTER_STATE_DIR:-${PWD}/.a5c}"
7
7
  LOG_DIR="${BABYSITTER_LOG_DIR:-$PLUGIN_ROOT/.a5c/logs}"
8
- LOG_FILE="$LOG_DIR/babysitter-user-prompt-submit-hook.log"
9
8
 
10
9
  export CODEX_PLUGIN_ROOT="${CODEX_PLUGIN_ROOT:-${PLUGIN_ROOT}}"
11
10
  export BABYSITTER_STATE_DIR="${STATE_DIR}"
12
11
 
13
12
  mkdir -p "$LOG_DIR" 2>/dev/null
14
- {
15
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook script invoked"
16
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) PLUGIN_ROOT=$PLUGIN_ROOT"
17
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) STATE_DIR=$STATE_DIR"
18
- } >> "$LOG_FILE" 2>/dev/null
19
13
 
20
14
  INPUT_FILE=$(mktemp 2>/dev/null || echo "/tmp/codex-user-prompt-submit-hook-$$.json")
21
15
  cat > "$INPUT_FILE"
22
16
 
23
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook input received ($(wc -c < "$INPUT_FILE") bytes)" >> "$LOG_FILE" 2>/dev/null
17
+ babysitter log --type hook --label "hook:user-prompt-submit" --message "Hook invoked" --source shell-hook 2>/dev/null || true
24
18
 
25
19
  RESULT=$(babysitter hook:run \
26
20
  --hook-type user-prompt-submit \
@@ -30,7 +24,7 @@ RESULT=$(babysitter hook:run \
30
24
  < "$INPUT_FILE" 2>"$LOG_DIR/babysitter-user-prompt-submit-hook-stderr.log")
31
25
  EXIT_CODE=$?
32
26
 
33
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) CLI exit code=$EXIT_CODE" >> "$LOG_FILE" 2>/dev/null
27
+ babysitter log --type hook --label "hook:user-prompt-submit" --message "CLI exit code=$EXIT_CODE" --source shell-hook 2>/dev/null || true
34
28
 
35
29
  rm -f "$INPUT_FILE" 2>/dev/null
36
30
  if [ -n "$RESULT" ]; then
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@a5c-ai/babysitter-codex",
3
- "version": "0.1.6-staging.c7c4cba5",
3
+ "version": "0.1.6-staging.cbfb13a1",
4
4
  "description": "Babysitter Codex skill bundle and integration package for OpenAI Codex CLI with SDK-managed process-library bootstrapping, 15 orchestration modes, and BOM-safe SKILL installation",
5
5
  "scripts": {
6
6
  "test": "node test/integration.test.js && node test/packaged-install.test.js",
@@ -43,6 +43,6 @@
43
43
  },
44
44
  "homepage": "https://github.com/a5c-ai/babysitter/tree/main/plugins/babysitter-codex#readme",
45
45
  "dependencies": {
46
- "@a5c-ai/babysitter-sdk": "0.0.183-staging.c7c4cba5"
46
+ "@a5c-ai/babysitter-sdk": "0.0.183-staging.cbfb13a1"
47
47
  }
48
48
  }
@@ -1,407 +1,47 @@
1
1
  ---
2
2
  name: babysit
3
3
  description: >-
4
- Run babysitter workflows from Codex using the installed babysit skill bundle,
5
- Codex mode-wrapper skills, Codex hooks/config, and the Babysitter SDK runtime
6
- loop. Use when the user wants to babysit a task, start or resume a run,
7
- diagnose run health, install Codex integration, or assimilate a methodology.
4
+ Orchestrate via @babysitter. Use this skill when asked to babysit a run,
5
+ orchestrate a process or whenever it is called explicitly. (babysit,
6
+ babysitter, orchestrate, orchestrate a run, workflow, etc.)
8
7
  ---
9
8
 
10
9
  # babysit
11
10
 
12
- Babysitter on Codex is implemented as:
13
-
14
- - the installed plugin under `~/.codex/plugins/babysitter-codex` or `<workspace>/plugins/babysitter-codex`
15
- - the plugin skill tree under `skills/babysit` and `skills/<mode>`
16
- - the plugin hook registry at `hooks.json`
17
- - the plugin hook scripts under `hooks/`
18
- - global `~/.codex/config.toml`
19
- - optional workspace `.codex/config.toml`
20
- - workspace `.a5c/`
21
- - shared global `.a5c/` process-library state
22
- - the Babysitter SDK CLI for `run:create`, `run:iterate`, `run:status`,
23
- `task:list`, `task:post`, and process-library binding
24
-
25
- Native Windows caveat: Codex does not execute hooks on Windows yet. When the
26
- current Codex session is running on native Windows, do not yield the turn and
27
- wait for the `Stop` hook. Keep driving the Babysitter loop in the current turn
28
- until the run completes or you hit a real user breakpoint that requires chat
29
- input. Only rely on the `SessionStart`/`Stop` hook handoff on platforms where
30
- Codex hook execution is actually available.
31
-
32
- ## Choosing a Mode
33
-
34
- Use this skill whenever it is invoked directly, and whenever one of the
35
- installed mode-wrapper skills such as `$call`, `$plan`, `$resume`, or `$yolo`
36
- loads it.
37
-
38
- Choose the mode from either:
39
-
40
- 1. the direct user intent when the skill is invoked as `$babysit`
41
- 2. the installed wrapper skill name when the user invoked `$call`, `$plan`,
42
- `$resume`, `$yolo`, and the rest
43
-
44
- | User intent | Mode |
45
- |-------------|------|
46
- | Start an orchestration run | `call` |
47
- | Work an issue-centric flow | `issue` |
48
- | Run autonomously | `yolo` |
49
- | Run continuously / recurring workflow | `forever` |
50
- | Resume an existing run | `resume` |
51
- | Plan without executing | `plan` |
52
- | Observe or inspect a run | `observe` |
53
- | Summarize a completed run | `retrospect` |
54
- | Diagnose run health | `doctor` |
55
- | Change or inspect model routing | `model` |
56
- | Help and documentation | `help` |
57
- | Install into a project | `project-install` |
58
- | Install user profile/setup | `user-install` |
59
- | Install team-pinned setup | `team-install` |
60
- | Assimilate external methodology | `assimilate` |
61
-
62
- Deprecated prompt aliases are not the Codex command surface anymore. Do not
63
- depend on `.codex/prompts` for normal operation.
11
+ Orchestrate `.a5c/runs/<runId>/` through iterative execution.
64
12
 
65
13
  ## Dependencies
66
14
 
67
15
  ### Babysitter SDK and CLI
68
16
 
69
- Use the installed CLI alias:
17
+ Read the SDK version from `versions.json` to ensure version compatibility:
70
18
 
71
19
  ```bash
72
- CLI="babysitter"
20
+ SDK_VERSION=$(node -e "try{console.log(JSON.parse(require('fs').readFileSync('${CODEX_PLUGIN_ROOT}/versions.json','utf8')).sdkVersion||'latest')}catch{console.log('latest')}")
21
+ sudo npm i -g @a5c-ai/babysitter-sdk@$SDK_VERSION
22
+ # whether sudo is needed depends on your environment and system
73
23
  ```
74
24
 
75
- If it is not available on the path, use:
25
+ Then use the CLI alias: `CLI="babysitter"`
76
26
 
77
- ```bash
78
- CLI="npx -y @a5c-ai/babysitter-sdk"
79
- ```
27
+ **Alternatively, use the CLI alias:** `CLI="npx -y @a5c-ai/babysitter-sdk@$SDK_VERSION"`
80
28
 
81
29
  ### jq
82
30
 
83
- Make sure `jq` is available in the path. Install it if missing.
84
-
85
- ## Core Iteration Workflow
86
-
87
- The Babysitter workflow has 8 steps:
88
-
89
- 1. **Create or find the process** - interview the user or parse the prompt,
90
- research the repo and process library, and build a process definition
91
- 2. **Create run and bind session** - create the run via the Babysitter CLI and
92
- bind it to the current Codex session honestly
93
- 3. **Run iteration** - execute one orchestration step
94
- 4. **Get effects** - inspect pending effects
95
- 5. **Perform effects** - execute the requested tasks through skills, agents, or
96
- shell work
97
- 6. **Post results** - commit results back through `task:post`
98
- 7. **Stop and yield** - on platforms with working Codex hooks, the Codex stop
99
- hook decides whether to continue; BUT if running on Windows, stay in-turn and
100
- continue the loop yourself instead of waiting for a hook that will not run
101
- 8. **Completion proof** - finish only when the emitted proof is returned
102
-
103
- ### 1. Create or find the process for the run
104
-
105
- #### Interview phase
106
-
107
- ##### Interactive mode (default)
108
-
109
- Interview the user for intent, requirements, goals, scope, and constraints
110
- before entering the hook-driven loop.
111
-
112
- This phase should be iterative and adaptive:
113
-
114
- - inspect the current repo state first
115
- - resolve the active process-library root with
116
- `babysitter process-library:active --json`
117
- - conduct an actual search against that active process library before writing a
118
- process
119
- - research the repo, online references, methodologies, specializations, skills,
120
- agents, and related processes as needed
121
- - ask the user follow-up questions when the intent or constraints are still not
122
- clear
123
-
124
- Do not plan more than one step ahead during the interview phase. After each
125
- step, decide the next best step from the current evidence.
126
-
127
- The `process-library:active` command bootstraps the shared global SDK process
128
- library automatically if no binding exists yet. Read:
129
-
130
- - `binding.dir` as the active process-library root that must be searched
131
- - `defaultSpec.cloneDir` as the cloned repo root when adjacent repo-level
132
- material is needed
133
-
134
- After that, treat `specializations/**/**/**`, `methodologies/`, `contrib/`, and
135
- `reference/` as paths relative to `binding.dir`.
136
-
137
- ##### Non-interactive mode
138
-
139
- When running non-interactively:
140
-
141
- 1. parse the initial prompt to extract intent, scope, and constraints
142
- 2. inspect the repo structure
143
- 3. resolve the active process-library root with
144
- `babysitter process-library:active --json`
145
- 4. search that active library for the most relevant specialization,
146
- methodology, process, skill, or agent
147
- 5. proceed directly to process creation
148
-
149
- Do not skip the active-library search step.
150
-
151
- #### User Profile Integration
152
-
153
- Before building the process, check for an existing user profile:
154
-
155
- 1. run `babysitter profile:read --user --json`
156
- 2. use the profile to pre-fill user preferences, expertise, and communication
157
- style
158
- 3. calibrate breakpoint density from `breakpointTolerance`
159
- 4. prefer tools, skills, and agents the user already uses
160
- 5. adapt explanations and breakpoint text to the user's communication style
161
- 6. if no profile exists, proceed normally and consider suggesting `$user-install`
162
-
163
- All profile read/write/merge/render operations must go through the Babysitter
164
- CLI, never direct SDK imports.
165
-
166
- #### Process creation phase
167
-
168
- After the interview phase, create the full custom process files for the run
169
- according to the process-library patterns and the process-creation guidelines
170
- below.
171
-
172
- Install `@a5c-ai/babysitter-sdk` into `.a5c/` if it is missing. When doing so,
173
- run the install from the project root and use either `npm i --prefix .a5c ...`
174
- or a subshell so the working directory does not stay inside `.a5c/`.
175
-
176
- Always use an **absolute path** for `--entry` when calling `run:create`.
177
-
178
- After the process is created and before creating the run:
179
-
180
- - in interactive mode, describe the process at a high level, generate
181
- `[process-name].diagram.md` and `[process-name].process.md`, and get user
182
- confirmation before proceeding
183
- - in non-interactive mode, proceed directly to `run:create`
184
-
185
- Common mistakes to avoid:
186
-
187
- - wrong: skipping repo/process-library research before writing the process
188
- - wrong: bypassing the orchestration model with helper scripts or inline logic
189
- - wrong: using `kind: 'node'` in generated tasks
190
- - correct: use `agent` or `skill` tasks for reasoning work, with `shell` only
191
- for existing CLIs, tests, linters, git, or builds
192
- - correct: include verification loops, refinement loops, quality gates, and
193
- breakpoints where appropriate
194
-
195
- ### 2. Create run and bind session
196
-
197
- For new runs:
198
-
199
- ```bash
200
- $CLI run:create \
201
- --process-id <id> \
202
- --entry <absolute-path>#<export> \
203
- --inputs <file> \
204
- --prompt "$PROMPT" \
205
- --harness codex \
206
- --state-dir .a5c \
207
- --plugin-root "${CODEX_PLUGIN_ROOT}" \
208
- --json
209
- ```
210
-
211
- Required flags:
212
-
213
- - `--process-id <id>` - unique identifier for the process definition
214
- - `--entry <absolute-path>#<export>` - process JS file plus named export
215
- - `--prompt "$PROMPT"` - the user's initial request
216
- - `--harness codex` - activates Codex session binding
217
- - `--state-dir .a5c` - required for honest workspace-local Codex session state
218
- - `--plugin-root "${CODEX_PLUGIN_ROOT}"` - plugin root used for session/state
219
- resolution
220
-
221
- Optional flags:
222
-
223
- - `--inputs <file>` - process input JSON
224
- - `--run-id <id>` - override the generated run id
225
- - `--runs-dir <dir>` - override the default runs directory
31
+ Make sure `jq` is installed and available on the path. If it is not, install it.
226
32
 
227
- Inside a real Codex hook/session environment, do **not** pass `--session-id`
228
- explicitly. The Codex adapter auto-resolves the session/thread id from
229
- `CODEX_THREAD_ID`, `CODEX_SESSION_ID`, or `CODEX_ENV_FILE`. Only pass
230
- `--session-id` in out-of-band recovery flows where no ambient Codex session
231
- identity exists.
33
+ ## Instructions
232
34
 
233
- In normal Codex usage, `run:create` must bind the session into the active
234
- workspace `.a5c`, not the global `~/.a5c`, so the Stop hook can find the same
235
- session state file in later turns.
236
-
237
- On Windows, still bind the session honestly with `run:create`, but do
238
- not assume that later turns will be resumed by Codex hooks. After `run:create`
239
- you must keep executing the orchestration loop yourself in the current turn
240
- until completion or a real user-facing breakpoint. (calling `run:iterate` in the same turn is fine on Windows because the hooks won't run there, but do not do that in the normal Codex plugin path where the hooks are expected to drive the loop).
241
-
242
- For resuming existing runs in a manual recovery flow:
243
-
244
- ```bash
245
- $CLI session:resume \
246
- --session-id <id> \
247
- --state-dir .a5c \
248
- --run-id <runId> \
249
- --runs-dir .a5c/runs \
250
- --json
251
- ```
252
-
253
- ### 3. Run iteration
35
+ Run the following command to get full orchestration instructions:
254
36
 
255
37
  ```bash
256
- $CLI run:iterate .a5c/runs/<runId> --json --iteration <n> --plugin-root "${CODEX_PLUGIN_ROOT}"
38
+ babysitter instructions:babysit-skill --harness codex --interactive
257
39
  ```
258
40
 
259
- Status values:
260
-
261
- - `"executed"` - tasks executed, continue looping
262
- - `"waiting"` - breakpoint or sleep is pending
263
- - `"completed"` - run finished successfully
264
- - `"failed"` - run failed
265
- - `"none"` - no runnable effects exist
266
-
267
- ### 4. Get effects
41
+ For non-interactive runs (e.g., with `-p` flag or no question tool):
268
42
 
269
43
  ```bash
270
- $CLI task:list .a5c/runs/<runId> --pending --json
44
+ babysitter instructions:babysit-skill --harness codex --no-interactive
271
45
  ```
272
46
 
273
- ### 5. Perform effects
274
-
275
- Run the effect externally to the SDK, then post the outcome summary with
276
- `task:post`.
277
-
278
- Important:
279
-
280
- - delegate using Codex skills or agent tooling when possible
281
- - make sure the requested change actually happened
282
- - do not describe or imply success without verifying the requested effect
283
- - do not use the `babysit` skill itself inside delegated task execution
284
-
285
- #### 5.1 Breakpoint handling
286
-
287
- ##### Interactive mode
288
-
289
- Ask the user explicitly for approval. If the Codex environment provides a
290
- structured question UI, include explicit approve/reject options. If not, ask in
291
- chat and require an explicit approval response.
292
-
293
- Never infer approval from silence, ambiguity, or dismissal.
294
-
295
- Breakpoint rejections must still be posted with `--status ok` and a value such
296
- as `{"approved": false, "response": "..."}`.
297
-
298
- ##### Non-interactive mode
299
-
300
- Choose the best option from context and post the result. Rejections still use
301
- `--status ok` with `{"approved": false}`.
302
-
303
- ### 6. Results posting
304
-
305
- Never write `result.json` directly.
306
-
307
- Workflow:
308
-
309
- 1. write the result value to `tasks/<effectId>/output.json`
310
- 2. call `task:post` with `--value tasks/<effectId>/output.json`
311
- 3. let the SDK write `result.json`, append the journal event, and update state
312
-
313
- ### 7. Stop after every phase after run-session association
314
-
315
- After `run:create` or any posted effect result, end the current assistant turn
316
- and yield back to the Codex hook loop. Do not run multiple `run:iterate` steps
317
- in the same turn.
318
-
319
- Native Windows exception: because Codex does not execute hooks on Windows yet,
320
- do not end the turn and wait for the hook loop there. Continue with the next
321
- `run:iterate`, inspect tasks, perform effects, and post results in the same
322
- turn until the run completes or you need explicit user input for a breakpoint.
323
- Do not tell the user you are waiting for the stop hook on Windows.
324
-
325
- ### 8. Completion proof
326
-
327
- When `run:iterate` or `run:status` returns `completionProof`, return that exact
328
- value wrapped in `<promise>...</promise>`.
329
-
330
- ## Hook Loop
331
-
332
- Global install must register the plugin in `~/.agents/plugins/marketplace.json`
333
- with the plugin bundle at `~/.codex/plugins/babysitter-codex`, and must merge
334
- `~/.codex/config.toml`.
335
-
336
- Workspace onboarding may also register the plugin in
337
- `<workspace>/.agents/plugins/marketplace.json` with the plugin bundle at
338
- `<workspace>/plugins/babysitter-codex`, and may merge `.codex/config.toml` for
339
- repo-local pinning.
340
-
341
- Both levels must provide:
342
-
343
- 1. `SessionStart` seeds `.a5c` session state
344
- 2. `UserPromptSubmit` performs prompt-time transformations when needed
345
- 3. `Stop` decides whether the run is complete or Codex should receive the next
346
- Babysitter iteration context
347
-
348
- On native Windows, treat these hook registrations as installation/configuration
349
- state only. Codex currently does not execute them there, so the skill must keep
350
- the orchestration loop moving in-turn instead of waiting for hook callbacks.
351
-
352
- ## Task Kinds
353
-
354
- Never generate `kind: 'node'` effects.
355
-
356
- | Kind | When to use |
357
- |------|-------------|
358
- | `agent` | default for planning, implementation, analysis, debugging, scoring, research |
359
- | `skill` | when a matching installed skill exists |
360
- | `shell` | existing CLI tools, tests, git, linters, builds |
361
- | `breakpoint` | human approval gates |
362
- | `sleep` | time gates |
363
-
364
- ## Process Creation Guidelines
365
-
366
- - always research the repo and the active process library before writing the
367
- process
368
- - prefer composing multiple relevant library processes rather than copying just
369
- one template blindly
370
- - include verification and refinement loops
371
- - prefer processes that close the widest practical quality loop
372
- - add `@skill` and `@agent` discovery markers to generated process files for
373
- the dependencies you actually selected
374
- - prefer incremental work that can be tested as you go
375
-
376
- Search for relevant processes, skills, agents, methodologies, and references
377
- in:
378
-
379
- 1. `.a5c/processes/`
380
- 2. the active process-library root from `binding.dir`
381
- 3. the cloned repo root from `defaultSpec.cloneDir` when adjacent material is
382
- needed
383
-
384
- ## Codex-Specific Rules
385
-
386
- - `$babysit` is the core skill
387
- - `$call`, `$plan`, `$resume`, `$yolo`, and the other mode skills are thin
388
- wrappers that must only load `babysit` for the matching mode
389
- - do not fabricate a session id
390
- - on Windows env, never claim that you are yielding to or waiting for the
391
- Codex stop hook; continue the Babysitter loop in the current turn instead
392
-
393
- ## Critical Rules
394
-
395
- CRITICAL RULE: The completion proof is emitted only when the run is truly
396
- completed. Output `<promise>SECRET</promise>` only when the orchestration status
397
- is completed.
398
-
399
- CRITICAL RULE: Never bypass the Babysitter orchestration model when this skill
400
- is active. Do not replace it with ad-hoc direct execution.
401
-
402
- CRITICAL RULE: Never build helper scripts or wrapper programs to drive the run.
403
- Use the CLI and the hook loop directly.
404
-
405
- CRITICAL RULE: In interactive mode, never auto-approve breakpoints.
406
-
407
- CRITICAL RULE: Do not use `kind: 'node'` in generated process files.
47
+ Follow the instructions returned by the command above to orchestrate the run.
@@ -12,5 +12,6 @@ Resolve the request in `call` mode:
12
12
  - treat everything after `$call` as the initial Babysitter request for a new
13
13
  orchestration run
14
14
  - create the process, create the run, and enter the Babysitter loop
15
+ - invoking this skill always means the user wants an interactive run.
15
16
  - do not create a separate command surface here; this skill only forwards into
16
17
  `babysit`
@@ -1,6 +1,7 @@
1
1
  ---
2
2
  name: retrospect
3
- description: Summarize or retrospect on a completed Babysitter run.
3
+ description: Summarize or retrospect on one or more completed Babysitter runs.
4
+ argument-hint: "[run-id...] [--all] Optional run IDs or --all for all runs"
4
5
  ---
5
6
 
6
7
  # retrospect
@@ -9,7 +10,46 @@ Load and use the installed `babysit` skill.
9
10
 
10
11
  Resolve the request in `retrospect` mode:
11
12
 
12
- - treat everything after `$retrospect` as the run selector to summarize
13
+ - treat everything after `$retrospect` as the run selector(s) to summarize
13
14
  - focus on the run history, outcomes, lessons, and gaps
14
15
  - do not create a separate command surface here; this skill only forwards into
15
16
  `babysit`
17
+
18
+ ## Phase 1: Resolve Target Run(s)
19
+
20
+ - If `--all` or "all" is present in args: list all runs via `ls -lt .a5c/runs/` and collect all completed/failed run IDs
21
+ - If multiple run IDs are provided: use all of them
22
+ - Otherwise: existing behavior (resolve the latest single run)
23
+ - Use `ask_user` to confirm run selection in interactive mode
24
+
25
+ ## Phase 2: Load Run Data
26
+
27
+ For each selected run, load:
28
+ - `run.json` metadata
29
+ - Journal events
30
+ - Task definitions and results
31
+ - State snapshots
32
+
33
+ ## Phase 3: Analysis
34
+
35
+ Perform standard per-run analysis (outcomes, process effectiveness, suggestions).
36
+
37
+ ### Cross-Run Pattern Analysis (multi-run mode)
38
+
39
+ When analyzing multiple runs, additionally cover:
40
+ - **Common failure modes** across runs
41
+ - **Velocity trends** (tasks/time across runs)
42
+ - **Process evolution** (how processes changed over time)
43
+ - **Repeated breakpoint patterns**
44
+
45
+ ## Phase 4: Suggestions
46
+
47
+ Provide actionable suggestions for process improvements, optimizations, and fixes.
48
+
49
+ ## Phase 5: Implementation
50
+
51
+ If the user agrees, implement improvements to processes, skills, or configuration.
52
+
53
+ ## Phase 6: Cleanup Suggestion
54
+
55
+ After analysis, suggest: "Consider running `babysitter cleanup` (or `/babysitter:cleanup`) to clean up old run data and reclaim disk space."
@@ -12,5 +12,8 @@ Resolve the request in `yolo` mode:
12
12
  - treat everything after `$yolo` as the autonomous execution request
13
13
  - follow the `babysit` skill contract while optimizing for minimal manual
14
14
  interruption
15
+ - using this means the user wants to run autonomously with minimal manual
16
+ interruption, so optimize for that by skipping or minimizing any steps that
17
+ would require user input or decision-making during the run
15
18
  - do not create a separate command surface here; this skill only forwards into
16
19
  `babysit`