@a5c-ai/babysitter-codex 0.1.6-staging.05b1f6af → 0.1.6-staging.08ae538f

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,12 @@
18
18
  "skills"
19
19
  ],
20
20
  "skills": "./skills/",
21
+ "skillEntries": [
22
+ {
23
+ "name": "babysitter:retrospect",
24
+ "argumentHint": "[run-id...] [--all] Run IDs or --all for all runs"
25
+ }
26
+ ],
21
27
  "hooks": "./hooks.json",
22
28
  "apps": "./.app.json",
23
29
  "interface": {
@@ -11,16 +11,23 @@ export CODEX_PLUGIN_ROOT="${CODEX_PLUGIN_ROOT:-${PLUGIN_ROOT}}"
11
11
  export BABYSITTER_STATE_DIR="${STATE_DIR}"
12
12
 
13
13
  mkdir -p "$LOG_DIR" 2>/dev/null
14
- {
15
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook script invoked"
16
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) PLUGIN_ROOT=$PLUGIN_ROOT"
17
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) STATE_DIR=$STATE_DIR"
18
- } >> "$LOG_FILE" 2>/dev/null
14
+
15
+ blog() {
16
+ local msg="$1"
17
+ local ts
18
+ ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
19
+ echo "[INFO] $ts $msg" >> "$LOG_FILE" 2>/dev/null
20
+ babysitter log --type hook --label "hook:session-start" --message "$msg" --source shell-hook 2>/dev/null || true
21
+ }
22
+
23
+ blog "Hook script invoked"
24
+ blog "PLUGIN_ROOT=$PLUGIN_ROOT"
25
+ blog "STATE_DIR=$STATE_DIR"
19
26
 
20
27
  INPUT_FILE=$(mktemp 2>/dev/null || echo "/tmp/codex-session-start-hook-$$.json")
21
28
  cat > "$INPUT_FILE"
22
29
 
23
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook input received ($(wc -c < "$INPUT_FILE") bytes)" >> "$LOG_FILE" 2>/dev/null
30
+ blog "Hook input received ($(wc -c < "$INPUT_FILE") bytes)"
24
31
 
25
32
  RESULT=$(babysitter hook:run \
26
33
  --hook-type session-start \
@@ -30,7 +37,7 @@ RESULT=$(babysitter hook:run \
30
37
  < "$INPUT_FILE" 2>"$LOG_DIR/babysitter-session-start-hook-stderr.log")
31
38
  EXIT_CODE=$?
32
39
 
33
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) CLI exit code=$EXIT_CODE" >> "$LOG_FILE" 2>/dev/null
40
+ blog "CLI exit code=$EXIT_CODE"
34
41
 
35
42
  rm -f "$INPUT_FILE" 2>/dev/null
36
43
  printf '%s\n' "$RESULT"
@@ -11,16 +11,23 @@ export CODEX_PLUGIN_ROOT="${CODEX_PLUGIN_ROOT:-${PLUGIN_ROOT}}"
11
11
  export BABYSITTER_STATE_DIR="${STATE_DIR}"
12
12
 
13
13
  mkdir -p "$LOG_DIR" 2>/dev/null
14
- {
15
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook script invoked"
16
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) PLUGIN_ROOT=$PLUGIN_ROOT"
17
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) STATE_DIR=$STATE_DIR"
18
- } >> "$LOG_FILE" 2>/dev/null
14
+
15
+ blog() {
16
+ local msg="$1"
17
+ local ts
18
+ ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
19
+ echo "[INFO] $ts $msg" >> "$LOG_FILE" 2>/dev/null
20
+ babysitter log --type hook --label "hook:stop" --message "$msg" --source shell-hook 2>/dev/null || true
21
+ }
22
+
23
+ blog "Hook script invoked"
24
+ blog "PLUGIN_ROOT=$PLUGIN_ROOT"
25
+ blog "STATE_DIR=$STATE_DIR"
19
26
 
20
27
  INPUT_FILE=$(mktemp 2>/dev/null || echo "/tmp/codex-stop-hook-$$.json")
21
28
  cat > "$INPUT_FILE"
22
29
 
23
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook input received ($(wc -c < "$INPUT_FILE") bytes)" >> "$LOG_FILE" 2>/dev/null
30
+ blog "Hook input received ($(wc -c < "$INPUT_FILE") bytes)"
24
31
 
25
32
  RESULT=$(babysitter hook:run \
26
33
  --hook-type stop \
@@ -30,7 +37,7 @@ RESULT=$(babysitter hook:run \
30
37
  < "$INPUT_FILE" 2>"$LOG_DIR/babysitter-stop-hook-stderr.log")
31
38
  EXIT_CODE=$?
32
39
 
33
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) CLI exit code=$EXIT_CODE" >> "$LOG_FILE" 2>/dev/null
40
+ blog "CLI exit code=$EXIT_CODE"
34
41
 
35
42
  rm -f "$INPUT_FILE" 2>/dev/null
36
43
  printf '%s\n' "$RESULT"
@@ -5,22 +5,16 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
5
5
  PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
6
6
  STATE_DIR="${BABYSITTER_STATE_DIR:-${PWD}/.a5c}"
7
7
  LOG_DIR="${BABYSITTER_LOG_DIR:-$PLUGIN_ROOT/.a5c/logs}"
8
- LOG_FILE="$LOG_DIR/babysitter-user-prompt-submit-hook.log"
9
8
 
10
9
  export CODEX_PLUGIN_ROOT="${CODEX_PLUGIN_ROOT:-${PLUGIN_ROOT}}"
11
10
  export BABYSITTER_STATE_DIR="${STATE_DIR}"
12
11
 
13
12
  mkdir -p "$LOG_DIR" 2>/dev/null
14
- {
15
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook script invoked"
16
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) PLUGIN_ROOT=$PLUGIN_ROOT"
17
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) STATE_DIR=$STATE_DIR"
18
- } >> "$LOG_FILE" 2>/dev/null
19
13
 
20
14
  INPUT_FILE=$(mktemp 2>/dev/null || echo "/tmp/codex-user-prompt-submit-hook-$$.json")
21
15
  cat > "$INPUT_FILE"
22
16
 
23
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook input received ($(wc -c < "$INPUT_FILE") bytes)" >> "$LOG_FILE" 2>/dev/null
17
+ babysitter log --type hook --label "hook:user-prompt-submit" --message "Hook invoked" --source shell-hook 2>/dev/null || true
24
18
 
25
19
  RESULT=$(babysitter hook:run \
26
20
  --hook-type user-prompt-submit \
@@ -30,7 +24,7 @@ RESULT=$(babysitter hook:run \
30
24
  < "$INPUT_FILE" 2>"$LOG_DIR/babysitter-user-prompt-submit-hook-stderr.log")
31
25
  EXIT_CODE=$?
32
26
 
33
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) CLI exit code=$EXIT_CODE" >> "$LOG_FILE" 2>/dev/null
27
+ babysitter log --type hook --label "hook:user-prompt-submit" --message "CLI exit code=$EXIT_CODE" --source shell-hook 2>/dev/null || true
34
28
 
35
29
  rm -f "$INPUT_FILE" 2>/dev/null
36
30
  if [ -n "$RESULT" ]; then
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@a5c-ai/babysitter-codex",
3
- "version": "0.1.6-staging.05b1f6af",
3
+ "version": "0.1.6-staging.08ae538f",
4
4
  "description": "Babysitter Codex skill bundle and integration package for OpenAI Codex CLI with SDK-managed process-library bootstrapping, 15 orchestration modes, and BOM-safe SKILL installation",
5
5
  "scripts": {
6
6
  "test": "node test/integration.test.js && node test/packaged-install.test.js",
@@ -43,6 +43,6 @@
43
43
  },
44
44
  "homepage": "https://github.com/a5c-ai/babysitter/tree/main/plugins/babysitter-codex#readme",
45
45
  "dependencies": {
46
- "@a5c-ai/babysitter-sdk": "0.0.183-staging.05b1f6af"
46
+ "@a5c-ai/babysitter-sdk": "0.0.183-staging.08ae538f"
47
47
  }
48
48
  }
@@ -8,869 +8,40 @@ description: >-
8
8
 
9
9
  # babysit
10
10
 
11
- Orchestrate `.a5c/runs/<runId>/` through iterative execution. Use the SDK CLI to drive the orchestration loop.
12
-
13
- Native Windows caveat: Codex does not execute hooks on Windows yet. When
14
- running on native Windows, do not yield the turn and wait for the Stop hook.
15
- Keep driving the Babysitter loop in the current turn until the run completes or
16
- you hit a real user breakpoint that requires chat input.
17
-
18
- ## Non-Negotiables
19
-
20
- - Preserve user intent over speed. Never optimize for "get any completion proof"
21
- at the expense of the requested scope, quality, or process style.
22
- - Do not create temporary/minimal throwaway processes to force completion unless
23
- the user explicitly approves a reduced-scope recovery path.
24
- - If blocked, repair the current run/session first. Only switch strategy after
25
- stating the blocker and getting user approval when the new strategy changes
26
- intent or scope.
27
- - Use the Babysitter orchestration model end-to-end. Do not bypass with ad-hoc
28
- execution, fake outputs, or side workflows that are not represented as tasks.
11
+ Orchestrate `.a5c/runs/<runId>/` through iterative execution.
29
12
 
30
13
  ## Dependencies
31
14
 
32
15
  ### Babysitter SDK and CLI
33
16
 
34
- Use the installed CLI alias:
17
+ Read the SDK version from `versions.json` to ensure version compatibility:
35
18
 
36
19
  ```bash
37
- CLI="babysitter"
20
+ SDK_VERSION=$(node -e "try{console.log(JSON.parse(require('fs').readFileSync('${CODEX_PLUGIN_ROOT}/versions.json','utf8')).sdkVersion||'latest')}catch{console.log('latest')}")
21
+ sudo npm i -g @a5c-ai/babysitter-sdk@$SDK_VERSION
22
+ # Whether sudo is required depends on your environment and system
38
23
  ```
39
24
 
40
- If it is not available on the path, use:
25
+ Then use the CLI alias: `CLI="babysitter"`
41
26
 
42
- ```bash
43
- CLI="npx -y @a5c-ai/babysitter-sdk"
44
- ```
27
+ **Alternatively, use the CLI alias:** `CLI="npx -y @a5c-ai/babysitter-sdk@$SDK_VERSION"`
45
28
 
46
29
  ### jq
47
30
 
48
- Make sure `jq` is available in the path. Install it if missing.
49
-
50
- ---
51
-
52
- ## Core Iteration Workflow
53
-
54
- The Babysitter workflow has 8 steps:
55
-
56
- 1. **Create or find the process** - interview the user or parse the prompt,
57
- research the repo and process library, and build a process definition
58
- 2. **Create run and bind session** - create the run via the Babysitter CLI and
59
- bind it to the current Codex session
60
- 3. **Run iteration** - execute one orchestration step
61
- 4. **Get effects** - inspect pending effects
62
- 5. **Perform effects** - execute the requested tasks through skills, agents, or
63
- shell work
64
- 6. **Post results** - commit results back through `task:post`
65
- 7. **Stop and yield** - the Codex stop hook decides whether to continue (on
66
- Windows, stay in-turn and continue the loop yourself instead)
67
- 8. **Completion proof** - finish only when the emitted proof is returned
68
-
69
- ### 1. Create or find the process for the run
70
-
71
- #### Interview phase
72
-
73
- ##### Interactive mode (default)
74
-
75
- Interview the user for the intent, requirements, goal, scope, etc.
76
-
77
- A multi-step phase to understand the intent and perspective to approach the
78
- process building after researching the repo, short research online if needed,
79
- short research in the target repo, additional instructions, intent and library
80
- (processes, specializations, skills, subagents, methodologies, references, etc.)
81
- / guide for methodology building. You MUST resolve the active library root with
82
- `babysitter process-library:active --json` before process authoring, and you MUST
83
- conduct an actual search against that active process library instead of skipping
84
- directly to writing a process. The `process-library:active` command bootstraps
85
- the shared global SDK process library automatically if no binding exists yet.
86
- Read `binding.dir` from the returned JSON to get the active process-library root
87
- that must be searched. If you need the cloned repo root itself, read
88
- `defaultSpec.cloneDir` from the same JSON. After that, treat
89
- `specializations/**/**/**`, `methodologies/`, `contrib/`, and `reference/` as
90
- paths relative to `binding.dir`.
91
-
92
- The first step should be to look at the state of the repo, then find the most
93
- relevant processes, specializations, skills, subagents, methodologies,
94
- references, etc. to use as a reference. Use the babysitter CLI discover command
95
- to find the relevant processes, skills, subagents, etc. at various stages.
96
-
97
- Then this phase can have: research online, research the repo, user questions, and
98
- other steps one after the other until the intent, requirements, goal, scope, etc.
99
- are clear and the user is satisfied with the understanding. After each step,
100
- decide the type of next step to take. Do not plan more than 1 step ahead in this
101
- phase. The same step type can be used more than once in this phase.
102
-
103
- ##### Non-interactive mode (running with -p flag or no AskUserQuestion tool)
104
-
105
- When running non-interactively, skip the interview phase entirely. Instead:
106
-
107
- 1. Parse the initial prompt to extract intent, scope, and requirements.
108
- 2. Research the repo structure to understand the codebase.
109
- 3. Resolve the active process-library root with
110
- `babysitter process-library:active --json`, then search that active library
111
- for the most relevant specialization/methodology. Do not skip this search
112
- step.
113
- 4. Proceed directly to the process creation phase using the extracted
114
- requirements.
115
-
116
- #### User Profile Integration
117
-
118
- Before building the process, check for an existing user profile to personalize
119
- the orchestration:
120
-
121
- 1. **Read user profile**: Run `babysitter profile:read --user --json` to load
122
- the user profile. **Always use the CLI for profile operations -- never import
123
- or call SDK profile functions directly.**
124
-
125
- 2. **Pre-fill context**: Use the profile to understand the user's specialties,
126
- expertise levels, preferences, and communication style. This informs how you
127
- conduct the interview (skip questions the profile already answers) and how you
128
- build the process.
129
-
130
- 3. **Breakpoint density**: Use the `breakpointTolerance` field to calibrate
131
- breakpoint placement in the generated process:
132
- - `minimal`/`low` (expert users): Fewer breakpoints -- only at critical
133
- decision points (architecture choices, deployment, destructive operations)
134
- - `moderate` (intermediate users): Standard breakpoints at phase boundaries
135
- - `high`/`maximum` (novice users): More breakpoints -- add review gates after
136
- each implementation step, before each integration, and at every quality gate
137
- - Always respect `alwaysBreakOn` for operations that must always pause (e.g.,
138
- destructive-git, deploy)
139
- - If `skipBreakpointsForKnownPatterns` is true, reduce breakpoints for
140
- operations the user has previously approved
31
+ Make sure `jq` is installed and available on your PATH. If it is not, install it.
141
32
 
142
- 4. **Tool preferences**: Use `toolPreferences` and `installedSkills`/
143
- `installedAgents` to prioritize which agents and skills to use in the process.
144
- Prefer tools the user is familiar with.
33
+ ## Instructions
145
34
 
146
- 5. **Communication style**: Adapt process descriptions and breakpoint questions
147
- to match the user's `communicationStyle` preferences (tone, explanationDepth,
148
- preferredResponseFormat).
149
-
150
- 6. **If no profile exists**: Proceed normally with the interview phase.
151
-
152
- 7. **CLI profile commands (mandatory)**: **All profile operations MUST use the
153
- babysitter CLI -- never import SDK profile functions directly.**
154
- - `babysitter profile:read --user --json`
155
- - `babysitter profile:read --project --json`
156
- - `babysitter profile:write --user --input <file> --json`
157
- - `babysitter profile:write --project --input <file> --json`
158
- - `babysitter profile:merge --user --input <file> --json`
159
- - `babysitter profile:merge --project --input <file> --json`
160
- - `babysitter profile:render --user`
161
- - `babysitter profile:render --project`
162
-
163
- Use `--dir <dir>` to override the default profile directory when needed.
164
-
165
- #### Process creation phase
166
-
167
- After the interview phase, create the complete custom process files (js and
168
- jsons) for the run according to the Process Creation Guidelines and
169
- methodologies section. Also install the babysitter-sdk inside `.a5c/` if it is
170
- not already installed. **IMPORTANT**: When installing into `.a5c/`, use
171
- `npm i --prefix .a5c @a5c-ai/babysitter-sdk` or a subshell
172
- `(cd .a5c && npm i @a5c-ai/babysitter-sdk)` to avoid leaving CWD inside
173
- `.a5c/`, which causes doubled path resolution bugs.
174
-
175
- You must abide the syntax and structure of the process files from the process
176
- library.
177
-
178
- **IMPORTANT -- Path resolution**: Always use **absolute paths** for `--entry`
179
- when calling `run:create`, and always run the CLI from the **project root**
180
- directory (not from `.a5c/`).
181
-
182
- **User profile awareness**: If a user profile was loaded in the User Profile
183
- Integration step, use it to inform process design -- adjust breakpoint density
184
- per the user's tolerance level, select agents/skills the user prefers, and match
185
- the process complexity to the user's expertise.
186
-
187
- **IMPORTANT -- Profile I/O in processes**: When generating process files, all
188
- profile read/write/merge operations MUST use the babysitter CLI commands
189
- (`babysitter profile:read`, `profile:write`, `profile:merge`,
190
- `profile:render`). Never instruct agents to import or call SDK profile functions
191
- directly.
192
-
193
- After the process is created and before creating the run:
194
-
195
- - **Interactive mode**: describe the process at high level (not the code or
196
- implementation details) to the user and ask for confirmation to use it, also
197
- generate it as a [process-name].diagram.md and [process-name].process.md file.
198
- If the user is not satisfied with the process, go back to the process creation
199
- phase and modify the process according to the feedback.
200
- - **Non-interactive mode**: proceed directly to creating the run without user
201
- confirmation.
202
-
203
- #### Intent Fidelity Checks (required before `run:create`)
204
-
205
- Before calling `run:create`, verify and document in your working notes:
206
-
207
- 1. The process scope matches the user prompt (no silent scope cuts).
208
- 2. The process structure follows library style/composition patterns rather than
209
- a one-off minimal flow.
210
- 3. Quality gates exist (verification/refinement loops, integration checks,
211
- and/or breakpoints appropriate for the task).
212
- 4. Any scope reduction, simplification, or recovery tradeoff is explicitly
213
- approved by the user before execution.
214
-
215
- If any check fails, do not call `run:create` yet; fix the process or ask the
216
- user for approval of the tradeoff.
217
-
218
- **Common mistakes to avoid:**
219
- - wrong: skipping repo/process-library research before writing the process
220
- - wrong: bypassing the orchestration model with helper scripts or inline logic
221
- - wrong: using `kind: 'node'` in generated tasks
222
- - correct: use `agent` or `skill` tasks for reasoning work, with `shell` only
223
- for existing CLIs, tests, linters, git, or builds
224
- - correct: include verification loops, refinement loops, quality gates, and
225
- breakpoints where appropriate
226
-
227
- ### 2. Create run and bind session (single command):
228
-
229
- **For new runs:**
35
+ Run the following command to get full orchestration instructions:
230
36
 
231
37
  ```bash
232
- $CLI run:create \
233
- --process-id <id> \
234
- --entry <absolute-path>#<export> \
235
- --inputs <file> \
236
- --prompt "$PROMPT" \
237
- --harness codex \
238
- --state-dir .a5c \
239
- --plugin-root "${CODEX_PLUGIN_ROOT}" \
240
- --json
38
+ babysitter instructions:babysit-skill --harness codex --interactive
241
39
  ```
242
40
 
243
- **Required flags:**
244
- - `--process-id <id>` -- unique identifier for the process definition
245
- - `--entry <absolute-path>#<export>` -- path to the process JS file and its
246
- named export (e.g., `./my-process.js#process`)
247
- - `--prompt "$PROMPT"` -- the user's initial prompt/request text
248
- - `--harness codex` -- activates Codex session binding. The session ID is
249
- auto-resolved from `CODEX_THREAD_ID`, `CODEX_SESSION_ID`, or `CODEX_ENV_FILE`.
250
- - `--state-dir .a5c` -- required for honest workspace-local session state
251
- - `--plugin-root "${CODEX_PLUGIN_ROOT}"` -- plugin root for state resolution
252
-
253
- **Optional flags:**
254
- - `--inputs <file>` -- path to a JSON file with process inputs
255
- - `--run-id <id>` -- override auto-generated run ID
256
- - `--runs-dir <dir>` -- override runs directory (default: `.a5c/runs`)
257
-
258
- Do **not** pass `--session-id` explicitly inside a real Codex session. The Codex
259
- adapter auto-resolves the session/thread ID from environment variables. Only pass
260
- `--session-id` in out-of-band recovery flows.
261
-
262
- **Common mistakes to avoid:**
263
- - wrong: Calling `session:init` explicitly
264
- - wrong: Fabricating a session ID when none is available from the environment
265
- - wrong: Trying to bind the session in a separate step after run creation
266
- - correct: Using `--harness codex` with `run:create` to create the run AND
267
- auto-bind the session, relying on environment variables for honest session
268
- binding
269
-
270
- **For resuming existing runs:**
41
+ For non-interactive runs (e.g., with `-p` flag or no question tool):
271
42
 
272
43
  ```bash
273
- $CLI session:resume \
274
- --session-id <id> \
275
- --state-dir .a5c \
276
- --run-id <runId> --runs-dir .a5c/runs --json
44
+ babysitter instructions:babysit-skill --harness codex --no-interactive
277
45
  ```
278
46
 
279
- ### 3. Run Iteration
280
-
281
- ```bash
282
- $CLI run:iterate .a5c/runs/<runId> --json --iteration <n> --plugin-root "${CODEX_PLUGIN_ROOT}"
283
- ```
284
-
285
- **Output:**
286
- ```json
287
- {
288
- "iteration": 1,
289
- "status": "executed|waiting|completed|failed|none",
290
- "action": "executed-tasks|waiting|none",
291
- "reason": "auto-runnable-tasks|breakpoint-waiting|terminal-state",
292
- "count": 3,
293
- "completionProof": "only-present-when-completed",
294
- "metadata": { "runId": "...", "processId": "..." }
295
- }
296
- ```
297
-
298
- **Status values:**
299
- - `"executed"` - Tasks executed, continue looping
300
- - `"waiting"` - Breakpoint/sleep, pause until released
301
- - `"completed"` - Run finished successfully
302
- - `"failed"` - Run failed with error
303
- - `"none"` - No pending effects
304
-
305
- **Common mistake to avoid:**
306
- - wrong: Calling run:iterate, performing the effect, posting the result,
307
- then calling run:iterate again in the same session
308
- - correct: Calling run:iterate, performing the effect, posting the result,
309
- then STOPPING the session so the hook triggers the next iteration
310
- (except on Windows, where you must continue in-turn)
311
-
312
- ### 4. Get Effects
313
-
314
- ```bash
315
- $CLI task:list .a5c/runs/<runId> --pending --json
316
- ```
317
-
318
- **Output:**
319
- ```json
320
- {
321
- "tasks": [
322
- {
323
- "effectId": "effect-abc123",
324
- "kind": "agent|skill|breakpoint",
325
- "label": "auto",
326
- "status": "requested"
327
- }
328
- ]
329
- }
330
- ```
331
-
332
- ### 5. Perform Effects
333
-
334
- Run the effect externally to the SDK (by you, your hook, or another worker).
335
- After execution (by delegation to an agent or skill), post the outcome summary
336
- into the run by calling `task:post`, which:
337
- - Writes the committed result to `tasks/<effectId>/result.json`
338
- - Appends an `EFFECT_RESOLVED` event to the journal
339
- - Updates the state cache
340
-
341
- IMPORTANT:
342
- - Delegate using the Task tool if possible.
343
- - Make sure the change was actually performed and not described or implied.
344
- (for example, if code files were mentioned as created in the summary, make
345
- sure they were actually created.)
346
- - Include in the instructions to the agent or skill to perform the task in
347
- full and return only the summary result in the requested schema.
348
-
349
- #### 5.1 Breakpoint Handling
350
-
351
- ##### 5.1.0 Mode Detection and Breakpoint Policy
352
-
353
- - If the user is present in chat, default to interactive breakpoint handling.
354
- - Use non-interactive handling only when execution context is explicitly
355
- non-interactive (for example no question tool / explicit non-interactive run).
356
- - Never auto-approve breakpoints when mode is ambiguous. Treat ambiguity as
357
- interactive and ask explicitly.
358
- - Any mode switch that changes approval behavior must be stated explicitly in
359
- the run notes.
360
-
361
- ##### 5.1.1 Interactive mode
362
-
363
- Ask the user explicitly for approval. Include explicit approve/reject options
364
- so the user's intent is unambiguous.
365
-
366
- **CRITICAL: Response validation rules:**
367
- - If the response is empty, no selection, or dismissed: treat as **NOT
368
- approved**. Re-ask the question or keep the breakpoint pending. Do NOT
369
- proceed.
370
- - NEVER fabricate, synthesize, or infer approval text. Only pass through the
371
- user's actual selected response verbatim.
372
- - NEVER assume approval from ambiguous, empty, or missing responses. When in
373
- doubt, the answer is "not approved".
374
-
375
- **CRITICAL: Breakpoint rejection posting rules:**
376
- - Breakpoint rejection MUST be posted with `--status ok` and a value of
377
- `{"approved": false, "response": "..."}`. NEVER use `--status error` for a
378
- user rejection -- that signals a task execution failure and will trigger
379
- `RUN_FAILED`, requiring manual journal surgery to recover.
380
- - Only use `--status error` if the question tool itself throws an error.
381
-
382
- **Breakpoint posting examples:**
383
-
384
- ```bash
385
- # User approved the breakpoint
386
- echo '{"approved": true, "response": "Looks good, proceed"}' > tasks/<effectId>/output.json
387
- $CLI task:post <runId> <effectId> --status ok --value tasks/<effectId>/output.json
388
-
389
- # User rejected the breakpoint (ALWAYS use --status ok, not --status error)
390
- echo '{"approved": false, "response": "Stop here"}' > tasks/<effectId>/output.json
391
- $CLI task:post <runId> <effectId> --status ok --value tasks/<effectId>/output.json
392
- ```
393
-
394
- **Breakpoint value payload schema:**
395
-
396
- | Field | Type | Required | Description |
397
- |-------|------|----------|-------------|
398
- | `approved` | `boolean` | Yes | Whether the user approved the breakpoint |
399
- | `response` | `string` | No | The user's response text or selected option |
400
- | `feedback` | `string` | No | Additional feedback from the user |
401
-
402
- **Breakpoint routing fields:**
403
-
404
- When calling `ctx.breakpoint()`, you can include routing fields to control who receives the breakpoint and how responses are collected:
405
-
406
- | Field | Type | Required | Description |
407
- |-------|------|----------|-------------|
408
- | `expert` | `string \| string[]` | No | Domain expert identifier, or `'owner'` to route back to the run requester |
409
- | `tags` | `string[]` | No | Categorization tags for filtering breakpoints |
410
- | `strategy` | `'single' \| 'first-response-wins' \| 'collect-all' \| 'quorum'` | No | Response collection strategy. Only meaningful when `expert !== 'owner'`. Default: `'single'` |
411
- | `previousFeedback` | `string` | No | Feedback from a previous rejection (used in retry loops) |
412
- | `attempt` | `number` | No | Current retry attempt number |
413
-
414
- **Breakpoint rejection handling -- retry/refine pattern:**
415
-
416
- Processes must ALWAYS loop back on rejection, never fail. Use the following clean retry/refine pattern:
417
-
418
- ```javascript
419
- let lastFeedback = null;
420
- for (let attempt = 0; attempt < 3; attempt++) {
421
- if (lastFeedback) {
422
- currentResult = await ctx.task(refineTask, { ...args, feedback: lastFeedback, attempt: attempt + 1 });
423
- }
424
- const approval = await ctx.breakpoint({
425
- question: 'Review and approve this step?',
426
- options: ['Approve', 'Request changes'],
427
- expert: 'owner',
428
- tags: ['approval-gate'],
429
- previousFeedback: lastFeedback || undefined,
430
- attempt: attempt > 0 ? attempt + 1 : undefined,
431
- });
432
- if (approval.approved) break;
433
- lastFeedback = approval.response || approval.feedback || 'Changes requested';
434
- }
435
- ```
436
-
437
- ##### 5.1.2 Non-interactive mode
438
-
439
- Choose the best option from context and post the result. Rejections still use
440
- `--status ok` with `{"approved": false}`.
441
-
442
- ### 6. Results Posting
443
-
444
- **IMPORTANT**: Do NOT write `result.json` directly. The SDK owns that file.
445
-
446
- **Workflow:**
447
-
448
- 1. Write the result **value** to a separate file (e.g., `output.json` or
449
- `value.json`):
450
- ```json
451
- {
452
- "score": 85,
453
- "details": { ... }
454
- }
455
- ```
456
-
457
- 2. Post the result, passing the value file:
458
- ```bash
459
- $CLI task:post .a5c/runs/<runId> <effectId> \
460
- --status ok \
461
- --value tasks/<effectId>/output.json \
462
- --json
463
- ```
464
-
465
- The `task:post` command will:
466
- - Read the value from your file
467
- - Write the complete `result.json` (including schema, metadata, and your value)
468
- - Append an `EFFECT_RESOLVED` event to the journal
469
- - Update the state cache
470
-
471
- **Available flags:**
472
- - `--status <ok|error>` (required)
473
- - `--value <file>` - Result value (for status=ok)
474
- - `--error <file>` - Error payload (for status=error)
475
- - `--stdout-file <file>` - Capture stdout
476
- - `--stderr-file <file>` - Capture stderr
477
- - `--started-at <iso8601>` - Task start time
478
- - `--finished-at <iso8601>` - Task end time
479
- - `--metadata <file>` - Additional metadata JSON
480
-
481
- **Common mistake to avoid:**
482
- ```bash
483
- # wrong: Writing result.json directly
484
- echo '{"result": {...}}' > tasks/<effectId>/result.json
485
- $CLI task:post <runId> <effectId> --status ok
486
-
487
- # correct: Write value to separate file, let SDK create result.json
488
- echo '{"score": 85}' > tasks/<effectId>/output.json
489
- $CLI task:post <runId> <effectId> --status ok --value tasks/<effectId>/output.json
490
- ```
491
-
492
- **Breakpoint-specific posting:**
493
-
494
- ```bash
495
- # Breakpoint approval
496
- echo '{"approved": true, "response": "User approved"}' > tasks/<effectId>/output.json
497
- $CLI task:post <runId> <effectId> --status ok --value tasks/<effectId>/output.json
498
-
499
- # Breakpoint rejection (ALWAYS use --status ok, not --status error)
500
- echo '{"approved": false, "response": "User rejected"}' > tasks/<effectId>/output.json
501
- $CLI task:post <runId> <effectId> --status ok --value tasks/<effectId>/output.json
502
- ```
503
-
504
- ---
505
-
506
- ### 7. STOP after every phase after run-session association
507
-
508
- After `run:create` or any posted effect result, end the current turn and yield
509
- back to the Codex hook loop. The stop hook drives the loop, not you. Do not run
510
- multiple `run:iterate` steps in the same turn.
511
-
512
- **Native Windows exception**: because Codex does not execute hooks on Windows
513
- yet, do not end the turn and wait for the hook loop there. Continue with the
514
- next `run:iterate`, inspect tasks, perform effects, and post results in the same
515
- turn until the run completes or you need explicit user input for a breakpoint.
516
-
517
- Common mistakes to avoid:
518
- - wrong: Stopping the session before run-session association
519
- - correct: Stopping the session after run-session association, then after every
520
- iteration, letting the hook call you back to continue the loop until
521
- completion.
522
- - wrong: Calling run:iterate multiple times in the same session without stopping
523
- and letting the hook call you back (except on Windows where this is required).
524
- - correct: Calling run:iterate once, then stopping and letting the hook call you
525
- back for the next iteration until completion.
526
-
527
- On native Windows, an in-turn loop across `run:iterate` -> `task:list` ->
528
- effect execution -> `task:post` is expected and valid because hooks do not
529
- resume the session. This is not considered bypassing the orchestration model as
530
- long as each iteration/effect is handled through the CLI and run journal.
531
-
532
- ### 8. Completion Proof
533
-
534
- When the run is completed, the CLI will emit a `completionProof` value in the
535
- output of `run:iterate` and `run:status`. You must return that exact value
536
- wrapped in a `<promise>...</promise>` tag to signal completion to the stop-hook
537
- loop.
538
-
539
- ### Progressing Between The Phases
540
-
541
- After you run `run:create`, progress through the phases above by stopping the
542
- session, which will trigger the stop-hook and call you with the next phase.
543
- After you finish each phase after `run:create`, stop the session and return
544
- control to the user until you are called again by the hook.
545
-
546
- ## Task Kinds
547
-
548
- **CRITICAL RULE: NEVER use `node` kind effects in generated processes.** The
549
- `node` kind executes inline Node.js scripts which bypasses the agent
550
- orchestration model entirely. Every task that would be a `node` effect MUST be
551
- converted to an `agent` or `skill` effect instead. The only acceptable
552
- non-agent kinds are `shell` (for running existing test suites, git commands, or
553
- build tools), `breakpoint` (human approval), and `sleep` (time gates).
554
-
555
- | Kind | Description | Executor | When to use |
556
- |------|-------------|----------|-------------|
557
- | ~~`node`~~ | ~~Node.js script~~ | ~~Local node process~~ | **NEVER -- forbidden. Convert to `agent` or `skill`.** |
558
- | `shell` | Shell command | Local shell process | Only for running existing CLI tools, test suites, git, linters, builds |
559
- | `agent` | LLM agent | Agent runtime | **Default for all tasks** -- planning, implementation, analysis, verification, scoring, debugging, code writing, research |
560
- | `skill` | Installed skill | Skill system | When a matching installed skill exists (preferred over agent when available) |
561
- | `breakpoint` | Human approval | UI/CLI | Decision gates requiring user input |
562
- | `sleep` | Time gate | Scheduler | Time-based pauses |
563
-
564
- ### Effect Execution Hints
565
-
566
- Tasks can include an `execution` field to express preferences about how the effect should be executed:
567
-
568
- | Field | Description |
569
- |-------|-------------|
570
- | `execution.model` | Preferred model for the task (e.g., `'claude-opus-4-6'`). Used for subagent selection. |
571
-
572
- Example:
573
-
574
- ```javascript
575
- defineTask('my-task', (args, taskCtx) => ({
576
- kind: 'agent',
577
- title: 'My task',
578
- execution: {
579
- model: 'claude-opus-4-6',
580
- },
581
- agent: {
582
- name: 'general-purpose',
583
- prompt: {
584
- role: 'Task executor',
585
- task: 'Perform the requested work',
586
- context: { ...args },
587
- instructions: ['Execute the task'],
588
- outputFormat: 'JSON'
589
- },
590
- outputSchema: { type: 'object', required: ['result'] }
591
- },
592
- io: {
593
- inputJsonPath: `tasks/${taskCtx.effectId}/input.json`,
594
- outputJsonPath: `tasks/${taskCtx.effectId}/result.json`
595
- }
596
- }));
597
- ```
598
-
599
- ### Agent Task Example
600
-
601
- Important: Check which subagents and agents are actually available before
602
- assigning the name. If none, pass the general-purpose subagent. Check the
603
- subagents and agents in the plugin (in nested folders) to find relevant
604
- subagents and agents to use as a reference. Specifically check subagents and
605
- agents in folders next to the reference process file.
606
-
607
- When executing the agent task, use the Task tool. Never use the Babysitter skill
608
- or agent to execute the task.
609
-
610
- ```javascript
611
- export const agentTask = defineTask('agent-scorer', (args, taskCtx) => ({
612
- kind: 'agent',
613
- title: 'Agent scoring',
614
- agent: {
615
- name: 'quality-scorer',
616
- prompt: {
617
- role: 'QA engineer',
618
- task: 'Score results 0-100',
619
- context: { ...args },
620
- instructions: ['Review', 'Score', 'Recommend'],
621
- outputFormat: 'JSON'
622
- },
623
- outputSchema: {
624
- type: 'object',
625
- required: ['score']
626
- }
627
- },
628
-
629
- io: {
630
- inputJsonPath: `tasks/${taskCtx.effectId}/input.json`,
631
- outputJsonPath: `tasks/${taskCtx.effectId}/output.json`
632
- }
633
- }));
634
- ```
635
-
636
- ### Skill Task Example
637
-
638
- Important: Check which skills are actually available before assigning the skill
639
- name. Check the skills in the plugin (in nested folders) to find relevant
640
- skills to use as a reference. Skills are preferred over subagents for executing
641
- tasks.
642
-
643
- ```javascript
644
- export const skillTask = defineTask('analyzer-skill', (args, taskCtx) => ({
645
- kind: 'skill',
646
- title: 'Analyze codebase',
647
-
648
- skill: {
649
- name: 'codebase-analyzer',
650
- context: {
651
- scope: args.scope,
652
- depth: args.depth,
653
- analysisType: args.type,
654
- criteria: ['Code consistency', 'Naming conventions', 'Error handling'],
655
- instructions: [
656
- 'Scan specified paths for code patterns',
657
- 'Analyze consistency across the codebase',
658
- 'Check naming conventions',
659
- 'Review error handling patterns',
660
- 'Generate structured analysis report'
661
- ]
662
- }
663
- },
664
-
665
- io: {
666
- inputJsonPath: `tasks/${taskCtx.effectId}/input.json`,
667
- outputJsonPath: `tasks/${taskCtx.effectId}/output.json`
668
- }
669
- }));
670
- ```
671
-
672
- ---
673
-
674
- ## Quick Commands Reference
675
-
676
- **Create run (with session binding):**
677
- ```bash
678
- $CLI run:create --process-id <id> --entry <path>#<export> --inputs <file> \
679
- --prompt "$PROMPT" --harness codex \
680
- --state-dir .a5c --plugin-root "${CODEX_PLUGIN_ROOT}" --json
681
- ```
682
-
683
- **Check status:**
684
- ```bash
685
- $CLI run:status <runId> --json
686
- ```
687
-
688
- When the run completes, `run:iterate` and `run:status` emit `completionProof`.
689
- Use that exact value in a `<promise>...</promise>` tag to end the loop.
690
-
691
- **View events:**
692
- ```bash
693
- $CLI run:events <runId> --limit 20 --reverse
694
- ```
695
-
696
- **List tasks:**
697
- ```bash
698
- $CLI task:list <runId> --pending --json
699
- ```
700
-
701
- **Post task result:**
702
- ```bash
703
- $CLI task:post <runId> <effectId> --status <ok|error> --json
704
- ```
705
-
706
- **Iterate:**
707
- ```bash
708
- $CLI run:iterate <runId> --json --iteration <n> --plugin-root "${CODEX_PLUGIN_ROOT}"
709
- ```
710
-
711
- ---
712
-
713
- ## Recovery from failure
714
-
715
- If at any point the run fails due to SDK issues or corrupted state or journal,
716
- analyze the error and the journal events. Recover the state and journal to the
717
- last known good state, adapt, and try to continue the run.
718
-
719
- ### Failure Protocol (required)
720
-
721
- When blocked or failed, follow this order:
722
-
723
- 1. Report the concrete blocker and root cause (command/output based, not vague).
724
- 2. Attempt repair of current run/session/journal first.
725
- 3. Present recovery options when strategy changes intent/scope:
726
- - Option A: continue intent-faithful repair path (recommended)
727
- - Option B: reduced-scope fallback (requires explicit user approval)
728
- 4. Do not create a new simplified process without explicit approval if it
729
- reduces scope or quality expectations.
730
- 5. Resume orchestration only after the chosen recovery path is explicit.
731
-
732
- ## Process Creation Guidelines and methodologies
733
-
734
- - When building UX and full stack applications, integrate/link the main pages
735
- of the frontend with functionality created for every phase of the development
736
- process (where relevant), so that there is a way to test the functionality as
737
- you go.
738
-
739
- - Unless otherwise specified, prefer quality gated iterative development loops
740
- in the process.
741
-
742
- - You can change the process after the run is created or during the run (and
743
- adapt the process accordingly and journal accordingly) in case you discover new
744
- information or requirements.
745
-
746
- - The process should be a comprehensive and complete solution to the user
747
- request.
748
-
749
- - The process should usually be a composition (in code) of multiple processes
750
- from the process library (not just one), for multiple phases and parts of the
751
- process, each utilizing a different process from the library as a reference.
752
-
753
- - Include verification and refinement steps (and loops) for planning phases and
754
- integration phases, debugging phases, refactoring phases, etc.
755
-
756
- - Create the process with (and around) the available skills and subagents.
757
- (check which are available first and use discover to find them)
758
-
759
- - Prefer incremental work that allows testing and experimentation with the new
760
- functionality as you go.
761
-
762
- ### Process File Discovery Markers
763
-
764
- When creating process files, include `@skill` and `@agent` markers in the JSDoc
765
- header listing the skills and agents relevant to this process. The SDK reads
766
- these markers to provide targeted discovery results instead of scanning all
767
- available skills.
768
-
769
- **Format** (one per line, path relative to the active process-library root):
770
- ```javascript
771
- /**
772
- * @process specializations/web-development/react-app-development
773
- * @description React app development with TDD
774
- * @skill frontend-design specializations/web-development/skills/frontend-design/SKILL.md
775
- * @agent frontend-architect specializations/web-development/agents/frontend-architect/AGENT.md
776
- */
777
- ```
778
-
779
- **Steps during process creation:**
780
- 1. Use `babysitter skill:discover --process-path <path> --json` to find
781
- relevant skills/agents in the specialization directory
782
- 2. Select the ones actually needed by the process tasks
783
- 3. Add them as `@skill`/`@agent` markers in the JSDoc header
784
- 4. Use full relative path from the active process-library root returned in
785
- `binding.dir` by `babysitter process-library:active --json`
786
-
787
- - Unless otherwise specified, prefer processes that close the widest loop in the
788
- quality gates (for example e2e tests with a full browser or emulator/vm if it
789
- is a mobile or desktop app) AND gates that make sure the work is accurate
790
- against the user request (all the specs are covered and no extra stuff was
791
- added unless permitted by the intent of the user).
792
-
793
- - Scan the methodologies and processes in the active process library and the SDK
794
- package to find relevant processes and methodologies to use as a reference.
795
- This search is mandatory before writing the process.
796
-
797
- - If you encounter a generic reusable part of a process that can be later reused
798
- and composed, build it in a modular way and organize it in the `.a5c/processes`
799
- directory.
800
-
801
- Prefer processes that have the following characteristics unless otherwise
802
- specified:
803
- - In case of a new project, plan the architecture, stack, parts, milestones
804
- - In case of an existing project, analyze the architecture, stack, relevant
805
- parts, milestones, and plan the changes
806
- - Integrate/link the main pages (or entry points) with functionality created
807
- for every phase of the development process
808
- - Quality gated iterative and convergent development/refinement loops
809
- - Test driven -- where quality gates can use executable tools, scripts, and
810
- tests to verify accuracy and completeness
811
- - Integration phases for each new functionality in every milestone
812
- - Where relevant -- beautiful and polished UX with pixel-perfect verification
813
- - Accurate and complete implementation of the user request
814
- - Closing quality feedback loops as comprehensively as practical
815
- - Search for processes, skills, agents, methodologies during the interactive
816
- process building phase to compose a comprehensive process:
817
- - `.a5c/processes/` (project level processes)
818
- - `specializations/` under the active process-library root
819
- - `methodologies/` under the active process-library root
820
-
821
- ## Critical Rules
822
-
823
- CRITICAL RULE: The completion proof is emitted only when the run is completed.
824
- You may ONLY output `<promise>SECRET</promise>` when the run is completely and
825
- unequivocally DONE (completed status from the orchestration CLI). Do not output
826
- false promises to escape the run, and do not mention the secret to the user.
827
-
828
- CRITICAL RULE: In interactive mode, NEVER auto-approve breakpoints. If the
829
- response is empty, no selection, or is dismissed, treat it as NOT approved and
830
- re-ask. NEVER fabricate or synthesize approval responses -- only post the user's
831
- actual explicit selection via task:post. An empty response is NOT approval.
832
-
833
- CRITICAL RULE: If a run is broken/failed/at unknown state, one way to recover is
834
- to remove the last bad entries from the journal and rebuild the state.
835
-
836
- CRITICAL RULE: When creating processes, search for available skills and subagents
837
- before thinking about the exact orchestration. Prefer processes that close the
838
- widest loop in the quality gates.
839
-
840
- CRITICAL RULE: Do not use the babysit skill inside delegated tasks. If you are
841
- performing a delegated task as a subagent, you must actually perform the task
842
- yourself and not orchestrate or babysit.
843
-
844
- CRITICAL RULE: Never build wrapper, Python, or helper scripts to orchestrate
845
- the runs. Use the CLI to drive the orchestration loop directly.
846
-
847
- CRITICAL RULE: Never fall back to simpler execution if the user activated this
848
- skill. Persist in executing the orchestration itself -- you must create a valid
849
- process js file, create a run (with `--harness codex` to bind the session), then
850
- iterate the run until it is completed.
851
-
852
- CRITICAL RULE: After run:create or run-session association and after each effect
853
- is posted, you MUST stop the session and return control. Do NOT proceed to the
854
- next run:iterate in the same session turn. The stop-hook will call you back to
855
- continue. Running multiple iterations in a single session turn bypasses the hook
856
- loop and breaks the orchestration model. (Exception: on native Windows where
857
- hooks do not run, you must continue in-turn.)
858
-
859
- CRITICAL RULE: NEVER use `kind: 'node'` in generated process files. All tasks
860
- MUST use `kind: 'agent'` or `kind: 'skill'`. The only non-agent kinds allowed
861
- are `shell` (for existing CLI tools/tests/git), `breakpoint`, and `sleep`.
862
-
863
- CRITICAL RULE: NEVER bypass, avoid, or skip the babysitter orchestration model
864
- when the user explicitly requested this skill. Do not execute the tasks yourself,
865
- do not create helper scripts. Use the CLI to drive the orchestration loop. On
866
- native Windows, an in-turn iterative loop is required because hooks do not
867
- resume execution there. Never bypass the orchestration, whether the task is simple or things get complicated.
868
- Persist in executing the orchestration itself.
869
-
870
- CRITICAL RULE: Do not fabricate a session ID. Let the Codex adapter auto-resolve
871
- it from `CODEX_THREAD_ID`, `CODEX_SESSION_ID`, or `CODEX_ENV_FILE`.
872
-
873
- ## See Also
874
- - `library/tdd-quality-convergence.js` - TDD quality convergence example
875
- - `library/reference/ADVANCED_PATTERNS.md` - Agent/skill patterns, iterative convergence
876
- - `library/reference/sdk.md` - SDK API reference
47
+ Follow the instructions returned by the command above to orchestrate the run.
@@ -1,6 +1,7 @@
1
1
  ---
2
2
  name: retrospect
3
- description: Summarize or retrospect on a completed Babysitter run.
3
+ description: Summarize or retrospect on one or more completed Babysitter runs.
4
+ argument-hint: "[run-id...] [--all] Optional run IDs or --all for all runs"
4
5
  ---
5
6
 
6
7
  # retrospect
@@ -9,7 +10,46 @@ Load and use the installed `babysit` skill.
9
10
 
10
11
  Resolve the request in `retrospect` mode:
11
12
 
12
- - treat everything after `$retrospect` as the run selector to summarize
13
+ - treat everything after `$retrospect` as the run selector(s) to summarize
13
14
  - focus on the run history, outcomes, lessons, and gaps
14
15
  - do not create a separate command surface here; this skill only forwards into
15
16
  `babysit`
17
+
18
+ ## Phase 1: Resolve Target Run(s)
19
+
20
+ - If `--all` or "all" is present in args: list all runs via `ls -lt .a5c/runs/` and collect all completed/failed run IDs
21
+ - If multiple run IDs are provided: use all of them
22
+ - Otherwise: existing behavior (resolve the latest single run)
23
+ - Use `ask_user` to confirm run selection in interactive mode
24
+
25
+ ## Phase 2: Load Run Data
26
+
27
+ For each selected run, load:
28
+ - `run.json` metadata
29
+ - Journal events
30
+ - Task definitions and results
31
+ - State snapshots
32
+
33
+ ## Phase 3: Analysis
34
+
35
+ Perform standard per-run analysis (outcomes, process effectiveness, suggestions).
36
+
37
+ ### Cross-Run Pattern Analysis (multi-run mode)
38
+
39
+ When analyzing multiple runs, additionally cover:
40
+ - **Common failure modes** across runs
41
+ - **Velocity trends** (tasks/time across runs)
42
+ - **Process evolution** (how processes changed over time)
43
+ - **Repeated breakpoint patterns**
44
+
45
+ ## Phase 4: Suggestions
46
+
47
+ Provide actionable suggestions for process improvements, optimizations, and fixes.
48
+
49
+ ## Phase 5: Implementation
50
+
51
+ If the user agrees, implement improvements to processes, skills, or configuration.
52
+
53
+ ## Phase 6: Cleanup Suggestion
54
+
55
+ After analysis, suggest: "Consider running `babysitter cleanup` (or `/babysitter:cleanup`) to clean up old run data and reclaim disk space."