@a5c-ai/babysitter-codex 0.1.6-staging.f4d4d1df → 0.1.7-staging.347227ea

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  {
2
- "name": "babysitter-codex",
2
+ "name": "babysitter",
3
3
  "version": "0.1.5",
4
4
  "description": "Babysitter orchestration plugin for Codex with skill entrypoints and lifecycle hooks.",
5
5
  "author": {
package/README.md CHANGED
@@ -50,9 +50,9 @@ The process library is fetched and bound through the SDK CLI in
50
50
 
51
51
  After `install --workspace`, the important files are:
52
52
 
53
- - `plugins/babysitter-codex/.codex-plugin/plugin.json`
54
- - `plugins/babysitter-codex/skills/babysit/SKILL.md`
55
- - `plugins/babysitter-codex/hooks.json`
53
+ - `plugins/babysitter/.codex-plugin/plugin.json`
54
+ - `plugins/babysitter/skills/babysit/SKILL.md`
55
+ - `plugins/babysitter/hooks.json`
56
56
  - `.codex/skills/`
57
57
  - `.codex/hooks/`
58
58
  - `.codex/hooks.json`
@@ -67,10 +67,10 @@ Verify the installed plugin bundle:
67
67
 
68
68
  ```bash
69
69
  npm ls -g @a5c-ai/babysitter-codex --depth=0
70
- test -f ~/.codex/plugins/babysitter-codex/.codex-plugin/plugin.json
71
- test -f ~/.codex/plugins/babysitter-codex/hooks.json
72
- test -f ~/.codex/plugins/babysitter-codex/hooks/babysitter-stop-hook.sh
73
- test -f ~/.codex/plugins/babysitter-codex/skills/babysit/SKILL.md
70
+ test -f ~/.codex/plugins/babysitter/.codex-plugin/plugin.json
71
+ test -f ~/.codex/plugins/babysitter/hooks.json
72
+ test -f ~/.codex/plugins/babysitter/hooks/babysitter-stop-hook.sh
73
+ test -f ~/.codex/plugins/babysitter/skills/babysit/SKILL.md
74
74
  test -f ~/.codex/hooks.json
75
75
  test -f ~/.codex/hooks/babysitter-stop-hook.sh
76
76
  test -f ~/.codex/skills/babysit/SKILL.md
@@ -5,7 +5,7 @@ const os = require('os');
5
5
  const path = require('path');
6
6
  const { spawnSync } = require('child_process');
7
7
 
8
- const PLUGIN_NAME = 'babysitter-codex';
8
+ const PLUGIN_NAME = 'babysitter';
9
9
  const PLUGIN_CATEGORY = 'Coding';
10
10
  const LEGACY_SKILL_NAMES = [
11
11
  'babysit',
@@ -489,8 +489,8 @@ function warnWindowsHooks() {
489
489
  if (process.platform !== 'win32') {
490
490
  return;
491
491
  }
492
- console.warn('[babysitter-codex] Warning: Codex hooks are currently disabled on native Windows.');
493
- console.warn('[babysitter-codex] The plugin will install correctly, but SessionStart/UserPromptSubmit/Stop hooks will not fire until Codex enables Windows hook execution.');
492
+ console.warn('[babysitter] Warning: Codex hooks are currently disabled on native Windows.');
493
+ console.warn('[babysitter] The plugin will install correctly, but SessionStart/UserPromptSubmit/Stop hooks will not fire until Codex enables Windows hook execution.');
494
494
  }
495
495
 
496
496
  module.exports = {
package/bin/install.js CHANGED
@@ -21,7 +21,7 @@ function main() {
21
21
  const pluginRoot = getHomePluginRoot();
22
22
  const marketplacePath = getHomeMarketplacePath();
23
23
 
24
- console.log(`[babysitter-codex] Installing plugin to ${pluginRoot}`);
24
+ console.log(`[babysitter] Installing plugin to ${pluginRoot}`);
25
25
 
26
26
  try {
27
27
  copyPluginBundle(PACKAGE_ROOT, pluginRoot);
@@ -30,17 +30,17 @@ function main() {
30
30
  installCodexSurface(PACKAGE_ROOT, codexHome);
31
31
 
32
32
  const active = ensureGlobalProcessLibrary(PACKAGE_ROOT);
33
- console.log(`[babysitter-codex] marketplace: ${marketplacePath}`);
34
- console.log(`[babysitter-codex] process library: ${active.binding?.dir}`);
33
+ console.log(`[babysitter] marketplace: ${marketplacePath}`);
34
+ console.log(`[babysitter] process library: ${active.binding?.dir}`);
35
35
  if (active.defaultSpec?.cloneDir) {
36
- console.log(`[babysitter-codex] process library clone: ${active.defaultSpec.cloneDir}`);
36
+ console.log(`[babysitter] process library clone: ${active.defaultSpec.cloneDir}`);
37
37
  }
38
- console.log(`[babysitter-codex] process library state: ${active.stateFile}`);
38
+ console.log(`[babysitter] process library state: ${active.stateFile}`);
39
39
  warnWindowsHooks();
40
- console.log('[babysitter-codex] Installation complete!');
41
- console.log('[babysitter-codex] Restart Codex to pick up the installed plugin and config changes.');
40
+ console.log('[babysitter] Installation complete!');
41
+ console.log('[babysitter] Restart Codex to pick up the installed plugin and config changes.');
42
42
  } catch (err) {
43
- console.error(`[babysitter-codex] Failed to install plugin: ${err.message}`);
43
+ console.error(`[babysitter] Failed to install plugin: ${err.message}`);
44
44
  process.exitCode = 1;
45
45
  }
46
46
  }
package/bin/uninstall.js CHANGED
@@ -19,10 +19,10 @@ function main() {
19
19
  if (fs.existsSync(pluginRoot)) {
20
20
  try {
21
21
  fs.rmSync(pluginRoot, { recursive: true, force: true });
22
- console.log(`[babysitter-codex] Removed ${pluginRoot}`);
22
+ console.log(`[babysitter] Removed ${pluginRoot}`);
23
23
  removedPlugin = true;
24
24
  } catch (err) {
25
- console.warn(`[babysitter-codex] Warning: Could not remove plugin directory ${pluginRoot}: ${err.message}`);
25
+ console.warn(`[babysitter] Warning: Could not remove plugin directory ${pluginRoot}: ${err.message}`);
26
26
  }
27
27
  }
28
28
 
@@ -30,11 +30,11 @@ function main() {
30
30
  removeLegacyCodexSurface(codexHome);
31
31
 
32
32
  if (!removedPlugin) {
33
- console.log('[babysitter-codex] Plugin directory not found, legacy Codex surface cleaned if present.');
33
+ console.log('[babysitter] Plugin directory not found, legacy Codex surface cleaned if present.');
34
34
  return;
35
35
  }
36
36
 
37
- console.log('[babysitter-codex] Restart Codex to complete uninstallation.');
37
+ console.log('[babysitter] Restart Codex to complete uninstallation.');
38
38
  }
39
39
 
40
40
  main();
@@ -11,16 +11,23 @@ export CODEX_PLUGIN_ROOT="${CODEX_PLUGIN_ROOT:-${PLUGIN_ROOT}}"
11
11
  export BABYSITTER_STATE_DIR="${STATE_DIR}"
12
12
 
13
13
  mkdir -p "$LOG_DIR" 2>/dev/null
14
- {
15
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook script invoked"
16
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) PLUGIN_ROOT=$PLUGIN_ROOT"
17
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) STATE_DIR=$STATE_DIR"
18
- } >> "$LOG_FILE" 2>/dev/null
14
+
15
+ blog() {
16
+ local msg="$1"
17
+ local ts
18
+ ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
19
+ echo "[INFO] $ts $msg" >> "$LOG_FILE" 2>/dev/null
20
+ babysitter log --type hook --label "hook:session-start" --message "$msg" --source shell-hook 2>/dev/null || true
21
+ }
22
+
23
+ blog "Hook script invoked"
24
+ blog "PLUGIN_ROOT=$PLUGIN_ROOT"
25
+ blog "STATE_DIR=$STATE_DIR"
19
26
 
20
27
  INPUT_FILE=$(mktemp 2>/dev/null || echo "/tmp/codex-session-start-hook-$$.json")
21
28
  cat > "$INPUT_FILE"
22
29
 
23
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook input received ($(wc -c < "$INPUT_FILE") bytes)" >> "$LOG_FILE" 2>/dev/null
30
+ blog "Hook input received ($(wc -c < "$INPUT_FILE") bytes)"
24
31
 
25
32
  RESULT=$(babysitter hook:run \
26
33
  --hook-type session-start \
@@ -30,7 +37,7 @@ RESULT=$(babysitter hook:run \
30
37
  < "$INPUT_FILE" 2>"$LOG_DIR/babysitter-session-start-hook-stderr.log")
31
38
  EXIT_CODE=$?
32
39
 
33
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) CLI exit code=$EXIT_CODE" >> "$LOG_FILE" 2>/dev/null
40
+ blog "CLI exit code=$EXIT_CODE"
34
41
 
35
42
  rm -f "$INPUT_FILE" 2>/dev/null
36
43
  printf '%s\n' "$RESULT"
@@ -11,16 +11,23 @@ export CODEX_PLUGIN_ROOT="${CODEX_PLUGIN_ROOT:-${PLUGIN_ROOT}}"
11
11
  export BABYSITTER_STATE_DIR="${STATE_DIR}"
12
12
 
13
13
  mkdir -p "$LOG_DIR" 2>/dev/null
14
- {
15
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook script invoked"
16
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) PLUGIN_ROOT=$PLUGIN_ROOT"
17
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) STATE_DIR=$STATE_DIR"
18
- } >> "$LOG_FILE" 2>/dev/null
14
+
15
+ blog() {
16
+ local msg="$1"
17
+ local ts
18
+ ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
19
+ echo "[INFO] $ts $msg" >> "$LOG_FILE" 2>/dev/null
20
+ babysitter log --type hook --label "hook:stop" --message "$msg" --source shell-hook 2>/dev/null || true
21
+ }
22
+
23
+ blog "Hook script invoked"
24
+ blog "PLUGIN_ROOT=$PLUGIN_ROOT"
25
+ blog "STATE_DIR=$STATE_DIR"
19
26
 
20
27
  INPUT_FILE=$(mktemp 2>/dev/null || echo "/tmp/codex-stop-hook-$$.json")
21
28
  cat > "$INPUT_FILE"
22
29
 
23
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook input received ($(wc -c < "$INPUT_FILE") bytes)" >> "$LOG_FILE" 2>/dev/null
30
+ blog "Hook input received ($(wc -c < "$INPUT_FILE") bytes)"
24
31
 
25
32
  RESULT=$(babysitter hook:run \
26
33
  --hook-type stop \
@@ -30,7 +37,7 @@ RESULT=$(babysitter hook:run \
30
37
  < "$INPUT_FILE" 2>"$LOG_DIR/babysitter-stop-hook-stderr.log")
31
38
  EXIT_CODE=$?
32
39
 
33
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) CLI exit code=$EXIT_CODE" >> "$LOG_FILE" 2>/dev/null
40
+ blog "CLI exit code=$EXIT_CODE"
34
41
 
35
42
  rm -f "$INPUT_FILE" 2>/dev/null
36
43
  printf '%s\n' "$RESULT"
@@ -5,22 +5,16 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
5
5
  PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
6
6
  STATE_DIR="${BABYSITTER_STATE_DIR:-${PWD}/.a5c}"
7
7
  LOG_DIR="${BABYSITTER_LOG_DIR:-$PLUGIN_ROOT/.a5c/logs}"
8
- LOG_FILE="$LOG_DIR/babysitter-user-prompt-submit-hook.log"
9
8
 
10
9
  export CODEX_PLUGIN_ROOT="${CODEX_PLUGIN_ROOT:-${PLUGIN_ROOT}}"
11
10
  export BABYSITTER_STATE_DIR="${STATE_DIR}"
12
11
 
13
12
  mkdir -p "$LOG_DIR" 2>/dev/null
14
- {
15
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook script invoked"
16
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) PLUGIN_ROOT=$PLUGIN_ROOT"
17
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) STATE_DIR=$STATE_DIR"
18
- } >> "$LOG_FILE" 2>/dev/null
19
13
 
20
14
  INPUT_FILE=$(mktemp 2>/dev/null || echo "/tmp/codex-user-prompt-submit-hook-$$.json")
21
15
  cat > "$INPUT_FILE"
22
16
 
23
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) Hook input received ($(wc -c < "$INPUT_FILE") bytes)" >> "$LOG_FILE" 2>/dev/null
17
+ babysitter log --type hook --label "hook:user-prompt-submit" --message "Hook invoked" --source shell-hook 2>/dev/null || true
24
18
 
25
19
  RESULT=$(babysitter hook:run \
26
20
  --hook-type user-prompt-submit \
@@ -30,7 +24,7 @@ RESULT=$(babysitter hook:run \
30
24
  < "$INPUT_FILE" 2>"$LOG_DIR/babysitter-user-prompt-submit-hook-stderr.log")
31
25
  EXIT_CODE=$?
32
26
 
33
- echo "[INFO] $(date -u +%Y-%m-%dT%H:%M:%SZ) CLI exit code=$EXIT_CODE" >> "$LOG_FILE" 2>/dev/null
27
+ babysitter log --type hook --label "hook:user-prompt-submit" --message "CLI exit code=$EXIT_CODE" --source shell-hook 2>/dev/null || true
34
28
 
35
29
  rm -f "$INPUT_FILE" 2>/dev/null
36
30
  if [ -n "$RESULT" ]; then
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@a5c-ai/babysitter-codex",
3
- "version": "0.1.6-staging.f4d4d1df",
3
+ "version": "0.1.7-staging.347227ea",
4
4
  "description": "Babysitter Codex skill bundle and integration package for OpenAI Codex CLI with SDK-managed process-library bootstrapping, 15 orchestration modes, and BOM-safe SKILL installation",
5
5
  "scripts": {
6
6
  "test": "node test/integration.test.js && node test/packaged-install.test.js",
@@ -43,6 +43,6 @@
43
43
  },
44
44
  "homepage": "https://github.com/a5c-ai/babysitter/tree/main/plugins/babysitter-codex#readme",
45
45
  "dependencies": {
46
- "@a5c-ai/babysitter-sdk": "0.0.183-staging.f4d4d1df"
46
+ "@a5c-ai/babysitter-sdk": "0.0.184-staging.347227ea"
47
47
  }
48
48
  }
@@ -32,7 +32,7 @@ function main() {
32
32
  const args = parseArgs(process.argv);
33
33
  const packageRoot = path.resolve(process.env.BABYSITTER_PACKAGE_ROOT || path.join(__dirname, '..'));
34
34
  const workspaceRoot = args.workspace;
35
- const workspacePluginRoot = path.join(workspaceRoot, 'plugins', 'babysitter-codex');
35
+ const workspacePluginRoot = path.join(workspaceRoot, 'plugins', 'babysitter');
36
36
  const workspaceMarketplacePath = path.join(workspaceRoot, '.agents', 'plugins', 'marketplace.json');
37
37
  const workspaceConfigPath = path.join(workspaceRoot, '.codex', 'config.toml');
38
38
 
@@ -8,791 +8,40 @@ description: >-
8
8
 
9
9
  # babysit
10
10
 
11
- Orchestrate `.a5c/runs/<runId>/` through iterative execution. Use the SDK CLI to drive the orchestration loop.
12
-
13
- Native Windows caveat: Codex does not execute hooks on Windows yet. When
14
- running on native Windows, do not yield the turn and wait for the Stop hook.
15
- Keep driving the Babysitter loop in the current turn until the run completes or
16
- you hit a real user breakpoint that requires chat input.
17
-
18
- ## Non-Negotiables
19
-
20
- - Preserve user intent over speed. Never optimize for "get any completion proof"
21
- at the expense of the requested scope, quality, or process style.
22
- - Do not create temporary/minimal throwaway processes to force completion unless
23
- the user explicitly approves a reduced-scope recovery path.
24
- - If blocked, repair the current run/session first. Only switch strategy after
25
- stating the blocker and getting user approval when the new strategy changes
26
- intent or scope.
27
- - Use the Babysitter orchestration model end-to-end. Do not bypass with ad-hoc
28
- execution, fake outputs, or side workflows that are not represented as tasks.
11
+ Orchestrate `.a5c/runs/<runId>/` through iterative execution.
29
12
 
30
13
  ## Dependencies
31
14
 
32
15
  ### Babysitter SDK and CLI
33
16
 
34
- Use the installed CLI alias:
17
+ Read the SDK version from `versions.json` to ensure version compatibility:
35
18
 
36
19
  ```bash
37
- CLI="babysitter"
20
+ SDK_VERSION=$(node -e "try{console.log(JSON.parse(require('fs').readFileSync('${CODEX_PLUGIN_ROOT}/versions.json','utf8')).sdkVersion||'latest')}catch{console.log('latest')}")
21
+ sudo npm i -g @a5c-ai/babysitter-sdk@$SDK_VERSION
22
+ # Whether sudo is required depends on your environment and system
38
23
  ```
39
24
 
40
- If it is not available on the path, use:
25
+ Then use the CLI alias: `CLI="babysitter"`
41
26
 
42
- ```bash
43
- CLI="npx -y @a5c-ai/babysitter-sdk"
44
- ```
27
+ **Alternatively, use the CLI alias:** `CLI="npx -y @a5c-ai/babysitter-sdk@$SDK_VERSION"`
45
28
 
46
29
  ### jq
47
30
 
48
- Make sure `jq` is available in the path. Install it if missing.
49
-
50
- ---
51
-
52
- ## Core Iteration Workflow
53
-
54
- The Babysitter workflow has 8 steps:
55
-
56
- 1. **Create or find the process** - interview the user or parse the prompt,
57
- research the repo and process library, and build a process definition
58
- 2. **Create run and bind session** - create the run via the Babysitter CLI and
59
- bind it to the current Codex session
60
- 3. **Run iteration** - execute one orchestration step
61
- 4. **Get effects** - inspect pending effects
62
- 5. **Perform effects** - execute the requested tasks through skills, agents, or
63
- shell work
64
- 6. **Post results** - commit results back through `task:post`
65
- 7. **Stop and yield** - the Codex stop hook decides whether to continue (on
66
- Windows, stay in-turn and continue the loop yourself instead)
67
- 8. **Completion proof** - finish only when the emitted proof is returned
68
-
69
- ### 1. Create or find the process for the run
70
-
71
- #### Interview phase
72
-
73
- ##### Interactive mode (default)
74
-
75
- Interview the user for the intent, requirements, goal, scope, etc.
76
-
77
- A multi-step phase to understand the intent and perspective to approach the
78
- process building after researching the repo, short research online if needed,
79
- short research in the target repo, additional instructions, intent and library
80
- (processes, specializations, skills, subagents, methodologies, references, etc.)
81
- / guide for methodology building. You MUST resolve the active library root with
82
- `babysitter process-library:active --json` before process authoring, and you MUST
83
- conduct an actual search against that active process library instead of skipping
84
- directly to writing a process. The `process-library:active` command bootstraps
85
- the shared global SDK process library automatically if no binding exists yet.
86
- Read `binding.dir` from the returned JSON to get the active process-library root
87
- that must be searched. If you need the cloned repo root itself, read
88
- `defaultSpec.cloneDir` from the same JSON. After that, treat
89
- `specializations/**/**/**`, `methodologies/`, `contrib/`, and `reference/` as
90
- paths relative to `binding.dir`.
91
-
92
- The first step should be to look at the state of the repo, then find the most
93
- relevant processes, specializations, skills, subagents, methodologies,
94
- references, etc. to use as a reference. Use the babysitter CLI discover command
95
- to find the relevant processes, skills, subagents, etc. at various stages.
96
-
97
- Then this phase can have: research online, research the repo, user questions, and
98
- other steps one after the other until the intent, requirements, goal, scope, etc.
99
- are clear and the user is satisfied with the understanding. After each step,
100
- decide the type of next step to take. Do not plan more than 1 step ahead in this
101
- phase. The same step type can be used more than once in this phase.
102
-
103
- ##### Non-interactive mode (running with -p flag or no AskUserQuestion tool)
104
-
105
- When running non-interactively, skip the interview phase entirely. Instead:
106
-
107
- 1. Parse the initial prompt to extract intent, scope, and requirements.
108
- 2. Research the repo structure to understand the codebase.
109
- 3. Resolve the active process-library root with
110
- `babysitter process-library:active --json`, then search that active library
111
- for the most relevant specialization/methodology. Do not skip this search
112
- step.
113
- 4. Proceed directly to the process creation phase using the extracted
114
- requirements.
115
-
116
- #### User Profile Integration
117
-
118
- Before building the process, check for an existing user profile to personalize
119
- the orchestration:
120
-
121
- 1. **Read user profile**: Run `babysitter profile:read --user --json` to load
122
- the user profile. **Always use the CLI for profile operations -- never import
123
- or call SDK profile functions directly.**
124
-
125
- 2. **Pre-fill context**: Use the profile to understand the user's specialties,
126
- expertise levels, preferences, and communication style. This informs how you
127
- conduct the interview (skip questions the profile already answers) and how you
128
- build the process.
129
-
130
- 3. **Breakpoint density**: Use the `breakpointTolerance` field to calibrate
131
- breakpoint placement in the generated process:
132
- - `minimal`/`low` (expert users): Fewer breakpoints -- only at critical
133
- decision points (architecture choices, deployment, destructive operations)
134
- - `moderate` (intermediate users): Standard breakpoints at phase boundaries
135
- - `high`/`maximum` (novice users): More breakpoints -- add review gates after
136
- each implementation step, before each integration, and at every quality gate
137
- - Always respect `alwaysBreakOn` for operations that must always pause (e.g.,
138
- destructive-git, deploy)
139
- - If `skipBreakpointsForKnownPatterns` is true, reduce breakpoints for
140
- operations the user has previously approved
141
-
142
- 4. **Tool preferences**: Use `toolPreferences` and `installedSkills`/
143
- `installedAgents` to prioritize which agents and skills to use in the process.
144
- Prefer tools the user is familiar with.
145
-
146
- 5. **Communication style**: Adapt process descriptions and breakpoint questions
147
- to match the user's `communicationStyle` preferences (tone, explanationDepth,
148
- preferredResponseFormat).
149
-
150
- 6. **If no profile exists**: Proceed normally with the interview phase.
151
-
152
- 7. **CLI profile commands (mandatory)**: **All profile operations MUST use the
153
- babysitter CLI -- never import SDK profile functions directly.**
154
- - `babysitter profile:read --user --json`
155
- - `babysitter profile:read --project --json`
156
- - `babysitter profile:write --user --input <file> --json`
157
- - `babysitter profile:write --project --input <file> --json`
158
- - `babysitter profile:merge --user --input <file> --json`
159
- - `babysitter profile:merge --project --input <file> --json`
160
- - `babysitter profile:render --user`
161
- - `babysitter profile:render --project`
162
-
163
- Use `--dir <dir>` to override the default profile directory when needed.
164
-
165
- #### Process creation phase
166
-
167
- After the interview phase, create the complete custom process files (js and
168
- jsons) for the run according to the Process Creation Guidelines and
169
- methodologies section. Also install the babysitter-sdk inside `.a5c/` if it is
170
- not already installed. **IMPORTANT**: When installing into `.a5c/`, use
171
- `npm i --prefix .a5c @a5c-ai/babysitter-sdk` or a subshell
172
- `(cd .a5c && npm i @a5c-ai/babysitter-sdk)` to avoid leaving CWD inside
173
- `.a5c/`, which causes doubled path resolution bugs.
174
-
175
- You must abide the syntax and structure of the process files from the process
176
- library.
177
-
178
- **IMPORTANT -- Path resolution**: Always use **absolute paths** for `--entry`
179
- when calling `run:create`, and always run the CLI from the **project root**
180
- directory (not from `.a5c/`).
181
-
182
- **User profile awareness**: If a user profile was loaded in the User Profile
183
- Integration step, use it to inform process design -- adjust breakpoint density
184
- per the user's tolerance level, select agents/skills the user prefers, and match
185
- the process complexity to the user's expertise.
186
-
187
- **IMPORTANT -- Profile I/O in processes**: When generating process files, all
188
- profile read/write/merge operations MUST use the babysitter CLI commands
189
- (`babysitter profile:read`, `profile:write`, `profile:merge`,
190
- `profile:render`). Never instruct agents to import or call SDK profile functions
191
- directly.
192
-
193
- After the process is created and before creating the run:
194
-
195
- - **Interactive mode**: describe the process at high level (not the code or
196
- implementation details) to the user and ask for confirmation to use it, also
197
- generate it as a [process-name].diagram.md and [process-name].process.md file.
198
- If the user is not satisfied with the process, go back to the process creation
199
- phase and modify the process according to the feedback.
200
- - **Non-interactive mode**: proceed directly to creating the run without user
201
- confirmation.
202
-
203
- #### Intent Fidelity Checks (required before `run:create`)
204
-
205
- Before calling `run:create`, verify and document in your working notes:
206
-
207
- 1. The process scope matches the user prompt (no silent scope cuts).
208
- 2. The process structure follows library style/composition patterns rather than
209
- a one-off minimal flow.
210
- 3. Quality gates exist (verification/refinement loops, integration checks,
211
- and/or breakpoints appropriate for the task).
212
- 4. Any scope reduction, simplification, or recovery tradeoff is explicitly
213
- approved by the user before execution.
214
-
215
- If any check fails, do not call `run:create` yet; fix the process or ask the
216
- user for approval of the tradeoff.
217
-
218
- **Common mistakes to avoid:**
219
- - wrong: skipping repo/process-library research before writing the process
220
- - wrong: bypassing the orchestration model with helper scripts or inline logic
221
- - wrong: using `kind: 'node'` in generated tasks
222
- - correct: use `agent` or `skill` tasks for reasoning work, with `shell` only
223
- for existing CLIs, tests, linters, git, or builds
224
- - correct: include verification loops, refinement loops, quality gates, and
225
- breakpoints where appropriate
226
-
227
- ### 2. Create run and bind session (single command):
228
-
229
- **For new runs:**
230
-
231
- ```bash
232
- $CLI run:create \
233
- --process-id <id> \
234
- --entry <absolute-path>#<export> \
235
- --inputs <file> \
236
- --prompt "$PROMPT" \
237
- --harness codex \
238
- --state-dir .a5c \
239
- --plugin-root "${CODEX_PLUGIN_ROOT}" \
240
- --json
241
- ```
242
-
243
- **Required flags:**
244
- - `--process-id <id>` -- unique identifier for the process definition
245
- - `--entry <absolute-path>#<export>` -- path to the process JS file and its
246
- named export (e.g., `./my-process.js#process`)
247
- - `--prompt "$PROMPT"` -- the user's initial prompt/request text
248
- - `--harness codex` -- activates Codex session binding. The session ID is
249
- auto-resolved from `CODEX_THREAD_ID`, `CODEX_SESSION_ID`, or `CODEX_ENV_FILE`.
250
- - `--state-dir .a5c` -- required for honest workspace-local session state
251
- - `--plugin-root "${CODEX_PLUGIN_ROOT}"` -- plugin root for state resolution
252
-
253
- **Optional flags:**
254
- - `--inputs <file>` -- path to a JSON file with process inputs
255
- - `--run-id <id>` -- override auto-generated run ID
256
- - `--runs-dir <dir>` -- override runs directory (default: `.a5c/runs`)
257
-
258
- Do **not** pass `--session-id` explicitly inside a real Codex session. The Codex
259
- adapter auto-resolves the session/thread ID from environment variables. Only pass
260
- `--session-id` in out-of-band recovery flows.
261
-
262
- **Common mistakes to avoid:**
263
- - wrong: Calling `session:init` explicitly
264
- - wrong: Fabricating a session ID when none is available from the environment
265
- - wrong: Trying to bind the session in a separate step after run creation
266
- - correct: Using `--harness codex` with `run:create` to create the run AND
267
- auto-bind the session, relying on environment variables for honest session
268
- binding
269
-
270
- **For resuming existing runs:**
271
-
272
- ```bash
273
- $CLI session:resume \
274
- --session-id <id> \
275
- --state-dir .a5c \
276
- --run-id <runId> --runs-dir .a5c/runs --json
277
- ```
278
-
279
- ### 3. Run Iteration
280
-
281
- ```bash
282
- $CLI run:iterate .a5c/runs/<runId> --json --iteration <n> --plugin-root "${CODEX_PLUGIN_ROOT}"
283
- ```
284
-
285
- **Output:**
286
- ```json
287
- {
288
- "iteration": 1,
289
- "status": "executed|waiting|completed|failed|none",
290
- "action": "executed-tasks|waiting|none",
291
- "reason": "auto-runnable-tasks|breakpoint-waiting|terminal-state",
292
- "count": 3,
293
- "completionProof": "only-present-when-completed",
294
- "metadata": { "runId": "...", "processId": "..." }
295
- }
296
- ```
297
-
298
- **Status values:**
299
- - `"executed"` - Tasks executed, continue looping
300
- - `"waiting"` - Breakpoint/sleep, pause until released
301
- - `"completed"` - Run finished successfully
302
- - `"failed"` - Run failed with error
303
- - `"none"` - No pending effects
304
-
305
- **Common mistake to avoid:**
306
- - wrong: Calling run:iterate, performing the effect, posting the result,
307
- then calling run:iterate again in the same session
308
- - correct: Calling run:iterate, performing the effect, posting the result,
309
- then STOPPING the session so the hook triggers the next iteration
310
- (except on Windows, where you must continue in-turn)
311
-
312
- ### 4. Get Effects
313
-
314
- ```bash
315
- $CLI task:list .a5c/runs/<runId> --pending --json
316
- ```
317
-
318
- **Output:**
319
- ```json
320
- {
321
- "tasks": [
322
- {
323
- "effectId": "effect-abc123",
324
- "kind": "agent|skill|breakpoint",
325
- "label": "auto",
326
- "status": "requested"
327
- }
328
- ]
329
- }
330
- ```
331
-
332
- ### 5. Perform Effects
333
-
334
- Run the effect externally to the SDK (by you, your hook, or another worker).
335
- After execution (by delegation to an agent or skill), post the outcome summary
336
- into the run by calling `task:post`, which:
337
- - Writes the committed result to `tasks/<effectId>/result.json`
338
- - Appends an `EFFECT_RESOLVED` event to the journal
339
- - Updates the state cache
340
-
341
- IMPORTANT:
342
- - Delegate using the Task tool if possible.
343
- - Make sure the change was actually performed and not described or implied.
344
- (for example, if code files were mentioned as created in the summary, make
345
- sure they were actually created.)
346
- - Include in the instructions to the agent or skill to perform the task in
347
- full and return only the summary result in the requested schema.
348
-
349
- #### 5.1 Breakpoint Handling
350
-
351
- ##### 5.1.0 Mode Detection and Breakpoint Policy
352
-
353
- - If the user is present in chat, default to interactive breakpoint handling.
354
- - Use non-interactive handling only when execution context is explicitly
355
- non-interactive (for example no question tool / explicit non-interactive run).
356
- - Never auto-approve breakpoints when mode is ambiguous. Treat ambiguity as
357
- interactive and ask explicitly.
358
- - Any mode switch that changes approval behavior must be stated explicitly in
359
- the run notes.
360
-
361
- ##### 5.1.1 Interactive mode
362
-
363
- Ask the user explicitly for approval. Include explicit approve/reject options
364
- so the user's intent is unambiguous.
365
-
366
- **CRITICAL: Response validation rules:**
367
- - If the response is empty, no selection, or dismissed: treat as **NOT
368
- approved**. Re-ask the question or keep the breakpoint pending. Do NOT
369
- proceed.
370
- - NEVER fabricate, synthesize, or infer approval text. Only pass through the
371
- user's actual selected response verbatim.
372
- - NEVER assume approval from ambiguous, empty, or missing responses. When in
373
- doubt, the answer is "not approved".
374
-
375
- **CRITICAL: Breakpoint rejection posting rules:**
376
- - Breakpoint rejection MUST be posted with `--status ok` and a value of
377
- `{"approved": false, "response": "..."}`. NEVER use `--status error` for a
378
- user rejection -- that signals a task execution failure and will trigger
379
- `RUN_FAILED`, requiring manual journal surgery to recover.
380
- - Only use `--status error` if the question tool itself throws an error.
381
-
382
- **Breakpoint posting examples:**
383
-
384
- ```bash
385
- # User approved the breakpoint
386
- echo '{"approved": true, "response": "Looks good, proceed"}' > tasks/<effectId>/output.json
387
- $CLI task:post <runId> <effectId> --status ok --value tasks/<effectId>/output.json
388
-
389
- # User rejected the breakpoint (ALWAYS use --status ok, not --status error)
390
- echo '{"approved": false, "response": "Stop here"}' > tasks/<effectId>/output.json
391
- $CLI task:post <runId> <effectId> --status ok --value tasks/<effectId>/output.json
392
- ```
393
-
394
- ##### 5.1.2 Non-interactive mode
395
-
396
- Choose the best option from context and post the result. Rejections still use
397
- `--status ok` with `{"approved": false}`.
398
-
399
- ### 6. Results Posting
400
-
401
- **IMPORTANT**: Do NOT write `result.json` directly. The SDK owns that file.
402
-
403
- **Workflow:**
404
-
405
- 1. Write the result **value** to a separate file (e.g., `output.json` or
406
- `value.json`):
407
- ```json
408
- {
409
- "score": 85,
410
- "details": { ... }
411
- }
412
- ```
413
-
414
- 2. Post the result, passing the value file:
415
- ```bash
416
- $CLI task:post .a5c/runs/<runId> <effectId> \
417
- --status ok \
418
- --value tasks/<effectId>/output.json \
419
- --json
420
- ```
421
-
422
- The `task:post` command will:
423
- - Read the value from your file
424
- - Write the complete `result.json` (including schema, metadata, and your value)
425
- - Append an `EFFECT_RESOLVED` event to the journal
426
- - Update the state cache
427
-
428
- **Available flags:**
429
- - `--status <ok|error>` (required)
430
- - `--value <file>` - Result value (for status=ok)
431
- - `--error <file>` - Error payload (for status=error)
432
- - `--stdout-file <file>` - Capture stdout
433
- - `--stderr-file <file>` - Capture stderr
434
- - `--started-at <iso8601>` - Task start time
435
- - `--finished-at <iso8601>` - Task end time
436
- - `--metadata <file>` - Additional metadata JSON
437
-
438
- **Common mistake to avoid:**
439
- ```bash
440
- # wrong: Writing result.json directly
441
- echo '{"result": {...}}' > tasks/<effectId>/result.json
442
- $CLI task:post <runId> <effectId> --status ok
443
-
444
- # correct: Write value to separate file, let SDK create result.json
445
- echo '{"score": 85}' > tasks/<effectId>/output.json
446
- $CLI task:post <runId> <effectId> --status ok --value tasks/<effectId>/output.json
447
- ```
448
-
449
- **Breakpoint-specific posting:**
450
-
451
- ```bash
452
- # Breakpoint approval
453
- echo '{"approved": true, "response": "User approved"}' > tasks/<effectId>/output.json
454
- $CLI task:post <runId> <effectId> --status ok --value tasks/<effectId>/output.json
455
-
456
- # Breakpoint rejection (ALWAYS use --status ok, not --status error)
457
- echo '{"approved": false, "response": "User rejected"}' > tasks/<effectId>/output.json
458
- $CLI task:post <runId> <effectId> --status ok --value tasks/<effectId>/output.json
459
- ```
460
-
461
- ---
462
-
463
- ### 7. STOP after every phase after run-session association
464
-
465
- After `run:create` or any posted effect result, end the current turn and yield
466
- back to the Codex hook loop. The stop hook drives the loop, not you. Do not run
467
- multiple `run:iterate` steps in the same turn.
468
-
469
- **Native Windows exception**: because Codex does not execute hooks on Windows
470
- yet, do not end the turn and wait for the hook loop there. Continue with the
471
- next `run:iterate`, inspect tasks, perform effects, and post results in the same
472
- turn until the run completes or you need explicit user input for a breakpoint.
473
-
474
- Common mistakes to avoid:
475
- - wrong: Stopping the session before run-session association
476
- - correct: Stopping the session after run-session association, then after every
477
- iteration, letting the hook call you back to continue the loop until
478
- completion.
479
- - wrong: Calling run:iterate multiple times in the same session without stopping
480
- and letting the hook call you back (except on Windows where this is required).
481
- - correct: Calling run:iterate once, then stopping and letting the hook call you
482
- back for the next iteration until completion.
483
-
484
- On native Windows, an in-turn loop across `run:iterate` -> `task:list` ->
485
- effect execution -> `task:post` is expected and valid because hooks do not
486
- resume the session. This is not considered bypassing the orchestration model as
487
- long as each iteration/effect is handled through the CLI and run journal.
488
-
489
- ### 8. Completion Proof
31
+ Make sure you have jq installed and available in the PATH. If not, install it.
490
32
 
491
- When the run is completed, the CLI will emit a `completionProof` value in the
492
- output of `run:iterate` and `run:status`. You must return that exact value
493
- wrapped in a `<promise>...</promise>` tag to signal completion to the stop-hook
494
- loop.
33
+ ## Instructions
495
34
 
496
- ### Progressing Between The Phases
35
+ Run the following command to get full orchestration instructions:
497
36
 
498
- After you run `run:create`, progress through the phases above by stopping the
499
- session, which will trigger the stop-hook and call you with the next phase.
500
- After you finish each phase after `run:create`, stop the session and return
501
- control to the user until you are called again by the hook.
502
-
503
- ## Task Kinds
504
-
505
- **CRITICAL RULE: NEVER use `node` kind effects in generated processes.** The
506
- `node` kind executes inline Node.js scripts which bypasses the agent
507
- orchestration model entirely. Every task that would be a `node` effect MUST be
508
- converted to an `agent` or `skill` effect instead. The only acceptable
509
- non-agent kinds are `shell` (for running existing test suites, git commands, or
510
- build tools), `breakpoint` (human approval), and `sleep` (time gates).
511
-
512
- | Kind | Description | Executor | When to use |
513
- |------|-------------|----------|-------------|
514
- | ~~`node`~~ | ~~Node.js script~~ | ~~Local node process~~ | **NEVER -- forbidden. Convert to `agent` or `skill`.** |
515
- | `shell` | Shell command | Local shell process | Only for running existing CLI tools, test suites, git, linters, builds |
516
- | `agent` | LLM agent | Agent runtime | **Default for all tasks** -- planning, implementation, analysis, verification, scoring, debugging, code writing, research |
517
- | `skill` | Installed skill | Skill system | When a matching installed skill exists (preferred over agent when available) |
518
- | `breakpoint` | Human approval | UI/CLI | Decision gates requiring user input |
519
- | `sleep` | Time gate | Scheduler | Time-based pauses |
520
-
521
- ### Agent Task Example
522
-
523
- Important: Check which subagents and agents are actually available before
524
- assigning the name. If none, pass the general-purpose subagent. Check the
525
- subagents and agents in the plugin (in nested folders) to find relevant
526
- subagents and agents to use as a reference. Specifically check subagents and
527
- agents in folders next to the reference process file.
528
-
529
- When executing the agent task, use the Task tool. Never use the Babysitter skill
530
- or agent to execute the task.
531
-
532
- ```javascript
533
- export const agentTask = defineTask('agent-scorer', (args, taskCtx) => ({
534
- kind: 'agent',
535
- title: 'Agent scoring',
536
- agent: {
537
- name: 'quality-scorer',
538
- prompt: {
539
- role: 'QA engineer',
540
- task: 'Score results 0-100',
541
- context: { ...args },
542
- instructions: ['Review', 'Score', 'Recommend'],
543
- outputFormat: 'JSON'
544
- },
545
- outputSchema: {
546
- type: 'object',
547
- required: ['score']
548
- }
549
- },
550
-
551
- io: {
552
- inputJsonPath: `tasks/${taskCtx.effectId}/input.json`,
553
- outputJsonPath: `tasks/${taskCtx.effectId}/output.json`
554
- }
555
- }));
556
- ```
557
-
558
- ### Skill Task Example
559
-
560
- Important: Check which skills are actually available before assigning the skill
561
- name. Check the skills in the plugin (in nested folders) to find relevant
562
- skills to use as a reference. Skills are preferred over subagents for executing
563
- tasks.
564
-
565
- ```javascript
566
- export const skillTask = defineTask('analyzer-skill', (args, taskCtx) => ({
567
- kind: 'skill',
568
- title: 'Analyze codebase',
569
-
570
- skill: {
571
- name: 'codebase-analyzer',
572
- context: {
573
- scope: args.scope,
574
- depth: args.depth,
575
- analysisType: args.type,
576
- criteria: ['Code consistency', 'Naming conventions', 'Error handling'],
577
- instructions: [
578
- 'Scan specified paths for code patterns',
579
- 'Analyze consistency across the codebase',
580
- 'Check naming conventions',
581
- 'Review error handling patterns',
582
- 'Generate structured analysis report'
583
- ]
584
- }
585
- },
586
-
587
- io: {
588
- inputJsonPath: `tasks/${taskCtx.effectId}/input.json`,
589
- outputJsonPath: `tasks/${taskCtx.effectId}/output.json`
590
- }
591
- }));
592
- ```
593
-
594
- ---
595
-
596
- ## Quick Commands Reference
597
-
598
- **Create run (with session binding):**
599
37
  ```bash
600
- $CLI run:create --process-id <id> --entry <path>#<export> --inputs <file> \
601
- --prompt "$PROMPT" --harness codex \
602
- --state-dir .a5c --plugin-root "${CODEX_PLUGIN_ROOT}" --json
38
+ babysitter instructions:babysit-skill --harness codex --interactive
603
39
  ```
604
40
 
605
- **Check status:**
606
- ```bash
607
- $CLI run:status <runId> --json
608
- ```
41
+ For non-interactive runs (e.g., with `-p` flag or no question tool):
609
42
 
610
- When the run completes, `run:iterate` and `run:status` emit `completionProof`.
611
- Use that exact value in a `<promise>...</promise>` tag to end the loop.
612
-
613
- **View events:**
614
43
  ```bash
615
- $CLI run:events <runId> --limit 20 --reverse
44
+ babysitter instructions:babysit-skill --harness codex --no-interactive
616
45
  ```
617
46
 
618
- **List tasks:**
619
- ```bash
620
- $CLI task:list <runId> --pending --json
621
- ```
622
-
623
- **Post task result:**
624
- ```bash
625
- $CLI task:post <runId> <effectId> --status <ok|error> --json
626
- ```
627
-
628
- **Iterate:**
629
- ```bash
630
- $CLI run:iterate <runId> --json --iteration <n> --plugin-root "${CODEX_PLUGIN_ROOT}"
631
- ```
632
-
633
- ---
634
-
635
- ## Recovery from failure
636
-
637
- If at any point the run fails due to SDK issues or corrupted state or journal,
638
- analyze the error and the journal events. Recover the state and journal to the
639
- last known good state, adapt, and try to continue the run.
640
-
641
- ### Failure Protocol (required)
642
-
643
- When blocked or failed, follow this order:
644
-
645
- 1. Report the concrete blocker and root cause (command/output based, not vague).
646
- 2. Attempt repair of current run/session/journal first.
647
- 3. Present recovery options when strategy changes intent/scope:
648
- - Option A: continue intent-faithful repair path (recommended)
649
- - Option B: reduced-scope fallback (requires explicit user approval)
650
- 4. Do not create a new simplified process without explicit approval if it
651
- reduces scope or quality expectations.
652
- 5. Resume orchestration only after the chosen recovery path is explicit.
653
-
654
- ## Process Creation Guidelines and methodologies
655
-
656
- - When building UX and full stack applications, integrate/link the main pages
657
- of the frontend with functionality created for every phase of the development
658
- process (where relevant), so that there is a way to test the functionality as
659
- you go.
660
-
661
- - Unless otherwise specified, prefer quality gated iterative development loops
662
- in the process.
663
-
664
- - You can change the process after the run is created or during the run (and
665
- adapt the process accordingly and journal accordingly) in case you discover new
666
- information or requirements.
667
-
668
- - The process should be a comprehensive and complete solution to the user
669
- request.
670
-
671
- - The process should usually be a composition (in code) of multiple processes
672
- from the process library (not just one), for multiple phases and parts of the
673
- process, each utilizing a different process from the library as a reference.
674
-
675
- - Include verification and refinement steps (and loops) for planning phases and
676
- integration phases, debugging phases, refactoring phases, etc.
677
-
678
- - Create the process with (and around) the available skills and subagents.
679
- (check which are available first and use discover to find them)
680
-
681
- - Prefer incremental work that allows testing and experimentation with the new
682
- functionality as you go.
683
-
684
- ### Process File Discovery Markers
685
-
686
- When creating process files, include `@skill` and `@agent` markers in the JSDoc
687
- header listing the skills and agents relevant to this process. The SDK reads
688
- these markers to provide targeted discovery results instead of scanning all
689
- available skills.
690
-
691
- **Format** (one per line, path relative to the active process-library root):
692
- ```javascript
693
- /**
694
- * @process specializations/web-development/react-app-development
695
- * @description React app development with TDD
696
- * @skill frontend-design specializations/web-development/skills/frontend-design/SKILL.md
697
- * @agent frontend-architect specializations/web-development/agents/frontend-architect/AGENT.md
698
- */
699
- ```
700
-
701
- **Steps during process creation:**
702
- 1. Use `babysitter skill:discover --process-path <path> --json` to find
703
- relevant skills/agents in the specialization directory
704
- 2. Select the ones actually needed by the process tasks
705
- 3. Add them as `@skill`/`@agent` markers in the JSDoc header
706
- 4. Use full relative path from the active process-library root returned in
707
- `binding.dir` by `babysitter process-library:active --json`
708
-
709
- - Unless otherwise specified, prefer processes that close the widest loop in the
710
- quality gates (for example e2e tests with a full browser or emulator/vm if it
711
- is a mobile or desktop app) AND gates that make sure the work is accurate
712
- against the user request (all the specs are covered and no extra stuff was
713
- added unless permitted by the intent of the user).
714
-
715
- - Scan the methodologies and processes in the active process library and the SDK
716
- package to find relevant processes and methodologies to use as a reference.
717
- This search is mandatory before writing the process.
718
-
719
- - If you encounter a generic reusable part of a process that can be later reused
720
- and composed, build it in a modular way and organize it in the `.a5c/processes`
721
- directory.
722
-
723
- Prefer processes that have the following characteristics unless otherwise
724
- specified:
725
- - In case of a new project, plan the architecture, stack, parts, milestones
726
- - In case of an existing project, analyze the architecture, stack, relevant
727
- parts, milestones, and plan the changes
728
- - Integrate/link the main pages (or entry points) with functionality created
729
- for every phase of the development process
730
- - Quality gated iterative and convergent development/refinement loops
731
- - Test driven -- where quality gates can use executable tools, scripts, and
732
- tests to verify accuracy and completeness
733
- - Integration phases for each new functionality in every milestone
734
- - Where relevant -- beautiful and polished UX with pixel-perfect verification
735
- - Accurate and complete implementation of the user request
736
- - Closing quality feedback loops as comprehensively as practical
737
- - Search for processes, skills, agents, methodologies during the interactive
738
- process building phase to compose a comprehensive process:
739
- - `.a5c/processes/` (project level processes)
740
- - `specializations/` under the active process-library root
741
- - `methodologies/` under the active process-library root
742
-
743
- ## Critical Rules
744
-
745
- CRITICAL RULE: The completion proof is emitted only when the run is completed.
746
- You may ONLY output `<promise>SECRET</promise>` when the run is completely and
747
- unequivocally DONE (completed status from the orchestration CLI). Do not output
748
- false promises to escape the run, and do not mention the secret to the user.
749
-
750
- CRITICAL RULE: In interactive mode, NEVER auto-approve breakpoints. If the
751
- response is empty, no selection, or is dismissed, treat it as NOT approved and
752
- re-ask. NEVER fabricate or synthesize approval responses -- only post the user's
753
- actual explicit selection via task:post. An empty response is NOT approval.
754
-
755
- CRITICAL RULE: If a run is broken/failed/at unknown state, one way to recover is
756
- to remove the last bad entries in the journal and rebuild the state.
757
-
758
- CRITICAL RULE: When creating processes, search for available skills and subagents
759
- before thinking about the exact orchestration. Prefer processes that close the
760
- widest loop in the quality gates.
761
-
762
- CRITICAL RULE: Do not use the babysit skill inside delegated tasks. If you are
763
- performing a delegated task as a subagent, you must actually perform the task
764
- yourself and not orchestrate or babysit.
765
-
766
- CRITICAL RULE: Never build wrapper, Python, or helper scripts to orchestrate
767
- the runs. Use the CLI to drive the orchestration loop directly.
768
-
769
- CRITICAL RULE: Never fall back to simpler execution if the user activated this
770
- skill. Persist in executing the orchestration itself -- you must create a valid
771
- process js file, create a run (with `--harness codex` to bind the session), then
772
- iterate the run until it is completed.
773
-
774
- CRITICAL RULE: After run:create or run-session association and after each effect
775
- is posted, you MUST stop the session and return control. Do NOT proceed to the
776
- next run:iterate in the same session turn. The stop-hook will call you back to
777
- continue. Running multiple iterations in a single session turn bypasses the hook
778
- loop and breaks the orchestration model. (Exception: on native Windows where
779
- hooks do not run, you must continue in-turn.)
780
-
781
- CRITICAL RULE: NEVER use `kind: 'node'` in generated process files. All tasks
782
- MUST use `kind: 'agent'` or `kind: 'skill'`. The only non-agent kinds allowed
783
- are `shell` (for existing CLI tools/tests/git), `breakpoint`, and `sleep`.
784
-
785
- CRITICAL RULE: NEVER bypass, avoid, or skip the babysitter orchestration model
786
- when the user explicitly requested this skill. Do not execute the tasks yourself,
787
- do not create helper scripts. Use the CLI to drive the orchestration loop. On
788
- native Windows, an in-turn iterative loop is required because hooks do not
789
- resume execution there. Do not bypass it for simple tasks, nor when things get complicated.
790
- Persist in executing the orchestration itself.
791
-
792
- CRITICAL RULE: Do not fabricate a session ID. Let the Codex adapter auto-resolve
793
- it from `CODEX_THREAD_ID`, `CODEX_SESSION_ID`, or `CODEX_ENV_FILE`.
794
-
795
- ## See Also
796
- - `library/tdd-quality-convergence.js` - TDD quality convergence example
797
- - `library/reference/ADVANCED_PATTERNS.md` - Agent/skill patterns, iterative convergence
798
- - `library/reference/sdk.md` - SDK API reference
47
+ Follow the instructions returned by the command above to orchestrate the run.
@@ -1,6 +1,7 @@
1
1
  ---
2
2
  name: retrospect
3
- description: Summarize or retrospect on a completed Babysitter run.
3
+ description: Summarize or retrospect on one or more completed Babysitter runs.
4
+ argument-hint: "[run-id...] [--all] Optional run IDs or --all for all runs"
4
5
  ---
5
6
 
6
7
  # retrospect
@@ -9,7 +10,46 @@ Load and use the installed `babysit` skill.
9
10
 
10
11
  Resolve the request in `retrospect` mode:
11
12
 
12
- - treat everything after `$retrospect` as the run selector to summarize
13
+ - treat everything after `$retrospect` as the run selector(s) to summarize
13
14
  - focus on the run history, outcomes, lessons, and gaps
14
15
  - do not create a separate command surface here; this skill only forwards into
15
16
  `babysit`
17
+
18
+ ## Phase 1: Resolve Target Run(s)
19
+
20
+ - If `--all` or "all" is present in args: list all runs via `ls -lt .a5c/runs/` and collect all completed/failed run IDs
21
+ - If multiple run IDs are provided: use all of them
22
+ - Otherwise: existing behavior (resolve the latest single run)
23
+ - Use `ask_user` to confirm run selection in interactive mode
24
+
25
+ ## Phase 2: Load Run Data
26
+
27
+ For each selected run, load:
28
+ - `run.json` metadata
29
+ - Journal events
30
+ - Task definitions and results
31
+ - State snapshots
32
+
33
+ ## Phase 3: Analysis
34
+
35
+ Perform standard per-run analysis (outcomes, process effectiveness, suggestions).
36
+
37
+ ### Cross-Run Pattern Analysis (multi-run mode)
38
+
39
+ When analyzing multiple runs, additionally cover:
40
+ - **Common failure modes** across runs
41
+ - **Velocity trends** (tasks/time across runs)
42
+ - **Process evolution** (how processes changed over time)
43
+ - **Repeated breakpoint patterns**
44
+
45
+ ## Phase 4: Suggestions
46
+
47
+ Provide actionable suggestions for process improvements, optimizations, and fixes.
48
+
49
+ ## Phase 5: Implementation
50
+
51
+ If the user agrees, implement improvements to processes, skills, or configuration.
52
+
53
+ ## Phase 6: Cleanup Suggestion
54
+
55
+ After analysis, suggest: "Consider running `babysitter cleanup` (or `/babysitter:cleanup`) to clean up old run data and reclaim disk space."