@a5c-ai/babysitter-sdk 0.0.17 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"main.d.ts","sourceRoot":"","sources":["../../src/cli/main.ts"],"names":[],"mappings":";AAq5CA,wBAAgB,mBAAmB;eAEf,MAAM,EAAE,GAA2B,OAAO,CAAC,MAAM,CAAC;kBAyCpD,MAAM;EAIvB"}
1
+ {"version":3,"file":"main.d.ts","sourceRoot":"","sources":["../../src/cli/main.ts"],"names":[],"mappings":";AAonDA,wBAAgB,mBAAmB;eAEf,MAAM,EAAE,GAA2B,OAAO,CAAC,MAAM,CAAC;kBA4CpD,MAAM;EAIvB"}
package/dist/cli/main.js CHANGED
@@ -37,6 +37,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
37
37
  exports.createBabysitterCli = createBabysitterCli;
38
38
  const node_fs_1 = require("node:fs");
39
39
  const path = __importStar(require("node:path"));
40
+ const os = __importStar(require("node:os"));
40
41
  const nodeTaskRunner_1 = require("./nodeTaskRunner");
41
42
  const orchestrateIteration_1 = require("../runtime/orchestrateIteration");
42
43
  const createRun_1 = require("../runtime/createRun");
@@ -55,6 +56,7 @@ const USAGE = `Usage:
55
56
  babysitter run:continue <runDir> [--runs-dir <dir>] [--json] [--dry-run] [--auto-node-tasks] [--auto-node-max <n>] [--auto-node-label <text>]
56
57
  babysitter task:list <runDir> [--runs-dir <dir>] [--pending] [--kind <kind>] [--json]
57
58
  babysitter task:show <runDir> <effectId> [--runs-dir <dir>] [--json]
59
+ babysitter skill:install [--type <claude|codex|cursor>] [--scope <local|global>] [--skills-dir <dir>] [--force] [--json] [--dry-run]
58
60
 
59
61
  Global flags:
60
62
  --runs-dir <dir> Override the runs directory (defaults to current working directory).
@@ -63,14 +65,20 @@ Global flags:
63
65
  --verbose Log resolved paths and options to stderr for debugging.
64
66
  --help, -h Show this help text.`;
65
67
  const LARGE_RESULT_PREVIEW_LIMIT = 1024 * 1024; // 1 MiB
68
+ const DEFAULT_SKILL_TARGET = "codex";
69
+ const DEFAULT_SKILL_SCOPE = "local";
66
70
  function parseArgs(argv) {
67
71
  const [initialCommand, ...rest] = argv;
68
72
  const parsed = {
69
73
  command: initialCommand,
70
74
  runsDir: ".",
75
+ skillsDir: undefined,
76
+ skillType: DEFAULT_SKILL_TARGET,
77
+ skillScope: DEFAULT_SKILL_SCOPE,
71
78
  json: false,
72
79
  dryRun: false,
73
80
  verbose: false,
81
+ force: false,
74
82
  helpRequested: false,
75
83
  autoNodeTasks: false,
76
84
  pendingOnly: false,
@@ -91,6 +99,18 @@ function parseArgs(argv) {
91
99
  parsed.runsDir = expectFlagValue(rest, ++i, "--runs-dir");
92
100
  continue;
93
101
  }
102
+ if (arg === "--skills-dir") {
103
+ parsed.skillsDir = expectFlagValue(rest, ++i, "--skills-dir");
104
+ continue;
105
+ }
106
+ if (arg === "--type") {
107
+ parsed.skillType = expectSkillTarget(expectFlagValue(rest, ++i, "--type"), "--type");
108
+ continue;
109
+ }
110
+ if (arg === "--scope") {
111
+ parsed.skillScope = expectSkillScope(expectFlagValue(rest, ++i, "--scope"), "--scope");
112
+ continue;
113
+ }
94
114
  if (arg === "--json") {
95
115
  parsed.json = true;
96
116
  continue;
@@ -99,6 +119,10 @@ function parseArgs(argv) {
99
119
  parsed.dryRun = true;
100
120
  continue;
101
121
  }
122
+ if (arg === "--force") {
123
+ parsed.force = true;
124
+ continue;
125
+ }
102
126
  if (arg === "--verbose") {
103
127
  parsed.verbose = true;
104
128
  continue;
@@ -212,6 +236,29 @@ function parsePositiveInteger(raw, flag) {
212
236
  }
213
237
  return Math.floor(parsed);
214
238
  }
239
+ function expectSkillTarget(raw, flag) {
240
+ const normalized = raw.trim().toLowerCase();
241
+ if (normalized === "claude" || normalized === "codex" || normalized === "cursor") {
242
+ return normalized;
243
+ }
244
+ throw new Error(`${flag} must be one of: claude, codex, cursor`);
245
+ }
246
+ function expectSkillScope(raw, flag) {
247
+ const normalized = raw.trim().toLowerCase();
248
+ if (normalized === "local" || normalized === "global") {
249
+ return normalized;
250
+ }
251
+ throw new Error(`${flag} must be one of: local, global`);
252
+ }
253
+ function resolveSkillsDir(parsed) {
254
+ if (parsed.skillsDir) {
255
+ return path.resolve(parsed.skillsDir);
256
+ }
257
+ const scopeBase = parsed.skillScope === "global"
258
+ ? path.join(os.homedir(), `.${parsed.skillType}`)
259
+ : path.resolve(`.${parsed.skillType}`);
260
+ return path.join(scopeBase, "skills");
261
+ }
215
262
  function summarizeActions(actions) {
216
263
  return actions.map((action) => ({
217
264
  effectId: action.effectId,
@@ -309,6 +356,84 @@ function formatVerboseValue(value) {
309
356
  return String(value);
310
357
  return JSON.stringify(value);
311
358
  }
359
+ function resolveBundledSkillsRoot() {
360
+ return path.resolve(__dirname, "..", "..", "skills");
361
+ }
362
+ async function listBundledSkillDirs() {
363
+ const root = resolveBundledSkillsRoot();
364
+ const entries = await node_fs_1.promises.readdir(root, { withFileTypes: true });
365
+ return entries.filter((entry) => entry.isDirectory()).map((entry) => entry.name).sort();
366
+ }
367
+ async function pathExists(filePath) {
368
+ try {
369
+ await node_fs_1.promises.stat(filePath);
370
+ return true;
371
+ }
372
+ catch (error) {
373
+ const err = error;
374
+ if (err.code === "ENOENT") {
375
+ return false;
376
+ }
377
+ throw error;
378
+ }
379
+ }
380
+ function toPosixPath(value) {
381
+ return value.replace(/\\/g, "/");
382
+ }
383
+ async function installBundledSkillDir(skillName, options) {
384
+ const sourceDir = path.join(resolveBundledSkillsRoot(), skillName);
385
+ const destinationDir = path.join(options.skillsDir, skillName);
386
+ try {
387
+ const sourceExists = await pathExists(sourceDir);
388
+ if (!sourceExists) {
389
+ return {
390
+ name: skillName,
391
+ status: "error",
392
+ sourceDir,
393
+ destinationDir,
394
+ message: "bundled skill missing",
395
+ };
396
+ }
397
+ const destinationExists = await pathExists(destinationDir);
398
+ if (destinationExists && !options.force) {
399
+ return {
400
+ name: skillName,
401
+ status: "skipped",
402
+ sourceDir,
403
+ destinationDir,
404
+ message: "already installed",
405
+ };
406
+ }
407
+ if (options.dryRun) {
408
+ return {
409
+ name: skillName,
410
+ status: "planned",
411
+ sourceDir,
412
+ destinationDir,
413
+ };
414
+ }
415
+ if (destinationExists && options.force) {
416
+ await node_fs_1.promises.rm(destinationDir, { recursive: true, force: true });
417
+ }
418
+ await node_fs_1.promises.mkdir(options.skillsDir, { recursive: true });
419
+ await node_fs_1.promises.cp(sourceDir, destinationDir, { recursive: true });
420
+ return {
421
+ name: skillName,
422
+ status: "installed",
423
+ sourceDir,
424
+ destinationDir,
425
+ };
426
+ }
427
+ catch (error) {
428
+ return {
429
+ name: skillName,
430
+ status: "error",
431
+ sourceDir,
432
+ destinationDir,
433
+ message: error instanceof Error ? error.message : String(error),
434
+ };
435
+ }
436
+ }
312
437
  function allowSecretLogs(parsed) {
313
438
  if (!parsed.json || !parsed.verbose) {
314
439
  return false;
@@ -1037,6 +1162,86 @@ async function handleTaskShow(parsed) {
1037
1162
  }
1038
1163
  return 0;
1039
1164
  }
1165
+ async function handleSkillInstall(parsed) {
1166
+ const skillsDir = resolveSkillsDir(parsed);
1167
+ logVerbose("skill:install", parsed, {
1168
+ skillsDir,
1169
+ type: parsed.skillType,
1170
+ scope: parsed.skillScope,
1171
+ dryRun: parsed.dryRun,
1172
+ force: parsed.force,
1173
+ json: parsed.json,
1174
+ });
1175
+ const results = [];
1176
+ let skillNames;
1177
+ try {
1178
+ skillNames = await listBundledSkillDirs();
1179
+ }
1180
+ catch (error) {
1181
+ const message = error instanceof Error ? error.message : String(error);
1182
+ if (parsed.json) {
1183
+ console.log(JSON.stringify({ skillsDir, type: parsed.skillType, scope: parsed.skillScope, error: message, results: [] }));
1184
+ }
1185
+ else {
1186
+ console.error(`[skill:install] failed to read bundled skills: ${message}`);
1187
+ }
1188
+ return 1;
1189
+ }
1190
+ if (!skillNames.length) {
1191
+ if (parsed.json) {
1192
+ console.log(JSON.stringify({ skillsDir, type: parsed.skillType, scope: parsed.skillScope, error: "no bundled skills found", results: [] }));
1193
+ }
1194
+ else {
1195
+ console.error("[skill:install] no bundled skills found");
1196
+ }
1197
+ return 1;
1198
+ }
1199
+ for (const skillName of skillNames) {
1200
+ results.push(await installBundledSkillDir(skillName, { skillsDir, dryRun: parsed.dryRun, force: parsed.force }));
1201
+ }
1202
+ const counts = { installed: 0, skipped: 0, planned: 0, error: 0 };
1203
+ for (const result of results) {
1204
+ if (result.status === "installed")
1205
+ counts.installed += 1;
1206
+ else if (result.status === "skipped")
1207
+ counts.skipped += 1;
1208
+ else if (result.status === "planned")
1209
+ counts.planned += 1;
1210
+ else
1211
+ counts.error += 1;
1212
+ }
1213
+ if (parsed.json) {
1214
+ console.log(JSON.stringify({ skillsDir, type: parsed.skillType, scope: parsed.skillScope, results }));
1215
+ return counts.error > 0 ? 1 : 0;
1216
+ }
1217
+ const parts = [`[skill:install] dir=${skillsDir}`];
1218
+ if (!parsed.skillsDir) {
1219
+ parts.push(`type=${parsed.skillType}`);
1220
+ parts.push(`scope=${parsed.skillScope}`);
1221
+ }
1222
+ if (parsed.dryRun)
1223
+ parts.push("dryRun=true");
1224
+ if (parsed.force)
1225
+ parts.push("force=true");
1226
+ if (counts.installed)
1227
+ parts.push(`installed=${counts.installed}`);
1228
+ if (counts.skipped)
1229
+ parts.push(`skipped=${counts.skipped}`);
1230
+ if (counts.planned)
1231
+ parts.push(`planned=${counts.planned}`);
1232
+ if (counts.error)
1233
+ parts.push(`errors=${counts.error}`);
1234
+ console.log(parts.join(" "));
1235
+ for (const result of results) {
1236
+ const relativeDest = toPosixPath(path.relative(skillsDir, result.destinationDir));
1237
+ const relativeSource = toPosixPath(path.relative(skillsDir, result.sourceDir));
1238
+ const destLabel = relativeDest.startsWith("..") ? toPosixPath(result.destinationDir) : relativeDest;
1239
+ const sourceLabel = relativeSource.startsWith("..") ? toPosixPath(result.sourceDir) : relativeSource;
1240
+ const messageSuffix = result.message ? ` message=${result.message}` : "";
1241
+ console.log(`- ${result.name} status=${result.status} dest=${destLabel} src=${sourceLabel}${messageSuffix}`);
1242
+ }
1243
+ return counts.error > 0 ? 1 : 0;
1244
+ }
1040
1245
  function toTaskListEntry(record, runDir) {
1041
1246
  return {
1042
1247
  effectId: record.effectId,
@@ -1326,6 +1531,9 @@ function createBabysitterCli() {
1326
1531
  if (parsed.command === "task:show") {
1327
1532
  return await handleTaskShow(parsed);
1328
1533
  }
1534
+ if (parsed.command === "skill:install") {
1535
+ return await handleSkillInstall(parsed);
1536
+ }
1329
1537
  console.error(USAGE);
1330
1538
  return 1;
1331
1539
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@a5c-ai/babysitter-sdk",
3
- "version": "0.0.17",
3
+ "version": "0.0.18",
4
4
  "description": "Storage and run-registry primitives for event-sourced babysitter workflows.",
5
5
  "license": "UNLICENSED",
6
6
  "type": "commonjs",
@@ -10,7 +10,8 @@
10
10
  "babysitter": "dist/cli/main.js"
11
11
  },
12
12
  "files": [
13
- "dist"
13
+ "dist",
14
+ "skills"
14
15
  ],
15
16
  "scripts": {
16
17
  "build": "tsc -p tsconfig.json",
@@ -0,0 +1,203 @@
1
+ ---
2
+ name: babysitter
3
+ description: Orchestrate .a5c runs via @a5c-ai/babysitter-sdk CLI (create, continue, inspect, task ops). Use when the user asks to orchestrate or babysit a run; delegate breakpoint communication to the babysitter-breakpoint skill.
4
+ ---
5
+
6
+ # babysitter
7
+
8
+ You are **babysitter**—the orchestrator that keeps `.a5c/runs/<runId>/` in a healthy, deterministic state. Follow an event-sourced workflow and use the `@a5c-ai/babysitter-sdk` CLI wherever possible instead of manual scripts. The CLI exposes the surface documented in `docs/cli-examples.md` (`run:create`, `run:status`, `run:events`, `run:continue`, `task:list`, `task:run`, etc.).
9
+
10
+ We operate in an **iterative, quality-gated loop**:
11
+
12
+ 1. Run preflight checks (CLI version, global flags) before every session.
13
+ 2. Execute a single CLI-driven orchestration step.
14
+ 3. Verify the output against the SDK/CLI references (field names, metadata, redaction rules).
15
+ 4. Repeat until the run converges (status `completed`/`failed`). Stop immediately if verification fails; fix the drift first.
16
+
17
+ > **CLI alias:** all examples use
18
+ > `CLI="npx -y @a5c-ai/babysitter-sdk"`
19
+ > so you can run commands from the repo root like `$CLI run:status .a5c/runs/<id>` (leave `$CLI` unquoted so the shell splits it into `npx` and its arguments). Adjust if you install the binary globally.
20
+
21
+ ---
22
+
23
+ ## 1. Setup & quality gate
24
+
25
+ 1. Verify CLI availability **and capture version** (quality gate)
26
+
27
+ ```bash
28
+ $CLI --version
29
+ $CLI run:status .a5c/runs/example --help # sanity check output
30
+ ```
31
+
32
+ Ensure the help text matches the options documented in `docs/cli-examples.md` (global flags, redaction notes). Record the version in your notes.
33
+ 2. Ensure the CLI orchestration output is accessible by running a quick
34
+ `run:status` or `run:events` check before you proceed.
35
+
36
+ Do **not** proceed if the CLI check fails; fix the issue (missing Node, npx resolution, command errors) first—this is your quality gate.
37
+
38
+ ---
39
+
40
+ ## 2. Core orchestration rules (self-contained)
41
+
42
+ - **CLI-first**: use the babysitter CLI for all run state and orchestration.
43
+ - **Run entry**: you are either resuming a run id or creating a new run from a high-level task.
44
+ - **Main.js approval**: when you create a new run, inspect `.a5c/processes/**` for a suitable starting point, generate `.a5c/runs/<runId>/code/main.js`, and create `.a5c/runs/<runId>/artifacts/process.md`. Before any orchestration, use the `babysitter-breakpoint` skill to request approval of the process, inputs, and `main.js`. Do not proceed until the breakpoint is released with approval. After approval, do not change `main.js` without explicit instruction.
45
+ - **Missing babysitter-breakpoint skill**: if the `babysitter-breakpoint` skill is unavailable, install it from npm by running `npm install -g @a5c-ai/babysitter-breakpoints`, then `breakpoints install-skill --target codex --scope global`. If the `breakpoints` CLI is not on PATH, run `npx -y @a5c-ai/babysitter-breakpoints install-skill --target codex --scope global`. Restart Codex after install.
46
+ - **Do the work**: execute the CLI-provided next action yourself; do not defer
47
+ to agent runners or function templates.
48
+ - **Follow the process**: execute exactly what `code/main.js` (and imported files) prescribe; only deviate when the user explicitly instructs it.
49
+ - **Helper scripts**: if needed, store them in `.a5c/orchestrator_scripts/` or `.a5c/runs/<runId>/orchestrator/`, never as whole-iteration automation.
50
+ - **Journal/state ownership**: do not edit `journal.jsonl` or `state.json` by hand; use the CLI and agent outputs so state stays deterministic.
51
+ - **Wrapper semantics**: if a function call is wrapped with `newRun` or `@run`, create a new run and orchestrate it separately, then report the result to the parent run. If a function list is wrapped with `parallel(...)`, orchestrate them in parallel and return once all are complete.
52
+ - **Sleep handling**: when encountering `sleep(...)`, record start/end via CLI events/notes so the process is resumable.
53
+
54
+
55
+
56
+ ---
57
+
58
+ ## 3. Inputs you may receive
59
+
60
+ - **Resume existing run**: user supplies run id (e.g., `run-20260109-101648-dev-build`). All artifacts live under `.a5c/runs/<runId>/`.
61
+ - **Create new run**: user provides a high-level task. You must initialize a fresh run id, craft `code/main.js`, update `inputs.json`, etc.
62
+
63
+ Regardless of the entry point, always:
64
+
65
+ 1. Read/understand `.a5c/runs/<runId>/code/main.js` and referenced recipe files (`.a5c/processes/**`).
66
+ 2. Review `inputs.json`, `state.json`, and the latest journal entries (via CLI).
67
+
68
+ ---
69
+
70
+ ## 4. CLI workflows
71
+
72
+ ### 4.1 Inspecting a run
73
+
74
+ ```bash
75
+ $CLI run:status .a5c/runs/<runId>
76
+ $CLI run:events .a5c/runs/<runId> --limit 50 --reverse # tail recent events
77
+ ```
78
+
79
+ Use `--json` when you need machine-readable data. These commands replace manual `tail` or ad-hoc scripts; they also echo deterministic metadata pairs (`stateVersion`, `journalHead`, `pending[...]`).
80
+
81
+ ### 4.2 Creating a run
82
+
83
+ ```bash
84
+ $CLI run:create \
85
+ --process-id dev/build \
86
+ --entry .a5c/processes/roles/development/recipes/full_project.js#fullProject \
87
+ --inputs examples/inputs/build.json \
88
+ --run-id "run-$(date -u +%Y%m%d-%H%M%S)-dev-build"
89
+ ```
90
+
91
+ The CLI prints the new run id + directory. Immediately open `.a5c/runs/<runId>/code/main.js` to ensure it reflects the requested recipe; if you generate a custom `main.js`, still store it under `code/` and capture the narrative in `artifacts/process.md`. Mermaid diagrams are no longer required.
92
+
93
+ ### 4.3 Driving iterations
94
+
95
+ Use `run:step` for single iterations or `run:continue` for full loops:
96
+
97
+ ```bash
98
+ $CLI run:step .a5c/runs/<runId> --json
99
+ $CLI run:continue .a5c/runs/<runId> --auto-node-tasks \
100
+ --auto-node-max 5 \
101
+ --runs-dir .a5c/runs
102
+ ```
103
+
104
+ CLI output tells you the status (`waiting/completed/failed`), pending effects, and metadata. If it hits a breakpoint or needs manual input, use the `babysitter-breakpoint` skill; wait for release before continuing. When auto-running node tasks, the CLI logs each `effectId` and scheduler hints so you don’t need to script those paths yourself.
105
+
106
+ > **Quality gate:** compare the JSON payload to the structure documented in `docs/cli-examples.md` §3–§6 (`pending`, `autoRun.executed/pending`, `metadata.stateVersion/pendingEffectsByKind`). If a field is missing or renamed, stop and reconcile with the SDK team before proceeding; otherwise documentation and harnesses will drift.
107
+
108
+ ### 4.4 Working with tasks
109
+
110
+ ```bash
111
+ $CLI task:list .a5c/runs/<runId> --pending
112
+ $CLI task:show .a5c/runs/<runId> <effectId> --json
113
+ $CLI task:run .a5c/runs/<runId> <effectId> --dry-run
114
+ $CLI task:run .a5c/runs/<runId> <effectId> \
115
+ --json --verbose \
116
+ -- env BABYSITTER_ALLOW_SECRET_LOGS=true
117
+ ```
118
+
119
+ Use these instead of manually inspecting `tasks/<effectId>`. Remember: raw payloads remain redacted unless `BABYSITTER_ALLOW_SECRET_LOGS` **and** `--json --verbose` are set. Verify the output includes `payloads: redacted…` whenever the guard is disabled; treat deviations as failures that must be investigated.
120
+
121
+ ### 4.5 Journal utilities
122
+
123
+ ```bash
124
+ $CLI run:events .a5c/runs/<runId> --limit 20
125
+ $CLI run:events .a5c/runs/<runId> --reverse --json > tmp/events.json
126
+ ```
127
+
128
+ The CLI already writes events for actions, notes, artifacts, sleeps, etc.
129
+
130
+ ---
131
+
132
+ ## 5. Orchestration loop (CLI-first)
133
+
134
+ 1. **Read process + state**
135
+ - `code/main.js`, imported recipes
136
+ - `state.json`, `inputs.json`, plus recent journal entries via `$CLI run:events …`
137
+ 2. **Determine next action** from `code/main.js` and/or the CLI orchestration
138
+ output (pending effects, task payloads, or explicit next-step notes).
139
+ 3. **Execute the next action** directly in the repo, following the CLI
140
+ instructions verbatim and updating artifacts as needed.
141
+ 4. **Journal & state are auto-managed** by the CLI as long as you drive iterations with `run:step` / `run:continue`. Do not edit `journal.jsonl` or `state.json` directly.
142
+ 5. **Breakpoints/sleep**: when CLI reports `Awaiting input`, use the `babysitter-breakpoint` skill to collect the missing information and wait for release. For sleeps, log start/end using CLI events; no manual timers.
143
+
144
+ Loop until `status` is `completed` or `failed`. Never edit `journal.jsonl` or `state.json` directly; use CLI commands or agent outputs that update them.
145
+
146
+ > **Iteration verification:** after every CLI loop, run `$CLI run:status .a5c/runs/<runId> --json` and confirm `stateVersion` increased (or stayed steady when waiting), pending counts match expectations, and metadata fields are present (for example `stateVersion`, `pendingEffectsByKind`, and `autoRun`). If not, pause and reconcile before issuing more actions.
147
+
148
+ ---
149
+
150
+ ## 6. Artifacts & documentation
151
+
152
+ - Store specs, summaries, and diagrams under `.a5c/runs/<runId>/artifacts/`. Attaching notes through the CLI (e.g., `$CLI run:events … --note "uploaded part7_spec.md"`) is currently not supported; until it is, reference artifacts by adding an `artifact` journal entry with the documented helper script if needed, and prefer CLI notes once they become available.
153
+ - Provide an updated `process.md` for every `main.js` you craft (Mermaid diagrams have been retired, so no additional `.mermaid.md` artifact is needed).
154
+
155
+ ---
156
+
157
+ ## 7. Troubleshooting
158
+
159
+ | Issue | Resolution |
160
+ | --- | --- |
161
+ | CLI missing / npx fails | Verify Node/npm are on PATH and retry `npx -y @a5c-ai/babysitter-sdk --version` |
162
+ | CLI command fails (bad args) | Run `$CLI help` or `$CLI <command> --help` and fix flags |
163
+ | Need alternate runs dir | Pass `--runs-dir <path>` on every CLI invocation |
164
+ | Want JSON output | Append `--json` (many commands support it) |
165
+ | Need to view CLI env | `env \| grep BABYSITTER` |
166
+
167
+ If a CLI command crashes mid-iteration, capture the stderr, add a note to the run, and re-run `run:step` once fixed.
168
+
169
+ ---
170
+
171
+ ## 8. Next-action execution
172
+
173
+ When `code/main.js` or the CLI orchestration indicates a next action, execute it
174
+ immediately and record outputs through the CLI-driven workflow. Avoid any
175
+ function-template or agent-runner indirection.
176
+
177
+ ---
178
+
179
+ ## 9. Example session
180
+
181
+ ```bash
182
+ CLI="npx -y @a5c-ai/babysitter-sdk"
183
+
184
+ # Start work on a new request
185
+ $CLI run:create --process-id dev/project --entry .a5c/processes/... --inputs ./inputs.json
186
+ # => runId=run-20260114-101500-dev-project
187
+
188
+ # Review latest instructions
189
+ $CLI run:status .a5c/runs/run-20260114-101500-dev-project
190
+ $CLI run:events .a5c/runs/run-20260114-101500-dev-project --limit 20 --reverse
191
+
192
+ # Drive the next iteration
193
+ $CLI run:continue .a5c/runs/run-20260114-101500-dev-project --auto-node-tasks --auto-node-max 3
194
+
195
+ # List and run pending tasks if needed
196
+ $CLI task:list .a5c/runs/run-20260114-101500-dev-project --pending
197
+ $CLI task:run .a5c/runs/run-20260114-101500-dev-project ef-node-123 --dry-run
198
+
199
+ # Resume after breakpoint release + feedback
200
+ $CLI run:continue .a5c/runs/run-20260114-101500-dev-project
201
+ ```
202
+
203
+ Use this pattern anytime the user says “babysit this run” or “orchestrate via babysitter.” Keep the process deterministic by staying inside the CLI wherever it offers a command; only fall back to manual scripts when the CLI surface truly lacks a capability.
@@ -0,0 +1,35 @@
1
+ ---
2
+ name: babysitter-score
3
+ allowed-tools: Bash(*) Read Write
4
+ description: Executes the next CLI-orchestrated action when a score step is requested.
5
+ metadata:
6
+ author: a5c-ai
7
+ version: "1.0"
8
+ ---
9
+
10
+ # babysitter-score
11
+
12
+ You are a next-action executor. The CLI orchestration output is the source of
13
+ truth for what to do next.
14
+
15
+ ## Task
16
+ Execute the next action described by the CLI orchestration output. Treat any
17
+ inputs you receive as instructions for that next action.
18
+
19
+ ## Constraints
20
+ - Make the smallest correct change set.
21
+ - Follow any `AGENTS.md` instructions in scope.
22
+ - Prefer adding a self-contained demo or runnable artifact when applicable.
23
+ - If there are tests that are cheap and relevant, run them and report results.
24
+ - Do not invent new steps beyond the CLI-provided action.
25
+
26
+ ## Deliverable
27
+ - Apply changes directly to the working tree.
28
+ - Write a short work summary to stdout:
29
+ - What changed (files)
30
+ - Why
31
+ - How to run / verify
32
+ - Commands run (if any) and results
33
+
34
+ ## Output
35
+ Return a summary of the work and files touched as the final message.