@a5c-ai/babysitter-sdk 0.0.16 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.d.ts.map +1 -1
- package/dist/cli/main.js +209 -0
- package/package.json +3 -2
- package/skills/babysitter/SKILL.md +203 -0
- package/skills/babysitter-score/SKILL.md +35 -0
package/dist/cli/main.d.ts
CHANGED
package/dist/cli/main.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"main.d.ts","sourceRoot":"","sources":["../../src/cli/main.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"main.d.ts","sourceRoot":"","sources":["../../src/cli/main.ts"],"names":[],"mappings":";AAonDA,wBAAgB,mBAAmB;eAEf,MAAM,EAAE,GAA2B,OAAO,CAAC,MAAM,CAAC;kBA4CpD,MAAM;EAIvB"}
|
package/dist/cli/main.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
1
2
|
"use strict";
|
|
2
3
|
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
4
|
if (k2 === undefined) k2 = k;
|
|
@@ -36,6 +37,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
36
37
|
exports.createBabysitterCli = createBabysitterCli;
|
|
37
38
|
const node_fs_1 = require("node:fs");
|
|
38
39
|
const path = __importStar(require("node:path"));
|
|
40
|
+
const os = __importStar(require("node:os"));
|
|
39
41
|
const nodeTaskRunner_1 = require("./nodeTaskRunner");
|
|
40
42
|
const orchestrateIteration_1 = require("../runtime/orchestrateIteration");
|
|
41
43
|
const createRun_1 = require("../runtime/createRun");
|
|
@@ -54,6 +56,7 @@ const USAGE = `Usage:
|
|
|
54
56
|
babysitter run:continue <runDir> [--runs-dir <dir>] [--json] [--dry-run] [--auto-node-tasks] [--auto-node-max <n>] [--auto-node-label <text>]
|
|
55
57
|
babysitter task:list <runDir> [--runs-dir <dir>] [--pending] [--kind <kind>] [--json]
|
|
56
58
|
babysitter task:show <runDir> <effectId> [--runs-dir <dir>] [--json]
|
|
59
|
+
babysitter skill:install [--type <claude|codex|cursor>] [--scope <local|global>] [--skills-dir <dir>] [--force] [--json] [--dry-run]
|
|
57
60
|
|
|
58
61
|
Global flags:
|
|
59
62
|
--runs-dir <dir> Override the runs directory (defaults to current working directory).
|
|
@@ -62,14 +65,20 @@ Global flags:
|
|
|
62
65
|
--verbose Log resolved paths and options to stderr for debugging.
|
|
63
66
|
--help, -h Show this help text.`;
|
|
64
67
|
const LARGE_RESULT_PREVIEW_LIMIT = 1024 * 1024; // 1 MiB
|
|
68
|
+
const DEFAULT_SKILL_TARGET = "codex";
|
|
69
|
+
const DEFAULT_SKILL_SCOPE = "local";
|
|
65
70
|
function parseArgs(argv) {
|
|
66
71
|
const [initialCommand, ...rest] = argv;
|
|
67
72
|
const parsed = {
|
|
68
73
|
command: initialCommand,
|
|
69
74
|
runsDir: ".",
|
|
75
|
+
skillsDir: undefined,
|
|
76
|
+
skillType: DEFAULT_SKILL_TARGET,
|
|
77
|
+
skillScope: DEFAULT_SKILL_SCOPE,
|
|
70
78
|
json: false,
|
|
71
79
|
dryRun: false,
|
|
72
80
|
verbose: false,
|
|
81
|
+
force: false,
|
|
73
82
|
helpRequested: false,
|
|
74
83
|
autoNodeTasks: false,
|
|
75
84
|
pendingOnly: false,
|
|
@@ -90,6 +99,18 @@ function parseArgs(argv) {
|
|
|
90
99
|
parsed.runsDir = expectFlagValue(rest, ++i, "--runs-dir");
|
|
91
100
|
continue;
|
|
92
101
|
}
|
|
102
|
+
if (arg === "--skills-dir") {
|
|
103
|
+
parsed.skillsDir = expectFlagValue(rest, ++i, "--skills-dir");
|
|
104
|
+
continue;
|
|
105
|
+
}
|
|
106
|
+
if (arg === "--type") {
|
|
107
|
+
parsed.skillType = expectSkillTarget(expectFlagValue(rest, ++i, "--type"), "--type");
|
|
108
|
+
continue;
|
|
109
|
+
}
|
|
110
|
+
if (arg === "--scope") {
|
|
111
|
+
parsed.skillScope = expectSkillScope(expectFlagValue(rest, ++i, "--scope"), "--scope");
|
|
112
|
+
continue;
|
|
113
|
+
}
|
|
93
114
|
if (arg === "--json") {
|
|
94
115
|
parsed.json = true;
|
|
95
116
|
continue;
|
|
@@ -98,6 +119,10 @@ function parseArgs(argv) {
|
|
|
98
119
|
parsed.dryRun = true;
|
|
99
120
|
continue;
|
|
100
121
|
}
|
|
122
|
+
if (arg === "--force") {
|
|
123
|
+
parsed.force = true;
|
|
124
|
+
continue;
|
|
125
|
+
}
|
|
101
126
|
if (arg === "--verbose") {
|
|
102
127
|
parsed.verbose = true;
|
|
103
128
|
continue;
|
|
@@ -211,6 +236,29 @@ function parsePositiveInteger(raw, flag) {
|
|
|
211
236
|
}
|
|
212
237
|
return Math.floor(parsed);
|
|
213
238
|
}
|
|
239
|
+
/**
 * Validate the value given to a skill-target flag.
 *
 * The raw value is trimmed and lower-cased before it is compared with the
 * supported targets, so surrounding whitespace and letter case are ignored.
 *
 * @param {string} raw - Value supplied on the command line.
 * @param {string} flag - Flag name; used as the prefix of the error message.
 * @returns {string} The normalized target: "claude", "codex" or "cursor".
 * @throws {Error} When the normalized value is not one of the supported targets.
 */
function expectSkillTarget(raw, flag) {
    const candidate = raw.trim().toLowerCase();
    switch (candidate) {
        case "claude":
        case "codex":
        case "cursor":
            return candidate;
        default:
            throw new Error(`${flag} must be one of: claude, codex, cursor`);
    }
}
|
|
246
|
+
/**
 * Validate the value given to a skill-scope flag.
 *
 * The raw value is trimmed and lower-cased before it is checked, so
 * surrounding whitespace and letter case are ignored.
 *
 * @param {string} raw - Value supplied on the command line.
 * @param {string} flag - Flag name; used as the prefix of the error message.
 * @returns {string} The normalized scope: "local" or "global".
 * @throws {Error} When the normalized value is neither "local" nor "global".
 */
function expectSkillScope(raw, flag) {
    const candidate = raw.trim().toLowerCase();
    const isKnownScope = ["local", "global"].includes(candidate);
    if (!isKnownScope) {
        throw new Error(`${flag} must be one of: local, global`);
    }
    return candidate;
}
|
|
253
|
+
/**
 * Work out the directory that skills should be installed into.
 *
 * An explicit `parsed.skillsDir` wins and is resolved to an absolute path.
 * Otherwise the directory is `<base>/.<skillType>/skills`, where `<base>` is
 * the user's home directory for the "global" scope and the current working
 * directory for any other scope.
 *
 * @param {{ skillsDir?: string, skillScope: string, skillType: string }} parsed
 *   Parsed CLI arguments.
 * @returns {string} Absolute path of the skills directory.
 */
function resolveSkillsDir(parsed) {
    const { skillsDir, skillScope, skillType } = parsed;
    if (skillsDir) {
        return path.resolve(skillsDir);
    }
    const agentDirName = `.${skillType}`;
    if (skillScope === "global") {
        return path.join(os.homedir(), agentDirName, "skills");
    }
    // Any non-global scope is anchored at the current working directory.
    return path.join(path.resolve(agentDirName), "skills");
}
|
|
214
262
|
function summarizeActions(actions) {
|
|
215
263
|
return actions.map((action) => ({
|
|
216
264
|
effectId: action.effectId,
|
|
@@ -308,6 +356,84 @@ function formatVerboseValue(value) {
|
|
|
308
356
|
return String(value);
|
|
309
357
|
return JSON.stringify(value);
|
|
310
358
|
}
|
|
359
|
+
/**
 * Locate the `skills` directory that ships with the package.
 *
 * The path is computed relative to this module's directory: two levels up
 * from `__dirname`, then `skills`.
 *
 * @returns {string} Absolute path of the bundled skills root.
 */
function resolveBundledSkillsRoot() {
    const twoLevelsUp = path.resolve(__dirname, "..", "..");
    return path.join(twoLevelsUp, "skills");
}
|
|
362
|
+
/**
 * List the skills bundled with the package.
 *
 * Reads the bundled skills root and keeps only entries that are directories;
 * plain files are ignored.
 *
 * @returns {Promise<string[]>} Directory names, sorted with the default
 *   string ordering of `Array.prototype.sort`.
 * @throws Rejects with the underlying `readdir` error when the root cannot be read.
 */
async function listBundledSkillDirs() {
    const bundledRoot = resolveBundledSkillsRoot();
    const dirents = await node_fs_1.promises.readdir(bundledRoot, { withFileTypes: true });
    const names = [];
    for (const dirent of dirents) {
        if (dirent.isDirectory()) {
            names.push(dirent.name);
        }
    }
    names.sort();
    return names;
}
|
|
367
|
+
/**
 * Check whether something exists at the given path.
 *
 * @param {string} filePath - Path to probe with `fs.promises.stat`.
 * @returns {Promise<boolean>} `true` when `stat` succeeds, `false` when it
 *   fails with code `ENOENT`.
 * @throws Re-throws any `stat` failure whose code is not `ENOENT`.
 */
async function pathExists(filePath) {
    try {
        await node_fs_1.promises.stat(filePath);
    }
    catch (error) {
        // Only "no such entry" means "does not exist"; anything else is a real failure.
        if (error.code !== "ENOENT") {
            throw error;
        }
        return false;
    }
    return true;
}
|
|
380
|
+
/**
 * Convert every backslash in a path string to a forward slash.
 *
 * @param {string} value - Path text that may contain `\` separators.
 * @returns {string} The same text with each `\` replaced by `/`.
 */
function toPosixPath(value) {
    return value.split("\\").join("/");
}
|
|
383
|
+
/**
 * Install one bundled skill directory into the target skills directory.
 *
 * Decision order:
 *   1. bundled source missing                  -> status "error" ("bundled skill missing")
 *   2. destination exists and `force` is off   -> status "skipped" ("already installed")
 *   3. `dryRun` is on                          -> status "planned" (nothing is written)
 *   4. otherwise any existing destination is removed, the skills directory is
 *      created if needed, and the source is copied recursively
 *                                              -> status "installed"
 * Any exception raised while checking or copying is reported as status
 * "error" with the exception message instead of being thrown.
 *
 * @param {string} skillName - Name of the bundled skill directory.
 * @param {{ skillsDir: string, dryRun: boolean, force: boolean }} options
 * @returns {Promise<{ name: string, status: string, sourceDir: string,
 *   destinationDir: string, message?: string }>} Outcome record; `message` is
 *   present only for "error" and "skipped".
 */
async function installBundledSkillDir(skillName, options) {
    const { skillsDir, dryRun, force } = options;
    const sourceDir = path.join(resolveBundledSkillsRoot(), skillName);
    const destinationDir = path.join(skillsDir, skillName);
    // Build the outcome record; `message` is only attached when one is given.
    const outcome = (status, message) => (message === undefined
        ? { name: skillName, status, sourceDir, destinationDir }
        : { name: skillName, status, sourceDir, destinationDir, message });
    try {
        if (!(await pathExists(sourceDir))) {
            return outcome("error", "bundled skill missing");
        }
        const alreadyInstalled = await pathExists(destinationDir);
        if (alreadyInstalled && !force) {
            return outcome("skipped", "already installed");
        }
        if (dryRun) {
            return outcome("planned");
        }
        // Reaching this point with an existing destination implies `force`.
        if (alreadyInstalled) {
            await node_fs_1.promises.rm(destinationDir, { recursive: true, force: true });
        }
        await node_fs_1.promises.mkdir(skillsDir, { recursive: true });
        await node_fs_1.promises.cp(sourceDir, destinationDir, { recursive: true });
        return outcome("installed");
    }
    catch (error) {
        return outcome("error", error instanceof Error ? error.message : String(error));
    }
}
|
|
311
437
|
function allowSecretLogs(parsed) {
|
|
312
438
|
if (!parsed.json || !parsed.verbose) {
|
|
313
439
|
return false;
|
|
@@ -1036,6 +1162,86 @@ async function handleTaskShow(parsed) {
|
|
|
1036
1162
|
}
|
|
1037
1163
|
return 0;
|
|
1038
1164
|
}
|
|
1165
|
+
/**
 * Handler for the `skill:install` command.
 *
 * Resolves the target skills directory, lists the bundled skills, installs
 * each one in turn, and prints the outcome. With `parsed.json` set, a single
 * JSON object is written to stdout. Otherwise a one-line summary followed by
 * one line per skill is written to stdout.
 *
 * @param {object} parsed - Parsed CLI arguments; reads `skillsDir`,
 *   `skillType`, `skillScope`, `dryRun`, `force` and `json`.
 * @returns {Promise<number>} Exit code: 1 when the bundled skills cannot be
 *   listed, when none are found, or when at least one result is an error;
 *   0 otherwise.
 */
async function handleSkillInstall(parsed) {
    const skillsDir = resolveSkillsDir(parsed);
    logVerbose("skill:install", parsed, {
        skillsDir,
        type: parsed.skillType,
        scope: parsed.skillScope,
        dryRun: parsed.dryRun,
        force: parsed.force,
        json: parsed.json,
    });
    // Reports a failure that happens before any skill is processed:
    // JSON goes to stdout, the plain-text line goes to stderr.
    const failEarly = (jsonError, textLine) => {
        if (parsed.json) {
            console.log(JSON.stringify({ skillsDir, type: parsed.skillType, scope: parsed.skillScope, error: jsonError, results: [] }));
        }
        else {
            console.error(textLine);
        }
        return 1;
    };
    let skillNames;
    try {
        skillNames = await listBundledSkillDirs();
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        return failEarly(reason, `[skill:install] failed to read bundled skills: ${reason}`);
    }
    if (skillNames.length === 0) {
        return failEarly("no bundled skills found", "[skill:install] no bundled skills found");
    }
    // Install sequentially so results keep the sorted skill order.
    const results = [];
    for (const skillName of skillNames) {
        results.push(await installBundledSkillDir(skillName, { skillsDir, dryRun: parsed.dryRun, force: parsed.force }));
    }
    const counts = { installed: 0, skipped: 0, planned: 0, error: 0 };
    for (const { status } of results) {
        // Any status other than the three known successes is tallied as an error.
        const bucket = status === "installed" || status === "skipped" || status === "planned" ? status : "error";
        counts[bucket] += 1;
    }
    const exitCode = counts.error > 0 ? 1 : 0;
    if (parsed.json) {
        console.log(JSON.stringify({ skillsDir, type: parsed.skillType, scope: parsed.skillScope, results }));
        return exitCode;
    }
    const parts = [`[skill:install] dir=${skillsDir}`];
    if (!parsed.skillsDir) {
        // type/scope only matter when they were used to derive the directory.
        parts.push(`type=${parsed.skillType}`, `scope=${parsed.skillScope}`);
    }
    if (parsed.dryRun) {
        parts.push("dryRun=true");
    }
    if (parsed.force) {
        parts.push("force=true");
    }
    const tallies = [
        ["installed", counts.installed],
        ["skipped", counts.skipped],
        ["planned", counts.planned],
        ["errors", counts.error],
    ];
    for (const [label, tally] of tallies) {
        if (tally) {
            parts.push(`${label}=${tally}`);
        }
    }
    console.log(parts.join(" "));
    // Show a path relative to the skills dir unless that would start with "..".
    const labelFor = (target) => {
        const relative = toPosixPath(path.relative(skillsDir, target));
        return relative.startsWith("..") ? toPosixPath(target) : relative;
    };
    for (const result of results) {
        const destLabel = labelFor(result.destinationDir);
        const sourceLabel = labelFor(result.sourceDir);
        const messageSuffix = result.message ? ` message=${result.message}` : "";
        console.log(`- ${result.name} status=${result.status} dest=${destLabel} src=${sourceLabel}${messageSuffix}`);
    }
    return exitCode;
}
|
|
1039
1245
|
function toTaskListEntry(record, runDir) {
|
|
1040
1246
|
return {
|
|
1041
1247
|
effectId: record.effectId,
|
|
@@ -1325,6 +1531,9 @@ function createBabysitterCli() {
|
|
|
1325
1531
|
if (parsed.command === "task:show") {
|
|
1326
1532
|
return await handleTaskShow(parsed);
|
|
1327
1533
|
}
|
|
1534
|
+
if (parsed.command === "skill:install") {
|
|
1535
|
+
return await handleSkillInstall(parsed);
|
|
1536
|
+
}
|
|
1328
1537
|
console.error(USAGE);
|
|
1329
1538
|
return 1;
|
|
1330
1539
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@a5c-ai/babysitter-sdk",
|
|
3
|
-
"version": "0.0.16",
|
|
3
|
+
"version": "0.0.18",
|
|
4
4
|
"description": "Storage and run-registry primitives for event-sourced babysitter workflows.",
|
|
5
5
|
"license": "UNLICENSED",
|
|
6
6
|
"type": "commonjs",
|
|
@@ -10,7 +10,8 @@
|
|
|
10
10
|
"babysitter": "dist/cli/main.js"
|
|
11
11
|
},
|
|
12
12
|
"files": [
|
|
13
|
-
"dist"
|
|
13
|
+
"dist",
|
|
14
|
+
"skills"
|
|
14
15
|
],
|
|
15
16
|
"scripts": {
|
|
16
17
|
"build": "tsc -p tsconfig.json",
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: babysitter
|
|
3
|
+
description: Orchestrate .a5c runs via @a5c-ai/babysitter-sdk CLI (create, continue, inspect, task ops). Use when the user asks to orchestrate or babysit a run; delegate breakpoint communication to the babysitter-breakpoint skill.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# babysitter
|
|
7
|
+
|
|
8
|
+
You are **babysitter**—the orchestrator that keeps `.a5c/runs/<runId>/` in a healthy, deterministic state. Follow an event-sourced workflow and use the `@a5c-ai/babysitter-sdk` CLI wherever possible instead of manual scripts. The CLI exposes the surface documented in `docs/cli-examples.md` (`run:create`, `run:status`, `run:events`, `run:continue`, `task:list`, `task:run`, etc.).
|
|
9
|
+
|
|
10
|
+
We operate in an **iterative, quality-gated loop**:
|
|
11
|
+
|
|
12
|
+
1. Run preflight checks (CLI version, global flags) before every session.
|
|
13
|
+
2. Execute a single CLI-driven orchestration step.
|
|
14
|
+
3. Verify the output against the SDK/CLI references (field names, metadata, redaction rules).
|
|
15
|
+
4. Repeat until the run converges (status `completed`/`failed`). Stop immediately if verification fails; fix the drift first.
|
|
16
|
+
|
|
17
|
+
> **CLI alias:** all examples use
|
|
18
|
+
> `CLI="npx -y @a5c-ai/babysitter-sdk"`
|
|
19
|
+
> so you can run commands from repo root like `"$CLI run:status .a5c/runs/<id>"`. Adjust if you install the binary globally.
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## 1. Setup & quality gate
|
|
24
|
+
|
|
25
|
+
1. Verify CLI availability **and capture version** (quality gate)
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
$CLI --version
|
|
29
|
+
$CLI run:status .a5c/runs/example --help # sanity check output
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Ensure the help text matches the options documented in `docs/cli-examples.md` (global flags, redaction notes). Record the version in your notes.
|
|
33
|
+
2. Ensure the CLI orchestration output is accessible by running a quick
|
|
34
|
+
`run:status` or `run:events` check before you proceed.
|
|
35
|
+
|
|
36
|
+
Do **not** proceed if the CLI check fails; fix the issue (missing Node, npx resolution, command errors) first—this is your quality gate.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## 2. Core orchestration rules (self-contained)
|
|
41
|
+
|
|
42
|
+
- **CLI-first**: use the babysitter CLI for all run state and orchestration.
|
|
43
|
+
- **Run entry**: you are either resuming a run id or creating a new run from a high-level task.
|
|
44
|
+
- **Main.js approval**: when you create a new run, inspect `.a5c/processes/**` for a suitable starting point, generate `.a5c/runs/<runId>/code/main.js`, and create `.a5c/runs/<runId>/artifacts/process.md`. Before any orchestration, use the `babysitter-breakpoint` skill to request approval of the process, inputs, and `main.js`. Do not proceed until the breakpoint is released with approval. After approval, do not change `main.js` without explicit instruction.
|
|
45
|
+
- **Missing babysitter-breakpoint skill**: if the `babysitter-breakpoint` skill is unavailable, install it from npm by running `npm install -g @a5c-ai/babysitter-breakpoints`, then `breakpoints install-skill --target codex --scope global`. If the `breakpoints` CLI is not on PATH, run `npx -y @a5c-ai/babysitter-breakpoints install-skill --target codex --scope global`. Restart Codex after install.
|
|
46
|
+
- **Do the work**: execute the CLI-provided next action yourself; do not defer
|
|
47
|
+
to agent runners or function templates.
|
|
48
|
+
- **Follow the process**: execute exactly what `code/main.js` (and imported files) prescribe; only deviate when the user explicitly instructs it.
|
|
49
|
+
- **Helper scripts**: if needed, store them in `.a5c/orchestrator_scripts/` or `.a5c/runs/<runId>/orchestrator/`, never as whole-iteration automation.
|
|
50
|
+
- **Journal/state ownership**: do not edit `journal.jsonl` or `state.json` by hand; use the CLI and agent outputs so state stays deterministic.
|
|
51
|
+
- **Wrapper semantics**: if a function call is wrapped with `newRun` or `@run`, create a new run and orchestrate it separately, then report the result to the parent run. If a function list is wrapped with `parallel(...)`, orchestrate them in parallel and return once all are complete.
|
|
52
|
+
- **Sleep handling**: when encountering `sleep(...)`, record start/end via CLI events/notes so the process is resumable.
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## 3. Inputs you may receive
|
|
59
|
+
|
|
60
|
+
- **Resume existing run**: user supplies run id (e.g., `run-20260109-101648-dev-build`). All artifacts live under `.a5c/runs/<runId>/`.
|
|
61
|
+
- **Create new run**: user provides a high-level task. You must initialize a fresh run id, craft `code/main.js`, update `inputs.json`, etc.
|
|
62
|
+
|
|
63
|
+
Regardless of the entry point, always:
|
|
64
|
+
|
|
65
|
+
1. Read/understand `.a5c/runs/<runId>/code/main.js` and referenced recipe files (`.a5c/processes/**`).
|
|
66
|
+
2. Review `inputs.json`, `state.json`, and the latest journal entries (via CLI).
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## 4. CLI workflows
|
|
71
|
+
|
|
72
|
+
### 4.1 Inspecting a run
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
$CLI run:status .a5c/runs/<runId>
|
|
76
|
+
$CLI run:events .a5c/runs/<runId> --limit 50 --reverse # tail recent events
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Use `--json` when you need machine-readable data. These commands replace manual `tail` or ad-hoc scripts; they also echo deterministic metadata pairs (`stateVersion`, `journalHead`, `pending[...]`).
|
|
80
|
+
|
|
81
|
+
### 4.2 Creating a run
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
$CLI run:create \
|
|
85
|
+
--process-id dev/build \
|
|
86
|
+
--entry .a5c/processes/roles/development/recipes/full_project.js#fullProject \
|
|
87
|
+
--inputs examples/inputs/build.json \
|
|
88
|
+
--run-id "run-$(date -u +%Y%m%d-%H%M%S)-dev-build"
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The CLI prints the new run id + directory. Immediately open `.a5c/runs/<runId>/code/main.js` to ensure it reflects the requested recipe; if you generate a custom `main.js`, still store it under `code/` and capture the narrative in `artifacts/process.md`. Mermaid diagrams are no longer required.
|
|
92
|
+
|
|
93
|
+
### 4.3 Driving iterations
|
|
94
|
+
|
|
95
|
+
Use `run:step` for single iterations or `run:continue` for full loops:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
$CLI run:step .a5c/runs/<runId> --json
|
|
99
|
+
$CLI run:continue .a5c/runs/<runId> --auto-node-tasks \
|
|
100
|
+
--auto-node-max 5 \
|
|
101
|
+
--runs-dir .a5c/runs
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
CLI output tells you the status (`waiting/completed/failed`), pending effects, and metadata. If it hits a breakpoint or needs manual input, use the `babysitter-breakpoint` skill; wait for release before continuing. When auto-running node tasks, the CLI logs each `effectId` and scheduler hints so you don’t need to script those paths yourself.
|
|
105
|
+
|
|
106
|
+
> **Quality gate:** compare the JSON payload to the structure documented in `docs/cli-examples.md` §3–§6 (`pending`, `autoRun.executed/pending`, `metadata.stateVersion/pendingEffectsByKind`). If a field is missing or renamed, stop and reconcile with the SDK team before proceeding; otherwise documentation and harnesses will drift.
|
|
107
|
+
|
|
108
|
+
### 4.4 Working with tasks
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
$CLI task:list .a5c/runs/<runId> --pending
|
|
112
|
+
$CLI task:show .a5c/runs/<runId> <effectId> --json
|
|
113
|
+
$CLI task:run .a5c/runs/<runId> <effectId> --dry-run
|
|
114
|
+
$CLI task:run .a5c/runs/<runId> <effectId> \
|
|
115
|
+
--json --verbose \
|
|
116
|
+
-- env BABYSITTER_ALLOW_SECRET_LOGS=true
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Use these instead of manually inspecting `tasks/<effectId>`. Remember: raw payloads remain redacted unless `BABYSITTER_ALLOW_SECRET_LOGS` **and** `--json --verbose` are set. Verify the output includes `payloads: redacted…` whenever the guard is disabled; treat deviations as failures that must be investigated.
|
|
120
|
+
|
|
121
|
+
### 4.5 Journal utilities
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
$CLI run:events .a5c/runs/<runId> --limit 20
|
|
125
|
+
$CLI run:events .a5c/runs/<runId> --reverse --json > tmp/events.json
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
The CLI already writes events for actions, notes, artifacts, sleeps, etc.
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## 5. Orchestration loop (CLI-first)
|
|
133
|
+
|
|
134
|
+
1. **Read process + state**
|
|
135
|
+
- `code/main.js`, imported recipes
|
|
136
|
+
- `state.json`, `inputs.json`, plus recent journal entries via `$CLI run:events …`
|
|
137
|
+
2. **Determine next action** from `code/main.js` and/or the CLI orchestration
|
|
138
|
+
output (pending effects, task payloads, or explicit next-step notes).
|
|
139
|
+
3. **Execute the next action** directly in the repo, following the CLI
|
|
140
|
+
instructions verbatim and updating artifacts as needed.
|
|
141
|
+
4. **Journal & state are auto-managed** by the CLI as long as you drive iterations with `run:step` / `run:continue`. Do not edit `journal.jsonl` or `state.json` directly.
|
|
142
|
+
5. **Breakpoints/sleep**: when CLI reports `Awaiting input`, use the `babysitter-breakpoint` skill to collect the missing information and wait for release. For sleeps, log start/end using CLI events; no manual timers.
|
|
143
|
+
|
|
144
|
+
Loop until `status` is `completed` or `failed`. Never edit `journal.jsonl` or `state.json` directly; use CLI commands or agent outputs that update them.
|
|
145
|
+
|
|
146
|
+
> **Iteration verification:** after every CLI loop, run `$CLI run:status .a5c/runs/<runId> --json` and confirm `stateVersion` increased (or stayed steady when waiting), pending counts match expectations, and metadata fields are present (for example `stateVersion`, `pendingEffectsByKind`, and `autoRun`). If not, pause and reconcile before issuing more actions.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## 6. Artifacts & documentation
|
|
151
|
+
|
|
152
|
+
- Store specs, summaries, and diagrams under `.a5c/runs/<runId>/artifacts/` and reference them from the run journal. CLI notes (e.g., `$CLI run:events … --note "uploaded part7_spec.md"`) are not supported yet, so for now add an `artifact` journal entry by running the documented helper script if needed; prefer CLI notes once they become available.
|
|
153
|
+
- Provide an updated `process.md` for every `main.js` you craft (Mermaid diagrams have been retired, so no additional `.mermaid.md` artifact is needed).
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## 7. Troubleshooting
|
|
158
|
+
|
|
159
|
+
| Issue | Resolution |
|
|
160
|
+
| --- | --- |
|
|
161
|
+
| CLI missing / npx fails | Verify Node/npm are on PATH and retry `npx -y @a5c-ai/babysitter-sdk --version` |
|
|
162
|
+
| CLI command fails (bad args) | Run `$CLI help` or `$CLI <command> --help` and fix flags |
|
|
163
|
+
| Need alternate runs dir | Pass `--runs-dir <path>` on every CLI invocation |
|
|
164
|
+
| Want JSON output | Append `--json` (many commands support it) |
|
|
165
|
+
| Need to view CLI env | `env \| grep BABYSITTER` |
|
|
166
|
+
|
|
167
|
+
If a CLI command crashes mid-iteration, capture the stderr, add a note to the run, and re-run `run:step` once fixed.
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## 8. Next-action execution
|
|
172
|
+
|
|
173
|
+
When `code/main.js` or the CLI orchestration indicates a next action, execute it
|
|
174
|
+
immediately and record outputs through the CLI-driven workflow. Avoid any
|
|
175
|
+
function-template or agent-runner indirection.
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## 9. Example session
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
CLI="npx -y @a5c-ai/babysitter-sdk"
|
|
183
|
+
|
|
184
|
+
# Start work on a new request
|
|
185
|
+
$CLI run:create --process-id dev/project --entry .a5c/processes/... --inputs ./inputs.json
|
|
186
|
+
# => runId=run-20260114-101500-dev-project
|
|
187
|
+
|
|
188
|
+
# Review latest instructions
|
|
189
|
+
$CLI run:status .a5c/runs/run-20260114-101500-dev-project
|
|
190
|
+
$CLI run:events .a5c/runs/run-20260114-101500-dev-project --limit 20 --reverse
|
|
191
|
+
|
|
192
|
+
# Drive the next iteration
|
|
193
|
+
$CLI run:continue .a5c/runs/run-20260114-101500-dev-project --auto-node-tasks --auto-node-max 3
|
|
194
|
+
|
|
195
|
+
# List and run pending tasks if needed
|
|
196
|
+
$CLI task:list .a5c/runs/run-20260114-101500-dev-project --pending
|
|
197
|
+
$CLI task:run .a5c/runs/run-20260114-101500-dev-project ef-node-123 --dry-run
|
|
198
|
+
|
|
199
|
+
# Resume after breakpoint release + feedback
|
|
200
|
+
$CLI run:continue .a5c/runs/run-20260114-101500-dev-project
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
Use this pattern anytime the user says “babysit this run” or “orchestrate via babysitter.” Keep the process deterministic by staying inside the CLI wherever it offers a command; only fall back to manual scripts when the CLI surface truly lacks a capability.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: babysitter-score
|
|
3
|
+
allowed-tools: Bash(*) Read Write
|
|
4
|
+
description: Executes the next CLI-orchestrated action when a score step is requested.
|
|
5
|
+
metadata:
|
|
6
|
+
author: a5c-ai
|
|
7
|
+
version: "1.0"
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# babysitter-score
|
|
11
|
+
|
|
12
|
+
You are a next-action executor. The CLI orchestration output is the source of
|
|
13
|
+
truth for what to do next.
|
|
14
|
+
|
|
15
|
+
## Task
|
|
16
|
+
Execute the next action described by the CLI orchestration output. Treat any
|
|
17
|
+
inputs you receive as instructions for that next action.
|
|
18
|
+
|
|
19
|
+
## Constraints
|
|
20
|
+
- Make the smallest correct change set.
|
|
21
|
+
- Follow any `AGENTS.md` instructions in scope.
|
|
22
|
+
- Prefer adding a self-contained demo or runnable artifact when applicable.
|
|
23
|
+
- If there are tests that are cheap and relevant, run them and report results.
|
|
24
|
+
- Do not invent new steps beyond the CLI-provided action.
|
|
25
|
+
|
|
26
|
+
## Deliverable
|
|
27
|
+
- Apply changes directly to the working tree.
|
|
28
|
+
- Write a short work summary to stdout:
|
|
29
|
+
- What changed (files)
|
|
30
|
+
- Why
|
|
31
|
+
- How to run / verify
|
|
32
|
+
- Commands run (if any) and results
|
|
33
|
+
|
|
34
|
+
## Output
|
|
35
|
+
Return a summary of the work and files touched as the final message.
|