npm - @a5c-ai/babysitter-gemini-cli - Versions diffs - 5.0.1-staging.04ca6ab00d21 - Mend

@a5c-ai/babysitter-gemini-cli 5.0.1-staging.04ca6ab00d21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/README.md +53 -0
package/bin/cli.js +96 -0
package/bin/install-shared.js +219 -0
package/bin/install.js +30 -0
package/bin/uninstall.js +24 -0
package/commands/assimilate.toml +3 -0
package/commands/call.toml +3 -0
package/commands/cleanup.toml +3 -0
package/commands/contrib.toml +3 -0
package/commands/doctor.toml +3 -0
package/commands/forever.toml +3 -0
package/commands/help.toml +3 -0
package/commands/observe.toml +3 -0
package/commands/plan.toml +3 -0
package/commands/plugins.toml +3 -0
package/commands/project-install.toml +3 -0
package/commands/resume.toml +3 -0
package/commands/retrospect.toml +3 -0
package/commands/user-install.toml +3 -0
package/commands/yolo.toml +3 -0
package/hooks/babysitter-proxied-after-agent.sh +3 -0
package/hooks/babysitter-proxied-after-tool.sh +3 -0
package/hooks/babysitter-proxied-before-tool.sh +3 -0
package/hooks/babysitter-proxied-pre-compact.sh +3 -0
package/hooks/babysitter-proxied-session-end.sh +3 -0
package/hooks/babysitter-proxied-session-idle.sh +3 -0
package/hooks/babysitter-proxied-session-start.sh +11 -0
package/hooks/babysitter-proxied-shell-env.sh +3 -0
package/hooks/babysitter-proxied-user-prompt-submit.sh +3 -0
package/hooks/hooks.json +122 -0
package/package.json +47 -0
package/plugin.json +55 -0
package/scripts/create-release-tag.mjs +18 -0
package/scripts/publish-from-tag.mjs +41 -0
package/scripts/team-install.js +23 -0
package/skills/babysit/SKILL.md +58 -0
package/versions.json +4 -0

package/README.md ADDED Viewed

@@ -0,0 +1,53 @@
+# babysitter
+Orchestrate complex, multi-step workflows with event-sourced state management, hook-based extensibility, and human-in-the-loop approval
+## Prerequisites
+Install the Babysitter CLI once. The `babysitter` command is backed by the SDK and exposes the canonical harness/plugin installer used in tests:
+```bash
+npm install -g @a5c-ai/babysitter
+```
+## Installation — Gemini CLI
+```bash
+npm install -g @a5c-ai/babysitter-gemini-cli
+babysitter-gemini-cli install --global
+```
+Restart Gemini CLI to pick up the installed plugin.
+For scriptable installs, prefer the SDK helper shape:
+```bash
+babysitter harness:install-plugin <harness>
+babysitter harness:install-plugin <harness> --workspace /path/to/repo
+```
+## What's Included
+- **Skills**: babysit
+- **Hooks**: SessionStart, UserPromptSubmit, PreToolUse, PostToolUse, PreCompact, AfterAgent, SessionEnd, SessionIdle, ShellEnv
+- **Commands**: (directory)
+- **CLI**: Install/uninstall scripts for global and workspace setup
+## Verification
+```bash
+babysitter harness:discover --json | grep gemini
+```
+## Integration Model
+The plugin provides:
+- Core orchestration skill for multi-step workflow management
+- Lifecycle hooks for session state, orchestration loops, and token compression
+- Command wrappers for plan, resume, doctor, and other operations
+The process library is fetched and bound through the SDK CLI.

package/bin/cli.js ADDED Viewed

@@ -0,0 +1,96 @@
+#!/usr/bin/env node
+'use strict';
+const path = require('path');
+const { spawnSync } = require('child_process');
+const PACKAGE_ROOT = path.resolve(__dirname, '..');
+let shared;
+try { shared = require('./install-shared'); } catch {}
+function printUsage() {
+  console.error([
+    'Usage:',
+    '  babysitter-gemini-cli install [--global]',
+    '  babysitter-gemini-cli install --workspace [path]',
+    '  babysitter-gemini-cli uninstall',
+  ].join('\n'));
+}
+function parseInstallArgs(argv) {
+  let scope = 'global';
+  let workspace = null;
+  const passthrough = [];
+  for (let i = 0; i < argv.length; i += 1) {
+    const arg = argv[i];
+    if (arg === '--global') {
+      scope = 'global';
+      continue;
+    }
+    if (arg === '--workspace') {
+      scope = 'workspace';
+      const next = argv[i + 1];
+      if (next && !next.startsWith('-')) {
+        workspace = path.resolve(next);
+        i += 1;
+      } else {
+        workspace = process.cwd();
+      }
+      continue;
+    }
+    passthrough.push(arg);
+  }
+  return { scope, workspace, passthrough };
+}
+function runNodeScript(scriptPath, args, extraEnv = {}) {
+  const result = spawnSync(process.execPath, [scriptPath, ...args], {
+    cwd: process.cwd(),
+    stdio: 'inherit',
+    env: { ...process.env, ...extraEnv },
+  });
+  process.exitCode = result.status ?? 1;
+}
+function main() {
+  const [command, ...rest] = process.argv.slice(2);
+  if (!command || command === '--help' || command === '-h' || command === 'help') {
+    printUsage();
+    process.exitCode = command ? 0 : 1;
+    return;
+  }
+  if (command === 'install') {
+    if (shared && typeof shared.harnessCliRoute === 'function' && shared.harnessCliRoute(rest, PACKAGE_ROOT, runNodeScript)) {
+      return;
+    }
+    const parsed = parseInstallArgs(rest);
+    if (parsed.scope === 'workspace') {
+      const args = [];
+      if (parsed.workspace) {
+        args.push('--workspace', parsed.workspace);
+      }
+      args.push(...parsed.passthrough);
+      runNodeScript(
+        path.join(PACKAGE_ROOT, 'scripts', 'team-install.js'),
+        args,
+        { PLUGIN_PACKAGE_ROOT: PACKAGE_ROOT },
+      );
+      return;
+    }
+    runNodeScript(path.join(PACKAGE_ROOT, 'bin', 'install.js'), parsed.passthrough);
+    return;
+  }
+  if (command === 'uninstall') {
+    runNodeScript(path.join(PACKAGE_ROOT, 'bin', 'uninstall.js'), rest);
+    return;
+  }
+  printUsage();
+  process.exitCode = 1;
+}
+main();

package/bin/install-shared.js ADDED Viewed

@@ -0,0 +1,219 @@
+'use strict';
+const fs = require('fs');
+const os = require('os');
+const path = require('path');
+const { spawnSync } = require('child_process');
+const PLUGIN_NAME = "babysitter";
+const PLUGIN_CATEGORY = 'Coding';
+function getUserHome() {
+  return os.homedir();
+}
+function getHarnessHome() {
+  return path.join(os.homedir(), '.a5c');
+}
+function getHomePluginRoot(scope) {
+  if (scope === 'workspace') return path.join(process.cwd(), '.a5c', 'plugins', PLUGIN_NAME);
+  return path.join(path.join(getHarnessHome(), 'plugins'), PLUGIN_NAME);
+}
+function getHomeMarketplacePath() {
+  return path.join(getHarnessHome(), 'plugins', 'marketplace.json');
+}
+function writeFileIfChanged(filePath, contents) {
+  try {
+    const existing = fs.readFileSync(filePath, 'utf8');
+    if (existing === contents) return false;
+  } catch {}
+  fs.mkdirSync(path.dirname(filePath), { recursive: true });
+  fs.writeFileSync(filePath, contents);
+  return true;
+}
+function copyRecursive(src, dest) {
+  fs.mkdirSync(dest, { recursive: true });
+  for (const entry of fs.readdirSync(src, { withFileTypes: true })) {
+    if (entry.name === 'node_modules' || entry.name === '.git') continue;
+    const s = path.join(src, entry.name);
+    const d = path.join(dest, entry.name);
+    if (entry.isDirectory()) {
+      copyRecursive(s, d);
+    } else {
+      fs.copyFileSync(s, d);
+    }
+  }
+}
+function copyPluginBundle(packageRoot, pluginRoot) {
+  const bundleEntries = fs.readdirSync(packageRoot).filter(
+    e => !['node_modules', '.git', 'test', 'dist'].includes(e)
+  );
+  fs.mkdirSync(pluginRoot, { recursive: true });
+  for (const entry of bundleEntries) {
+    const src = path.join(packageRoot, entry);
+    const dest = path.join(pluginRoot, entry);
+    const stat = fs.statSync(src);
+    if (stat.isDirectory()) {
+      copyRecursive(src, dest);
+    } else {
+      fs.copyFileSync(src, dest);
+    }
+  }
+}
+function readJson(filePath) {
+  try {
+    return JSON.parse(fs.readFileSync(filePath, 'utf8'));
+  } catch {
+    return null;
+  }
+}
+function writeJson(filePath, value) {
+  fs.mkdirSync(path.dirname(filePath), { recursive: true });
+  fs.writeFileSync(filePath, JSON.stringify(value, null, 2) + '\n');
+}
+function ensureExecutable(filePath) {
+  try {
+    fs.chmodSync(filePath, 0o755);
+  } catch {}
+}
+function normalizeMarketplaceSourcePath(source, marketplacePath) {
+  if (typeof source === 'string') {
+    return path.relative(path.dirname(marketplacePath), source).replace(/\\/g, '/');
+  }
+  return source;
+}
+function ensureMarketplaceEntry(marketplacePath, pluginRoot) {
+  let marketplace = readJson(marketplacePath) || {
+    name: "a5c.ai",
+    plugins: [],
+  };
+  if (!Array.isArray(marketplace.plugins)) marketplace.plugins = [];
+  const idx = marketplace.plugins.findIndex(p => p.name === PLUGIN_NAME);
+  const relSource = './' + normalizeMarketplaceSourcePath(pluginRoot, marketplacePath);
+  const entry = {
+    name: PLUGIN_NAME,
+    source: relSource,
+    description: "Orchestrate complex, multi-step workflows with event-sourced state management, hook-based extensibility, and human-in-the-loop approval",
+    version: "5.0.1-staging.04ca6ab00d21",
+    author: { name: "a5c.ai" },
+  };
+  if (idx >= 0) marketplace.plugins[idx] = entry;
+  else marketplace.plugins.push(entry);
+  writeJson(marketplacePath, marketplace);
+}
+function removeMarketplaceEntry(marketplacePath) {
+  const marketplace = readJson(marketplacePath);
+  if (!marketplace || !Array.isArray(marketplace.plugins)) return;
+  marketplace.plugins = marketplace.plugins.filter(p => p.name !== PLUGIN_NAME);
+  writeJson(marketplacePath, marketplace);
+}
+function warnWindowsHooks() {
+  if (process.platform === 'win32') {
+    console.warn('[' + PLUGIN_NAME + '] Windows detected — shell hooks (.sh) require Git Bash or WSL.');
+  }
+}
+function runPostInstall(pluginRoot) {
+  const postInstall = path.join(pluginRoot, 'scripts', 'post-install.js');
+  if (fs.existsSync(postInstall)) {
+    spawnSync(process.execPath, [postInstall], {
+      cwd: pluginRoot, stdio: 'inherit',
+      env: { ...process.env, PLUGIN_ROOT: pluginRoot },
+    });
+  }
+}
+function getGlobalStateDir() {
+  return process.env.BABYSITTER_GLOBAL_STATE_DIR || path.join(getUserHome(), '.a5c');
+}
+function resolveCliCommand(packageRoot) {
+  try {
+    const result = spawnSync('babysitter', ['--version'], { stdio: 'pipe', timeout: 10000 });
+    if (result.status === 0) return 'babysitter';
+  } catch {}
+  const versionsPath = path.join(packageRoot, 'versions.json');
+  const versions = readJson(versionsPath) || {};
+  const ver = versions.sdkVersion || 'latest';
+  return `npx -y @a5c-ai/babysitter-sdk@${ver}`;
+}
+function runCli(packageRoot, cliArgs, options = {}) {
+  const cmd = resolveCliCommand(packageRoot);
+  const parts = cmd.split(' ');
+  const result = spawnSync(parts[0], [...parts.slice(1), ...cliArgs], {
+    stdio: options.stdio || 'inherit',
+    timeout: options.timeout || 120000,
+    cwd: options.cwd || process.cwd(),
+    env: { ...process.env, ...options.env },
+  });
+  return result;
+}
+function ensureGlobalProcessLibrary(packageRoot) {
+  const stateDir = getGlobalStateDir();
+  const activeFile = path.join(stateDir, 'active', 'process-library.json');
+  let active = readJson(activeFile);
+  if (active && active.binding && active.binding.dir) {
+    return active;
+  }
+  const defaultSpec = readJson(path.join(stateDir, 'process-library-defaults.json'));
+  const cloneDir = defaultSpec && defaultSpec.cloneDir
+    ? defaultSpec.cloneDir
+    : path.join(stateDir, 'process-library', PLUGIN_NAME + '-repo');
+  runCli(packageRoot, [
+    'process-library:clone',
+    '--dir', cloneDir,
+    '--state-dir', stateDir,
+    '--json',
+  ], { stdio: 'pipe' });
+  runCli(packageRoot, [
+    'process-library:use',
+    '--dir', cloneDir,
+    '--state-dir', stateDir,
+    '--json',
+  ], { stdio: 'pipe' });
+  active = readJson(activeFile);
+  return {
+    binding: active && active.binding ? active.binding : { dir: cloneDir },
+    defaultSpec: defaultSpec || { cloneDir },
+    stateFile: activeFile,
+  };
+}
+module.exports = {
+  PLUGIN_NAME,
+  PLUGIN_CATEGORY,
+  getUserHome,
+  getHarnessHome,
+  getHomePluginRoot,
+  getHomeMarketplacePath,
+  writeFileIfChanged,
+  copyRecursive,
+  copyPluginBundle,
+  readJson,
+  writeJson,
+  ensureExecutable,
+  normalizeMarketplaceSourcePath,
+  ensureMarketplaceEntry,
+  removeMarketplaceEntry,
+  warnWindowsHooks,
+  runPostInstall,
+  getGlobalStateDir,
+  resolveCliCommand,
+  runCli,
+  ensureGlobalProcessLibrary,
+};

package/bin/install.js ADDED Viewed

@@ -0,0 +1,30 @@
+#!/usr/bin/env node
+'use strict';
+const path = require('path');
+const shared = require('./install-shared');
+const PACKAGE_ROOT = path.resolve(__dirname, '..');
+function main() {
+  const pluginRoot = shared.getHomePluginRoot();
+  const marketplacePath = shared.getHomeMarketplacePath();
+  console.log(`[${shared.PLUGIN_NAME}] Installing plugin to ${pluginRoot}`);
+  try {
+    shared.copyPluginBundle(PACKAGE_ROOT, pluginRoot);
+    shared.ensureMarketplaceEntry(marketplacePath, pluginRoot);
+    if (typeof shared.harnessInstall === 'function') {
+      shared.harnessInstall(PACKAGE_ROOT, pluginRoot);
+    }
+    shared.runPostInstall && shared.runPostInstall(pluginRoot);
+    console.log(`[${shared.PLUGIN_NAME}] Installation complete!`);
+    console.log(`[${shared.PLUGIN_NAME}] Restart your IDE/CLI to pick up the plugin.`);
+  } catch (err) {
+    console.error(`[${shared.PLUGIN_NAME}] Failed to install: ${err.message}`);
+    process.exitCode = 1;
+  }
+}
+main();

package/bin/uninstall.js ADDED Viewed

@@ -0,0 +1,24 @@
+#!/usr/bin/env node
+'use strict';
+const fs = require('fs');
+const shared = require('./install-shared');
+function main() {
+  const pluginRoot = shared.getHomePluginRoot();
+  if (!fs.existsSync(pluginRoot)) {
+    console.log(`[${shared.PLUGIN_NAME}] Plugin not installed at ${pluginRoot}`);
+    return;
+  }
+  try {
+    fs.rmSync(pluginRoot, { recursive: true, force: true });
+    console.log(`[${shared.PLUGIN_NAME}] Uninstalled from ${pluginRoot}`);
+  } catch (err) {
+    console.error(`[${shared.PLUGIN_NAME}] Failed to uninstall: ${err.message}`);
+    process.exitCode = 1;
+  }
+}
+main();

package/commands/assimilate.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Assimilate an external methodology, harness, or specification into babysitter process definitions with skills and agents."
+prompt = "Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md).\n\nUse the assimilation domain processes from the active process library to convert external sources into well-defined babysitter process definitions with accompanying skills/ and agents/ directories.\n\nIf the workspace does not already have an active process-library binding, initialize it first through the shared global SDK binding:\n\n```bash\nbabysitter process-library:active --json\n```\n\nRun the process after formalizing it.\n\nAvailable assimilation workflows:\n- **methodology-assimilation** (`specializations/meta/assimilation/workflows/methodology-assimilation`) - Learns an external methodology from its repo and converts procedural instructions, commands, and manual flows into babysitter processes with refactored skills and agents. Supports output as methodology or specialization.\n- **harness integration** (`specializations/meta/assimilation/harness/*`) - Integrates babysitter SDK with a specific AI coding harness (generic, codex, opencode, gemini-cli, openclaw, antigravity).\n\nDuring the interview phase, determine which assimilation workflow to use based on the user's target:\n- If the target is a **repo URL or methodology name** then use the methodology-assimilation workflow.\n- If the target is a **harness name** (e.g. codex, opencode, antigravity) then use the matching harness process.\n- If the target is a **specification or other source** then adapt the methodology-assimilation workflow for the spec format.\n- If unclear, ask the user to clarify the assimilation target and type.\n\n## After Assimilation: Contribute Back\n\nAfter successfully assimilating a methodology or harness integration, prompt the user to share it with the community. The assimilated process definitions, skills, and agents could benefit other babysitter users:\n\n- **Completed a methodology assimilation**: `/babysitter:contrib library contribution: assimilated [methodology-name] into babysitter process definitions`\n- **Completed a harness integration**: `/babysitter:contrib library contribution: [harness-name] harness integration`\n- **Hit issues during assimilation** (e.g. unsupported patterns, missing SDK features): `/babysitter:contrib bug report: assimilation of [target] failed because [description]` or `/babysitter:contrib feature request: [what the SDK needs to support]`\n\nEven just reporting that an assimilation didn't work well helps improve babysitter for everyone."

package/commands/call.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Orchestrate a babysitter run. use this command to start babysitting a complex workflow."
+prompt = "Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md). Then continue executing the returned instructions in this same turn. Do not stop after the Skill tool returns; carry the requested run through to completion proof.\n\nUser arguments for this command:\n\n$ARGUMENTS"

package/commands/cleanup.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Clean up .a5c/runs and .a5c/processes directories. Aggregates insights from completed/failed runs into docs/run-history-insights.md, then removes old run data and orphaned process files."
+prompt = "Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md).\n\nCreate and run a cleanup process using the process at `skills\\babysit\\process\\cradle\\cleanup-runs.js/processes/cleanup-runs.js`.\n\nImplementation notes (for the process):\n- Parse arguments for `--dry-run` flag (if present, set dryRun: true in inputs) and `--keep-days N` (default: 7)\n\nCRITICAL: The cleanup MUST follow this exact phase order. Do NOT delete any run before Phase 2 completes.\n\nPhase 1 — Scan:\n- Scan .a5c/runs/ for all runs\n- Classify each as terminal (completed/failed) or active (in-progress/created)\n- Identify terminal runs older than the keep-days threshold as removal candidates\n- Never mark active/in-progress runs for removal\n- Count and report: total runs, terminal, active, removal candidates, disk usage\n\nPhase 2 — Aggregate insights (BEFORE any deletion):\n- For EVERY removal candidate, read its run.json and journal/ events\n- Extract: processId, prompt, status, event count, created date, task summaries\n- Group by process type and extract patterns (retry counts, convergence behavior, failure modes)\n- Append a new dated section to docs/run-history-insights.md with:\n  - Summary statistics (runs removed, disk freed, runs retained)\n  - Run categories with counts and descriptions\n  - Key patterns observed (multi-batch convergence, retry behavior, etc.)\n  - What worked well / what didn't from the run data\n- This file MUST be written and verified before proceeding to Phase 3\n\nPhase 3 — Confirm removal:\n- In interactive mode, show the user what will be removed via a breakpoint\n- In non-interactive mode (yolo), proceed with defaults\n- In dry-run mode, stop here and show what would be removed\n\nPhase 4 — Remove:\n- Delete the terminal runs older than keep-days threshold\n- Identify and remove orphaned process files not referenced by remaining runs\n- Show remaining run count and disk usage after cleanup"

package/commands/contrib.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Submit feedback or contribute to babysitter project"
+prompt = "Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md).\n\n## Process Routing\n\nContribution processes live under the active process library's `cradle/` directory. Resolve the active library root with `babysitter process-library:active --json` and route based on arguments:\n\n### Issue-based (opens a GitHub issue in a5c-ai/babysitter)\n * **Bug report** → `cradle/bug-report.js#process` — Report a bug in the SDK, CLI, process library, etc.\n * **Feature request** → `cradle/feature-request.js#process` — Request a new feature or enhancement\n * **Documentation question** → `cradle/documentation-question.js#process` — Ask about undocumented behavior or missing docs\n\n### PR-based (forks repo, creates branch, submits PR to a5c-ai/babysitter)\n * **Bugfix** → `cradle/bugfix.js#process` — User already has the fix for a bug\n * **Feature implementation** → `cradle/feature-implementation-contribute.js#process` — User already has a feature implementation\n * **Harness integration** → `cradle/feature-harness-integration-contribute.js#process` — User has a harness (CI/CD, IDE, editor) integration\n * **Library contribution** → `cradle/library-contribution.js#process` — New or improved process/skill/subagent for the library\n * **Documentation answer** → `cradle/documentation-contribute-answer.js#process` — User has an answer for an unanswered docs question\n\n### Router (when arguments are empty or general)\n * **Contribute** → `cradle/contribute.js#process` — Explains contribution types and routes to the specific process\n\n## Contribution Rules\n\n * PR-based contributions: fork the babysitter repo (a5c-ai/babysitter) for the user, ask to star if not already starred, perform changes, submit PR\n * Issue-based contributions: gather details, search for duplicates, review, then open an issue in a5c-ai/babysitter\n * Add breakpoints (permissions) before ALL gh actions (fork, star, submit PR/issue) to allow user review and cancellation\n * If arguments are empty: use the `contribute.js` router process to show options and route accordingly"

package/commands/doctor.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Diagnose babysitter run health - journal integrity, state cache, effects, locks, sessions, logs, and disk usage"
+prompt = "You are a diagnostic agent for the babysitter runtime. Your job is to perform a comprehensive health check across 14 areas and produce a structured diagnostic report. Follow each section methodically. Track results as you go and produce the final summary at the end.\n\nInitialize a results tracker with these 14 checks, all starting as PENDING:\n1. Run Discovery\n2. Journal Integrity\n3. State Cache Consistency\n4. Effect Status\n5. Lock Status\n6. Session State\n7. Log Analysis\n8. Disk Usage\n9. Process Validation\n10. Hook Execution Health\n11. Session-ID Provenance\n12. Ancestor Liveness\n13. Concurrent Session Detection\n14. Windows Ancestor-Walk Strategy\n\n---\n\n## 1. Run Discovery\n\n**Goal:** Identify the target run and display its metadata.\n\n- List all runs by running: `ls -lt .a5c/runs/`\n- If the user provided a run ID argument, use that as the run ID. Otherwise, use the most recent run directory (the first entry from the listing).\n- Store the resolved run ID and construct the run directory path: `.a5c/runs/<runId>`\n- Verify the run directory exists. If it does not exist, report FAIL for this check and stop the entire diagnostic (no run to diagnose).\n- Show run metadata by running: `npx babysitter run:status .a5c/runs/<runId> --json`\n- Parse and display: runId, processId, entrypoint/importPath, createdAt, current state.\n- Mark this check as PASS.\n\n---\n\n## 2. Journal Integrity\n\n**Goal:** Verify the append-only event journal is well-formed and uncorrupted.\n\n- List all journal events by running: `npx babysitter run:events .a5c/runs/<runId> --json`\n- List all files in `.a5c/runs/<runId>/journal/` sorted by name.\n- If the journal directory is empty or missing, mark as FAIL and note \"No journal entries found.\"\n\nFor each journal file (named `<seq>.<ulid>.json`):\n\n**Sequential numbering check:**\n- Extract the sequence number prefix from each filename (e.g., `000001` from `000001.01JAXYZ.json`).\n- Verify sequence numbers are contiguous starting from 000001 with no gaps.\n- If gaps found, mark as WARN and list the missing sequence numbers.\n\n**Checksum verification:**\n\nThe SDK computes checksums as follows: it first builds the event payload **without** the `checksum` field (`{ type, recordedAt, data }`), serializes it with `JSON.stringify(payload, null, 2) + \"\\n\"` (pretty-printed with a trailing newline), then computes SHA256 of that string. To verify:\n\n- Read each journal file as JSON.\n- Extract and remove the `checksum` field from the parsed object.\n- Re-serialize the remaining object with `JSON.stringify(remaining, null, 2) + \"\\n\"` — **must** use 2-space indentation and a trailing newline to match the SDK.\n- Compute SHA256 (hex) of that exact string.\n- Compare computed checksum with the stored checksum.\n- If any mismatch, mark as FAIL and list the corrupt files.\n\nExample bash one-liner for a single file:\n```bash\nnode -e \"const fs=require('fs'); const f=process.argv[1]; const obj=JSON.parse(fs.readFileSync(f,'utf8')); const stored=obj.checksum; delete obj.checksum; const expected=require('crypto').createHash('sha256').update(JSON.stringify(obj,null,2)+'\\n').digest('hex'); console.log(stored===expected?'OK':'MISMATCH',f)\" <file>\n```\n\n**Timestamp monotonicity check:**\n- Extract `recordedAt` from each event.\n- Verify each timestamp is >= the previous one.\n- If any timestamp goes backward, mark as WARN and list the offending entries.\n\n**Event type summary:**\n- Count events by type: RUN_CREATED, EFFECT_REQUESTED, EFFECT_RESOLVED, STOP_HOOK_INVOKED, RUN_COMPLETED, RUN_FAILED, and any other types encountered.\n- Display the counts in a table.\n\n**Orphan detection:**\n- Flag any files in the journal directory that do not match the expected `<seq>.<ulid>.json` naming pattern.\n\nIf all sub-checks pass, mark as PASS. If any sub-check is WARN, mark as WARN. If any sub-check is FAIL, mark as FAIL.\n\n---\n\n## 3. State Cache Consistency\n\n**Goal:** Verify the derived state cache matches the current journal.\n\n- Check if `.a5c/runs/<runId>/state/state.json` exists.\n- If it does not exist, mark as WARN and recommend: `npx babysitter run:rebuild-state .a5c/runs/<runId>`\n\nIf it exists:\n- Read `state.json` and extract the `journalHead` field (contains `seq`, `ulid`, and `checksum`).\n- Determine the actual last journal entry by reading the last file in `.a5c/runs/<runId>/journal/` (highest sequence number).\n- Extract the sequence number and ULID from the last journal filename, and the checksum from its content.\n- Compare:\n  - `journalHead.seq` should match the last journal file's sequence number.\n  - `journalHead.ulid` should match the last journal file's ULID.\n  - `journalHead.checksum` should match the last journal file's checksum.\n- If all match, mark as PASS.\n- If any mismatch, mark as WARN and recommend: `npx babysitter run:rebuild-state .a5c/runs/<runId>`\n- Also verify `schemaVersion` field is present and report its value.\n\n---\n\n## 4. Effect Status\n\n**Goal:** Identify stuck, errored, or pending effects.\n\n- Run: `npx babysitter task:list .a5c/runs/<runId> --json`\n- Run: `npx babysitter task:list .a5c/runs/<runId> --pending --json`\n- Parse the JSON output from both commands.\n\n**All effects summary:**\n- Count total effects, resolved effects, and pending effects.\n- Group and count effects by `kind` (node, breakpoint, orchestrator_task, sleep, etc.).\n\n**Stuck effect detection:**\n- For each pending effect, check its `requestedAt` timestamp.\n- If any pending effect was requested more than 30 minutes ago, flag it as STUCK.\n- List stuck effects with their effectId, kind, taskId, and age.\n\n**Error detection:**\n- Identify any effects with error status in their results.\n- List errored effects with their effectId and error message.\n\n**Pending summary:**\n- Summarize pending effects grouped by kind with count per kind.\n\nMark as PASS if no stuck or errored effects. Mark as WARN if there are pending effects older than 30 minutes. Mark as FAIL if there are errored effects.\n\n---\n\n## 5. Lock Status\n\n**Goal:** Detect stale or orphaned run locks.\n\n- Check if `.a5c/runs/<runId>/run.lock` exists.\n- If it does not exist, mark as PASS (\"No lock held -- run is not actively being iterated\").\n\nIf it exists:\n- Read the lock file (JSON with `pid`, `owner`, `acquiredAt`).\n- Display the lock info: PID, owner, acquired time, and age of the lock.\n- Check if the PID is still alive by running: `kill -0 <pid> 2>/dev/null; echo $?` (exit code 0 means alive, non-zero means dead). On Windows/MINGW, use `tasklist //FI \"PID eq <pid>\" 2>/dev/null` or equivalent.\n- If the process is alive, mark as PASS (\"Lock held by active process\").\n- If the process is dead, mark as FAIL (\"Stale lock detected -- process <pid> is no longer running\").\n  - Recommend: `rm .a5c/runs/<runId>/run.lock`\n\n---\n\n## 6. Session State\n\n**Goal:** Inspect babysitter session files for health and detect runaway loops.\n\n- Search for session state files using Glob:\n  - `.a5c/state/*.md`\n  - `.a5c/state/*.json`\n- For each session state file found:\n  - Read the file and extract available information: iteration count, associated runId, timestamps, session status.\n  - Display: filename, iteration count, runId (if present), last activity time.\n\n**Runaway loop detection:**\n- If any session file contains iteration timing data, compute the average time between iterations.\n- If the average iteration time is less than 3 seconds, flag as WARN (\"Possible runaway loop detected -- average iteration time is under 3 seconds\").\n\n**Session classification:**\n- Active: session has recent activity (within last 30 minutes).\n- Stale: session has no activity for more than 30 minutes.\n- Display counts of active vs stale sessions.\n\nMark as PASS if no issues. Mark as WARN if runaway loops or stale sessions detected.\n\n---\n\n## 7. Log Analysis\n\n**Goal:** Analyze babysitter log files for errors, warnings, and stop hook decisions.\n\nRead the last 50 lines of each of these log files (if they exist):\n- `${BABYSITTER_LOG_DIR:-$HOME/.a5c/logs}/hooks.log`\n- `${BABYSITTER_LOG_DIR:-$HOME/.a5c/logs}/babysitter-stop-hook.log`\n- `${BABYSITTER_LOG_DIR:-$HOME/.a5c/logs}/babysitter-stop-hook-stderr.log`\n- `${BABYSITTER_LOG_DIR:-$HOME/.a5c/logs}/babysitter-session-start-hook.log`\n- `${BABYSITTER_LOG_DIR:-$HOME/.a5c/logs}/babysitter-session-start-hook-stderr.log`\n- `${BABYSITTER_LOG_DIR:-$HOME/.a5c/logs}/babysitter.log`\n- `${BABYSITTER_LOG_DIR:-$HOME/.a5c/logs}/` and relevant run/session specific logs there\n\n\nFor each log file:\n- If the file does not exist, note it as \"Not found (OK if hooks have not run yet).\"\n- If the file exists, analyze its content.\n\n**Stop hook analysis (babysitter-stop-hook.log):**\n- Count lines containing \"approve\" vs \"block\" decisions (case-insensitive).\n- Display the approve/block ratio.\n- Show the last 20 stop hook decision entries (lines containing \"approve\" or \"block\").\n- Count and display CLI exit codes from lines containing \"CLI exit code=\".\n\n**Stderr analysis (babysitter-stop-hook-stderr.log, babysitter-session-start-hook-stderr.log):**\n- If stderr logs contain content, display the last 20 lines from each.\n- Look for common failure patterns: \"command not found\", \"MODULE_NOT_FOUND\", \"ENOENT\", \"EACCES\", \"permission denied\", \"npm ERR\", \"Cannot find module\".\n- Flag any stderr content as a potential issue.\n\n**Error/Warning detection (all logs):**\n- Count and list lines containing \"ERROR\" or \"WARN\" (case-insensitive).\n- Display the last 10 error/warning lines from each log.\n\nMark as PASS if no ERROR lines found and stderr logs are empty. Mark as WARN if WARN lines found or stderr has content but no ERROR. Mark as FAIL if ERROR lines found.\n\n---\n\n## 8. Disk Usage\n\n**Goal:** Report disk consumption and identify oversized files.\n\n- Run `du -sh .a5c/runs/<runId>` for the total run directory size.\n- Run `du -sh` on each subdirectory:\n  - `.a5c/runs/<runId>/journal/`\n  - `.a5c/runs/<runId>/tasks/`\n  - `.a5c/runs/<runId>/blobs/`\n  - `.a5c/runs/<runId>/state/`\n  - `.a5c/runs/<runId>/process/` (if it exists)\n\n- Display results in a table: directory, size.\n\n**Large file detection:**\n- Find individual files larger than 10MB within the run directory: `find .a5c/runs/<runId> -type f -size +10M -exec ls -lh {} \\;`\n- If any found, list them with their paths and sizes.\n\n- Report the total run directory size prominently.\n\nMark as PASS if total size < 500MB and no files > 10MB. Mark as WARN if total size > 500MB or any files > 10MB. Mark as FAIL if total size > 2GB.\n\n---\n\n## 9. Process Validation\n\n**Goal:** Verify the process entrypoint and SDK dependency are valid.\n\n- Read `.a5c/runs/<runId>/run.json` and extract the `importPath` (or `entrypoint`) field.\n- Check if the referenced process file exists on disk. Use Glob or file read to verify.\n- If the file does not exist, mark as FAIL (\"Process entrypoint not found on disk\").\n\n**SDK dependency check:**\n- Read `.a5c/package.json` (if it exists) or the project root `package.json`.\n- Check for `@a5c-ai/babysitter-sdk` in `dependencies` or `devDependencies`.\n- Report the installed version.\n- If the dependency is missing, mark as WARN.\n- If present, verify it looks like a valid semver version and mark as PASS.\n\n---\n\n## 10. Hook Execution Health\n\n**Goal:** Verify that the stop hook and session-start hook are properly configured, can execute, and have been running. If the stop hook has NOT been running, diagnose why.\n\n### 10a. Hook Registration\n\n- Locate the plugin root. Check for `CLAUDE_PLUGIN_ROOT` env var first, or search for a babysitter `hooks.json` by walking up from the current directory.\n- If found, read `hooks.json` and verify:\n  - A `Stop` hook entry exists with a command referencing `babysitter-stop-hook.sh`.\n  - A `SessionStart` hook entry exists with a command referencing `babysitter-session-start-hook.sh`.\n- If `hooks.json` is not found, mark as FAIL (\"Hook registration file not found — hooks are not registered with Claude Code\").\n\n### 10b. Hook Script Availability\n\n- Locate the hook scripts relative to the plugin root:\n  - `hooks/babysitter-stop-hook.sh`\n  - `hooks/babysitter-session-start-hook.sh`\n- For each script:\n  - Check if the file exists.\n  - Check if it is executable (`test -x <path>`).\n- If any script is missing or not executable, mark as FAIL and list which scripts are missing/not-executable.\n\n### 10c. CLI Availability (babysitter command)\n\nThe hooks delegate to the `babysitter` CLI. Check if it is available:\n- Run: `command -v babysitter 2>/dev/null && babysitter --version 2>/dev/null`\n- If the command is found, display its path and version. Mark sub-check as PASS.\n- If not found, check the user-local prefix: `$HOME/.local/bin/babysitter --version 2>/dev/null`\n- If neither is found, mark sub-check as FAIL (\"babysitter CLI not found — hooks will fail with exit code 127. Install with: `npm i -g @a5c-ai/babysitter-sdk`\").\n\n### 10d. Stop Hook Execution Evidence\n\nCheck whether the stop hook has actually been invoked during this run's lifetime:\n\n**From log files:**\n- Read `${BABYSITTER_LOG_DIR:-$HOME/.a5c/logs}/babysitter-stop-hook.log` (if it exists).\n- Count the number of \"Hook script invoked\" lines. This is the total invocation count.\n- Count the number of \"CLI exit code=\" lines and extract exit codes.\n- If the log file does not exist or has zero invocations, the stop hook has NOT been running.\n\n**From journal events:**\n- Search the run's journal events for `STOP_HOOK_INVOKED` type events (using the run:events output from section 2 if available).\n- Count the number of STOP_HOOK_INVOKED events.\n- If present, display the last 5 with their timestamps and decision data.\n- If no STOP_HOOK_INVOKED events exist in the journal, note that the stop hook has not recorded any decisions for this run.\n\n**From stderr:**\n- Read `${BABYSITTER_LOG_DIR:-$HOME/.a5c/logs}/babysitter-stop-hook-stderr.log`.\n- If it contains error output, display it and diagnose:\n  - \"command not found\" or exit code 127 → CLI not installed (see 10c)\n  - \"MODULE_NOT_FOUND\" or \"Cannot find module\" → SDK package corrupted or not built\n  - \"ENOENT\" → Missing file referenced by the hook\n  - \"EACCES\" or \"permission denied\" → Permission issue on hook script or CLI\n  - \"npm ERR\" → npm installation failure during hook execution\n\n### 10e. Stop Hook Not Running — Root Cause Diagnosis\n\nIf the stop hook shows NO evidence of execution (no log entries, no journal events, zero invocations):\n\nPerform these diagnostic steps in order and report the first failure found:\n\n1. **Plugin not installed**: Check if `CLAUDE_PLUGIN_ROOT` is set or if a babysitter plugin directory exists relative to the project root. If neither exists, report: \"Plugin not installed — the babysitter plugin directory is missing.\"\n\n2. **Plugin not enabled**: Check for Claude settings files:\n   - `~/.claude/settings.json` — look for `babysitter` in `enabledPlugins`.\n   - `~/.claude/plugins/installed_plugins.json` — look for `babysitter` in the plugins list.\n   - If not found in either, report: \"Plugin not enabled in Claude Code settings.\"\n\n3. **hooks.json not registered**: If `hooks.json` doesn't contain a `Stop` hook entry (checked in 10a), report: \"Stop hook not registered in hooks.json.\"\n\n4. **Hook script missing or not executable**: If the stop hook script doesn't exist or isn't executable (checked in 10b), report with the specific file path.\n\n5. **CLI not available**: If `babysitter` CLI is not found (checked in 10c), report: \"babysitter CLI not installed — hook script will fail silently.\"\n\n6. **Hook running but failing silently**: If the log file exists but shows exit codes other than 0, or if stderr has content, report: \"Stop hook is being invoked but failing — see stderr log for details.\"\n\n7. **No active session**: If no session state files exist (from section 6), report: \"No active babysitter session — the stop hook only activates when a session is bound to a run.\"\n\n8. **All checks pass but hook still not running**: Report: \"All prerequisites are met but the stop hook shows no evidence of execution. Possible causes: Claude Code may not be invoking plugin hooks (check Claude Code version), or the session may have ended before the hook could fire.\"\n\n### 10f. Verdict\n\nMark as PASS if:\n- Hook registration is correct (10a)\n- Hook scripts exist and are executable (10b)\n- CLI is available (10c)\n- There is evidence of stop hook execution (10d) with exit code 0\n\nMark as WARN if:\n- Hooks are registered and scripts exist, but there's no evidence of execution yet\n- Stop hook ran but had non-zero exit codes\n\nMark as FAIL if:\n- Hook registration is missing\n- Hook scripts are missing or not executable\n- CLI is not available\n- Stop hook is failing (consistent non-zero exit codes or stderr errors)\n\n---\n\n## 11. Session-ID Provenance\n\n**Goal:** Verify how the current babysitter session ID was resolved and flag stale or shadowed values.\n\n- Invoke: `npx babysitter session:whoami --json`\n- Parse the output and inspect the `resolvedFrom` field. Classify as follows:\n  - `resolvedFrom: \"pid-marker\"` → mark as PASS (\"Session ID derives from the live Claude Code ancestor process -- authoritative\").\n  - `resolvedFrom: \"env-file\"` → mark as PASS with a note (\"CLAUDE_ENV_FILE was used; typically healthy\").\n  - `resolvedFrom: \"env-var\"` → mark as WARN (\"`AGENT_SESSION_ID` is set without a corroborating PID marker. Likely stale from a prior Claude Code session -- see GitHub issue #130\").\n    - Remediation: run `babysitter session:cleanup` and start a fresh Claude Code session, or `unset AGENT_SESSION_ID` before invoking babysitter.\n  - `resolvedFrom: \"none\"` → mark as ERROR (\"No session ID resolvable. Either no session-start hook fired, or the ancestor walk failed\").\n\n**Env-var shadow check:**\n- Independently inspect `envVarPresent` and `envVarMatches` in the output.\n- If `envVarPresent && !envVarMatches`, mark as WARN (\"`AGENT_SESSION_ID` in env does not match the resolved session ID; a stale value is shadowing the authoritative one. Unset the env var\").\n\n---\n\n## 12. Ancestor Liveness\n\n**Goal:** Confirm the PID marker references a live Claude Code process.\n\n- Reuse the `session:whoami --json` output from check 11.\n- Inspect the `ancestorAlive` field.\n- If `ancestorAlive === false`, mark as ERROR (\"The PID marker references a dead Claude Code process\").\n  - Remediation: `babysitter session:cleanup`.\n- Otherwise mark as PASS.\n\n---\n\n## 13. Concurrent Session Detection\n\n**Goal:** Surface multiple live harness sessions that may compete for the same session ID.\n\n- Enumerate files in `~/.a5c/` matching the pattern `current-session-*-pid-*`.\n- Count markers per harness (derived from the filename).\n- If more than one live marker exists for the same harness, mark as INFO (\"Multiple live Claude Code / harness sessions detected; ensure each shell scopes `AGENT_SESSION_ID` appropriately -- the PID marker handles this automatically\").\n- Otherwise mark as PASS.\n\n---\n\n## 14. Windows Ancestor-Walk Strategy\n\n**Goal:** Verify the ancestor-walk strategy works on Windows, where `wmic` is no longer guaranteed to be present.\n\n- Only run this check when `process.platform === 'win32'`. On other platforms, mark as PASS (\"Not applicable -- non-Windows platform\").\n- Attempt the ancestor walk by invoking `npx babysitter session:whoami --json` (reuse output from check 11 if available).\n- If resolution succeeded (any `resolvedFrom` other than `none`), mark as PASS.\n- If `resolvedFrom: \"none\"` on Windows:\n  - Test `wmic` availability: `where wmic` via shell.\n  - If absent, document that Windows 11 24H2 removed `wmic`; the fallback PowerShell CIM path should handle this.\n  - If the PowerShell ancestor walk also failed, mark as ERROR with remediation: ensure PowerShell is available (`powershell -NoProfile -Command \"Get-CimInstance Win32_Process -Filter ProcessId=$PID\"` should work).\n- If the cascade works but is slow (>5s on first probe), add an INFO note on first-probe latency.\n\n---\n\n## Final Report\n\nAfter completing all 14 checks, produce the diagnostic report in this format:\n\n```\n============================================\n  BABYSITTER DIAGNOSTIC REPORT\n  Run: <runId>\n  Time: <current timestamp>\n============================================\n\nOVERALL HEALTH: <HEALTHY | WARNING | CRITICAL>\n\n--------------------------------------------\n  CHECK RESULTS\n--------------------------------------------\n\n| #  | Check                    | Status |\n|----|--------------------------|--------|\n| 1  | Run Discovery            | <status> |\n| 2  | Journal Integrity        | <status> |\n| 3  | State Cache Consistency  | <status> |\n| 4  | Effect Status            | <status> |\n| 5  | Lock Status              | <status> |\n| 6  | Session State            | <status> |\n| 7  | Log Analysis             | <status> |\n| 8  | Disk Usage               | <status> |\n| 9  | Process Validation       | <status> |\n| 10 | Hook Execution Health    | <status> |\n| 11 | Session-ID Provenance    | <status> |\n| 12 | Ancestor Liveness        | <status> |\n| 13 | Concurrent Session Detection | <status> |\n| 14 | Windows Ancestor-Walk Strategy | <status> |\n\n--------------------------------------------\n  ISSUES & RECOMMENDATIONS\n--------------------------------------------\n\n<For each WARN or FAIL check, list:>\n- [WARN|FAIL] <Check name>: <description of issue>\n  Fix: <specific actionable command or instruction>\n\n--------------------------------------------\n```\n\n**Overall health determination:**\n- **HEALTHY**: All 14 checks are PASS (INFO notes are acceptable).\n- **WARNING**: At least one check is WARN but none are FAIL or ERROR.\n- **CRITICAL**: At least one check is FAIL or ERROR.\n\nPresent the full detailed findings for each check BEFORE the summary table, so the user can see the evidence. End with the summary table and recommendations. Also, create a single HTML report file with all the findings that uses the arwes UI framework and open it for the user in the browser.\n\n---\n\n## Escalation: Claude /debug\n\nIf any check results in FAIL and the root cause is unclear after your own analysis -- especially for environment issues, hook execution failures, CLI availability problems, or permission errors that may relate to the Claude Code runtime itself -- invoke the built-in Claude `/debug` command to get additional diagnostic context from the Claude Code environment. This is particularly useful for:\n- Hook scripts that should be running but show no evidence of execution (check 10)\n- Permission or path resolution issues that don't match expected behavior\n- Unexpected CLI behavior that might be a Claude Code environment issue rather than a babysitter issue\n\nCall `/debug` with a summary of the failing check and what you've already ruled out, so it can focus on environment-level causes.\n\n---\n\n## After Diagnosis: Contribute Back\n\nAfter diagnosing issues, prompt the user to report or fix what was found -- they don't need to fix the bug themselves, just reporting it is valuable. Suggest `/babysitter:contrib` based on the situation:\n\n- **Found a bug but didn't fix it**: `/babysitter:contrib bug report: [what the doctor found, e.g. \"state cache rebuild silently drops EFFECT_RESOLVED events when journal has duplicate invocation keys\"]`\n- **Found and fixed a bug**: `/babysitter:contrib bugfix: [description of the fix]`\n- **Found confusing or missing docs that made diagnosis harder**: `/babysitter:contrib documentation question: [what was unclear or missing]`\n- **Found an issue in a plugin**: `/babysitter:contrib bug report: [plugin-name] [description]`\n- **Improved a process or skill during diagnosis**: `/babysitter:contrib library contribution: [description]`\n\nExample prompt after diagnosis:\n\n> \"Diagnosis found a stale lock -- process 12847 crashed without cleanup. This is a known edge case in the orchestration loop. Even if you don't want to fix it yourself, reporting it helps: run `/babysitter:contrib bug report: orchestration loop doesn't release lock on unhandled rejection` to open an issue.\"\n\n---\n\n## Self-Heal Suggestions\n\nIf any of checks 11-14 surface issues (stale env vars, dead ancestor PIDs, shadowed session IDs, or Windows ancestor-walk failures), suggest the following remediation sequence, in order. Present it as an actionable block:\n\n```bash\n# 1. Cleanup dead markers and orphaned state files\nbabysitter session:cleanup --dry-run   # preview\nbabysitter session:cleanup             # apply\n\n# 2. Unset a stale env var\nunset AGENT_SESSION_ID\n\n# 3. Re-bind a run explicitly if needed\nbabysitter session:resume --session-id <fresh-id> --state-dir ~/.a5c --run-id <runId> --runs-dir .a5c/runs\n\n# 4. Start a fresh Claude Code session (closes and reopens the session)\n```\n\nRun steps 1 and 2 first; re-run `/babysitter:doctor` after each step to confirm the session-provenance checks return to PASS. Step 3 is only needed when a specific run must be re-bound to the fresh session. If the issue persists after step 4, escalate via `/debug` or `/babysitter:contrib`."

package/commands/forever.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Use this command to start babysitting a never-ending babysitter run."
+prompt = "Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md). but create a process that uses an infinte loop and a ctx.sleep to create a never-ending babysitter loop. an example of such process is a daily process that reads new support ticket every day and tries to resolve them, then sleeps for 4 hours and repeats the process."

package/commands/help.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "help and documentation for babysitter command usage, processes, skills, agents, and methodologies. use this command to understand how to use babysitter effectively."
+prompt = "## if no arguments provided:\n\nshow this message:\n\n```\nWelcome to the Babysitter Help Center! Here you can find documentation and guidance on how to use Babysitter effectively.\n\nDocumentation: Explore our comprehensive documentation to understand Babysitter's features, processes, skills, agents, and methodologies. Read the Docs: https://github.com/a5c-ai/babysitter\n\nOr ask specific questions about commands, processes, skills, agents, methodologies, domains, specialities to get targeted help.\n\nJust type /babysitter:help followed by your question or the topic you want to learn more about.\n\n\nPRIMARY COMMANDS\n================\n\n/babysitter:call [input]\n  Start a babysitter-orchestrated run. Babysitter analyzes your request, interviews you\n  to gather requirements, selects or creates the best process definition (from 50+\n  domain-specific processes covering science, business, engineering, and more), then\n  executes it step by step with breakpoints where you can steer direction.\n\n  How it works: The babysitter skill reads your input, explores the process library to\n  find matching processes, interviews you to refine scope, creates an SDK run with\n  run:create, and orchestrates iterations with run:iterate -- dispatching tasks,\n  handling breakpoints, and posting results until the run completes or you pause it.\n\n  Example: /babysitter:call migrate our Express.js REST API to Fastify, keeping all\n  existing routes and middleware behavior identical, with integration tests proving\n  parity\n\n\n/babysitter:resume [run id or name]\n  Resume a paused or interrupted babysitter run. If you don't specify a run, babysitter\n  discovers all runs under .a5c/runs/, shows their status (created, waiting, completed,\n  failed), and suggests which incomplete run to pick up based on its process, pending\n  effects, and last activity.\n\n  How it works: Reads run metadata and journal, rebuilds state cache if stale, identifies\n  pending effects (breakpoints awaiting approval, tasks needing results), and continues\n  orchestration from exactly where it left off -- no work is repeated thanks to the\n  replay engine.\n\n  Example: /babysitter:resume\n  (discovers runs and offers: \"Run abc123 is waiting on a breakpoint in the 'review\n  test results' phase of your API migration -- resume this one?\")\n\n\n/babysitter:yolo [input]\n  Start a babysitter run in fully autonomous mode. Identical to /call but all breakpoints\n  are auto-approved and no user interaction is requested. The babysitter makes every\n  decision on its own until the run completes or hits a critical failure it can't recover\n  from. Best for well-understood tasks where you trust the process.\n\n  How it works: Same orchestration as /call, but the process context is configured to\n  skip breakpoint effects -- instead of pausing for human approval, each breakpoint\n  resolves immediately with an auto-approve result.\n\n  Example: /babysitter:yolo add comprehensive unit tests for all functions in\n  src/utils/ using vitest with >90% branch coverage\n\n\n/babysitter:plan [input]\n  Generate a detailed execution plan without running anything. Babysitter goes through\n  the full interview and process selection flow, designs the process definition with\n  all tasks, breakpoints, and dependencies, but stops before creating the actual SDK run.\n  You get a complete plan you can review, modify, or execute later with /call.\n\n  How it works: Runs the babysitter skill's planning phase only -- analyzes input,\n  matches to domain processes, interviews for requirements, then outputs the process\n  definition file and a human-readable execution plan showing each phase, task, and\n  decision point.\n\n  Example: /babysitter:plan redesign our database schema to support multi-tenancy,\n  migrate existing data, and update all queries -- I want to review the plan before\n  we touch anything\n\n\n/babysitter:forever [input]\n  Start a babysitter run that loops indefinitely with sleep intervals. Designed for\n  ongoing operational tasks: monitoring, periodic maintenance, continuous improvement,\n  or recurring workflows. The process uses an infinite loop with ctx.sleepUntil() to\n  pause between iterations.\n\n  How it works: Creates a process definition with a while(true) loop. Each cycle performs\n  the task (e.g., check metrics, process tickets, run audits), then calls ctx.sleepUntil()\n  to pause for a configured interval. The run stays in \"waiting\" state during sleep and\n  resumes automatically when the sleep expires on the next orchestration iteration.\n\n  Example: /babysitter:forever every 4 hours, check our GitHub issues labeled \"bug\",\n  attempt to reproduce and fix any that look straightforward, and submit PRs for the fixes\n\n\nSECONDARY COMMANDS\n==================\n\n/babysitter:doctor [issue]\n  Run a comprehensive 10-point health check on a babysitter run. Inspects journal\n  integrity (checksum verification, sequence gaps, timestamp ordering), state cache\n  consistency, stuck/errored effects, stale locks, session state, log files, disk usage,\n  process validation, and hook execution health. Produces a structured diagnostic report\n  with PASS/WARN/FAIL status per check and specific fix commands.\n\n  If no run ID is provided, automatically targets the most recent run. Can also diagnose\n  environment-wide issues like missing CLI, unregistered hooks, or plugin problems.\n\n  Example: /babysitter:doctor\n  (checks the latest run: \"CRITICAL -- Check 5 Lock Status: FAIL -- stale lock detected,\n  process 12847 is no longer running. Fix: rm .a5c/runs/abc123/run.lock\")\n\n\n/babysitter:assimilate [target]\n  Convert an external methodology, AI coding harness, or specification into native\n  babysitter process definitions. Takes a GitHub repo URL, harness name, or spec file\n  and produces a complete process package with skills/ and agents/ directories.\n\n  Two workflows available:\n  - Methodology assimilation: clones the repo, learns its procedures and commands,\n    converts manual flows into babysitter processes with refactored skills and agents\n  - Harness integration: wires babysitter's SDK into a specific AI coding tool\n    (codex, opencode, gemini-cli, antigravity, etc.) so it can orchestrate runs\n\n  Example: /babysitter:assimilate https://github.com/some-org/their-deployment-playbook\n  (clones the repo, analyzes their deployment procedures, and generates babysitter\n  processes that replicate the same workflow with proper task definitions and breakpoints)\n\n\n/babysitter:user-install\n  First-time onboarding for new babysitter users. Installs dependencies, runs an\n  interactive interview about your development specialties, preferred tools, coding\n  style, and how much autonomy you want babysitter to have. Builds a user profile\n  stored at ~/.a5c/user-profile.json that personalizes future runs.\n\n  Uses the cradle/user-install process which covers: dependency verification, user\n  interview (expertise areas, preferred languages, IDE, terminal setup), profile\n  generation, tool configuration, and optional global plugin installation.\n\n  Example: /babysitter:user-install\n  (walks you through: \"What's your primary programming language? What frameworks do\n  you use most? Do you prefer babysitter to auto-approve routine tasks or always ask?\")\n\n\n/babysitter:project-install\n  Onboard a new or existing project for babysitter orchestration. Researches the\n  codebase (reads package.json, scans directory structure, identifies frameworks and\n  patterns), interviews you about project goals and workflows, generates a project\n  profile at .a5c/project-profile.json, and optionally sets up CI/CD integration.\n\n  Uses the cradle/project-install process which covers: codebase analysis, project\n  interview, profile creation, recommended plugin installation, hook configuration,\n  and optional CI pipeline setup.\n\n  Example: /babysitter:project-install\n  (scans your repo: \"I see this is a Next.js 16 app with Tailwind, using vitest for\n  tests and PostgreSQL. What are your main development goals for this project?\")\n\n\n/babysitter:retrospect [run id or name]\n  Analyze a completed run to extract lessons and improve future runs. Reviews what\n  happened (journal events, task results, timing, errors), evaluates the process that\n  was followed, and suggests concrete improvements to process definitions, skills,\n  and agents. Interactive -- multiple breakpoints let you steer the analysis and\n  decide which improvements to implement.\n\n  Covers: run result analysis, process effectiveness review, improvement suggestions,\n  implementation of changes, and routing to /contrib if improvements belong in the\n  shared process library.\n\n  Example: /babysitter:retrospect\n  (analyzes the last run: \"The API migration run completed but the 'verify parity'\n  phase took 8 iterations because test assertions were too brittle. Suggestion: add\n  a fuzzy comparison step before strict assertion. Implement this fix?\")\n\n\n/babysitter:plugins [action]\n  Manage babysitter plugins: list installed plugins, browse marketplaces, install,\n  update, configure, uninstall, or create new plugins. Plugins are version-managed\n  instruction packages (not executable code) that guide the agent through install,\n  configure, and uninstall steps via markdown files.\n\n  Without arguments: shows installed plugins (name, version, marketplace, dates) and\n  available marketplaces. With arguments: routes to the specific action.\n\n  Key actions:\n  - install <name> --global|--project: fetch install.md from marketplace and execute\n  - configure <name> --global|--project: fetch configure.md and walk through options\n  - update <name> --global|--project: resolve migration chain via BFS and apply steps\n  - uninstall <name> --global|--project: fetch uninstall.md and execute removal\n  - create: scaffold a new plugin package with the meta/plugin-creation process\n\n  Example: /babysitter:plugins install sound-hooks --project\n  (fetches sound-hooks from marketplace, reads install.md, walks you through player\n  detection, sound selection, hook configuration, and registers in plugin-registry.json)\n\n\n/babysitter:contrib [feedback]\n  Submit feedback or contribute to the babysitter project. Routes to the appropriate\n  workflow based on what you want to do:\n\n  Issue-based (opens GitHub issue in a5c-ai/babysitter):\n  - Bug report: describe a bug in the SDK, CLI, or process library\n  - Feature request: propose a new feature or enhancement\n  - Documentation question: flag undocumented behavior or missing docs\n\n  PR-based (forks repo, creates branch, submits PR):\n  - Bugfix: you already have a fix ready\n  - Feature implementation: you've built a new feature\n  - Library contribution: new or improved process/skill/agent for the library\n  - Harness integration: CI/CD or IDE integration\n\n  Without arguments: shows all contribution types and helps you pick the right one.\n  Breakpoints are placed before all GitHub actions (fork, star, PR, issue) so you\n  can review before anything is submitted.\n\n  Example: /babysitter:contrib bug report: plugin:update-registry fails when the\n  marketplace hasn't been cloned yet, even though the registry update doesn't need\n  marketplace access\n\n\n/babysitter:observe\n  Launch the babysitter observer dashboard -- a real-time web UI that monitors active\n  and past runs. Displays task progress, journal events, orchestration state, and\n  effect status in your browser. Useful when running /yolo or /forever to watch\n  progress without interrupting the run.\n\n  How it works: Runs npx @a5c-ai/babysitter-observer-dashboard@latest which watches\n  the .a5c/runs/ directory (or a parent directory containing multiple projects) and\n  serves a live dashboard. The process is blocking -- it runs until you stop it, and\n  it prints the local URL to share with the user. Do not use `babysitter observe`\n  as a fallback; the core Babysitter CLI does not expose that subcommand.\n\n  Example: /babysitter:observe\n  (opens browser showing all runs with live-updating task\n  status, journal event stream, and effect resolution timeline)\n```\n\n## if arguments provided:\n\nif the argument is \"command [command name]\", \"process [process name]\", \"skill [skill name]\", \"agent [agent name]\", or \"methodology [methodology name]\", then show the detailed documentation for that specific command, process, skill, agent, or methodology after reading the relevant files."

package/commands/observe.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Launch the babysitter observer dashboard. Installs and runs the real-time observer UI that watches babysitter runs, displaying task progress, journal events, and orchestration state in your browser."
+prompt = "Run the babysitter observer dashboard:\n\n1. Determine the watch directory — this is usually the project's container directory (the parent of the project dir), or the current working directory if not specified.\n2. Launch the standalone dashboard package: `npx -y @a5c-ai/babysitter-observer-dashboard@latest --watch-dir <dir>`.\n3. This is a blocking process — it will keep running until stopped.\n4. Report the URL printed by the dashboard to the user, then open it in the browser.\n\nDo not fall back to `babysitter observe`; the core Babysitter CLI does not expose\nthat subcommand. Some harness runtimes may provide a separate\n`agent-platform observe` surface, but this skill uses the verified standalone\ndashboard package."

package/commands/plan.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Plan a babysitter run. use this command to plan a complex workflow, without actually running it."
+prompt = "Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md). focus on creating the best process possible, but without creating and running the actual run."

package/commands/plugins.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "manage babysitter plugins. use this command to see the list of installed babysitter plugins, their status, and manage them (install, update, uninstall, list from marketplace, add marketplace, configure plugin, create new plugin, etc)."
+prompt = "This command installs and manages plugins for babysitter. A plugin is a version-managed package of contextual instructions (for install, uninstall, configure, and update/migrate between versions), not a conventional software plugin.\n\nif the command is run without arguments, it lists all installed plugins with their name, version, marketplace, installation date, and last update date. as well as marketplaces added to the system. and instructions on how to install new plugins from marketplaces.\nif there are no marketplaces added, add the default marketplace:\n```bash\nbabysitter plugin:add-marketplace --marketplace-url https://github.com/a5c-ai/babysitter --marketplace-path plugins/a5c/marketplace/marketplace.json --global --json\n```\n\nPlugins can be installed at two scopes:\n- **global** (`--global`): stored under `~/.a5c/`, available for all projects\n- **project** (`--project`): stored under `<projectDir>/.a5c/`, project-specific\n\n## Marketplace Management\n\nMarketplaces are git repositories containing a `marketplace.json` manifest and plugin package directories. The SDK clones them locally with `--depth 1`.\n\n**Storage locations:**\n- Global: `~/.a5c/marketplaces/<name>/`\n- Project: `<projectDir>/.a5c/marketplaces/<name>/`\n\nThe marketplace name is derived from the git URL's last path segment (stripping `.git` suffix and trailing slashes).\n\n### Adding a marketplace\n\n```bash\nbabysitter plugin:add-marketplace --marketplace-url <url> [--marketplace-path <relative-path>] [--marketplace-branch <ref>] [--force] --global|--project [--json]\n```\n\nClones the marketplace repository to the local marketplaces directory. Use `--marketplace-path` to specify the relative path to `marketplace.json` within the repo (for monorepos or repos where the manifest is not at the root). Use `--marketplace-branch` to clone a specific branch, tag, or ref (defaults to the repo's default branch). Use `--force` to replace an existing marketplace clone (deletes and re-clones).\n\n### Updating a marketplace\n\n```bash\nbabysitter plugin:update-marketplace --marketplace-name <name> [--marketplace-branch <ref>] --global|--project [--json]\n```\n\nRuns `git pull` on the local marketplace clone to fetch latest changes. Use `--marketplace-branch` to switch to a different branch before pulling (works even with shallow clones).\n\n### Listing plugins in a marketplace\n\n```bash\nbabysitter plugin:list-plugins --marketplace-name <name> --global|--project [--json]\n```\n\nReads the `marketplace.json` manifest and returns all available plugins sorted alphabetically by name. Each entry includes: name, description, latestVersion, versions array, packagePath, tags, and author.\n\n## Plugin Installation\n\n**Note:** For `plugin:install`, `plugin:update`, `plugin:configure`, and `plugin:list-plugins`, the `--marketplace-name` flag is auto-detected when only one marketplace is cloned for the given scope. You can omit it if there's only one marketplace.\n\n### Flow\n\n1. Update the marketplace: `babysitter plugin:update-marketplace --marketplace-name <name> --global|--project`\n2. Check current state: `babysitter plugin:list-installed --global|--project` to see installed plugins and versions\n3. Install the plugin:\n\n```bash\nbabysitter plugin:install --plugin-name <name> [--marketplace-name <mp>] --global|--project [--json]\n```\n\nThis command resolves the plugin package path from the marketplace manifest, reads `install.md` from the plugin package directory, and returns the installation instructions. If an `install-process.js` file exists, the instructions may reference it as an automated install process.\n\n4. The agent performs the installation steps as defined in `install.md`\n5. The agent updates the registry:\n\n```bash\nbabysitter plugin:update-registry --plugin-name <name> --plugin-version <ver> --marketplace-name <mp> --global|--project [--json]\n```\n\n## Plugin Update (with migrations)\n\n```bash\nbabysitter plugin:update --plugin-name <name> --marketplace-name <mp> --global|--project [--json]\n```\n\nThis command:\n1. Reads the currently installed version from the registry\n2. Resolves the latest version from the marketplace manifest\n3. Looks in the plugin package's `migrations/` directory for migration files\n4. Uses BFS over the migration graph to find the shortest path from the installed version to the target version\n5. Returns the ordered migration instructions (content of each migration file in sequence)\n\n**Migration filename format:** `<fromVersion>_to_<toVersion>.<ext>` where:\n- Versions may contain alphanumerics, dots, dashes (e.g. `1.0.0`, `2.0.0-beta`)\n- Extensions: `.md` for markdown instructions, `.js` for executable process files\n- Examples: `1.0.0_to_1.1.0.md`, `2.0.0-beta_to_2.0.0.js`\n\nAfter performing the migration steps, update the registry:\n\n```bash\nbabysitter plugin:update-registry --plugin-name <name> --plugin-version <new-ver> --marketplace-name <mp> --global|--project [--json]\n```\n\n## Plugin Uninstallation\n\n```bash\nbabysitter plugin:uninstall --plugin-name <name> --marketplace-name <mp> --global|--project [--json]\n```\n\nReads `uninstall.md` from the plugin package directory and returns the uninstall instructions. After performing the uninstall steps, remove from registry:\n\n```bash\nbabysitter plugin:remove-from-registry --plugin-name <name> --global|--project [--json]\n```\n\n## Plugin Configuration\n\n```bash\nbabysitter plugin:configure --plugin-name <name> --marketplace-name <mp> --global|--project [--json]\n```\n\nReads `configure.md` from the plugin package directory and returns configuration instructions.\n\n## Registry Management\n\nThe plugin registry (`plugin-registry.json`) tracks installed plugins with schema version `2026.01.plugin-registry-v1`. Writes use atomic file operations (temp + rename) for crash safety.\n\n**Storage locations:**\n- Global: `~/.a5c/plugin-registry.json`\n- Project: `<projectDir>/.a5c/plugin-registry.json`\n\n### List installed plugins\n\n```bash\nbabysitter plugin:list-installed --global|--project [--json]\n```\n\nReturns all installed plugins sorted alphabetically. In `--json` mode, returns an array of registry entries. In human mode, displays a formatted table with name, version, marketplace, and timestamps.\n\n### Remove from registry\n\n```bash\nbabysitter plugin:remove-from-registry --plugin-name <name> --global|--project [--json]\n```\n\nRemoves a plugin entry from the registry. Returns error if the plugin is not present.\n\n## Plugin Creation\n\nTo create a new plugin package from scratch, use the `meta/plugin-creation` babysitter process. This process guides you through requirements analysis, structure design, instruction authoring, optional process file generation, validation, and marketplace integration.\n\n### Using the plugin creation process\n\nOrchestrate a babysitter run with the plugin creation process:\n\n```bash\n# Create inputs file\ncat > /tmp/plugin-inputs.json << 'EOF'\n{\n  \"pluginName\": \"my-plugin\",\n  \"description\": \"What the plugin does — be specific about install/configure/uninstall behavior\",\n  \"scope\": \"project\",\n  \"outputDir\": \"./plugins\",\n  \"components\": {\n    \"installProcess\": false,\n    \"configureProcess\": false,\n    \"uninstallProcess\": false,\n    \"migrations\": false,\n    \"processFiles\": false\n  },\n  \"marketplace\": {\n    \"name\": \"my-marketplace\",\n    \"author\": \"my-org\",\n    \"tags\": [\"category1\", \"category2\"]\n  }\n}\nEOF\n\n# Create and run\nbabysitter run:create \\\n  --process-id meta/plugin-creation \\\n  --entry library/specializations/meta/plugin-creation.js#process \\\n  --inputs /tmp/plugin-inputs.json \\\n  --prompt \"Create a new babysitter plugin package\" \\\n  --json\n```\n\n### What the process generates\n\nThe process creates a complete plugin package directory:\n\n| File | Description |\n|------|-------------|\n| `install.md` | Agent-readable installation instructions with numbered steps |\n| `uninstall.md` | Reversal instructions for clean removal |\n| `configure.md` | Configuration options table and adjustment instructions |\n| `install-process.js` | *(optional)* Automated babysitter process for complex install steps |\n| `configure-process.js` | *(optional)* Automated configuration process |\n| `process/main.js` | *(optional)* Main process the plugin contributes |\n| `marketplace-entry.json` | Ready-to-use marketplace.json entry for publishing |\n\n### Process phases\n\n1. **Requirements Analysis** — Analyzes plugin purpose, prerequisites, config options, file structure\n2. **Structure Design** — Plans directory layout and file inventory (with review breakpoint)\n3. **Instruction Authoring** — Writes install.md, uninstall.md, configure.md\n4. **Process Files** — Creates optional babysitter process files (install-process.js, configure-process.js, process/main.js)\n5. **Validation** — Verifies package completeness, instruction quality, path correctness\n6. **Marketplace Integration** — Generates marketplace.json entry for publishing\n\n### Quick creation (without orchestration)\n\nFor simple plugins that only need instruction files, you can create the package manually following the structure below and the [Plugin Author Guide](docs/plugins/plugin-author-guide.md).\n\n## Plugin Package Structure\n\n```\nmy-plugin/\n  package.json         # Optional (name field used as plugin ID, falls back to directory name)\n  install.md           # Markdown instructions for installation\n  uninstall.md         # Markdown instructions for removal\n  configure.md         # Markdown instructions for configuration\n  install-process.js   # Optional automated install process\n  uninstall-process.js # Optional automated uninstall process\n  configure-process.js # Optional automated configure process\n  migrations/          # Version migration files\n    1.0.0_to_1.1.0.md\n    1.1.0_to_2.0.0.js\n  process/             # Process definition files (collected recursively)\n    main.js\n```\n\n## All CLI Commands Summary\n\nAll commands accept `--json` for machine-readable output and `--global|--project`.\n\n| Command | Required Flags | Description |\n|---------|---------------|-------------|\n| `plugin:add-marketplace` | `--marketplace-url`, `--global\\|--project` [`--marketplace-path`, `--marketplace-branch`, `--force`] | Clone a marketplace repository |\n| `plugin:update-marketplace` | `--marketplace-name`, `--global\\|--project` [`--marketplace-branch`] | Pull latest marketplace changes (optionally switch branch) |\n| `plugin:list-plugins` | `--marketplace-name`, `--global\\|--project` | List available plugins in a marketplace |\n| `plugin:install` | `--plugin-name`, `--marketplace-name`, `--global\\|--project` | Get install instructions for a plugin |\n| `plugin:uninstall` | `--plugin-name`, `--marketplace-name`, `--global\\|--project` | Get uninstall instructions for a plugin |\n| `plugin:update` | `--plugin-name`, `--marketplace-name`, `--global\\|--project` | Resolve migration chain and get update instructions |\n| `plugin:configure` | `--plugin-name`, `--marketplace-name`, `--global\\|--project` | Get configuration instructions for a plugin |\n| `plugin:list-installed` | `--global\\|--project` | List all installed plugins |\n| `plugin:update-registry` | `--plugin-name`, `--plugin-version`, `--marketplace-name`, `--global\\|--project` | Register or update a plugin entry |\n| `plugin:remove-from-registry` | `--plugin-name`, `--global\\|--project` | Remove a plugin entry from the registry |\n\n## After Plugin Work: Contribute Back\n\nAfter working with plugins, prompt the user to share improvements or report issues:\n\n- **Improved a plugin's install/configure/uninstall instructions**: `/babysitter:contrib library contribution: improved [plugin-name] [install|configure|uninstall] instructions`\n- **Created a new plugin**: `/babysitter:contrib library contribution: new plugin [plugin-name] -- [description]`\n- **Found a bug in a plugin or the plugin system**: `/babysitter:contrib bug report: [description, e.g. \"plugin:update-registry fails when marketplace hasn't been cloned\"]`\n- **Plugin install/configure instructions were confusing or wrong**: `/babysitter:contrib bug report: [plugin-name] install instructions [description of what was wrong]`\n- **Have an idea for a new plugin**: `/babysitter:contrib feature request: plugin idea -- [description]`\n\nEven reporting that a plugin's instructions were unclear helps improve it for the next user."

package/commands/project-install.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Set up a project for babysitting. Guides you through onboarding a new or existing project — researches the codebase, interviews you about goals and workflows, builds the project profile, installs the best tools, and optionally configures CI/CD integration."
+prompt = "Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md).\n\nBefore using the process library, resolve the active library root through the SDK CLI. If no binding exists yet, initialize the shared global SDK binding with:\n\n```bash\nbabysitter process-library:active --json\n```\n\nThen use the `cradle/project-install` process from the active process library.\n\nWhen the run completes, end with a friendly message that includes a polite and humorous ask to star the repo on GitHub: https://github.com/a5c-ai/babysitter"

package/commands/resume.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Resume orchestrating of a babysitter run. use this command to resume babysitting a complex workflow."
+prompt = "Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md). to resume a run.\nif no run was given, discover the runs and suggest which incomplete run to resume based on the run's status, inputs, process , etc."

package/commands/retrospect.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Analysis for a run and its results, process, suggestions for process improvements, process optimizations, fixes, etc. for the next runs."
+prompt = "Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md).\n\ncreate and run a retrospect process:\n\n### Run Selection\n\n- `--all` or \"all runs\": list all completed/failed runs and analyze collectively\n- Multiple run IDs: analyze each specified run\n- Single run ID or no ID: existing behavior (latest run)\n- In interactive mode with no run specified: ask user whether to analyze latest, select specific runs, or all runs\n\n### Cross-Run Analysis (multi-run mode)\n\nWhen analyzing multiple runs, the retrospect process should additionally cover:\n- Common failure patterns across runs\n- Velocity trends (tasks/time across runs)\n- Process evolution (how processes changed)\n- Repeated breakpoint patterns\n- Aggregate quality metrics\n\nimplementations notes (for the process):\n- The process should analyze the run, the process that was followed, and provide suggestions for improvements, optimizations, and fixes.\n- The process should such have many breakpoints where the user can steer the process, provide feedback, and make decisions about how to proceed with the retrospect.\n- The process should be designed to be flexible and adaptable to different types of runs, projects, and goals, and should be able to provide insights and suggestions that are relevant and actionable for the user. (modification to the process, skills, etc.)\n- The process should be designed to be iterative, allowing the user to go through multiple rounds of analysis and improvement, and should be able to track the changes and improvements made over time.\n- The process should cover:\n    - Analysis of the run and its results, including what went well, what didn't go well, and what could be improved.\n    - Analysis of the process that was followed, including what steps were taken, what tools were used, and how effective they were.\n    - Suggestions for improvements, optimizations, and fixes for both the run and the process.\n    - Implementing the improvements, optimizations, and fixes, and tracking the changes made over time.\n### Cleanup Suggestion\n\nAfter retrospect analysis, suggest running `/babysitter:cleanup` to clean up old run data and reclaim disk space.\n\n    - Ending by explicitly prompting the user to contribute back -- even just reporting an issue is valuable, they don't need to implement the fix themselves. After analysis, display a clear call-to-action:\n\n      \"You've identified [specific insight/improvement]. This could help other babysitter users too. Run `/babysitter:contrib` to share it upstream -- you can either report it as an issue or submit a PR with the fix.\"\n\n      Route to the specific contrib workflow based on what the user wants to do:\n\n      **Just reporting (no code changes needed):**\n      - Found a bug or weakness in a process -> `/babysitter:contrib bug report: [description of what went wrong]`\n      - Found missing or confusing documentation -> `/babysitter:contrib documentation question: [what was unclear]`\n      - Have an idea for improvement but don't want to implement it -> `/babysitter:contrib feature request: [description]`\n\n      **Contributing code changes:**\n      - Process/skill/agent improvements -> `/babysitter:contrib library contribution: [description]`\n      - Bug fixes in SDK or CLI -> `/babysitter:contrib bugfix: [description]`\n      - Plugin instruction improvements -> `/babysitter:contrib library contribution: improved [plugin-name] [install|configure|uninstall] instructions`"

package/commands/user-install.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Set up babysitter for yourself. Guides you through onboarding — installs dependencies, interviews you about your specialties and preferences, builds your user profile, and configures the best tools for your workflow."
+prompt = "Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md).\n\nBefore using the process library, resolve the active library root through the SDK CLI. If no binding exists yet, initialize the shared global SDK binding with:\n\n```bash\nbabysitter process-library:active --json\n```\n\nThen use the `cradle/user-install` process from the active process library.\n\nWhen the run completes, end with a friendly message that includes a polite and humorous ask to star the repo on GitHub: https://github.com/a5c-ai/babysitter"

package/commands/yolo.toml ADDED Viewed

@@ -0,0 +1,3 @@
+description = "Orchestrate a babysitter run. use this command to start babysitting a complex workflow in a non-interactive mode, without any user interaction or breakpoints in the run."
+prompt = "Run the Babysitter orchestration instructions directly through the CLI, without any user interaction or breakpoints. In Claude Code, use Bash to run `babysitter instructions:babysit-skill --harness claude-code --no-interactive`; in Codex, run `babysitter instructions:babysit-skill --harness codex --no-interactive`; in other harnesses, use the same command with that harness id. Then follow the returned instructions in this same turn until completion proof is produced. Do not stop after reading the instructions, do not invoke the Skill tool first, and use the non-interactive/no-breakpoints path when the instructions offer a mode choice.\n\nUser arguments for this command:\n\n$ARGUMENTS"

package/hooks/babysitter-proxied-after-agent.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/bin/bash
+set -euo pipefail
+babysitter hook:run --harness unified --hook-type after-agent --json

package/hooks/babysitter-proxied-after-tool.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/bin/bash
+set -euo pipefail
+babysitter hook:run --harness unified --hook-type post-tool-use --json

package/hooks/babysitter-proxied-before-tool.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/bin/bash
+set -euo pipefail
+babysitter hook:run --harness unified --hook-type pre-tool-use --json

package/hooks/babysitter-proxied-pre-compact.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/bin/bash
+set -euo pipefail
+babysitter hook:run --harness unified --hook-type pre-compact --json

package/hooks/babysitter-proxied-session-end.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/bin/bash
+set -euo pipefail
+babysitter hook:run --harness unified --hook-type session-end --json

package/hooks/babysitter-proxied-session-idle.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/bin/bash
+set -euo pipefail
+babysitter hook:run --harness unified --hook-type session-idle --json

package/hooks/babysitter-proxied-session-start.sh ADDED Viewed

@@ -0,0 +1,11 @@
+#!/bin/bash
+# Session Start — installs SDK if needed, then runs hook handler.
+set -euo pipefail
+PLUGIN_ROOT="${PLUGIN_ROOT:-$(cd "$(dirname "$0")/.." && pwd)}"
+SDK_VERSION=$(node -e "try{console.log(JSON.parse(require('fs').readFileSync('${PLUGIN_ROOT}/versions.json','utf8')).sdkVersion||'latest')}catch{console.log('latest')}" 2>/dev/null || echo "latest")
+if ! command -v babysitter &>/dev/null; then
+  npm i -g "@a5c-ai/babysitter-sdk@${SDK_VERSION}" --loglevel=error 2>/dev/null || \
+  npm i -g "@a5c-ai/babysitter-sdk@${SDK_VERSION}" --prefix "$HOME/.local" --loglevel=error 2>/dev/null || true
+  [ -d "$HOME/.local/bin" ] && export PATH="$HOME/.local/bin:$PATH"
+fi
+babysitter hook:run --harness unified --hook-type session-start --json

package/hooks/babysitter-proxied-shell-env.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/bin/bash
+set -euo pipefail
+babysitter hook:run --harness unified --hook-type shell-env --json

package/hooks/babysitter-proxied-user-prompt-submit.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/bin/bash
+set -euo pipefail
+babysitter hook:run --harness unified --hook-type user-prompt-submit --json

package/hooks/hooks.json ADDED Viewed

@@ -0,0 +1,122 @@
+{
+  "description": "babysitter plugin hooks for Gemini CLI",
+  "hooks": {
+    "SessionStart": [
+      {
+        "hooks": [
+          {
+            "name": "babysitter-session-start",
+            "type": "command",
+            "command": "a5c-hooks-mux invoke --adapter gemini --handler \"bash ${GEMINI_EXTENSION_PATH}/hooks/babysitter-proxied-session-start.sh\" --json",
+            "timeout": 30000,
+            "description": "babysitter SessionStart hook"
+          }
+        ]
+      }
+    ],
+    "UserPromptSubmit": [
+      {
+        "hooks": [
+          {
+            "name": "babysitter-user-prompt-submit",
+            "type": "command",
+            "command": "a5c-hooks-mux invoke --adapter gemini --handler \"bash ${GEMINI_EXTENSION_PATH}/hooks/babysitter-proxied-user-prompt-submit.sh\" --json",
+            "timeout": 30000,
+            "description": "babysitter UserPromptSubmit hook"
+          }
+        ]
+      }
+    ],
+    "BeforeTool": [
+      {
+        "hooks": [
+          {
+            "name": "babysitter-pre-tool-use",
+            "type": "command",
+            "command": "a5c-hooks-mux invoke --adapter gemini --handler \"bash ${GEMINI_EXTENSION_PATH}/hooks/babysitter-proxied-before-tool.sh\" --json",
+            "timeout": 30000,
+            "description": "babysitter PreToolUse hook"
+          }
+        ]
+      }
+    ],
+    "AfterTool": [
+      {
+        "hooks": [
+          {
+            "name": "babysitter-post-tool-use",
+            "type": "command",
+            "command": "a5c-hooks-mux invoke --adapter gemini --handler \"bash ${GEMINI_EXTENSION_PATH}/hooks/babysitter-proxied-after-tool.sh\" --json",
+            "timeout": 30000,
+            "description": "babysitter PostToolUse hook"
+          }
+        ]
+      }
+    ],
+    "PreCompact": [
+      {
+        "hooks": [
+          {
+            "name": "babysitter-pre-compact",
+            "type": "command",
+            "command": "a5c-hooks-mux invoke --adapter gemini --handler \"bash ${GEMINI_EXTENSION_PATH}/hooks/babysitter-proxied-pre-compact.sh\" --json",
+            "timeout": 30000,
+            "description": "babysitter PreCompact hook"
+          }
+        ]
+      }
+    ],
+    "AfterAgent": [
+      {
+        "hooks": [
+          {
+            "name": "babysitter-after-agent",
+            "type": "command",
+            "command": "a5c-hooks-mux invoke --adapter gemini --handler \"bash ${GEMINI_EXTENSION_PATH}/hooks/babysitter-proxied-after-agent.sh\" --json",
+            "timeout": 30000,
+            "description": "babysitter AfterAgent hook"
+          }
+        ]
+      }
+    ],
+    "SessionEnd": [
+      {
+        "hooks": [
+          {
+            "name": "babysitter-session-end",
+            "type": "command",
+            "command": "a5c-hooks-mux invoke --adapter gemini --handler \"bash ${GEMINI_EXTENSION_PATH}/hooks/babysitter-proxied-session-end.sh\" --json",
+            "timeout": 30000,
+            "description": "babysitter SessionEnd hook"
+          }
+        ]
+      }
+    ],
+    "SessionIdle": [
+      {
+        "hooks": [
+          {
+            "name": "babysitter-session-idle",
+            "type": "command",
+            "command": "a5c-hooks-mux invoke --adapter gemini --handler \"bash ${GEMINI_EXTENSION_PATH}/hooks/babysitter-proxied-session-idle.sh\" --json",
+            "timeout": 30000,
+            "description": "babysitter SessionIdle hook"
+          }
+        ]
+      }
+    ],
+    "ShellEnv": [
+      {
+        "hooks": [
+          {
+            "name": "babysitter-shell-env",
+            "type": "command",
+            "command": "a5c-hooks-mux invoke --adapter gemini --handler \"bash ${GEMINI_EXTENSION_PATH}/hooks/babysitter-proxied-shell-env.sh\" --json",
+            "timeout": 30000,
+            "description": "babysitter ShellEnv hook"
+          }
+        ]
+      }
+    ]
+  }
+}

package/package.json ADDED Viewed

@@ -0,0 +1,47 @@
+{
+  "name": "@a5c-ai/babysitter-gemini-cli",
+  "version": "5.0.1-staging.04ca6ab00d21",
+  "description": "Orchestrate complex, multi-step workflows with event-sourced state management, hook-based extensibility, and human-in-the-loop approval",
+  "scripts": {
+    "deploy": "npm publish --access public",
+    "deploy:staging": "npm publish --access public --tag staging",
+    "plugin:install": "node bin/install.js --global",
+    "plugin:uninstall": "node bin/uninstall.js --global",
+    "team:install": "node scripts/team-install.js"
+  },
+  "bin": {
+    "babysitter-gemini-cli": "bin/cli.js"
+  },
+  "files": [
+    "bin/",
+    "hooks/",
+    "skills/",
+    "commands/",
+    "scripts/",
+    "plugin.json",
+    "README.md",
+    "versions.json",
+    "package.json"
+  ],
+  "keywords": [
+    "babysitter",
+    "gemini-cli",
+    "orchestration"
+  ],
+  "author": "a5c.ai",
+  "license": "MIT",
+  "publishConfig": {
+    "access": "public"
+  },
+  "dependencies": {
+    "@a5c-ai/babysitter-sdk": "5.0.1-staging.04ca6ab00d21"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/a5c-ai/babysitter-gemini.git"
+  },
+  "homepage": "https://github.com/a5c-ai/babysitter-gemini#readme",
+  "bugs": {
+    "url": "https://github.com/a5c-ai/babysitter-gemini/issues"
+  }
+}

package/plugin.json ADDED Viewed

@@ -0,0 +1,55 @@
+{
+  "name": "babysitter",
+  "version": "5.0.1-staging.04ca6ab00d21",
+  "description": "Orchestrate complex, multi-step workflows with event-sourced state management, hook-based extensibility, and human-in-the-loop approval",
+  "author": "a5c.ai",
+  "license": "MIT",
+  "harness": "gemini-cli",
+  "hooks": {
+    "SessionStart": "hooks/babysitter-proxied-session-start.sh",
+    "UserPromptSubmit": "hooks/babysitter-proxied-user-prompt-submit.sh",
+    "PreToolUse": "hooks/babysitter-proxied-before-tool.sh",
+    "PostToolUse": "hooks/babysitter-proxied-after-tool.sh",
+    "PreCompact": "hooks/babysitter-proxied-pre-compact.sh",
+    "AfterAgent": "hooks/babysitter-proxied-after-agent.sh",
+    "SessionEnd": "hooks/babysitter-proxied-session-end.sh",
+    "SessionIdle": "hooks/babysitter-proxied-session-idle.sh",
+    "ShellEnv": "hooks/babysitter-proxied-shell-env.sh"
+  },
+  "commands": [
+    "commands/assimilate.toml",
+    "commands/call.toml",
+    "commands/cleanup.toml",
+    "commands/contrib.toml",
+    "commands/doctor.toml",
+    "commands/forever.toml",
+    "commands/help.toml",
+    "commands/observe.toml",
+    "commands/plan.toml",
+    "commands/plugins.toml",
+    "commands/project-install.toml",
+    "commands/resume.toml",
+    "commands/retrospect.toml",
+    "commands/user-install.toml",
+    "commands/yolo.toml"
+  ],
+  "skills": [],
+  "contextFileName": "GEMINI.md",
+  "extensionManifest": "gemini-extension.json",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/a5c-ai/babysitter"
+  },
+  "keywords": [
+    "orchestration",
+    "workflow",
+    "automation",
+    "event-sourced",
+    "hooks",
+    "TDD",
+    "quality-convergence",
+    "agent",
+    "LLM",
+    "gemini-cli"
+  ]
+}

package/scripts/create-release-tag.mjs ADDED Viewed

@@ -0,0 +1,18 @@
+#!/usr/bin/env node
+import { spawnSync } from 'node:child_process';
+import { existsSync, readFileSync } from 'node:fs';
+function run(command, args) {
+  const result = spawnSync(command, args, { encoding: 'utf8', stdio: 'inherit' });
+  if (result.status !== 0) process.exit(result.status || 1);
+}
+const branch = process.env.GITHUB_REF_NAME || 'develop';
+const sha = (process.env.GITHUB_SHA || '').slice(0, 12);
+const version = existsSync('package.json') ? JSON.parse(readFileSync('package.json', 'utf8')).version : JSON.parse(readFileSync('versions.json', 'utf8')).sdkVersion;
+const normalized = String(version).replace(/[^0-9A-Za-z._-]/g, '-');
+const tag = 'release/' + branch + '/v' + normalized + '-' + sha;
+run('git', ['config', 'user.name', 'github-actions[bot]']);
+run('git', ['config', 'user.email', 'github-actions[bot]@users.noreply.github.com']);
+run('git', ['tag', tag]);
+run('git', ['push', 'origin', tag]);

package/scripts/publish-from-tag.mjs ADDED Viewed

@@ -0,0 +1,41 @@
+#!/usr/bin/env node
+import { spawnSync } from 'node:child_process';
+import { readFileSync } from 'node:fs';
+function run(command, args, options = {}) {
+  const result = spawnSync(command, args, { stdio: options.stdio || 'inherit', encoding: options.encoding });
+  if (result.status !== 0 && !options.allowFailure) process.exit(result.status || 1);
+  return result;
+}
+function npmView(packageSpec) {
+  return run('npm', ['view', packageSpec, 'version'], { allowFailure: true, stdio: 'pipe', encoding: 'utf8' }).status === 0;
+}
+const pkg = JSON.parse(readFileSync('package.json', 'utf8'));
+const ref = process.env.GITHUB_REF_NAME || '';
+const branch = ref.split('/')[1] || 'develop';
+const tag = branch === 'main' ? 'latest' : branch;
+if (!process.env.NODE_AUTH_TOKEN) {
+  console.log('NODE_AUTH_TOKEN is not configured; skipping npm publish.');
+  process.exit(0);
+}
+if (npmView(pkg.name + '@' + pkg.version)) {
+  console.log(pkg.name + '@' + pkg.version + ' already exists; ensuring dist-tag ' + tag + '.');
+  run('npm', ['dist-tag', 'add', pkg.name + '@' + pkg.version, tag], { allowFailure: true });
+  process.exit(0);
+}
+for (const field of ['dependencies', 'peerDependencies', 'optionalDependencies']) {
+  for (const [name, version] of Object.entries(pkg[field] || {})) {
+    if (!name.startsWith('@a5c-ai/') || version.startsWith('^') || version.startsWith('~') || version === '*' || version.startsWith('workspace:')) continue;
+    if (!npmView(name + '@' + version)) {
+      console.log('Required internal dependency ' + name + '@' + version + ' is not published yet; skipping npm publish.');
+      process.exit(0);
+    }
+  }
+}
+run('npm', ['publish', '--access', 'public', '--tag', tag]);

package/scripts/team-install.js ADDED Viewed

@@ -0,0 +1,23 @@
+#!/usr/bin/env node
+'use strict';
+var path = require('path');
+var shared = require('../bin/install-shared');
+var workspace = process.cwd();
+for (var i = 0; i < process.argv.length; i++) {
+  if (process.argv[i] === '--workspace' && process.argv[i + 1]) {
+    workspace = path.resolve(process.argv[i + 1]);
+  }
+}
+var src = process.env.PLUGIN_PACKAGE_ROOT || path.resolve(__dirname, '..');
+var dest = shared.getHomePluginRoot('workspace');
+console.log('[babysitter] Team install to ' + dest);
+shared.copyPluginBundle(src, dest);
+if (typeof shared.harnessTeamInstall === 'function') {
+  shared.harnessTeamInstall(src, dest, workspace);
+}
+shared.runPostInstall(dest);
+console.log('[babysitter] Team install complete.');

package/skills/babysit/SKILL.md ADDED Viewed

@@ -0,0 +1,58 @@
+---
+name: babysit
+description: Execute via @babysitter. Use this skill when asked to babysit a task, do anything that is structured process-driven (even a loop) or whenever it is called explicitly. (babysit, babysitter, with a process, orchestrate, orchestrate a run, workflow, loop until, etc.)
+allowed-tools: Read, Grep, Write, Task, Bash, Edit, Grep, Glob, WebFetch, WebSearch, Search, AskUserQuestion, TodoWrite, TodoRead, Skill, BashOutput, KillShell, MultiEdit, LS
+version: 0.1.3
+---
+# babysit
+Follow the commands below exactly. but apply judgement and creativity to get the job done. no matter how small or simple the task may seem, you must use the babysitter SDK and run lifecycle to execute it. This is non-negotiable. If you are asked to do something that seems like it could be done with a simple tool call, you must still use the babysitter SDK to create a run, define tasks, execute them, and complete the run. This is how you will learn and demonstrate mastery of the babysitter system. Always follow the full process, even for trivial tasks.
+Subagents that need a scratch checkout or working directory must create it under
+`/tmp/<descriptive-name>/`, not under `.a5c/runs/<runId>/work`. Before returning
+deliverables, validate that no run-dir worktree was left behind, for example:
+```bash
+find .a5c/runs -maxdepth 3 -name work -type d -print
+```
+That command should print nothing. If it prints a non-empty work directory, move
+or remove only the scratch data you created before returning.
+## Dependencies
+### Babysitter SDK and CLI
+Read the SDK version from `versions.json` to ensure version compatibility:
+```bash
+SDK_VERSION=$(node -e "try{console.log(JSON.parse(require('fs').readFileSync('${CLAUDE_PLUGIN_ROOT}/versions.json','utf8')).sdkVersion||'latest')}catch{console.log('latest')}")
+npm i -g @a5c-ai/babysitter-sdk@$SDK_VERSION || npm i -g @a5c-ai/babysitter-sdk@latest
+CLI="npx -y @a5c-ai/babysitter-sdk@$SDK_VERSION"
+```
+If the pinned version fails to install (e.g. not yet published), the fallback installs `latest`.
+If `babysitter` is already installed globally at the correct version, you may use `CLI="babysitter"` instead.
+### jq
+Make sure `jq` is installed and available in the path. If not, install it.
+## Instructions
+Run the following command to get full instructions:
+```bash
+$CLI instructions:babysit-skill --harness claude-code --interactive
+```
+For non-interactive mode (running with `-p` flag or no AskUserQuestion tool):
+```bash
+$CLI instructions:babysit-skill --harness claude-code --no-interactive
+```
+Follow the instructions returned by the command above to orchestrate the run.

package/versions.json ADDED Viewed

@@ -0,0 +1,4 @@
+{
+  "sdkVersion": "5.0.1-staging.04ca6ab00d21",
+  "extensionVersion": "5.0.1-staging.04ca6ab00d21"
+}