npm - @a5c-ai/babysitter-codex - Versions diffs - 5.0.1-staging.d7c7b44ac4e1 → 5.0.1-staging.d8bdfcceaf4a - Mend

@a5c-ai/babysitter-codex 5.0.1-staging.d7c7b44ac4e1 → 5.0.1-staging.d8bdfcceaf4a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/.codex-plugin/plugin.json +1 -1
package/README.md +20 -7
package/bin/install-shared.js +3 -3
package/hooks/babysitter-proxied-post-tool-use.sh +3 -0
package/hooks/babysitter-proxied-pre-tool-use.sh +3 -0
package/hooks/babysitter-proxied-session-end.sh +3 -0
package/hooks.json +36 -3
package/package.json +1 -1
package/skills/babysit/SKILL.md +2 -4
package/skills/call/SKILL.md +5 -1
package/skills/cleanup/SKILL.md +30 -8
package/skills/help/SKILL.md +2 -1
package/skills/observe/SKILL.md +6 -1
package/skills/yolo/SKILL.md +5 -1

package/.codex-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "babysitter",
-  "version": "5.0.1-staging.d7c7b44ac4e1",
+  "version": "5.0.1-staging.d8bdfcceaf4a",
   "description": "Orchestrate complex, multi-step workflows with event-sourced state management, hook-based extensibility, and human-in-the-loop approval",
   "author": {
     "name": "a5c.ai",

package/README.md CHANGED Viewed

@@ -17,23 +17,36 @@ workspace-local Codex surface for team setup.
 ## Installation
-Install the SDK CLI first:
+Install the Babysitter CLI once:
 ```bash
-npm install -g @a5c-ai/babysitter-sdk
+npm install -g @a5c-ai/babysitter
 ```
-clone the repo and install the plugin globally:
+Install the Codex plugin through the SDK helper. This is the canonical path used by the installer tests and resolves to `npx --yes @a5c-ai/babysitter-codex install ...` under the hood:
 ```bash
-npx -y @a5c-ai/babysitter-codex install --global
+# Global install
+babysitter harness:install-plugin codex
-codex
+# Workspace install
+babysitter harness:install-plugin codex --workspace /path/to/repo
+```
+You can also run the published package installer directly:
+```bash
+npx --yes @a5c-ai/babysitter-codex install --global
+npx --yes @a5c-ai/babysitter-codex install --workspace /path/to/repo
+```
+Then open Codex and finish enabling the plugin from the plugin UI:
-> /plugins
+```text
+/plugins
 ```
-then navigate to the 'babysitter' entry and select 'Install'.
+Navigate to the `babysitter` entry and select `Install`.
 If Codex was already open when you ran `install --global`, start a new thread
 after installing from `/plugins` before expecting `babysitter:*` skills such as

package/bin/install-shared.js CHANGED Viewed

@@ -104,7 +104,7 @@ function ensureMarketplaceEntry(marketplacePath, pluginRoot) {
     name: PLUGIN_NAME,
     source: relSource,
     description: "Orchestrate complex, multi-step workflows with event-sourced state management, hook-based extensibility, and human-in-the-loop approval",
-    version: "5.0.1-staging.d7c7b44ac4e1",
+    version: "5.0.1-staging.d8bdfcceaf4a",
     author: { name: "a5c.ai" },
   };
   if (idx >= 0) marketplace.plugins[idx] = entry;
@@ -297,7 +297,7 @@ function renderCodexConfigToml() {
     'writable_roots = [".a5c", ".codex"]',
     '',
     '[features]',
-    'codex_hooks = true',
+    'hooks = true',
     'multi_agent = true',
     '',
     '[agents]',
@@ -406,7 +406,7 @@ function mergeCodexConfig(existing) {
   content = insertRootKey(content, 'sandbox_mode', 'sandbox_mode = "workspace-write"');
   content = insertRootKey(content, 'project_doc_max_bytes', 'project_doc_max_bytes = 65536');
   content = ensureWritableRoots(content);
-  content = ensureSectionLine(content, 'features', 'codex_hooks', 'codex_hooks = true');
+  content = ensureSectionLine(content, 'features', 'hooks', 'hooks = true');
   content = ensureSectionLine(content, 'features', 'multi_agent', 'multi_agent = true');
   content = ensureSectionLine(content, 'agents', 'max_depth', 'max_depth = 3');
   content = ensureSectionLine(content, 'agents', 'max_threads', 'max_threads = 4');

package/hooks/babysitter-proxied-post-tool-use.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/bin/bash
+set -euo pipefail
+babysitter hook:run --harness unified --hook-type post-tool-use --json

package/hooks/babysitter-proxied-pre-tool-use.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/bin/bash
+set -euo pipefail
+babysitter hook:run --harness unified --hook-type pre-tool-use --json

package/hooks/babysitter-proxied-session-end.sh ADDED Viewed

@@ -0,0 +1,3 @@
+#!/bin/bash
+set -euo pipefail
+babysitter hook:run --harness unified --hook-type session-end --json

package/hooks.json CHANGED Viewed

@@ -6,7 +6,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "npx -y -p @a5c-ai/hooks-mux-cli -c \"a5c-hooks-mux invoke --adapter codex --handler 'bash ./hooks/babysitter-proxied-session-start.sh' --json\""
+            "command": "a5c-hooks-mux invoke --adapter codex --handler \"bash .codex/hooks/babysitter-proxied-session-start.sh\" --json"
           }
         ]
       }
@@ -17,7 +17,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "npx -y -p @a5c-ai/hooks-mux-cli -c \"a5c-hooks-mux invoke --adapter codex --handler 'bash ./hooks/babysitter-proxied-stop.sh' --json\""
+            "command": "a5c-hooks-mux invoke --adapter codex --handler \"bash .codex/hooks/babysitter-proxied-stop.sh\" --json"
           }
         ]
       }
@@ -28,7 +28,40 @@
         "hooks": [
           {
             "type": "command",
-            "command": "npx -y -p @a5c-ai/hooks-mux-cli -c \"a5c-hooks-mux invoke --adapter codex --handler 'bash ./hooks/babysitter-proxied-user-prompt-submit.sh' --json\""
+            "command": "a5c-hooks-mux invoke --adapter codex --handler \"bash .codex/hooks/babysitter-proxied-user-prompt-submit.sh\" --json"
+          }
+        ]
+      }
+    ],
+    "PreToolUse": [
+      {
+        "matcher": ".*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "a5c-hooks-mux invoke --adapter codex --handler \"bash .codex/hooks/babysitter-proxied-pre-tool-use.sh\" --json"
+          }
+        ]
+      }
+    ],
+    "PostToolUse": [
+      {
+        "matcher": ".*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "a5c-hooks-mux invoke --adapter codex --handler \"bash .codex/hooks/babysitter-proxied-post-tool-use.sh\" --json"
+          }
+        ]
+      }
+    ],
+    "SessionEnd": [
+      {
+        "matcher": ".*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "a5c-hooks-mux invoke --adapter codex --handler \"bash .codex/hooks/babysitter-proxied-session-end.sh\" --json"
           }
         ]
       }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@a5c-ai/babysitter-codex",
-  "version": "5.0.1-staging.d7c7b44ac4e1",
+  "version": "5.0.1-staging.d8bdfcceaf4a",
   "description": "Orchestrate complex, multi-step workflows with event-sourced state management, hook-based extensibility, and human-in-the-loop approval",
   "scripts": {
     "test": "npm run validate:ci",

package/skills/babysit/SKILL.md CHANGED Viewed

@@ -1,14 +1,12 @@
 ---
 name: babysit
 description: >-
-  Orchestrate via @babysitter. Use this skill when asked to babysit a run,
-  orchestrate a process or whenever it is called explicitly. (babysit,
-  babysitter, orchestrate, orchestrate a run, workflow, etc.)
+  Execute via @babysitter. Use this skill when asked to babysit a task, to do anything that is structured and process-driven (even a loop), or whenever it is called explicitly. (babysit, babysitter, with a process, orchestrate, orchestrate a run, workflow, loop until, etc.)
 ---
 # babysit
-Orchestrate `.a5c/runs/<runId>/` through iterative execution.
+Follow the commands below exactly, but apply judgement and creativity to get the job done. No matter how small or simple the task may seem, you must use the babysitter SDK and run lifecycle to execute it. This is non-negotiable. If you are asked to do something that seems like it could be done with a simple tool call, you must still use the babysitter SDK to create a run, define tasks, execute them, and complete the run. This is how you will learn and demonstrate mastery of the babysitter system. Always follow the full process, even for trivial tasks.
 ## Dependencies

package/skills/call/SKILL.md CHANGED Viewed

@@ -5,4 +5,8 @@ description: Orchestrate a babysitter run. use this command to start babysitting
 # call
-Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md).
+Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md). Then continue executing the returned instructions in this same turn. Do not stop after the Skill tool returns; carry the requested run through to completion proof.
+User arguments for this command:
+$ARGUMENTS

package/skills/cleanup/SKILL.md CHANGED Viewed

@@ -11,11 +11,33 @@ Create and run a cleanup process using the process at `skills\babysit\process\cr
 Implementation notes (for the process):
 - Parse arguments for `--dry-run` flag (if present, set dryRun: true in inputs) and `--keep-days N` (default: 7)
-- The process scans .a5c/runs/ for completed/failed runs, aggregates insights, writes summaries, then removes old data
-- Always show the user what will be removed before removing (in interactive mode via breakpoints)
-- In non-interactive mode (yolo), proceed with cleanup using defaults
-- The insights file goes to docs/run-history-insights.md
-- Only remove terminal runs (completed/failed) older than the keep-days threshold
-- Never remove active/in-progress runs
-- Remove orphaned process files not referenced by remaining runs
-- After cleanup, show remaining run count and disk usage
+CRITICAL: The cleanup MUST follow this exact phase order. Do NOT delete any run before Phase 2 completes.
+Phase 1 — Scan:
+- Scan .a5c/runs/ for all runs
+- Classify each as terminal (completed/failed) or active (in-progress/created)
+- Identify terminal runs older than the keep-days threshold as removal candidates
+- Never mark active/in-progress runs for removal
+- Count and report: total runs, terminal, active, removal candidates, disk usage
+Phase 2 — Aggregate insights (BEFORE any deletion):
+- For EVERY removal candidate, read its run.json and journal/ events
+- Extract: processId, prompt, status, event count, created date, task summaries
+- Group by process type and extract patterns (retry counts, convergence behavior, failure modes)
+- Append a new dated section to docs/run-history-insights.md with:
+  - Summary statistics (runs removed, disk freed, runs retained)
+  - Run categories with counts and descriptions
+  - Key patterns observed (multi-batch convergence, retry behavior, etc.)
+  - What worked well / what didn't from the run data
+- This file MUST be written and verified before proceeding to Phase 3
+Phase 3 — Confirm removal:
+- In interactive mode, show the user what will be removed via a breakpoint
+- In non-interactive mode (yolo), proceed with defaults
+- In dry-run mode, stop here and show what would be removed
+Phase 4 — Remove:
+- Delete the terminal runs older than keep-days threshold
+- Identify and remove orphaned process files not referenced by remaining runs
+- Show remaining run count and disk usage after cleanup

package/skills/help/SKILL.md CHANGED Viewed

@@ -234,7 +234,8 @@ SECONDARY COMMANDS
   How it works: Runs npx @a5c-ai/babysitter-observer-dashboard@latest which watches
   the .a5c/runs/ directory (or a parent directory containing multiple projects) and
   serves a live dashboard. The process is blocking -- it runs until you stop it, and
-  it prints the local URL to share with the user.
+  it prints the local URL to share with the user. Do not use `babysitter observe`
+  as a fallback; the core Babysitter CLI does not expose that subcommand.
   Example: /babysitter:observe
   (opens browser showing all runs with live-updating task

package/skills/observe/SKILL.md CHANGED Viewed

@@ -8,6 +8,11 @@ description: Launch the babysitter observer dashboard. Installs and runs the rea
 Run the babysitter observer dashboard:
 1. Determine the watch directory — this is usually the project's container directory (the parent of the project dir), or the current working directory if not specified.
-2. Launch the dashboard: `npx -y @a5c-ai/babysitter-observer-dashboard@latest --watch-dir <dir>`
+2. Launch the standalone dashboard package: `npx -y @a5c-ai/babysitter-observer-dashboard@latest --watch-dir <dir>`.
 3. This is a blocking process — it will keep running until stopped.
 4. Report the URL printed by the dashboard to the user, then open it in the browser.
+Do not fall back to `babysitter observe`; the core Babysitter CLI does not expose
+that subcommand. Some harness runtimes may provide a separate
+`agent-platform observe` surface, but this skill uses the verified standalone
+dashboard package.

package/skills/yolo/SKILL.md CHANGED Viewed

@@ -5,4 +5,8 @@ description: Orchestrate a babysitter run. use this command to start babysitting
 # yolo
-Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md). but without any user interaction or breakpoints in the run.
+Run the Babysitter orchestration instructions directly through the CLI, without any user interaction or breakpoints. In Claude Code, use Bash to run `babysitter instructions:babysit-skill --harness claude-code --no-interactive`; in Codex, run `babysitter instructions:babysit-skill --harness codex --no-interactive`; in other harnesses, use the same command with that harness id. Then follow the returned instructions in this same turn until completion proof is produced. Do not stop after reading the instructions, do not invoke the Skill tool first, and use the non-interactive/no-breakpoints path when the instructions offer a mode choice.
+User arguments for this command:
+$ARGUMENTS