@a5c-ai/babysitter-codex 5.0.1-staging.d7c7b44ac4e1 → 5.0.1-staging.d8bdfcceaf4a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "babysitter",
3
- "version": "5.0.1-staging.d7c7b44ac4e1",
3
+ "version": "5.0.1-staging.d8bdfcceaf4a",
4
4
  "description": "Orchestrate complex, multi-step workflows with event-sourced state management, hook-based extensibility, and human-in-the-loop approval",
5
5
  "author": {
6
6
  "name": "a5c.ai",
package/README.md CHANGED
@@ -17,23 +17,36 @@ workspace-local Codex surface for team setup.
17
17
 
18
18
  ## Installation
19
19
 
20
- Install the SDK CLI first:
20
+ Install the Babysitter CLI once:
21
21
 
22
22
  ```bash
23
- npm install -g @a5c-ai/babysitter-sdk
23
+ npm install -g @a5c-ai/babysitter
24
24
  ```
25
25
 
26
- clone the repo and install the plugin globally:
26
+ Install the Codex plugin through the SDK helper. This is the canonical path used by the installer tests and resolves to `npx --yes @a5c-ai/babysitter-codex install ...` under the hood:
27
27
 
28
28
  ```bash
29
- npx -y @a5c-ai/babysitter-codex install --global
29
+ # Global install
30
+ babysitter harness:install-plugin codex
30
31
 
31
- codex
32
+ # Workspace install
33
+ babysitter harness:install-plugin codex --workspace /path/to/repo
34
+ ```
35
+
36
+ You can also run the published package installer directly:
37
+
38
+ ```bash
39
+ npx --yes @a5c-ai/babysitter-codex install --global
40
+ npx --yes @a5c-ai/babysitter-codex install --workspace /path/to/repo
41
+ ```
42
+
43
+ Then open Codex and finish enabling the plugin from the plugin UI:
32
44
 
33
- > /plugins
45
+ ```text
46
+ /plugins
34
47
  ```
35
48
 
36
- then navigate to the 'babysitter' entry and select 'Install'.
49
+ Navigate to the `babysitter` entry and select `Install`.
37
50
 
38
51
  If Codex was already open when you ran `install --global`, start a new thread
39
52
  after installing from `/plugins` before expecting `babysitter:*` skills such as
@@ -104,7 +104,7 @@ function ensureMarketplaceEntry(marketplacePath, pluginRoot) {
104
104
  name: PLUGIN_NAME,
105
105
  source: relSource,
106
106
  description: "Orchestrate complex, multi-step workflows with event-sourced state management, hook-based extensibility, and human-in-the-loop approval",
107
- version: "5.0.1-staging.d7c7b44ac4e1",
107
+ version: "5.0.1-staging.d8bdfcceaf4a",
108
108
  author: { name: "a5c.ai" },
109
109
  };
110
110
  if (idx >= 0) marketplace.plugins[idx] = entry;
@@ -297,7 +297,7 @@ function renderCodexConfigToml() {
297
297
  'writable_roots = [".a5c", ".codex"]',
298
298
  '',
299
299
  '[features]',
300
- 'codex_hooks = true',
300
+ 'hooks = true',
301
301
  'multi_agent = true',
302
302
  '',
303
303
  '[agents]',
@@ -406,7 +406,7 @@ function mergeCodexConfig(existing) {
406
406
  content = insertRootKey(content, 'sandbox_mode', 'sandbox_mode = "workspace-write"');
407
407
  content = insertRootKey(content, 'project_doc_max_bytes', 'project_doc_max_bytes = 65536');
408
408
  content = ensureWritableRoots(content);
409
- content = ensureSectionLine(content, 'features', 'codex_hooks', 'codex_hooks = true');
409
+ content = ensureSectionLine(content, 'features', 'hooks', 'hooks = true');
410
410
  content = ensureSectionLine(content, 'features', 'multi_agent', 'multi_agent = true');
411
411
  content = ensureSectionLine(content, 'agents', 'max_depth', 'max_depth = 3');
412
412
  content = ensureSectionLine(content, 'agents', 'max_threads', 'max_threads = 4');
@@ -0,0 +1,3 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ babysitter hook:run --harness unified --hook-type post-tool-use --json
@@ -0,0 +1,3 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ babysitter hook:run --harness unified --hook-type pre-tool-use --json
@@ -0,0 +1,3 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ babysitter hook:run --harness unified --hook-type session-end --json
package/hooks.json CHANGED
@@ -6,7 +6,7 @@
6
6
  "hooks": [
7
7
  {
8
8
  "type": "command",
9
- "command": "npx -y -p @a5c-ai/hooks-mux-cli -c \"a5c-hooks-mux invoke --adapter codex --handler 'bash ./hooks/babysitter-proxied-session-start.sh' --json\""
9
+ "command": "a5c-hooks-mux invoke --adapter codex --handler \"bash .codex/hooks/babysitter-proxied-session-start.sh\" --json"
10
10
  }
11
11
  ]
12
12
  }
@@ -17,7 +17,7 @@
17
17
  "hooks": [
18
18
  {
19
19
  "type": "command",
20
- "command": "npx -y -p @a5c-ai/hooks-mux-cli -c \"a5c-hooks-mux invoke --adapter codex --handler 'bash ./hooks/babysitter-proxied-stop.sh' --json\""
20
+ "command": "a5c-hooks-mux invoke --adapter codex --handler \"bash .codex/hooks/babysitter-proxied-stop.sh\" --json"
21
21
  }
22
22
  ]
23
23
  }
@@ -28,7 +28,40 @@
28
28
  "hooks": [
29
29
  {
30
30
  "type": "command",
31
- "command": "npx -y -p @a5c-ai/hooks-mux-cli -c \"a5c-hooks-mux invoke --adapter codex --handler 'bash ./hooks/babysitter-proxied-user-prompt-submit.sh' --json\""
31
+ "command": "a5c-hooks-mux invoke --adapter codex --handler \"bash .codex/hooks/babysitter-proxied-user-prompt-submit.sh\" --json"
32
+ }
33
+ ]
34
+ }
35
+ ],
36
+ "PreToolUse": [
37
+ {
38
+ "matcher": ".*",
39
+ "hooks": [
40
+ {
41
+ "type": "command",
42
+ "command": "a5c-hooks-mux invoke --adapter codex --handler \"bash .codex/hooks/babysitter-proxied-pre-tool-use.sh\" --json"
43
+ }
44
+ ]
45
+ }
46
+ ],
47
+ "PostToolUse": [
48
+ {
49
+ "matcher": ".*",
50
+ "hooks": [
51
+ {
52
+ "type": "command",
53
+ "command": "a5c-hooks-mux invoke --adapter codex --handler \"bash .codex/hooks/babysitter-proxied-post-tool-use.sh\" --json"
54
+ }
55
+ ]
56
+ }
57
+ ],
58
+ "SessionEnd": [
59
+ {
60
+ "matcher": ".*",
61
+ "hooks": [
62
+ {
63
+ "type": "command",
64
+ "command": "a5c-hooks-mux invoke --adapter codex --handler \"bash .codex/hooks/babysitter-proxied-session-end.sh\" --json"
32
65
  }
33
66
  ]
34
67
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@a5c-ai/babysitter-codex",
3
- "version": "5.0.1-staging.d7c7b44ac4e1",
3
+ "version": "5.0.1-staging.d8bdfcceaf4a",
4
4
  "description": "Orchestrate complex, multi-step workflows with event-sourced state management, hook-based extensibility, and human-in-the-loop approval",
5
5
  "scripts": {
6
6
  "test": "npm run validate:ci",
@@ -1,14 +1,12 @@
1
1
  ---
2
2
  name: babysit
3
3
  description: >-
4
- Orchestrate via @babysitter. Use this skill when asked to babysit a run,
5
- orchestrate a process or whenever it is called explicitly. (babysit,
6
- babysitter, orchestrate, orchestrate a run, workflow, etc.)
4
+ Execute via @babysitter. Use this skill when asked to babysit a task, to do anything structured and process-driven (even a loop), or whenever it is called explicitly. (babysit, babysitter, with a process, orchestrate, orchestrate a run, workflow, loop until, etc.)
7
5
  ---
8
6
 
9
7
  # babysit
10
8
 
11
- Orchestrate `.a5c/runs/<runId>/` through iterative execution.
9
+ Follow the commands below exactly, but apply judgement and creativity to get the job done. No matter how small or simple the task may seem, you must use the babysitter SDK and run lifecycle to execute it. This is non-negotiable. If you are asked to do something that seems like it could be done with a simple tool call, you must still use the babysitter SDK to create a run, define tasks, execute them, and complete the run. This is how you will learn and demonstrate mastery of the babysitter system. Always follow the full process, even for trivial tasks.
12
10
 
13
11
  ## Dependencies
14
12
 
@@ -5,4 +5,8 @@ description: Orchestrate a babysitter run. use this command to start babysitting
5
5
 
6
6
  # call
7
7
 
8
- Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md).
8
+ Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md). Then continue executing the returned instructions in this same turn. Do not stop after the Skill tool returns; carry the requested run through until completion proof is produced.
9
+
10
+ User arguments for this command:
11
+
12
+ $ARGUMENTS
@@ -11,11 +11,33 @@ Create and run a cleanup process using the process at `skills\babysit\process\cr
11
11
 
12
12
  Implementation notes (for the process):
13
13
  - Parse arguments for `--dry-run` flag (if present, set dryRun: true in inputs) and `--keep-days N` (default: 7)
14
- - The process scans .a5c/runs/ for completed/failed runs, aggregates insights, writes summaries, then removes old data
15
- - Always show the user what will be removed before removing (in interactive mode via breakpoints)
16
- - In non-interactive mode (yolo), proceed with cleanup using defaults
17
- - The insights file goes to docs/run-history-insights.md
18
- - Only remove terminal runs (completed/failed) older than the keep-days threshold
19
- - Never remove active/in-progress runs
20
- - Remove orphaned process files not referenced by remaining runs
21
- - After cleanup, show remaining run count and disk usage
14
+
15
+ CRITICAL: The cleanup MUST follow this exact phase order. Do NOT delete any run before Phase 2 completes.
16
+
17
+ Phase 1 — Scan:
18
+ - Scan .a5c/runs/ for all runs
19
+ - Classify each as terminal (completed/failed) or active (in-progress/created)
20
+ - Identify terminal runs older than the keep-days threshold as removal candidates
21
+ - Never mark active/in-progress runs for removal
22
+ - Count and report: total runs, terminal, active, removal candidates, disk usage
23
+
24
+ Phase 2 — Aggregate insights (BEFORE any deletion):
25
+ - For EVERY removal candidate, read its run.json and journal/ events
26
+ - Extract: processId, prompt, status, event count, created date, task summaries
27
+ - Group by process type and extract patterns (retry counts, convergence behavior, failure modes)
28
+ - Append a new dated section to docs/run-history-insights.md with:
29
+ - Summary statistics (runs removed, disk freed, runs retained)
30
+ - Run categories with counts and descriptions
31
+ - Key patterns observed (multi-batch convergence, retry behavior, etc.)
32
+ - What worked well / what didn't from the run data
33
+ - This file MUST be written and verified before proceeding to Phase 3
34
+
35
+ Phase 3 — Confirm removal:
36
+ - In interactive mode, show the user what will be removed via a breakpoint
37
+ - In non-interactive mode (yolo), proceed with defaults
38
+ - In dry-run mode, stop here and show what would be removed
39
+
40
+ Phase 4 — Remove:
41
+ - Delete the terminal runs older than keep-days threshold
42
+ - Identify and remove orphaned process files not referenced by remaining runs
43
+ - Show remaining run count and disk usage after cleanup
@@ -234,7 +234,8 @@ SECONDARY COMMANDS
234
234
  How it works: Runs npx @a5c-ai/babysitter-observer-dashboard@latest which watches
235
235
  the .a5c/runs/ directory (or a parent directory containing multiple projects) and
236
236
  serves a live dashboard. The process is blocking -- it runs until you stop it, and
237
- it prints the local URL to share with the user.
237
+ it prints the local URL to share with the user. Do not use `babysitter observe`
238
+ as a fallback; the core Babysitter CLI does not expose that subcommand.
238
239
 
239
240
  Example: /babysitter:observe
240
241
  (opens browser showing all runs with live-updating task
@@ -8,6 +8,11 @@ description: Launch the babysitter observer dashboard. Installs and runs the rea
8
8
  Run the babysitter observer dashboard:
9
9
 
10
10
  1. Determine the watch directory — this is usually the project's container directory (the parent of the project dir), or the current working directory if not specified.
11
- 2. Launch the dashboard: `npx -y @a5c-ai/babysitter-observer-dashboard@latest --watch-dir <dir>`
11
+ 2. Launch the standalone dashboard package: `npx -y @a5c-ai/babysitter-observer-dashboard@latest --watch-dir <dir>`.
12
12
  3. This is a blocking process — it will keep running until stopped.
13
13
  4. Report the URL printed by the dashboard to the user, then open it in the browser.
14
+
15
+ Do not fall back to `babysitter observe`; the core Babysitter CLI does not expose
16
+ that subcommand. Some harness runtimes may provide a separate
17
+ `agent-platform observe` surface, but this skill uses the verified standalone
18
+ dashboard package.
@@ -5,4 +5,8 @@ description: Orchestrate a babysitter run. use this command to start babysitting
5
5
 
6
6
  # yolo
7
7
 
8
- Invoke the babysitter:babysit skill (using the Skill tool) and follow its instructions (SKILL.md). but without any user interaction or breakpoints in the run.
8
+ Run the Babysitter orchestration instructions directly through the CLI, without any user interaction or breakpoints. In Claude Code, use Bash to run `babysitter instructions:babysit-skill --harness claude-code --no-interactive`; in Codex, run `babysitter instructions:babysit-skill --harness codex --no-interactive`; in other harnesses, use the same command with that harness id. Then follow the returned instructions in this same turn until completion proof is produced. Do not stop after reading the instructions, do not invoke the Skill tool first, and use the non-interactive/no-breakpoints path when the instructions offer a mode choice.
9
+
10
+ User arguments for this command:
11
+
12
+ $ARGUMENTS