@adaptic/maestro 1.1.7 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/.claude/commands/init-maestro.md +502 -260
  2. package/README.md +47 -2
  3. package/bin/maestro.mjs +1 -1
  4. package/docs/guides/agents-observe-setup.md +64 -0
  5. package/docs/guides/ccxray-diagnostics.md +65 -0
  6. package/docs/guides/claude-mem-setup.md +79 -0
  7. package/docs/guides/claude-pace-setup.md +56 -0
  8. package/docs/guides/claudraband-sessions.md +98 -0
  9. package/docs/guides/clawteam-swarm.md +116 -0
  10. package/docs/guides/code-review-graph-setup.md +86 -0
  11. package/docs/guides/email-setup.md +399 -0
  12. package/docs/guides/media-generation-setup.md +349 -0
  13. package/docs/guides/outbound-governance-setup.md +438 -0
  14. package/docs/guides/pdf-generation-setup.md +315 -0
  15. package/docs/guides/poller-daemon-setup.md +550 -0
  16. package/docs/guides/rag-context-setup.md +459 -0
  17. package/docs/guides/self-optimization-pattern.md +82 -0
  18. package/docs/guides/slack-setup.md +350 -0
  19. package/docs/guides/twilio-subaccounts-setup.md +223 -0
  20. package/docs/guides/voice-sms-setup.md +698 -0
  21. package/docs/guides/webhook-relay-setup.md +349 -0
  22. package/docs/guides/whatsapp-setup.md +282 -0
  23. package/docs/runbooks/mac-mini-bootstrap.md +21 -0
  24. package/package.json +2 -1
  25. package/plugins/maestro-skills/plugin.json +16 -0
  26. package/plugins/maestro-skills/skills/agents-observe.md +110 -0
  27. package/plugins/maestro-skills/skills/ccxray-diagnostics.md +91 -0
  28. package/plugins/maestro-skills/skills/claude-pace.md +61 -0
  29. package/plugins/maestro-skills/skills/code-review-graph.md +99 -0
  30. package/scaffold/CLAUDE.md +64 -0
  31. package/scaffold/config/agent.ts.example +2 -1
  32. package/scaffold/config/caller-id-map.yaml +46 -0
  33. package/scaffold/config/known-agents.json +35 -0
  34. package/scripts/daemon/classifier.mjs +264 -50
  35. package/scripts/daemon/dispatcher.mjs +109 -5
  36. package/scripts/daemon/launchd-wrapper-generic.sh +96 -0
  37. package/scripts/daemon/launchd-wrapper-slack-events.sh +37 -0
  38. package/scripts/daemon/launchd-wrapper.sh +91 -0
  39. package/scripts/daemon/lib/session-router.mjs +274 -0
  40. package/scripts/daemon/lib/session-router.test.mjs +295 -0
  41. package/scripts/daemon/prompt-builder.mjs +51 -11
  42. package/scripts/daemon/responder.mjs +234 -19
  43. package/scripts/daemon/session-lock.mjs +194 -0
  44. package/scripts/daemon/sophie-daemon.mjs +16 -2
  45. package/scripts/email-signature.html +20 -4
  46. package/scripts/local-triggers/generate-plists.sh +62 -10
  47. package/scripts/media-generation/README.md +2 -0
  48. package/scripts/pdf-generation/README.md +2 -0
  49. package/scripts/poller/imap-client.mjs +4 -2
  50. package/scripts/poller/slack-poller.mjs +126 -59
  51. package/scripts/poller/trigger.mjs +12 -1
  52. package/scripts/setup/init-agent.sh +91 -1
  53. package/scripts/setup/install-dev-tools.sh +150 -0
  54. package/scripts/spawn-session.sh +21 -6
  55. package/workflows/continuous/backlog-executor.yaml +141 -0
  56. package/workflows/daily/evening-wrap.yaml +41 -1
  57. package/workflows/daily/morning-brief.yaml +17 -0
  58. package/workflows/event-driven/agent-failure-investigation.yaml +137 -0
  59. package/workflows/event-driven/pr-review.yaml +104 -0
  60. package/workflows/weekly/engineering-health.yaml +154 -0
@@ -50,6 +50,22 @@
50
50
  {
51
51
  "name": "schedule-meeting",
52
52
  "description": "Schedule a meeting using Google Calendar MCP or Calendly. Handle timezone conversion, find availability, send invites."
53
+ },
54
+ {
55
+ "name": "claude-pace",
56
+ "description": "Check Claude API rate limit quota — 5-hour/7-day usage, reset countdowns, pace delta. Use before heavy backlog cycles or when quota warnings appear."
57
+ },
58
+ {
59
+ "name": "ccxray-diagnostics",
60
+ "description": "Run token/cost X-ray diagnostics on Claude API sessions — per-turn breakdown, context window heatmaps, system prompt diffs. Use for debugging expensive sessions."
61
+ },
62
+ {
63
+ "name": "code-review-graph",
64
+ "description": "Structural code analysis via tree-sitter knowledge graph — blast radius, review context, architecture overview, semantic search. Use for PR reviews and refactoring."
65
+ },
66
+ {
67
+ "name": "agents-observe",
68
+ "description": "Multi-agent observability dashboard — monitor parallel agent teams, tool calls, session state, and performance in real time. Use when debugging agent failures."
53
69
  }
54
70
  ]
55
71
  }
@@ -0,0 +1,110 @@
1
+ ---
2
+ name: agents-observe
3
+ description: Multi-agent observability dashboard — monitor parallel agent teams, tool calls, session state, and performance in real time. Use when debugging agent failures or profiling backlog execution.
4
+ ---
5
+
6
+ # Multi-Agent Observability (agents-observe)
7
+
8
+ You are a Maestro agent. Use agents-observe to monitor, debug, and profile Claude Code agent teams in real time.
9
+
10
+ ## When to Invoke
11
+
12
+ - **Monitoring parallel execution** — watching multiple backlog executor agents running simultaneously
13
+ - **Debugging agent failures** — an agent crashed, hung, or produced unexpected results
14
+ - **Performance profiling** — identifying which agents or tool calls are slow
15
+ - **Post-mortem analysis** — reviewing what happened in a completed session
16
+ - **Orchestrator oversight** — the orchestrator agent needs visibility into sub-agent activity
17
+
18
+ ## Background
19
+
20
+ agents-observe captures tool calls, agent hierarchy, and session state via background hooks. Data is stored in SQLite and streamed to a WebSocket-powered dashboard with 3-5ms latency. It is NOT meant to be always-on, because the SQLite writes add overhead to every tool call — enable it on demand for specific debugging sessions.
21
+
22
+ ## Steps
23
+
24
+ ### Starting Observation
25
+
26
+ 1. **Launch the dashboard**:
27
+ ```bash
28
+ agents-observe serve
29
+ ```
30
+ Opens the dashboard at `http://localhost:3847`. Active Claude Code sessions appear automatically.
31
+
32
+ 2. **Monitor live sessions** — the dashboard shows:
33
+ - Agent hierarchy (parent orchestrator and child sub-agents)
34
+ - Real-time tool call stream with timing
35
+ - Session state (active, idle, completed, errored)
36
+ - Per-agent resource usage
37
+
38
+ ### Querying Historical Data
39
+
40
+ 3. **Query by session** — inspect a specific session's full history:
41
+ ```bash
42
+ agents-observe query --session <session-id>
43
+ ```
44
+
45
+ 4. **Query by tool** — find all uses of a specific tool in a time window:
46
+ ```bash
47
+ agents-observe query --tool Write --last 1h
48
+ ```
49
+
50
+ 5. **Query by agent** — filter to a specific agent's activity:
51
+ ```bash
52
+ agents-observe query --agent backlog-executor --last 2h
53
+ ```
54
+
55
+ ### Analysis Workflows
56
+
57
+ 6. **Debugging a failed agent**:
58
+ - Find the session ID from workflow logs or the dashboard
59
+ - Query the session to see the full tool call sequence
60
+ - Identify the last successful tool call before failure
61
+ - Check for error patterns: timeout, rate limit, permission denied, context overflow
62
+
63
+ 7. **Performance profiling**:
64
+ - Compare tool call durations across agents
65
+ - Identify bottleneck tools (slow reads, large writes)
66
+ - Check for redundant tool calls (same file read multiple times)
67
+ - Measure time-to-first-action for each sub-agent
68
+
69
+ 8. **Parallel execution monitoring**:
70
+ - Verify all expected sub-agents launched successfully
71
+ - Watch for agents blocking on shared resources (file locks, API limits)
72
+ - Detect agents that completed quickly vs those that are stuck
73
+ - Identify ordering issues in agent dependencies
74
+
75
+ ## Output
76
+
77
+ Report findings appropriate to the workflow:
78
+
79
+ **For debugging:**
80
+ ```
81
+ ## Agent Failure Analysis: [session-id]
82
+ - Agent: [name]
83
+ - Failed at: [timestamp]
84
+ - Last successful tool: [tool name] at [timestamp]
85
+ - Error: [error message or pattern]
86
+ - Root cause: [analysis]
87
+ - Recommendation: [fix]
88
+ ```
89
+
90
+ **For performance profiling:**
91
+ ```
92
+ ## Agent Performance Profile
93
+ | Agent | Duration | Tool Calls | Slowest Tool | Outcome |
94
+ |---|---|---|---|---|
95
+ | [name] | [Xs] | [N] | [tool, Xms] | [success/fail] |
96
+
97
+ ### Bottlenecks
98
+ - [Specific finding, e.g., "Agent X read config.yaml 7 times — cache recommended"]
99
+
100
+ ### Recommendations
101
+ - [Actionable optimization]
102
+ ```
103
+
104
+ ## Integration Notes
105
+
106
+ - Do NOT leave agents-observe running permanently — SQLite writes add overhead to every tool call
107
+ - Enable for specific debugging sessions, then shut down
108
+ - Dashboard port is 3847 — ensure no conflicts with other local services
109
+ - Pairs well with ccxray for combined tool-call and token-level analysis
110
+ - Session data persists in SQLite after the dashboard is closed — queryable later
@@ -0,0 +1,91 @@
1
+ ---
2
+ name: ccxray-diagnostics
3
+ description: Run token/cost X-ray diagnostics on Claude API sessions — per-turn breakdown, context window heatmaps, system prompt diffs. Use for debugging expensive sessions or comparing sub-agent efficiency.
4
+ ---
5
+
6
+ # Token/Cost X-Ray Diagnostics (ccxray)
7
+
8
+ You are a Maestro agent. Use ccxray to diagnose token usage, cost, and context window utilization across agent sessions.
9
+
10
+ ## When to Invoke
11
+
12
+ - **Debugging expensive sessions** — a backlog cycle consumed unexpectedly high tokens
13
+ - **Sub-agent efficiency comparison** — comparing token cost across parallel agents doing similar work
14
+ - **Context window analysis** — investigating whether agents are hitting context limits or carrying stale context
15
+ - **System prompt auditing** — diffing system prompts across agents to find bloat
16
+ - **Post-incident analysis** — understanding what went wrong in a failed or degraded session
17
+
18
+ ## Background
19
+
20
+ ccxray is a transparent HTTP proxy that intercepts all Claude API calls and provides a live dashboard with per-turn token/cost breakdowns. It is an on-demand diagnostic tool, not always-on infrastructure.
21
+
22
+ - Proxy + dashboard: `npx ccxray claude`
23
+ - Logs stored at: `~/.ccxray/logs/`
24
+ - Dashboard shows live data during the session
25
+
26
+ ## Steps
27
+
28
+ 1. **Launch a diagnostic session** — run the target agent through ccxray:
29
+ ```bash
30
+ npx ccxray claude
31
+ ```
32
+ This wraps the Claude Code session in the ccxray proxy. All API calls are intercepted and logged.
33
+
34
+ 2. **Open the dashboard** — ccxray serves a local dashboard showing:
35
+ - Per-turn input/output token counts
36
+ - Cumulative cost tracking
37
+ - Context window heatmap (what is filling the window)
38
+ - System prompt content and diffs between turns
39
+
40
+ 3. **Analyze token distribution** — identify where tokens are being spent:
41
+ - Large system prompts eating context budget
42
+ - Repeated tool outputs inflating input tokens
43
+ - Verbose agent responses consuming output tokens
44
+ - Context window near capacity causing cache misses
45
+
46
+ 4. **Compare sub-agents** — if multiple agents ran in parallel, compare their ccxray logs:
47
+ - Which agent consumed the most tokens per unit of work?
48
+ - Are any agents carrying unnecessary context?
49
+ - Are system prompts appropriately sized for each agent's role?
50
+
51
+ 5. **Review historical logs** — check `~/.ccxray/logs/` for past session data:
52
+ ```bash
53
+ ls -lt ~/.ccxray/logs/
54
+ ```
55
+
56
+ 6. **Produce findings** — summarize token/cost insights and recommend optimizations.
57
+
58
+ ## Output
59
+
60
+ Report the following:
61
+
62
+ ```
63
+ ## Token/Cost X-Ray Report
64
+
65
+ ### Session Summary
66
+ - Total input tokens: [N]
67
+ - Total output tokens: [N]
68
+ - Estimated cost: $[X.XX]
69
+ - Turns: [N]
70
+ - Context window peak: [N]% utilization
71
+
72
+ ### Top Token Consumers
73
+ 1. [Turn/phase] — [N] tokens ([reason])
74
+ 2. [Turn/phase] — [N] tokens ([reason])
75
+
76
+ ### Optimization Recommendations
77
+ - [Specific recommendation, e.g., "Reduce system prompt by removing unused policy sections"]
78
+ - [Specific recommendation, e.g., "Cache tool outputs instead of re-reading files"]
79
+
80
+ ### Sub-Agent Comparison (if applicable)
81
+ | Agent | Input Tokens | Output Tokens | Cost | Efficiency |
82
+ |---|---|---|---|---|
83
+ | [name] | [N] | [N] | $[X] | [notes] |
84
+ ```
85
+
86
+ ## Integration Notes
87
+
88
+ - This is an on-demand tool — do NOT run ccxray on every session, only when diagnosing issues
89
+ - ccxray adds minimal overhead (~5ms per request) but writes logs to disk
90
+ - Use for periodic token-budget audits (e.g., weekly or once per backlog cycle)
91
+ - Pair with claude-pace to correlate token usage with quota consumption
@@ -0,0 +1,61 @@
1
+ ---
2
+ name: claude-pace
3
+ description: Check Claude API rate limit quota — 5-hour/7-day usage, reset countdowns, pace delta. Use before heavy backlog cycles, when quota warnings appear, or to decide whether to defer non-urgent work.
4
+ ---
5
+
6
+ # Rate Limit Monitoring (claude-pace)
7
+
8
+ You are a Maestro agent. Use the claude-pace status line plugin to monitor API quota and make informed scheduling decisions.
9
+
10
+ ## When to Invoke
11
+
12
+ - **Before heavy backlog cycles** — check headroom before spawning parallel agents
13
+ - **When quota warnings appear** — "hit your limit" or rate-limit errors in logs
14
+ - **Capacity planning** — deciding whether to defer improvement-backlog or non-urgent items
15
+ - **Post-incident** — confirming quota has recovered after a rate-limit outage
16
+
17
+ ## Background
18
+
19
+ claude-pace is a Claude Code status line plugin (Bash + jq, ~10ms latency). It is auto-installed by `init-agent.sh`; to install it manually, run `claude plugin marketplace add Astro-Han/claude-pace && claude plugin install claude-pace`. It displays real-time quota data in the Claude Code status bar.
20
+
21
+ The key signal is **pace delta**:
22
+ - **Green** — headroom available, safe to proceed with full parallelism
23
+ - **Yellow** — approaching limits, reduce parallel agent count
24
+ - **Red** — near or at limit, defer all non-urgent work immediately
25
+
26
+ ## Steps
27
+
28
+ 1. **Read the status line** — claude-pace displays 5-hour window usage, 7-day rolling usage, and time until next reset directly in the Claude Code status bar.
29
+
30
+ 2. **Evaluate pace delta** — determine current capacity:
31
+ - **Green (>40% remaining)**: Proceed normally. Full parallel agent spawning is safe.
32
+ - **Yellow (15-40% remaining)**: Reduce to 2 parallel agents max. Defer improvement-backlog items.
33
+ - **Red (<15% remaining)**: Critical items only. No parallel spawning. Defer all non-urgent work.
34
+
35
+ 3. **Check reset countdown** — if quota is constrained, note when the 5-hour window resets. Schedule deferred work for after reset.
36
+
37
+ 4. **Adjust execution plan** based on findings:
38
+ - Prioritize critical/high-priority queue items only when constrained
39
+ - Move improvement-backlog and low-priority items to next cycle
40
+ - Reduce `parallel_with` fan-out in workflow steps
41
+ - Log the capacity decision in the session audit
42
+
43
+ 5. **If a rate-limit outage is suspected**, check workflow logs for a `"started"` entry without a matching `"completed"` entry, and inspect `.log` files for the string `"hit your limit"`.
44
+
45
+ ## Output
46
+
47
+ Report the following to the calling agent or session:
48
+
49
+ ```
50
+ ## Quota Status
51
+ - 5-hour window: [X]% used — resets in [T]
52
+ - 7-day rolling: [X]% used
53
+ - Pace delta: [green/yellow/red]
54
+ - Recommendation: [proceed normally / reduce parallelism / critical-only mode]
55
+ ```
56
+
57
+ ## Integration Notes
58
+
59
+ - Complements Maestro's reactive rate-limit detection (checking logs after the fact) with proactive visibility
60
+ - Does NOT require launching a separate process — data is in the status bar
61
+ - When pace delta is red, the backlog executor should automatically shrink its batch size from 3-5 items to 1-2
@@ -0,0 +1,99 @@
1
+ ---
2
+ name: code-review-graph
3
+ description: Structural code analysis via tree-sitter knowledge graph — blast radius, review context, architecture overview, semantic search. Use for PR reviews, refactoring, and engineering health checks.
4
+ ---
5
+
6
+ # Structural Code Analysis (code-review-graph)
7
+
8
+ You are a Maestro agent. Use code-review-graph to build and query a tree-sitter-based knowledge graph of any codebase for structural analysis.
9
+
10
+ ## When to Invoke
11
+
12
+ - **PR reviews** — assess blast radius of changes before approving
13
+ - **Refactoring** — understand call chains, import graphs, and dependencies before making changes
14
+ - **Engineering health checks** — generate architecture overviews for repos in the portfolio
15
+ - **Pre-change analysis** — determine what will break if a function/class is modified
16
+ - **Semantic code search** — find functions, patterns, or relationships across a codebase
17
+
18
+ ## Background
19
+
20
+ code-review-graph uses tree-sitter to parse source code and build a knowledge graph mapping functions, classes, imports, and call relationships. It provides both CLI tools and an MCP server with specialized analysis tools.
21
+
22
+ ## Steps
23
+
24
+ ### One-Time Setup
25
+
26
+ 1. **Index the target repository**:
27
+ ```bash
28
+ npx code-review-graph index /path/to/repo
29
+ ```
30
+ This parses all source files and builds the knowledge graph. Re-run after significant changes.
31
+
32
+ 2. **Configure as MCP server** (optional, for persistent access):
33
+ Add to `.claude/settings.json` under `mcpServers`:
34
+ ```json
35
+ {
36
+ "code-review-graph": {
37
+ "command": "npx",
38
+ "args": ["code-review-graph", "serve", "--port", "3848"]
39
+ }
40
+ }
41
+ ```
42
+
43
+ ### Analysis Workflows
44
+
45
+ 3. **Blast radius analysis** — before approving a PR or making a change:
46
+ - Query which functions/modules depend on the changed code
47
+ - Identify downstream consumers that may break
48
+ - Map the full call chain from changed function to entry points
49
+ - Tool: `blast_radius` (via MCP) or CLI equivalent
50
+
51
+ 4. **Review context** — when reviewing a PR:
52
+ - Pull structural context for changed files (what calls them, what they call)
53
+ - Identify if changes touch hot paths or critical modules
54
+ - Surface related functions that should have been changed but were not
55
+ - Tool: `review_context` (via MCP)
56
+
57
+ 5. **Architecture overview** — for engineering health checks:
58
+ - Generate module dependency graph
59
+ - Identify circular dependencies
60
+ - Map public API surface vs internal implementation
61
+ - Surface orphaned code (defined but never called)
62
+ - Tool: `architecture_overview` (via MCP)
63
+
64
+ 6. **Semantic search** — finding code by intent:
65
+ - Search for functions matching a description or pattern
66
+ - Find all implementations of a given interface
67
+ - Locate similar code patterns across the codebase
68
+ - Tool: `semantic_search` (via MCP)
69
+
70
+ ## Output
71
+
72
+ Report findings appropriate to the workflow:
73
+
74
+ **For blast radius:**
75
+ ```
76
+ ## Blast Radius: [function/file]
77
+ - Direct dependents: [N] functions in [N] files
78
+ - Transitive impact: [N] modules
79
+ - Risk level: [low/medium/high]
80
+ - Affected entry points: [list]
81
+ ```
82
+
83
+ **For architecture overview:**
84
+ ```
85
+ ## Architecture: [repo]
86
+ - Modules: [N]
87
+ - Key dependency chains: [list top 3]
88
+ - Circular dependencies: [list or "none"]
89
+ - Orphaned exports: [count]
90
+ - Hottest module (most dependents): [name]
91
+ ```
92
+
93
+ ## Integration Notes
94
+
95
+ - Recommended for coding-oriented agents (engineering, platform, product)
96
+ - Optional for operational agents (comms, hiring, strategy) — they rarely need code-level analysis
97
+ - Re-index after merging significant PRs to keep the graph current
98
+ - The MCP server listens on port 3848 (set via `--port` in the setup step) — ensure no conflicts with other local services
99
+ - Graph build time scales with repo size; large monorepos may take 30-60 seconds
@@ -131,6 +131,70 @@ npm run gen:list # List available prompt specs
131
131
 
132
132
  - 5-layer defence defined in `policies/prompt-injection-defence.yaml`
133
133
 
134
+ ### Persistent Session Memory (Claude-Mem)
135
+
136
+ - **Claude-Mem** automatically captures tool usage, generates semantic summaries, and injects relevant context into future sessions
137
+ - Lifecycle hooks: SessionStart, UserPromptSubmit, PostToolUse, Stop, SessionEnd
138
+ - Storage: Local SQLite database with Chroma vector search at `~/.claude-mem/`
139
+ - Configuration: `~/.claude-mem/settings.json`
140
+ - Install/update: `npx claude-mem install`
141
+
142
+ ### Rate Limit Awareness (Claude-Pace)
143
+
144
+ - **Claude-Pace** displays real-time quota usage (5-hour + 7-day), reset countdowns, and burn rate in the status line
145
+ - Pace delta indicator: green = headroom, yellow = approaching limits, red = near or at the limit (burning too fast)
146
+ - Pure Bash + jq — ~10ms runtime, no network calls
147
+ - Install: `claude plugin marketplace add Astro-Han/claude-pace && claude plugin install claude-pace`
148
+ - See `docs/guides/claude-pace-setup.md`
149
+
150
+ ### Token Diagnostics (ccxray)
151
+
152
+ - **ccxray** provides X-ray vision into Claude Code sessions via HTTP proxy + live dashboard
153
+ - Per-turn token/cost breakdown, context window heatmaps, system prompt diffs
154
+ - On-demand diagnostic tool (not default runtime): `npx ccxray claude`
155
+ - See `docs/guides/ccxray-diagnostics.md`
156
+
157
+ ### Persistent Sessions (Claudraband)
158
+
159
+ - **Claudraband** adds persistent, resumable sessions and daemon mode with HTTP API to Claude Code
160
+ - Daemon mode: `npx @halfwhey/claudraband serve --port 7842`
161
+ - Prompt injection into running sessions: `npx @halfwhey/claudraband prompt --session <id> "keep going"`
162
+ - Enable as session backend: set `MAESTRO_SESSION_BACKEND=claudraband` in `.env`
163
+ - See `docs/guides/claudraband-sessions.md`
164
+
165
+ ### Swarm Orchestration (ClawTeam) [optional]
166
+
167
+ - **ClawTeam** orchestrates multi-agent coding swarms using git worktrees + tmux + filesystem messaging
168
+ - Leader/worker pattern with task dependencies and auto-unblocking
169
+ - Each worker gets its own worktree branch — no file conflicts between agents
170
+ - Enable: set `MAESTRO_ENABLE_SWARM=1` before running `init-agent.sh`
171
+ - See `docs/guides/clawteam-swarm.md`
172
+
173
+ ### Multi-Agent Observability (Agents Observe) [optional]
174
+
175
+ - **Agents Observe** provides a real-time dashboard for monitoring Claude Code agent teams
176
+ - WebSocket-streamed UI showing live tool calls, agent hierarchy trees, and session state
177
+ - 3-5ms latency via background hooks with SQLite storage
178
+ - Best for debugging parallel agent execution and performance profiling
179
+ - Install: `./scripts/setup/install-dev-tools.sh --tool agents-observe`
180
+ - See `docs/guides/agents-observe-setup.md`
181
+
182
+ ### Codebase Knowledge Graph (code-review-graph) [optional]
183
+
184
+ - **code-review-graph** builds a Tree-sitter-powered structural knowledge graph of codebases
185
+ - MCP tools: blast-radius analysis, review context, architecture overview, semantic search, refactor planning
186
+ - Auto-updates on file changes — persistent structural awareness
187
+ - Best for coding-oriented agents doing PR reviews, engineering health checks, and refactoring
188
+ - Install: `./scripts/setup/install-dev-tools.sh --tool code-review-graph`
189
+ - See `docs/guides/code-review-graph-setup.md`
190
+
191
+ ### Self-Optimization Pattern (AutoAgent-inspired) [optional]
192
+
193
+ - Standardized framework for agents to autonomously improve their own configuration via benchmark-driven iteration
194
+ - Define scoring rubrics → run scenarios → propose changes → compare scores → keep if better
195
+ - Experiment log: `self-optimization/experiments.jsonl`
196
+ - See `docs/guides/self-optimization-pattern.md`
197
+
134
198
  ### Audit & Hooks
135
199
 
136
200
  - Pre-send audit hook gates all outbound Slack/Gmail communications
@@ -57,7 +57,8 @@ export const agent = {
57
57
  /** Agent's phone number (E.164 format) — used for Twilio SMS/voice/WhatsApp */
58
58
  phone: '+16282656712',
59
59
 
60
- /** Agent's Slack member ID (e.g. U097N5R0M7U) — used for mentions and DM routing */
60
+ /** Agent's Slack member ID (e.g. U097N5R0M7U) — used for mentions, DM routing,
61
+ * and cross-agent message filtering (see config/known-agents.json) */
61
62
  slackMemberId: '',
62
63
 
63
64
  // ---------------------------------------------------------------------------
@@ -0,0 +1,46 @@
1
+ # Caller ID to user mapping for voice/SMS authorization
2
+ #
3
+ # Used by: sms-handler.mjs, parse-voice-transcript.mjs, user-context-search.py
4
+ # Maps phone numbers, Slack IDs, and emails to user identities with access levels.
5
+ #
6
+ # Access levels:
7
+ # ceo — Full access to all paths (memory, knowledge, state, outputs, docs, logs, all interactions)
8
+ # leadership — Company knowledge + docs + research/briefs + own interaction logs
9
+ # partner — Public knowledge + research + own interaction logs
10
+ # default — Unknown callers — public company knowledge only (knowledge/sources/)
11
+ #
12
+ # Hot-reloaded every 60 seconds by sms-handler.mjs — no restart needed after edits.
13
+ #
14
+ # Setup: See docs/guides/voice-sms-setup.md for full configuration instructions.
15
+
16
+ users:
17
+ # ── Principal (the person this agent reports to) ──────────────────────────
18
+ # Replace with your principal's details. This entry should have access_level: ceo.
19
+ principal:
20
+ name: "Principal Full Name"
21
+ phone: ["+1XXXXXXXXXX"]
22
+ whatsapp: ["+1XXXXXXXXXX"]
23
+ slack_id: "UXXXXXXXXXX"
24
+ email: "principal@company.com"
25
+ access_level: ceo
26
+
27
+ # ── Leadership / Team ─────────────────────────────────────────────────────
28
+ # Add team members who should be recognized by the SMS handler and voice parser.
29
+ # Each entry needs at minimum: name, slack_id, email, and access_level.
30
+ # Phone numbers are only needed if the person will contact the agent via SMS/voice.
31
+
32
+ # example-leader:
33
+ # name: "Example Leader"
34
+ # phone: []
35
+ # slack_id: "UXXXXXXXXXX"
36
+ # email: "leader@company.com"
37
+ # access_level: leadership
38
+
39
+ # ── Partners / External ───────────────────────────────────────────────────
40
+
41
+ # example-partner:
42
+ # name: "External Partner"
43
+ # phone: []
44
+ # slack_id: ""
45
+ # email: "partner@external.com"
46
+ # access_level: partner
@@ -0,0 +1,35 @@
1
+ {
2
+ "_comment": "Registry of all AI agents in the organisation. Used by the daemon classifier to prevent cross-agent message interception — when a message @-mentions a specific agent, only that agent's daemon should respond. Each agent repo should have its own copy of this file. Update when agents are added or removed.",
3
+ "agents": [
4
+ {
5
+ "name": "Sophie",
6
+ "slackId": "U099N1JFPRQ",
7
+ "role": "Chief of Staff",
8
+ "repo": "sophie-ai"
9
+ },
10
+ {
11
+ "name": "Ravi",
12
+ "slackId": "U099N1JE0LA",
13
+ "role": "CTO",
14
+ "repo": "ravi-ai"
15
+ },
16
+ {
17
+ "name": "Lucas",
18
+ "slackId": "U099N17LVCJ",
19
+ "role": "Head of Legal Ops",
20
+ "repo": "lucas-ai"
21
+ },
22
+ {
23
+ "name": "Jacob",
24
+ "slackId": "U099N19NXT4",
25
+ "role": "Head of Engineering",
26
+ "repo": "jacob-ai"
27
+ },
28
+ {
29
+ "name": "Hessa",
30
+ "slackId": "U0ALUES7ERY",
31
+ "role": "Head of AI Research",
32
+ "repo": "hessa-surface"
33
+ }
34
+ ]
35
+ }