@adaptic/maestro 1.1.8 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/init-maestro.md +304 -8
- package/README.md +28 -0
- package/bin/maestro.mjs +1 -1
- package/docs/guides/agents-observe-setup.md +64 -0
- package/docs/guides/ccxray-diagnostics.md +65 -0
- package/docs/guides/claude-mem-setup.md +79 -0
- package/docs/guides/claude-pace-setup.md +56 -0
- package/docs/guides/claudraband-sessions.md +98 -0
- package/docs/guides/clawteam-swarm.md +116 -0
- package/docs/guides/code-review-graph-setup.md +86 -0
- package/docs/guides/self-optimization-pattern.md +82 -0
- package/docs/guides/slack-setup.md +4 -2
- package/docs/guides/twilio-subaccounts-setup.md +223 -0
- package/docs/guides/webhook-relay-setup.md +349 -0
- package/package.json +2 -1
- package/plugins/maestro-skills/plugin.json +16 -0
- package/plugins/maestro-skills/skills/agents-observe.md +110 -0
- package/plugins/maestro-skills/skills/ccxray-diagnostics.md +91 -0
- package/plugins/maestro-skills/skills/claude-pace.md +61 -0
- package/plugins/maestro-skills/skills/code-review-graph.md +99 -0
- package/scaffold/CLAUDE.md +64 -0
- package/scaffold/config/agent.ts.example +2 -1
- package/scaffold/config/known-agents.json +35 -0
- package/scripts/daemon/classifier.mjs +264 -50
- package/scripts/daemon/dispatcher.mjs +109 -5
- package/scripts/daemon/launchd-wrapper-generic.sh +96 -0
- package/scripts/daemon/launchd-wrapper-slack-events.sh +37 -0
- package/scripts/daemon/launchd-wrapper.sh +91 -0
- package/scripts/daemon/lib/session-router.mjs +274 -0
- package/scripts/daemon/lib/session-router.test.mjs +295 -0
- package/scripts/daemon/prompt-builder.mjs +51 -11
- package/scripts/daemon/responder.mjs +234 -19
- package/scripts/daemon/session-lock.mjs +194 -0
- package/scripts/daemon/sophie-daemon.mjs +16 -2
- package/scripts/email-signature.html +20 -4
- package/scripts/local-triggers/generate-plists.sh +62 -10
- package/scripts/poller/imap-client.mjs +4 -2
- package/scripts/poller/slack-poller.mjs +104 -52
- package/scripts/setup/init-agent.sh +91 -1
- package/scripts/setup/install-dev-tools.sh +150 -0
- package/scripts/spawn-session.sh +21 -6
- package/workflows/continuous/backlog-executor.yaml +141 -0
- package/workflows/daily/evening-wrap.yaml +41 -1
- package/workflows/daily/morning-brief.yaml +17 -0
- package/workflows/event-driven/agent-failure-investigation.yaml +137 -0
- package/workflows/event-driven/pr-review.yaml +104 -0
- package/workflows/weekly/engineering-health.yaml +154 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: code-review-graph
|
|
3
|
+
description: Structural code analysis via tree-sitter knowledge graph — blast radius, review context, architecture overview, semantic search. Use for PR reviews, refactoring, and engineering health checks.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Structural Code Analysis (code-review-graph)
|
|
7
|
+
|
|
8
|
+
You are a Maestro agent. Use code-review-graph to build and query a tree-sitter-based knowledge graph of any codebase for structural analysis.
|
|
9
|
+
|
|
10
|
+
## When to Invoke
|
|
11
|
+
|
|
12
|
+
- **PR reviews** — assess blast radius of changes before approving
|
|
13
|
+
- **Refactoring** — understand call chains, import graphs, and dependencies before making changes
|
|
14
|
+
- **Engineering health checks** — generate architecture overviews for repos in the portfolio
|
|
15
|
+
- **Pre-change analysis** — determine what will break if a function/class is modified
|
|
16
|
+
- **Semantic code search** — find functions, patterns, or relationships across a codebase
|
|
17
|
+
|
|
18
|
+
## Background
|
|
19
|
+
|
|
20
|
+
code-review-graph uses tree-sitter to parse source code and build a knowledge graph mapping functions, classes, imports, and call relationships. It provides both CLI tools and an MCP server with specialized analysis tools.
|
|
21
|
+
|
|
22
|
+
## Steps
|
|
23
|
+
|
|
24
|
+
### One-Time Setup
|
|
25
|
+
|
|
26
|
+
1. **Index the target repository**:
|
|
27
|
+
```bash
|
|
28
|
+
npx code-review-graph index /path/to/repo
|
|
29
|
+
```
|
|
30
|
+
This parses all source files and builds the knowledge graph. Re-run after significant changes.
|
|
31
|
+
|
|
32
|
+
2. **Configure as MCP server** (optional, for persistent access):
|
|
33
|
+
Add to `.claude/settings.json` under `mcpServers`:
|
|
34
|
+
```json
|
|
35
|
+
{
|
|
36
|
+
"code-review-graph": {
|
|
37
|
+
"command": "npx",
|
|
38
|
+
"args": ["code-review-graph", "serve", "--port", "3848"]
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Analysis Workflows
|
|
44
|
+
|
|
45
|
+
3. **Blast radius analysis** — before approving a PR or making a change:
|
|
46
|
+
- Query which functions/modules depend on the changed code
|
|
47
|
+
- Identify downstream consumers that may break
|
|
48
|
+
- Map the full call chain from changed function to entry points
|
|
49
|
+
- Tool: `blast_radius` (via MCP) or CLI equivalent
|
|
50
|
+
|
|
51
|
+
4. **Review context** — when reviewing a PR:
|
|
52
|
+
- Pull structural context for changed files (what calls them, what they call)
|
|
53
|
+
- Identify if changes touch hot paths or critical modules
|
|
54
|
+
- Surface related functions that should have been changed but were not
|
|
55
|
+
- Tool: `review_context` (via MCP)
|
|
56
|
+
|
|
57
|
+
5. **Architecture overview** — for engineering health checks:
|
|
58
|
+
- Generate module dependency graph
|
|
59
|
+
- Identify circular dependencies
|
|
60
|
+
- Map public API surface vs internal implementation
|
|
61
|
+
- Surface orphaned code (defined but never called)
|
|
62
|
+
- Tool: `architecture_overview` (via MCP)
|
|
63
|
+
|
|
64
|
+
6. **Semantic search** — finding code by intent:
|
|
65
|
+
- Search for functions matching a description or pattern
|
|
66
|
+
- Find all implementations of a given interface
|
|
67
|
+
- Locate similar code patterns across the codebase
|
|
68
|
+
- Tool: `semantic_search` (via MCP)
|
|
69
|
+
|
|
70
|
+
## Output
|
|
71
|
+
|
|
72
|
+
Report findings appropriate to the workflow:
|
|
73
|
+
|
|
74
|
+
**For blast radius:**
|
|
75
|
+
```
|
|
76
|
+
## Blast Radius: [function/file]
|
|
77
|
+
- Direct dependents: [N] functions in [N] files
|
|
78
|
+
- Transitive impact: [N] modules
|
|
79
|
+
- Risk level: [low/medium/high]
|
|
80
|
+
- Affected entry points: [list]
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**For architecture overview:**
|
|
84
|
+
```
|
|
85
|
+
## Architecture: [repo]
|
|
86
|
+
- Modules: [N]
|
|
87
|
+
- Key dependency chains: [list top 3]
|
|
88
|
+
- Circular dependencies: [list or "none"]
|
|
89
|
+
- Orphaned exports: [count]
|
|
90
|
+
- Hottest module (most dependents): [name]
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Integration Notes
|
|
94
|
+
|
|
95
|
+
- Recommended for coding-oriented agents (engineering, platform, product)
|
|
96
|
+
- Optional for operational agents (comms, hiring, strategy) — they rarely need code-level analysis
|
|
97
|
+
- Re-index after merging significant PRs to keep the graph current
|
|
98
|
+
- The MCP server configuration above pins port 3848 (`--port 3848`) — ensure nothing else on the host is bound to it
|
|
99
|
+
- Graph build time scales with repo size; large monorepos may take 30-60 seconds
|
package/scaffold/CLAUDE.md
CHANGED
|
@@ -131,6 +131,70 @@ npm run gen:list # List available prompt specs
|
|
|
131
131
|
|
|
132
132
|
- 5-layer defence defined in `policies/prompt-injection-defence.yaml`
|
|
133
133
|
|
|
134
|
+
### Persistent Session Memory (Claude-Mem)
|
|
135
|
+
|
|
136
|
+
- **Claude-Mem** automatically captures tool usage, generates semantic summaries, and injects relevant context into future sessions
|
|
137
|
+
- Lifecycle hooks: SessionStart, UserPromptSubmit, PostToolUse, Stop, SessionEnd
|
|
138
|
+
- Storage: Local SQLite database with Chroma vector search at `~/.claude-mem/`
|
|
139
|
+
- Configuration: `~/.claude-mem/settings.json`
|
|
140
|
+
- Install/update: `npx claude-mem install`
|
|
141
|
+
|
|
142
|
+
### Rate Limit Awareness (Claude-Pace)
|
|
143
|
+
|
|
144
|
+
- **Claude-Pace** displays real-time quota usage (5-hour + 7-day), reset countdowns, and burn rate in the status line
|
|
145
|
+
- Pace delta indicator: green = headroom, red = burning too fast
|
|
146
|
+
- Pure Bash + jq — ~10ms runtime, no network calls
|
|
147
|
+
- Install: `claude plugin marketplace add Astro-Han/claude-pace && claude plugin install claude-pace`
|
|
148
|
+
- See `docs/guides/claude-pace-setup.md`
|
|
149
|
+
|
|
150
|
+
### Token Diagnostics (ccxray)
|
|
151
|
+
|
|
152
|
+
- **ccxray** provides X-ray vision into Claude Code sessions via HTTP proxy + live dashboard
|
|
153
|
+
- Per-turn token/cost breakdown, context window heatmaps, system prompt diffs
|
|
154
|
+
- On-demand diagnostic tool (not default runtime): `npx ccxray claude`
|
|
155
|
+
- See `docs/guides/ccxray-diagnostics.md`
|
|
156
|
+
|
|
157
|
+
### Persistent Sessions (Claudraband)
|
|
158
|
+
|
|
159
|
+
- **Claudraband** adds persistent, resumable sessions and daemon mode with HTTP API to Claude Code
|
|
160
|
+
- Daemon mode: `npx @halfwhey/claudraband serve --port 7842`
|
|
161
|
+
- Prompt injection into running sessions: `npx @halfwhey/claudraband prompt --session <id> "keep going"`
|
|
162
|
+
- Enable as session backend: set `MAESTRO_SESSION_BACKEND=claudraband` in `.env`
|
|
163
|
+
- See `docs/guides/claudraband-sessions.md`
|
|
164
|
+
|
|
165
|
+
### Swarm Orchestration (ClawTeam) [optional]
|
|
166
|
+
|
|
167
|
+
- **ClawTeam** orchestrates multi-agent coding swarms using git worktrees + tmux + filesystem messaging
|
|
168
|
+
- Leader/worker pattern with task dependencies and auto-unblocking
|
|
169
|
+
- Each worker gets its own worktree branch — no file conflicts between agents
|
|
170
|
+
- Enable: set `MAESTRO_ENABLE_SWARM=1` before running `init-agent.sh`
|
|
171
|
+
- See `docs/guides/clawteam-swarm.md`
|
|
172
|
+
|
|
173
|
+
### Multi-Agent Observability (Agents Observe) [optional]
|
|
174
|
+
|
|
175
|
+
- **Agents Observe** provides a real-time dashboard for monitoring Claude Code agent teams
|
|
176
|
+
- WebSocket-streamed UI showing live tool calls, agent hierarchy trees, and session state
|
|
177
|
+
- 3-5ms latency via background hooks with SQLite storage
|
|
178
|
+
- Best for debugging parallel agent execution and performance profiling
|
|
179
|
+
- Install: `./scripts/setup/install-dev-tools.sh --tool agents-observe`
|
|
180
|
+
- See `docs/guides/agents-observe-setup.md`
|
|
181
|
+
|
|
182
|
+
### Codebase Knowledge Graph (code-review-graph) [optional]
|
|
183
|
+
|
|
184
|
+
- **code-review-graph** builds a tree-sitter-powered structural knowledge graph of codebases
|
|
185
|
+
- MCP tools: blast-radius analysis, review context, architecture overview, semantic search, refactor planning
|
|
186
|
+
- Auto-updates on file changes — persistent structural awareness
|
|
187
|
+
- Best for coding-oriented agents doing PR reviews, engineering health checks, and refactoring
|
|
188
|
+
- Install: `./scripts/setup/install-dev-tools.sh --tool code-review-graph`
|
|
189
|
+
- See `docs/guides/code-review-graph-setup.md`
|
|
190
|
+
|
|
191
|
+
### Self-Optimization Pattern (AutoAgent-inspired) [optional]
|
|
192
|
+
|
|
193
|
+
- Standardized framework for agents to autonomously improve their own configuration via benchmark-driven iteration
|
|
194
|
+
- Define scoring rubrics → run scenarios → propose changes → compare scores → keep if better
|
|
195
|
+
- Experiment log: `self-optimization/experiments.jsonl`
|
|
196
|
+
- See `docs/guides/self-optimization-pattern.md`
|
|
197
|
+
|
|
134
198
|
### Audit & Hooks
|
|
135
199
|
|
|
136
200
|
- Pre-send audit hook gates all outbound Slack/Gmail communications
|
|
@@ -57,7 +57,8 @@ export const agent = {
|
|
|
57
57
|
/** Agent's phone number (E.164 format) — used for Twilio SMS/voice/WhatsApp */
|
|
58
58
|
phone: '+16282656712',
|
|
59
59
|
|
|
60
|
-
/** Agent's Slack member ID (e.g. U097N5R0M7U) — used for mentions
|
|
60
|
+
/** Agent's Slack member ID (e.g. U097N5R0M7U) — used for mentions, DM routing,
|
|
61
|
+
* and cross-agent message filtering (see config/known-agents.json) */
|
|
61
62
|
slackMemberId: '',
|
|
62
63
|
|
|
63
64
|
// ---------------------------------------------------------------------------
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_comment": "Registry of all AI agents in the organisation. Used by the daemon classifier to prevent cross-agent message interception — when a message @-mentions a specific agent, only that agent's daemon should respond. Each agent repo should have its own copy of this file. Update when agents are added or removed.",
|
|
3
|
+
"agents": [
|
|
4
|
+
{
|
|
5
|
+
"name": "Sophie",
|
|
6
|
+
"slackId": "U099N1JFPRQ",
|
|
7
|
+
"role": "Chief of Staff",
|
|
8
|
+
"repo": "sophie-ai"
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"name": "Ravi",
|
|
12
|
+
"slackId": "U099N1JE0LA",
|
|
13
|
+
"role": "CTO",
|
|
14
|
+
"repo": "ravi-ai"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"name": "Lucas",
|
|
18
|
+
"slackId": "U099N17LVCJ",
|
|
19
|
+
"role": "Head of Legal Ops",
|
|
20
|
+
"repo": "lucas-ai"
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"name": "Jacob",
|
|
24
|
+
"slackId": "U099N19NXT4",
|
|
25
|
+
"role": "Head of Engineering",
|
|
26
|
+
"repo": "jacob-ai"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"name": "Hessa",
|
|
30
|
+
"slackId": "U0ALUES7ERY",
|
|
31
|
+
"role": "Head of AI Research",
|
|
32
|
+
"repo": "hessa-surface"
|
|
33
|
+
}
|
|
34
|
+
]
|
|
35
|
+
}
|
|
@@ -5,28 +5,99 @@
|
|
|
5
5
|
* action type, and which Claude model should handle them.
|
|
6
6
|
*
|
|
7
7
|
* Classification chain:
|
|
8
|
-
* 1.
|
|
8
|
+
* 1. Claude Haiku via `claude --print` CLI (Max subscription, no API quota)
|
|
9
9
|
* 2. OpenAI gpt-4o-mini (fallback)
|
|
10
10
|
* 3. Rule-based heuristics (final fallback)
|
|
11
|
+
*
|
|
12
|
+
* Tier 1 was migrated off `@anthropic-ai/sdk` per CEO directive
|
|
13
|
+
* (Slack DM D099N1JGKRQ, 2026-04-27 09:38Z + 11:33Z): all agent
|
|
14
|
+
* daemon paths must funnel through Claude Code CLI sessions, not
|
|
15
|
+
* the Anthropic API. The Tier 2 OpenAI fallback is retained as a
|
|
16
|
+
* safety net for when the CLI is unavailable; Tier 3 rule-based
|
|
17
|
+
* remains the final fallback.
|
|
11
18
|
*/
|
|
12
19
|
|
|
13
|
-
import Anthropic from "@anthropic-ai/sdk";
|
|
14
20
|
import OpenAI from "openai";
|
|
15
21
|
import dotenv from "dotenv";
|
|
22
|
+
import { spawn } from "child_process";
|
|
16
23
|
import { fileURLToPath } from "url";
|
|
17
24
|
import { dirname, resolve } from "path";
|
|
25
|
+
import { readFileSync } from "fs";
|
|
18
26
|
|
|
19
27
|
// Load .env from project root
|
|
20
28
|
const __filename = fileURLToPath(import.meta.url);
|
|
21
29
|
const __dirname = dirname(__filename);
|
|
22
30
|
dotenv.config({ path: resolve(__dirname, "../../.env") });
|
|
23
31
|
|
|
32
|
+
// ── Agent identity + peer agents ──────────────────────────────────────────
|
|
33
|
+
// Loaded from config/known-agents.json and AGENT_DIR to determine which
|
|
34
|
+
// agent is "me" and which are peers. Prevents cross-agent message
|
|
35
|
+
// interception in shared channels (ib-20260418-cross-agent-routing).
|
|
36
|
+
|
|
37
|
+
const AGENT_DIR = process.env.AGENT_DIR || process.env.AGENT_ROOT || resolve(__dirname, "../..");
|
|
38
|
+
|
|
39
|
+
let _agentIdentity = null; // { name, slackId, role }
|
|
40
|
+
let _peerAgents = []; // [{ name, slackId, role }] — all agents EXCEPT me
|
|
41
|
+
|
|
42
|
+
/**
 * Load the agent registry once and cache the result in module state.
 *
 * Reads `config/known-agents.json` (AGENT_DIR first, then `$HOME/maestro`),
 * treats the entry whose `repo` equals the AGENT_DIR directory name as "me"
 * and stores every other entry in `_peerAgents`. When no entry matches, the
 * identity is scraped from `config/agent.ts`; when that file cannot be read,
 * a placeholder identity is used and all registry entries count as peers.
 *
 * Subsequent calls are no-ops. Missing or malformed registry files are
 * tolerated (the registry is then treated as empty).
 */
function loadAgentRegistry() {
  if (_agentIdentity) return; // already loaded

  // 1. Load known-agents.json (try agent repo first, then maestro scaffold)
  let agents = [];
  for (const base of [AGENT_DIR, resolve(process.env.HOME || "", "maestro")]) {
    try {
      const raw = readFileSync(resolve(base, "config/known-agents.json"), "utf-8");
      const listed = JSON.parse(raw).agents;
      // Guard against a non-array `agents` value so .find/.filter below are safe.
      agents = Array.isArray(listed) ? listed : [];
      if (agents.length > 0) break;
    } catch { /* try next */ }
  }

  // 2. Determine which agent is "me" — match by AGENT_DIR basename.
  // resolve() drops trailing slashes, and splitting on both separators keeps
  // the slug correct for "…/sophie-ai/" and for backslash-separated paths.
  const repoSlug = resolve(AGENT_DIR).split(/[\\/]/).pop(); // e.g. "sophie-ai"
  const me = agents.find((a) => a.repo === repoSlug);

  if (me) {
    _agentIdentity = me;
    _peerAgents = agents.filter((a) => a.slackId !== me.slackId);
  } else {
    // Fallback: try to extract from agent.ts or CLAUDE.md
    try {
      const agentTs = readFileSync(resolve(AGENT_DIR, "config/agent.ts"), "utf-8");
      const nameMatch = agentTs.match(/firstName:\s*['"](\w+)['"]/);
      const slackMatch = agentTs.match(/slackMemberId:\s*['"]([^'"]+)['"]/);
      _agentIdentity = {
        name: nameMatch ? nameMatch[1] : "Unknown",
        slackId: slackMatch ? slackMatch[1] : "",
        role: "agent",
      };
      const mySlackId = _agentIdentity.slackId;
      const myName = _agentIdentity.name.toLowerCase();
      // Exclude "me" by Slack ID when known; otherwise by name, so an agent
      // whose slackMemberId is still blank is not listed as its own peer.
      _peerAgents = agents.filter((a) => (
        mySlackId
          ? a.slackId !== mySlackId
          : (a.name || "").toLowerCase() !== myName
      ));
    } catch {
      _agentIdentity = { name: "Sophie", slackId: "", role: "agent" };
      _peerAgents = agents;
    }
  }

  console.log(`[classifier] Agent identity: ${_agentIdentity.name} (${_agentIdentity.slackId || "no slack ID"}), ${_peerAgents.length} peer agents`);
}
|
|
82
|
+
|
|
24
83
|
const ANTHROPIC_MODEL = "claude-haiku-4-5-20251001";
|
|
25
84
|
const OPENAI_MODEL = "gpt-4o-mini";
|
|
85
|
+
// Path to the Claude Code CLI. Overridable via CLAUDE_BIN; otherwise resolved
// under the invoking user's home directory so daemons running as other users
// find their own install. The original literal remains the last-resort
// fallback when HOME is unset.
const CLAUDE_BIN = process.env.CLAUDE_BIN
  || resolve(process.env.HOME || "/Users/sophie", ".local/bin/claude");
|
|
86
|
+
const CLAUDE_CLI_TIMEOUT_MS = 30_000;
|
|
26
87
|
|
|
27
88
|
// ── System prompt shared by both LLM classifiers ────────────────────────────
|
|
28
89
|
|
|
29
|
-
|
|
90
|
+
function buildSystemPrompt() {
|
|
91
|
+
loadAgentRegistry();
|
|
92
|
+
const agentName = _agentIdentity.name;
|
|
93
|
+
const agentRole = _agentIdentity.role || "agent";
|
|
94
|
+
|
|
95
|
+
// Build peer agents context for the LLM so it knows who the other agents are
|
|
96
|
+
const peerContext = _peerAgents.length > 0
|
|
97
|
+
? `\nKNOWN PEER AGENTS (other AI agents in the organisation — if a message @-mentions one of these by name or Slack ID, it is NOT directed at ${agentName}):\n${_peerAgents.map(a => `- ${a.name} (${a.role}, Slack: <@${a.slackId}>)`).join("\n")}\n`
|
|
98
|
+
: "";
|
|
99
|
+
|
|
100
|
+
return `You are a message classifier for ${agentName}, ${agentRole} (AI-operated) at Adaptic.ai. ${agentName} is the autonomous executive command layer for CEO Mehran Granfar.
|
|
30
101
|
|
|
31
102
|
Your job: classify each incoming message and return a JSON object with exactly these fields:
|
|
32
103
|
|
|
@@ -38,7 +109,7 @@ Your job: classify each incoming message and return a JSON object with exactly t
|
|
|
38
109
|
"category": "action_required" | "fyi" | "ignore",
|
|
39
110
|
"directed_at_sophie": true | false
|
|
40
111
|
}
|
|
41
|
-
|
|
112
|
+
${peerContext}
|
|
42
113
|
Classification rules:
|
|
43
114
|
|
|
44
115
|
PRIORITY:
|
|
@@ -64,21 +135,22 @@ CATEGORY:
|
|
|
64
135
|
- "fyi": Informational, no action needed
|
|
65
136
|
- "ignore": No value
|
|
66
137
|
|
|
67
|
-
DIRECTED_AT_SOPHIE (CRITICAL — determines whether
|
|
68
|
-
- true: The message is a DM to
|
|
69
|
-
- true: The message explicitly mentions
|
|
138
|
+
DIRECTED_AT_SOPHIE (CRITICAL — determines whether ${agentName} should respond. Default to false in channels/group chats.):
|
|
139
|
+
- true: The message is a DM to ${agentName} (1:1)
|
|
140
|
+
- true: The message explicitly mentions ${agentName} by name or @mention
|
|
70
141
|
- true: The message is from the CEO in a DM (1:1 conversation)
|
|
71
|
-
- true: The message explicitly asks
|
|
72
|
-
- true: In a thread where
|
|
73
|
-
-
|
|
142
|
+
- true: The message explicitly asks ${agentName} to do something they're responsible for AND addresses them specifically by name or @mention
|
|
143
|
+
- true: In a thread where ${agentName} previously replied, the message is a DIRECT reply to what ${agentName} said (e.g., answering their question, responding to their update) — not just any message in the same thread
|
|
144
|
+
- **FALSE — CROSS-AGENT RULE**: If the message explicitly @-mentions or names a DIFFERENT known AI agent (see KNOWN PEER AGENTS above), it is NOT directed at ${agentName} — even if the topic is relevant to ${agentName}'s role. The sender chose a specific agent; respect that routing.
|
|
145
|
+
- false: CEO messages in channels/group chats that don't mention ${agentName} — the CEO talks to many people, not just ${agentName}
|
|
74
146
|
- false: The message is general channel chatter between other team members
|
|
75
|
-
- false: The message is a conversation between two people that doesn't involve
|
|
76
|
-
- false: The message is a status update, FYI, or announcement not requiring
|
|
77
|
-
- false: The message is someone responding to another person (not
|
|
78
|
-
- false: Short reactions like "thanks", "nice", "got it", "+1", emoji-only messages — even in threads
|
|
147
|
+
- false: The message is a conversation between two people that doesn't involve ${agentName}, even if ${agentName} previously participated in the same thread
|
|
148
|
+
- false: The message is a status update, FYI, or announcement not requiring ${agentName}'s response
|
|
149
|
+
- false: The message is someone responding to another person (not ${agentName}) in a thread
|
|
150
|
+
- false: Short reactions like "thanks", "nice", "got it", "+1", emoji-only messages — even in threads ${agentName} is in
|
|
79
151
|
- false: Someone sharing a link, article, or resource with the channel generally
|
|
80
|
-
- false:
|
|
81
|
-
- CRITICAL DEFAULT: When in doubt in any multi-person channel, group chat, or thread, ALWAYS default to false.
|
|
152
|
+
- false: ${agentName} being in a thread does NOT mean every subsequent message is for them — only direct replies to their messages count
|
|
153
|
+
- CRITICAL DEFAULT: When in doubt in any multi-person channel, group chat, or thread, ALWAYS default to false. ${agentName} must NOT insert themselves into conversations they weren't invited to. It is far worse to respond unnecessarily than to miss a message — someone will @mention ${agentName} if they need them.
|
|
82
154
|
|
|
83
155
|
Context:
|
|
84
156
|
- sender_privilege "ceo" = Mehran Granfar (always critical, always opus)
|
|
@@ -106,8 +178,11 @@ Output: {"priority":"normal","action":"archive","model":"sonnet","summary":"Team
|
|
|
106
178
|
Input: External email: "Following up on our conversation about the Singapore entity"
|
|
107
179
|
Output: {"priority":"normal","action":"respond","model":"sonnet","summary":"External follow-up on Singapore entity discussion","category":"action_required","directed_at_sophie":true}
|
|
108
180
|
|
|
109
|
-
Input: Channel message from Jacob: "
|
|
110
|
-
Output: {"priority":"normal","action":"respond","model":"sonnet","summary":"Jacob asking
|
|
181
|
+
Input: Channel message from Jacob: "@${agentName} can you check the engine deployment?"
|
|
182
|
+
Output: {"priority":"normal","action":"respond","model":"sonnet","summary":"Jacob asking ${agentName} to check engine deployment","category":"action_required","directed_at_sophie":true}
|
|
183
|
+
|
|
184
|
+
Input: Channel message from CEO: "@AnotherAgent can you review the agentic libraries in this channel?"
|
|
185
|
+
Output: {"priority":"normal","action":"archive","model":"sonnet","summary":"CEO asking another agent to review agentic libraries — not directed at ${agentName}","category":"fyi","directed_at_sophie":false}
|
|
111
186
|
|
|
112
187
|
Input: Channel message from Hootan to Nima: "Nima, did you file the DIFC response yet?"
|
|
113
188
|
Output: {"priority":"normal","action":"archive","model":"sonnet","summary":"Hootan asking Nima about DIFC response filing","category":"fyi","directed_at_sophie":false}
|
|
@@ -115,11 +190,11 @@ Output: {"priority":"normal","action":"archive","model":"sonnet","summary":"Hoot
|
|
|
115
190
|
Input: Channel thread — Jacob: "pushed the fix" / Hootan: "nice, looks good"
|
|
116
191
|
Output: {"priority":"normal","action":"archive","model":"sonnet","summary":"Team exchange about code fix","category":"fyi","directed_at_sophie":false}
|
|
117
192
|
|
|
118
|
-
Input: Thread where
|
|
193
|
+
Input: Thread where ${agentName} previously replied — Jacob: "yeah that makes sense, I'll handle it" (responding to Hootan, not ${agentName})
|
|
119
194
|
Output: {"priority":"normal","action":"archive","model":"sonnet","summary":"Jacob acknowledging Hootan in thread","category":"fyi","directed_at_sophie":false}
|
|
120
195
|
|
|
121
|
-
Input: Thread where
|
|
122
|
-
Output: {"priority":"high","action":"respond","model":"sonnet","summary":"Hootan asking
|
|
196
|
+
Input: Thread where ${agentName} previously replied — Hootan: "${agentName}, can you send the updated doc?"
|
|
197
|
+
Output: {"priority":"high","action":"respond","model":"sonnet","summary":"Hootan asking ${agentName} for updated document","category":"action_required","directed_at_sophie":true}
|
|
123
198
|
|
|
124
199
|
Input: Channel message: "FYI — the Cayman entity docs are signed and filed"
|
|
125
200
|
Output: {"priority":"normal","action":"archive","model":"sonnet","summary":"FYI: Cayman entity docs signed and filed","category":"fyi","directed_at_sophie":false}
|
|
@@ -128,6 +203,7 @@ Input: Group chat — Nima: "Hootan, can we sync on the DFSA gaps tomorrow?"
|
|
|
128
203
|
Output: {"priority":"normal","action":"archive","model":"sonnet","summary":"Nima asking Hootan to sync on DFSA gaps","category":"fyi","directed_at_sophie":false}
|
|
129
204
|
|
|
130
205
|
Return ONLY the JSON object. No explanation, no markdown fencing, no extra text.`;
|
|
206
|
+
}
|
|
131
207
|
|
|
132
208
|
// ── Format the inbox item into a human-readable prompt ──────────────────────
|
|
133
209
|
|
|
@@ -158,7 +234,20 @@ function formatItemPrompt(item) {
|
|
|
158
234
|
function parseClassification(text) {
|
|
159
235
|
// Strip markdown code fences if present
|
|
160
236
|
const cleaned = text.replace(/^```(?:json)?\s*/m, "").replace(/\s*```$/m, "").trim();
|
|
161
|
-
|
|
237
|
+
let parsed;
|
|
238
|
+
try {
|
|
239
|
+
parsed = JSON.parse(cleaned);
|
|
240
|
+
} catch (_) {
|
|
241
|
+
// CLI output may include prose around the JSON. Try regex-extract the
|
|
242
|
+
// first top-level {...} block. (Greedy match up to the last closing
|
|
243
|
+
// brace works for our flat schema; nested objects would need a real
|
|
244
|
+
// parser, but our classification objects are flat.)
|
|
245
|
+
const match = cleaned.match(/\{[\s\S]*\}/);
|
|
246
|
+
if (!match) {
|
|
247
|
+
throw new Error(`classifier: no JSON object found in output: ${cleaned.slice(0, 200)}`);
|
|
248
|
+
}
|
|
249
|
+
parsed = JSON.parse(match[0]);
|
|
250
|
+
}
|
|
162
251
|
|
|
163
252
|
// Validate and coerce fields
|
|
164
253
|
const validPriorities = ["critical", "high", "normal", "ignore"];
|
|
@@ -176,31 +265,109 @@ function parseClassification(text) {
|
|
|
176
265
|
};
|
|
177
266
|
}
|
|
178
267
|
|
|
179
|
-
// ── Primary:
|
|
268
|
+
// ── Primary: Claude Haiku via `claude --print` CLI ──────────────────────────
|
|
269
|
+
//
|
|
270
|
+
// We invoke the `claude` binary (Max subscription, Claude Code CLI) rather
|
|
271
|
+
// than the Anthropic API SDK, per CEO directive (Slack DM D099N1JGKRQ,
|
|
272
|
+
// 2026-04-27 09:38Z + 11:33Z): the daemon must NOT consume Anthropic API
|
|
273
|
+
// quota; all model calls must funnel through `claude --print` sessions.
|
|
274
|
+
//
|
|
275
|
+
// Implementation notes:
|
|
276
|
+
// • child_process.spawn (not exec) — avoids shell-escape injection on
|
|
277
|
+
// potentially-hostile user message content.
|
|
278
|
+
// • System prompt rides on --append-system-prompt; the user prompt is
|
|
279
|
+
// written to stdin and the pipe is closed.
|
|
280
|
+
// • Output is parsed via parseClassification() which already handles
|
|
281
|
+
// ```json``` fences and bare JSON.
|
|
282
|
+
// • Any failure (non-zero exit, timeout, unparseable output) throws so
|
|
283
|
+
// the OpenAI Tier 2 fallback engages exactly as before.
|
|
284
|
+
|
|
285
|
+
/**
 * Run one `claude --print` invocation and return its raw stdout.
 *
 * The system prompt is passed via `--append-system-prompt`; the user prompt
 * is written to the child's stdin, which is then closed.
 *
 * @param {string} systemPrompt - Text passed to `--append-system-prompt`.
 * @param {string} userPrompt - Text written to the child's stdin.
 * @returns {Promise<string>} Resolves with everything the child wrote to
 *   stdout when it exits with code 0.
 * @throws {Error} Rejects on spawn failure, non-zero exit, synchronous stdin
 *   write failure, or when CLAUDE_CLI_TIMEOUT_MS elapses first.
 */
async function runClaudeCLI(systemPrompt, userPrompt) {
  return new Promise((resolvePromise, rejectPromise) => {
    const args = [
      "--print",
      "--dangerously-skip-permissions",
      "--model", ANTHROPIC_MODEL,
      "--append-system-prompt", systemPrompt,
    ];

    const proc = spawn(CLAUDE_BIN, args, {
      stdio: ["pipe", "pipe", "pipe"],
      // Force claude CLI onto keychain OAuth (Max subscription); strip any
      // stale ANTHROPIC_API_KEY/AUTH_TOKEN inherited from the daemon env.
      env: { ...process.env, ANTHROPIC_API_KEY: "", ANTHROPIC_AUTH_TOKEN: "" },
    });

    let stdout = "";
    let stderr = "";
    let settled = false;
    let killTimer = null;

    // `proc.killed` only records that a signal was *sent*, so it is already
    // true right after SIGTERM. Exit status is the reliable liveness check.
    const hasExited = () => proc.exitCode !== null || proc.signalCode !== null;

    const timer = setTimeout(() => {
      if (settled) return;
      settled = true;
      try { proc.kill("SIGTERM"); } catch (_) { /* noop */ }
      // Escalate if the child is still running after the grace period.
      killTimer = setTimeout(() => {
        try { if (!hasExited()) proc.kill("SIGKILL"); } catch (_) { /* noop */ }
      }, 2000);
      rejectPromise(new Error(`claude CLI timed out after ${CLAUDE_CLI_TIMEOUT_MS}ms`));
    }, CLAUDE_CLI_TIMEOUT_MS);

    proc.stdout.on("data", (chunk) => { stdout += chunk.toString(); });
    proc.stderr.on("data", (chunk) => { stderr += chunk.toString(); });

    // Pipe errors (e.g. EPIPE when the child exits before draining stdin)
    // arrive asynchronously and would otherwise be uncaught. Record them for
    // diagnostics and let the close/timeout handlers decide the outcome.
    proc.stdin.on("error", (err) => { stderr += `\n[stdin] ${err.message}`; });

    proc.on("error", (err) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      rejectPromise(new Error(`claude CLI spawn error: ${err.message}`));
    });

    proc.on("close", (code) => {
      // The child is gone; a pending SIGKILL escalation is no longer needed.
      if (killTimer) clearTimeout(killTimer);
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      if (code !== 0) {
        const tail = (stderr || "").trim().slice(-500);
        rejectPromise(new Error(`claude CLI exited ${code}: ${tail || "no stderr"}`));
        return;
      }
      resolvePromise(stdout);
    });

    // Write the user prompt to stdin and close.
    try {
      proc.stdin.end(userPrompt, "utf8");
    } catch (err) {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      rejectPromise(new Error(`claude CLI stdin write error: ${err.message}`));
    }
  });
}
|
|
180
346
|
|
|
181
347
|
async function classifyWithAnthropic(item) {
|
|
182
|
-
const
|
|
183
|
-
const
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
return parseClassification(
|
|
348
|
+
const systemPrompt = buildSystemPrompt();
|
|
349
|
+
const userPrompt =
|
|
350
|
+
`${formatItemPrompt(item)}\n\n` +
|
|
351
|
+
`Respond ONLY with the JSON object specified in the system prompt. ` +
|
|
352
|
+
`No markdown fences, no commentary, no preamble — JSON only.`;
|
|
353
|
+
|
|
354
|
+
const stdout = await runClaudeCLI(systemPrompt, userPrompt);
|
|
355
|
+
if (!stdout || !stdout.trim()) {
|
|
356
|
+
throw new Error("claude CLI returned empty stdout");
|
|
357
|
+
}
|
|
358
|
+
return parseClassification(stdout);
|
|
193
359
|
}
|
|
194
360
|
|
|
195
361
|
// ── Fallback 1: OpenAI gpt-4o-mini ─────────────────────────────────────────
|
|
196
362
|
|
|
197
363
|
async function classifyWithOpenAI(item) {
|
|
364
|
+
const systemPrompt = buildSystemPrompt();
|
|
198
365
|
const client = new OpenAI();
|
|
199
366
|
const response = await client.chat.completions.create({
|
|
200
367
|
model: OPENAI_MODEL,
|
|
201
368
|
max_tokens: 256,
|
|
202
369
|
messages: [
|
|
203
|
-
{ role: "system", content:
|
|
370
|
+
{ role: "system", content: systemPrompt },
|
|
204
371
|
{ role: "user", content: formatItemPrompt(item) },
|
|
205
372
|
],
|
|
206
373
|
});
|
|
@@ -211,42 +378,89 @@ async function classifyWithOpenAI(item) {
|
|
|
211
378
|
// ── Fallback 2: Rule-based classification ───────────────────────────────────
|
|
212
379
|
|
|
213
380
|
/**
 * Report whether `name` occurs in `text` as a standalone token, i.e. not
 * embedded inside a longer word ("sam" matches "@sam" and "sam," but not
 * "same" or "sample").
 *
 * @param {string} text - Lower-cased message content to search.
 * @param {string} name - Lower-cased agent name; empty/whitespace never matches.
 * @returns {boolean} True when the name appears bounded by non-word characters.
 */
function _containsAgentName(text, name) {
  const needle = (name || "").trim();
  if (!needle) return false;
  // Escape regex metacharacters so the name is matched literally.
  const escaped = needle.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Unicode-aware boundaries: no letter, digit or underscore on either side.
  const pattern = new RegExp(`(?<![\\p{L}\\p{N}_])${escaped}(?![\\p{L}\\p{N}_])`, "u");
  return pattern.test(text);
}

/**
 * Check if the message explicitly mentions a DIFFERENT known AI agent.
 * If it does AND does NOT also mention the current agent, the message
 * should be skipped — the sender chose a specific agent.
 *
 * Names are matched case-insensitively as standalone tokens so that a short
 * agent name does not match inside an unrelated word; Slack IDs are matched
 * case-insensitively as substrings.
 *
 * Added in ib-20260418-cross-agent-routing to fix the recurring issue
 * where Lucas's agent intercepted messages tagged for Sophie in #dev-tooling.
 *
 * @param {object} item - Inbound item; only `item.content` is read.
 * @returns {boolean} True when a peer agent is mentioned and the current
 *   agent is not; false otherwise (including when no peers are registered).
 */
function mentionsOtherAgent(item) {
  loadAgentRegistry();
  if (_peerAgents.length === 0) return false;

  const content = (item.content || "").toLowerCase();

  // Shared matcher for the current agent and each peer: name OR Slack ID.
  const isMentioned = (agent) => {
    const name = (agent.name || "").toLowerCase();
    const slackId = (agent.slackId || "").toLowerCase();
    return _containsAgentName(content, name)
      || (slackId !== "" && content.includes(slackId));
  };

  // No peer mentioned → nothing to defer to.
  if (!_peerAgents.some(isMentioned)) return false;

  // A peer is mentioned; only skip when the current agent is NOT also mentioned.
  return !isMentioned(_agentIdentity);
}
|
|
415
|
+
|
|
416
|
+
/**
 * Rule-based check for whether a message is directed at the current agent.
 * Used by both the rule-based classifier fallback and as a pre-check
 * that can override the LLM's assessment in clear-cut cases.
 *
 * Backward-compatible export name: isDirectedAtSophie (used by sophie-daemon.mjs).
 *
 * Rules, in order:
 *   1. A message that mentions a peer agent but not this agent → false.
 *   2. DMs (`is_dm`, or a channel starting with "dm/") → true.
 *   3. Content containing the agent's name or Slack ID → true.
 *   4. Gmail/calendar items → true.
 *   5. Agent participating in the thread AND (agent spoke last, or the
 *      message has a question mark / request-like wording) → true.
 *   6. Otherwise → false.
 *
 * @param {object} item - Inbound item. Reads `content`, `is_dm`, `channel`,
 *   `service`, `sophie_in_thread` and `thread_context`.
 * @returns {boolean} True when the message should be treated as addressed
 *   to the current agent.
 */
function isDirectedAtSophie(item) {
  loadAgentRegistry();

  // ── Cross-agent gate (ib-20260418-cross-agent-routing) ──────────────────
  // If the message explicitly @-mentions a different known agent and does
  // NOT mention the current agent, skip it — the sender chose someone else.
  if (mentionsOtherAgent(item)) return false;

  // DMs are always directed at the current agent. This also covers CEO DMs,
  // so no separate sender_privilege check is needed.
  if (item.is_dm) return true;
  if ((item.channel || "").startsWith("dm/")) return true;

  // Explicit @mention or name mention in the message content
  const content = (item.content || "").toLowerCase();
  const myName = (_agentIdentity.name || "sophie").toLowerCase();
  const mySlackId = (_agentIdentity.slackId || "").toLowerCase();

  if (content.includes(myName)) return true;
  if (mySlackId && content.includes(mySlackId)) return true;

  // Gmail/calendar messages are directed at the current agent (they're in its inbox)
  if (item.service === "gmail" || item.service === "calendar") return true;

  // Agent being in a thread is NOT enough on its own — in group chats,
  // other people may be talking to each other. Only treat it as directed
  // if the message also contains a question, request keyword, or is a
  // direct follow-up to the agent's last message in the thread.
  //
  // The name is escaped before being embedded in the pattern so that regex
  // metacharacters in a configured agent name are matched literally instead
  // of altering (or breaking) the expression.
  const escapedName = myName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Matches a line that starts with "<name>:" — presumably the speaker
  // prefix used in thread_context; verify against the code that builds it.
  const agentNameRegex = new RegExp(`^${escapedName}:`, "im");

  const agentInThread =
    item.sophie_in_thread
    || (item.thread_context && agentNameRegex.test(item.thread_context));

  if (agentInThread) {
    // Check if the agent was the last speaker in the thread
    if (item.thread_context) {
      const lines = item.thread_context.trim().split("\n").filter(Boolean);
      const lastLine = lines[lines.length - 1] || "";
      if (agentNameRegex.test(lastLine)) return true;
    }
    // Check for question marks or request-like language
    if (/\?/.test(content)) return true;
    if (/(?:can you|could you|please|would you|do you|let me know|update|status|any update)/i.test(content)) return true;
  }

  return false;
}
|
|
@@ -323,7 +537,7 @@ function classifyWithRules(item) {
|
|
|
323
537
|
}
|
|
324
538
|
|
|
325
539
|
// Export for use in daemon's pre-classification gate
|
|
326
|
-
export { isDirectedAtSophie };
|
|
540
|
+
export { isDirectedAtSophie, mentionsOtherAgent, loadAgentRegistry };
|
|
327
541
|
|
|
328
542
|
// ── Main export ─────────────────────────────────────────────────────────────
|
|
329
543
|
|