nodebench-mcp 1.4.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/NODEBENCH_AGENTS.md +154 -2
- package/README.md +214 -215
- package/dist/__tests__/comparativeBench.test.d.ts +1 -0
- package/dist/__tests__/comparativeBench.test.js +722 -0
- package/dist/__tests__/comparativeBench.test.js.map +1 -0
- package/dist/__tests__/evalHarness.test.js +24 -2
- package/dist/__tests__/evalHarness.test.js.map +1 -1
- package/dist/__tests__/gaiaCapabilityEval.test.d.ts +14 -0
- package/dist/__tests__/gaiaCapabilityEval.test.js +420 -0
- package/dist/__tests__/gaiaCapabilityEval.test.js.map +1 -0
- package/dist/__tests__/gaiaCapabilityFilesEval.test.d.ts +15 -0
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js +303 -0
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +1 -0
- package/dist/__tests__/openDatasetParallelEvalGaia.test.d.ts +7 -0
- package/dist/__tests__/openDatasetParallelEvalGaia.test.js +279 -0
- package/dist/__tests__/openDatasetParallelEvalGaia.test.js.map +1 -0
- package/dist/__tests__/openDatasetPerfComparison.test.d.ts +10 -0
- package/dist/__tests__/openDatasetPerfComparison.test.js +318 -0
- package/dist/__tests__/openDatasetPerfComparison.test.js.map +1 -0
- package/dist/__tests__/tools.test.js +155 -7
- package/dist/__tests__/tools.test.js.map +1 -1
- package/dist/__tests__/toolsetGatingEval.test.d.ts +1 -0
- package/dist/__tests__/toolsetGatingEval.test.js +1031 -0
- package/dist/__tests__/toolsetGatingEval.test.js.map +1 -0
- package/dist/db.js +56 -0
- package/dist/db.js.map +1 -1
- package/dist/index.js +462 -28
- package/dist/index.js.map +1 -1
- package/dist/tools/localFileTools.d.ts +15 -0
- package/dist/tools/localFileTools.js +386 -0
- package/dist/tools/localFileTools.js.map +1 -0
- package/dist/tools/metaTools.js +170 -3
- package/dist/tools/metaTools.js.map +1 -1
- package/dist/tools/parallelAgentTools.d.ts +18 -0
- package/dist/tools/parallelAgentTools.js +1272 -0
- package/dist/tools/parallelAgentTools.js.map +1 -0
- package/dist/tools/selfEvalTools.js +240 -10
- package/dist/tools/selfEvalTools.js.map +1 -1
- package/dist/tools/webTools.js +171 -37
- package/dist/tools/webTools.js.map +1 -1
- package/package.json +26 -8
|
@@ -0,0 +1,1272 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parallel Agent Coordination Tools
|
|
3
|
+
*
|
|
4
|
+
* Inspired by Anthropic's "Building a C Compiler with Parallel Claudes" (Feb 2026).
|
|
5
|
+
* Implements task locking, role specialization, context budget management,
|
|
6
|
+
* and oracle-based testing patterns for multi-agent development workflows.
|
|
7
|
+
*
|
|
8
|
+
* Key patterns from the blog post:
|
|
9
|
+
* - Task locking: Prevent two agents from solving the same problem simultaneously
|
|
10
|
+
* - Agent roles: Specialization (implementer, dedup, perf, docs, critic)
|
|
11
|
+
* - Context window management: Prevent pollution, track budget, pre-compute summaries
|
|
12
|
+
* - Oracle testing: Compare against known-good reference outputs
|
|
13
|
+
* - Progress tracking: Maintain running docs of status for fresh agent sessions
|
|
14
|
+
*
|
|
15
|
+
* Reference: https://www.anthropic.com/engineering/building-c-compiler
|
|
16
|
+
*/
|
|
17
|
+
import { getDb, genId } from "../db.js";
|
|
18
|
+
// Predefined specialization roles for parallel agent sessions.
// Keys are the role names accepted by assign_agent_role; Object.keys() of this
// map is also reported back to callers as `availableRoles`. Each entry carries:
//   description  — short summary surfaced in tool results
//   instructions — behavioral guidance stored with the role assignment
const PREDEFINED_ROLES = {
    // Primary feature work: pick the next failing test, fix it, commit.
    implementer: {
        description: "Primary feature implementer. Picks failing tests and implements fixes.",
        instructions: "Focus on making failing tests pass. Pick the next most obvious failing test, fix it, run tests, commit. Avoid refactoring unrelated code. Update progress notes after each commit.",
    },
    // Finds duplicated logic and coalesces it without behavior changes.
    dedup_reviewer: {
        description: "Code deduplication specialist. Finds and coalesces duplicate implementations.",
        instructions: "Search for duplicated logic across the codebase. Coalesce into shared utilities. Do NOT change external behavior. Run all tests after each consolidation. Log each dedup as a learning.",
    },
    // Profiles hot paths; verifies correctness via oracle comparisons.
    performance_optimizer: {
        description: "Performance specialist. Profiles and optimizes hot paths.",
        instructions: "Profile the system for bottlenecks. Optimize hot paths without changing correctness. Benchmark before and after. Use oracle comparisons to verify output hasn't changed. Document optimizations as learnings.",
    },
    // Keeps docs in sync so fresh agent sessions can orient quickly.
    documentation_maintainer: {
        description: "Documentation specialist. Keeps READMEs, progress files, and docs in sync.",
        instructions: "Review all documentation for accuracy against current code. Update READMEs, progress files, and inline docs. Ensure new agents can orient themselves quickly. Use decide_re_update before creating new files.",
    },
    // Structural review: anti-patterns and maintainability improvements.
    code_quality_critic: {
        description: "Code quality reviewer. Structural improvements and pattern enforcement.",
        instructions: "Review code from the perspective of an expert developer. Identify structural issues, anti-patterns, and opportunities for improvement. Make changes that improve maintainability without breaking tests. Log patterns discovered as learnings.",
    },
    // Expands coverage on untested paths and edge cases.
    test_writer: {
        description: "Test specialist. Writes and improves test coverage.",
        instructions: "Identify untested code paths. Write targeted tests for edge cases and failure modes. Ensure tests are deterministic and fast. Use oracle comparisons for complex output validation. Log test patterns as learnings.",
    },
    // Audits for vulnerabilities and fixes them with targeted tests.
    security_auditor: {
        description: "Security specialist. Finds and fixes vulnerabilities.",
        instructions: "Audit code for security vulnerabilities: injection, auth bypass, data exposure, unsafe defaults. Log each finding as a CRITICAL or HIGH gap. Fix vulnerabilities and verify with targeted tests.",
    },
};
|
|
48
|
+
// ============================================================================
|
|
49
|
+
// Portable AGENTS.md Generator
|
|
50
|
+
// ============================================================================
|
|
51
|
+
/**
 * Build the "Parallel Agent Coordination Protocol" markdown section for an
 * AGENTS.md file.
 *
 * @param {string} techStack - Tech stack hint, matched case-insensitively
 *   against "typescript"/"node"/"js" and "python" to pick the lint and
 *   oracle-capture commands; anything else falls back to `make`-style commands.
 * @param {string} projectName - Project name interpolated into the intro line.
 * @param {number} maxAgents - Number of parallel agents; drives the role
 *   recommendation list (>=4, >=2, or single-agent).
 * @param {boolean} includeNodebench - When true, appends the optional
 *   NodeBench MCP setup section with install instructions and tool mapping.
 * @returns {string} Markdown text ready to embed in AGENTS.md.
 */
function generateParallelAgentsMdSection(techStack = "general", projectName = "this project", maxAgents = 4, includeNodebench = true) {
    // Normalize once instead of calling toLowerCase() per membership check.
    const stack = techStack.toLowerCase();
    const isTs = stack.includes("typescript") || stack.includes("node") || stack.includes("js");
    const isPython = stack.includes("python");
    // Only the lint command is interpolated below (the previously computed
    // build/test commands were never used in the emitted markdown).
    const lintCmd = isTs ? "npx tsc --noEmit" : isPython ? "ruff check ." : "make lint";
    // Role recommendations based on agent count
    const roleRecs = maxAgents >= 4
        ? `- Agent 1: **implementer** — Primary feature work
- Agent 2: **test_writer** — Test coverage and edge cases
- Agent 3: **code_quality_critic** — Refactoring and pattern enforcement
- Agent 4: **documentation_maintainer** — Docs, progress files, READMEs`
        : maxAgents >= 2
            ? `- Agent 1: **implementer** — Feature work and bug fixes
- Agent 2: **test_writer** — Tests and quality review`
            : `- Agent 1: **implementer** — All work (single agent mode)`;
    let md = `## Parallel Agent Coordination Protocol

> Based on Anthropic's "Building a C Compiler with Parallel Claudes" (Feb 2026).
> Reference: https://www.anthropic.com/engineering/building-c-compiler

This section enables ${maxAgents} AI agents to work on ${projectName} in parallel without conflicts.

### Task Locking Protocol

**Before starting any work**, claim your task to prevent duplicate effort:

1. Check \`.parallel-agents/current_tasks/\` for active claims
2. Create a lock file: \`.parallel-agents/current_tasks/<task_key>.lock\`
   - Content: \`{ "agent": "<session_id>", "started": "<ISO timestamp>", "description": "<what you plan to do>" }\`
3. Do your work
4. When done, delete the lock file and update \`.parallel-agents/progress.md\`

**If a lock file already exists for your intended task**: pick a different task. Do NOT delete another agent's lock.

**If a lock file is stale** (older than 2 hours with no progress update): the agent may have crashed. You may reclaim it — but add a note in progress.md.

### Role Specialization

Recommended role assignments for ${maxAgents} parallel agents:

${roleRecs}

Each agent should:
- Stay focused on their role's responsibilities
- Avoid making changes outside their scope
- Update progress.md after each significant commit
- Record learnings about patterns discovered in their domain

### Oracle Testing Workflow

Use known-good reference outputs to validate changes:

1. **Capture oracle**: Run the reference implementation and save output
   \`\`\`
   ${isTs ? "node reference-impl.js > .parallel-agents/oracle/test_1.golden" : isPython ? "python reference_impl.py > .parallel-agents/oracle/test_1.golden" : "./reference-impl > .parallel-agents/oracle/test_1.golden"}
   \`\`\`
2. **Compare**: After changes, run your implementation and diff against golden file
   \`\`\`
   ${isTs ? "node your-impl.js > /tmp/actual.txt && diff .parallel-agents/oracle/test_1.golden /tmp/actual.txt" : isPython ? "python your_impl.py > /tmp/actual.txt && diff .parallel-agents/oracle/test_1.golden /tmp/actual.txt" : "./your-impl > /tmp/actual.txt && diff .parallel-agents/oracle/test_1.golden /tmp/actual.txt"}
   \`\`\`
3. **Triage failures**: Each failing comparison is an independent work item — assign to a different agent
4. **Delta debugging**: If tests pass alone but fail together, split the set in half to isolate the conflict

### Context Budget Rules

LLM agents have finite context windows. Prevent pollution:

- **DO NOT** print thousands of lines of test output — log to file, print summary only
- **DO NOT** read entire large files — use targeted grep/search
- **DO** pre-compute aggregate stats before reporting
- **DO** use \`--fast\` mode (1-10% random sample) for large test suites during development
- **DO** log errors with ERROR prefix on same line for easy grep
- **Budget guideline**: If a single tool output exceeds ~5,000 tokens, summarize it first

### Progress File Protocol

File: \`.parallel-agents/progress.md\`

Every agent MUST read this file at session start and update it after significant work:

- **Current Status**: What's done, what's in progress
- **Active Agents**: Who is working on what (check lock files too)
- **Blocked Items**: What needs help from another agent or human
- **Failed Approaches**: What was tried and didn't work (prevents other agents from repeating mistakes)
- **Key Decisions**: Architectural choices made during parallel work

### Anti-Patterns to Avoid

- **Two agents on same task**: Always check lock files before starting
- **Context dumping**: Never paste >100 lines of raw output into context
- **Stuck loops**: If stuck >30 minutes on one problem, mark as blocked and move on
- **Silent overwrites**: Always pull/rebase before pushing — check for other agents' recent commits
- **No progress updates**: Fresh agents waste time re-orienting without progress.md updates
- **Scope creep**: Stay in your role — an implementer should not refactor unless assigned as critic

### Flywheel Verification (After Bootstrap)

Run this 6-step check to verify parallel agent setup works:

1. **Static Analysis**: \`${lintCmd}\` — zero errors
2. **Happy Path**: One agent claims task → does work → releases → progress.md updated
3. **Conflict Test**: Two agents claim same task → second gets conflict
4. **Oracle Test**: Create golden file → make change → diff catches it
5. **Gap Re-scan**: Re-run detection — all 7 categories should show as present
6. **Document**: Record any new learnings discovered during verification
`;
    if (includeNodebench) {
        md += `
### NodeBench MCP Setup (Optional but Recommended)

Install nodebench-mcp for full parallel agent tool support:

\`\`\`bash
# Claude Code CLI
claude mcp add nodebench -- npx -y nodebench-mcp

# Or manual config in .claude.json / settings.json
{
  "mcpServers": {
    "nodebench": {
      "command": "npx",
      "args": ["-y", "nodebench-mcp"]
    }
  }
}
\`\`\`

**Tool mapping** (file-based protocol → MCP tools):

| File-Based | NodeBench MCP Tool | Description |
|------------|-------------------|-------------|
| Lock file in \`current_tasks/\` | \`claim_agent_task\` | Claim a task lock |
| Delete lock file | \`release_agent_task\` | Release with progress note |
| Read \`current_tasks/\` | \`list_agent_tasks\` | See all claims |
| Manual role notes | \`assign_agent_role\` | 7 predefined roles |
| \`diff\` against golden file | \`run_oracle_comparison\` | Oracle testing with history |
| Read progress.md | \`get_parallel_status\` | Full orientation overview |
| Manual token counting | \`log_context_budget\` | Automated budget tracking |
| Run detection manually | \`bootstrap_parallel_agents\` | Auto-detect and scaffold |

**First-time setup with MCP**:
\`\`\`
> Use bootstrap_parallel_agents to scan this project and set up parallel agent infrastructure
> Use getMethodology("parallel_agent_teams") for the full 6-step workflow
\`\`\`
`;
    }
    return md;
}
|
|
201
|
+
export const parallelAgentTools = [
|
|
202
|
+
// ─── Task Locking ───────────────────────────────────────────
|
|
203
|
+
{
|
|
204
|
+
name: "claim_agent_task",
|
|
205
|
+
description: "Claim a task lock so other parallel agents know you're working on it. Prevents duplicate work when multiple agents run simultaneously. Based on Anthropic's parallel Claude task locking pattern. Returns conflict info if another agent already claimed this task.",
|
|
206
|
+
inputSchema: {
|
|
207
|
+
type: "object",
|
|
208
|
+
properties: {
|
|
209
|
+
taskKey: {
|
|
210
|
+
type: "string",
|
|
211
|
+
description: "Unique task identifier (e.g. 'fix_auth_middleware', 'implement_ssr_hydration'). Use snake_case, descriptive names.",
|
|
212
|
+
},
|
|
213
|
+
description: {
|
|
214
|
+
type: "string",
|
|
215
|
+
description: "What you plan to do for this task",
|
|
216
|
+
},
|
|
217
|
+
sessionId: {
|
|
218
|
+
type: "string",
|
|
219
|
+
description: "Your agent session ID. If omitted, uses the MCP connection session.",
|
|
220
|
+
},
|
|
221
|
+
},
|
|
222
|
+
required: ["taskKey"],
|
|
223
|
+
},
|
|
224
|
+
handler: async (args) => {
|
|
225
|
+
const db = getDb();
|
|
226
|
+
const taskKey = args.taskKey;
|
|
227
|
+
const sessionId = args.sessionId || `agent_${Date.now()}`;
|
|
228
|
+
const description = args.description || "";
|
|
229
|
+
// Check if task is already claimed by another active agent
|
|
230
|
+
const existing = db
|
|
231
|
+
.prepare("SELECT * FROM agent_tasks WHERE task_key = ? AND status = 'claimed'")
|
|
232
|
+
.get(taskKey);
|
|
233
|
+
if (existing && existing.session_id !== sessionId) {
|
|
234
|
+
return {
|
|
235
|
+
claimed: false,
|
|
236
|
+
conflict: true,
|
|
237
|
+
existingClaim: {
|
|
238
|
+
sessionId: existing.session_id,
|
|
239
|
+
claimedAt: existing.claimed_at,
|
|
240
|
+
description: existing.description,
|
|
241
|
+
progressNote: existing.progress_note,
|
|
242
|
+
},
|
|
243
|
+
suggestion: "Another agent is already working on this task. Pick a different task or wait for them to release it. Use list_agent_tasks to see all current claims.",
|
|
244
|
+
};
|
|
245
|
+
}
|
|
246
|
+
// Claim or re-claim the task
|
|
247
|
+
const id = genId("task");
|
|
248
|
+
db.prepare("INSERT OR REPLACE INTO agent_tasks (id, task_key, session_id, status, description, claimed_at) VALUES (?, ?, ?, 'claimed', ?, datetime('now'))").run(id, taskKey, sessionId, description);
|
|
249
|
+
// Count total active tasks for this session
|
|
250
|
+
const myTasks = db
|
|
251
|
+
.prepare("SELECT COUNT(*) as c FROM agent_tasks WHERE session_id = ? AND status = 'claimed'")
|
|
252
|
+
.get(sessionId);
|
|
253
|
+
return {
|
|
254
|
+
claimed: true,
|
|
255
|
+
taskId: id,
|
|
256
|
+
taskKey,
|
|
257
|
+
sessionId,
|
|
258
|
+
activeTasks: myTasks.c,
|
|
259
|
+
tip: "Update progress with release_agent_task when done. Other agents can see your claim via list_agent_tasks.",
|
|
260
|
+
};
|
|
261
|
+
},
|
|
262
|
+
},
|
|
263
|
+
{
|
|
264
|
+
name: "release_agent_task",
|
|
265
|
+
description: "Release a task lock after completing work. Updates status and optionally records a progress note for the next agent session. Part of the parallel agent coordination pattern.",
|
|
266
|
+
inputSchema: {
|
|
267
|
+
type: "object",
|
|
268
|
+
properties: {
|
|
269
|
+
taskKey: {
|
|
270
|
+
type: "string",
|
|
271
|
+
description: "The task key to release",
|
|
272
|
+
},
|
|
273
|
+
status: {
|
|
274
|
+
type: "string",
|
|
275
|
+
enum: ["completed", "blocked", "abandoned"],
|
|
276
|
+
description: "Final status: completed (done), blocked (needs help), abandoned (giving up)",
|
|
277
|
+
},
|
|
278
|
+
progressNote: {
|
|
279
|
+
type: "string",
|
|
280
|
+
description: "Note for the next agent picking up this task (e.g. failed approaches, remaining work)",
|
|
281
|
+
},
|
|
282
|
+
sessionId: {
|
|
283
|
+
type: "string",
|
|
284
|
+
description: "Your agent session ID (must match the claim)",
|
|
285
|
+
},
|
|
286
|
+
},
|
|
287
|
+
required: ["taskKey"],
|
|
288
|
+
},
|
|
289
|
+
handler: async (args) => {
|
|
290
|
+
const db = getDb();
|
|
291
|
+
const taskKey = args.taskKey;
|
|
292
|
+
const status = args.status || "completed";
|
|
293
|
+
const progressNote = args.progressNote || "";
|
|
294
|
+
const sessionId = args.sessionId;
|
|
295
|
+
// Find the active claim
|
|
296
|
+
let query = "UPDATE agent_tasks SET status = ?, progress_note = ?, released_at = datetime('now') WHERE task_key = ? AND status = 'claimed'";
|
|
297
|
+
const params = [status, progressNote, taskKey];
|
|
298
|
+
if (sessionId) {
|
|
299
|
+
query += " AND session_id = ?";
|
|
300
|
+
params.push(sessionId);
|
|
301
|
+
}
|
|
302
|
+
const result = db.prepare(query).run(...params);
|
|
303
|
+
if (result.changes === 0) {
|
|
304
|
+
return {
|
|
305
|
+
released: false,
|
|
306
|
+
error: "No active claim found for this task key",
|
|
307
|
+
};
|
|
308
|
+
}
|
|
309
|
+
return {
|
|
310
|
+
released: true,
|
|
311
|
+
taskKey,
|
|
312
|
+
status,
|
|
313
|
+
progressNote: progressNote || "(none)",
|
|
314
|
+
tip: status === "blocked"
|
|
315
|
+
? "Task marked as blocked. Another agent or human should review the progress note."
|
|
316
|
+
: status === "abandoned"
|
|
317
|
+
? "Task abandoned. Consider recording a learning about why this failed."
|
|
318
|
+
: "Task completed. Other agents can now pick related tasks.",
|
|
319
|
+
};
|
|
320
|
+
},
|
|
321
|
+
},
|
|
322
|
+
{
|
|
323
|
+
name: "list_agent_tasks",
|
|
324
|
+
description: "List all current task claims across parallel agents. Shows who is working on what, blocked tasks, and recently completed work. Essential for new agent sessions to orient themselves and avoid duplicate work.",
|
|
325
|
+
inputSchema: {
|
|
326
|
+
type: "object",
|
|
327
|
+
properties: {
|
|
328
|
+
status: {
|
|
329
|
+
type: "string",
|
|
330
|
+
enum: ["claimed", "completed", "blocked", "abandoned", "all"],
|
|
331
|
+
description: "Filter by status (default: 'all')",
|
|
332
|
+
},
|
|
333
|
+
limit: {
|
|
334
|
+
type: "number",
|
|
335
|
+
description: "Max results (default: 50)",
|
|
336
|
+
},
|
|
337
|
+
},
|
|
338
|
+
},
|
|
339
|
+
handler: async (args) => {
|
|
340
|
+
const db = getDb();
|
|
341
|
+
const status = args.status || "all";
|
|
342
|
+
const limit = args.limit || 50;
|
|
343
|
+
let query = "SELECT * FROM agent_tasks";
|
|
344
|
+
const params = [];
|
|
345
|
+
if (status !== "all") {
|
|
346
|
+
query += " WHERE status = ?";
|
|
347
|
+
params.push(status);
|
|
348
|
+
}
|
|
349
|
+
query += " ORDER BY claimed_at DESC LIMIT ?";
|
|
350
|
+
params.push(limit);
|
|
351
|
+
const tasks = db.prepare(query).all(...params);
|
|
352
|
+
// Summary stats
|
|
353
|
+
const stats = db
|
|
354
|
+
.prepare("SELECT status, COUNT(*) as count FROM agent_tasks GROUP BY status")
|
|
355
|
+
.all();
|
|
356
|
+
return {
|
|
357
|
+
tasks: tasks.map((t) => ({
|
|
358
|
+
taskKey: t.task_key,
|
|
359
|
+
sessionId: t.session_id,
|
|
360
|
+
status: t.status,
|
|
361
|
+
description: t.description,
|
|
362
|
+
progressNote: t.progress_note,
|
|
363
|
+
claimedAt: t.claimed_at,
|
|
364
|
+
releasedAt: t.released_at,
|
|
365
|
+
})),
|
|
366
|
+
stats: Object.fromEntries(stats.map((s) => [s.status, s.count])),
|
|
367
|
+
total: tasks.length,
|
|
368
|
+
tip: "Claimed tasks are actively being worked on. Pick unclaimed work or blocked tasks that need fresh eyes.",
|
|
369
|
+
};
|
|
370
|
+
},
|
|
371
|
+
},
|
|
372
|
+
// ─── Agent Role Specialization ──────────────────────────────
|
|
373
|
+
{
|
|
374
|
+
name: "assign_agent_role",
|
|
375
|
+
description: 'Assign a specialized role to the current agent session. Roles define focus area and behavioral instructions. Predefined roles: implementer, dedup_reviewer, performance_optimizer, documentation_maintainer, code_quality_critic, test_writer, security_auditor. Based on Anthropic\'s "multiple agent roles" pattern where specialized agents handle dedup, performance, documentation, and code quality.',
|
|
376
|
+
inputSchema: {
|
|
377
|
+
type: "object",
|
|
378
|
+
properties: {
|
|
379
|
+
role: {
|
|
380
|
+
type: "string",
|
|
381
|
+
description: "Role name. Use predefined: implementer, dedup_reviewer, performance_optimizer, documentation_maintainer, code_quality_critic, test_writer, security_auditor. Or define a custom role.",
|
|
382
|
+
},
|
|
383
|
+
sessionId: {
|
|
384
|
+
type: "string",
|
|
385
|
+
description: "Agent session ID to assign the role to",
|
|
386
|
+
},
|
|
387
|
+
customInstructions: {
|
|
388
|
+
type: "string",
|
|
389
|
+
description: "Custom instructions for the role (overrides predefined instructions if set)",
|
|
390
|
+
},
|
|
391
|
+
focusArea: {
|
|
392
|
+
type: "string",
|
|
393
|
+
description: "Specific area to focus on (e.g. 'auth module', 'API routes', 'frontend components')",
|
|
394
|
+
},
|
|
395
|
+
},
|
|
396
|
+
required: ["role"],
|
|
397
|
+
},
|
|
398
|
+
handler: async (args) => {
|
|
399
|
+
const db = getDb();
|
|
400
|
+
const role = args.role;
|
|
401
|
+
const sessionId = args.sessionId || `agent_${Date.now()}`;
|
|
402
|
+
const focusArea = args.focusArea || "";
|
|
403
|
+
const predefined = PREDEFINED_ROLES[role];
|
|
404
|
+
const instructions = args.customInstructions ||
|
|
405
|
+
(predefined ? predefined.instructions : `Custom role: ${role}`);
|
|
406
|
+
const id = genId("role");
|
|
407
|
+
db.prepare("INSERT OR REPLACE INTO agent_roles (id, session_id, role, instructions, focus_area, created_at) VALUES (?, ?, ?, ?, ?, datetime('now'))").run(id, sessionId, role, instructions, focusArea);
|
|
408
|
+
return {
|
|
409
|
+
assigned: true,
|
|
410
|
+
role,
|
|
411
|
+
sessionId,
|
|
412
|
+
description: predefined?.description || `Custom role: ${role}`,
|
|
413
|
+
instructions,
|
|
414
|
+
focusArea: focusArea || "(general)",
|
|
415
|
+
availableRoles: Object.keys(PREDEFINED_ROLES),
|
|
416
|
+
tip: "Your role shapes what tasks you should claim and how you approach work. Use claim_agent_task to pick tasks aligned with your role.",
|
|
417
|
+
};
|
|
418
|
+
},
|
|
419
|
+
},
|
|
420
|
+
{
|
|
421
|
+
name: "get_agent_role",
|
|
422
|
+
description: "Get the current agent's assigned role and instructions. Returns role-specific behavioral guidance. If no role is assigned, suggests one based on current project state.",
|
|
423
|
+
inputSchema: {
|
|
424
|
+
type: "object",
|
|
425
|
+
properties: {
|
|
426
|
+
sessionId: {
|
|
427
|
+
type: "string",
|
|
428
|
+
description: "Agent session ID to look up",
|
|
429
|
+
},
|
|
430
|
+
},
|
|
431
|
+
},
|
|
432
|
+
handler: async (args) => {
|
|
433
|
+
const db = getDb();
|
|
434
|
+
const sessionId = args.sessionId;
|
|
435
|
+
if (sessionId) {
|
|
436
|
+
const role = db
|
|
437
|
+
.prepare("SELECT * FROM agent_roles WHERE session_id = ?")
|
|
438
|
+
.get(sessionId);
|
|
439
|
+
if (role) {
|
|
440
|
+
return {
|
|
441
|
+
hasRole: true,
|
|
442
|
+
role: role.role,
|
|
443
|
+
instructions: role.instructions,
|
|
444
|
+
focusArea: role.focus_area,
|
|
445
|
+
assignedAt: role.created_at,
|
|
446
|
+
};
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
// No role assigned — list all active roles
|
|
450
|
+
const activeRoles = db
|
|
451
|
+
.prepare("SELECT * FROM agent_roles ORDER BY created_at DESC LIMIT 20")
|
|
452
|
+
.all();
|
|
453
|
+
return {
|
|
454
|
+
hasRole: false,
|
|
455
|
+
activeRoles: activeRoles.map((r) => ({
|
|
456
|
+
sessionId: r.session_id,
|
|
457
|
+
role: r.role,
|
|
458
|
+
focusArea: r.focus_area,
|
|
459
|
+
assignedAt: r.created_at,
|
|
460
|
+
})),
|
|
461
|
+
availableRoles: Object.entries(PREDEFINED_ROLES).map(([k, v]) => ({
|
|
462
|
+
role: k,
|
|
463
|
+
description: v.description,
|
|
464
|
+
})),
|
|
465
|
+
tip: "No role assigned for this session. Call assign_agent_role to specialize. This helps parallel agents coordinate by role.",
|
|
466
|
+
};
|
|
467
|
+
},
|
|
468
|
+
},
|
|
469
|
+
// ─── Context Window Budget Management ───────────────────────
|
|
470
|
+
{
|
|
471
|
+
name: "log_context_budget",
|
|
472
|
+
description: "Track context window usage to prevent pollution. LLM agents have finite context and, as Anthropic's blog notes, test harnesses should NOT print thousands of useless bytes. Use this to track token usage, flag when approaching limits, and recommend summarization. Implements the 'context window pollution prevention' pattern.",
|
|
473
|
+
inputSchema: {
|
|
474
|
+
type: "object",
|
|
475
|
+
properties: {
|
|
476
|
+
sessionId: {
|
|
477
|
+
type: "string",
|
|
478
|
+
description: "Agent session ID",
|
|
479
|
+
},
|
|
480
|
+
eventType: {
|
|
481
|
+
type: "string",
|
|
482
|
+
enum: [
|
|
483
|
+
"tool_output",
|
|
484
|
+
"file_read",
|
|
485
|
+
"test_output",
|
|
486
|
+
"log_output",
|
|
487
|
+
"search_result",
|
|
488
|
+
"checkpoint",
|
|
489
|
+
],
|
|
490
|
+
description: "What kind of content consumed context",
|
|
491
|
+
},
|
|
492
|
+
tokensUsed: {
|
|
493
|
+
type: "number",
|
|
494
|
+
description: "Approximate tokens consumed by this event (estimate: chars / 4)",
|
|
495
|
+
},
|
|
496
|
+
tokensLimit: {
|
|
497
|
+
type: "number",
|
|
498
|
+
description: "Total context window limit (default: 200000 for Claude)",
|
|
499
|
+
},
|
|
500
|
+
description: {
|
|
501
|
+
type: "string",
|
|
502
|
+
description: "What generated this context usage",
|
|
503
|
+
},
|
|
504
|
+
},
|
|
505
|
+
required: ["eventType", "tokensUsed"],
|
|
506
|
+
},
|
|
507
|
+
handler: async (args) => {
|
|
508
|
+
const db = getDb();
|
|
509
|
+
const sessionId = args.sessionId || `agent_${Date.now()}`;
|
|
510
|
+
const eventType = args.eventType;
|
|
511
|
+
const tokensUsed = args.tokensUsed;
|
|
512
|
+
const tokensLimit = args.tokensLimit || 200000;
|
|
513
|
+
const description = args.description || "";
|
|
514
|
+
const id = genId("ctx");
|
|
515
|
+
db.prepare("INSERT INTO context_budget_log (id, session_id, event_type, tokens_used, tokens_limit, description, created_at) VALUES (?, ?, ?, ?, ?, ?, datetime('now'))").run(id, sessionId, eventType, tokensUsed, tokensLimit, description);
|
|
516
|
+
// Calculate total usage for this session
|
|
517
|
+
const total = db
|
|
518
|
+
.prepare("SELECT SUM(tokens_used) as total FROM context_budget_log WHERE session_id = ?")
|
|
519
|
+
.get(sessionId);
|
|
520
|
+
const totalUsed = total?.total || 0;
|
|
521
|
+
const percentUsed = Math.round((totalUsed / tokensLimit) * 100);
|
|
522
|
+
// Breakdown by event type
|
|
523
|
+
const breakdown = db
|
|
524
|
+
.prepare("SELECT event_type, SUM(tokens_used) as total, COUNT(*) as count FROM context_budget_log WHERE session_id = ? GROUP BY event_type ORDER BY total DESC")
|
|
525
|
+
.all(sessionId);
|
|
526
|
+
const warnings = [];
|
|
527
|
+
if (percentUsed > 80) {
|
|
528
|
+
warnings.push("CRITICAL: Over 80% context budget used. Summarize findings and start a fresh session.");
|
|
529
|
+
}
|
|
530
|
+
else if (percentUsed > 60) {
|
|
531
|
+
warnings.push("WARNING: Over 60% context budget used. Avoid reading large files. Use targeted grep instead of full file reads.");
|
|
532
|
+
}
|
|
533
|
+
else if (percentUsed > 40) {
|
|
534
|
+
warnings.push("NOTE: Approaching 40% context budget. Consider pre-computing summaries rather than dumping raw output.");
|
|
535
|
+
}
|
|
536
|
+
// Check for the biggest polluter
|
|
537
|
+
if (breakdown.length > 0 && breakdown[0].total > tokensLimit * 0.3) {
|
|
538
|
+
warnings.push(`Biggest context consumer: '${breakdown[0].event_type}' (${breakdown[0].total} tokens, ${breakdown[0].count} events). Consider reducing output from this source.`);
|
|
539
|
+
}
|
|
540
|
+
return {
|
|
541
|
+
logged: true,
|
|
542
|
+
sessionId,
|
|
543
|
+
event: { type: eventType, tokens: tokensUsed, description },
|
|
544
|
+
budget: {
|
|
545
|
+
totalUsed,
|
|
546
|
+
limit: tokensLimit,
|
|
547
|
+
percentUsed,
|
|
548
|
+
remaining: tokensLimit - totalUsed,
|
|
549
|
+
},
|
|
550
|
+
breakdown: breakdown.map((b) => ({
|
|
551
|
+
eventType: b.event_type,
|
|
552
|
+
totalTokens: b.total,
|
|
553
|
+
eventCount: b.count,
|
|
554
|
+
})),
|
|
555
|
+
warnings,
|
|
556
|
+
bestPractices: [
|
|
557
|
+
"Log errors with ERROR prefix on same line for easy grep",
|
|
558
|
+
"Pre-compute aggregate stats instead of dumping raw data",
|
|
559
|
+
"Use --fast mode (random 1-10% sample) for large test suites",
|
|
560
|
+
"Write detailed output to log files, print only summaries to context",
|
|
561
|
+
],
|
|
562
|
+
};
|
|
563
|
+
},
|
|
564
|
+
},
|
|
565
|
+
// ─── Oracle-Based Testing ───────────────────────────────────
|
|
566
|
+
{
|
|
567
|
+
name: "run_oracle_comparison",
|
|
568
|
+
description: 'Compare actual output against a known-good oracle reference. Based on Anthropic\'s pattern of using GCC as an "online known-good compiler oracle" to identify which specific components are broken. The oracle pattern enables parallel debugging: each agent can work on different failing comparisons independently.',
|
|
569
|
+
inputSchema: {
|
|
570
|
+
type: "object",
|
|
571
|
+
properties: {
|
|
572
|
+
testLabel: {
|
|
573
|
+
type: "string",
|
|
574
|
+
description: "Label for this comparison (e.g. 'auth_middleware_output', 'api_response_format')",
|
|
575
|
+
},
|
|
576
|
+
actualOutput: {
|
|
577
|
+
type: "string",
|
|
578
|
+
description: "The actual output from your implementation",
|
|
579
|
+
},
|
|
580
|
+
expectedOutput: {
|
|
581
|
+
type: "string",
|
|
582
|
+
description: "The known-good reference output (oracle)",
|
|
583
|
+
},
|
|
584
|
+
oracleSource: {
|
|
585
|
+
type: "string",
|
|
586
|
+
description: "Where the oracle output came from (e.g. 'production_v2.1', 'reference_implementation', 'golden_file')",
|
|
587
|
+
},
|
|
588
|
+
sessionId: {
|
|
589
|
+
type: "string",
|
|
590
|
+
description: "Agent session ID for tracking",
|
|
591
|
+
},
|
|
592
|
+
cycleId: {
|
|
593
|
+
type: "string",
|
|
594
|
+
description: "Verification cycle ID to link this comparison to",
|
|
595
|
+
},
|
|
596
|
+
},
|
|
597
|
+
required: ["testLabel", "actualOutput", "expectedOutput", "oracleSource"],
|
|
598
|
+
},
|
|
599
|
+
handler: async (args) => {
|
|
600
|
+
const db = getDb();
|
|
601
|
+
const testLabel = args.testLabel;
|
|
602
|
+
const actualOutput = args.actualOutput;
|
|
603
|
+
const expectedOutput = args.expectedOutput;
|
|
604
|
+
const oracleSource = args.oracleSource;
|
|
605
|
+
const sessionId = args.sessionId || "";
|
|
606
|
+
const cycleId = args.cycleId || "";
|
|
607
|
+
// Compute match and diff
|
|
608
|
+
const exactMatch = actualOutput === expectedOutput;
|
|
609
|
+
// Simple line-level diff
|
|
610
|
+
const actualLines = actualOutput.split("\n");
|
|
611
|
+
const expectedLines = expectedOutput.split("\n");
|
|
612
|
+
const diffLines = [];
|
|
613
|
+
const maxLines = Math.max(actualLines.length, expectedLines.length);
|
|
614
|
+
let matchingLines = 0;
|
|
615
|
+
for (let i = 0; i < maxLines; i++) {
|
|
616
|
+
const a = actualLines[i] ?? "(missing)";
|
|
617
|
+
const e = expectedLines[i] ?? "(missing)";
|
|
618
|
+
if (a === e) {
|
|
619
|
+
matchingLines++;
|
|
620
|
+
}
|
|
621
|
+
else {
|
|
622
|
+
if (diffLines.length < 20) {
|
|
623
|
+
diffLines.push(`Line ${i + 1}: expected "${e.slice(0, 100)}" got "${a.slice(0, 100)}"`);
|
|
624
|
+
}
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
const matchPercent = maxLines > 0 ? Math.round((matchingLines / maxLines) * 100) : 100;
|
|
628
|
+
const diffSummary = diffLines.length > 0
|
|
629
|
+
? diffLines.join("\n")
|
|
630
|
+
: "Exact match — no differences";
|
|
631
|
+
const id = genId("oracle");
|
|
632
|
+
db.prepare("INSERT INTO oracle_comparisons (id, test_label, oracle_source, actual_output, expected_output, match, diff_summary, session_id, cycle_id, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))").run(id, testLabel, oracleSource, actualOutput, expectedOutput, exactMatch ? 1 : 0, diffSummary, sessionId, cycleId);
|
|
633
|
+
// Get recent comparison history for this label
|
|
634
|
+
const history = db
|
|
635
|
+
.prepare("SELECT match, created_at FROM oracle_comparisons WHERE test_label = ? ORDER BY created_at DESC LIMIT 5")
|
|
636
|
+
.all(testLabel);
|
|
637
|
+
return {
|
|
638
|
+
comparisonId: id,
|
|
639
|
+
testLabel,
|
|
640
|
+
oracleSource,
|
|
641
|
+
result: {
|
|
642
|
+
exactMatch,
|
|
643
|
+
matchPercent,
|
|
644
|
+
totalLines: maxLines,
|
|
645
|
+
matchingLines,
|
|
646
|
+
diffCount: maxLines - matchingLines,
|
|
647
|
+
},
|
|
648
|
+
diff: diffSummary,
|
|
649
|
+
history: history.map((h) => ({
|
|
650
|
+
match: h.match === 1,
|
|
651
|
+
at: h.created_at,
|
|
652
|
+
})),
|
|
653
|
+
tip: exactMatch
|
|
654
|
+
? "Output matches oracle. Safe to proceed."
|
|
655
|
+
: `${maxLines - matchingLines} lines differ. Fix differences before committing. Each differing section can be assigned to a parallel agent.`,
|
|
656
|
+
};
|
|
657
|
+
},
|
|
658
|
+
},
|
|
659
|
+
// ─── Parallel Agent Overview ────────────────────────────────
|
|
660
|
+
{
|
|
661
|
+
name: "get_parallel_status",
|
|
662
|
+
description: "Get a comprehensive overview of all parallel agent activity: active task claims, role assignments, context budget status, and recent oracle comparison results. Essential for new agent sessions to orient themselves (Anthropic pattern: 'agents dropped into a fresh container with no context').",
|
|
663
|
+
inputSchema: {
|
|
664
|
+
type: "object",
|
|
665
|
+
properties: {
|
|
666
|
+
includeHistory: {
|
|
667
|
+
type: "boolean",
|
|
668
|
+
description: "Include completed/abandoned tasks and past comparisons (default: false)",
|
|
669
|
+
},
|
|
670
|
+
},
|
|
671
|
+
},
|
|
672
|
+
handler: async (args) => {
|
|
673
|
+
const db = getDb();
|
|
674
|
+
const includeHistory = args.includeHistory || false;
|
|
675
|
+
// Active tasks
|
|
676
|
+
const activeTasks = db
|
|
677
|
+
.prepare("SELECT * FROM agent_tasks WHERE status = 'claimed' ORDER BY claimed_at DESC")
|
|
678
|
+
.all();
|
|
679
|
+
const blockedTasks = db
|
|
680
|
+
.prepare("SELECT * FROM agent_tasks WHERE status = 'blocked' ORDER BY released_at DESC LIMIT 10")
|
|
681
|
+
.all();
|
|
682
|
+
// Roles
|
|
683
|
+
const roles = db
|
|
684
|
+
.prepare("SELECT * FROM agent_roles ORDER BY created_at DESC LIMIT 20")
|
|
685
|
+
.all();
|
|
686
|
+
// Recent oracle results
|
|
687
|
+
const recentOracle = db
|
|
688
|
+
.prepare("SELECT test_label, match, oracle_source, created_at FROM oracle_comparisons ORDER BY created_at DESC LIMIT 10")
|
|
689
|
+
.all();
|
|
690
|
+
// Context budget summaries
|
|
691
|
+
const budgetSummary = db
|
|
692
|
+
.prepare("SELECT session_id, SUM(tokens_used) as total_tokens, MAX(tokens_limit) as budget, COUNT(*) as events FROM context_budget_log GROUP BY session_id ORDER BY total_tokens DESC LIMIT 10")
|
|
693
|
+
.all();
|
|
694
|
+
// Task stats
|
|
695
|
+
const taskStats = db
|
|
696
|
+
.prepare("SELECT status, COUNT(*) as count FROM agent_tasks GROUP BY status")
|
|
697
|
+
.all();
|
|
698
|
+
// Optional history
|
|
699
|
+
let completedTasks = [];
|
|
700
|
+
if (includeHistory) {
|
|
701
|
+
completedTasks = db
|
|
702
|
+
.prepare("SELECT * FROM agent_tasks WHERE status IN ('completed', 'abandoned') ORDER BY released_at DESC LIMIT 20")
|
|
703
|
+
.all();
|
|
704
|
+
}
|
|
705
|
+
// Failed oracle comparisons (opportunities for parallel work)
|
|
706
|
+
const failedOracle = db
|
|
707
|
+
.prepare("SELECT test_label, diff_summary, oracle_source, created_at FROM oracle_comparisons WHERE match = 0 ORDER BY created_at DESC LIMIT 10")
|
|
708
|
+
.all();
|
|
709
|
+
return {
|
|
710
|
+
activeTasks: activeTasks.map((t) => ({
|
|
711
|
+
taskKey: t.task_key,
|
|
712
|
+
sessionId: t.session_id,
|
|
713
|
+
description: t.description,
|
|
714
|
+
claimedAt: t.claimed_at,
|
|
715
|
+
})),
|
|
716
|
+
blockedTasks: blockedTasks.map((t) => ({
|
|
717
|
+
taskKey: t.task_key,
|
|
718
|
+
progressNote: t.progress_note,
|
|
719
|
+
releasedAt: t.released_at,
|
|
720
|
+
})),
|
|
721
|
+
roles: roles.map((r) => ({
|
|
722
|
+
sessionId: r.session_id,
|
|
723
|
+
role: r.role,
|
|
724
|
+
focusArea: r.focus_area,
|
|
725
|
+
})),
|
|
726
|
+
taskStats: Object.fromEntries(taskStats.map((s) => [s.status, s.count])),
|
|
727
|
+
recentOracleResults: recentOracle.map((o) => ({
|
|
728
|
+
testLabel: o.test_label,
|
|
729
|
+
match: o.match === 1,
|
|
730
|
+
oracleSource: o.oracle_source,
|
|
731
|
+
at: o.created_at,
|
|
732
|
+
})),
|
|
733
|
+
failedOracleTests: failedOracle.map((o) => ({
|
|
734
|
+
testLabel: o.test_label,
|
|
735
|
+
diffSummary: (o.diff_summary || "").slice(0, 200),
|
|
736
|
+
oracleSource: o.oracle_source,
|
|
737
|
+
})),
|
|
738
|
+
contextBudgets: budgetSummary.map((b) => ({
|
|
739
|
+
sessionId: b.session_id,
|
|
740
|
+
totalTokens: b.total_tokens,
|
|
741
|
+
budget: b.budget,
|
|
742
|
+
percentUsed: Math.round((b.total_tokens / b.budget) * 100),
|
|
743
|
+
events: b.events,
|
|
744
|
+
})),
|
|
745
|
+
...(includeHistory ? { completedTasks: completedTasks.map((t) => ({
|
|
746
|
+
taskKey: t.task_key,
|
|
747
|
+
status: t.status,
|
|
748
|
+
progressNote: t.progress_note,
|
|
749
|
+
releasedAt: t.released_at,
|
|
750
|
+
})) } : {}),
|
|
751
|
+
orientation: {
|
|
752
|
+
summary: `${activeTasks.length} active tasks, ${blockedTasks.length} blocked, ${roles.length} agents with roles, ${failedOracle.length} failing oracle tests`,
|
|
753
|
+
nextSteps: [
|
|
754
|
+
activeTasks.length > 0
|
|
755
|
+
? "Review active tasks — avoid claiming the same work"
|
|
756
|
+
: "No active tasks — pick the next most impactful work item",
|
|
757
|
+
blockedTasks.length > 0
|
|
758
|
+
? "Blocked tasks need fresh eyes — review progress notes"
|
|
759
|
+
: null,
|
|
760
|
+
failedOracle.length > 0
|
|
761
|
+
? `${failedOracle.length} oracle tests failing — each can be assigned to a different agent`
|
|
762
|
+
: null,
|
|
763
|
+
].filter(Boolean),
|
|
764
|
+
},
|
|
765
|
+
};
|
|
766
|
+
},
|
|
767
|
+
},
|
|
768
|
+
// ─── Bootstrap Parallel Agents for External Repos ──────────
|
|
769
|
+
{
|
|
770
|
+
name: "bootstrap_parallel_agents",
|
|
771
|
+
description: "Detect whether a target project repo has parallel agent infrastructure and, if not, scaffold everything needed. Scans for task coordination, role configs, oracle testing, context budget tracking, progress files, AGENTS.md parallel sections, and git worktrees. Returns a gap report with severity ratings and ready-to-use scaffold commands. Uses the AI Flywheel closed loop: detect → research → implement → test → fix → document. Works on ANY project directory — not just nodebench.",
|
|
772
|
+
inputSchema: {
|
|
773
|
+
type: "object",
|
|
774
|
+
properties: {
|
|
775
|
+
projectRoot: {
|
|
776
|
+
type: "string",
|
|
777
|
+
description: "Root directory of the target project to scan and bootstrap (default: current working directory)",
|
|
778
|
+
},
|
|
779
|
+
dryRun: {
|
|
780
|
+
type: "boolean",
|
|
781
|
+
description: "Preview only — show what would be created without writing files (default: true)",
|
|
782
|
+
},
|
|
783
|
+
includeAgentsMd: {
|
|
784
|
+
type: "boolean",
|
|
785
|
+
description: "Generate and include a portable AGENTS.md parallel section for the target repo (default: true)",
|
|
786
|
+
},
|
|
787
|
+
techStack: {
|
|
788
|
+
type: "string",
|
|
789
|
+
description: "Target project's tech stack hint (e.g. 'TypeScript/Node', 'Python/FastAPI', 'Rust') — helps generate idiomatic scaffolds",
|
|
790
|
+
},
|
|
791
|
+
},
|
|
792
|
+
},
|
|
793
|
+
handler: async (args) => {
|
|
794
|
+
const projectRoot = args.projectRoot || process.cwd();
|
|
795
|
+
const dryRun = args.dryRun !== false;
|
|
796
|
+
const includeAgentsMd = args.includeAgentsMd !== false;
|
|
797
|
+
const techStack = args.techStack || "unknown";
|
|
798
|
+
// ── Phase 1: Detection ──────────────────────────────────
|
|
799
|
+
const fs = await import("fs");
|
|
800
|
+
const path = await import("path");
|
|
801
|
+
const results = [];
|
|
802
|
+
// Helper: check if a path exists
|
|
803
|
+
const exists = (p) => {
|
|
804
|
+
try {
|
|
805
|
+
fs.accessSync(p);
|
|
806
|
+
return true;
|
|
807
|
+
}
|
|
808
|
+
catch {
|
|
809
|
+
return false;
|
|
810
|
+
}
|
|
811
|
+
};
|
|
812
|
+
// Helper: check if a file contains a pattern
|
|
813
|
+
const fileContains = (filePath, patterns) => {
|
|
814
|
+
try {
|
|
815
|
+
const content = fs.readFileSync(filePath, "utf-8").toLowerCase();
|
|
816
|
+
return patterns.filter((p) => content.includes(p.toLowerCase()));
|
|
817
|
+
}
|
|
818
|
+
catch {
|
|
819
|
+
return [];
|
|
820
|
+
}
|
|
821
|
+
};
|
|
822
|
+
// Helper: find files matching patterns in top-level dirs
|
|
823
|
+
const findFiles = (root, namePatterns, maxDepth = 3) => {
|
|
824
|
+
const found = [];
|
|
825
|
+
const scan = (dir, depth) => {
|
|
826
|
+
if (depth > maxDepth)
|
|
827
|
+
return;
|
|
828
|
+
try {
|
|
829
|
+
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
830
|
+
for (const entry of entries) {
|
|
831
|
+
if (entry.name.startsWith(".") && entry.name !== ".parallel-agents")
|
|
832
|
+
continue;
|
|
833
|
+
if (entry.name === "node_modules" || entry.name === "dist" || entry.name === "__pycache__")
|
|
834
|
+
continue;
|
|
835
|
+
const full = path.join(dir, entry.name);
|
|
836
|
+
if (entry.isFile()) {
|
|
837
|
+
const lower = entry.name.toLowerCase();
|
|
838
|
+
if (namePatterns.some((p) => lower.includes(p.toLowerCase()))) {
|
|
839
|
+
found.push(full);
|
|
840
|
+
}
|
|
841
|
+
}
|
|
842
|
+
else if (entry.isDirectory()) {
|
|
843
|
+
scan(full, depth + 1);
|
|
844
|
+
}
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
catch { /* permission denied, etc */ }
|
|
848
|
+
};
|
|
849
|
+
scan(root, 0);
|
|
850
|
+
return found;
|
|
851
|
+
};
|
|
852
|
+
// 1. Task Coordination
|
|
853
|
+
{
|
|
854
|
+
const evidence = [];
|
|
855
|
+
const taskDirs = ["current_tasks", ".parallel-agents", "tasks", ".tasks"];
|
|
856
|
+
for (const d of taskDirs) {
|
|
857
|
+
if (exists(path.join(projectRoot, d)))
|
|
858
|
+
evidence.push(`Directory found: ${d}/`);
|
|
859
|
+
}
|
|
860
|
+
const taskFiles = findFiles(projectRoot, ["task_lock", "taskLock", "claim_task", "claimTask"]);
|
|
861
|
+
for (const f of taskFiles)
|
|
862
|
+
evidence.push(`Task file: ${path.relative(projectRoot, f)}`);
|
|
863
|
+
// Check AGENTS.md / CLAUDE.md for task coordination mentions
|
|
864
|
+
for (const agentsFile of ["AGENTS.md", "CLAUDE.md", "agents.md"]) {
|
|
865
|
+
const matches = fileContains(path.join(projectRoot, agentsFile), ["task lock", "claim_task", "parallel agent", "worktree"]);
|
|
866
|
+
if (matches.length > 0)
|
|
867
|
+
evidence.push(`${agentsFile} mentions: ${matches.join(", ")}`);
|
|
868
|
+
}
|
|
869
|
+
results.push({
|
|
870
|
+
category: "task_coordination",
|
|
871
|
+
detected: evidence.length > 0,
|
|
872
|
+
confidence: Math.min(evidence.length * 0.3, 1),
|
|
873
|
+
evidence,
|
|
874
|
+
severity: "CRITICAL",
|
|
875
|
+
});
|
|
876
|
+
}
|
|
877
|
+
// 2. Role Configuration
|
|
878
|
+
{
|
|
879
|
+
const evidence = [];
|
|
880
|
+
const roleFiles = findFiles(projectRoot, ["role", "agent_role", "agentRole"]);
|
|
881
|
+
for (const f of roleFiles.slice(0, 5))
|
|
882
|
+
evidence.push(`Role file: ${path.relative(projectRoot, f)}`);
|
|
883
|
+
for (const agentsFile of ["AGENTS.md", "CLAUDE.md"]) {
|
|
884
|
+
const matches = fileContains(path.join(projectRoot, agentsFile), ["agent role", "role specializ", "implementer", "dedup_reviewer"]);
|
|
885
|
+
if (matches.length > 0)
|
|
886
|
+
evidence.push(`${agentsFile} mentions roles: ${matches.join(", ")}`);
|
|
887
|
+
}
|
|
888
|
+
results.push({
|
|
889
|
+
category: "role_specialization",
|
|
890
|
+
detected: evidence.length > 0,
|
|
891
|
+
confidence: Math.min(evidence.length * 0.35, 1),
|
|
892
|
+
evidence,
|
|
893
|
+
severity: "HIGH",
|
|
894
|
+
});
|
|
895
|
+
}
|
|
896
|
+
// 3. Oracle Testing
|
|
897
|
+
{
|
|
898
|
+
const evidence = [];
|
|
899
|
+
const oracleDirs = ["oracle", "golden", "golden_files", "reference_outputs", "snapshots", "__snapshots__"];
|
|
900
|
+
for (const d of oracleDirs) {
|
|
901
|
+
if (exists(path.join(projectRoot, d)))
|
|
902
|
+
evidence.push(`Oracle dir: ${d}/`);
|
|
903
|
+
}
|
|
904
|
+
const oracleFiles = findFiles(projectRoot, ["oracle", "golden", "reference_output", "snapshot"]);
|
|
905
|
+
for (const f of oracleFiles.slice(0, 5))
|
|
906
|
+
evidence.push(`Oracle file: ${path.relative(projectRoot, f)}`);
|
|
907
|
+
results.push({
|
|
908
|
+
category: "oracle_testing",
|
|
909
|
+
detected: evidence.length > 0,
|
|
910
|
+
confidence: Math.min(evidence.length * 0.25, 1),
|
|
911
|
+
evidence,
|
|
912
|
+
severity: "HIGH",
|
|
913
|
+
});
|
|
914
|
+
}
|
|
915
|
+
// 4. Context Budget Tracking
|
|
916
|
+
{
|
|
917
|
+
const evidence = [];
|
|
918
|
+
const budgetFiles = findFiles(projectRoot, ["context_budget", "contextBudget", "token_budget", "tokenBudget"]);
|
|
919
|
+
for (const f of budgetFiles.slice(0, 5))
|
|
920
|
+
evidence.push(`Budget file: ${path.relative(projectRoot, f)}`);
|
|
921
|
+
for (const agentsFile of ["AGENTS.md", "CLAUDE.md"]) {
|
|
922
|
+
const matches = fileContains(path.join(projectRoot, agentsFile), ["context budget", "token budget", "context pollution", "context window"]);
|
|
923
|
+
if (matches.length > 0)
|
|
924
|
+
evidence.push(`${agentsFile} mentions: ${matches.join(", ")}`);
|
|
925
|
+
}
|
|
926
|
+
results.push({
|
|
927
|
+
category: "context_budget",
|
|
928
|
+
detected: evidence.length > 0,
|
|
929
|
+
confidence: Math.min(evidence.length * 0.35, 1),
|
|
930
|
+
evidence,
|
|
931
|
+
severity: "MEDIUM",
|
|
932
|
+
});
|
|
933
|
+
}
|
|
934
|
+
// 5. Progress Files
|
|
935
|
+
{
|
|
936
|
+
const evidence = [];
|
|
937
|
+
const progressFiles = ["PROGRESS.md", "progress.md", "claude-progress.txt", "STATUS.md", "CHANGELOG.md"];
|
|
938
|
+
for (const f of progressFiles) {
|
|
939
|
+
if (exists(path.join(projectRoot, f)))
|
|
940
|
+
evidence.push(`Progress file: ${f}`);
|
|
941
|
+
}
|
|
942
|
+
results.push({
|
|
943
|
+
category: "progress_files",
|
|
944
|
+
detected: evidence.length > 0,
|
|
945
|
+
confidence: Math.min(evidence.length * 0.4, 1),
|
|
946
|
+
evidence,
|
|
947
|
+
severity: "MEDIUM",
|
|
948
|
+
});
|
|
949
|
+
}
|
|
950
|
+
// 6. AGENTS.md Parallel Section
|
|
951
|
+
{
|
|
952
|
+
const evidence = [];
|
|
953
|
+
for (const agentsFile of ["AGENTS.md", "CLAUDE.md", "agents.md", "NODEBENCH_AGENTS.md"]) {
|
|
954
|
+
const fp = path.join(projectRoot, agentsFile);
|
|
955
|
+
if (exists(fp)) {
|
|
956
|
+
evidence.push(`Found: ${agentsFile}`);
|
|
957
|
+
const matches = fileContains(fp, ["parallel agent", "multi-agent", "subagent", "worktree", "task locking"]);
|
|
958
|
+
if (matches.length > 0)
|
|
959
|
+
evidence.push(`${agentsFile} has parallel content: ${matches.join(", ")}`);
|
|
960
|
+
}
|
|
961
|
+
}
|
|
962
|
+
results.push({
|
|
963
|
+
category: "agents_md_parallel",
|
|
964
|
+
detected: evidence.some((e) => e.includes("parallel content")),
|
|
965
|
+
confidence: evidence.some((e) => e.includes("parallel content")) ? 0.9 : 0,
|
|
966
|
+
evidence,
|
|
967
|
+
severity: "CRITICAL",
|
|
968
|
+
});
|
|
969
|
+
}
|
|
970
|
+
// 7. Git Worktrees
|
|
971
|
+
{
|
|
972
|
+
const evidence = [];
|
|
973
|
+
const worktreeDir = path.join(projectRoot, ".git", "worktrees");
|
|
974
|
+
if (exists(worktreeDir)) {
|
|
975
|
+
try {
|
|
976
|
+
const wts = fs.readdirSync(worktreeDir);
|
|
977
|
+
evidence.push(`Git worktrees found: ${wts.length} (${wts.slice(0, 5).join(", ")})`);
|
|
978
|
+
}
|
|
979
|
+
catch { /* no access */ }
|
|
980
|
+
}
|
|
981
|
+
results.push({
|
|
982
|
+
category: "git_worktrees",
|
|
983
|
+
detected: evidence.length > 0,
|
|
984
|
+
confidence: evidence.length > 0 ? 0.9 : 0,
|
|
985
|
+
evidence,
|
|
986
|
+
severity: "LOW",
|
|
987
|
+
});
|
|
988
|
+
}
|
|
989
|
+
// ── Phase 2: Gap Report ─────────────────────────────────
|
|
990
|
+
const missing = results.filter((r) => !r.detected);
|
|
991
|
+
const detected = results.filter((r) => r.detected);
|
|
992
|
+
const hasParallelInfra = missing.filter((m) => m.severity === "CRITICAL").length === 0;
|
|
993
|
+
const scaffoldFiles = [];
|
|
994
|
+
// Determine comment style based on tech stack
|
|
995
|
+
const isTs = techStack.toLowerCase().includes("typescript") || techStack.toLowerCase().includes("node") || techStack.toLowerCase().includes("js");
|
|
996
|
+
const isPython = techStack.toLowerCase().includes("python");
|
|
997
|
+
const isRust = techStack.toLowerCase().includes("rust");
|
|
998
|
+
// Task coordination directory
|
|
999
|
+
if (!results.find((r) => r.category === "task_coordination")?.detected) {
|
|
1000
|
+
scaffoldFiles.push({
|
|
1001
|
+
path: ".parallel-agents/README.md",
|
|
1002
|
+
content: `# Parallel Agent Coordination
|
|
1003
|
+
|
|
1004
|
+
This directory manages parallel agent task coordination.
|
|
1005
|
+
|
|
1006
|
+
## Structure
|
|
1007
|
+
- \`current_tasks/\` — Active task lock files (one per claimed task)
|
|
1008
|
+
- \`progress.md\` — Running status document for agent orientation
|
|
1009
|
+
- \`roles.json\` — Active role assignments
|
|
1010
|
+
- \`oracle/\` — Golden reference outputs for oracle testing
|
|
1011
|
+
|
|
1012
|
+
## How it works
|
|
1013
|
+
1. Before starting work, an agent creates a lock file in \`current_tasks/\`
|
|
1014
|
+
2. Other agents check this directory to avoid duplicate work
|
|
1015
|
+
3. When done, the agent removes the lock and updates \`progress.md\`
|
|
1016
|
+
|
|
1017
|
+
## Using with NodeBench MCP
|
|
1018
|
+
If you have nodebench-mcp installed, these operations are handled by:
|
|
1019
|
+
- \`claim_agent_task\` / \`release_agent_task\` — Task locking
|
|
1020
|
+
- \`assign_agent_role\` — Role specialization
|
|
1021
|
+
- \`run_oracle_comparison\` — Oracle testing
|
|
1022
|
+
- \`get_parallel_status\` — Agent orientation
|
|
1023
|
+
|
|
1024
|
+
Install: \`npx -y nodebench-mcp\` or \`claude mcp add nodebench -- npx -y nodebench-mcp\`
|
|
1025
|
+
`,
|
|
1026
|
+
description: "Parallel agents coordination directory README",
|
|
1027
|
+
});
|
|
1028
|
+
scaffoldFiles.push({
|
|
1029
|
+
path: ".parallel-agents/current_tasks/.gitkeep",
|
|
1030
|
+
content: "",
|
|
1031
|
+
description: "Task lock directory (empty, agents create lock files here)",
|
|
1032
|
+
});
|
|
1033
|
+
scaffoldFiles.push({
|
|
1034
|
+
path: ".parallel-agents/oracle/.gitkeep",
|
|
1035
|
+
content: "",
|
|
1036
|
+
description: "Oracle golden files directory",
|
|
1037
|
+
});
|
|
1038
|
+
}
|
|
1039
|
+
// Progress file
|
|
1040
|
+
if (!results.find((r) => r.category === "progress_files")?.detected) {
|
|
1041
|
+
scaffoldFiles.push({
|
|
1042
|
+
path: ".parallel-agents/progress.md",
|
|
1043
|
+
content: `# Parallel Agent Progress
|
|
1044
|
+
|
|
1045
|
+
> Updated by agents after each work session. Read this FIRST when starting a new session.
|
|
1046
|
+
|
|
1047
|
+
## Current Status
|
|
1048
|
+
- [ ] No tasks started yet
|
|
1049
|
+
|
|
1050
|
+
## Active Agents
|
|
1051
|
+
(none)
|
|
1052
|
+
|
|
1053
|
+
## Completed Work
|
|
1054
|
+
(none yet)
|
|
1055
|
+
|
|
1056
|
+
## Blocked Items
|
|
1057
|
+
(none)
|
|
1058
|
+
|
|
1059
|
+
## Failed Approaches
|
|
1060
|
+
(Record what didn't work so other agents don't repeat mistakes)
|
|
1061
|
+
|
|
1062
|
+
## Key Decisions
|
|
1063
|
+
(Record architectural or design decisions made during parallel work)
|
|
1064
|
+
`,
|
|
1065
|
+
description: "Running progress document for agent orientation",
|
|
1066
|
+
});
|
|
1067
|
+
}
|
|
1068
|
+
// Role configuration
|
|
1069
|
+
if (!results.find((r) => r.category === "role_specialization")?.detected) {
|
|
1070
|
+
scaffoldFiles.push({
|
|
1071
|
+
path: ".parallel-agents/roles.json",
|
|
1072
|
+
content: JSON.stringify({
|
|
1073
|
+
_comment: "Agent role assignments. Updated by assign_agent_role or manually.",
|
|
1074
|
+
predefinedRoles: {
|
|
1075
|
+
implementer: "Primary feature work. Picks failing tests, implements fixes.",
|
|
1076
|
+
test_writer: "Writes targeted tests for edge cases and failure modes.",
|
|
1077
|
+
code_quality_critic: "Structural improvements, pattern enforcement.",
|
|
1078
|
+
documentation_maintainer: "Keeps READMEs and progress files in sync.",
|
|
1079
|
+
dedup_reviewer: "Finds and coalesces duplicate implementations.",
|
|
1080
|
+
performance_optimizer: "Profiles bottlenecks, optimizes hot paths.",
|
|
1081
|
+
security_auditor: "Audits for vulnerabilities, logs CRITICAL gaps.",
|
|
1082
|
+
},
|
|
1083
|
+
activeAssignments: [],
|
|
1084
|
+
}, null, 2),
|
|
1085
|
+
description: "Role definitions and active assignments",
|
|
1086
|
+
});
|
|
1087
|
+
}
|
|
1088
|
+
// AGENTS.md parallel section
|
|
1089
|
+
let agentsMdContent = "";
|
|
1090
|
+
if (!results.find((r) => r.category === "agents_md_parallel")?.detected && includeAgentsMd) {
|
|
1091
|
+
agentsMdContent = generateParallelAgentsMdSection(techStack);
|
|
1092
|
+
const existingAgentsMd = exists(path.join(projectRoot, "AGENTS.md"));
|
|
1093
|
+
scaffoldFiles.push({
|
|
1094
|
+
path: existingAgentsMd ? "AGENTS.md.parallel-append" : "AGENTS.md",
|
|
1095
|
+
content: existingAgentsMd
|
|
1096
|
+
? `\n\n${agentsMdContent}`
|
|
1097
|
+
: `# Agent Instructions\n\n${agentsMdContent}`,
|
|
1098
|
+
description: existingAgentsMd
|
|
1099
|
+
? "Append this content to your existing AGENTS.md"
|
|
1100
|
+
: "New AGENTS.md with parallel agent coordination section",
|
|
1101
|
+
});
|
|
1102
|
+
}
|
|
1103
|
+
// ── Phase 4: Write files (if not dry run) ──────────────
|
|
1104
|
+
const created = [];
|
|
1105
|
+
if (!dryRun) {
|
|
1106
|
+
for (const file of scaffoldFiles) {
|
|
1107
|
+
const fullPath = path.join(projectRoot, file.path);
|
|
1108
|
+
const dir = path.dirname(fullPath);
|
|
1109
|
+
try {
|
|
1110
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
1111
|
+
// Don't overwrite existing files (except .gitkeep and append markers)
|
|
1112
|
+
if (!file.path.endsWith(".gitkeep") && !file.path.endsWith("-append") && exists(fullPath)) {
|
|
1113
|
+
continue;
|
|
1114
|
+
}
|
|
1115
|
+
fs.writeFileSync(fullPath, file.content, "utf-8");
|
|
1116
|
+
created.push(file.path);
|
|
1117
|
+
}
|
|
1118
|
+
catch (e) {
|
|
1119
|
+
// Log but don't fail
|
|
1120
|
+
created.push(`FAILED: ${file.path} — ${e.message}`);
|
|
1121
|
+
}
|
|
1122
|
+
}
|
|
1123
|
+
}
|
|
1124
|
+
// ── Phase 5: Flywheel Verification Plan ────────────────
|
|
1125
|
+
const flywheelPlan = [
|
|
1126
|
+
{
|
|
1127
|
+
step: 1,
|
|
1128
|
+
name: "Static Analysis",
|
|
1129
|
+
action: "Verify scaffold files are valid and don't conflict with existing project structure",
|
|
1130
|
+
tool: "run_closed_loop({ steps: [{ step: 'compile', passed: true }] })",
|
|
1131
|
+
},
|
|
1132
|
+
{
|
|
1133
|
+
step: 2,
|
|
1134
|
+
name: "Happy Path Test",
|
|
1135
|
+
action: "Have one agent claim a task, do work, release it. Verify progress.md updates.",
|
|
1136
|
+
tool: "claim_agent_task → release_agent_task → list_agent_tasks",
|
|
1137
|
+
},
|
|
1138
|
+
{
|
|
1139
|
+
step: 3,
|
|
1140
|
+
name: "Conflict Test",
|
|
1141
|
+
action: "Have two agents try to claim the same task. Verify the second gets a conflict response.",
|
|
1142
|
+
tool: "claim_agent_task (agent A) → claim_agent_task (agent B, same key)",
|
|
1143
|
+
},
|
|
1144
|
+
{
|
|
1145
|
+
step: 4,
|
|
1146
|
+
name: "Oracle Validation",
|
|
1147
|
+
action: "Create a golden file, run oracle comparison, verify match detection works.",
|
|
1148
|
+
tool: "run_oracle_comparison({ testLabel: 'smoke', actualOutput: 'hello', expectedOutput: 'hello', oracleSource: 'manual' })",
|
|
1149
|
+
},
|
|
1150
|
+
{
|
|
1151
|
+
step: 5,
|
|
1152
|
+
name: "Gap Analysis",
|
|
1153
|
+
action: "Re-run bootstrap_parallel_agents to verify all gaps are now filled.",
|
|
1154
|
+
tool: "bootstrap_parallel_agents({ projectRoot: '...', dryRun: true })",
|
|
1155
|
+
},
|
|
1156
|
+
{
|
|
1157
|
+
step: 6,
|
|
1158
|
+
name: "Document",
|
|
1159
|
+
action: "Record learnings and update AGENTS.md with any new patterns discovered.",
|
|
1160
|
+
tool: "record_learning + update_agents_md",
|
|
1161
|
+
},
|
|
1162
|
+
];
|
|
1163
|
+
return {
|
|
1164
|
+
projectRoot,
|
|
1165
|
+
dryRun,
|
|
1166
|
+
detection: {
|
|
1167
|
+
hasParallelInfra,
|
|
1168
|
+
detected: detected.map((r) => ({
|
|
1169
|
+
category: r.category,
|
|
1170
|
+
confidence: r.confidence,
|
|
1171
|
+
evidence: r.evidence,
|
|
1172
|
+
})),
|
|
1173
|
+
missing: missing.map((r) => ({
|
|
1174
|
+
category: r.category,
|
|
1175
|
+
severity: r.severity,
|
|
1176
|
+
description: {
|
|
1177
|
+
task_coordination: "No task locking mechanism — parallel agents may duplicate work",
|
|
1178
|
+
role_specialization: "No role configuration — agents won't specialize effectively",
|
|
1179
|
+
oracle_testing: "No oracle/golden file infrastructure — can't validate against known-good references",
|
|
1180
|
+
context_budget: "No context budget tracking — risk of context window pollution",
|
|
1181
|
+
progress_files: "No progress files — fresh agent sessions can't orient themselves",
|
|
1182
|
+
agents_md_parallel: "AGENTS.md has no parallel agent section — agents won't know the coordination protocol",
|
|
1183
|
+
git_worktrees: "No git worktrees — parallel agents will need separate clones or worktrees",
|
|
1184
|
+
}[r.category] || `Missing ${r.category}`,
|
|
1185
|
+
})),
|
|
1186
|
+
score: `${detected.length}/${results.length} capabilities present`,
|
|
1187
|
+
},
|
|
1188
|
+
scaffold: {
|
|
1189
|
+
files: scaffoldFiles.map((f) => ({
|
|
1190
|
+
path: f.path,
|
|
1191
|
+
description: f.description,
|
|
1192
|
+
sizeBytes: f.content.length,
|
|
1193
|
+
})),
|
|
1194
|
+
totalFiles: scaffoldFiles.length,
|
|
1195
|
+
...(dryRun ? {} : { created }),
|
|
1196
|
+
},
|
|
1197
|
+
flywheelPlan,
|
|
1198
|
+
nextSteps: [
|
|
1199
|
+
dryRun && scaffoldFiles.length > 0
|
|
1200
|
+
? "Run with dryRun=false to create scaffold files"
|
|
1201
|
+
: null,
|
|
1202
|
+
scaffoldFiles.some((f) => f.path.endsWith("-append"))
|
|
1203
|
+
? "Manually append the AGENTS.md.parallel-append content to your existing AGENTS.md"
|
|
1204
|
+
: null,
|
|
1205
|
+
"Run the 6-step flywheel verification plan above to validate the setup",
|
|
1206
|
+
"Install nodebench-mcp for full tool support: claude mcp add nodebench -- npx -y nodebench-mcp",
|
|
1207
|
+
"Set up 3-5 git worktrees for maximum parallel throughput: git worktree add ../project-wt1 -b agent-1",
|
|
1208
|
+
missing.length === 0
|
|
1209
|
+
? "All parallel agent infrastructure detected! Ready for multi-agent work."
|
|
1210
|
+
: null,
|
|
1211
|
+
].filter(Boolean),
|
|
1212
|
+
tip: hasParallelInfra
|
|
1213
|
+
? "This project already has parallel agent infrastructure. Use get_parallel_status to orient and start working."
|
|
1214
|
+
: `This project is missing ${missing.length} parallel agent capabilities. ${dryRun ? "Run with dryRun=false to scaffold them automatically." : `Scaffolded ${created.length} files. Run the flywheel plan to verify.`}`,
|
|
1215
|
+
};
|
|
1216
|
+
},
|
|
1217
|
+
},
|
|
1218
|
+
{
|
|
1219
|
+
name: "generate_parallel_agents_md",
|
|
1220
|
+
description: "Generate a portable, framework-agnostic AGENTS.md section for parallel agent coordination. Designed to be dropped into ANY project repo so that AI agents (Claude, GPT, etc.) automatically know how to coordinate in parallel. Includes task locking protocol, role definitions, oracle testing workflow, context budget rules, and anti-patterns. Output is ready to paste into an existing AGENTS.md or use standalone.",
|
|
1221
|
+
inputSchema: {
|
|
1222
|
+
type: "object",
|
|
1223
|
+
properties: {
|
|
1224
|
+
techStack: {
|
|
1225
|
+
type: "string",
|
|
1226
|
+
description: "Target project tech stack (e.g. 'TypeScript/React', 'Python/Django', 'Rust'). Tailors examples to the stack.",
|
|
1227
|
+
},
|
|
1228
|
+
projectName: {
|
|
1229
|
+
type: "string",
|
|
1230
|
+
description: "Project name for the header (default: 'this project')",
|
|
1231
|
+
},
|
|
1232
|
+
maxAgents: {
|
|
1233
|
+
type: "number",
|
|
1234
|
+
description: "Expected max parallel agents (default: 4). Affects role recommendations.",
|
|
1235
|
+
},
|
|
1236
|
+
includeNodebenchSetup: {
|
|
1237
|
+
type: "boolean",
|
|
1238
|
+
description: "Include nodebench-mcp installation and tool mapping instructions (default: true)",
|
|
1239
|
+
},
|
|
1240
|
+
},
|
|
1241
|
+
},
|
|
1242
|
+
handler: async (args) => {
|
|
1243
|
+
const techStack = args.techStack || "general";
|
|
1244
|
+
const projectName = args.projectName || "this project";
|
|
1245
|
+
const maxAgents = args.maxAgents || 4;
|
|
1246
|
+
const includeNodebench = args.includeNodebenchSetup !== false;
|
|
1247
|
+
const content = generateParallelAgentsMdSection(techStack, projectName, maxAgents, includeNodebench);
|
|
1248
|
+
return {
|
|
1249
|
+
format: "markdown",
|
|
1250
|
+
content,
|
|
1251
|
+
usage: [
|
|
1252
|
+
"Option A: Paste into your existing AGENTS.md (append at the end)",
|
|
1253
|
+
"Option B: Save as a new AGENTS.md in your project root",
|
|
1254
|
+
"Option C: Save as .parallel-agents/PROTOCOL.md for a standalone guide",
|
|
1255
|
+
],
|
|
1256
|
+
charCount: content.length,
|
|
1257
|
+
sections: [
|
|
1258
|
+
"Parallel Agent Coordination Protocol",
|
|
1259
|
+
"Task Locking Protocol",
|
|
1260
|
+
"Role Specialization",
|
|
1261
|
+
"Oracle Testing Workflow",
|
|
1262
|
+
"Context Budget Rules",
|
|
1263
|
+
"Progress File Protocol",
|
|
1264
|
+
"Anti-Patterns",
|
|
1265
|
+
"Flywheel Verification",
|
|
1266
|
+
includeNodebench ? "NodeBench MCP Setup" : null,
|
|
1267
|
+
].filter(Boolean),
|
|
1268
|
+
};
|
|
1269
|
+
},
|
|
1270
|
+
},
|
|
1271
|
+
];
|
|
1272
|
+
//# sourceMappingURL=parallelAgentTools.js.map
|