nodebench-mcp 2.11.0 → 2.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/NODEBENCH_AGENTS.md +809 -809
- package/README.md +443 -431
- package/STYLE_GUIDE.md +477 -477
- package/dist/__tests__/evalHarness.test.js +1 -1
- package/dist/__tests__/gaiaCapabilityAudioEval.test.js +9 -14
- package/dist/__tests__/gaiaCapabilityAudioEval.test.js.map +1 -1
- package/dist/__tests__/gaiaCapabilityEval.test.js +88 -14
- package/dist/__tests__/gaiaCapabilityEval.test.js.map +1 -1
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js +9 -5
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +1 -1
- package/dist/__tests__/gaiaCapabilityMediaEval.test.js +165 -17
- package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +1 -1
- package/dist/__tests__/helpers/answerMatch.d.ts +36 -7
- package/dist/__tests__/helpers/answerMatch.js +224 -35
- package/dist/__tests__/helpers/answerMatch.js.map +1 -1
- package/dist/__tests__/helpers/textLlm.d.ts +1 -1
- package/dist/__tests__/presetRealWorldBench.test.d.ts +1 -0
- package/dist/__tests__/presetRealWorldBench.test.js +850 -0
- package/dist/__tests__/presetRealWorldBench.test.js.map +1 -0
- package/dist/__tests__/tools.test.js +20 -7
- package/dist/__tests__/tools.test.js.map +1 -1
- package/dist/__tests__/toolsetGatingEval.test.js +21 -11
- package/dist/__tests__/toolsetGatingEval.test.js.map +1 -1
- package/dist/db.js +21 -0
- package/dist/db.js.map +1 -1
- package/dist/index.js +424 -327
- package/dist/index.js.map +1 -1
- package/dist/tools/agentBootstrapTools.js +258 -258
- package/dist/tools/boilerplateTools.js +144 -144
- package/dist/tools/cCompilerBenchmarkTools.js +33 -33
- package/dist/tools/documentationTools.js +59 -59
- package/dist/tools/flywheelTools.js +6 -6
- package/dist/tools/gitWorkflowTools.d.ts +11 -0
- package/dist/tools/gitWorkflowTools.js +580 -0
- package/dist/tools/gitWorkflowTools.js.map +1 -0
- package/dist/tools/learningTools.js +26 -26
- package/dist/tools/localFileTools.d.ts +3 -0
- package/dist/tools/localFileTools.js +3164 -125
- package/dist/tools/localFileTools.js.map +1 -1
- package/dist/tools/metaTools.js +82 -0
- package/dist/tools/metaTools.js.map +1 -1
- package/dist/tools/parallelAgentTools.js +228 -0
- package/dist/tools/parallelAgentTools.js.map +1 -1
- package/dist/tools/patternTools.d.ts +13 -0
- package/dist/tools/patternTools.js +456 -0
- package/dist/tools/patternTools.js.map +1 -0
- package/dist/tools/reconTools.js +31 -31
- package/dist/tools/selfEvalTools.js +44 -44
- package/dist/tools/seoTools.d.ts +16 -0
- package/dist/tools/seoTools.js +866 -0
- package/dist/tools/seoTools.js.map +1 -0
- package/dist/tools/sessionMemoryTools.d.ts +15 -0
- package/dist/tools/sessionMemoryTools.js +348 -0
- package/dist/tools/sessionMemoryTools.js.map +1 -0
- package/dist/tools/toolRegistry.d.ts +4 -0
- package/dist/tools/toolRegistry.js +489 -0
- package/dist/tools/toolRegistry.js.map +1 -1
- package/dist/tools/toonTools.d.ts +15 -0
- package/dist/tools/toonTools.js +94 -0
- package/dist/tools/toonTools.js.map +1 -0
- package/dist/tools/verificationTools.js +41 -41
- package/dist/tools/visionTools.js +17 -17
- package/dist/tools/voiceBridgeTools.d.ts +15 -0
- package/dist/tools/voiceBridgeTools.js +1427 -0
- package/dist/tools/voiceBridgeTools.js.map +1 -0
- package/dist/tools/webTools.js +18 -18
- package/package.json +102 -101
|
@@ -54,52 +54,52 @@ function generatePackageJson(projectName, techStack) {
|
|
|
54
54
|
}, null, 2);
|
|
55
55
|
}
|
|
56
56
|
function generateAgentsMd(projectName, techStack) {
|
|
57
|
-
return `# ${projectName} — Agent Instructions
|
|
58
|
-
|
|
59
|
-
## Project Overview
|
|
60
|
-
- **Name**: ${projectName}
|
|
61
|
-
- **Tech Stack**: ${techStack}
|
|
62
|
-
- **Created**: ${new Date().toISOString().split("T")[0]}
|
|
63
|
-
|
|
64
|
-
## Quick Start for AI Agents
|
|
65
|
-
|
|
66
|
-
### First Session
|
|
67
|
-
1. Call \`bootstrap_project\` to register this project with NodeBench MCP
|
|
68
|
-
2. Call \`search_all_knowledge\` to check for relevant past findings
|
|
69
|
-
3. Call \`getMethodology("overview")\` to see all available methodologies
|
|
70
|
-
|
|
71
|
-
### Every Task
|
|
72
|
-
1. \`search_all_knowledge\` — Check what the system already knows
|
|
73
|
-
2. \`start_verification_cycle\` — Begin 6-phase verification
|
|
74
|
-
3. Follow phases 1-6 (guided by tool responses)
|
|
75
|
-
4. \`run_mandatory_flywheel\` — 6-step final check before declaring done
|
|
76
|
-
5. \`record_learning\` — Capture what you discovered
|
|
77
|
-
|
|
78
|
-
### Progressive Tool Discovery
|
|
79
|
-
- \`discover_tools("what you want to do")\` — Hybrid search with relevance scoring
|
|
80
|
-
- \`get_tool_quick_ref("tool_name")\` — What to do after calling any tool
|
|
81
|
-
- \`get_workflow_chain("new_feature")\` — Full step-by-step tool sequences
|
|
82
|
-
|
|
83
|
-
## Architecture
|
|
84
|
-
<!-- Describe your project architecture here -->
|
|
85
|
-
|
|
86
|
-
## Conventions
|
|
87
|
-
<!-- Describe coding conventions, patterns, and standards here -->
|
|
88
|
-
|
|
89
|
-
## Quality Gates
|
|
90
|
-
- All changes must pass \`run_mandatory_flywheel\` before shipping
|
|
91
|
-
- UI changes must pass the \`ui_ux_qa\` quality gate
|
|
92
|
-
- Code changes must pass \`run_closed_loop\` (compile→lint→test→debug)
|
|
93
|
-
|
|
94
|
-
## Known Gotchas
|
|
95
|
-
<!-- Record known issues and workarounds here. Also stored in NodeBench learnings DB. -->
|
|
96
|
-
|
|
97
|
-
## Parallel Agent Coordination
|
|
98
|
-
When using multiple agents (Claude Code subagents, worktrees, or terminals):
|
|
99
|
-
1. \`claim_agent_task\` before starting work (prevents duplicate effort)
|
|
100
|
-
2. \`assign_agent_role\` for specialization
|
|
101
|
-
3. \`release_agent_task\` with progress note when done
|
|
102
|
-
4. \`get_parallel_status\` to see all agent activity
|
|
57
|
+
return `# ${projectName} — Agent Instructions
|
|
58
|
+
|
|
59
|
+
## Project Overview
|
|
60
|
+
- **Name**: ${projectName}
|
|
61
|
+
- **Tech Stack**: ${techStack}
|
|
62
|
+
- **Created**: ${new Date().toISOString().split("T")[0]}
|
|
63
|
+
|
|
64
|
+
## Quick Start for AI Agents
|
|
65
|
+
|
|
66
|
+
### First Session
|
|
67
|
+
1. Call \`bootstrap_project\` to register this project with NodeBench MCP
|
|
68
|
+
2. Call \`search_all_knowledge\` to check for relevant past findings
|
|
69
|
+
3. Call \`getMethodology("overview")\` to see all available methodologies
|
|
70
|
+
|
|
71
|
+
### Every Task
|
|
72
|
+
1. \`search_all_knowledge\` — Check what the system already knows
|
|
73
|
+
2. \`start_verification_cycle\` — Begin 6-phase verification
|
|
74
|
+
3. Follow phases 1-6 (guided by tool responses)
|
|
75
|
+
4. \`run_mandatory_flywheel\` — 6-step final check before declaring done
|
|
76
|
+
5. \`record_learning\` — Capture what you discovered
|
|
77
|
+
|
|
78
|
+
### Progressive Tool Discovery
|
|
79
|
+
- \`discover_tools("what you want to do")\` — Hybrid search with relevance scoring
|
|
80
|
+
- \`get_tool_quick_ref("tool_name")\` — What to do after calling any tool
|
|
81
|
+
- \`get_workflow_chain("new_feature")\` — Full step-by-step tool sequences
|
|
82
|
+
|
|
83
|
+
## Architecture
|
|
84
|
+
<!-- Describe your project architecture here -->
|
|
85
|
+
|
|
86
|
+
## Conventions
|
|
87
|
+
<!-- Describe coding conventions, patterns, and standards here -->
|
|
88
|
+
|
|
89
|
+
## Quality Gates
|
|
90
|
+
- All changes must pass \`run_mandatory_flywheel\` before shipping
|
|
91
|
+
- UI changes must pass the \`ui_ux_qa\` quality gate
|
|
92
|
+
- Code changes must pass \`run_closed_loop\` (compile→lint→test→debug)
|
|
93
|
+
|
|
94
|
+
## Known Gotchas
|
|
95
|
+
<!-- Record known issues and workarounds here. Also stored in NodeBench learnings DB. -->
|
|
96
|
+
|
|
97
|
+
## Parallel Agent Coordination
|
|
98
|
+
When using multiple agents (Claude Code subagents, worktrees, or terminals):
|
|
99
|
+
1. \`claim_agent_task\` before starting work (prevents duplicate effort)
|
|
100
|
+
2. \`assign_agent_role\` for specialization
|
|
101
|
+
3. \`release_agent_task\` with progress note when done
|
|
102
|
+
4. \`get_parallel_status\` to see all agent activity
|
|
103
103
|
`;
|
|
104
104
|
}
|
|
105
105
|
function generateMcpConfig(projectName) {
|
|
@@ -114,116 +114,116 @@ function generateMcpConfig(projectName) {
|
|
|
114
114
|
}, null, 2);
|
|
115
115
|
}
|
|
116
116
|
function generateGithubActions() {
|
|
117
|
-
return `name: NodeBench Quality Gate
|
|
118
|
-
on:
|
|
119
|
-
pull_request:
|
|
120
|
-
branches: [main]
|
|
121
|
-
push:
|
|
122
|
-
branches: [main]
|
|
123
|
-
|
|
124
|
-
jobs:
|
|
125
|
-
quality-gate:
|
|
126
|
-
runs-on: ubuntu-latest
|
|
127
|
-
steps:
|
|
128
|
-
- uses: actions/checkout@v4
|
|
129
|
-
- uses: actions/setup-node@v4
|
|
130
|
-
with:
|
|
131
|
-
node-version: '22'
|
|
132
|
-
- run: npm ci
|
|
133
|
-
- run: npm run build
|
|
134
|
-
- run: npm run test
|
|
135
|
-
- run: npm run lint
|
|
117
|
+
return `name: NodeBench Quality Gate
|
|
118
|
+
on:
|
|
119
|
+
pull_request:
|
|
120
|
+
branches: [main]
|
|
121
|
+
push:
|
|
122
|
+
branches: [main]
|
|
123
|
+
|
|
124
|
+
jobs:
|
|
125
|
+
quality-gate:
|
|
126
|
+
runs-on: ubuntu-latest
|
|
127
|
+
steps:
|
|
128
|
+
- uses: actions/checkout@v4
|
|
129
|
+
- uses: actions/setup-node@v4
|
|
130
|
+
with:
|
|
131
|
+
node-version: '22'
|
|
132
|
+
- run: npm ci
|
|
133
|
+
- run: npm run build
|
|
134
|
+
- run: npm run test
|
|
135
|
+
- run: npm run lint
|
|
136
136
|
`;
|
|
137
137
|
}
|
|
138
138
|
function generateReadme(projectName, techStack) {
|
|
139
|
-
return `# ${projectName}
|
|
140
|
-
|
|
141
|
-
Built with NodeBench MCP methodology for rigorous AI-assisted development.
|
|
142
|
-
|
|
143
|
-
## Quick Start
|
|
144
|
-
|
|
145
|
-
\`\`\`bash
|
|
146
|
-
npm install
|
|
147
|
-
npm run build
|
|
148
|
-
npm run test
|
|
149
|
-
\`\`\`
|
|
150
|
-
|
|
151
|
-
## NodeBench MCP Integration
|
|
152
|
-
|
|
153
|
-
This project is pre-configured for [NodeBench MCP](https://github.com/nodebench/nodebench-ai) — tools that make AI agents catch the bugs they normally ship.
|
|
154
|
-
|
|
155
|
-
### For AI Agents
|
|
156
|
-
See [AGENTS.md](./AGENTS.md) for detailed instructions.
|
|
157
|
-
|
|
158
|
-
### Key Commands
|
|
159
|
-
- \`npm run mcp:start\` — Start NodeBench MCP (full toolset)
|
|
160
|
-
- \`npm run mcp:lite\` — Lightweight mode (34 tools)
|
|
161
|
-
- \`npm run mcp:core\` — Core mode (79 tools)
|
|
162
|
-
|
|
163
|
-
### MCP Configuration
|
|
164
|
-
The \`.mcp.json\` file configures NodeBench MCP for your IDE.
|
|
165
|
-
|
|
166
|
-
## Tech Stack
|
|
167
|
-
${techStack}
|
|
168
|
-
|
|
169
|
-
## Development Workflow
|
|
170
|
-
1. **Research** → \`search_all_knowledge\`, \`run_recon\`
|
|
171
|
-
2. **Implement** → Write code following conventions in AGENTS.md
|
|
172
|
-
3. **Test** → \`run_closed_loop\` (compile→lint→test→debug)
|
|
173
|
-
4. **Verify** → \`run_mandatory_flywheel\` (6-step verification)
|
|
174
|
-
5. **Ship** → \`record_learning\`, \`promote_to_eval\`
|
|
139
|
+
return `# ${projectName}
|
|
140
|
+
|
|
141
|
+
Built with NodeBench MCP methodology for rigorous AI-assisted development.
|
|
142
|
+
|
|
143
|
+
## Quick Start
|
|
144
|
+
|
|
145
|
+
\`\`\`bash
|
|
146
|
+
npm install
|
|
147
|
+
npm run build
|
|
148
|
+
npm run test
|
|
149
|
+
\`\`\`
|
|
150
|
+
|
|
151
|
+
## NodeBench MCP Integration
|
|
152
|
+
|
|
153
|
+
This project is pre-configured for [NodeBench MCP](https://github.com/nodebench/nodebench-ai) — tools that make AI agents catch the bugs they normally ship.
|
|
154
|
+
|
|
155
|
+
### For AI Agents
|
|
156
|
+
See [AGENTS.md](./AGENTS.md) for detailed instructions.
|
|
157
|
+
|
|
158
|
+
### Key Commands
|
|
159
|
+
- \`npm run mcp:start\` — Start NodeBench MCP (full toolset)
|
|
160
|
+
- \`npm run mcp:lite\` — Lightweight mode (34 tools)
|
|
161
|
+
- \`npm run mcp:core\` — Core mode (79 tools)
|
|
162
|
+
|
|
163
|
+
### MCP Configuration
|
|
164
|
+
The \`.mcp.json\` file configures NodeBench MCP for your IDE.
|
|
165
|
+
|
|
166
|
+
## Tech Stack
|
|
167
|
+
${techStack}
|
|
168
|
+
|
|
169
|
+
## Development Workflow
|
|
170
|
+
1. **Research** → \`search_all_knowledge\`, \`run_recon\`
|
|
171
|
+
2. **Implement** → Write code following conventions in AGENTS.md
|
|
172
|
+
3. **Test** → \`run_closed_loop\` (compile→lint→test→debug)
|
|
173
|
+
4. **Verify** → \`run_mandatory_flywheel\` (6-step verification)
|
|
174
|
+
5. **Ship** → \`record_learning\`, \`promote_to_eval\`
|
|
175
175
|
`;
|
|
176
176
|
}
|
|
177
177
|
function generateParallelAgentsReadme() {
|
|
178
|
-
return `# Parallel Agent Coordination
|
|
179
|
-
|
|
180
|
-
This directory supports multi-agent workflows with NodeBench MCP.
|
|
181
|
-
|
|
182
|
-
## Files
|
|
183
|
-
- \`current_tasks/\` — Lock files for claimed tasks
|
|
184
|
-
- \`oracle/\` — Known-good reference outputs for oracle testing
|
|
185
|
-
- \`roles.json\` — Agent role assignments
|
|
186
|
-
- \`progress.md\` — Running status for agent orientation
|
|
187
|
-
|
|
188
|
-
## Usage
|
|
189
|
-
1. \`claim_agent_task({ taskKey: "...", description: "..." })\`
|
|
190
|
-
2. Do the work
|
|
191
|
-
3. \`release_agent_task({ taskKey: "...", status: "completed", progressNote: "..." })\`
|
|
192
|
-
|
|
193
|
-
See AGENTS.md for full protocol.
|
|
178
|
+
return `# Parallel Agent Coordination
|
|
179
|
+
|
|
180
|
+
This directory supports multi-agent workflows with NodeBench MCP.
|
|
181
|
+
|
|
182
|
+
## Files
|
|
183
|
+
- \`current_tasks/\` — Lock files for claimed tasks
|
|
184
|
+
- \`oracle/\` — Known-good reference outputs for oracle testing
|
|
185
|
+
- \`roles.json\` — Agent role assignments
|
|
186
|
+
- \`progress.md\` — Running status for agent orientation
|
|
187
|
+
|
|
188
|
+
## Usage
|
|
189
|
+
1. \`claim_agent_task({ taskKey: "...", description: "..." })\`
|
|
190
|
+
2. Do the work
|
|
191
|
+
3. \`release_agent_task({ taskKey: "...", status: "completed", progressNote: "..." })\`
|
|
192
|
+
|
|
193
|
+
See AGENTS.md for full protocol.
|
|
194
194
|
`;
|
|
195
195
|
}
|
|
196
196
|
function generateProgressMd(projectName) {
|
|
197
|
-
return `# ${projectName} — Progress Tracker
|
|
198
|
-
|
|
199
|
-
## Current Status
|
|
200
|
-
- **Phase**: Setup
|
|
201
|
-
- **Last Updated**: ${new Date().toISOString()}
|
|
202
|
-
- **Active Agents**: 0
|
|
203
|
-
|
|
204
|
-
## Completed Tasks
|
|
205
|
-
<!-- Tasks will be logged here as agents complete them -->
|
|
206
|
-
|
|
207
|
-
## Blocked Tasks
|
|
208
|
-
<!-- Tasks that need fresh eyes or external input -->
|
|
209
|
-
|
|
210
|
-
## Architecture Decisions
|
|
211
|
-
<!-- Key decisions and their rationale -->
|
|
212
|
-
|
|
213
|
-
## Known Issues
|
|
214
|
-
<!-- Active issues to be aware of -->
|
|
197
|
+
return `# ${projectName} — Progress Tracker
|
|
198
|
+
|
|
199
|
+
## Current Status
|
|
200
|
+
- **Phase**: Setup
|
|
201
|
+
- **Last Updated**: ${new Date().toISOString()}
|
|
202
|
+
- **Active Agents**: 0
|
|
203
|
+
|
|
204
|
+
## Completed Tasks
|
|
205
|
+
<!-- Tasks will be logged here as agents complete them -->
|
|
206
|
+
|
|
207
|
+
## Blocked Tasks
|
|
208
|
+
<!-- Tasks that need fresh eyes or external input -->
|
|
209
|
+
|
|
210
|
+
## Architecture Decisions
|
|
211
|
+
<!-- Key decisions and their rationale -->
|
|
212
|
+
|
|
213
|
+
## Known Issues
|
|
214
|
+
<!-- Active issues to be aware of -->
|
|
215
215
|
`;
|
|
216
216
|
}
|
|
217
217
|
function generateGitignore() {
|
|
218
|
-
return `node_modules/
|
|
219
|
-
dist/
|
|
220
|
-
.env
|
|
221
|
-
.env.local
|
|
222
|
-
*.log
|
|
223
|
-
.nodebench/
|
|
224
|
-
.parallel-agents/current_tasks/*.lock
|
|
225
|
-
.tmp/
|
|
226
|
-
coverage/
|
|
218
|
+
return `node_modules/
|
|
219
|
+
dist/
|
|
220
|
+
.env
|
|
221
|
+
.env.local
|
|
222
|
+
*.log
|
|
223
|
+
.nodebench/
|
|
224
|
+
.parallel-agents/current_tasks/*.lock
|
|
225
|
+
.tmp/
|
|
226
|
+
coverage/
|
|
227
227
|
`;
|
|
228
228
|
}
|
|
229
229
|
function generateTsConfig() {
|
|
@@ -109,39 +109,39 @@ const CHALLENGES = {
|
|
|
109
109
|
// ── DB schema extension ──────────────────────────────────────────────────
|
|
110
110
|
function ensureBenchmarkTables() {
|
|
111
111
|
const db = getDb();
|
|
112
|
-
db.exec(`
|
|
113
|
-
CREATE TABLE IF NOT EXISTS autonomy_benchmarks (
|
|
114
|
-
id TEXT PRIMARY KEY,
|
|
115
|
-
challenge_key TEXT NOT NULL,
|
|
116
|
-
challenge_name TEXT NOT NULL,
|
|
117
|
-
status TEXT NOT NULL DEFAULT 'active',
|
|
118
|
-
started_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
119
|
-
completed_at TEXT,
|
|
120
|
-
total_points INTEGER NOT NULL DEFAULT 0,
|
|
121
|
-
earned_points INTEGER NOT NULL DEFAULT 0,
|
|
122
|
-
milestones_completed INTEGER NOT NULL DEFAULT 0,
|
|
123
|
-
milestones_total INTEGER NOT NULL DEFAULT 0,
|
|
124
|
-
duration_minutes REAL,
|
|
125
|
-
tools_used TEXT,
|
|
126
|
-
context_tokens_estimate INTEGER,
|
|
127
|
-
notes TEXT,
|
|
128
|
-
score_pct REAL
|
|
129
|
-
);
|
|
130
|
-
CREATE TABLE IF NOT EXISTS benchmark_milestones (
|
|
131
|
-
id TEXT PRIMARY KEY,
|
|
132
|
-
benchmark_id TEXT NOT NULL,
|
|
133
|
-
milestone_id TEXT NOT NULL,
|
|
134
|
-
milestone_name TEXT NOT NULL,
|
|
135
|
-
status TEXT NOT NULL DEFAULT 'pending',
|
|
136
|
-
points INTEGER NOT NULL DEFAULT 0,
|
|
137
|
-
started_at TEXT,
|
|
138
|
-
completed_at TEXT,
|
|
139
|
-
duration_minutes REAL,
|
|
140
|
-
tools_used TEXT,
|
|
141
|
-
verification_passed INTEGER,
|
|
142
|
-
notes TEXT,
|
|
143
|
-
FOREIGN KEY (benchmark_id) REFERENCES autonomy_benchmarks(id)
|
|
144
|
-
);
|
|
112
|
+
db.exec(`
|
|
113
|
+
CREATE TABLE IF NOT EXISTS autonomy_benchmarks (
|
|
114
|
+
id TEXT PRIMARY KEY,
|
|
115
|
+
challenge_key TEXT NOT NULL,
|
|
116
|
+
challenge_name TEXT NOT NULL,
|
|
117
|
+
status TEXT NOT NULL DEFAULT 'active',
|
|
118
|
+
started_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
119
|
+
completed_at TEXT,
|
|
120
|
+
total_points INTEGER NOT NULL DEFAULT 0,
|
|
121
|
+
earned_points INTEGER NOT NULL DEFAULT 0,
|
|
122
|
+
milestones_completed INTEGER NOT NULL DEFAULT 0,
|
|
123
|
+
milestones_total INTEGER NOT NULL DEFAULT 0,
|
|
124
|
+
duration_minutes REAL,
|
|
125
|
+
tools_used TEXT,
|
|
126
|
+
context_tokens_estimate INTEGER,
|
|
127
|
+
notes TEXT,
|
|
128
|
+
score_pct REAL
|
|
129
|
+
);
|
|
130
|
+
CREATE TABLE IF NOT EXISTS benchmark_milestones (
|
|
131
|
+
id TEXT PRIMARY KEY,
|
|
132
|
+
benchmark_id TEXT NOT NULL,
|
|
133
|
+
milestone_id TEXT NOT NULL,
|
|
134
|
+
milestone_name TEXT NOT NULL,
|
|
135
|
+
status TEXT NOT NULL DEFAULT 'pending',
|
|
136
|
+
points INTEGER NOT NULL DEFAULT 0,
|
|
137
|
+
started_at TEXT,
|
|
138
|
+
completed_at TEXT,
|
|
139
|
+
duration_minutes REAL,
|
|
140
|
+
tools_used TEXT,
|
|
141
|
+
verification_passed INTEGER,
|
|
142
|
+
notes TEXT,
|
|
143
|
+
FOREIGN KEY (benchmark_id) REFERENCES autonomy_benchmarks(id)
|
|
144
|
+
);
|
|
145
145
|
`);
|
|
146
146
|
}
|
|
147
147
|
// ── Tools ────────────────────────────────────────────────────────────────
|
|
@@ -24,65 +24,65 @@ async function canImport(pkg) {
|
|
|
24
24
|
// ─── AGENTS.md template ───────────────────────────────────────────────────────
|
|
25
25
|
const QUICK_REFS_PATTERN = "**→ Quick Refs:**";
|
|
26
26
|
const QUICK_REFS_REGEX = /\*\*→ Quick Refs:\*\*/;
|
|
27
|
-
const AGENTS_MD_TEMPLATE = `# AGENTS.md
|
|
28
|
-
|
|
29
|
-
This file provides instructions for AI agents working on this project.
|
|
30
|
-
|
|
31
|
-
**→ Quick Refs:** Start with [Project Overview](#project-overview) | Set up: [Development Setup](#development-setup) | Run: [Key Commands](#key-commands)
|
|
32
|
-
|
|
33
|
-
---
|
|
34
|
-
|
|
35
|
-
## Project Overview
|
|
36
|
-
|
|
37
|
-
<!-- Describe your project's purpose, architecture, and key components -->
|
|
38
|
-
|
|
39
|
-
**→ Quick Refs:** Tech details: [Tech Stack](#tech-stack) | Code style: [Coding Conventions](#coding-conventions)
|
|
40
|
-
|
|
41
|
-
## Development Setup
|
|
42
|
-
|
|
43
|
-
<!-- Document setup steps: dependencies, environment variables, build commands -->
|
|
44
|
-
|
|
45
|
-
**→ Quick Refs:** Key commands: [Key Commands](#key-commands) | Testing: [Testing](#testing)
|
|
46
|
-
|
|
47
|
-
## Tech Stack
|
|
48
|
-
|
|
49
|
-
<!-- List frameworks, libraries, and tools used -->
|
|
50
|
-
|
|
51
|
-
**→ Quick Refs:** Conventions: [Coding Conventions](#coding-conventions) | Setup: [Development Setup](#development-setup)
|
|
52
|
-
|
|
53
|
-
## Coding Conventions
|
|
54
|
-
|
|
55
|
-
<!-- Document code style, naming conventions, patterns to follow -->
|
|
56
|
-
|
|
57
|
-
**→ Quick Refs:** Testing: [Testing](#testing) | Edge cases: [Edge Cases & Learnings](#edge-cases--learnings)
|
|
58
|
-
|
|
59
|
-
## Testing
|
|
60
|
-
|
|
61
|
-
<!-- Document how to run tests, what testing frameworks are used -->
|
|
62
|
-
|
|
63
|
-
**→ Quick Refs:** Commands: [Key Commands](#key-commands) | Edge cases: [Edge Cases & Learnings](#edge-cases--learnings)
|
|
64
|
-
|
|
65
|
-
## Key Commands
|
|
66
|
-
|
|
67
|
-
\`\`\`bash
|
|
68
|
-
# Add your common commands here
|
|
69
|
-
npm install
|
|
70
|
-
npm run build
|
|
71
|
-
npm run test
|
|
72
|
-
\`\`\`
|
|
73
|
-
|
|
74
|
-
**→ Quick Refs:** Setup: [Development Setup](#development-setup) | Testing: [Testing](#testing)
|
|
75
|
-
|
|
76
|
-
## Edge Cases & Learnings
|
|
77
|
-
|
|
78
|
-
<!-- Document gotchas, edge cases, and lessons learned -->
|
|
79
|
-
|
|
80
|
-
**→ Quick Refs:** Conventions: [Coding Conventions](#coding-conventions) | Testing: [Testing](#testing)
|
|
81
|
-
|
|
82
|
-
---
|
|
83
|
-
|
|
84
|
-
*This file is maintained by NodeBench MCP. Use \`update_agents_md\` to add learnings automatically.*
|
|
85
|
-
*Every section MUST include \`**→ Quick Refs:**\` for agent chunking context.*
|
|
27
|
+
const AGENTS_MD_TEMPLATE = `# AGENTS.md
|
|
28
|
+
|
|
29
|
+
This file provides instructions for AI agents working on this project.
|
|
30
|
+
|
|
31
|
+
**→ Quick Refs:** Start with [Project Overview](#project-overview) | Set up: [Development Setup](#development-setup) | Run: [Key Commands](#key-commands)
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Project Overview
|
|
36
|
+
|
|
37
|
+
<!-- Describe your project's purpose, architecture, and key components -->
|
|
38
|
+
|
|
39
|
+
**→ Quick Refs:** Tech details: [Tech Stack](#tech-stack) | Code style: [Coding Conventions](#coding-conventions)
|
|
40
|
+
|
|
41
|
+
## Development Setup
|
|
42
|
+
|
|
43
|
+
<!-- Document setup steps: dependencies, environment variables, build commands -->
|
|
44
|
+
|
|
45
|
+
**→ Quick Refs:** Key commands: [Key Commands](#key-commands) | Testing: [Testing](#testing)
|
|
46
|
+
|
|
47
|
+
## Tech Stack
|
|
48
|
+
|
|
49
|
+
<!-- List frameworks, libraries, and tools used -->
|
|
50
|
+
|
|
51
|
+
**→ Quick Refs:** Conventions: [Coding Conventions](#coding-conventions) | Setup: [Development Setup](#development-setup)
|
|
52
|
+
|
|
53
|
+
## Coding Conventions
|
|
54
|
+
|
|
55
|
+
<!-- Document code style, naming conventions, patterns to follow -->
|
|
56
|
+
|
|
57
|
+
**→ Quick Refs:** Testing: [Testing](#testing) | Edge cases: [Edge Cases & Learnings](#edge-cases--learnings)
|
|
58
|
+
|
|
59
|
+
## Testing
|
|
60
|
+
|
|
61
|
+
<!-- Document how to run tests, what testing frameworks are used -->
|
|
62
|
+
|
|
63
|
+
**→ Quick Refs:** Commands: [Key Commands](#key-commands) | Edge cases: [Edge Cases & Learnings](#edge-cases--learnings)
|
|
64
|
+
|
|
65
|
+
## Key Commands
|
|
66
|
+
|
|
67
|
+
\`\`\`bash
|
|
68
|
+
# Add your common commands here
|
|
69
|
+
npm install
|
|
70
|
+
npm run build
|
|
71
|
+
npm run test
|
|
72
|
+
\`\`\`
|
|
73
|
+
|
|
74
|
+
**→ Quick Refs:** Setup: [Development Setup](#development-setup) | Testing: [Testing](#testing)
|
|
75
|
+
|
|
76
|
+
## Edge Cases & Learnings
|
|
77
|
+
|
|
78
|
+
<!-- Document gotchas, edge cases, and lessons learned -->
|
|
79
|
+
|
|
80
|
+
**→ Quick Refs:** Conventions: [Coding Conventions](#coding-conventions) | Testing: [Testing](#testing)
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
*This file is maintained by NodeBench MCP. Use \`update_agents_md\` to add learnings automatically.*
|
|
85
|
+
*Every section MUST include \`**→ Quick Refs:**\` for agent chunking context.*
|
|
86
86
|
`;
|
|
87
87
|
function parseSections(content) {
|
|
88
88
|
const lines = content.split("\n");
|
|
@@ -208,12 +208,12 @@ export const flywheelTools = [
|
|
|
208
208
|
cycleId,
|
|
209
209
|
title,
|
|
210
210
|
linkedEvalRun: evalRunId,
|
|
211
|
-
phase1Instructions: `Phase 1: Context Gathering — Investigate the regression.
|
|
212
|
-
The eval run "${evalRun.name}" showed regression. Research:
|
|
213
|
-
- What changed since the baseline eval?
|
|
214
|
-
- Which test cases failed that previously passed?
|
|
215
|
-
- Is this a code change, upstream API change, or data drift?
|
|
216
|
-
|
|
211
|
+
phase1Instructions: `Phase 1: Context Gathering — Investigate the regression.
|
|
212
|
+
The eval run "${evalRun.name}" showed regression. Research:
|
|
213
|
+
- What changed since the baseline eval?
|
|
214
|
+
- Which test cases failed that previously passed?
|
|
215
|
+
- Is this a code change, upstream API change, or data drift?
|
|
216
|
+
|
|
217
217
|
Start by calling search_learnings to check for known related issues.`,
|
|
218
218
|
};
|
|
219
219
|
},
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Git Workflow tools — branch compliance, PR review checklists, and merge gates.
|
|
3
|
+
*
|
|
4
|
+
* - check_git_compliance: Validate branch state, uncommitted changes, conventional commits
|
|
5
|
+
* - review_pr_checklist: Structured PR review with verification/eval cross-reference
|
|
6
|
+
* - enforce_merge_gate: Pre-merge validation combining quality gates + verification + eval
|
|
7
|
+
*
|
|
8
|
+
* All git commands are wrapped in try/catch for environments where git is unavailable.
|
|
9
|
+
*/
|
|
10
|
+
import type { McpTool } from "../types.js";
|
|
11
|
+
export declare const gitWorkflowTools: McpTool[];
|