nodebench-mcp 2.11.0 → 2.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/NODEBENCH_AGENTS.md +809 -809
- package/README.md +443 -431
- package/STYLE_GUIDE.md +477 -477
- package/dist/__tests__/evalHarness.test.js +1 -1
- package/dist/__tests__/gaiaCapabilityAudioEval.test.js +9 -14
- package/dist/__tests__/gaiaCapabilityAudioEval.test.js.map +1 -1
- package/dist/__tests__/gaiaCapabilityEval.test.js +88 -14
- package/dist/__tests__/gaiaCapabilityEval.test.js.map +1 -1
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js +9 -5
- package/dist/__tests__/gaiaCapabilityFilesEval.test.js.map +1 -1
- package/dist/__tests__/gaiaCapabilityMediaEval.test.js +165 -17
- package/dist/__tests__/gaiaCapabilityMediaEval.test.js.map +1 -1
- package/dist/__tests__/helpers/answerMatch.d.ts +36 -7
- package/dist/__tests__/helpers/answerMatch.js +224 -35
- package/dist/__tests__/helpers/answerMatch.js.map +1 -1
- package/dist/__tests__/helpers/textLlm.d.ts +1 -1
- package/dist/__tests__/presetRealWorldBench.test.d.ts +1 -0
- package/dist/__tests__/presetRealWorldBench.test.js +850 -0
- package/dist/__tests__/presetRealWorldBench.test.js.map +1 -0
- package/dist/__tests__/tools.test.js +20 -7
- package/dist/__tests__/tools.test.js.map +1 -1
- package/dist/__tests__/toolsetGatingEval.test.js +21 -11
- package/dist/__tests__/toolsetGatingEval.test.js.map +1 -1
- package/dist/db.js +21 -0
- package/dist/db.js.map +1 -1
- package/dist/index.js +424 -327
- package/dist/index.js.map +1 -1
- package/dist/tools/agentBootstrapTools.js +258 -258
- package/dist/tools/boilerplateTools.js +144 -144
- package/dist/tools/cCompilerBenchmarkTools.js +33 -33
- package/dist/tools/documentationTools.js +59 -59
- package/dist/tools/flywheelTools.js +6 -6
- package/dist/tools/gitWorkflowTools.d.ts +11 -0
- package/dist/tools/gitWorkflowTools.js +580 -0
- package/dist/tools/gitWorkflowTools.js.map +1 -0
- package/dist/tools/learningTools.js +26 -26
- package/dist/tools/localFileTools.d.ts +3 -0
- package/dist/tools/localFileTools.js +3164 -125
- package/dist/tools/localFileTools.js.map +1 -1
- package/dist/tools/metaTools.js +82 -0
- package/dist/tools/metaTools.js.map +1 -1
- package/dist/tools/parallelAgentTools.js +228 -0
- package/dist/tools/parallelAgentTools.js.map +1 -1
- package/dist/tools/patternTools.d.ts +13 -0
- package/dist/tools/patternTools.js +456 -0
- package/dist/tools/patternTools.js.map +1 -0
- package/dist/tools/reconTools.js +31 -31
- package/dist/tools/selfEvalTools.js +44 -44
- package/dist/tools/seoTools.d.ts +16 -0
- package/dist/tools/seoTools.js +866 -0
- package/dist/tools/seoTools.js.map +1 -0
- package/dist/tools/sessionMemoryTools.d.ts +15 -0
- package/dist/tools/sessionMemoryTools.js +348 -0
- package/dist/tools/sessionMemoryTools.js.map +1 -0
- package/dist/tools/toolRegistry.d.ts +4 -0
- package/dist/tools/toolRegistry.js +489 -0
- package/dist/tools/toolRegistry.js.map +1 -1
- package/dist/tools/toonTools.d.ts +15 -0
- package/dist/tools/toonTools.js +94 -0
- package/dist/tools/toonTools.js.map +1 -0
- package/dist/tools/verificationTools.js +41 -41
- package/dist/tools/visionTools.js +17 -17
- package/dist/tools/voiceBridgeTools.d.ts +15 -0
- package/dist/tools/voiceBridgeTools.js +1427 -0
- package/dist/tools/voiceBridgeTools.js.map +1 -0
- package/dist/tools/webTools.js +18 -18
- package/package.json +102 -101
package/STYLE_GUIDE.md
CHANGED
|
@@ -1,477 +1,477 @@
|
|
|
1
|
-
# NodeBench MCP Style Guide
|
|
2
|
-
|
|
3
|
-
Based on OpenClaw patterns and industry best practices for autonomous agent systems.
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## Directory Organization
|
|
8
|
-
|
|
9
|
-
### Root Structure
|
|
10
|
-
|
|
11
|
-
```
|
|
12
|
-
packages/mcp-local/
|
|
13
|
-
├── src/
|
|
14
|
-
│ ├── tools/ # MCP tool implementations (grouped by domain)
|
|
15
|
-
│ ├── __tests__/ # Test files mirror src/ structure
|
|
16
|
-
│ ├── db.ts # Database initialization
|
|
17
|
-
│ ├── types.ts # Shared TypeScript types
|
|
18
|
-
│ └── index.ts # Entry point, tool registration
|
|
19
|
-
├── skills/ # Portable agent skills (SKILL.md format)
|
|
20
|
-
├── templates/ # Infrastructure scaffolding templates
|
|
21
|
-
├── docs/
|
|
22
|
-
│ ├── AGENTS.md # Agent operating instructions
|
|
23
|
-
│ ├── TOOLS.md # Tool catalog with examples
|
|
24
|
-
│ └── SOUL.md # Agent personality/values
|
|
25
|
-
├── STYLE_GUIDE.md # This file
|
|
26
|
-
├── NODEBENCH_AGENTS.md # Public-ready agent protocol
|
|
27
|
-
├── package.json
|
|
28
|
-
├── tsconfig.json
|
|
29
|
-
└── README.md
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
### Tool File Organization
|
|
33
|
-
|
|
34
|
-
Each domain gets its own file in `src/tools/`:
|
|
35
|
-
|
|
36
|
-
```
|
|
37
|
-
src/tools/
|
|
38
|
-
├── verificationTools.ts # 6-phase verification cycle
|
|
39
|
-
├── evalTools.ts # Eval-driven development
|
|
40
|
-
├── qualityGateTools.ts # Boolean check gates
|
|
41
|
-
├── learningTools.ts # Persistent knowledge base
|
|
42
|
-
├── flywheelTools.ts # AI Flywheel orchestration
|
|
43
|
-
├── reconTools.ts # Research & discovery
|
|
44
|
-
├── uiCaptureTools.ts # Screenshot capture
|
|
45
|
-
├── visionTools.ts # AI vision analysis
|
|
46
|
-
├── webTools.ts # Web search & fetch
|
|
47
|
-
├── githubTools.ts # GitHub API tools
|
|
48
|
-
├── documentationTools.ts # AGENTS.md maintenance
|
|
49
|
-
├── agentBootstrapTools.ts # Self-discovery & implementation
|
|
50
|
-
└── metaTools.ts # Tool discovery & methodology
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
---
|
|
54
|
-
|
|
55
|
-
## Naming Conventions
|
|
56
|
-
|
|
57
|
-
### Files
|
|
58
|
-
|
|
59
|
-
| Type | Convention | Example |
|
|
60
|
-
|------|------------|---------|
|
|
61
|
-
| Tool modules | `{domain}Tools.ts` | `verificationTools.ts` |
|
|
62
|
-
| Query files | `{domain}Queries.ts` | `agentLoopQueries.ts` |
|
|
63
|
-
| Action files | `{domain}Actions.ts` | `postingActions.ts` |
|
|
64
|
-
| Schema files | `schema.ts` | `schema.ts` |
|
|
65
|
-
| Test files | `{module}.test.ts` | `tools.test.ts` |
|
|
66
|
-
|
|
67
|
-
### Functions
|
|
68
|
-
|
|
69
|
-
| Type | Convention | Example |
|
|
70
|
-
|------|------------|---------|
|
|
71
|
-
| MCP tool handlers | `camelCase` verb | `discoverInfrastructure` |
|
|
72
|
-
| Internal helpers | `_camelCase` | `_buildSourceCitation` |
|
|
73
|
-
| Type guards | `is{Type}` | `isVerificationResult` |
|
|
74
|
-
| Factory functions | `create{Thing}` | `createMetaTools` |
|
|
75
|
-
|
|
76
|
-
### Types
|
|
77
|
-
|
|
78
|
-
| Type | Convention | Example |
|
|
79
|
-
|------|------------|---------|
|
|
80
|
-
| Interfaces | `PascalCase` | `VerificationStep` |
|
|
81
|
-
| Type aliases | `PascalCase` | `SourceCitation` |
|
|
82
|
-
| Enums | `PascalCase` | `RiskTier` |
|
|
83
|
-
| Constants | `SCREAMING_SNAKE` | `AUTHORITATIVE_SOURCES` |
|
|
84
|
-
|
|
85
|
-
---
|
|
86
|
-
|
|
87
|
-
## SKILL.md Format (OpenClaw Standard)
|
|
88
|
-
|
|
89
|
-
```yaml
|
|
90
|
-
---
|
|
91
|
-
name: skill-name
|
|
92
|
-
version: 1.0.0
|
|
93
|
-
author: username
|
|
94
|
-
triggers:
|
|
95
|
-
- keyword1
|
|
96
|
-
- keyword2
|
|
97
|
-
requires:
|
|
98
|
-
- tool1
|
|
99
|
-
- tool2
|
|
100
|
-
---
|
|
101
|
-
|
|
102
|
-
# Skill Name
|
|
103
|
-
|
|
104
|
-
## Purpose
|
|
105
|
-
One-sentence description.
|
|
106
|
-
|
|
107
|
-
## When to Use
|
|
108
|
-
Bullet points.
|
|
109
|
-
|
|
110
|
-
## Steps
|
|
111
|
-
1. First step
|
|
112
|
-
2. Second step
|
|
113
|
-
|
|
114
|
-
## Example
|
|
115
|
-
```typescript
|
|
116
|
-
// Usage example
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
## References
|
|
120
|
-
- [Source](url)
|
|
121
|
-
```
|
|
122
|
-
|
|
123
|
-
---
|
|
124
|
-
|
|
125
|
-
## Agent Instruction Files
|
|
126
|
-
|
|
127
|
-
### AGENTS.md (Required)
|
|
128
|
-
|
|
129
|
-
The primary instruction file. Contains:
|
|
130
|
-
|
|
131
|
-
1. **Quick Setup** — Copy-paste installation
|
|
132
|
-
2. **AI Flywheel** — Mandatory verification steps
|
|
133
|
-
3. **Tool Categories** — Quick reference table
|
|
134
|
-
4. **Workflow Guides** — Common patterns
|
|
135
|
-
5. **Environment Setup** — API keys, dependencies
|
|
136
|
-
|
|
137
|
-
### SOUL.md (Optional)
|
|
138
|
-
|
|
139
|
-
Defines agent personality and values:
|
|
140
|
-
|
|
141
|
-
```markdown
|
|
142
|
-
# Agent Soul
|
|
143
|
-
|
|
144
|
-
## Identity
|
|
145
|
-
Who the agent is and what it represents.
|
|
146
|
-
|
|
147
|
-
## Values
|
|
148
|
-
- Value 1: Description
|
|
149
|
-
- Value 2: Description
|
|
150
|
-
|
|
151
|
-
## Boundaries
|
|
152
|
-
- Will not: X
|
|
153
|
-
- Will not: Y
|
|
154
|
-
|
|
155
|
-
## Voice
|
|
156
|
-
How the agent communicates.
|
|
157
|
-
```
|
|
158
|
-
|
|
159
|
-
### TOOLS.md (Optional)
|
|
160
|
-
|
|
161
|
-
Detailed tool documentation with examples:
|
|
162
|
-
|
|
163
|
-
```markdown
|
|
164
|
-
# Tool Catalog
|
|
165
|
-
|
|
166
|
-
## Category: Verification
|
|
167
|
-
|
|
168
|
-
### start_verification_cycle
|
|
169
|
-
**Purpose:** Begin 6-phase verification
|
|
170
|
-
**Input:** `{ goal: string, context?: string }`
|
|
171
|
-
**Output:** `{ cycleId: string, status: "active" }`
|
|
172
|
-
|
|
173
|
-
**Example:**
|
|
174
|
-
```json
|
|
175
|
-
{
|
|
176
|
-
"goal": "Implement OAuth flow",
|
|
177
|
-
"context": "Using Convex + Auth0"
|
|
178
|
-
}
|
|
179
|
-
```
|
|
180
|
-
```
|
|
181
|
-
|
|
182
|
-
---
|
|
183
|
-
|
|
184
|
-
## Code Patterns
|
|
185
|
-
|
|
186
|
-
### Tool Implementation
|
|
187
|
-
|
|
188
|
-
```typescript
|
|
189
|
-
// 1. Types at top
|
|
190
|
-
interface ToolInput {
|
|
191
|
-
param: string;
|
|
192
|
-
optional?: number;
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
interface ToolOutput {
|
|
196
|
-
result: string;
|
|
197
|
-
metadata: object;
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
// 2. Constants after types
|
|
201
|
-
const DEFAULTS = {
|
|
202
|
-
timeout: 30000,
|
|
203
|
-
retries: 3,
|
|
204
|
-
};
|
|
205
|
-
|
|
206
|
-
// 3. Handler function
|
|
207
|
-
async function handleTool(args: ToolInput): Promise<ToolOutput> {
|
|
208
|
-
// Implementation
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
// 4. Export tool definition
|
|
212
|
-
export const myTools: McpTool[] = [
|
|
213
|
-
{
|
|
214
|
-
name: "tool_name",
|
|
215
|
-
description: "Concise description for agent discovery",
|
|
216
|
-
inputSchema: { /* JSON Schema */ },
|
|
217
|
-
handler: handleTool,
|
|
218
|
-
},
|
|
219
|
-
];
|
|
220
|
-
```
|
|
221
|
-
|
|
222
|
-
### Error Handling
|
|
223
|
-
|
|
224
|
-
```typescript
|
|
225
|
-
// Prefer specific error types
|
|
226
|
-
class VerificationError extends Error {
|
|
227
|
-
constructor(
|
|
228
|
-
message: string,
|
|
229
|
-
public readonly phase: number,
|
|
230
|
-
public readonly recoverable: boolean
|
|
231
|
-
) {
|
|
232
|
-
super(message);
|
|
233
|
-
this.name = "VerificationError";
|
|
234
|
-
}
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
// Throw with context
|
|
238
|
-
throw new VerificationError(
|
|
239
|
-
"Gap analysis found dead code",
|
|
240
|
-
4,
|
|
241
|
-
true
|
|
242
|
-
);
|
|
243
|
-
```
|
|
244
|
-
|
|
245
|
-
### Async Patterns
|
|
246
|
-
|
|
247
|
-
```typescript
|
|
248
|
-
// Parallel when independent
|
|
249
|
-
const [result1, result2] = await Promise.all([
|
|
250
|
-
fetchFirst(),
|
|
251
|
-
fetchSecond(),
|
|
252
|
-
]);
|
|
253
|
-
|
|
254
|
-
// Sequential when dependent
|
|
255
|
-
const first = await fetchFirst();
|
|
256
|
-
const second = await fetchWithFirst(first.id);
|
|
257
|
-
```
|
|
258
|
-
|
|
259
|
-
---
|
|
260
|
-
|
|
261
|
-
## Risk-Tiered Execution
|
|
262
|
-
|
|
263
|
-
Actions are classified by reversibility and blast radius:
|
|
264
|
-
|
|
265
|
-
### Tier 1: Low Risk (Auto-Approved)
|
|
266
|
-
- Reading files, searching, analyzing
|
|
267
|
-
- Creating local temp files
|
|
268
|
-
- Running static analysis
|
|
269
|
-
|
|
270
|
-
### Tier 2: Medium Risk (Log + Proceed)
|
|
271
|
-
- Writing to local files
|
|
272
|
-
- Running tests
|
|
273
|
-
- Creating branches
|
|
274
|
-
|
|
275
|
-
### Tier 3: High Risk (Require Confirmation)
|
|
276
|
-
- Pushing to remote
|
|
277
|
-
- Posting to external services
|
|
278
|
-
- Modifying production config
|
|
279
|
-
- Deleting files/branches
|
|
280
|
-
|
|
281
|
-
```typescript
|
|
282
|
-
const RISK_TIERS = {
|
|
283
|
-
low: { autoApprove: true, log: false },
|
|
284
|
-
medium: { autoApprove: true, log: true },
|
|
285
|
-
high: { autoApprove: false, requireConfirmation: true },
|
|
286
|
-
};
|
|
287
|
-
```
|
|
288
|
-
|
|
289
|
-
---
|
|
290
|
-
|
|
291
|
-
## Re-Update Before Create Pattern
|
|
292
|
-
|
|
293
|
-
**CRITICAL:** Before creating any new file, always check if updating existing instructions would be better.
|
|
294
|
-
|
|
295
|
-
```typescript
|
|
296
|
-
async function beforeCreate(target: string): Promise<"update" | "create"> {
|
|
297
|
-
// 1. Check if similar file exists
|
|
298
|
-
const existing = await findSimilar(target);
|
|
299
|
-
if (existing) {
|
|
300
|
-
// 2. Check if updating is better
|
|
301
|
-
const updateBenefit = await evaluateUpdate(existing, target);
|
|
302
|
-
if (updateBenefit > 0.7) {
|
|
303
|
-
return "update";
|
|
304
|
-
}
|
|
305
|
-
}
|
|
306
|
-
return "create";
|
|
307
|
-
}
|
|
308
|
-
```
|
|
309
|
-
|
|
310
|
-
### Checklist Before Creating Files
|
|
311
|
-
|
|
312
|
-
1. Does a similar file already exist?
|
|
313
|
-
2. Would adding to AGENTS.md be clearer?
|
|
314
|
-
3. Is this a one-time need or a reusable pattern?
|
|
315
|
-
4. Does the team have a convention for this type of content?
|
|
316
|
-
|
|
317
|
-
---
|
|
318
|
-
|
|
319
|
-
## Autonomous Loop Pattern (Ralph Wiggum)
|
|
320
|
-
|
|
321
|
-
For long-running autonomous agents, implement stop-hooks:
|
|
322
|
-
|
|
323
|
-
```typescript
|
|
324
|
-
interface AutonomousLoopConfig {
|
|
325
|
-
maxIterations: number;
|
|
326
|
-
maxDurationMs: number;
|
|
327
|
-
checkpointInterval: number;
|
|
328
|
-
stopConditions: StopCondition[];
|
|
329
|
-
}
|
|
330
|
-
|
|
331
|
-
async function autonomousLoop(config: AutonomousLoopConfig) {
|
|
332
|
-
let iteration = 0;
|
|
333
|
-
const startTime = Date.now();
|
|
334
|
-
|
|
335
|
-
while (true) {
|
|
336
|
-
// 1. Check stop conditions
|
|
337
|
-
for (const condition of config.stopConditions) {
|
|
338
|
-
if (await condition.check()) {
|
|
339
|
-
return { stopped: true, reason: condition.name };
|
|
340
|
-
}
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
// 2. Check limits
|
|
344
|
-
if (iteration >= config.maxIterations) {
|
|
345
|
-
return { stopped: true, reason: "max_iterations" };
|
|
346
|
-
}
|
|
347
|
-
if (Date.now() - startTime > config.maxDurationMs) {
|
|
348
|
-
return { stopped: true, reason: "timeout" };
|
|
349
|
-
}
|
|
350
|
-
|
|
351
|
-
// 3. Checkpoint
|
|
352
|
-
if (iteration % config.checkpointInterval === 0) {
|
|
353
|
-
await saveCheckpoint(iteration);
|
|
354
|
-
}
|
|
355
|
-
|
|
356
|
-
// 4. Execute work
|
|
357
|
-
await executeWorkUnit();
|
|
358
|
-
iteration++;
|
|
359
|
-
}
|
|
360
|
-
}
|
|
361
|
-
```
|
|
362
|
-
|
|
363
|
-
---
|
|
364
|
-
|
|
365
|
-
## Directory Scaffolding
|
|
366
|
-
|
|
367
|
-
When self-implementing infrastructure, use this structure:
|
|
368
|
-
|
|
369
|
-
```typescript
|
|
370
|
-
const SCAFFOLD_STRUCTURE = {
|
|
371
|
-
agent_loop: {
|
|
372
|
-
files: [
|
|
373
|
-
"convex/domains/agents/agentLoop.ts",
|
|
374
|
-
"convex/domains/agents/agentLoopQueries.ts",
|
|
375
|
-
"convex/domains/agents/schema.ts",
|
|
376
|
-
],
|
|
377
|
-
testFiles: [
|
|
378
|
-
"convex/domains/agents/__tests__/agentLoop.test.ts",
|
|
379
|
-
],
|
|
380
|
-
},
|
|
381
|
-
telemetry: {
|
|
382
|
-
files: [
|
|
383
|
-
"convex/domains/observability/telemetry.ts",
|
|
384
|
-
"convex/domains/observability/spans.ts",
|
|
385
|
-
"convex/domains/observability/schema.ts",
|
|
386
|
-
],
|
|
387
|
-
testFiles: [
|
|
388
|
-
"convex/domains/observability/__tests__/telemetry.test.ts",
|
|
389
|
-
],
|
|
390
|
-
},
|
|
391
|
-
// ... etc
|
|
392
|
-
};
|
|
393
|
-
```
|
|
394
|
-
|
|
395
|
-
---
|
|
396
|
-
|
|
397
|
-
## Source Citation Format
|
|
398
|
-
|
|
399
|
-
Always cite authoritative sources:
|
|
400
|
-
|
|
401
|
-
```typescript
|
|
402
|
-
interface SourceCitation {
|
|
403
|
-
title: string; // Human-readable title
|
|
404
|
-
url: string; // Direct URL
|
|
405
|
-
authority: "tier1" | "tier2" | "tier3"; // Reliability tier
|
|
406
|
-
publishedAt?: string; // ISO date if known
|
|
407
|
-
relevance: string; // Why this source matters
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
// Tier 1: Official documentation (Anthropic, OpenAI, etc.)
|
|
411
|
-
// Tier 2: Trusted community (popular GitHub repos, well-known blogs)
|
|
412
|
-
// Tier 3: General community (forums, smaller repos)
|
|
413
|
-
```
|
|
414
|
-
|
|
415
|
-
---
|
|
416
|
-
|
|
417
|
-
## Testing Conventions
|
|
418
|
-
|
|
419
|
-
### Unit Tests
|
|
420
|
-
|
|
421
|
-
```typescript
|
|
422
|
-
describe("tool: discover_infrastructure", () => {
|
|
423
|
-
it("should detect agent_loop patterns", async () => {
|
|
424
|
-
const result = await discoverInfrastructure({
|
|
425
|
-
categories: ["agent_loop"],
|
|
426
|
-
});
|
|
427
|
-
expect(result.discovered).toContainEqual(
|
|
428
|
-
expect.objectContaining({ category: "agent_loop" })
|
|
429
|
-
);
|
|
430
|
-
});
|
|
431
|
-
|
|
432
|
-
it("should return bootstrap plan for missing components", async () => {
|
|
433
|
-
const result = await discoverInfrastructure({});
|
|
434
|
-
expect(result.bootstrapPlan).toBeDefined();
|
|
435
|
-
expect(result.bootstrapPlan.length).toBeGreaterThan(0);
|
|
436
|
-
});
|
|
437
|
-
});
|
|
438
|
-
```
|
|
439
|
-
|
|
440
|
-
### Integration Tests
|
|
441
|
-
|
|
442
|
-
```typescript
|
|
443
|
-
describe("integration: triple verification flow", () => {
|
|
444
|
-
it("should complete V1 → V2 → V3 pipeline", async () => {
|
|
445
|
-
const result = await tripleVerify({
|
|
446
|
-
target: "my-feature",
|
|
447
|
-
scope: "full",
|
|
448
|
-
});
|
|
449
|
-
expect(result.verification1_internal.status).toBeDefined();
|
|
450
|
-
expect(result.verification2_external.status).toBeDefined();
|
|
451
|
-
expect(result.verification3_synthesis.status).toBeDefined();
|
|
452
|
-
});
|
|
453
|
-
});
|
|
454
|
-
```
|
|
455
|
-
|
|
456
|
-
---
|
|
457
|
-
|
|
458
|
-
## Commit Message Format
|
|
459
|
-
|
|
460
|
-
```
|
|
461
|
-
type(scope): short description
|
|
462
|
-
|
|
463
|
-
- Bullet point details
|
|
464
|
-
- Another detail
|
|
465
|
-
|
|
466
|
-
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
|
|
467
|
-
```
|
|
468
|
-
|
|
469
|
-
Types: `feat`, `fix`, `docs`, `test`, `refactor`, `style`, `chore`
|
|
470
|
-
|
|
471
|
-
---
|
|
472
|
-
|
|
473
|
-
## Version History
|
|
474
|
-
|
|
475
|
-
| Version | Date | Changes |
|
|
476
|
-
|---------|------|---------|
|
|
477
|
-
| 1.0.0 | 2026-02-05 | Initial style guide based on OpenClaw patterns |
|
|
1
|
+
# NodeBench MCP Style Guide
|
|
2
|
+
|
|
3
|
+
Based on OpenClaw patterns and industry best practices for autonomous agent systems.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Directory Organization
|
|
8
|
+
|
|
9
|
+
### Root Structure
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
packages/mcp-local/
|
|
13
|
+
├── src/
|
|
14
|
+
│ ├── tools/ # MCP tool implementations (grouped by domain)
|
|
15
|
+
│ ├── __tests__/ # Test files mirror src/ structure
|
|
16
|
+
│ ├── db.ts # Database initialization
|
|
17
|
+
│ ├── types.ts # Shared TypeScript types
|
|
18
|
+
│ └── index.ts # Entry point, tool registration
|
|
19
|
+
├── skills/ # Portable agent skills (SKILL.md format)
|
|
20
|
+
├── templates/ # Infrastructure scaffolding templates
|
|
21
|
+
├── docs/
|
|
22
|
+
│ ├── AGENTS.md # Agent operating instructions
|
|
23
|
+
│ ├── TOOLS.md # Tool catalog with examples
|
|
24
|
+
│ └── SOUL.md # Agent personality/values
|
|
25
|
+
├── STYLE_GUIDE.md # This file
|
|
26
|
+
├── NODEBENCH_AGENTS.md # Public-ready agent protocol
|
|
27
|
+
├── package.json
|
|
28
|
+
├── tsconfig.json
|
|
29
|
+
└── README.md
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Tool File Organization
|
|
33
|
+
|
|
34
|
+
Each domain gets its own file in `src/tools/`:
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
src/tools/
|
|
38
|
+
├── verificationTools.ts # 6-phase verification cycle
|
|
39
|
+
├── evalTools.ts # Eval-driven development
|
|
40
|
+
├── qualityGateTools.ts # Boolean check gates
|
|
41
|
+
├── learningTools.ts # Persistent knowledge base
|
|
42
|
+
├── flywheelTools.ts # AI Flywheel orchestration
|
|
43
|
+
├── reconTools.ts # Research & discovery
|
|
44
|
+
├── uiCaptureTools.ts # Screenshot capture
|
|
45
|
+
├── visionTools.ts # AI vision analysis
|
|
46
|
+
├── webTools.ts # Web search & fetch
|
|
47
|
+
├── githubTools.ts # GitHub API tools
|
|
48
|
+
├── documentationTools.ts # AGENTS.md maintenance
|
|
49
|
+
├── agentBootstrapTools.ts # Self-discovery & implementation
|
|
50
|
+
└── metaTools.ts # Tool discovery & methodology
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Naming Conventions
|
|
56
|
+
|
|
57
|
+
### Files
|
|
58
|
+
|
|
59
|
+
| Type | Convention | Example |
|
|
60
|
+
|------|------------|---------|
|
|
61
|
+
| Tool modules | `{domain}Tools.ts` | `verificationTools.ts` |
|
|
62
|
+
| Query files | `{domain}Queries.ts` | `agentLoopQueries.ts` |
|
|
63
|
+
| Action files | `{domain}Actions.ts` | `postingActions.ts` |
|
|
64
|
+
| Schema files | `schema.ts` | `schema.ts` |
|
|
65
|
+
| Test files | `{module}.test.ts` | `tools.test.ts` |
|
|
66
|
+
|
|
67
|
+
### Functions
|
|
68
|
+
|
|
69
|
+
| Type | Convention | Example |
|
|
70
|
+
|------|------------|---------|
|
|
71
|
+
| MCP tool handlers | `camelCase` verb | `discoverInfrastructure` |
|
|
72
|
+
| Internal helpers | `_camelCase` | `_buildSourceCitation` |
|
|
73
|
+
| Type guards | `is{Type}` | `isVerificationResult` |
|
|
74
|
+
| Factory functions | `create{Thing}` | `createMetaTools` |
|
|
75
|
+
|
|
76
|
+
### Types
|
|
77
|
+
|
|
78
|
+
| Type | Convention | Example |
|
|
79
|
+
|------|------------|---------|
|
|
80
|
+
| Interfaces | `PascalCase` | `VerificationStep` |
|
|
81
|
+
| Type aliases | `PascalCase` | `SourceCitation` |
|
|
82
|
+
| Enums | `PascalCase` | `RiskTier` |
|
|
83
|
+
| Constants | `SCREAMING_SNAKE` | `AUTHORITATIVE_SOURCES` |
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## SKILL.md Format (OpenClaw Standard)
|
|
88
|
+
|
|
89
|
+
```yaml
|
|
90
|
+
---
|
|
91
|
+
name: skill-name
|
|
92
|
+
version: 1.0.0
|
|
93
|
+
author: username
|
|
94
|
+
triggers:
|
|
95
|
+
- keyword1
|
|
96
|
+
- keyword2
|
|
97
|
+
requires:
|
|
98
|
+
- tool1
|
|
99
|
+
- tool2
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
# Skill Name
|
|
103
|
+
|
|
104
|
+
## Purpose
|
|
105
|
+
One-sentence description.
|
|
106
|
+
|
|
107
|
+
## When to Use
|
|
108
|
+
Bullet points.
|
|
109
|
+
|
|
110
|
+
## Steps
|
|
111
|
+
1. First step
|
|
112
|
+
2. Second step
|
|
113
|
+
|
|
114
|
+
## Example
|
|
115
|
+
```typescript
|
|
116
|
+
// Usage example
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## References
|
|
120
|
+
- [Source](url)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Agent Instruction Files
|
|
126
|
+
|
|
127
|
+
### AGENTS.md (Required)
|
|
128
|
+
|
|
129
|
+
The primary instruction file. Contains:
|
|
130
|
+
|
|
131
|
+
1. **Quick Setup** — Copy-paste installation
|
|
132
|
+
2. **AI Flywheel** — Mandatory verification steps
|
|
133
|
+
3. **Tool Categories** — Quick reference table
|
|
134
|
+
4. **Workflow Guides** — Common patterns
|
|
135
|
+
5. **Environment Setup** — API keys, dependencies
|
|
136
|
+
|
|
137
|
+
### SOUL.md (Optional)
|
|
138
|
+
|
|
139
|
+
Defines agent personality and values:
|
|
140
|
+
|
|
141
|
+
```markdown
|
|
142
|
+
# Agent Soul
|
|
143
|
+
|
|
144
|
+
## Identity
|
|
145
|
+
Who the agent is and what it represents.
|
|
146
|
+
|
|
147
|
+
## Values
|
|
148
|
+
- Value 1: Description
|
|
149
|
+
- Value 2: Description
|
|
150
|
+
|
|
151
|
+
## Boundaries
|
|
152
|
+
- Will not: X
|
|
153
|
+
- Will not: Y
|
|
154
|
+
|
|
155
|
+
## Voice
|
|
156
|
+
How the agent communicates.
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### TOOLS.md (Optional)
|
|
160
|
+
|
|
161
|
+
Detailed tool documentation with examples:
|
|
162
|
+
|
|
163
|
+
```markdown
|
|
164
|
+
# Tool Catalog
|
|
165
|
+
|
|
166
|
+
## Category: Verification
|
|
167
|
+
|
|
168
|
+
### start_verification_cycle
|
|
169
|
+
**Purpose:** Begin 6-phase verification
|
|
170
|
+
**Input:** `{ goal: string, context?: string }`
|
|
171
|
+
**Output:** `{ cycleId: string, status: "active" }`
|
|
172
|
+
|
|
173
|
+
**Example:**
|
|
174
|
+
```json
|
|
175
|
+
{
|
|
176
|
+
"goal": "Implement OAuth flow",
|
|
177
|
+
"context": "Using Convex + Auth0"
|
|
178
|
+
}
|
|
179
|
+
```
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## Code Patterns
|
|
185
|
+
|
|
186
|
+
### Tool Implementation
|
|
187
|
+
|
|
188
|
+
```typescript
|
|
189
|
+
// 1. Types at top
|
|
190
|
+
interface ToolInput {
|
|
191
|
+
param: string;
|
|
192
|
+
optional?: number;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
interface ToolOutput {
|
|
196
|
+
result: string;
|
|
197
|
+
metadata: object;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
// 2. Constants after types
|
|
201
|
+
const DEFAULTS = {
|
|
202
|
+
timeout: 30000,
|
|
203
|
+
retries: 3,
|
|
204
|
+
};
|
|
205
|
+
|
|
206
|
+
// 3. Handler function
|
|
207
|
+
async function handleTool(args: ToolInput): Promise<ToolOutput> {
|
|
208
|
+
// Implementation
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// 4. Export tool definition
|
|
212
|
+
export const myTools: McpTool[] = [
|
|
213
|
+
{
|
|
214
|
+
name: "tool_name",
|
|
215
|
+
description: "Concise description for agent discovery",
|
|
216
|
+
inputSchema: { /* JSON Schema */ },
|
|
217
|
+
handler: handleTool,
|
|
218
|
+
},
|
|
219
|
+
];
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### Error Handling
|
|
223
|
+
|
|
224
|
+
```typescript
|
|
225
|
+
// Prefer specific error types
|
|
226
|
+
class VerificationError extends Error {
|
|
227
|
+
constructor(
|
|
228
|
+
message: string,
|
|
229
|
+
public readonly phase: number,
|
|
230
|
+
public readonly recoverable: boolean
|
|
231
|
+
) {
|
|
232
|
+
super(message);
|
|
233
|
+
this.name = "VerificationError";
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
// Throw with context
|
|
238
|
+
throw new VerificationError(
|
|
239
|
+
"Gap analysis found dead code",
|
|
240
|
+
4,
|
|
241
|
+
true
|
|
242
|
+
);
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### Async Patterns
|
|
246
|
+
|
|
247
|
+
```typescript
|
|
248
|
+
// Parallel when independent
|
|
249
|
+
const [result1, result2] = await Promise.all([
|
|
250
|
+
fetchFirst(),
|
|
251
|
+
fetchSecond(),
|
|
252
|
+
]);
|
|
253
|
+
|
|
254
|
+
// Sequential when dependent
|
|
255
|
+
const first = await fetchFirst();
|
|
256
|
+
const second = await fetchWithFirst(first.id);
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## Risk-Tiered Execution
|
|
262
|
+
|
|
263
|
+
Actions are classified by reversibility and blast radius:
|
|
264
|
+
|
|
265
|
+
### Tier 1: Low Risk (Auto-Approved)
|
|
266
|
+
- Reading files, searching, analyzing
|
|
267
|
+
- Creating local temp files
|
|
268
|
+
- Running static analysis
|
|
269
|
+
|
|
270
|
+
### Tier 2: Medium Risk (Log + Proceed)
|
|
271
|
+
- Writing to local files
|
|
272
|
+
- Running tests
|
|
273
|
+
- Creating branches
|
|
274
|
+
|
|
275
|
+
### Tier 3: High Risk (Require Confirmation)
|
|
276
|
+
- Pushing to remote
|
|
277
|
+
- Posting to external services
|
|
278
|
+
- Modifying production config
|
|
279
|
+
- Deleting files/branches
|
|
280
|
+
|
|
281
|
+
```typescript
|
|
282
|
+
const RISK_TIERS = {
|
|
283
|
+
low: { autoApprove: true, log: false },
|
|
284
|
+
medium: { autoApprove: true, log: true },
|
|
285
|
+
high: { autoApprove: false, requireConfirmation: true },
|
|
286
|
+
};
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
---
|
|
290
|
+
|
|
291
|
+
## Re-Update Before Create Pattern
|
|
292
|
+
|
|
293
|
+
**CRITICAL:** Before creating any new file, always check if updating existing instructions would be better.
|
|
294
|
+
|
|
295
|
+
```typescript
|
|
296
|
+
async function beforeCreate(target: string): Promise<"update" | "create"> {
|
|
297
|
+
// 1. Check if similar file exists
|
|
298
|
+
const existing = await findSimilar(target);
|
|
299
|
+
if (existing) {
|
|
300
|
+
// 2. Check if updating is better
|
|
301
|
+
const updateBenefit = await evaluateUpdate(existing, target);
|
|
302
|
+
if (updateBenefit > 0.7) {
|
|
303
|
+
return "update";
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
return "create";
|
|
307
|
+
}
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
### Checklist Before Creating Files
|
|
311
|
+
|
|
312
|
+
1. Does a similar file already exist?
|
|
313
|
+
2. Would adding to AGENTS.md be clearer?
|
|
314
|
+
3. Is this a one-time need or a reusable pattern?
|
|
315
|
+
4. Does the team have a convention for this type of content?
|
|
316
|
+
|
|
317
|
+
---
|
|
318
|
+
|
|
319
|
+
## Autonomous Loop Pattern (Ralph Wiggum)
|
|
320
|
+
|
|
321
|
+
For long-running autonomous agents, implement stop-hooks:
|
|
322
|
+
|
|
323
|
+
```typescript
|
|
324
|
+
interface AutonomousLoopConfig {
|
|
325
|
+
maxIterations: number;
|
|
326
|
+
maxDurationMs: number;
|
|
327
|
+
checkpointInterval: number;
|
|
328
|
+
stopConditions: StopCondition[];
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
async function autonomousLoop(config: AutonomousLoopConfig) {
|
|
332
|
+
let iteration = 0;
|
|
333
|
+
const startTime = Date.now();
|
|
334
|
+
|
|
335
|
+
while (true) {
|
|
336
|
+
// 1. Check stop conditions
|
|
337
|
+
for (const condition of config.stopConditions) {
|
|
338
|
+
if (await condition.check()) {
|
|
339
|
+
return { stopped: true, reason: condition.name };
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
// 2. Check limits
|
|
344
|
+
if (iteration >= config.maxIterations) {
|
|
345
|
+
return { stopped: true, reason: "max_iterations" };
|
|
346
|
+
}
|
|
347
|
+
if (Date.now() - startTime > config.maxDurationMs) {
|
|
348
|
+
return { stopped: true, reason: "timeout" };
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
// 3. Checkpoint
|
|
352
|
+
if (iteration % config.checkpointInterval === 0) {
|
|
353
|
+
await saveCheckpoint(iteration);
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
// 4. Execute work
|
|
357
|
+
await executeWorkUnit();
|
|
358
|
+
iteration++;
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
---
|
|
364
|
+
|
|
365
|
+
## Directory Scaffolding
|
|
366
|
+
|
|
367
|
+
When self-implementing infrastructure, use this structure:
|
|
368
|
+
|
|
369
|
+
```typescript
|
|
370
|
+
const SCAFFOLD_STRUCTURE = {
|
|
371
|
+
agent_loop: {
|
|
372
|
+
files: [
|
|
373
|
+
"convex/domains/agents/agentLoop.ts",
|
|
374
|
+
"convex/domains/agents/agentLoopQueries.ts",
|
|
375
|
+
"convex/domains/agents/schema.ts",
|
|
376
|
+
],
|
|
377
|
+
testFiles: [
|
|
378
|
+
"convex/domains/agents/__tests__/agentLoop.test.ts",
|
|
379
|
+
],
|
|
380
|
+
},
|
|
381
|
+
telemetry: {
|
|
382
|
+
files: [
|
|
383
|
+
"convex/domains/observability/telemetry.ts",
|
|
384
|
+
"convex/domains/observability/spans.ts",
|
|
385
|
+
"convex/domains/observability/schema.ts",
|
|
386
|
+
],
|
|
387
|
+
testFiles: [
|
|
388
|
+
"convex/domains/observability/__tests__/telemetry.test.ts",
|
|
389
|
+
],
|
|
390
|
+
},
|
|
391
|
+
// ... etc
|
|
392
|
+
};
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
---
|
|
396
|
+
|
|
397
|
+
## Source Citation Format
|
|
398
|
+
|
|
399
|
+
Always cite authoritative sources:
|
|
400
|
+
|
|
401
|
+
```typescript
|
|
402
|
+
interface SourceCitation {
|
|
403
|
+
title: string; // Human-readable title
|
|
404
|
+
url: string; // Direct URL
|
|
405
|
+
authority: "tier1" | "tier2" | "tier3"; // Reliability tier
|
|
406
|
+
publishedAt?: string; // ISO date if known
|
|
407
|
+
relevance: string; // Why this source matters
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
// Tier 1: Official documentation (Anthropic, OpenAI, etc.)
|
|
411
|
+
// Tier 2: Trusted community (popular GitHub repos, well-known blogs)
|
|
412
|
+
// Tier 3: General community (forums, smaller repos)
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
---
|
|
416
|
+
|
|
417
|
+
## Testing Conventions
|
|
418
|
+
|
|
419
|
+
### Unit Tests
|
|
420
|
+
|
|
421
|
+
```typescript
|
|
422
|
+
describe("tool: discover_infrastructure", () => {
|
|
423
|
+
it("should detect agent_loop patterns", async () => {
|
|
424
|
+
const result = await discoverInfrastructure({
|
|
425
|
+
categories: ["agent_loop"],
|
|
426
|
+
});
|
|
427
|
+
expect(result.discovered).toContainEqual(
|
|
428
|
+
expect.objectContaining({ category: "agent_loop" })
|
|
429
|
+
);
|
|
430
|
+
});
|
|
431
|
+
|
|
432
|
+
it("should return bootstrap plan for missing components", async () => {
|
|
433
|
+
const result = await discoverInfrastructure({});
|
|
434
|
+
expect(result.bootstrapPlan).toBeDefined();
|
|
435
|
+
expect(result.bootstrapPlan.length).toBeGreaterThan(0);
|
|
436
|
+
});
|
|
437
|
+
});
|
|
438
|
+
```
|
|
439
|
+
|
|
440
|
+
### Integration Tests
|
|
441
|
+
|
|
442
|
+
```typescript
|
|
443
|
+
describe("integration: triple verification flow", () => {
|
|
444
|
+
it("should complete V1 → V2 → V3 pipeline", async () => {
|
|
445
|
+
const result = await tripleVerify({
|
|
446
|
+
target: "my-feature",
|
|
447
|
+
scope: "full",
|
|
448
|
+
});
|
|
449
|
+
expect(result.verification1_internal.status).toBeDefined();
|
|
450
|
+
expect(result.verification2_external.status).toBeDefined();
|
|
451
|
+
expect(result.verification3_synthesis.status).toBeDefined();
|
|
452
|
+
});
|
|
453
|
+
});
|
|
454
|
+
```
|
|
455
|
+
|
|
456
|
+
---
|
|
457
|
+
|
|
458
|
+
## Commit Message Format
|
|
459
|
+
|
|
460
|
+
```
|
|
461
|
+
type(scope): short description
|
|
462
|
+
|
|
463
|
+
- Bullet point details
|
|
464
|
+
- Another detail
|
|
465
|
+
|
|
466
|
+
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
|
|
467
|
+
```
|
|
468
|
+
|
|
469
|
+
Types: `feat`, `fix`, `docs`, `test`, `refactor`, `style`, `chore`
|
|
470
|
+
|
|
471
|
+
---
|
|
472
|
+
|
|
473
|
+
## Version History
|
|
474
|
+
|
|
475
|
+
| Version | Date | Changes |
|
|
476
|
+
|---------|------|---------|
|
|
477
|
+
| 1.0.0 | 2026-02-05 | Initial style guide based on OpenClaw patterns |
|