nodebench-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +237 -0
- package/dist/__tests__/tools.test.d.ts +1 -0
- package/dist/__tests__/tools.test.js +402 -0
- package/dist/__tests__/tools.test.js.map +1 -0
- package/dist/db.d.ts +4 -0
- package/dist/db.js +198 -0
- package/dist/db.js.map +1 -0
- package/dist/index.d.ts +19 -0
- package/dist/index.js +237 -0
- package/dist/index.js.map +1 -0
- package/dist/tools/documentTools.d.ts +5 -0
- package/dist/tools/documentTools.js +524 -0
- package/dist/tools/documentTools.js.map +1 -0
- package/dist/tools/documentationTools.d.ts +12 -0
- package/dist/tools/documentationTools.js +647 -0
- package/dist/tools/documentationTools.js.map +1 -0
- package/dist/tools/evalTools.d.ts +6 -0
- package/dist/tools/evalTools.js +335 -0
- package/dist/tools/evalTools.js.map +1 -0
- package/dist/tools/financialTools.d.ts +10 -0
- package/dist/tools/financialTools.js +403 -0
- package/dist/tools/financialTools.js.map +1 -0
- package/dist/tools/flywheelTools.d.ts +6 -0
- package/dist/tools/flywheelTools.js +366 -0
- package/dist/tools/flywheelTools.js.map +1 -0
- package/dist/tools/githubTools.d.ts +12 -0
- package/dist/tools/githubTools.js +432 -0
- package/dist/tools/githubTools.js.map +1 -0
- package/dist/tools/learningTools.d.ts +6 -0
- package/dist/tools/learningTools.js +199 -0
- package/dist/tools/learningTools.js.map +1 -0
- package/dist/tools/memoryTools.d.ts +5 -0
- package/dist/tools/memoryTools.js +137 -0
- package/dist/tools/memoryTools.js.map +1 -0
- package/dist/tools/metaTools.d.ts +7 -0
- package/dist/tools/metaTools.js +837 -0
- package/dist/tools/metaTools.js.map +1 -0
- package/dist/tools/planningTools.d.ts +5 -0
- package/dist/tools/planningTools.js +147 -0
- package/dist/tools/planningTools.js.map +1 -0
- package/dist/tools/qualityGateTools.d.ts +6 -0
- package/dist/tools/qualityGateTools.js +347 -0
- package/dist/tools/qualityGateTools.js.map +1 -0
- package/dist/tools/reconTools.d.ts +8 -0
- package/dist/tools/reconTools.js +729 -0
- package/dist/tools/reconTools.js.map +1 -0
- package/dist/tools/searchTools.d.ts +5 -0
- package/dist/tools/searchTools.js +145 -0
- package/dist/tools/searchTools.js.map +1 -0
- package/dist/tools/uiCaptureTools.d.ts +8 -0
- package/dist/tools/uiCaptureTools.js +339 -0
- package/dist/tools/uiCaptureTools.js.map +1 -0
- package/dist/tools/verificationTools.d.ts +6 -0
- package/dist/tools/verificationTools.js +472 -0
- package/dist/tools/verificationTools.js.map +1 -0
- package/dist/tools/visionTools.d.ts +12 -0
- package/dist/tools/visionTools.js +553 -0
- package/dist/tools/visionTools.js.map +1 -0
- package/dist/tools/webTools.d.ts +12 -0
- package/dist/tools/webTools.js +443 -0
- package/dist/tools/webTools.js.map +1 -0
- package/dist/types.d.ts +16 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +66 -0
package/dist/tools/metaTools.js
@@ -0,0 +1,837 @@
/**
 * Meta tools — tool discovery and methodology guidance.
 * findTools helps agents discover what's available.
 * getMethodology teaches agents the development process.
 */
const METHODOLOGY_CONTENT = {
    verification: {
        title: "6-Phase Iterative Deep-Dive Verification Process",
        description: "Standard verification workflow for any non-trivial implementation. Run this before declaring any integration, migration, or protocol-level change done.",
        steps: [
            {
                phase: 1,
                name: "Context Gathering",
                description: "Launch parallel research into SDK/protocol specs, current codebase patterns, dispatcher/backend audits, and external API status. Use recon tools to structure this research.",
                tools: [
                    "search_learnings",
                    "run_recon",
                    "check_framework_updates",
                    "log_recon_finding",
                ],
                action: "Start with search_learnings for past issues. Call run_recon to structure your research (include projectContext for holistic view). Use check_framework_updates for known ecosystems. Log findings with log_recon_finding. Then call log_phase_findings with your context notes.",
            },
            {
                phase: 2,
                name: "Gap Analysis",
                description: "Compare findings against current implementation. Categorize each gap as CRITICAL/HIGH/MEDIUM/LOW.",
                tools: ["log_gap"],
                action: "Record each gap with severity, root cause, and fix strategy. Call log_phase_findings when complete.",
            },
            {
                phase: 3,
                name: "Implementation",
                description: "Fix CRITICAL and HIGH gaps first. Each fix is discrete and testable. Follow existing patterns.",
                tools: ["resolve_gap"],
                action: "Implement fixes, call resolve_gap for each. Call log_phase_findings when done.",
            },
            {
                phase: 4,
                name: "Testing & Validation",
                description: "Run tests at 5 layers: static (tsc), unit, integration, manual, live_e2e. ALL must pass.",
                tools: ["log_test_result", "run_closed_loop"],
                action: "Record each test with log_test_result. Use run_closed_loop for compile/lint/test cycle. Call log_phase_findings when all layers pass.",
            },
            {
                phase: 5,
                name: "Self-Closed-Loop Verification",
                description: "Parallel checks: spec compliance, functional correctness, argument compatibility. Any FAIL loops back to Phase 3.",
                tools: ["run_quality_gate"],
                action: "Run verification checks. If any fail, loop back to Phase 3. Call log_phase_findings with results.",
            },
            {
                phase: 6,
                name: "Document Learnings",
                description: "Record edge cases, gotchas, patterns discovered. This prevents future regressions.",
                tools: ["record_learning"],
                action: "Call record_learning for each discovery. Call log_phase_findings to complete the cycle.",
            },
        ],
        composesWith: "After completing, use promote_to_eval to feed test cases into the Eval loop.",
    },
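    // Illustrative tool-call sequence for one full cycle (a sketch composed from
    // the phase actions above, not a fixed API; start_verification_cycle appears
    // in the quick-start sequence later in this file):
    //   start_verification_cycle
    //     -> Phase 1: search_learnings, run_recon, log_recon_finding, log_phase_findings
    //     -> Phase 2: log_gap per gap, log_phase_findings
    //     -> Phase 3: resolve_gap per fix, log_phase_findings
    //     -> Phase 4: log_test_result per layer, run_closed_loop, log_phase_findings
    //     -> Phase 5: run_quality_gate (any FAIL loops back to Phase 3), log_phase_findings
    //     -> Phase 6: record_learning per discovery, then promote_to_eval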
    eval: {
        title: "Eval-Driven Development Loop",
        description: "Continuous improvement cycle. Changes only ship if evals improve — never on gut feel alone. The eval batch is the gatekeeper, not human intuition.",
        steps: [
            {
                step: 1,
                name: "Run Eval Batch",
                description: "Define test cases with input (prompt/scenario), intent (ground truth goal), and expected behavior.",
                tools: ["start_eval_run"],
                action: "Create an eval run with your test cases.",
            },
            {
                step: 2,
                name: "Execute & Record",
                description: "Run each test case. Record actual results, verdict (pass/fail/partial), telemetry, and judge notes.",
                tools: ["record_eval_result"],
                action: "For each case: execute it, then record what happened with record_eval_result.",
            },
            {
                step: 3,
                name: "Aggregate & Analyze",
                description: "Finalize the run. Compute pass rate, average score, failure patterns, and improvement suggestions.",
                tools: ["complete_eval_run"],
                action: "Call complete_eval_run to get aggregate scores.",
            },
            {
                step: 4,
                name: "Compare & Decide",
                description: "Compare against baseline. DEPLOY if improved, REVERT if regressed, INVESTIGATE if flat.",
                tools: ["compare_eval_runs"],
                action: "Call compare_eval_runs with baseline and candidate run IDs. Follow the recommendation.",
            },
            {
                step: 5,
                name: "Track Over Time",
                description: "Monitor eval history for drift. Regressions trigger verification investigations.",
                tools: ["list_eval_runs", "trigger_investigation"],
                action: "Use list_eval_runs to spot trends. If regression detected, call trigger_investigation.",
            },
        ],
        composesWith: "Regressions trigger 6-Phase Verification via trigger_investigation.",
    },
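    // Illustrative eval iteration (argument shapes and run IDs are assumptions
    // for demonstration; the tool input schemas are authoritative):
    //   start_eval_run({ cases: [...] })                 // begin candidate run
    //   record_eval_result({ runId, caseId, verdict })   // per case: pass/fail/partial
    //   complete_eval_run({ runId })                     // aggregate pass rate + score
    //   compare_eval_runs({ baselineRunId, candidateRunId })
    //   // recommendation: DEPLOY if improved, REVERT if regressed, INVESTIGATE if flat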
    flywheel: {
        title: "The AI Flywheel (Verification × Eval)",
        description: "The 6-Phase Verification (inner loop) and Eval-Driven Development (outer loop) are not separate processes — they are nested loops that reinforce each other. Every verification produces eval artifacts. Every eval regression triggers verification.",
        steps: [
            {
                name: "Inner → Outer (Verification feeds Evals)",
                items: [
                    "Phase 4 test cases become eval batch test cases with known-good expected outputs",
                    "Phase 5 PASS/FAIL checklists become eval scoring rubrics",
                    "Phase 6 edge cases become adversarial eval cases targeting discovered failure modes",
                ],
                tools: ["promote_to_eval"],
            },
            {
                name: "Outer → Inner (Evals trigger Verification)",
                items: [
                    "Tool calling inefficiency → Phase 2 gap analysis on that tool",
                    "Eval scores regress after deploy → Full Phase 1-6 cycle (treat as production incident)",
                    "New tool/prompt change suggested → Phase 3 implementation + Phase 4-5 validation",
                    "Recurring failure pattern → Phase 1 deep dive into root cause",
                ],
                tools: ["trigger_investigation"],
            },
            {
                name: "When to use which",
                items: [
                    "Building/changing a feature → Run 6-Phase Verification (Is this correct?)",
                    "Measuring system quality over time → Run Eval loop (Is this better?)",
                    "Both, always → They compound. Every 6-Phase run produces eval artifacts. Every eval regression triggers 6-Phase.",
                ],
                tools: ["get_flywheel_status"],
            },
            {
                name: "Mandatory Minimum Verification",
                items: [
                    "Every non-trivial change MUST pass the 6-step mandatory flywheel before being declared done.",
                    "This is the floor, not the ceiling. The full 6-Phase cycle is the gold standard.",
                    "Only skip for trivial changes (typos, comments, config) with explicit justification.",
                    'Call getMethodology("mandatory_flywheel") for the full 6-step checklist and the variety-check dead-code example.',
                ],
                tools: ["run_mandatory_flywheel"],
            },
        ],
    },
    ui_ux_qa: {
        title: "UI/UX QA Verification (Frontend Changes)",
        description: "After any implementation that touches frontend UI — new components, layout changes, interaction updates — run this QA process before declaring work done. Uses existing testing infrastructure: Vitest, Playwright, Storybook, Lighthouse.",
        steps: [
            {
                step: 1,
                name: "Component Verification",
                description: "Run component tests for changed files. Check for React render errors, missing props, broken imports.",
                tools: ["run_closed_loop"],
                action: "Run `npm run test:run` to execute Vitest component tests. Run the compile + lint + test closed loop. FAIL if any component test fails.",
            },
            {
                step: 2,
                name: "Visual & Layout QA",
                description: "Capture screenshots at 3 breakpoints (375px mobile, 768px tablet, 1280px desktop) using capture_responsive_suite. Verify visual consistency with adjacent components. Run Storybook to check component in isolation.",
                tools: ["capture_responsive_suite", "capture_ui_screenshot", "get_gate_preset"],
                action: 'Call capture_responsive_suite(url, label) to screenshot at mobile/tablet/desktop. Visually inspect each capture. Run `npm run storybook` for isolated view. Call get_gate_preset("ui_ux_qa") for the full rule checklist.',
            },
            {
                step: 3,
                name: "Accessibility Audit",
                description: "Verify keyboard navigation (Tab order, Enter/Space activation). Check aria-labels on interactive elements. Run Storybook a11y addon panel for automated axe checks. Run Lighthouse accessibility audit.",
                tools: ["run_quality_gate"],
                action: "Tab through the changed UI. Check Storybook's Accessibility panel for axe violations. Run `npm run perf:lighthouse` for accessibility score. Record results.",
            },
            {
                step: 4,
                name: "Interaction & State QA",
                description: "Test loading states, error states, empty states. Verify form validation and error messages. Check hover/focus/active states on interactive elements.",
                tools: ["log_test_result"],
                action: 'Manually test each interaction state. Log results with log_test_result(layer: "manual"). FAIL if any async operation lacks loading/error/empty states.',
            },
            {
                step: 5,
                name: "E2E Smoke Test",
                description: "Run relevant Playwright tests. If no existing test covers the change, write one. Capture screenshot for visual reference using capture_ui_screenshot.",
                tools: ["log_test_result", "capture_ui_screenshot"],
                action: 'Run `npm run test:e2e` for Playwright tests. Call capture_ui_screenshot for visual reference of the final state. Log results with log_test_result(layer: "live_e2e"). If no test exists, create one in tests/e2e/.',
            },
            {
                step: 6,
                name: "Record & Gate",
                description: "Run the ui_ux_qa quality gate with boolean results for all 8 rules. Record learnings for any UI gotchas discovered.",
                tools: ["run_quality_gate", "record_learning"],
                action: 'Evaluate all 8 rules from the ui_ux_qa preset. Call run_quality_gate(gateName: "ui_ux_qa", rules: [...]) with your results. Call record_learning for any UI patterns or gotchas discovered.',
            },
        ],
        composesWith: "Use after Phase 3 (Implementation) of a verification cycle when the change involves frontend code. The ui_ux_qa gate result feeds into Phase 5 (Self-Verification).",
        commands: {
            component_tests: "npm run test:run",
            e2e_tests: "npm run test:e2e",
            storybook: "npm run storybook",
            lighthouse: "npm run perf:lighthouse",
            bundle_analysis: "npm run perf:bundle",
        },
    },
    agentic_vision: {
        title: "Agentic Vision (AI-Powered Visual Verification)",
        description: "Use AI vision models to analyze UI screenshots programmatically. The Discover-Capture-Analyze-Manipulate-Iterate-Gate loop provides automated visual QA that goes beyond what rule-based checks can catch. Gemini with code execution provides the richest analysis (zoom, crop, compute within the model). Falls back to GPT-4o, Claude, or OpenRouter vision.",
        steps: [
            {
                step: 1,
                name: "Discover",
                description: "Check what vision capabilities are available in the current environment. API keys and SDKs determine which providers can be used for visual analysis.",
                tools: ["discover_vision_env"],
                action: "Call discover_vision_env to see available providers. If none are available, set an API key (GEMINI_API_KEY recommended for agentic vision with code execution).",
            },
            {
                step: 2,
                name: "Capture",
                description: "Take screenshots of the UI at relevant viewports. Use capture_responsive_suite for comprehensive coverage or capture_ui_screenshot for a specific viewport.",
                tools: ["capture_ui_screenshot", "capture_responsive_suite"],
                action: "Call capture_responsive_suite(url, label) for 3-breakpoint coverage, or capture_ui_screenshot(url) for a single viewport. Both return base64 images inline + console errors.",
            },
            {
                step: 3,
                name: "Analyze",
                description: "Send the captured screenshot to a vision model for AI-powered analysis. The model evaluates layout, spacing, typography, color, accessibility, and component states. Gemini with code execution can zoom into regions, measure distances, and annotate issues autonomously.",
                tools: ["analyze_screenshot"],
                action: 'Call analyze_screenshot(imageBase64) with the base64 data from step 2. Optionally provide a custom prompt for focused analysis (e.g., "Check if the navigation menu is accessible on mobile"). Gemini will use code execution to zoom, crop, and compute.',
            },
            {
                step: 4,
                name: "Manipulate (Optional)",
                description: "If the analysis identifies specific regions of concern, crop or annotate the screenshot for deeper inspection or documentation. Crop to isolate a component, annotate to mark issues.",
                tools: ["manipulate_screenshot"],
                action: "Call manipulate_screenshot with operation='crop' to extract a region, or operation='annotate' to mark issues with bounding boxes and labels. Feed cropped images back to analyze_screenshot for focused analysis.",
            },
            {
                step: 5,
                name: "Iterate",
                description: "If issues were found, fix the code, re-capture, and re-analyze. Repeat until the analysis shows no CRITICAL or HIGH issues. This is the closed loop.",
                tools: ["capture_ui_screenshot", "analyze_screenshot"],
                action: "Fix identified issues in code. Re-capture the same URL. Re-analyze. Compare before/after. Continue until the vision model reports clean results.",
            },
            {
                step: 6,
                name: "Gate",
                description: "Run the ui_ux_qa quality gate using the vision analysis results to inform your boolean evaluation of each gate rule.",
                tools: ["run_quality_gate", "get_gate_preset", "record_learning"],
                action: 'Call get_gate_preset("ui_ux_qa") for the 8 rules. Use vision analysis results to evaluate each rule with evidence. Call run_quality_gate with boolean results. Record any visual patterns or gotchas as learnings.',
            },
        ],
        composesWith: "Use after capture_ui_screenshot or capture_responsive_suite to add AI-powered analysis. Combines with ui_ux_qa methodology (getMethodology('ui_ux_qa')) for comprehensive frontend verification.",
        providerPriority: {
            gemini: "Best choice. Code execution enables agentic vision — model can zoom, crop, measure, annotate within its reasoning loop. Set GEMINI_API_KEY.",
            openai: "GPT-4o provides strong vision analysis without code execution. Set OPENAI_API_KEY.",
            anthropic: "Claude provides detailed text analysis of visual elements. Set ANTHROPIC_API_KEY.",
            openrouter: "Routes to various vision models via OpenAI-compatible API. Fallback option. Set OPENROUTER_API_KEY.",
        },
    },
    quality_gates: {
        title: "Quality Gates (Boolean Check Pattern)",
        description: "Deterministic pre-action validation using boolean checks. Define rules, evaluate each one, aggregate into pass/fail. Built-in presets: engagement (content quality), code_review (implementation quality), deploy_readiness (pre-deploy checklist), ui_ux_qa (frontend UI/UX verification).",
        steps: [
            {
                step: 1,
                name: "Get Rules",
                description: "Use a built-in preset or define custom rules. Each rule has a name, description, and evaluation hint.",
                tools: ["get_gate_preset"],
                action: 'Call get_gate_preset with "engagement", "code_review", "deploy_readiness", or "ui_ux_qa" to get rule definitions.',
            },
            {
                step: 2,
                name: "Evaluate",
                description: "Check each rule against your target (content, code, PR). Record boolean pass/fail for each.",
                tools: [],
                action: "Evaluate each rule yourself. Note which pass and which fail.",
            },
            {
                step: 3,
                name: "Record Results",
                description: "Submit rule results. The tool aggregates the score and persists the run.",
                tools: ["run_quality_gate"],
                action: "Call run_quality_gate with your boolean results. Review failures.",
            },
            {
                step: 4,
                name: "Track Trends",
                description: "Monitor pass/fail trends over time for each gate.",
                tools: ["get_gate_history"],
                action: "Call get_gate_history to see if quality is improving.",
            },
        ],
    },
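    // Illustrative gate run (the rules payload shape is an assumption for
    // demonstration; get_gate_preset returns the authoritative rule definitions):
    //   get_gate_preset({ preset: "code_review" })    // step 1: fetch rule definitions
    //   // step 2: evaluate each rule yourself, then:
    //   run_quality_gate({ gateName: "code_review", rules: [/* boolean results */] })
    //   get_gate_history({ gateName: "code_review" }) // step 4: trend over time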
    closed_loop: {
        title: "Closed Loop Verification",
        description: "Local green loop: compile → lint → test → self-debug. Never present changes to anyone without a full green loop. If any step fails: read logs, hypothesize, fix, restart from the failed step.",
        steps: [
            {
                step: 1,
                name: "Compile",
                description: "Build clean. No errors.",
                action: "Run your build command (tsc, go build, cargo build, etc.)",
            },
            {
                step: 2,
                name: "Lint",
                description: "Style clean. No warnings.",
                action: "Run your linter (eslint, golint, clippy, etc.)",
            },
            {
                step: 3,
                name: "Test",
                description: "Run automated test suites. All must pass.",
                action: "Run your test suite (jest, pytest, go test, etc.)",
            },
            {
                step: 4,
                name: "Self-Debug",
                description: "If steps 1-3 fail: read logs, hypothesize root cause, fix, restart loop.",
                action: "Analyze failures, implement fix, go back to step 1. Repeat until green.",
            },
        ],
        composesWith: "Record the loop result with run_closed_loop. Use as part of Phase 4 in a verification cycle.",
    },
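    // Minimal sketch of the green loop for a TypeScript project (example
    // commands, assuming standard npm scripts; substitute your stack's equivalents):
    //   npx tsc --noEmit && npx eslint . && npm test
    //   // on any failure: read the logs, hypothesize, fix, rerun the whole chain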
    learnings: {
        title: "Learnings (Persistent Knowledge Base)",
        description: "Store edge cases, gotchas, patterns, and regressions discovered during development. Search before starting new work to avoid repeating mistakes. Always record learnings after completing a verification cycle.",
        steps: [
            {
                step: 1,
                name: "Search Before You Start",
                description: "Before implementing anything, search for relevant learnings from past work. Use search_all_knowledge for a unified view across learnings, recon findings, and resolved gaps.",
                tools: ["search_learnings", "search_all_knowledge"],
                action: 'Call search_all_knowledge with what you\'re about to work on (e.g. "convex http routing"). This searches learnings, recon findings, AND resolved gaps in one call.',
            },
            {
                step: 2,
                name: "Record As You Go",
                description: "When you discover an edge case, gotcha, or pattern, record it immediately.",
                tools: ["record_learning"],
                action: "Call record_learning with key, content, category, and tags.",
            },
            {
                step: 3,
                name: "Browse & Review",
                description: "Periodically review the knowledge base to refresh your understanding.",
                tools: ["list_learnings"],
                action: "Call list_learnings with a category filter to browse specific types of learnings.",
            },
        ],
        composesWith: "Learnings are recorded in Phase 6 of verification cycles. They feed into future Phase 1 context gathering.",
    },
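    // Illustrative record_learning call (field values are hypothetical; the
    // action above names the expected fields: key, content, category, tags):
    //   record_learning({
    //     key: "http-routing-trailing-slash",
    //     content: "Router treats /foo and /foo/ as distinct paths; normalize before matching.",
    //     category: "gotcha",
    //     tags: ["http", "routing"],
    //   })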
    mandatory_flywheel: {
        title: "Mandatory AI Flywheel Testing (Minimum 6 Steps)",
        description: "After any non-trivial code change, feature addition, or bug fix, this verification process MUST be run before declaring work done. This is not optional. It catches production bugs that smoke tests miss.",
        steps: [
            {
                step: 1,
                name: "Static Analysis",
                description: "Type checking must pass with zero errors. tsc --noEmit, convex dev --once --typecheck=enable, or your stack's equivalent.",
                action: "Run all static analysis tools. Must be completely green.",
            },
            {
                step: 2,
                name: "Happy-Path Test",
                description: "Run the changed functionality with valid inputs and confirm expected output.",
                action: "Execute the main use case with known-good inputs. Verify output matches expectations exactly.",
            },
            {
                step: 3,
                name: "Failure-Path Test",
                description: "Test each failure mode the code is supposed to handle: invalid inputs, edge cases, error states.",
                action: "Send bad inputs, trigger error paths, test boundary conditions. Ensure errors are handled, not swallowed silently.",
            },
            {
                step: 4,
                name: "Gap Analysis",
                description: "Review code for dead code, unused variables, missing integrations, or logic that doesn't match stated intent.",
                action: "Read the implementation line-by-line. Look for: data fetched but never used, conditionals always true/false, missing error handling, incomplete features.",
            },
            {
                step: 5,
                name: "Fix and Re-Verify",
                description: "If any gap is found, fix it and re-run steps 1-3 FROM SCRATCH. Don't just re-run the failed step.",
                action: "Fix the gap, then restart from step 1. The full re-run catches cascading issues the fix might introduce.",
            },
            {
                step: 6,
                name: "Deploy and Document",
                description: "Deploy the verified fix. Document any gaps found and how they were resolved.",
                tools: ["record_learning"],
                action: "Deploy to production. Call record_learning for each gap discovered during this process.",
            },
        ],
        whenToSkip: "Only skip for trivial changes where blast radius is near zero: typo fixes, comment updates, config tweaks with no code execution impact. All other changes require all 6 steps.",
        realWorldExample: {
            title: "The Variety Check Dead-Code Bug",
            description: "The first deployment of the pre-post verification pipeline had a bug where the variety check fetched scheduled queue items but never actually compared entities against them (dead code). This was only caught because the flywheel process was run after the initial 'it works' smoke tests. Without it, the bug would have gone to production silently.",
            lesson: "Smoke tests show 'it runs' but not 'it works correctly'. Step 4 (Gap Analysis) catches logic bugs that type systems and basic tests miss. Step 5 (Fix and Re-Verify from scratch) catches cascading issues.",
        },
        tools: ["run_mandatory_flywheel"],
    },
    reconnaissance: {
        title: "Reconnaissance & Research (Structured Phase 1 Context Gathering)",
        description: "Before implementing or fixing anything, gather comprehensive context about BOTH external sources (latest SDK versions, API changes, known issues) AND internal context (existing codebase patterns, project architecture, team conventions). Structure this research as a trackable recon session.",
        steps: [
            {
                step: 1,
                name: "Start Recon Session",
                description: "Define what you're researching and why. Provide project context (tech stack, versions, architecture) for holistic analysis.",
                tools: ["run_recon"],
                action: "Call run_recon with target, description, and projectContext. If project context is unknown, the tool will suggest questions to ask.",
            },
            {
                step: 2,
                name: "Check Framework Sources",
                description: "Use pre-built source checklists for known ecosystems: anthropic, langchain, openai, google, mcp.",
                tools: ["check_framework_updates"],
                action: "Call check_framework_updates for each relevant ecosystem. Visit each source URL systematically.",
            },
            {
                step: 3,
                name: "Visit Sources & Record Findings",
                description: "Check each source systematically. Record EVERY relevant finding — both from external sources and from the existing codebase.",
                tools: ["log_recon_finding"],
                action: "For each discovery: call log_recon_finding. Categories: breaking_change, new_feature, deprecation, best_practice, dataset, benchmark, codebase_pattern, existing_implementation.",
            },
            {
                step: 4,
                name: "Aggregate & Prioritize",
                description: "Review all findings grouped by category. Prioritize: breaking changes > deprecations > existing implementations > new features.",
                tools: ["get_recon_summary", "search_all_knowledge"],
                action: "Call get_recon_summary with completeSession=true. Use search_all_knowledge to cross-reference with past learnings and resolved gaps. Use prioritized action items to inform gap analysis.",
            },
        ],
        composesWith: "Use at the start of Phase 1 (Context Gathering) in a verification cycle. Findings inform Phase 2 (Gap Analysis). Also call search_learnings to check past findings.",
        categories: {
            breaking_change: "Requires immediate action before deploying",
            deprecation: "Plan migration path, may break in future",
            new_feature: "Potential improvements or capabilities to leverage",
            best_practice: "Recommended patterns or approaches",
            dataset: "Useful datasets or benchmarks for evaluation",
            benchmark: "Performance or quality baselines to evaluate against",
            codebase_pattern: "Existing patterns in the project codebase",
            existing_implementation: "Code that already handles part of what you're building",
        },
    },
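    // Illustrative recon session (argument shapes are assumptions for
    // demonstration; the tool input schemas are authoritative):
    //   run_recon({ target, description, projectContext })
    //   check_framework_updates({ ecosystem: "mcp" })            // per relevant ecosystem
    //   log_recon_finding({ category: "breaking_change", ... })  // per discovery
    //   get_recon_summary({ completeSession: true })             // aggregate + prioritize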
    project_ideation: {
        title: "Project Ideation & Validation Process",
        description: "Structured approach for validating project concepts before development. Captures requirements, constraints, success metrics, and competitive analysis. Use this before starting any new project or major feature.",
        steps: [
            {
                phase: 1,
                name: "Define Concept",
                description: "Define the problem, target users, and core value proposition. Search existing knowledge for similar past projects.",
                tools: ["record_learning", "search_all_knowledge"],
                action: "Document the problem statement and why it matters. Call search_all_knowledge to check for similar past projects or relevant learnings.",
            },
            {
                phase: 2,
                name: "Research Market",
                description: "Validate demand, find prior art, and understand the competitive landscape.",
                tools: ["web_search", "search_github", "research_job_market"],
                action: "Use web_search for market research. Call search_github to find similar projects. Use research_job_market to understand skill demand.",
            },
            {
                phase: 3,
                name: "Analyze Competition",
                description: "Study competitor implementations, understand patterns, and identify differentiation opportunities.",
                tools: ["fetch_url", "analyze_repo"],
                action: "Use fetch_url to read competitor docs. Call analyze_repo to understand their tech stack and patterns. Note what to adopt vs. differentiate.",
            },
            {
                phase: 4,
                name: "Define Requirements",
                description: "List functional and non-functional requirements with priority and rationale.",
                tools: ["log_recon_finding", "update_agents_md"],
                action: "Record each requirement with log_recon_finding. Update AGENTS.md with project requirements using update_agents_md.",
            },
            {
                phase: 5,
                name: "Plan Metrics",
                description: "Define measurable success criteria and create baseline eval test cases.",
                tools: ["start_eval_run"],
                action: "Create eval test cases that represent success. Call start_eval_run with initial baseline cases.",
            },
            {
                phase: 6,
                name: "Gate Approval",
                description: "Document findings, run quality gate, and mark project as ready or needs rework.",
                tools: ["run_quality_gate", "record_learning"],
                action: "Call run_quality_gate with ideation rules. Record key decisions with record_learning. Mark project as approved or needs iteration.",
            },
        ],
        composesWith: "Phase 1 (Concept) informs verification cycle planning. Success metrics feed directly into eval batches. Requirements inform AGENTS.md documentation.",
    },
    tech_stack_2026: {
        title: "Tech Stack & Dependency Management (2026)",
        description: "Systematic approach to evaluating, documenting, and maintaining technology choices. Ensures alignment with project goals and minimizes tech debt. Run periodically or when considering new dependencies.",
        steps: [
            {
                step: 1,
                name: "Inventory Current Stack",
                description: "Document all frameworks, libraries, and tools in use with versions and purposes.",
                tools: ["record_learning", "search_all_knowledge", "setup_local_env"],
                action: "Call setup_local_env to detect current environment. List every dependency with version, purpose, and maintenance status. Use search_all_knowledge to find past tech decisions.",
            },
            {
                step: 2,
                name: "Evaluate Against Goals",
                description: "Compare each component against current project requirements. Check for updates and breaking changes.",
                tools: ["check_framework_updates", "web_search"],
                action: "For each major dependency, call check_framework_updates for known ecosystems. Use web_search for latest release notes and migration guides.",
            },
            {
                step: 3,
                name: "Research Alternatives",
                description: "Research newer or better-suited alternatives. Compare features, community, performance.",
                tools: ["search_github", "analyze_repo", "fetch_url"],
                action: "Use search_github to find promising alternatives. Call analyze_repo to understand their architecture. Use fetch_url to read their documentation.",
            },
            {
                step: 4,
                name: "Plan Migrations",
                description: "For each desired change, outline migration steps and risk assessment.",
                tools: ["log_gap"],
                action: "Log each migration as a gap with complexity and risk level. Prioritize: breaking changes > critical perf gains > nice-to-haves.",
            },
            {
                step: 5,
                name: "Document Rationale",
                description: "Record why each tech choice was made and when to reconsider.",
                tools: ["record_learning", "update_agents_md"],
                action: "Call record_learning with decision rationale, trade-offs, and review frequency. Update AGENTS.md Tech Stack section with update_agents_md.",
            },
        ],
        composesWith: "Migrations discovered become verification cycles. Findings inform agentic vision setup (provider choices). Updates feed into AGENTS.md maintenance.",
    },
    telemetry_setup: {
        title: "Telemetry & Instrumentation Setup",
        description: "Establish observability for system behavior, performance, and quality metrics. Build dashboards and alerting for early problem detection.",
        steps: [
            {
                step: 1,
                name: "Define Metrics & Signals",
                description: "Identify what to measure: latency, errors, quality scores, user actions.",
                tools: ["record_learning"],
                action: "Document each metric's purpose, collection method, and alerting thresholds. Call record_learning to persist the instrumentation plan.",
            },
            {
                step: 2,
                name: "Instrument Code",
                description: "Add logging, tracing, and metric collection to key functions.",
                tools: ["log_test_result"],
                action: "Add telemetry calls at phase boundaries. Use log_test_result to capture execution metrics during development.",
            },
            {
                step: 3,
                name: "Aggregate & Visualize",
                description: "Set up dashboards and queries to view metric trends.",
                action: "Build dashboards in your observability tool (DataDog, Prometheus, CloudWatch). Include: pass rates, cycle times, tool latencies.",
            },
            {
                step: 4,
                name: "Define Alerts & SLOs",
                description: "Create alert rules and service-level objectives.",
                action: "Set alert thresholds for critical metrics. Define SLOs: eval pass rate >95%, verification cycle time <4 hours.",
            },
            {
                step: 5,
                name: "Review & Adjust",
                description: "Periodically review telemetry to catch patterns and adjust thresholds.",
                tools: ["search_learnings", "get_gate_history", "list_eval_runs"],
                action: "Weekly review of metric trends. Use get_gate_history and list_eval_runs as data sources. Adjust thresholds as system matures.",
            },
        ],
        composesWith: "Telemetry data enriches verification and eval outputs. Use get_gate_history and list_eval_runs as dashboard data sources.",
    },
    agents_md_maintenance: {
        title: "AGENTS.md Documentation Maintenance",
        description: "Keep AGENTS.md synchronized with actual agent implementations, tool updates, and deployment procedures. Single source of truth for agent setup and troubleshooting.",
        steps: [
            {
                step: 1,
                name: "Audit Current Documentation",
                description: "Review AGENTS.md against actual implementations. Flag mismatches.",
                tools: ["update_agents_md", "search_all_knowledge"],
                action: 'Call update_agents_md({ operation: "read" }) to see current sections. Compare against actual code. Use search_all_knowledge to find recent learnings.',
            },
            {
                step: 2,
                name: "Identify Changes",
                description: "List all recent code changes that impact agent setup or deployment.",
                tools: ["search_github", "setup_local_env"],
                action: "Call setup_local_env to check current environment. Use search_github to find recent changes in relevant repos. Note new tools, changed schemas, breaking changes.",
            },
            {
                step: 3,
                name: "Update Sections",
                description: "Sync AGENTS.md sections: tool catalog, setup steps, environment variables.",
                tools: ["update_agents_md", "record_learning"],
                action: 'Call update_agents_md({ operation: "update_section", section: "...", content: "..." }) for each outdated section. Record gotchas with record_learning.',
            },
            {
                step: 4,
                name: "Verify with Real Setup",
                description: "Test setup steps on a clean machine or CI environment.",
                tools: ["run_closed_loop"],
                action: "Follow AGENTS.md steps exactly. Run run_closed_loop to verify build/test pass. If failures, update docs or fix the process.",
            },
            {
                step: 5,
                name: "Version & Schedule",
                description: "Document when AGENTS.md was last updated. Schedule periodic reviews.",
                tools: ["record_learning"],
                action: 'Add timestamp header to AGENTS.md. Call record_learning with category "documentation" to schedule monthly review.',
            },
        ],
        composesWith: "AGENTS.md is the source of truth for Phase 1 context gathering. Keep in sync with actual implementations and tool changes.",
    },
    overview: {
        title: "NodeBench Development Methodology — Overview",
        description: "A dual-loop system for rigorous development. The inner loop (6-Phase Verification) ensures correctness. The outer loop (Eval-Driven Development) ensures improvement. Together they form the AI Flywheel.",
        steps: [
            {
                name: "Start here",
                description: "Call getMethodology with a specific topic to get detailed guidance.",
                topics: {
                    verification: "6-Phase Verification — systematic correctness checking",
                    eval: "Eval-Driven Development — measure improvement objectively",
                    flywheel: "AI Flywheel — how the two loops compose and reinforce each other",
                    mandatory_flywheel: "Mandatory Flywheel — 6-step minimum verification before declaring work done",
                    reconnaissance: "Reconnaissance — structured research and context gathering for Phase 1",
                    quality_gates: "Quality Gates — boolean check validation pattern",
                    ui_ux_qa: "UI/UX QA — frontend verification after UI implementations",
                    agentic_vision: "Agentic Vision — AI-powered visual verification using vision models (Gemini code execution, GPT-4o, Claude)",
                    closed_loop: "Closed Loop — compile/lint/test/debug before presenting work",
                    learnings: "Learnings — persistent knowledge base to prevent repeating mistakes",
                    project_ideation: "Project Ideation — validate concepts before development with market research and competitive analysis",
                    tech_stack_2026: "Tech Stack Management — evaluate and maintain technology choices for 2026",
                    telemetry_setup: "Telemetry Setup — observability, instrumentation, and early problem detection",
                    agents_md_maintenance: "AGENTS.md Maintenance — keep documentation synchronized with implementations",
                },
            },
            {
                name: "Quick start for first-time setup",
                sequence: [
                    "1. bootstrap_project — Register your project (tech stack, architecture, conventions)",
                    '2. getMethodology("overview") — See all available methodologies',
                    "3. search_all_knowledge — Check if the knowledge base has relevant past findings",
                ],
            },
            {
                name: "Quick start for a new feature",
                sequence: [
                    "1. search_all_knowledge — Check learnings, recon findings, and resolved gaps",
                    "2. start_verification_cycle — Begin 6-phase process",
                    "3. Follow phases 1-6 (guided by tool responses)",
                    "4. record_learning — Capture what you discovered",
                    "5. promote_to_eval — Turn findings into eval cases",
                    "6. start_eval_run + compare_eval_runs — Verify improvement",
                ],
            },
            {
                name: "Quick start for checking quality",
                sequence: [
                    "1. get_gate_preset — Get rules for your context",
                    "2. Evaluate each rule",
                    "3. run_quality_gate — Record results",
                    "4. run_closed_loop — Ensure compile/lint/test pass",
                ],
            },
        ],
    },
};
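// Every METHODOLOGY_CONTENT entry shares the core shape { title, description,
// steps }, plus optional keys used by some entries (composesWith, commands,
// categories, providerPriority, whenToSkip, realWorldExample, tools).
// getMethodology below returns the selected entry unmodified.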
export function createMetaTools(allTools) {
    return [
        {
            name: "findTools",
            description: "Search available methodology tools by keyword or capability description. Returns matching tool names and descriptions. Use this to discover which tools are available for a task.",
            inputSchema: {
                type: "object",
                properties: {
                    query: {
                        type: "string",
                        description: 'What you want to do (e.g. "verify implementation", "track quality", "record edge case")',
                    },
                    category: {
                        type: "string",
                        enum: [
                            "verification",
                            "eval",
                            "quality_gate",
                            "learning",
                            "flywheel",
                            "reconnaissance",
                            "ui_capture",
                            "vision",
                            "web",
                            "github",
                            "documentation",
                            "meta",
                        ],
                        description: "Filter by tool category (optional)",
                    },
                },
                required: ["query"],
            },
            handler: async (args) => {
                const query = (args.query ?? "").toLowerCase();
                const category = args.category;
                // Category mapping
                const categoryMap = {
                    verification: [
                        "start_verification_cycle",
                        "log_phase_findings",
                        "log_gap",
                        "resolve_gap",
                        "log_test_result",
                        "get_verification_status",
                        "list_verification_cycles",
                    ],
                    eval: [
                        "start_eval_run",
                        "record_eval_result",
                        "complete_eval_run",
                        "compare_eval_runs",
                        "list_eval_runs",
                    ],
                    quality_gate: [
                        "run_quality_gate",
                        "get_gate_preset",
                        "get_gate_history",
                        "run_closed_loop",
                    ],
                    learning: [
                        "record_learning",
                        "search_learnings",
                        "list_learnings",
                        "delete_learning",
                    ],
                    flywheel: [
                        "get_flywheel_status",
                        "promote_to_eval",
                        "trigger_investigation",
                        "run_mandatory_flywheel",
                    ],
                    reconnaissance: [
                        "run_recon",
                        "log_recon_finding",
                        "get_recon_summary",
                        "check_framework_updates",
                        "search_all_knowledge",
                        "bootstrap_project",
                        "get_project_context",
                    ],
                    ui_capture: [
                        "capture_ui_screenshot",
                        "capture_responsive_suite",
                    ],
                    vision: [
                        "discover_vision_env",
                        "analyze_screenshot",
                        "manipulate_screenshot",
                    ],
                    web: [
                        "web_search",
                        "fetch_url",
                    ],
                    github: [
                        "search_github",
                        "analyze_repo",
                    ],
                    documentation: [
                        "update_agents_md",
                        "research_job_market",
                        "setup_local_env",
                    ],
                    meta: ["findTools", "getMethodology"],
                };
                let candidates = allTools;
                if (category && categoryMap[category]) {
                    const names = new Set(categoryMap[category]);
                    candidates = allTools.filter((t) => names.has(t.name));
                }
                const matches = candidates.filter((t) => {
                    const text = `${t.name} ${t.description}`.toLowerCase();
                    return query.split(/\s+/).some((word) => text.includes(word));
                });
                return {
                    query,
                    count: matches.length,
                    tools: matches.map((t) => ({
                        name: t.name,
                        description: t.description,
                    })),
                };
            },
        },
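        // Illustrative findTools call (matching is a word-level OR across each
        // tool's name + description, so short queries match broadly; note that an
        // empty query matches every candidate because "".includes("") is true):
        //   const [findTools] = createMetaTools(allTools);
        //   await findTools.handler({ query: "record edge case", category: "learning" });
        //   // => { query: "record edge case", count: <n>, tools: [{ name, description }, ...] }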
        {
            name: "getMethodology",
            description: 'Get step-by-step guidance for a development methodology. Topics: verification, eval, flywheel, mandatory_flywheel, reconnaissance, quality_gates, ui_ux_qa, agentic_vision, closed_loop, learnings, project_ideation, tech_stack_2026, telemetry_setup, agents_md_maintenance, overview. Call with topic "overview" to see all available methodologies.',
            inputSchema: {
                type: "object",
                properties: {
                    topic: {
                        type: "string",
                        enum: [
                            "verification",
                            "eval",
                            "flywheel",
                            "mandatory_flywheel",
                            "reconnaissance",
                            "quality_gates",
                            "ui_ux_qa",
                            "agentic_vision",
                            "closed_loop",
                            "learnings",
                            "project_ideation",
                            "tech_stack_2026",
                            "telemetry_setup",
                            "agents_md_maintenance",
                            "overview",
                        ],
                        description: "Which methodology to explain",
                    },
                },
                required: ["topic"],
            },
            handler: async (args) => {
                const content = METHODOLOGY_CONTENT[args.topic];
                if (!content)
                    throw new Error(`Unknown topic: ${args.topic}. Available: ${Object.keys(METHODOLOGY_CONTENT).join(", ")}`);
                return content;
            },
        },
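        // Illustrative getMethodology call (tool order matches the array
        // literal above: findTools first, getMethodology second):
        //   const [, getMethodology] = createMetaTools(allTools);
        //   await getMethodology.handler({ topic: "closed_loop" });
        //   // => the closed_loop entry ({ title, description, steps, composesWith });
        //   // an unknown topic throws, listing the available keys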
    ];
}
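// Minimal wiring sketch (a hedged example, not the package's documented setup;
// how tools are exposed to an MCP server depends on the host implementation):
//   import { createMetaTools } from "./tools/metaTools.js";
//   const allTools = [/* tools from the other modules */];
//   allTools.push(...createMetaTools(allTools)); // meta tools search this same
//   // array by reference, so pushing after creation lets findTools find itself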
//# sourceMappingURL=metaTools.js.map