nodebench-mcp 2.22.0 → 2.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. package/NODEBENCH_AGENTS.md +5 -4
  2. package/README.md +495 -280
  3. package/dist/__tests__/architectComplex.test.js +3 -5
  4. package/dist/__tests__/architectComplex.test.js.map +1 -1
  5. package/dist/__tests__/batchAutopilot.test.d.ts +8 -0
  6. package/dist/__tests__/batchAutopilot.test.js +218 -0
  7. package/dist/__tests__/batchAutopilot.test.js.map +1 -0
  8. package/dist/__tests__/cliSubcommands.test.d.ts +1 -0
  9. package/dist/__tests__/cliSubcommands.test.js +138 -0
  10. package/dist/__tests__/cliSubcommands.test.js.map +1 -0
  11. package/dist/__tests__/evalHarness.test.js +1 -1
  12. package/dist/__tests__/forecastingDogfood.test.d.ts +9 -0
  13. package/dist/__tests__/forecastingDogfood.test.js +284 -0
  14. package/dist/__tests__/forecastingDogfood.test.js.map +1 -0
  15. package/dist/__tests__/forecastingScoring.test.d.ts +9 -0
  16. package/dist/__tests__/forecastingScoring.test.js +202 -0
  17. package/dist/__tests__/forecastingScoring.test.js.map +1 -0
  18. package/dist/__tests__/localDashboard.test.d.ts +1 -0
  19. package/dist/__tests__/localDashboard.test.js +226 -0
  20. package/dist/__tests__/localDashboard.test.js.map +1 -0
  21. package/dist/__tests__/multiHopDogfood.test.d.ts +12 -0
  22. package/dist/__tests__/multiHopDogfood.test.js +303 -0
  23. package/dist/__tests__/multiHopDogfood.test.js.map +1 -0
  24. package/dist/__tests__/openclawDogfood.test.d.ts +23 -0
  25. package/dist/__tests__/openclawDogfood.test.js +535 -0
  26. package/dist/__tests__/openclawDogfood.test.js.map +1 -0
  27. package/dist/__tests__/openclawMessaging.test.d.ts +14 -0
  28. package/dist/__tests__/openclawMessaging.test.js +232 -0
  29. package/dist/__tests__/openclawMessaging.test.js.map +1 -0
  30. package/dist/__tests__/tools.test.js +7 -3
  31. package/dist/__tests__/tools.test.js.map +1 -1
  32. package/dist/__tests__/traceabilityDogfood.test.d.ts +12 -0
  33. package/dist/__tests__/traceabilityDogfood.test.js +241 -0
  34. package/dist/__tests__/traceabilityDogfood.test.js.map +1 -0
  35. package/dist/__tests__/webmcpTools.test.d.ts +7 -0
  36. package/dist/__tests__/webmcpTools.test.js +195 -0
  37. package/dist/__tests__/webmcpTools.test.js.map +1 -0
  38. package/dist/dashboard/briefHtml.d.ts +20 -0
  39. package/dist/dashboard/briefHtml.js +1000 -0
  40. package/dist/dashboard/briefHtml.js.map +1 -0
  41. package/dist/dashboard/briefServer.d.ts +18 -0
  42. package/dist/dashboard/briefServer.js +320 -0
  43. package/dist/dashboard/briefServer.js.map +1 -0
  44. package/dist/dashboard/html.d.ts +18 -0
  45. package/dist/dashboard/html.js +1491 -0
  46. package/dist/dashboard/html.js.map +1 -0
  47. package/dist/dashboard/server.d.ts +17 -0
  48. package/dist/dashboard/server.js +403 -0
  49. package/dist/dashboard/server.js.map +1 -0
  50. package/dist/db.js +38 -0
  51. package/dist/db.js.map +1 -1
  52. package/dist/index.js +211 -5
  53. package/dist/index.js.map +1 -1
  54. package/dist/tools/critterTools.js +4 -0
  55. package/dist/tools/critterTools.js.map +1 -1
  56. package/dist/tools/forecastingTools.d.ts +11 -0
  57. package/dist/tools/forecastingTools.js +616 -0
  58. package/dist/tools/forecastingTools.js.map +1 -0
  59. package/dist/tools/localDashboardTools.d.ts +8 -0
  60. package/dist/tools/localDashboardTools.js +332 -0
  61. package/dist/tools/localDashboardTools.js.map +1 -0
  62. package/dist/tools/metaTools.js +170 -1
  63. package/dist/tools/metaTools.js.map +1 -1
  64. package/dist/tools/openclawTools.d.ts +11 -0
  65. package/dist/tools/openclawTools.js +1017 -0
  66. package/dist/tools/openclawTools.js.map +1 -0
  67. package/dist/tools/overstoryTools.d.ts +14 -0
  68. package/dist/tools/overstoryTools.js +426 -0
  69. package/dist/tools/overstoryTools.js.map +1 -0
  70. package/dist/tools/prReportTools.d.ts +11 -0
  71. package/dist/tools/prReportTools.js +911 -0
  72. package/dist/tools/prReportTools.js.map +1 -0
  73. package/dist/tools/progressiveDiscoveryTools.js +28 -9
  74. package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
  75. package/dist/tools/selfEvalTools.js +8 -1
  76. package/dist/tools/selfEvalTools.js.map +1 -1
  77. package/dist/tools/sessionMemoryTools.js +14 -2
  78. package/dist/tools/sessionMemoryTools.js.map +1 -1
  79. package/dist/tools/skillUpdateTools.d.ts +24 -0
  80. package/dist/tools/skillUpdateTools.js +469 -0
  81. package/dist/tools/skillUpdateTools.js.map +1 -0
  82. package/dist/tools/toolRegistry.js +178 -0
  83. package/dist/tools/toolRegistry.js.map +1 -1
  84. package/dist/tools/uiUxDiveAdvancedTools.js +61 -0
  85. package/dist/tools/uiUxDiveAdvancedTools.js.map +1 -1
  86. package/dist/tools/uiUxDiveTools.js +154 -1
  87. package/dist/tools/uiUxDiveTools.js.map +1 -1
  88. package/dist/tools/visualQaTools.d.ts +2 -0
  89. package/dist/tools/visualQaTools.js +1088 -0
  90. package/dist/tools/visualQaTools.js.map +1 -0
  91. package/dist/tools/webmcpTools.d.ts +16 -0
  92. package/dist/tools/webmcpTools.js +703 -0
  93. package/dist/tools/webmcpTools.js.map +1 -0
  94. package/dist/toolsetRegistry.js +4 -0
  95. package/dist/toolsetRegistry.js.map +1 -1
  96. package/package.json +1 -1
@@ -0,0 +1,332 @@
1
+ /**
2
+ * Local Dashboard MCP Tools
3
+ *
4
+ * 5 tools for operating the local Daily Brief dashboard via Claude Code.
5
+ * All tools except sync_daily_brief read from local SQLite with zero network dependency; sync_daily_brief fetches from Convex over HTTP.
6
+ */
7
+ import { getDb, genId } from "../db.js";
8
+ import { getBriefDashboardUrl, startBriefDashboardServer } from "../dashboard/briefServer.js";
9
/**
 * Best-effort JSON decoding for values read back from SQLite columns.
 *
 * @param {*} s - Value to decode; normally a JSON string or null/undefined.
 * @returns {*} `null` for any falsy input; the parsed value when `s` is a
 *   string containing valid JSON; otherwise `s` itself, unchanged.
 */
function safeParseJson(s) {
    if (!s) {
        return null;
    }
    // JSON.parse() coerces its argument to a string first, so handing it an
    // already-decoded value (e.g. the array [1]) would silently produce a
    // different value (1). Non-strings are passed through untouched.
    if (typeof s !== "string") {
        return s;
    }
    try {
        return JSON.parse(s);
    }
    catch {
        // Not valid JSON: fall back to the raw text rather than failing.
        return s;
    }
}
19
/**
 * Normalise a caller-supplied row limit to an integer in [1, max].
 *
 * SQLite treats a negative LIMIT as "no upper bound", so an unchecked
 * negative value would bypass the cap entirely.
 *
 * @param {*} value - Requested limit; may be undefined, fractional or invalid.
 * @param {number} fallback - Limit used when `value` is nullish or not finite.
 * @param {number} max - Largest limit that may be returned.
 * @returns {number} Integer limit between 1 and `max` inclusive.
 */
function clampRowLimit(value, fallback, max) {
    const requested = Number(value ?? fallback);
    if (!Number.isFinite(requested)) {
        return fallback;
    }
    return Math.max(1, Math.min(Math.trunc(requested), max));
}
/**
 * Extract a printable message from anything that can be thrown.
 *
 * @param {*} err - Caught value (not necessarily an Error instance).
 * @returns {string} `err.message` when it is a string, else `String(err)`.
 */
function describeError(err) {
    return typeof err?.message === "string" ? err.message : String(err);
}
/**
 * POST one function call to the `/api/mcpGateway` endpoint and decode the reply.
 *
 * @param {string} siteUrl - Base URL; a single trailing slash is stripped.
 * @param {string} secret - Value sent in the `x-mcp-secret` header.
 * @param {string} fn - Gateway function name.
 * @param {object} args - Arguments forwarded to the gateway function.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not 2xx, or the body is not JSON.
 */
async function callMcpGateway(siteUrl, secret, fn, args) {
    const res = await fetch(`${siteUrl.replace(/\/$/, "")}/api/mcpGateway`, {
        method: "POST",
        headers: { "Content-Type": "application/json", "x-mcp-secret": secret },
        body: JSON.stringify({ fn, args }),
    });
    // Without this check an auth/server failure whose body happens to parse
    // would be skipped silently and the sync recorded as a success.
    if (!res.ok) {
        throw new Error(`mcpGateway ${fn} failed: HTTP ${res.status} ${res.statusText}`);
    }
    return res.json();
}
/**
 * MCP tool definitions for the local Daily Brief dashboard.
 *
 * Each entry has a `name`, a `description`, a JSON-schema `inputSchema`, and an
 * async `handler(args)` that resolves to a plain result object. Failures are
 * reported as `{ error: true, message }` and missing data as `{ empty: true, message }`.
 */
export const localDashboardTools = [
    // ── sync_daily_brief ────────────────────────────────────────────────
    {
        name: "sync_daily_brief",
        description: "Sync daily brief + narrative data from Convex to local SQLite. Requires CONVEX_SITE_URL and MCP_SECRET environment variables. Returns sync summary with row counts and timing.",
        inputSchema: {
            type: "object",
            properties: {
                days: {
                    type: "number",
                    description: "Number of days to sync (default: 7)",
                },
                force: {
                    type: "boolean",
                    description: "Force re-sync even if data already exists (default: false)",
                },
            },
        },
        // NOTE(review): `days` and `force` are declared in inputSchema but this
        // handler never reads them — confirm whether they should be forwarded.
        handler: async (args) => {
            const siteUrl = process.env.CONVEX_SITE_URL || process.env.VITE_CONVEX_URL;
            if (!siteUrl) {
                return {
                    error: true,
                    message: "Missing CONVEX_SITE_URL or VITE_CONVEX_URL environment variable",
                    setupInstructions: "Set CONVEX_SITE_URL in .env.local or as an environment variable",
                };
            }
            const secret = process.env.MCP_SECRET;
            if (!secret) {
                return {
                    error: true,
                    message: "Missing MCP_SECRET environment variable",
                    setupInstructions: "Set MCP_SECRET via: npx convex env set MCP_SECRET <value>",
                };
            }
            const db = getDb();
            const syncId = genId("sync");
            const startMs = Date.now();
            // Record the run up front so a failure below can still be marked on it.
            db.prepare("INSERT INTO sync_runs (id, status) VALUES (?, 'running')").run(syncId);
            const counts = {};
            try {
                // Fetch latest dashboard snapshot
                const snapData = await callMcpGateway(siteUrl, secret, "getLatestDashboardSnapshot", {});
                if (snapData.success && snapData.data) {
                    const s = snapData.data;
                    db.prepare(`
                        INSERT OR REPLACE INTO brief_snapshots (id, date_string, generated_at, dashboard_metrics, source_summary, version)
                        VALUES (?, ?, ?, ?, ?, 1)
                    `).run(s._id ?? genId("snap"), s.dateString ?? new Date().toISOString().slice(0, 10), s.generatedAt ?? Date.now(), JSON.stringify(s.dashboardMetrics ?? {}), JSON.stringify(s.sourceSummary ?? null));
                    counts.brief_snapshots = 1;
                }
                // Fetch narrative threads
                const threadData = await callMcpGateway(siteUrl, secret, "getPublicThreads", { limit: 100 });
                if (threadData.success && Array.isArray(threadData.data)) {
                    const upsert = db.prepare(`
                        INSERT OR REPLACE INTO narrative_threads_local
                        (id, thread_id, name, slug, thesis, counter_thesis, entity_keys, topic_tags,
                        current_phase, first_event_at, latest_event_at, event_count, plot_twist_count, quality)
                        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    `);
                    // All thread rows are written inside one db.transaction() call.
                    const tx = db.transaction((items) => {
                        for (const t of items) {
                            upsert.run(t._id, t.threadId ?? t._id, t.name, t.slug ?? "", t.thesis, t.counterThesis ?? null, JSON.stringify(t.entityKeys ?? []), JSON.stringify(t.topicTags ?? []), t.currentPhase, t.firstEventAt ?? null, t.latestEventAt ?? null, t.eventCount ?? 0, t.plotTwistCount ?? 0, JSON.stringify(t.quality ?? null));
                        }
                    });
                    tx(threadData.data);
                    counts.narrative_threads = threadData.data.length;
                }
                const durationMs = Date.now() - startMs;
                db.prepare(`
                    UPDATE sync_runs SET status = 'success', completed_at = datetime('now'),
                    tables_synced = ?, duration_ms = ?
                    WHERE id = ?
                `).run(JSON.stringify(counts), durationMs, syncId);
                return {
                    success: true,
                    syncId,
                    durationMs,
                    counts,
                    dashboardUrl: getBriefDashboardUrl(),
                };
            }
            catch (err) {
                const durationMs = Date.now() - startMs;
                const message = describeError(err);
                db.prepare(`
                    UPDATE sync_runs SET status = 'error', completed_at = datetime('now'),
                    error = ?, duration_ms = ?
                    WHERE id = ?
                `).run(message, durationMs, syncId);
                return { error: true, message, syncId, durationMs };
            }
        },
    },
    // ── get_daily_brief_summary ─────────────────────────────────────────
    {
        name: "get_daily_brief_summary",
        description: "Get the latest daily brief summary from local SQLite. Returns dashboard metrics, features, and source summary. No network needed — reads cached data.",
        inputSchema: {
            type: "object",
            properties: {
                date: {
                    type: "string",
                    description: "Date string (YYYY-MM-DD). Omit for latest.",
                },
            },
        },
        handler: async (args = {}) => {
            const db = getDb();
            // With a date: highest version for that day. Without: most recently generated.
            const row = args.date
                ? db.prepare("SELECT * FROM brief_snapshots WHERE date_string = ? ORDER BY version DESC LIMIT 1").get(args.date)
                : db.prepare("SELECT * FROM brief_snapshots ORDER BY generated_at DESC LIMIT 1").get();
            if (!row) {
                return {
                    empty: true,
                    message: args.date
                        ? `No brief for ${args.date}. Run sync_daily_brief to fetch data.`
                        : "No briefs synced yet. Run sync_daily_brief to fetch data.",
                    tip: "Use sync_daily_brief tool or run: npm run local:sync",
                };
            }
            return {
                dateString: row.date_string,
                generatedAt: row.generated_at,
                version: row.version,
                dashboardMetrics: safeParseJson(row.dashboard_metrics),
                sourceSummary: safeParseJson(row.source_summary),
                syncedAt: row.synced_at,
                dashboardUrl: getBriefDashboardUrl(),
            };
        },
    },
    // ── get_narrative_status ────────────────────────────────────────────
    {
        name: "get_narrative_status",
        description: "Get narrative thread status from local SQLite. Returns threads grouped by phase (emerging, escalating, climax, resolution, dormant) with event counts. No network needed.",
        inputSchema: {
            type: "object",
            properties: {
                phase: {
                    type: "string",
                    enum: ["emerging", "escalating", "climax", "resolution", "dormant"],
                    description: "Filter by phase (optional)",
                },
                limit: {
                    type: "number",
                    description: "Max threads to return (default: 20)",
                },
            },
        },
        handler: async (args = {}) => {
            const db = getDb();
            // Default 20, hard cap 100, never below 1 and always an integer.
            const limit = clampRowLimit(args.limit, 20, 100);
            const query = args.phase
                ? "SELECT * FROM narrative_threads_local WHERE current_phase = ? ORDER BY latest_event_at DESC LIMIT ?"
                : "SELECT * FROM narrative_threads_local ORDER BY latest_event_at DESC LIMIT ?";
            const rows = args.phase
                ? db.prepare(query).all(args.phase, limit)
                : db.prepare(query).all(limit);
            if (rows.length === 0) {
                return {
                    empty: true,
                    message: "No narrative threads synced. Run sync_daily_brief to fetch data.",
                };
            }
            // Group by phase
            const grouped = {};
            for (const r of rows) {
                // Rows without a phase are bucketed under "dormant".
                const phase = r.current_phase || "dormant";
                if (!grouped[phase]) {
                    grouped[phase] = [];
                }
                grouped[phase].push({
                    id: r.id,
                    name: r.name,
                    thesis: r.thesis,
                    eventCount: r.event_count,
                    plotTwistCount: r.plot_twist_count,
                    latestEventAt: r.latest_event_at,
                    entityKeys: safeParseJson(r.entity_keys),
                    topicTags: safeParseJson(r.topic_tags),
                    quality: safeParseJson(r.quality),
                });
            }
            // Phase distribution (over the whole table, not just the returned rows)
            const distribution = {};
            try {
                const dist = db.prepare("SELECT current_phase, COUNT(*) as count FROM narrative_threads_local GROUP BY current_phase").all();
                for (const d of dist) {
                    distribution[d.current_phase] = d.count;
                }
            }
            catch { /* table may not exist */ }
            return {
                // Number of rows returned by the (filtered, limited) query above.
                totalThreads: rows.length,
                phaseDistribution: distribution,
                threads: grouped,
                dashboardUrl: getBriefDashboardUrl(),
            };
        },
    },
    // ── get_ops_dashboard ───────────────────────────────────────────────
    {
        name: "get_ops_dashboard",
        description: "Get operational dashboard status from local SQLite. Returns last sync info, tool call frequency, active verification cycles, and data counts. No network needed.",
        inputSchema: {
            type: "object",
            properties: {},
        },
        // Every section is queried independently and best-effort: a failing query
        // (e.g. a missing table) leaves that section at its empty default.
        handler: async () => {
            const db = getDb();
            // Last sync
            let lastSync = null;
            try {
                const row = db.prepare("SELECT * FROM sync_runs ORDER BY started_at DESC LIMIT 1").get();
                if (row) {
                    lastSync = {
                        ...row,
                        tables_synced: safeParseJson(row.tables_synced),
                    };
                }
            }
            catch { /* table may not exist */ }
            // Tool call frequency (last 24h)
            let toolStats = [];
            try {
                toolStats = db.prepare(`
                    SELECT tool_name, COUNT(*) as count, AVG(duration_ms) as avg_duration,
                    SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as errors
                    FROM tool_call_log
                    WHERE created_at > datetime('now', '-1 day')
                    GROUP BY tool_name ORDER BY count DESC LIMIT 15
                `).all();
            }
            catch { /* table may not exist */ }
            // Active verification cycles
            let activeCycles = [];
            try {
                activeCycles = db.prepare(`
                    SELECT id, title, status, created_at FROM verification_cycles
                    WHERE status NOT IN ('completed', 'abandoned')
                    ORDER BY created_at DESC LIMIT 5
                `).all();
            }
            catch { /* table may not exist */ }
            // Data counts
            let briefCount = 0;
            let threadCount = 0;
            let eventCount = 0;
            try {
                briefCount = db.prepare("SELECT COUNT(*) as c FROM brief_snapshots").get()?.c ?? 0;
                threadCount = db.prepare("SELECT COUNT(*) as c FROM narrative_threads_local").get()?.c ?? 0;
                eventCount = db.prepare("SELECT COUNT(*) as c FROM narrative_events_local").get()?.c ?? 0;
            }
            catch { /* tables may not exist */ }
            // Privacy/audience stats
            let privacyMode = null;
            try {
                const todayEvents = db.prepare(`
                    SELECT COUNT(*) as count,
                    SUM(CASE WHEN is_public = 1 THEN 1 ELSE 0 END) as public_triggers
                    FROM audience_events WHERE created_at > datetime('now', '-1 day')
                `).get();
                if (todayEvents && todayEvents.count > 0) {
                    privacyMode = {
                        triggeredToday: todayEvents.public_triggers ?? 0,
                        totalEvents: todayEvents.count,
                    };
                }
            }
            catch { /* table may not exist */ }
            return {
                lastSync,
                toolCallFrequency: toolStats,
                activeCycles,
                dataCounts: { briefs: briefCount, threads: threadCount, events: eventCount },
                privacyMode,
                dashboardUrl: getBriefDashboardUrl(),
            };
        },
    },
    // ── open_local_dashboard ────────────────────────────────────────────
    {
        name: "open_local_dashboard",
        description: "Start the local Daily Brief dashboard server if needed, and return the URL. The dashboard shows Brief metrics, Narrative thread lanes, and Ops status — all from local SQLite.",
        inputSchema: {
            type: "object",
            properties: {},
        },
        handler: async () => {
            let url = getBriefDashboardUrl();
            // No URL reported yet: start the server, requesting port 6275, and
            // build the URL from whichever port the start call resolves to.
            if (!url) {
                try {
                    const port = await startBriefDashboardServer(getDb(), 6275);
                    url = `http://127.0.0.1:${port}`;
                }
                catch (err) {
                    return { error: true, message: `Failed to start dashboard: ${describeError(err)}` };
                }
            }
            return {
                url,
                views: ["Brief (metrics, features, sources)", "Narrative (thread lanes by phase)", "Ops (sync status, tool frequency)"],
                tip: "Open the URL in a browser to see the dashboard. Data auto-refreshes every 30s.",
            };
        },
    },
];
332
+ //# sourceMappingURL=localDashboardTools.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"localDashboardTools.js","sourceRoot":"","sources":["../../src/tools/localDashboardTools.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,oBAAoB,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAC;AAE9F,SAAS,aAAa,CAAC,CAA4B;IACjD,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,IAAI,CAAC;QAAC,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAAC,CAAC;IAAC,MAAM,CAAC;QAAC,OAAO,CAAC,CAAC;IAAC,CAAC;AACnD,CAAC;AAED,MAAM,CAAC,MAAM,mBAAmB,GAAc;IAC5C,uEAAuE;IACvE;QACE,IAAI,EAAE,kBAAkB;QACxB,WAAW,EACT,gLAAgL;QAClL,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,IAAI,EAAE;oBACJ,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,qCAAqC;iBACnD;gBACD,KAAK,EAAE;oBACL,IAAI,EAAE,SAAS;oBACf,WAAW,EAAE,4DAA4D;iBAC1E;aACF;SACF;QACD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;YACtB,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC;YAC3E,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,OAAO,EAAE,iEAAiE;oBAC1E,iBAAiB,EAAE,iEAAiE;iBACrF,CAAC;YACJ,CAAC;YAED,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;YACtC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,OAAO,EAAE,yCAAyC;oBAClD,iBAAiB,EAAE,2DAA2D;iBAC/E,CAAC;YACJ,CAAC;YAED,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;YACnB,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;YAC7B,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC3B,EAAE,CAAC,OAAO,CAAC,0DAA0D,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAEnF,MAAM,MAAM,GAA2B,EAAE,CAAC;YAE1C,IAAI,CAAC;gBACH,kCAAkC;gBAClC,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,iBAAiB,EAAE;oBAC1E,MAAM,EAAE,MAAM;oBACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,cAAc,EAAE,MAAM,EAAE;oBACvE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,4BAA4B,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;iBACrE,CAAC,CAAC;gBACH,MAAM,QAAQ,GAAQ,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBAC3C,IAAI,QAAQ,CAAC,OAAO,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;oBACtC,MAAM,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC;oBACxB,EAAE,CAAC,OAAO,CAAC;;;WAGV,CAAC,CAAC,GAAG,CACJ,CAAC,CAAC,GAAG,IAAI,KAAK,CAAC,MAAM,CAAC,EACtB,CAAC,CAAC,UAAU,IAAI,IAAI
,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EACrD,CAAC,CAAC,WAAW,IAAI,IAAI,CAAC,GAAG,EAAE,EAC3B,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,IAAI,EAAE,CAAC,EACxC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,aAAa,IAAI,IAAI,CAAC,CACxC,CAAC;oBACF,MAAM,CAAC,eAAe,GAAG,CAAC,CAAC;gBAC7B,CAAC;gBAED,0BAA0B;gBAC1B,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,iBAAiB,EAAE;oBAC5E,MAAM,EAAE,MAAM;oBACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,cAAc,EAAE,MAAM,EAAE;oBACvE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,kBAAkB,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC;iBACvE,CAAC,CAAC;gBACH,MAAM,UAAU,GAAQ,MAAM,SAAS,CAAC,IAAI,EAAE,CAAC;gBAC/C,IAAI,UAAU,CAAC,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzD,MAAM,MAAM,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;WAKzB,CAAC,CAAC;oBACH,MAAM,EAAE,GAAG,EAAE,CAAC,WAAW,CAAC,CAAC,KAAY,EAAE,EAAE;wBACzC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;4BACtB,MAAM,CAAC,GAAG,CACR,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,EAAE,EAChD,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,aAAa,IAAI,IAAI,EACjC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,IAAI,EAAE,CAAC,EACrE,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC,YAAY,IAAI,IAAI,EAAE,CAAC,CAAC,aAAa,IAAI,IAAI,EAC/D,CAAC,CAAC,UAAU,IAAI,CAAC,EAAE,CAAC,CAAC,cAAc,IAAI,CAAC,EACxC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,IAAI,IAAI,CAAC,CAClC,CAAC;wBACJ,CAAC;oBACH,CAAC,CAAC,CAAC;oBACH,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;oBACpB,MAAM,CAAC,iBAAiB,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC;gBACpD,CAAC;gBAED,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;gBACxC,EAAE,CAAC,OAAO,CAAC;;;;SAIV,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,UAAU,EAAE,MAAM,CAAC,CAAC;gBAEnD,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,MAAM;oBACN,UAAU;oBACV,MAAM;oBACN,YAAY,EAAE,oBAAoB,EAAE;iBACrC,CAAC;YACJ,CAAC;YAAC,OAAO,GAAQ,EAAE,CAAC;gBAClB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;gBACxC,EAAE,CAAC,OAAO,CAAC;;;;SAIV,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,EAAE,UAAU,EAAE,MAA
M,CAAC,CAAC;gBACxC,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;YACnE,CAAC;QACH,CAAC;KACF;IAED,uEAAuE;IACvE;QACE,IAAI,EAAE,yBAAyB;QAC/B,WAAW,EACT,uJAAuJ;QACzJ,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,IAAI,EAAE;oBACJ,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,4CAA4C;iBAC1D;aACF;SACF;QACD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;YACtB,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;YACnB,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI;gBACnB,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,mFAAmF,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAQ;gBACvH,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,kEAAkE,CAAC,CAAC,GAAG,EAAS,CAAC;YAEhG,IAAI,CAAC,GAAG,EAAE,CAAC;gBACT,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,OAAO,EAAE,IAAI,CAAC,IAAI;wBAChB,CAAC,CAAC,gBAAgB,IAAI,CAAC,IAAI,uCAAuC;wBAClE,CAAC,CAAC,2DAA2D;oBAC/D,GAAG,EAAE,sDAAsD;iBAC5D,CAAC;YACJ,CAAC;YAED,OAAO;gBACL,UAAU,EAAE,GAAG,CAAC,WAAW;gBAC3B,WAAW,EAAE,GAAG,CAAC,YAAY;gBAC7B,OAAO,EAAE,GAAG,CAAC,OAAO;gBACpB,gBAAgB,EAAE,aAAa,CAAC,GAAG,CAAC,iBAAiB,CAAC;gBACtD,aAAa,EAAE,aAAa,CAAC,GAAG,CAAC,cAAc,CAAC;gBAChD,QAAQ,EAAE,GAAG,CAAC,SAAS;gBACvB,YAAY,EAAE,oBAAoB,EAAE;aACrC,CAAC;QACJ,CAAC;KACF;IAED,uEAAuE;IACvE;QACE,IAAI,EAAE,sBAAsB;QAC5B,WAAW,EACT,2KAA2K;QAC7K,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,KAAK,EAAE;oBACL,IAAI,EAAE,QAAQ;oBACd,IAAI,EAAE,CAAC,UAAU,EAAE,YAAY,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,CAAC;oBACnE,WAAW,EAAE,4BAA4B;iBAC1C;gBACD,KAAK,EAAE;oBACL,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,qCAAqC;iBACnD;aACF;SACF;QACD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;YACtB,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;YACnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC;YAE9C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK;gBACtB,CAAC,CAAC,qGAAqG;gBACvG,CAAC,CAAC,6EAA6E,CAAC;YAElF,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK;gBACrB,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAU;gBACnD,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,KAAK,CAAU,CAAC;YAE1C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACtB,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,OAAO,EAAE,kEAAkE;iBAC5E,CAAC;YACJ,CAAC;YAED
,iBAAiB;YACjB,MAAM,OAAO,GAA0B,EAAE,CAAC;YAC1C,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;gBACrB,MAAM,KAAK,GAAG,CAAC,CAAC,aAAa,IAAI,SAAS,CAAC;gBAC3C,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC;oBAAE,OAAO,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;gBACzC,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC;oBAClB,EAAE,EAAE,CAAC,CAAC,EAAE;oBACR,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,MAAM,EAAE,CAAC,CAAC,MAAM;oBAChB,UAAU,EAAE,CAAC,CAAC,WAAW;oBACzB,cAAc,EAAE,CAAC,CAAC,gBAAgB;oBAClC,aAAa,EAAE,CAAC,CAAC,eAAe;oBAChC,UAAU,EAAE,aAAa,CAAC,CAAC,CAAC,WAAW,CAAC;oBACxC,SAAS,EAAE,aAAa,CAAC,CAAC,CAAC,UAAU,CAAC;oBACtC,OAAO,EAAE,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC;iBAClC,CAAC,CAAC;YACL,CAAC;YAED,qBAAqB;YACrB,MAAM,YAAY,GAA2B,EAAE,CAAC;YAChD,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CACrB,6FAA6F,CAC9F,CAAC,GAAG,EAAW,CAAC;gBACjB,KAAK,MAAM,CAAC,IAAI,IAAI;oBAAE,YAAY,CAAC,CAAC,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC;YAChE,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;YAErC,OAAO;gBACL,YAAY,EAAE,IAAI,CAAC,MAAM;gBACzB,iBAAiB,EAAE,YAAY;gBAC/B,OAAO,EAAE,OAAO;gBAChB,YAAY,EAAE,oBAAoB,EAAE;aACrC,CAAC;QACJ,CAAC;KACF;IAED,uEAAuE;IACvE;QACE,IAAI,EAAE,mBAAmB;QACzB,WAAW,EACT,kKAAkK;QACpK,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE,EAAE;SACf;QACD,OAAO,EAAE,KAAK,IAAI,EAAE;YAClB,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;YAEnB,YAAY;YACZ,IAAI,QAAQ,GAAQ,IAAI,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,0DAA0D,CAAC,CAAC,GAAG,EAAS,CAAC;gBAChG,IAAI,GAAG,EAAE,CAAC;oBACR,QAAQ,GAAG;wBACT,GAAG,GAAG;wBACN,aAAa,EAAE,aAAa,CAAC,GAAG,CAAC,aAAa,CAAC;qBAChD,CAAC;gBACJ,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;YAErC,iCAAiC;YACjC,IAAI,SAAS,GAAU,EAAE,CAAC;YAC1B,IAAI,CAAC;gBACH,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;SAMtB,CAAC,CAAC,GAAG,EAAW,CAAC;YACpB,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;YAErC,6BAA6B;YAC7B,IAAI,YAAY,GAAU,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,YAAY,GAAG,EAAE,CAAC,OAAO,CAAC;;;;SAIzB,CAAC,CAAC,GAAG,EAAW,CAAC;YACpB,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;YAErC,cAAc;YACd,IAAI,UAAU,GAAG,CAAC,EAAE,WAAW,GAAG,CAAC,EAAE,UAAU,GAAG,CAAC,CAAC;YACpD,IAAI,CAAC;gBACH,UAAU,GAAI,EA
AE,CAAC,OAAO,CAAC,2CAA2C,CAAC,CAAC,GAAG,EAAU,EAAE,CAAC,IAAI,CAAC,CAAC;gBAC5F,WAAW,GAAI,EAAE,CAAC,OAAO,CAAC,mDAAmD,CAAC,CAAC,GAAG,EAAU,EAAE,CAAC,IAAI,CAAC,CAAC;gBACrG,UAAU,GAAI,EAAE,CAAC,OAAO,CAAC,kDAAkD,CAAC,CAAC,GAAG,EAAU,EAAE,CAAC,IAAI,CAAC,CAAC;YACrG,CAAC;YAAC,MAAM,CAAC,CAAC,0BAA0B,CAAC,CAAC;YAEtC,yBAAyB;YACzB,IAAI,WAAW,GAAQ,IAAI,CAAC;YAC5B,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,EAAE,CAAC,OAAO,CAAC;;;;SAI9B,CAAC,CAAC,GAAG,EAAS,CAAC;gBAChB,IAAI,WAAW,IAAI,WAAW,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;oBACzC,WAAW,GAAG;wBACZ,cAAc,EAAE,WAAW,CAAC,eAAe,IAAI,CAAC;wBAChD,WAAW,EAAE,WAAW,CAAC,KAAK;qBAC/B,CAAC;gBACJ,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;YAErC,OAAO;gBACL,QAAQ;gBACR,iBAAiB,EAAE,SAAS;gBAC5B,YAAY;gBACZ,UAAU,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,UAAU,EAAE;gBAC5E,WAAW;gBACX,YAAY,EAAE,oBAAoB,EAAE;aACrC,CAAC;QACJ,CAAC;KACF;IAED,uEAAuE;IACvE;QACE,IAAI,EAAE,sBAAsB;QAC5B,WAAW,EACT,gLAAgL;QAClL,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE,EAAE;SACf;QACD,OAAO,EAAE,KAAK,IAAI,EAAE;YAClB,IAAI,GAAG,GAAG,oBAAoB,EAAE,CAAC;YACjC,IAAI,CAAC,GAAG,EAAE,CAAC;gBACT,IAAI,CAAC;oBACH,MAAM,IAAI,GAAG,MAAM,yBAAyB,CAAC,KAAK,EAAE,EAAE,IAAI,CAAC,CAAC;oBAC5D,GAAG,GAAG,oBAAoB,IAAI,EAAE,CAAC;gBACnC,CAAC;gBAAC,OAAO,GAAQ,EAAE,CAAC;oBAClB,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,8BAA8B,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC;gBAC/E,CAAC;YACH,CAAC;YACD,OAAO;gBACL,GAAG;gBACH,KAAK,EAAE,CAAC,oCAAoC,EAAE,mCAAmC,EAAE,mCAAmC,CAAC;gBACvH,GAAG,EAAE,gFAAgF;aACtF,CAAC;QACJ,CAAC;KACF;CACF,CAAC"}
@@ -211,6 +211,60 @@ const METHODOLOGY_CONTENT = {
211
211
  bundle_analysis: "npm run perf:bundle",
212
212
  },
213
213
  },
214
+ flywheel_ui_dogfood: {
215
+ title: "Flywheel Mode: UI Dogfood + Walkthrough Video QA",
216
+ description: "Continuous UI verification loop: capture a full walkthrough video + screenshots, run Gemini-based video/screenshot QA, fix root causes (5-whys), and re-capture evidence until the UI is stable, accessible, and performant. Do not stop at 'build passes' — ship dogfood evidence.",
217
+ steps: [
218
+ {
219
+ step: 1,
220
+ name: "Launch + Trajectory",
221
+ description: "Start the dev server and publish a visible trajectory link in the UI within 60 seconds. The trajectory is the audit trail of iterations and evidence.",
222
+ tools: ["record_learning"],
223
+ action: "Start dev server. Ensure `/dogfood` shows the latest manifest + walkthrough. Record a learning if trajectory/evidence is missing.",
224
+ },
225
+ {
226
+ step: 2,
227
+ name: "Capture Evidence (Scribe + Video)",
228
+ description: "Run a full route walkthrough capture (screens + interactions) so issues are visible and replayable.",
229
+ tools: ["capture_responsive_suite", "capture_ui_screenshot"],
230
+ action: "Run `npm run dogfood:full:local` to generate `public/dogfood/*` (gallery + walkthrough video + frames + scribe).",
231
+ },
232
+ {
233
+ step: 3,
234
+ name: "Gemini QA (Video + Screens)",
235
+ description: "Use Gemini video/screenshot understanding to flag UI regressions: flashes, layout shift, focus traps, broken empty states, and performance jank.",
236
+ tools: ["analyze_ui_screenshot"],
237
+ action: "Run `npm run dogfood:qa:gemini` and review results in `/dogfood`. Convert actionable issues into deterministic fixes (no band-aids).",
238
+ },
239
+ {
240
+ step: 4,
241
+ name: "Root Cause + Fix (5 Whys)",
242
+ description: "At each checkpoint, immediately seek issues and follow symptom → intermediate state → root cause. Fix the cause so the symptom is impossible.",
243
+ tools: ["run_closed_loop", "record_learning"],
244
+ action: "Do a 5-whys chain. Implement the smallest correct fix. Re-run `tsc` + build. Record learnings for any repeated failure mode.",
245
+ },
246
+ {
247
+ step: 5,
248
+ name: "Motion Safety (Seizure / Flash Policy)",
249
+ description: "Remove or soften high-contrast flashes (large-area pulses/fades). Ship stable backgrounds and subtle loading states. Always honor prefers-reduced-motion.",
250
+ tools: ["run_quality_gate"],
251
+ action: "FAIL if any route shows full-viewport flashing/pulsing/fade transitions. Prefer per-view fallbacks and non-animated skeletons for large surfaces.",
252
+ },
253
+ {
254
+ step: 6,
255
+ name: "Poll + Adapt",
256
+ description: "Poll every 60 seconds. After 3 consecutive failures on the same issue, change strategy (instrument, isolate, or rollback).",
257
+ tools: ["record_learning"],
258
+ action: "Keep iterating until the dogfood artifacts + QA runs show no new P0/P1 issues.",
259
+ },
260
+ ],
261
+ commands: {
262
+ capture: "npm run dogfood:full:local",
263
+ gemini_qa: "npm run dogfood:qa:gemini",
264
+ build: "npm run build",
265
+ typecheck: "npx tsc --noEmit",
266
+ },
267
+ },
214
268
  agentic_vision: {
215
269
  title: "Agentic Vision (AI-Powered Visual Verification)",
216
270
  description: "Use AI vision models to analyze UI screenshots programmatically. The Discover-Capture-Analyze-Manipulate-Iterate-Gate loop provides automated visual QA that goes beyond what rule-based checks can catch. Gemini with code execution provides the richest analysis (zoom, crop, compute within the model). Falls back to GPT-5-mini, Claude, or OpenRouter vision.",
@@ -1242,6 +1296,117 @@ const METHODOLOGY_CONTENT = {
1242
1296
  "For production: implement interrupt handling (user speaks while TTS is playing)",
1243
1297
  ],
1244
1298
  },
1299
+ analyst_diagnostic: {
1300
+ title: "Analyst Diagnostic — Root Cause Over Bandaids",
1301
+ description: "Guide yourself like an analyst diagnosing the root cause, NOT a junior dev slapping on a bandaid. Mandatory for all bug work.",
1302
+ steps: [
1303
+ {
1304
+ step: 1,
1305
+ name: "Reproduce",
1306
+ description: "Confirm the exact failure mode before touching any code.",
1307
+ tools: ["search_all_knowledge"],
1308
+ action: "Reproduce the failure and note the exact failure mode. Then check if this root cause is already known. If yes, apply the known fix. If no, proceed to step 2.",
1309
+ },
1310
+ {
1311
+ step: 2,
1312
+ name: "Trace Upstream",
1313
+ description: "Walk from symptom → intermediate state → root cause. Don't stop at the first error you see.",
1314
+ tools: ["run_recon", "log_recon_finding"],
1315
+ action: "Use recon tools to trace the chain. Log each intermediate finding.",
1316
+ },
1317
+ {
1318
+ step: 3,
1319
+ name: "Ask 'Why' 5 Times",
1320
+ description: "Each answer should go one level deeper into the system. If you can't get to 5, you haven't found the root cause yet.",
1321
+ action: "Document each 'why' level. The 5th answer is usually the real fix.",
1322
+ },
1323
+ {
1324
+ step: 4,
1325
+ name: "Fix the Cause",
1326
+ description: "The right fix makes the symptom impossible, not just invisible.",
1327
+ tools: ["resolve_gap"],
1328
+ action: "Implement the fix. It should address the root cause from step 3, not the symptom from step 1.",
1329
+ },
1330
+ {
1331
+ step: 5,
1332
+ name: "Verify No Sideways Shift",
1333
+ description: "Bandaids move bugs sideways. Check adjacent behavior to confirm the fix didn't introduce new issues.",
1334
+ tools: ["log_test_result", "run_closed_loop"],
1335
+ action: "Run tests. Check adjacent functionality. Confirm the bug can't recur.",
1336
+ },
1337
+ {
1338
+ step: 6,
1339
+ name: "Record the Root Cause",
1340
+ description: "Document what you found so the next person doesn't re-discover it.",
1341
+ tools: ["record_learning"],
1342
+ action: "Call record_learning with the root cause, not just the fix.",
1343
+ },
1344
+ ],
1345
+ redFlags: [
1346
+ "Adding try/catch that swallows errors without understanding them",
1347
+ "Adding ?. optional chaining to mask undefined instead of finding why it's undefined",
1348
+ "Adding 'as any' to silence type errors instead of fixing the type mismatch",
1349
+ "Adding timeouts/retries to paper over race conditions",
1350
+ "Deleting a failing test instead of fixing the code it tests",
1351
+ "'It works now' without understanding why it didn't before",
1352
+ ],
1353
+ },
1354
+ scenario_testing: {
1355
+ title: "Scenario-Based Testing — Real Human Behavior at Scale",
1356
+ description: "Tests that don't model real human behavior are false confidence. Every test must be scenario-based: start from a real user persona and goal, simulate realistic behavior, and verify at scale. Covers all behavior angles and both short-running (burst) and long-running (sustained) scenarios.",
1357
+ steps: [
1358
+ {
1359
+ step: 1,
1360
+ name: "Define the User Persona",
1361
+ description: "Who is the user in this scenario? First-timer, power user, distracted user, adversarial actor, mobile user on a slow network, or concurrent session? The persona determines which failure modes matter.",
1362
+ action: "Before writing any assertion, write: 'User: <persona>. Goal: <what they want to achieve>. Prior state: <what's already in the system>.' If you can't answer these, you don't have a test scenario yet.",
1363
+ },
1364
+ {
1365
+ step: 2,
1366
+ name: "Specify Action Sequence + Timing",
1367
+ description: "What does the user do, in what order, with what timing between actions? Rushed clicks differ from deliberate actions. Concurrent users produce race conditions that single-user tests miss entirely.",
1368
+ action: "Document: 'Actions: [step-by-step sequence with timing]. Concurrency: [1 user / N concurrent].' For concurrent scenarios, specify what overlaps and at what rate.",
1369
+ },
1370
+ {
1371
+ step: 3,
1372
+ name: "Set the Scale Axis",
1373
+ description: "A test that passes at 1 user is necessary but not sufficient. Define what happens at 10x and 100x. Single-user happy paths cannot catch connection pool exhaustion, rate limit throttling, lock contention, or thundering herd.",
1374
+ action: "For every scenario, write: 'Scale: 1x (baseline) / 10x (concurrent) / 100x (sustained).' If you only have the 1x case, the test suite has a known gap — document it.",
1375
+ },
1376
+ {
1377
+ step: 4,
1378
+ name: "Set the Duration Axis",
1379
+ description: "Short-running tests (burst/spike) and long-running tests (sustained load, state accumulation) catch different failure modes. A system that handles 10 requests in 1 second may fail after 10,000 requests over 30 minutes due to memory leaks, DB growth, stale caches, or token expiry.",
1380
+ action: "For each scenario, specify: 'Duration: single request / session-length / multi-day accumulation.' If you have only short-running coverage, add a long-running counterpart or mark the gap explicitly.",
1381
+ },
1382
+ {
1383
+ step: 5,
1384
+ name: "Cover All Behavior Angles",
1385
+ description: "Six required angles per feature: (1) happy path, (2) all sad paths, (3) concurrent access, (4) degraded conditions (slow network, auth expiry, partial API failures), (5) long-running accumulation (DB growth, stale cache), (6) adversarial inputs (injection, replay, unexpected payloads).",
1386
+ action: "Use a checklist: happy ✓ / sad paths ✓ / concurrent ✓ / degraded ✓ / long-running ✓ / adversarial ✓. Missing boxes are documented gaps, not implicit coverage.",
1387
+ },
1388
+ {
1389
+ step: 6,
1390
+ name: "Verify Side Effects, Not Just Return Values",
1391
+ description: "Assertions that only check return values miss the failure modes that matter in production: state changes, side effects, downstream consequences. Did the DB update? Did the cache invalidate? Did the audit log record? Did the notification fire?",
1392
+ action: "After every action assertion, add assertions for: state in the DB, downstream side effects, observable UI change, and any async consequences. If you can't assert them, add instrumentation.",
1393
+ },
1394
+ ],
1395
+ antiPatterns: [
1396
+ "Simple unit tests with no scenario context",
1397
+ "Tests that only cover the happy path",
1398
+ "Tests that mock everything and test nothing real",
1399
+ "Tests that pass at 1 user and are never run at 10+",
1400
+ "Hard-coded user state assumptions that won't hold in production",
1401
+ "'It passes in CI' declared without production-realistic data volume or concurrency",
1402
+ "Declaring a feature 'tested' after a single integration test with synthetic clean data",
1403
+ "Assertions only on return values — no state or side effect checks",
1404
+ ],
1405
+ scenarioAnatomy: {
1406
+ template: "Scenario: <name> | User: <persona> | Goal: <user goal> | Prior state: <system state before> | Actions: <sequence with timing> | Scale: <1x/10x/100x> | Duration: <single/session/multi-day> | Expected: <state + side effects + UI> | Edge cases: <degraded/adversarial/partial>",
1407
+ example: "Scenario: Power user submits form on slow 3G | User: power user, 50+ prior submissions, impatient | Goal: submit expense report | Prior state: DB has 10,000 existing reports, cache warm | Actions: fill form (5s), click submit, network drops to 3G mid-request, retry after 2s | Scale: 10 concurrent submitters | Duration: 30-minute session with 20 submissions | Expected: exactly-once submission, retry shows loading, no duplicate DB row, audit log entry | Edge cases: double-click, browser back mid-submit, session expiry at step 3",
1408
+ },
1409
+ },
1245
1410
  overview: {
1246
1411
  title: "NodeBench Development Methodology — Overview",
1247
1412
  description: "A dual-loop system for rigorous development. The inner loop (6-Phase Verification) ensures correctness. The outer loop (Eval-Driven Development) ensures improvement. Together they form the AI Flywheel.",
@@ -1274,6 +1439,8 @@ const METHODOLOGY_CONTENT = {
1274
1439
  toon_format: "TOON Format — Token-Oriented Object Notation for ~40% token savings on LLM payloads",
1275
1440
  seo_audit: "SEO Audit — technical SEO, content analysis, performance, WordPress security",
1276
1441
  voice_bridge: "Voice Bridge — STT/TTS/LLM pipeline design, scaffold generation, latency benchmarking",
1442
+ analyst_diagnostic: "Analyst Diagnostic — root-cause diagnosis over bandaids. Mandatory for all bug work: reproduce, trace upstream, ask 'why' 5 times, fix the cause not the symptom",
1443
+ scenario_testing: "Scenario-Based Testing — no shallow tests, real user personas, scale axis (1x/10x/100x), duration axis (burst + sustained), all behavior angles (happy/sad/concurrent/degraded/long-running/adversarial)",
1277
1444
  },
1278
1445
  },
1279
1446
  {
@@ -1554,7 +1721,7 @@ export function createMetaTools(allTools) {
1554
1721
  },
1555
1722
  {
1556
1723
  name: "getMethodology",
1557
- description: 'Get step-by-step guidance for a development methodology. Topics: verification, eval, flywheel, mandatory_flywheel, reconnaissance, quality_gates, ui_ux_qa, agentic_vision, closed_loop, learnings, project_ideation, tech_stack_2026, telemetry_setup, agents_md_maintenance, agent_bootstrap, autonomous_maintenance, parallel_agent_teams, self_reinforced_learning, academic_paper_writing, agent_evaluation, controlled_evaluation, overview. Call with topic "overview" to see all available methodologies.',
1724
+ description: 'Get step-by-step guidance for a development methodology. Topics: verification, eval, flywheel, mandatory_flywheel, reconnaissance, quality_gates, ui_ux_qa, flywheel_ui_dogfood, agentic_vision, closed_loop, learnings, project_ideation, tech_stack_2026, telemetry_setup, agents_md_maintenance, agent_bootstrap, autonomous_maintenance, parallel_agent_teams, self_reinforced_learning, academic_paper_writing, agent_evaluation, controlled_evaluation, scenario_testing, overview. Call with topic "overview" to see all available methodologies.',
1558
1725
  inputSchema: {
1559
1726
  type: "object",
1560
1727
  properties: {
@@ -1568,6 +1735,7 @@ export function createMetaTools(allTools) {
1568
1735
  "reconnaissance",
1569
1736
  "quality_gates",
1570
1737
  "ui_ux_qa",
1738
+ "flywheel_ui_dogfood",
1571
1739
  "agentic_vision",
1572
1740
  "closed_loop",
1573
1741
  "learnings",
@@ -1582,6 +1750,7 @@ export function createMetaTools(allTools) {
1582
1750
  "academic_paper_writing",
1583
1751
  "agent_evaluation",
1584
1752
  "controlled_evaluation",
1753
+ "scenario_testing",
1585
1754
  "overview",
1586
1755
  ],
1587
1756
  description: "Which methodology to explain",