npm - nodebench-mcp - Versions diffs - 2.22.0 → 2.26.0 - Mend

nodebench-mcp 2.22.0 → 2.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96) hide show

package/NODEBENCH_AGENTS.md +5 -4
package/README.md +495 -280
package/dist/__tests__/architectComplex.test.js +3 -5
package/dist/__tests__/architectComplex.test.js.map +1 -1
package/dist/__tests__/batchAutopilot.test.d.ts +8 -0
package/dist/__tests__/batchAutopilot.test.js +218 -0
package/dist/__tests__/batchAutopilot.test.js.map +1 -0
package/dist/__tests__/cliSubcommands.test.d.ts +1 -0
package/dist/__tests__/cliSubcommands.test.js +138 -0
package/dist/__tests__/cliSubcommands.test.js.map +1 -0
package/dist/__tests__/evalHarness.test.js +1 -1
package/dist/__tests__/forecastingDogfood.test.d.ts +9 -0
package/dist/__tests__/forecastingDogfood.test.js +284 -0
package/dist/__tests__/forecastingDogfood.test.js.map +1 -0
package/dist/__tests__/forecastingScoring.test.d.ts +9 -0
package/dist/__tests__/forecastingScoring.test.js +202 -0
package/dist/__tests__/forecastingScoring.test.js.map +1 -0
package/dist/__tests__/localDashboard.test.d.ts +1 -0
package/dist/__tests__/localDashboard.test.js +226 -0
package/dist/__tests__/localDashboard.test.js.map +1 -0
package/dist/__tests__/multiHopDogfood.test.d.ts +12 -0
package/dist/__tests__/multiHopDogfood.test.js +303 -0
package/dist/__tests__/multiHopDogfood.test.js.map +1 -0
package/dist/__tests__/openclawDogfood.test.d.ts +23 -0
package/dist/__tests__/openclawDogfood.test.js +535 -0
package/dist/__tests__/openclawDogfood.test.js.map +1 -0
package/dist/__tests__/openclawMessaging.test.d.ts +14 -0
package/dist/__tests__/openclawMessaging.test.js +232 -0
package/dist/__tests__/openclawMessaging.test.js.map +1 -0
package/dist/__tests__/tools.test.js +7 -3
package/dist/__tests__/tools.test.js.map +1 -1
package/dist/__tests__/traceabilityDogfood.test.d.ts +12 -0
package/dist/__tests__/traceabilityDogfood.test.js +241 -0
package/dist/__tests__/traceabilityDogfood.test.js.map +1 -0
package/dist/__tests__/webmcpTools.test.d.ts +7 -0
package/dist/__tests__/webmcpTools.test.js +195 -0
package/dist/__tests__/webmcpTools.test.js.map +1 -0
package/dist/dashboard/briefHtml.d.ts +20 -0
package/dist/dashboard/briefHtml.js +1000 -0
package/dist/dashboard/briefHtml.js.map +1 -0
package/dist/dashboard/briefServer.d.ts +18 -0
package/dist/dashboard/briefServer.js +320 -0
package/dist/dashboard/briefServer.js.map +1 -0
package/dist/dashboard/html.d.ts +18 -0
package/dist/dashboard/html.js +1491 -0
package/dist/dashboard/html.js.map +1 -0
package/dist/dashboard/server.d.ts +17 -0
package/dist/dashboard/server.js +403 -0
package/dist/dashboard/server.js.map +1 -0
package/dist/db.js +38 -0
package/dist/db.js.map +1 -1
package/dist/index.js +211 -5
package/dist/index.js.map +1 -1
package/dist/tools/critterTools.js +4 -0
package/dist/tools/critterTools.js.map +1 -1
package/dist/tools/forecastingTools.d.ts +11 -0
package/dist/tools/forecastingTools.js +616 -0
package/dist/tools/forecastingTools.js.map +1 -0
package/dist/tools/localDashboardTools.d.ts +8 -0
package/dist/tools/localDashboardTools.js +332 -0
package/dist/tools/localDashboardTools.js.map +1 -0
package/dist/tools/metaTools.js +170 -1
package/dist/tools/metaTools.js.map +1 -1
package/dist/tools/openclawTools.d.ts +11 -0
package/dist/tools/openclawTools.js +1017 -0
package/dist/tools/openclawTools.js.map +1 -0
package/dist/tools/overstoryTools.d.ts +14 -0
package/dist/tools/overstoryTools.js +426 -0
package/dist/tools/overstoryTools.js.map +1 -0
package/dist/tools/prReportTools.d.ts +11 -0
package/dist/tools/prReportTools.js +911 -0
package/dist/tools/prReportTools.js.map +1 -0
package/dist/tools/progressiveDiscoveryTools.js +28 -9
package/dist/tools/progressiveDiscoveryTools.js.map +1 -1
package/dist/tools/selfEvalTools.js +8 -1
package/dist/tools/selfEvalTools.js.map +1 -1
package/dist/tools/sessionMemoryTools.js +14 -2
package/dist/tools/sessionMemoryTools.js.map +1 -1
package/dist/tools/skillUpdateTools.d.ts +24 -0
package/dist/tools/skillUpdateTools.js +469 -0
package/dist/tools/skillUpdateTools.js.map +1 -0
package/dist/tools/toolRegistry.js +178 -0
package/dist/tools/toolRegistry.js.map +1 -1
package/dist/tools/uiUxDiveAdvancedTools.js +61 -0
package/dist/tools/uiUxDiveAdvancedTools.js.map +1 -1
package/dist/tools/uiUxDiveTools.js +154 -1
package/dist/tools/uiUxDiveTools.js.map +1 -1
package/dist/tools/visualQaTools.d.ts +2 -0
package/dist/tools/visualQaTools.js +1088 -0
package/dist/tools/visualQaTools.js.map +1 -0
package/dist/tools/webmcpTools.d.ts +16 -0
package/dist/tools/webmcpTools.js +703 -0
package/dist/tools/webmcpTools.js.map +1 -0
package/dist/toolsetRegistry.js +4 -0
package/dist/toolsetRegistry.js.map +1 -1
package/package.json +1 -1

package/dist/tools/localDashboardTools.js ADDED Viewed

@@ -0,0 +1,332 @@
+/**
+ * Local Dashboard MCP Tools
+ *
+ * 5 tools for operating the local Daily Brief dashboard via Claude Code.
+ * Only sync_daily_brief touches the network (it pulls from the Convex gateway);
+ * the other tools work from local SQLite.
+ */
+import { getDb, genId } from "../db.js";
+import { getBriefDashboardUrl, startBriefDashboardServer } from "../dashboard/briefServer.js";
+/**
+ * Leniently decode a JSON string.
+ *
+ * @param {*} s - Candidate JSON text; in this module it is a column value read from a SQLite row.
+ * @returns {*} `null` when `s` is falsy (null, undefined, empty string, ...),
+ *   the parsed value when `s` is valid JSON, otherwise `s` itself unchanged.
+ */
+function safeParseJson(s) {
+    let decoded = null;
+    if (s) {
+        try {
+            decoded = JSON.parse(s);
+        }
+        catch {
+            // Malformed JSON is not treated as an error: pass the raw value through.
+            decoded = s;
+        }
+    }
+    return decoded;
+}
+export const localDashboardTools = [
+    // ── sync_daily_brief ────────────────────────────────────────────────
+    // Network-backed tool: pulls the latest dashboard snapshot and up to 100 public
+    // narrative threads from the Convex MCP gateway, upserts them into local SQLite,
+    // and records the run in sync_runs.
+    {
+        name: "sync_daily_brief",
+        description: "Sync daily brief + narrative data from Convex to local SQLite. Requires CONVEX_SITE_URL and MCP_SECRET environment variables. Returns sync summary with row counts and timing.",
+        inputSchema: {
+            type: "object",
+            properties: {
+                days: {
+                    type: "number",
+                    description: "Number of days to sync (default: 7)",
+                },
+                force: {
+                    type: "boolean",
+                    description: "Force re-sync even if data already exists (default: false)",
+                },
+            },
+        },
+        /**
+         * Run one sync pass against the gateway.
+         *
+         * NOTE(review): `days` and `force` are advertised in inputSchema but nothing
+         * below reads them — confirm whether the gateway is meant to receive them.
+         *
+         * @param {object} args - Tool arguments (currently unused).
+         * @returns {Promise<object>} On completion `{ success, syncId, durationMs, counts,
+         *   warnings, dashboardUrl }`; on a thrown failure `{ error, message, syncId,
+         *   durationMs }`; when configuration is missing `{ error, message, setupInstructions }`.
+         */
+        handler: async (args) => {
+            const siteUrl = process.env.CONVEX_SITE_URL || process.env.VITE_CONVEX_URL;
+            if (!siteUrl) {
+                return {
+                    error: true,
+                    message: "Missing CONVEX_SITE_URL or VITE_CONVEX_URL environment variable",
+                    setupInstructions: "Set CONVEX_SITE_URL in .env.local or as an environment variable",
+                };
+            }
+            const secret = process.env.MCP_SECRET;
+            if (!secret) {
+                return {
+                    error: true,
+                    message: "Missing MCP_SECRET environment variable",
+                    setupInstructions: "Set MCP_SECRET via: npx convex env set MCP_SECRET <value>",
+                };
+            }
+            const db = getDb();
+            const syncId = genId("sync");
+            const startMs = Date.now();
+            db.prepare("INSERT INTO sync_runs (id, status) VALUES (?, 'running')").run(syncId);
+            const counts = {};
+            // Gateway calls that answered but did not succeed. Previously these were
+            // dropped silently and the run still looked like a clean success.
+            const warnings = [];
+            // Strip a single trailing slash so the path is not doubled.
+            const gatewayUrl = `${siteUrl.replace(/\/$/, "")}/api/mcpGateway`;
+            // POST one gateway function call and return its decoded JSON payload.
+            const callGateway = async (fn, fnArgs) => {
+                const res = await fetch(gatewayUrl, {
+                    method: "POST",
+                    headers: { "Content-Type": "application/json", "x-mcp-secret": secret },
+                    body: JSON.stringify({ fn, args: fnArgs }),
+                });
+                const payload = await res.json();
+                if (!res.ok || !payload.success) {
+                    // assumes failures may carry a string `error` field — TODO confirm against the gateway
+                    const detail = typeof payload.error === "string" ? payload.error : `HTTP ${res.status}`;
+                    warnings.push(`${fn}: ${detail}`);
+                }
+                return payload;
+            };
+            try {
+                // Fetch latest dashboard snapshot
+                const snapData = await callGateway("getLatestDashboardSnapshot", {});
+                if (snapData.success && snapData.data) {
+                    const s = snapData.data;
+                    db.prepare(`
+            INSERT OR REPLACE INTO brief_snapshots (id, date_string, generated_at, dashboard_metrics, source_summary, version)
+            VALUES (?, ?, ?, ?, ?, 1)
+          `).run(
+                        s._id ?? genId("snap"),
+                        // Fall back to today's date (UTC, YYYY-MM-DD) when the snapshot has none.
+                        s.dateString ?? new Date().toISOString().slice(0, 10),
+                        s.generatedAt ?? Date.now(),
+                        JSON.stringify(s.dashboardMetrics ?? {}),
+                        JSON.stringify(s.sourceSummary ?? null));
+                    counts.brief_snapshots = 1;
+                }
+                // Fetch narrative threads
+                const threadData = await callGateway("getPublicThreads", { limit: 100 });
+                if (threadData.success && Array.isArray(threadData.data)) {
+                    const upsert = db.prepare(`
+            INSERT OR REPLACE INTO narrative_threads_local
+            (id, thread_id, name, slug, thesis, counter_thesis, entity_keys, topic_tags,
+             current_phase, first_event_at, latest_event_at, event_count, plot_twist_count, quality)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+          `);
+                    // One transaction for the whole batch of thread upserts.
+                    const tx = db.transaction((items) => {
+                        for (const t of items) {
+                            upsert.run(t._id, t.threadId ?? t._id, t.name, t.slug ?? "", t.thesis, t.counterThesis ?? null, JSON.stringify(t.entityKeys ?? []), JSON.stringify(t.topicTags ?? []), t.currentPhase, t.firstEventAt ?? null, t.latestEventAt ?? null, t.eventCount ?? 0, t.plotTwistCount ?? 0, JSON.stringify(t.quality ?? null));
+                        }
+                    });
+                    tx(threadData.data);
+                    counts.narrative_threads = threadData.data.length;
+                }
+                const durationMs = Date.now() - startMs;
+                db.prepare(`
+          UPDATE sync_runs SET status = 'success', completed_at = datetime('now'),
+            tables_synced = ?, duration_ms = ?
+          WHERE id = ?
+        `).run(JSON.stringify(counts), durationMs, syncId);
+                return {
+                    success: true,
+                    syncId,
+                    durationMs,
+                    counts,
+                    warnings,
+                    dashboardUrl: getBriefDashboardUrl(),
+                };
+            }
+            catch (err) {
+                // A thrown value is not guaranteed to be an Error; never record `undefined`.
+                const message = err instanceof Error ? err.message : String(err);
+                const durationMs = Date.now() - startMs;
+                db.prepare(`
+          UPDATE sync_runs SET status = 'error', completed_at = datetime('now'),
+            error = ?, duration_ms = ?
+          WHERE id = ?
+        `).run(message, durationMs, syncId);
+                return { error: true, message, syncId, durationMs };
+            }
+        },
+    },
+    // ── get_daily_brief_summary ─────────────────────────────────────────
+    // Read-only tool: returns one cached brief snapshot from local SQLite.
+    {
+        name: "get_daily_brief_summary",
+        description: "Get the latest daily brief summary from local SQLite. Returns dashboard metrics, features, and source summary. No network needed — reads cached data.",
+        inputSchema: {
+            type: "object",
+            properties: {
+                date: {
+                    type: "string",
+                    description: "Date string (YYYY-MM-DD). Omit for latest.",
+                },
+            },
+        },
+        /**
+         * Look up a brief snapshot: the highest version for `args.date` when a date
+         * is given, otherwise the row with the newest `generated_at`.
+         *
+         * @param {{date?: string}} args - Optional date filter.
+         * @returns {Promise<object>} The decoded snapshot, or `{ empty: true, message, tip }`
+         *   when no matching row exists.
+         */
+        handler: async (args) => {
+            const db = getDb();
+            const { date } = args;
+            let snapshot;
+            if (date) {
+                snapshot = db.prepare("SELECT * FROM brief_snapshots WHERE date_string = ? ORDER BY version DESC LIMIT 1").get(date);
+            }
+            else {
+                snapshot = db.prepare("SELECT * FROM brief_snapshots ORDER BY generated_at DESC LIMIT 1").get();
+            }
+            if (snapshot) {
+                return {
+                    dateString: snapshot.date_string,
+                    generatedAt: snapshot.generated_at,
+                    version: snapshot.version,
+                    // These two columns are stored as JSON text by sync_daily_brief.
+                    dashboardMetrics: safeParseJson(snapshot.dashboard_metrics),
+                    sourceSummary: safeParseJson(snapshot.source_summary),
+                    syncedAt: snapshot.synced_at,
+                    dashboardUrl: getBriefDashboardUrl(),
+                };
+            }
+            // Nothing cached: tell the caller how to populate the table.
+            const message = date
+                ? `No brief for ${date}. Run sync_daily_brief to fetch data.`
+                : "No briefs synced yet. Run sync_daily_brief to fetch data.";
+            return {
+                empty: true,
+                message,
+                tip: "Use sync_daily_brief tool or run: npm run local:sync",
+            };
+        },
+    },
+    // ── get_narrative_status ────────────────────────────────────────────
+    // Read-only tool: lists locally cached narrative threads, grouped by phase.
+    {
+        name: "get_narrative_status",
+        description: "Get narrative thread status from local SQLite. Returns threads grouped by phase (emerging, escalating, climax, resolution, dormant) with event counts. No network needed.",
+        inputSchema: {
+            type: "object",
+            properties: {
+                phase: {
+                    type: "string",
+                    enum: ["emerging", "escalating", "climax", "resolution", "dormant"],
+                    description: "Filter by phase (optional)",
+                },
+                limit: {
+                    type: "number",
+                    description: "Max threads to return (default: 20)",
+                },
+            },
+        },
+        /**
+         * Return the most recently updated threads, optionally filtered by phase.
+         *
+         * @param {{phase?: string, limit?: number}} args - Optional phase filter and
+         *   row limit. The limit is normalised to an integer in [1, 100]; a missing or
+         *   non-numeric value falls back to 20.
+         * @returns {Promise<object>} `{ totalThreads, phaseDistribution, threads, dashboardUrl }`
+         *   where `totalThreads` counts the returned rows, or `{ empty: true, message }`
+         *   when the query matches nothing.
+         */
+        handler: async (args) => {
+            const db = getDb();
+            // SQLite treats a negative LIMIT as "no upper bound" and rejects values that are
+            // not whole numbers, so an upper clamp alone is not enough to enforce the cap.
+            const requested = Number(args.limit ?? 20);
+            const limit = Number.isFinite(requested)
+                ? Math.min(Math.max(Math.trunc(requested), 1), 100)
+                : 20;
+            const query = args.phase
+                ? "SELECT * FROM narrative_threads_local WHERE current_phase = ? ORDER BY latest_event_at DESC LIMIT ?"
+                : "SELECT * FROM narrative_threads_local ORDER BY latest_event_at DESC LIMIT ?";
+            const rows = args.phase
+                ? db.prepare(query).all(args.phase, limit)
+                : db.prepare(query).all(limit);
+            if (rows.length === 0) {
+                return {
+                    empty: true,
+                    // With a phase filter, an empty result does not mean nothing was synced.
+                    message: args.phase
+                        ? `No narrative threads in phase "${args.phase}". Run sync_daily_brief to fetch data.`
+                        : "No narrative threads synced. Run sync_daily_brief to fetch data.",
+                };
+            }
+            // Group by phase
+            const grouped = {};
+            for (const r of rows) {
+                // Rows without a phase are listed under "dormant".
+                const phase = r.current_phase || "dormant";
+                if (!grouped[phase])
+                    grouped[phase] = [];
+                grouped[phase].push({
+                    id: r.id,
+                    name: r.name,
+                    thesis: r.thesis,
+                    eventCount: r.event_count,
+                    plotTwistCount: r.plot_twist_count,
+                    latestEventAt: r.latest_event_at,
+                    entityKeys: safeParseJson(r.entity_keys),
+                    topicTags: safeParseJson(r.topic_tags),
+                    quality: safeParseJson(r.quality),
+                });
+            }
+            // Phase distribution across the whole table, not just the returned page.
+            const distribution = {};
+            try {
+                const dist = db.prepare("SELECT current_phase, COUNT(*) as count FROM narrative_threads_local GROUP BY current_phase").all();
+                for (const d of dist)
+                    distribution[d.current_phase] = d.count;
+            }
+            catch { /* best-effort: leave the distribution empty if the query fails */ }
+            return {
+                totalThreads: rows.length,
+                phaseDistribution: distribution,
+                threads: grouped,
+                dashboardUrl: getBriefDashboardUrl(),
+            };
+        },
+    },
+    // ── get_ops_dashboard ───────────────────────────────────────────────
+    // Read-only tool: operational overview assembled from several local tables.
+    {
+        name: "get_ops_dashboard",
+        description: "Get operational dashboard status from local SQLite. Returns last sync info, tool call frequency, active verification cycles, and data counts. No network needed.",
+        inputSchema: {
+            type: "object",
+            properties: {},
+        },
+        /**
+         * Collect ops metrics. Every query is best-effort and independent: if one
+         * fails (for example because its table does not exist), only that section
+         * falls back to its empty value.
+         *
+         * @returns {Promise<object>} `{ lastSync, toolCallFrequency, activeCycles,
+         *   dataCounts, privacyMode, dashboardUrl }`.
+         */
+        handler: async () => {
+            const db = getDb();
+            // Run a query and swallow any failure, returning the fallback instead.
+            const attempt = (query, fallback) => {
+                try {
+                    return query();
+                }
+                catch {
+                    return fallback; /* table may not exist */
+                }
+            };
+            // Last sync
+            const lastRun = attempt(() => db.prepare("SELECT * FROM sync_runs ORDER BY started_at DESC LIMIT 1").get(), null);
+            const lastSync = lastRun
+                ? { ...lastRun, tables_synced: safeParseJson(lastRun.tables_synced) }
+                : null;
+            // Tool call frequency (last 24h)
+            const toolStats = attempt(() => db.prepare(`
+          SELECT tool_name, COUNT(*) as count, AVG(duration_ms) as avg_duration,
+            SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as errors
+          FROM tool_call_log
+          WHERE created_at > datetime('now', '-1 day')
+          GROUP BY tool_name ORDER BY count DESC LIMIT 15
+        `).all(), []);
+            // Active verification cycles
+            const activeCycles = attempt(() => db.prepare(`
+          SELECT id, title, status, created_at FROM verification_cycles
+          WHERE status NOT IN ('completed', 'abandoned')
+          ORDER BY created_at DESC LIMIT 5
+        `).all(), []);
+            // Data counts. Each table is counted on its own so that one failing
+            // query no longer leaves the counts after it stuck at 0.
+            const countRows = (sql) => attempt(() => db.prepare(sql).get()?.c ?? 0, 0);
+            const briefCount = countRows("SELECT COUNT(*) as c FROM brief_snapshots");
+            const threadCount = countRows("SELECT COUNT(*) as c FROM narrative_threads_local");
+            const eventCount = countRows("SELECT COUNT(*) as c FROM narrative_events_local");
+            // Privacy/audience stats (last 24h); reported only when at least one event exists.
+            const audience = attempt(() => db.prepare(`
+          SELECT COUNT(*) as count,
+            SUM(CASE WHEN is_public = 1 THEN 1 ELSE 0 END) as public_triggers
+          FROM audience_events WHERE created_at > datetime('now', '-1 day')
+        `).get(), null);
+            const privacyMode = audience && audience.count > 0
+                ? {
+                    triggeredToday: audience.public_triggers ?? 0,
+                    totalEvents: audience.count,
+                }
+                : null;
+            return {
+                lastSync,
+                toolCallFrequency: toolStats,
+                activeCycles,
+                dataCounts: { briefs: briefCount, threads: threadCount, events: eventCount },
+                privacyMode,
+                dashboardUrl: getBriefDashboardUrl(),
+            };
+        },
+    },
+    // ── open_local_dashboard ────────────────────────────────────────────
+    // Returns the dashboard URL, starting the local server first when none is reported.
+    {
+        name: "open_local_dashboard",
+        description: "Start the local Daily Brief dashboard server if needed, and return the URL. The dashboard shows Brief metrics, Narrative thread lanes, and Ops status — all from local SQLite.",
+        inputSchema: {
+            type: "object",
+            properties: {},
+        },
+        /**
+         * Reuse the URL reported by getBriefDashboardUrl() when there is one;
+         * otherwise start the server (requesting port 6275) and build the URL
+         * from the port that call resolves to.
+         *
+         * @returns {Promise<object>} `{ url, views, tip }`, or `{ error: true, message }`
+         *   when the server fails to start.
+         */
+        handler: async () => {
+            // Shared response shape for both the "already running" and "just started" paths.
+            const respond = (url) => ({
+                url,
+                views: ["Brief (metrics, features, sources)", "Narrative (thread lanes by phase)", "Ops (sync status, tool frequency)"],
+                tip: "Open the URL in a browser to see the dashboard. Data auto-refreshes every 30s.",
+            });
+            const running = getBriefDashboardUrl();
+            if (running) {
+                return respond(running);
+            }
+            try {
+                const port = await startBriefDashboardServer(getDb(), 6275);
+                return respond(`http://127.0.0.1:${port}`);
+            }
+            catch (err) {
+                return { error: true, message: `Failed to start dashboard: ${err.message}` };
+            }
+        },
+    },
+];
+//# sourceMappingURL=localDashboardTools.js.map

package/dist/tools/localDashboardTools.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"localDashboardTools.js","sourceRoot":"","sources":["../../src/tools/localDashboardTools.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,oBAAoB,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAC;AAE9F,SAAS,aAAa,CAAC,CAA4B;IACjD,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,IAAI,CAAC;QAAC,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAAC,CAAC;IAAC,MAAM,CAAC;QAAC,OAAO,CAAC,CAAC;IAAC,CAAC;AACnD,CAAC;AAED,MAAM,CAAC,MAAM,mBAAmB,GAAc;IAC5C,uEAAuE;IACvE;QACE,IAAI,EAAE,kBAAkB;QACxB,WAAW,EACT,gLAAgL;QAClL,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,IAAI,EAAE;oBACJ,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,qCAAqC;iBACnD;gBACD,KAAK,EAAE;oBACL,IAAI,EAAE,SAAS;oBACf,WAAW,EAAE,4DAA4D;iBAC1E;aACF;SACF;QACD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;YACtB,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC;YAC3E,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,OAAO,EAAE,iEAAiE;oBAC1E,iBAAiB,EAAE,iEAAiE;iBACrF,CAAC;YACJ,CAAC;YAED,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;YACtC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,OAAO,EAAE,yCAAyC;oBAClD,iBAAiB,EAAE,2DAA2D;iBAC/E,CAAC;YACJ,CAAC;YAED,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;YACnB,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;YAC7B,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC3B,EAAE,CAAC,OAAO,CAAC,0DAA0D,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAEnF,MAAM,MAAM,GAA2B,EAAE,CAAC;YAE1C,IAAI,CAAC;gBACH,kCAAkC;gBAClC,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,iBAAiB,EAAE;oBAC1E,MAAM,EAAE,MAAM;oBACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,cAAc,EAAE,MAAM,EAAE;oBACvE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,4BAA4B,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;iBACrE,CAAC,CAAC;gBACH,MAAM,QAAQ,GAAQ,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBAC3C,IAAI,QAAQ,CAAC,OAAO,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;oBACtC,MAAM,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC;oBACxB,EAAE,CAAC,OAAO,CAAC;;;WAGV,CAAC,CAAC,GAAG,CACJ,CAAC,CAAC,GAAG,IAAI,KAAK,CAAC,MAAM,CAAC,EACtB,CAAC,CAAC,UAAU,IAAI,IAAI
,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EACrD,CAAC,CAAC,WAAW,IAAI,IAAI,CAAC,GAAG,EAAE,EAC3B,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,IAAI,EAAE,CAAC,EACxC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,aAAa,IAAI,IAAI,CAAC,CACxC,CAAC;oBACF,MAAM,CAAC,eAAe,GAAG,CAAC,CAAC;gBAC7B,CAAC;gBAED,0BAA0B;gBAC1B,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,iBAAiB,EAAE;oBAC5E,MAAM,EAAE,MAAM;oBACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,cAAc,EAAE,MAAM,EAAE;oBACvE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,kBAAkB,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC;iBACvE,CAAC,CAAC;gBACH,MAAM,UAAU,GAAQ,MAAM,SAAS,CAAC,IAAI,EAAE,CAAC;gBAC/C,IAAI,UAAU,CAAC,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzD,MAAM,MAAM,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;WAKzB,CAAC,CAAC;oBACH,MAAM,EAAE,GAAG,EAAE,CAAC,WAAW,CAAC,CAAC,KAAY,EAAE,EAAE;wBACzC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;4BACtB,MAAM,CAAC,GAAG,CACR,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,EAAE,EAChD,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,aAAa,IAAI,IAAI,EACjC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,IAAI,EAAE,CAAC,EACrE,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC,YAAY,IAAI,IAAI,EAAE,CAAC,CAAC,aAAa,IAAI,IAAI,EAC/D,CAAC,CAAC,UAAU,IAAI,CAAC,EAAE,CAAC,CAAC,cAAc,IAAI,CAAC,EACxC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,IAAI,IAAI,CAAC,CAClC,CAAC;wBACJ,CAAC;oBACH,CAAC,CAAC,CAAC;oBACH,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;oBACpB,MAAM,CAAC,iBAAiB,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC;gBACpD,CAAC;gBAED,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;gBACxC,EAAE,CAAC,OAAO,CAAC;;;;SAIV,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,UAAU,EAAE,MAAM,CAAC,CAAC;gBAEnD,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,MAAM;oBACN,UAAU;oBACV,MAAM;oBACN,YAAY,EAAE,oBAAoB,EAAE;iBACrC,CAAC;YACJ,CAAC;YAAC,OAAO,GAAQ,EAAE,CAAC;gBAClB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;gBACxC,EAAE,CAAC,OAAO,CAAC;;;;SAIV,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,EAAE,UAAU,EAAE,MAA
M,CAAC,CAAC;gBACxC,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;YACnE,CAAC;QACH,CAAC;KACF;IAED,uEAAuE;IACvE;QACE,IAAI,EAAE,yBAAyB;QAC/B,WAAW,EACT,uJAAuJ;QACzJ,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,IAAI,EAAE;oBACJ,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,4CAA4C;iBAC1D;aACF;SACF;QACD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;YACtB,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;YACnB,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI;gBACnB,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,mFAAmF,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAQ;gBACvH,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,kEAAkE,CAAC,CAAC,GAAG,EAAS,CAAC;YAEhG,IAAI,CAAC,GAAG,EAAE,CAAC;gBACT,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,OAAO,EAAE,IAAI,CAAC,IAAI;wBAChB,CAAC,CAAC,gBAAgB,IAAI,CAAC,IAAI,uCAAuC;wBAClE,CAAC,CAAC,2DAA2D;oBAC/D,GAAG,EAAE,sDAAsD;iBAC5D,CAAC;YACJ,CAAC;YAED,OAAO;gBACL,UAAU,EAAE,GAAG,CAAC,WAAW;gBAC3B,WAAW,EAAE,GAAG,CAAC,YAAY;gBAC7B,OAAO,EAAE,GAAG,CAAC,OAAO;gBACpB,gBAAgB,EAAE,aAAa,CAAC,GAAG,CAAC,iBAAiB,CAAC;gBACtD,aAAa,EAAE,aAAa,CAAC,GAAG,CAAC,cAAc,CAAC;gBAChD,QAAQ,EAAE,GAAG,CAAC,SAAS;gBACvB,YAAY,EAAE,oBAAoB,EAAE;aACrC,CAAC;QACJ,CAAC;KACF;IAED,uEAAuE;IACvE;QACE,IAAI,EAAE,sBAAsB;QAC5B,WAAW,EACT,2KAA2K;QAC7K,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE;gBACV,KAAK,EAAE;oBACL,IAAI,EAAE,QAAQ;oBACd,IAAI,EAAE,CAAC,UAAU,EAAE,YAAY,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,CAAC;oBACnE,WAAW,EAAE,4BAA4B;iBAC1C;gBACD,KAAK,EAAE;oBACL,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,qCAAqC;iBACnD;aACF;SACF;QACD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;YACtB,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;YACnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC;YAE9C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK;gBACtB,CAAC,CAAC,qGAAqG;gBACvG,CAAC,CAAC,6EAA6E,CAAC;YAElF,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK;gBACrB,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAU;gBACnD,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,KAAK,CAAU,CAAC;YAE1C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACtB,OAAO;oBACL,KAAK,EAAE,IAAI;oBACX,OAAO,EAAE,kEAAkE;iBAC5E,CAAC;YACJ,CAAC;YAED
,iBAAiB;YACjB,MAAM,OAAO,GAA0B,EAAE,CAAC;YAC1C,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;gBACrB,MAAM,KAAK,GAAG,CAAC,CAAC,aAAa,IAAI,SAAS,CAAC;gBAC3C,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC;oBAAE,OAAO,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;gBACzC,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC;oBAClB,EAAE,EAAE,CAAC,CAAC,EAAE;oBACR,IAAI,EAAE,CAAC,CAAC,IAAI;oBACZ,MAAM,EAAE,CAAC,CAAC,MAAM;oBAChB,UAAU,EAAE,CAAC,CAAC,WAAW;oBACzB,cAAc,EAAE,CAAC,CAAC,gBAAgB;oBAClC,aAAa,EAAE,CAAC,CAAC,eAAe;oBAChC,UAAU,EAAE,aAAa,CAAC,CAAC,CAAC,WAAW,CAAC;oBACxC,SAAS,EAAE,aAAa,CAAC,CAAC,CAAC,UAAU,CAAC;oBACtC,OAAO,EAAE,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC;iBAClC,CAAC,CAAC;YACL,CAAC;YAED,qBAAqB;YACrB,MAAM,YAAY,GAA2B,EAAE,CAAC;YAChD,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CACrB,6FAA6F,CAC9F,CAAC,GAAG,EAAW,CAAC;gBACjB,KAAK,MAAM,CAAC,IAAI,IAAI;oBAAE,YAAY,CAAC,CAAC,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC;YAChE,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;YAErC,OAAO;gBACL,YAAY,EAAE,IAAI,CAAC,MAAM;gBACzB,iBAAiB,EAAE,YAAY;gBAC/B,OAAO,EAAE,OAAO;gBAChB,YAAY,EAAE,oBAAoB,EAAE;aACrC,CAAC;QACJ,CAAC;KACF;IAED,uEAAuE;IACvE;QACE,IAAI,EAAE,mBAAmB;QACzB,WAAW,EACT,kKAAkK;QACpK,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE,EAAE;SACf;QACD,OAAO,EAAE,KAAK,IAAI,EAAE;YAClB,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;YAEnB,YAAY;YACZ,IAAI,QAAQ,GAAQ,IAAI,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,0DAA0D,CAAC,CAAC,GAAG,EAAS,CAAC;gBAChG,IAAI,GAAG,EAAE,CAAC;oBACR,QAAQ,GAAG;wBACT,GAAG,GAAG;wBACN,aAAa,EAAE,aAAa,CAAC,GAAG,CAAC,aAAa,CAAC;qBAChD,CAAC;gBACJ,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;YAErC,iCAAiC;YACjC,IAAI,SAAS,GAAU,EAAE,CAAC;YAC1B,IAAI,CAAC;gBACH,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;SAMtB,CAAC,CAAC,GAAG,EAAW,CAAC;YACpB,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;YAErC,6BAA6B;YAC7B,IAAI,YAAY,GAAU,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,YAAY,GAAG,EAAE,CAAC,OAAO,CAAC;;;;SAIzB,CAAC,CAAC,GAAG,EAAW,CAAC;YACpB,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;YAErC,cAAc;YACd,IAAI,UAAU,GAAG,CAAC,EAAE,WAAW,GAAG,CAAC,EAAE,UAAU,GAAG,CAAC,CAAC;YACpD,IAAI,CAAC;gBACH,UAAU,GAAI,EA
AE,CAAC,OAAO,CAAC,2CAA2C,CAAC,CAAC,GAAG,EAAU,EAAE,CAAC,IAAI,CAAC,CAAC;gBAC5F,WAAW,GAAI,EAAE,CAAC,OAAO,CAAC,mDAAmD,CAAC,CAAC,GAAG,EAAU,EAAE,CAAC,IAAI,CAAC,CAAC;gBACrG,UAAU,GAAI,EAAE,CAAC,OAAO,CAAC,kDAAkD,CAAC,CAAC,GAAG,EAAU,EAAE,CAAC,IAAI,CAAC,CAAC;YACrG,CAAC;YAAC,MAAM,CAAC,CAAC,0BAA0B,CAAC,CAAC;YAEtC,yBAAyB;YACzB,IAAI,WAAW,GAAQ,IAAI,CAAC;YAC5B,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,EAAE,CAAC,OAAO,CAAC;;;;SAI9B,CAAC,CAAC,GAAG,EAAS,CAAC;gBAChB,IAAI,WAAW,IAAI,WAAW,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;oBACzC,WAAW,GAAG;wBACZ,cAAc,EAAE,WAAW,CAAC,eAAe,IAAI,CAAC;wBAChD,WAAW,EAAE,WAAW,CAAC,KAAK;qBAC/B,CAAC;gBACJ,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;YAErC,OAAO;gBACL,QAAQ;gBACR,iBAAiB,EAAE,SAAS;gBAC5B,YAAY;gBACZ,UAAU,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,UAAU,EAAE;gBAC5E,WAAW;gBACX,YAAY,EAAE,oBAAoB,EAAE;aACrC,CAAC;QACJ,CAAC;KACF;IAED,uEAAuE;IACvE;QACE,IAAI,EAAE,sBAAsB;QAC5B,WAAW,EACT,gLAAgL;QAClL,WAAW,EAAE;YACX,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE,EAAE;SACf;QACD,OAAO,EAAE,KAAK,IAAI,EAAE;YAClB,IAAI,GAAG,GAAG,oBAAoB,EAAE,CAAC;YACjC,IAAI,CAAC,GAAG,EAAE,CAAC;gBACT,IAAI,CAAC;oBACH,MAAM,IAAI,GAAG,MAAM,yBAAyB,CAAC,KAAK,EAAE,EAAE,IAAI,CAAC,CAAC;oBAC5D,GAAG,GAAG,oBAAoB,IAAI,EAAE,CAAC;gBACnC,CAAC;gBAAC,OAAO,GAAQ,EAAE,CAAC;oBAClB,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,8BAA8B,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC;gBAC/E,CAAC;YACH,CAAC;YACD,OAAO;gBACL,GAAG;gBACH,KAAK,EAAE,CAAC,oCAAoC,EAAE,mCAAmC,EAAE,mCAAmC,CAAC;gBACvH,GAAG,EAAE,gFAAgF;aACtF,CAAC;QACJ,CAAC;KACF;CACF,CAAC"}

package/dist/tools/metaTools.js CHANGED Viewed

@@ -211,6 +211,60 @@ const METHODOLOGY_CONTENT = {
             bundle_analysis: "npm run perf:bundle",
         },
     },
+    flywheel_ui_dogfood: {
+        title: "Flywheel Mode: UI Dogfood + Walkthrough Video QA",
+        description: "Continuous UI verification loop: capture a full walkthrough video + screenshots, run Gemini-based video/screenshot QA, fix root causes (5-whys), and re-capture evidence until the UI is stable, accessible, and performant. Do not stop at 'build passes' — ship dogfood evidence.",
+        steps: [
+            {
+                step: 1,
+                name: "Launch + Trajectory",
+                description: "Start the dev server and publish a visible trajectory link in the UI within 60 seconds. The trajectory is the audit trail of iterations and evidence.",
+                tools: ["record_learning"],
+                action: "Start dev server. Ensure `/dogfood` shows the latest manifest + walkthrough. Record a learning if trajectory/evidence is missing.",
+            },
+            {
+                step: 2,
+                name: "Capture Evidence (Scribe + Video)",
+                description: "Run a full route walkthrough capture (screens + interactions) so issues are visible and replayable.",
+                tools: ["capture_responsive_suite", "capture_ui_screenshot"],
+                action: "Run `npm run dogfood:full:local` to generate `public/dogfood/*` (gallery + walkthrough video + frames + scribe).",
+            },
+            {
+                step: 3,
+                name: "Gemini QA (Video + Screens)",
+                description: "Use Gemini video/screenshot understanding to flag UI regressions: flashes, layout shift, focus traps, broken empty states, and performance jank.",
+                tools: ["analyze_ui_screenshot"],
+                action: "Run `npm run dogfood:qa:gemini` and review results in `/dogfood`. Convert actionable issues into deterministic fixes (no band-aids).",
+            },
+            {
+                step: 4,
+                name: "Root Cause + Fix (5 Whys)",
+                description: "At each checkpoint, immediately seek issues and follow symptom → intermediate state → root cause. Fix the cause so the symptom is impossible.",
+                tools: ["run_closed_loop", "record_learning"],
+                action: "Do a 5-whys chain. Implement the smallest correct fix. Re-run `tsc` + build. Record learnings for any repeated failure mode.",
+            },
+            {
+                step: 5,
+                name: "Motion Safety (Seizure / Flash Policy)",
+                description: "Remove or soften high-contrast flashes (large-area pulses/fades). Ship stable backgrounds and subtle loading states. Always honor prefers-reduced-motion.",
+                tools: ["run_quality_gate"],
+                action: "FAIL if any route shows full-viewport flashing/pulsing/fade transitions. Prefer per-view fallbacks and non-animated skeletons for large surfaces.",
+            },
+            {
+                step: 6,
+                name: "Poll + Adapt",
+                description: "Poll every 60 seconds. After 3 consecutive failures on the same issue, change strategy (instrument, isolate, or rollback).",
+                tools: ["record_learning"],
+                action: "Keep iterating until the dogfood artifacts + QA runs show no new P0/P1 issues.",
+            },
+        ],
+        commands: {
+            capture: "npm run dogfood:full:local",
+            gemini_qa: "npm run dogfood:qa:gemini",
+            build: "npm run build",
+            typecheck: "npx tsc --noEmit",
+        },
+    },
     agentic_vision: {
         title: "Agentic Vision (AI-Powered Visual Verification)",
         description: "Use AI vision models to analyze UI screenshots programmatically. The Discover-Capture-Analyze-Manipulate-Iterate-Gate loop provides automated visual QA that goes beyond what rule-based checks can catch. Gemini with code execution provides the richest analysis (zoom, crop, compute within the model). Falls back to GPT-5-mini, Claude, or OpenRouter vision.",
@@ -1242,6 +1296,117 @@ const METHODOLOGY_CONTENT = {
             "For production: implement interrupt handling (user speaks while TTS is playing)",
         ],
     },
+    analyst_diagnostic: {
+        title: "Analyst Diagnostic — Root Cause Over Bandaids",
+        description: "Guide yourself like an analyst diagnosing the root cause, NOT a junior dev slapping on a bandaid. Mandatory for all bug work.",
+        steps: [
+            {
+                step: 1,
+                name: "Reproduce",
+                description: "Confirm the exact failure mode before touching any code.",
+                tools: ["search_all_knowledge"],
+                action: "Check if this root cause is already known. If yes, apply the known fix. If no, proceed to step 2.",
+            },
+            {
+                step: 2,
+                name: "Trace Upstream",
+                description: "Walk from symptom → intermediate state → root cause. Don't stop at the first error you see.",
+                tools: ["run_recon", "log_recon_finding"],
+                action: "Use recon tools to trace the chain. Log each intermediate finding.",
+            },
+            {
+                step: 3,
+                name: "Ask 'Why' 5 Times",
+                description: "Each answer should go one level deeper into the system. If you can't get to 5, you haven't found the root cause yet.",
+                action: "Document each 'why' level. The 5th answer is usually the real fix.",
+            },
+            {
+                step: 4,
+                name: "Fix the Cause",
+                description: "The right fix makes the symptom impossible, not just invisible.",
+                tools: ["resolve_gap"],
+                action: "Implement the fix. It should address the root cause from step 3, not the symptom from step 1.",
+            },
+            {
+                step: 5,
+                name: "Verify No Sideways Shift",
+                description: "Bandaids move bugs sideways. Check adjacent behavior to confirm the fix didn't introduce new issues.",
+                tools: ["log_test_result", "run_closed_loop"],
+                action: "Run tests. Check adjacent functionality. Confirm the bug can't recur.",
+            },
+            {
+                step: 6,
+                name: "Record the Root Cause",
+                description: "Document what you found so the next person doesn't re-discover it.",
+                tools: ["record_learning"],
+                action: "Call record_learning with the root cause, not just the fix.",
+            },
+        ],
+        redFlags: [
+            "Adding try/catch that swallows errors without understanding them",
+            "Adding ?. optional chaining to mask undefined instead of finding why it's undefined",
+            "Adding 'as any' to silence type errors instead of fixing the type mismatch",
+            "Adding timeouts/retries to paper over race conditions",
+            "Deleting a failing test instead of fixing the code it tests",
+            "'It works now' without understanding why it didn't before",
+        ],
+    },
+    scenario_testing: {
+        title: "Scenario-Based Testing — Real Human Behavior at Scale",
+        description: "Tests that don't model real human behavior are false confidence. Every test must be scenario-based: start from a real user persona and goal, simulate realistic behavior, and verify at scale. Covers all behavior angles and both short-running (burst) and long-running (sustained) scenarios.",
+        steps: [
+            {
+                step: 1,
+                name: "Define the User Persona",
+                description: "Who is the user in this scenario? First-timer, power user, distracted user, adversarial actor, mobile user on a slow network, or concurrent session? The persona determines which failure modes matter.",
+                action: "Before writing any assertion, write: 'User: <persona>. Goal: <what they want to achieve>. Prior state: <what's already in the system>.' If you can't answer these, you don't have a test scenario yet.",
+            },
+            {
+                step: 2,
+                name: "Specify Action Sequence + Timing",
+                description: "What does the user do, in what order, with what timing between actions? Rushed clicks differ from deliberate actions. Concurrent users produce race conditions that single-user tests miss entirely.",
+                action: "Document: 'Actions: [step-by-step sequence with timing]. Concurrency: [1 user / N concurrent].' For concurrent scenarios, specify what overlaps and at what rate.",
+            },
+            {
+                step: 3,
+                name: "Set the Scale Axis",
+                description: "A test that passes at 1 user is necessary but not sufficient. Define what happens at 10x and 100x. Single-user happy paths cannot catch connection pool exhaustion, rate limit throttling, lock contention, or thundering herd.",
+                action: "For every scenario, write: 'Scale: 1x (baseline) / 10x (concurrent) / 100x (sustained).' If you only have the 1x case, the test suite has a known gap — document it.",
+            },
+            {
+                step: 4,
+                name: "Set the Duration Axis",
+                description: "Short-running tests (burst/spike) and long-running tests (sustained load, state accumulation) catch different failure modes. A system that handles 10 requests in 1 second may fail after 10,000 requests over 30 minutes due to memory leaks, DB growth, stale caches, or token expiry.",
+                action: "For each scenario, specify: 'Duration: single request / session-length / multi-day accumulation.' If you have only short-running coverage, add a long-running counterpart or mark the gap explicitly.",
+            },
+            {
+                step: 5,
+                name: "Cover All Behavior Angles",
+                description: "Six required angles per feature: (1) happy path, (2) all sad paths, (3) concurrent access, (4) degraded conditions (slow network, auth expiry, partial API failures), (5) long-running accumulation (DB growth, stale cache), (6) adversarial inputs (injection, replay, unexpected payloads).",
+                action: "Use a checklist: happy ✓ / sad paths ✓ / concurrent ✓ / degraded ✓ / long-running ✓ / adversarial ✓. Missing boxes are documented gaps, not implicit coverage.",
+            },
+            {
+                step: 6,
+                name: "Verify Side Effects, Not Just Return Values",
+                description: "Assertions that only check return values miss the failure modes that matter in production: state changes, side effects, downstream consequences. Did the DB update? Did the cache invalidate? Did the audit log record? Did the notification fire?",
+                action: "After every action assertion, add assertions for: state in the DB, downstream side effects, observable UI change, and any async consequences. If you can't assert them, add instrumentation.",
+            },
+        ],
+        antiPatterns: [
+            "Simple unit tests with no scenario context",
+            "Tests that only cover the happy path",
+            "Tests that mock everything and test nothing real",
+            "Tests that pass at 1 user and are never run at 10+",
+            "Hard-coded user state assumptions that won't hold in production",
+            "'It passes in CI' declared without production-realistic data volume or concurrency",
+            "Declaring a feature 'tested' after a single integration test with synthetic clean data",
+            "Assertions only on return values — no state or side effect checks",
+        ],
+        scenarioAnatomy: {
+            template: "Scenario: <name> | User: <persona> | Goal: <user goal> | Prior state: <system state before> | Actions: <sequence with timing> | Scale: <1x/10x/100x> | Duration: <single/session/multi-day> | Expected: <state + side effects + UI> | Edge cases: <degraded/adversarial/partial>",
+            example: "Scenario: Power user submits form on slow 3G | User: power user, 50+ prior submissions, impatient | Goal: submit expense report | Prior state: DB has 10,000 existing reports, cache warm | Actions: fill form (5s), click submit, network drops to 3G mid-request, retry after 2s | Scale: 10 concurrent submitters | Duration: 30-minute session with 20 submissions | Expected: exactly-once submission, retry shows loading, no duplicate DB row, audit log entry | Edge cases: double-click, browser back mid-submit, session expiry at step 3",
+        },
+    },
     overview: {
         title: "NodeBench Development Methodology — Overview",
         description: "A dual-loop system for rigorous development. The inner loop (6-Phase Verification) ensures correctness. The outer loop (Eval-Driven Development) ensures improvement. Together they form the AI Flywheel.",
@@ -1274,6 +1439,8 @@ const METHODOLOGY_CONTENT = {
                     toon_format: "TOON Format — Token-Oriented Object Notation for ~40% token savings on LLM payloads",
                     seo_audit: "SEO Audit — technical SEO, content analysis, performance, WordPress security",
                     voice_bridge: "Voice Bridge — STT/TTS/LLM pipeline design, scaffold generation, latency benchmarking",
+                    analyst_diagnostic: "Analyst Diagnostic — root-cause diagnosis over bandaids. Mandatory for all bug work: reproduce, trace upstream, ask 'why' 5 times, fix the cause not the symptom",
+                    scenario_testing: "Scenario-Based Testing — no shallow tests, real user personas, scale axis (1x/10x/100x), duration axis (burst + sustained), all behavior angles (happy/sad/adversarial/concurrent/degraded)",
                 },
             },
             {
@@ -1554,7 +1721,7 @@ export function createMetaTools(allTools) {
         },
         {
             name: "getMethodology",
-            description: 'Get step-by-step guidance for a development methodology. Topics: verification, eval, flywheel, mandatory_flywheel, reconnaissance, quality_gates, ui_ux_qa, agentic_vision, closed_loop, learnings, project_ideation, tech_stack_2026, telemetry_setup, agents_md_maintenance, agent_bootstrap, autonomous_maintenance, parallel_agent_teams, self_reinforced_learning, academic_paper_writing, agent_evaluation, controlled_evaluation, overview. Call with topic "overview" to see all available methodologies.',
+            description: 'Get step-by-step guidance for a development methodology. Topics: verification, eval, flywheel, mandatory_flywheel, reconnaissance, quality_gates, ui_ux_qa, flywheel_ui_dogfood, agentic_vision, closed_loop, learnings, project_ideation, tech_stack_2026, telemetry_setup, agents_md_maintenance, agent_bootstrap, autonomous_maintenance, parallel_agent_teams, self_reinforced_learning, academic_paper_writing, agent_evaluation, controlled_evaluation, scenario_testing, overview. Call with topic "overview" to see all available methodologies.',
             inputSchema: {
                 type: "object",
                 properties: {
@@ -1568,6 +1735,7 @@ export function createMetaTools(allTools) {
                             "reconnaissance",
                             "quality_gates",
                             "ui_ux_qa",
+                            "flywheel_ui_dogfood",
                             "agentic_vision",
                             "closed_loop",
                             "learnings",
@@ -1582,6 +1750,7 @@ export function createMetaTools(allTools) {
                             "academic_paper_writing",
                             "agent_evaluation",
                             "controlled_evaluation",
+                            "scenario_testing",
                             "overview",
                         ],
                         description: "Which methodology to explain",