deepflow 0.1.88 → 0.1.89
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/install.js +45 -9
- package/hooks/df-dashboard-push.js +170 -0
- package/hooks/df-execution-history.js +120 -0
- package/hooks/df-invariant-check.js +126 -0
- package/hooks/df-worktree-guard.js +101 -0
- package/package.json +1 -1
- package/src/commands/df/dashboard.md +35 -0
- package/src/commands/df/execute.md +157 -8
- package/src/commands/df/report.md +2 -0
- package/templates/config-template.yaml +10 -0
package/bin/install.js
CHANGED
|
@@ -187,7 +187,7 @@ async function main() {
|
|
|
187
187
|
console.log(' skills/ — gap-discovery, atomic-commits, code-completeness, browse-fetch, browse-verify');
|
|
188
188
|
console.log(' agents/ — reasoner (/df:auto — autonomous execution via /loop)');
|
|
189
189
|
if (level === 'global') {
|
|
190
|
-
console.log(' hooks/ — statusline, update checker, invariant checker');
|
|
190
|
+
console.log(' hooks/ — statusline, update checker, invariant checker, worktree guard');
|
|
191
191
|
}
|
|
192
192
|
console.log(' hooks/df-spec-* — spec validation (auto-enforced by /df:spec and /df:plan)');
|
|
193
193
|
console.log(' env/ — ENABLE_LSP_TOOL (code navigation via goToDefinition, findReferences, workspaceSymbol)');
|
|
@@ -239,6 +239,10 @@ async function configureHooks(claudeDir) {
|
|
|
239
239
|
const consolidationCheckCmd = `node "${path.join(claudeDir, 'hooks', 'df-consolidation-check.js')}"`;
|
|
240
240
|
const quotaLoggerCmd = `node "${path.join(claudeDir, 'hooks', 'df-quota-logger.js')}"`;
|
|
241
241
|
const toolUsageCmd = `node "${path.join(claudeDir, 'hooks', 'df-tool-usage.js')}"`;
|
|
242
|
+
const dashboardPushCmd = `node "${path.join(claudeDir, 'hooks', 'df-dashboard-push.js')}"`;
|
|
243
|
+
const executionHistoryCmd = `node "${path.join(claudeDir, 'hooks', 'df-execution-history.js')}"`;
|
|
244
|
+
const worktreeGuardCmd = `node "${path.join(claudeDir, 'hooks', 'df-worktree-guard.js')}"`;
|
|
245
|
+
const invariantCheckCmd = `node "${path.join(claudeDir, 'hooks', 'df-invariant-check.js')}"`;
|
|
242
246
|
|
|
243
247
|
let settings = {};
|
|
244
248
|
|
|
@@ -324,10 +328,10 @@ async function configureHooks(claudeDir) {
|
|
|
324
328
|
settings.hooks.SessionEnd = [];
|
|
325
329
|
}
|
|
326
330
|
|
|
327
|
-
// Remove any existing quota logger from SessionEnd
|
|
331
|
+
// Remove any existing quota logger / dashboard push from SessionEnd
|
|
328
332
|
settings.hooks.SessionEnd = settings.hooks.SessionEnd.filter(hook => {
|
|
329
333
|
const cmd = hook.hooks?.[0]?.command || '';
|
|
330
|
-
return !cmd.includes('df-quota-logger');
|
|
334
|
+
return !cmd.includes('df-quota-logger') && !cmd.includes('df-dashboard-push');
|
|
331
335
|
});
|
|
332
336
|
|
|
333
337
|
// Add quota logger to SessionEnd
|
|
@@ -337,17 +341,25 @@ async function configureHooks(claudeDir) {
|
|
|
337
341
|
command: quotaLoggerCmd
|
|
338
342
|
}]
|
|
339
343
|
});
|
|
340
|
-
|
|
344
|
+
|
|
345
|
+
// Add dashboard push to SessionEnd (fire-and-forget, skips when dashboard_url unset)
|
|
346
|
+
settings.hooks.SessionEnd.push({
|
|
347
|
+
hooks: [{
|
|
348
|
+
type: 'command',
|
|
349
|
+
command: dashboardPushCmd
|
|
350
|
+
}]
|
|
351
|
+
});
|
|
352
|
+
log('Quota logger + dashboard push configured (SessionEnd)');
|
|
341
353
|
|
|
342
354
|
// Configure PostToolUse hook for tool usage instrumentation
|
|
343
355
|
if (!settings.hooks.PostToolUse) {
|
|
344
356
|
settings.hooks.PostToolUse = [];
|
|
345
357
|
}
|
|
346
358
|
|
|
347
|
-
// Remove any existing deepflow tool usage hooks from PostToolUse
|
|
359
|
+
// Remove any existing deepflow tool usage / execution history / worktree guard / invariant check hooks from PostToolUse
|
|
348
360
|
settings.hooks.PostToolUse = settings.hooks.PostToolUse.filter(hook => {
|
|
349
361
|
const cmd = hook.hooks?.[0]?.command || '';
|
|
350
|
-
return !cmd.includes('df-tool-usage');
|
|
362
|
+
return !cmd.includes('df-tool-usage') && !cmd.includes('df-execution-history') && !cmd.includes('df-worktree-guard') && !cmd.includes('df-invariant-check');
|
|
351
363
|
});
|
|
352
364
|
|
|
353
365
|
// Add tool usage hook
|
|
@@ -357,6 +369,30 @@ async function configureHooks(claudeDir) {
|
|
|
357
369
|
command: toolUsageCmd
|
|
358
370
|
}]
|
|
359
371
|
});
|
|
372
|
+
|
|
373
|
+
// Add execution history hook
|
|
374
|
+
settings.hooks.PostToolUse.push({
|
|
375
|
+
hooks: [{
|
|
376
|
+
type: 'command',
|
|
377
|
+
command: executionHistoryCmd
|
|
378
|
+
}]
|
|
379
|
+
});
|
|
380
|
+
|
|
381
|
+
// Add worktree guard hook (blocks Write/Edit to main-branch files when df/* worktree exists)
|
|
382
|
+
settings.hooks.PostToolUse.push({
|
|
383
|
+
hooks: [{
|
|
384
|
+
type: 'command',
|
|
385
|
+
command: worktreeGuardCmd
|
|
386
|
+
}]
|
|
387
|
+
});
|
|
388
|
+
|
|
389
|
+
// Add invariant check hook (exits 1 on hard failures after git commit)
|
|
390
|
+
settings.hooks.PostToolUse.push({
|
|
391
|
+
hooks: [{
|
|
392
|
+
type: 'command',
|
|
393
|
+
command: invariantCheckCmd
|
|
394
|
+
}]
|
|
395
|
+
});
|
|
360
396
|
log('PostToolUse hook configured');
|
|
361
397
|
|
|
362
398
|
fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
|
|
@@ -539,7 +575,7 @@ async function uninstall() {
|
|
|
539
575
|
];
|
|
540
576
|
|
|
541
577
|
if (level === 'global') {
|
|
542
|
-
toRemove.push('hooks/df-statusline.js', 'hooks/df-check-update.js', 'hooks/df-consolidation-check.js', 'hooks/df-invariant-check.js', 'hooks/df-quota-logger.js', 'hooks/df-tool-usage.js');
|
|
578
|
+
toRemove.push('hooks/df-statusline.js', 'hooks/df-check-update.js', 'hooks/df-consolidation-check.js', 'hooks/df-invariant-check.js', 'hooks/df-quota-logger.js', 'hooks/df-tool-usage.js', 'hooks/df-dashboard-push.js', 'hooks/df-execution-history.js', 'hooks/df-worktree-guard.js');
|
|
543
579
|
}
|
|
544
580
|
|
|
545
581
|
for (const item of toRemove) {
|
|
@@ -577,7 +613,7 @@ async function uninstall() {
|
|
|
577
613
|
if (settings.hooks?.SessionEnd) {
|
|
578
614
|
settings.hooks.SessionEnd = settings.hooks.SessionEnd.filter(hook => {
|
|
579
615
|
const cmd = hook.hooks?.[0]?.command || '';
|
|
580
|
-
return !cmd.includes('df-quota-logger');
|
|
616
|
+
return !cmd.includes('df-quota-logger') && !cmd.includes('df-dashboard-push');
|
|
581
617
|
});
|
|
582
618
|
if (settings.hooks.SessionEnd.length === 0) {
|
|
583
619
|
delete settings.hooks.SessionEnd;
|
|
@@ -586,7 +622,7 @@ async function uninstall() {
|
|
|
586
622
|
if (settings.hooks?.PostToolUse) {
|
|
587
623
|
settings.hooks.PostToolUse = settings.hooks.PostToolUse.filter(hook => {
|
|
588
624
|
const cmd = hook.hooks?.[0]?.command || '';
|
|
589
|
-
return !cmd.includes('df-tool-usage');
|
|
625
|
+
return !cmd.includes('df-tool-usage') && !cmd.includes('df-execution-history') && !cmd.includes('df-worktree-guard') && !cmd.includes('df-invariant-check');
|
|
590
626
|
});
|
|
591
627
|
if (settings.hooks.PostToolUse.length === 0) {
|
|
592
628
|
delete settings.hooks.PostToolUse;
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* deepflow dashboard push — SessionEnd hook
|
|
4
|
+
* Collects session summary (tokens, duration, tool calls, model), gets
|
|
5
|
+
* git user.name, and POSTs to dashboard_url from .deepflow/config.yaml.
|
|
6
|
+
* Silently skips if dashboard_url is not configured.
|
|
7
|
+
* Fire-and-forget: exits immediately after spawning background worker.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
'use strict';
|
|
11
|
+
|
|
12
|
+
// Spawn background process so the hook returns immediately
|
|
13
|
+
if (process.argv[2] !== '--background') {
|
|
14
|
+
const { spawn } = require('child_process');
|
|
15
|
+
const child = spawn(process.execPath, [__filename, '--background'], {
|
|
16
|
+
detached: true,
|
|
17
|
+
stdio: 'ignore',
|
|
18
|
+
// Pass stdin data through env so background process can read it
|
|
19
|
+
env: { ...process.env, _DF_HOOK_INPUT: getStdinSync() }
|
|
20
|
+
});
|
|
21
|
+
child.unref();
|
|
22
|
+
process.exit(0);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
// --- Background process ---
|
|
26
|
+
|
|
27
|
+
const fs = require('fs');
|
|
28
|
+
const path = require('path');
|
|
29
|
+
const os = require('os');
|
|
30
|
+
const { execFileSync } = require('child_process');
|
|
31
|
+
const https = require('https');
|
|
32
|
+
const http = require('http');
|
|
33
|
+
|
|
34
|
+
function getStdinSync() {
|
|
35
|
+
// Non-blocking stdin read for the parent process (limited buffer)
|
|
36
|
+
try {
|
|
37
|
+
return fs.readFileSync('/dev/stdin', { encoding: 'utf8', flag: 'rs' }) || '';
|
|
38
|
+
} catch (_e) {
|
|
39
|
+
return '';
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/** Read .deepflow/config.yaml and extract dashboard_url (no yaml dep — regex parse). */
|
|
44
|
+
function getDashboardUrl(cwd) {
|
|
45
|
+
const configPath = path.join(cwd, '.deepflow', 'config.yaml');
|
|
46
|
+
if (!fs.existsSync(configPath)) return null;
|
|
47
|
+
try {
|
|
48
|
+
const content = fs.readFileSync(configPath, 'utf8');
|
|
49
|
+
const match = content.match(/^\s*dashboard_url\s*:\s*(.+)$/m);
|
|
50
|
+
if (!match) return null;
|
|
51
|
+
const val = match[1].trim().replace(/^['"]|['"]$/g, '');
|
|
52
|
+
return val || null;
|
|
53
|
+
} catch (_e) {
|
|
54
|
+
return null;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/** Get git user.name in the given directory. Returns 'unknown' on failure. */
|
|
59
|
+
function getGitUser(cwd) {
|
|
60
|
+
try {
|
|
61
|
+
return execFileSync('git', ['config', 'user.name'], {
|
|
62
|
+
cwd,
|
|
63
|
+
encoding: 'utf8',
|
|
64
|
+
timeout: 3000,
|
|
65
|
+
stdio: ['ignore', 'pipe', 'ignore']
|
|
66
|
+
}).trim() || 'unknown';
|
|
67
|
+
} catch (_e) {
|
|
68
|
+
return process.env.USER || 'unknown';
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/** POST JSON payload to url. Returns true on 200. */
|
|
73
|
+
function postJson(url, payload) {
|
|
74
|
+
return new Promise((resolve) => {
|
|
75
|
+
let parsed;
|
|
76
|
+
try {
|
|
77
|
+
parsed = new URL(url);
|
|
78
|
+
} catch (_e) {
|
|
79
|
+
resolve(false);
|
|
80
|
+
return;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
const body = JSON.stringify(payload);
|
|
84
|
+
const isHttps = parsed.protocol === 'https:';
|
|
85
|
+
const lib = isHttps ? https : http;
|
|
86
|
+
|
|
87
|
+
const options = {
|
|
88
|
+
hostname: parsed.hostname,
|
|
89
|
+
port: parsed.port || (isHttps ? 443 : 80),
|
|
90
|
+
path: parsed.pathname,
|
|
91
|
+
method: 'POST',
|
|
92
|
+
headers: {
|
|
93
|
+
'Content-Type': 'application/json',
|
|
94
|
+
'Content-Length': Buffer.byteLength(body)
|
|
95
|
+
},
|
|
96
|
+
timeout: 10000
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
const req = lib.request(options, (res) => {
|
|
100
|
+
res.resume();
|
|
101
|
+
res.on('end', () => resolve(res.statusCode === 200));
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
req.on('error', () => resolve(false));
|
|
105
|
+
req.on('timeout', () => { req.destroy(); resolve(false); });
|
|
106
|
+
req.write(body);
|
|
107
|
+
req.end();
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
async function main() {
|
|
112
|
+
try {
|
|
113
|
+
const cwd = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
114
|
+
const dashboardUrl = getDashboardUrl(cwd);
|
|
115
|
+
|
|
116
|
+
// Silently skip if not configured
|
|
117
|
+
if (!dashboardUrl) process.exit(0);
|
|
118
|
+
|
|
119
|
+
// Parse session data from hook input (passed via env)
|
|
120
|
+
let hookData = {};
|
|
121
|
+
try {
|
|
122
|
+
const raw = process.env._DF_HOOK_INPUT || '';
|
|
123
|
+
if (raw) hookData = JSON.parse(raw);
|
|
124
|
+
} catch (_e) {
|
|
125
|
+
// fallback: empty data, we'll still send what we know
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
const gitUser = getGitUser(cwd);
|
|
129
|
+
const projectName = path.basename(cwd);
|
|
130
|
+
const ts = new Date().toISOString();
|
|
131
|
+
|
|
132
|
+
// Extract token fields from hook data (Claude Code SessionEnd format)
|
|
133
|
+
const usage = hookData.usage || hookData.context_window?.current_usage || {};
|
|
134
|
+
const inputTokens = usage.input_tokens || 0;
|
|
135
|
+
const outputTokens = usage.output_tokens || 0;
|
|
136
|
+
const cacheReadTokens = usage.cache_read_input_tokens || usage.cache_read_tokens || 0;
|
|
137
|
+
const cacheCreationTokens = usage.cache_creation_input_tokens || usage.cache_creation_tokens || 0;
|
|
138
|
+
const model = hookData.model?.id || hookData.model?.display_name || hookData.model || 'unknown';
|
|
139
|
+
const sessionId = hookData.session_id || hookData.sessionId || `${gitUser}:${projectName}:${ts}`;
|
|
140
|
+
const durationMs = hookData.duration_ms || null;
|
|
141
|
+
const toolCalls = hookData.tool_calls || hookData.tool_use_count || 0;
|
|
142
|
+
|
|
143
|
+
const payload = {
|
|
144
|
+
user: gitUser,
|
|
145
|
+
project: projectName,
|
|
146
|
+
session_id: sessionId,
|
|
147
|
+
model,
|
|
148
|
+
tokens: {
|
|
149
|
+
[model]: {
|
|
150
|
+
input: inputTokens,
|
|
151
|
+
output: outputTokens,
|
|
152
|
+
cache_read: cacheReadTokens,
|
|
153
|
+
cache_creation: cacheCreationTokens
|
|
154
|
+
}
|
|
155
|
+
},
|
|
156
|
+
started_at: hookData.started_at || ts,
|
|
157
|
+
ended_at: ts,
|
|
158
|
+
duration_ms: durationMs,
|
|
159
|
+
tool_calls: toolCalls
|
|
160
|
+
};
|
|
161
|
+
|
|
162
|
+
const ingestUrl = dashboardUrl.replace(/\/$/, '') + '/api/ingest';
|
|
163
|
+
await postJson(ingestUrl, payload);
|
|
164
|
+
} catch (_e) {
|
|
165
|
+
// Never break session end
|
|
166
|
+
}
|
|
167
|
+
process.exit(0);
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
main();
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* deepflow execution history recorder
|
|
4
|
+
* PostToolUse hook: fires when the Agent tool completes.
|
|
5
|
+
* Appends task_start + task_end records to {cwd}/.deepflow/execution-history.jsonl.
|
|
6
|
+
* Exits silently (code 0) on all errors — never blocks tool execution (REQ-8).
|
|
7
|
+
*
|
|
8
|
+
* Output record fields:
|
|
9
|
+
* type, task_id, spec, session_id, timestamp, status
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
'use strict';
|
|
13
|
+
|
|
14
|
+
const fs = require('fs');
|
|
15
|
+
const path = require('path');
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Extract task_id from Agent prompt.
|
|
19
|
+
* Pattern: T{n} anywhere in the prompt, e.g. "T21: fix bug" → "T21"
|
|
20
|
+
* Falls back to DEEPFLOW_TASK_ID env var (C-6).
|
|
21
|
+
*/
|
|
22
|
+
function extractTaskId(prompt) {
|
|
23
|
+
if (prompt) {
|
|
24
|
+
const match = prompt.match(/T(\d+)/);
|
|
25
|
+
if (match) return `T${match[1]}`;
|
|
26
|
+
}
|
|
27
|
+
return process.env.DEEPFLOW_TASK_ID || null;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Extract spec name from Agent prompt.
|
|
32
|
+
* Looks for pattern: "spec: {name}" or "spec:{name}"
|
|
33
|
+
*/
|
|
34
|
+
function extractSpec(prompt) {
|
|
35
|
+
if (!prompt) return null;
|
|
36
|
+
const match = prompt.match(/spec:\s*(\S+)/i);
|
|
37
|
+
return match ? match[1] : null;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Parse task status from tool_response content.
|
|
42
|
+
* Looks for TASK_STATUS:{pass|revert|fail} in the response text.
|
|
43
|
+
* Defaults to "unknown" if not found (REQ-2).
|
|
44
|
+
*/
|
|
45
|
+
function extractStatus(toolResponse) {
|
|
46
|
+
const responseStr = JSON.stringify(toolResponse || '');
|
|
47
|
+
const match = responseStr.match(/TASK_STATUS:(pass|revert|fail)/);
|
|
48
|
+
return match ? match[1] : 'unknown';
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Resolve the project root from cwd.
|
|
53
|
+
* Walks up to find the .deepflow directory, or falls back to cwd itself.
|
|
54
|
+
*/
|
|
55
|
+
function resolveProjectRoot(cwd) {
|
|
56
|
+
if (!cwd) return process.cwd();
|
|
57
|
+
// If inside a worktree, strip down to the project root
|
|
58
|
+
const worktreeMatch = cwd.match(/^(.*?)(?:\/\.deepflow\/worktrees\/[^/]+)/);
|
|
59
|
+
if (worktreeMatch) return worktreeMatch[1];
|
|
60
|
+
return cwd;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Read all stdin, then process
|
|
64
|
+
let raw = '';
|
|
65
|
+
process.stdin.setEncoding('utf8');
|
|
66
|
+
process.stdin.on('data', chunk => { raw += chunk; });
|
|
67
|
+
process.stdin.on('end', () => {
|
|
68
|
+
try {
|
|
69
|
+
const data = JSON.parse(raw);
|
|
70
|
+
|
|
71
|
+
// Only fire for Agent tool calls
|
|
72
|
+
if (data.tool_name !== 'Agent') {
|
|
73
|
+
process.exit(0);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
const prompt = (data.tool_input && data.tool_input.prompt) || '';
|
|
77
|
+
const taskId = extractTaskId(prompt);
|
|
78
|
+
|
|
79
|
+
// Only record if we have a task_id
|
|
80
|
+
if (!taskId) {
|
|
81
|
+
process.exit(0);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
const cwd = data.cwd || process.cwd();
|
|
85
|
+
const projectRoot = resolveProjectRoot(cwd);
|
|
86
|
+
const historyFile = path.join(projectRoot, '.deepflow', 'execution-history.jsonl');
|
|
87
|
+
|
|
88
|
+
const timestamp = new Date().toISOString();
|
|
89
|
+
const sessionId = data.session_id || null;
|
|
90
|
+
const spec = extractSpec(prompt);
|
|
91
|
+
const status = extractStatus(data.tool_response);
|
|
92
|
+
|
|
93
|
+
const startRecord = {
|
|
94
|
+
type: 'task_start',
|
|
95
|
+
task_id: taskId,
|
|
96
|
+
spec,
|
|
97
|
+
session_id: sessionId,
|
|
98
|
+
timestamp,
|
|
99
|
+
};
|
|
100
|
+
|
|
101
|
+
const endRecord = {
|
|
102
|
+
type: 'task_end',
|
|
103
|
+
task_id: taskId,
|
|
104
|
+
session_id: sessionId,
|
|
105
|
+
status,
|
|
106
|
+
timestamp,
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
const logDir = path.dirname(historyFile);
|
|
110
|
+
if (!fs.existsSync(logDir)) {
|
|
111
|
+
fs.mkdirSync(logDir, { recursive: true });
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
fs.appendFileSync(historyFile, JSON.stringify(startRecord) + '\n');
|
|
115
|
+
fs.appendFileSync(historyFile, JSON.stringify(endRecord) + '\n');
|
|
116
|
+
} catch (_e) {
|
|
117
|
+
// Fail silently — never break tool execution (REQ-8)
|
|
118
|
+
}
|
|
119
|
+
process.exit(0);
|
|
120
|
+
});
|
|
@@ -1020,6 +1020,130 @@ function checkInvariants(diff, specContent, opts = {}) {
|
|
|
1020
1020
|
return { hard, advisory };
|
|
1021
1021
|
}
|
|
1022
1022
|
|
|
1023
|
+
// ── Hook entry point (REQ-5 AC-7) ────────────────────────────────────────────
|
|
1024
|
+
//
|
|
1025
|
+
// When called as a PostToolUse hook, Claude Code pipes a JSON payload on stdin:
|
|
1026
|
+
// { tool_name, tool_input, tool_response, cwd, ... }
|
|
1027
|
+
//
|
|
1028
|
+
// We fire after any Bash call that looks like a git commit, extract the diff
|
|
1029
|
+
// from HEAD~1, load the active spec from .deepflow/, and exit(1) on hard failures.
|
|
1030
|
+
//
|
|
1031
|
+
// Detection: if stdin is not a TTY we treat it as hook mode and attempt JSON parse.
|
|
1032
|
+
// If the payload is not a git-commit Bash call we exit(0) silently.
|
|
1033
|
+
|
|
1034
|
+
function loadActiveSpec(cwd) {
|
|
1035
|
+
const deepflowDir = path.join(cwd, '.deepflow');
|
|
1036
|
+
let specContent = null;
|
|
1037
|
+
|
|
1038
|
+
try {
|
|
1039
|
+
// Look for doing-*.md specs first (in-progress)
|
|
1040
|
+
const entries = fs.readdirSync(deepflowDir);
|
|
1041
|
+
const doingSpec = entries.find((e) => e.startsWith('doing-') && e.endsWith('.md'));
|
|
1042
|
+
if (doingSpec) {
|
|
1043
|
+
specContent = fs.readFileSync(path.join(deepflowDir, doingSpec), 'utf8');
|
|
1044
|
+
return specContent;
|
|
1045
|
+
}
|
|
1046
|
+
|
|
1047
|
+
// Fall back to specs/ subdirectory
|
|
1048
|
+
const specsDir = path.join(cwd, 'specs');
|
|
1049
|
+
if (fs.existsSync(specsDir)) {
|
|
1050
|
+
const specEntries = fs.readdirSync(specsDir);
|
|
1051
|
+
const doingInSpecs = specEntries.find((e) => e.startsWith('doing-') && e.endsWith('.md'));
|
|
1052
|
+
if (doingInSpecs) {
|
|
1053
|
+
specContent = fs.readFileSync(path.join(specsDir, doingInSpecs), 'utf8');
|
|
1054
|
+
return specContent;
|
|
1055
|
+
}
|
|
1056
|
+
}
|
|
1057
|
+
} catch (_) {
|
|
1058
|
+
// Cannot read .deepflow or specs dir — return null
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1061
|
+
return null;
|
|
1062
|
+
}
|
|
1063
|
+
|
|
1064
|
+
function extractDiffFromLastCommit(cwd) {
|
|
1065
|
+
try {
|
|
1066
|
+
return execSync('git diff HEAD~1 HEAD', {
|
|
1067
|
+
encoding: 'utf8',
|
|
1068
|
+
cwd,
|
|
1069
|
+
stdio: ['ignore', 'pipe', 'ignore'],
|
|
1070
|
+
});
|
|
1071
|
+
} catch (_) {
|
|
1072
|
+
return null;
|
|
1073
|
+
}
|
|
1074
|
+
}
|
|
1075
|
+
|
|
1076
|
+
function isGitCommitBash(toolName, toolInput) {
|
|
1077
|
+
if (toolName !== 'Bash') return false;
|
|
1078
|
+
const cmd = (toolInput && (toolInput.command || toolInput.cmd || '')) || '';
|
|
1079
|
+
return /git\s+commit\b/.test(cmd);
|
|
1080
|
+
}
|
|
1081
|
+
|
|
1082
|
+
// Run hook mode when stdin is not a TTY (i.e., piped payload from Claude Code)
|
|
1083
|
+
if (!process.stdin.isTTY) {
|
|
1084
|
+
let raw = '';
|
|
1085
|
+
process.stdin.setEncoding('utf8');
|
|
1086
|
+
process.stdin.on('data', (chunk) => { raw += chunk; });
|
|
1087
|
+
process.stdin.on('end', () => {
|
|
1088
|
+
let data;
|
|
1089
|
+
try {
|
|
1090
|
+
data = JSON.parse(raw);
|
|
1091
|
+
} catch (_) {
|
|
1092
|
+
// Not valid JSON — not a hook payload, exit silently
|
|
1093
|
+
process.exit(0);
|
|
1094
|
+
}
|
|
1095
|
+
|
|
1096
|
+
try {
|
|
1097
|
+
const toolName = data.tool_name || '';
|
|
1098
|
+
const toolInput = data.tool_input || {};
|
|
1099
|
+
|
|
1100
|
+
// Only run after a git commit bash call
|
|
1101
|
+
if (!isGitCommitBash(toolName, toolInput)) {
|
|
1102
|
+
process.exit(0);
|
|
1103
|
+
}
|
|
1104
|
+
|
|
1105
|
+
const cwd = data.cwd || process.cwd();
|
|
1106
|
+
|
|
1107
|
+
const diff = extractDiffFromLastCommit(cwd);
|
|
1108
|
+
if (!diff) {
|
|
1109
|
+
// No diff available (e.g. initial commit) — pass through
|
|
1110
|
+
process.exit(0);
|
|
1111
|
+
}
|
|
1112
|
+
|
|
1113
|
+
const specContent = loadActiveSpec(cwd);
|
|
1114
|
+
if (!specContent) {
|
|
1115
|
+
// No active spec found — not a deepflow project or no spec in progress
|
|
1116
|
+
process.exit(0);
|
|
1117
|
+
}
|
|
1118
|
+
|
|
1119
|
+
const results = checkInvariants(diff, specContent, { mode: 'auto', taskType: 'implementation', projectRoot: cwd });
|
|
1120
|
+
|
|
1121
|
+
if (results.hard.length > 0) {
|
|
1122
|
+
console.error('[df-invariant-check] Hard invariant failures detected:');
|
|
1123
|
+
const outputLines = formatOutput(results);
|
|
1124
|
+
for (const line of outputLines) {
|
|
1125
|
+
if (results.hard.some((v) => formatViolation(v) === line)) {
|
|
1126
|
+
console.error(` ${line}`);
|
|
1127
|
+
}
|
|
1128
|
+
}
|
|
1129
|
+
process.exit(1);
|
|
1130
|
+
}
|
|
1131
|
+
|
|
1132
|
+
if (results.advisory.length > 0) {
|
|
1133
|
+
console.warn('[df-invariant-check] Advisory warnings:');
|
|
1134
|
+
for (const v of results.advisory) {
|
|
1135
|
+
console.warn(` ${formatViolation(v)}`);
|
|
1136
|
+
}
|
|
1137
|
+
}
|
|
1138
|
+
|
|
1139
|
+
process.exit(0);
|
|
1140
|
+
} catch (_err) {
|
|
1141
|
+
// Unexpected error — fail open so we never break non-deepflow projects
|
|
1142
|
+
process.exit(0);
|
|
1143
|
+
}
|
|
1144
|
+
});
|
|
1145
|
+
} else {
|
|
1146
|
+
|
|
1023
1147
|
// ── CLI entry point (REQ-6) ───────────────────────────────────────────────────
|
|
1024
1148
|
if (require.main === module) {
|
|
1025
1149
|
const args = process.argv.slice(2);
|
|
@@ -1091,6 +1215,8 @@ if (require.main === module) {
|
|
|
1091
1215
|
process.exit(results.hard.length > 0 ? 1 : 0);
|
|
1092
1216
|
}
|
|
1093
1217
|
|
|
1218
|
+
} // end else (TTY / CLI mode)
|
|
1219
|
+
|
|
1094
1220
|
module.exports = {
|
|
1095
1221
|
checkInvariants,
|
|
1096
1222
|
checkLspAvailability,
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* deepflow worktree guard
|
|
4
|
+
* PostToolUse hook: blocks Write/Edit to main-branch files when a df/* worktree exists.
|
|
5
|
+
*
|
|
6
|
+
* REQ-3 AC-4: exit(1) to block the tool call when all conditions hold:
|
|
7
|
+
* 1. tool_name is Write or Edit
|
|
8
|
+
* 2. current branch is main (or master)
|
|
9
|
+
* 3. a df/* worktree branch exists
|
|
10
|
+
* 4. file_path is NOT on the allowlist (.deepflow/, PLAN.md, specs/)
|
|
11
|
+
*
|
|
12
|
+
* REQ-3 AC-5: allowlisted paths always pass through (no false positives).
|
|
13
|
+
*
|
|
14
|
+
* Exits silently (code 0) on parse errors or git failures — never breaks tool
|
|
15
|
+
* execution in non-deepflow projects.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
'use strict';
|
|
19
|
+
|
|
20
|
+
const { execFileSync } = require('child_process');
|
|
21
|
+
|
|
22
|
+
// Paths that are always allowed regardless of worktree state
|
|
23
|
+
const ALLOWLIST = [
|
|
24
|
+
/(?:^|\/)\.deepflow\//,
|
|
25
|
+
/(?:^|\/)PLAN\.md$/,
|
|
26
|
+
/(?:^|\/)specs\//,
|
|
27
|
+
];
|
|
28
|
+
|
|
29
|
+
function isAllowlisted(filePath) {
|
|
30
|
+
return ALLOWLIST.some(re => re.test(filePath));
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
function currentBranch(cwd) {
|
|
34
|
+
try {
|
|
35
|
+
return execFileSync('git', ['rev-parse', '--abbrev-ref', 'HEAD'], {
|
|
36
|
+
encoding: 'utf8',
|
|
37
|
+
cwd,
|
|
38
|
+
stdio: ['ignore', 'pipe', 'ignore'],
|
|
39
|
+
}).trim();
|
|
40
|
+
} catch (_) {
|
|
41
|
+
return null;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
function dfWorktreeExists(cwd) {
|
|
46
|
+
try {
|
|
47
|
+
const out = execFileSync('git', ['branch', '--list', 'df/*'], {
|
|
48
|
+
encoding: 'utf8',
|
|
49
|
+
cwd,
|
|
50
|
+
stdio: ['ignore', 'pipe', 'ignore'],
|
|
51
|
+
});
|
|
52
|
+
return out.trim().length > 0;
|
|
53
|
+
} catch (_) {
|
|
54
|
+
return false;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
let raw = '';
|
|
59
|
+
process.stdin.setEncoding('utf8');
|
|
60
|
+
process.stdin.on('data', chunk => { raw += chunk; });
|
|
61
|
+
process.stdin.on('end', () => {
|
|
62
|
+
try {
|
|
63
|
+
const data = JSON.parse(raw);
|
|
64
|
+
const toolName = data.tool_name || '';
|
|
65
|
+
|
|
66
|
+
// Only guard Write and Edit
|
|
67
|
+
if (toolName !== 'Write' && toolName !== 'Edit') {
|
|
68
|
+
process.exit(0);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
const filePath = (data.tool_input && data.tool_input.file_path) || '';
|
|
72
|
+
const cwd = data.cwd || process.cwd();
|
|
73
|
+
|
|
74
|
+
// Allowlisted paths always pass
|
|
75
|
+
if (isAllowlisted(filePath)) {
|
|
76
|
+
process.exit(0);
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
const branch = currentBranch(cwd);
|
|
80
|
+
|
|
81
|
+
// Only guard when on main/master
|
|
82
|
+
if (branch !== 'main' && branch !== 'master') {
|
|
83
|
+
process.exit(0);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
// Block only when a df/* worktree branch exists
|
|
87
|
+
if (!dfWorktreeExists(cwd)) {
|
|
88
|
+
process.exit(0);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// All conditions met — block the write
|
|
92
|
+
console.error(
|
|
93
|
+
`[df-worktree-guard] Blocked ${toolName} to "${filePath}" on main branch ` +
|
|
94
|
+
`while df/* worktree exists. Make changes inside the worktree branch instead.`
|
|
95
|
+
);
|
|
96
|
+
process.exit(1);
|
|
97
|
+
} catch (_e) {
|
|
98
|
+
// Parse or unexpected error — fail open so we never break non-deepflow projects
|
|
99
|
+
process.exit(0);
|
|
100
|
+
}
|
|
101
|
+
});
|
package/package.json
CHANGED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: df:dashboard
|
|
3
|
+
description: View deepflow dashboard in team mode (via URL) or local mode (via CLI server)
|
|
4
|
+
allowed-tools: [Read, Bash]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# /df:dashboard — Deepflow Dashboard
|
|
8
|
+
|
|
9
|
+
View the deepflow dashboard in team or local mode.
|
|
10
|
+
|
|
11
|
+
**NEVER:** Spawn agents, use Task tool, use AskUserQuestion, run git, EnterPlanMode, ExitPlanMode
|
|
12
|
+
|
|
13
|
+
**ONLY:** Read config, run npx deepflow-dashboard, open browser
|
|
14
|
+
|
|
15
|
+
## Behavior
|
|
16
|
+
|
|
17
|
+
1. **Check config mode**
|
|
18
|
+
- Read `.deepflow/config.yaml`
|
|
19
|
+
- If `dashboard_url` key exists and is non-empty: TEAM MODE
|
|
20
|
+
- Else: LOCAL MODE
|
|
21
|
+
|
|
22
|
+
2. **TEAM MODE** (dashboard_url configured)
|
|
23
|
+
- Display: `Dashboard URL configured: {dashboard_url}`
|
|
24
|
+
- Open URL in browser via `open "{dashboard_url}"` (macOS) or appropriate command for OS
|
|
25
|
+
|
|
26
|
+
3. **LOCAL MODE** (no dashboard_url)
|
|
27
|
+
- Display: `Starting local deepflow dashboard server...`
|
|
28
|
+
- Run: `npx deepflow-dashboard`
|
|
29
|
+
- Instruct user to open http://localhost:3000 (or configured port) in browser
|
|
30
|
+
|
|
31
|
+
## Rules
|
|
32
|
+
|
|
33
|
+
- Gracefully handle missing config.yaml (treat as LOCAL MODE)
|
|
34
|
+
- If dashboard_url exists but is empty string, treat as LOCAL MODE
|
|
35
|
+
- Always confirm mode and action before executing
|
|
@@ -21,10 +21,11 @@ Each task = one background agent. **NEVER use TaskOutput** (100KB+ transcripts e
|
|
|
21
21
|
2. STOP. End turn. Do NOT poll.
|
|
22
22
|
3. On EACH notification:
|
|
23
23
|
a. Ratchet check (§5.5)
|
|
24
|
-
b. Passed → TaskUpdate(status: "completed"), update PLAN.md [x] + commit hash
|
|
25
|
-
c. Failed → partial salvage (§5.5). Salvaged →
|
|
26
|
-
d.
|
|
27
|
-
e.
|
|
24
|
+
b. Passed → wave test agent (§5.6). Tests pass → re-snapshot (§5.6) → TaskUpdate(status: "completed"), update PLAN.md [x] + commit hash
|
|
25
|
+
c. Failed → partial salvage (§5.5). Salvaged → wave test agent (§5.6). Not → git revert, TaskUpdate(status: "pending")
|
|
26
|
+
d. Wave test agent failed after max attempts → revert ALL task commits, TaskUpdate(status: "pending")
|
|
27
|
+
e. Report ONE line: "✓ T1: ratchet+tests passed (abc123)" or "⚕ T1: salvaged+tested (abc124)" or "✗ T1: reverted" or "✗ T1: test agent failed, reverted"
|
|
28
|
+
f. NOT all done → end turn, wait | ALL done → next wave or finish
|
|
28
29
|
4. Between waves: context ≥50% → checkpoint and exit.
|
|
29
30
|
5. Repeat until: all done, all blocked, or context ≥50%.
|
|
30
31
|
```
|
|
@@ -53,7 +54,26 @@ git -C ${WORKTREE_PATH} ls-files | grep -E '\.(test|spec)\.[^/]+$|^test_|_test\.
|
|
|
53
54
|
|
|
54
55
|
### 1.7. NO-TESTS BOOTSTRAP
|
|
55
56
|
|
|
56
|
-
|
|
57
|
+
<!-- AC-1: zero test files triggers bootstrap before wave 1 -->
|
|
58
|
+
<!-- AC-2: bootstrap success re-snapshots auto-snapshot.txt; subsequent tasks use updated snapshot -->
|
|
59
|
+
<!-- AC-3: bootstrap failure with default model retries with Opus; double failure halts with specific message -->
|
|
60
|
+
|
|
61
|
+
**Gate:** After §1.6 snapshot, check `auto-snapshot.txt`:
|
|
62
|
+
```bash
|
|
63
|
+
SNAPSHOT_COUNT=$(wc -l < .deepflow/auto-snapshot.txt | tr -d ' ')
|
|
64
|
+
```
|
|
65
|
+
If `SNAPSHOT_COUNT` is `0` (zero test files found), MUST spawn bootstrap agent before wave 1. No implementation tasks may start until bootstrap completes successfully.
|
|
66
|
+
|
|
67
|
+
**Bootstrap flow:**
|
|
68
|
+
1. Spawn `Agent(model="{default_model}", ...)` with Bootstrap prompt (§6). End turn, wait for notification.
|
|
69
|
+
2. **On success (TASK_STATUS:pass):** Re-snapshot immediately:
|
|
70
|
+
```bash
|
|
71
|
+
git -C ${WORKTREE_PATH} ls-files | grep -E '\.(test|spec)\.[^/]+$|^test_|_test\.[^/]+$|^tests/|__tests__/' > .deepflow/auto-snapshot.txt
|
|
72
|
+
```
|
|
73
|
+
All subsequent tasks use this updated snapshot as their ratchet baseline. Proceed to wave 1.
|
|
74
|
+
3. **On failure (TASK_STATUS:fail) with default model:** Retry ONCE with `Agent(model="opus", ...)` using the same Bootstrap prompt.
|
|
75
|
+
- Opus success → re-snapshot (same command above) → proceed to wave 1.
|
|
76
|
+
- Opus failure → halt with message: `"Bootstrap failed with both default and Opus — manual intervention required"`. Do not proceed.
|
|
57
77
|
|
|
58
78
|
### 2. LOAD PLAN
|
|
59
79
|
|
|
@@ -115,6 +135,46 @@ Omit if context.json/token-history.jsonl/awk unavailable. Never fail ratchet for
|
|
|
115
135
|
1. Lint/typecheck-only (build+tests passed): spawn `Agent(model="haiku")` to fix. Re-ratchet. Fail → revert both.
|
|
116
136
|
2. Build/test failure → `git revert HEAD --no-edit` (no salvage).
|
|
117
137
|
|
|
138
|
+
### 5.6. WAVE TEST AGENT
|
|
139
|
+
|
|
140
|
+
<!-- AC-8: After wave ratchet passes, Opus test agent spawns and writes unit tests -->
|
|
141
|
+
<!-- AC-9: Test failures trigger implementer re-spawn with failure feedback; max 3 attempts then revert -->
|
|
142
|
+
<!-- AC-12: auto-snapshot.txt re-generated after wave test agent commits; wave N+1 ratchet includes wave N tests -->
|
|
143
|
+
|
|
144
|
+
**Trigger:** After ratchet check passes (or after successful salvage) for a task.
|
|
145
|
+
|
|
146
|
+
**Attempt tracking:** Initialize `attempt_count = 1` and `failure_feedback = ""` per task when first spawned. Max 3 total attempts (1 initial + 2 retries).
|
|
147
|
+
|
|
148
|
+
**Flow:**
|
|
149
|
+
1. Capture the implementation diff: `git -C ${WORKTREE_PATH} diff HEAD~1` → store as `IMPL_DIFF`.
|
|
150
|
+
2. Spawn `Agent(model="opus")` with Wave Test prompt (§6). `run_in_background=true`. End turn, wait.
|
|
151
|
+
3. On notification:
|
|
152
|
+
a. Run ratchet check (§5.5) — all new + pre-existing tests must pass.
|
|
153
|
+
b. **Tests pass** → commit stands. **Re-snapshot** immediately so wave N+1 ratchet includes wave N tests:
|
|
154
|
+
```bash
|
|
155
|
+
git -C ${WORKTREE_PATH} ls-files | grep -E '\.(test|spec)\.[^/]+$|^test_|_test\.[^/]+$|^tests/|__tests__/' > .deepflow/auto-snapshot.txt
|
|
156
|
+
```
|
|
157
|
+
Task complete. Report: `"✓ T{n}: ratchet+tests passed ({hash})"`.
|
|
158
|
+
c. **Tests fail** →
|
|
159
|
+
- If `attempt_count < 3`:
|
|
160
|
+
- `git revert HEAD --no-edit` (revert test commit)
|
|
161
|
+
- `git revert HEAD --no-edit` (revert implementation commit)
|
|
162
|
+
- Accumulate failure output: `failure_feedback += "Attempt {N}: {truncated_test_output}\n"`
|
|
163
|
+
- `attempt_count += 1`
|
|
164
|
+
- Re-spawn implementer agent with original prompt + failure feedback appendix:
|
|
165
|
+
```
|
|
166
|
+
PREVIOUS FAILURES (attempt {N-1} of 3):
|
|
167
|
+
{failure_feedback}
|
|
168
|
+
Fix the issues above. Do NOT repeat the same mistakes.
|
|
169
|
+
```
|
|
170
|
+
- On implementer notification: ratchet check (§5.5). Passed → goto step 1 (spawn test agent again). Failed → same retry logic.
|
|
171
|
+
- If `attempt_count >= 3`:
|
|
172
|
+
- Revert ALL commits back to pre-task state: `git -C ${WORKTREE_PATH} reset --hard {pre_task_commit}`
|
|
173
|
+
- `TaskUpdate(status: "pending")`
|
|
174
|
+
- Report: `"✗ T{n}: test agent failed after 3 attempts, reverted"`
|
|
175
|
+
|
|
176
|
+
**Output truncation for failure feedback:** Test failures → test names + last 30 lines of output. Build failures → last 15 lines. Cap total `failure_feedback` at 200 lines.
|
|
177
|
+
|
|
118
178
|
### 5.7. PARALLEL SPIKE PROBES
|
|
119
179
|
|
|
120
180
|
Trigger: ≥2 [SPIKE] tasks with same blocker or identical hypothesis.
|
|
@@ -186,19 +246,48 @@ REPEAT:
|
|
|
186
246
|
--- START ---
|
|
187
247
|
{task_id}: {description} Files: {files} Spec: {spec}
|
|
188
248
|
{If reverted: DO NOT repeat: - Cycle {N}: "{reason}"}
|
|
249
|
+
{If spike insights exist:
|
|
250
|
+
spike_results:
|
|
251
|
+
hypothesis: {hypothesis from spike_insights}
|
|
252
|
+
outcome: {outcome}
|
|
253
|
+
edge_cases: {edge_cases}
|
|
254
|
+
insight: {insight from probe_learnings}
|
|
255
|
+
}
|
|
189
256
|
Success criteria: {ACs from spec relevant to this task}
|
|
190
257
|
--- MIDDLE (omit for low effort; omit deps for medium) ---
|
|
191
258
|
Impact: Callers: {file} ({why}) | Duplicates: [active→consolidate] [dead→DELETE] | Data flow: {consumers}
|
|
192
259
|
Prior tasks: {dep_id}: {summary}
|
|
193
260
|
Steps: 1. chub search/get for APIs 2. LSP findReferences, add unlisted callers 3. Read all Impact files 4. Implement 5. Commit
|
|
194
261
|
--- END ---
|
|
195
|
-
Spike results: {winner learnings}
|
|
196
262
|
Duplicates: [active]→consolidate [dead]→DELETE. ONLY job: code+commit. No merge/rename/checkout.
|
|
263
|
+
Last line of your response MUST be: TASK_STATUS:pass (if successful) or TASK_STATUS:fail (if failed) or TASK_STATUS:revert (if reverted)
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
**Bootstrap:** `BOOTSTRAP: Write tests for edit_scope files. Do NOT change implementation. Commit as test({spec}): bootstrap. Last line: TASK_STATUS:pass or TASK_STATUS:fail`
|
|
267
|
+
|
|
268
|
+
**Wave Test** (`Agent(model="opus")`):
|
|
197
269
|
```
|
|
270
|
+
--- START ---
|
|
271
|
+
You are a QA engineer. Write unit tests for the following code changes.
|
|
272
|
+
Use {test_framework}. Test behavioral correctness, not implementation details.
|
|
273
|
+
Spec: {spec}. Task: {task_id}.
|
|
274
|
+
|
|
275
|
+
Implementation diff:
|
|
276
|
+
{IMPL_DIFF}
|
|
198
277
|
|
|
199
|
-
|
|
278
|
+
--- MIDDLE ---
|
|
279
|
+
Files changed: {changed_files}
|
|
280
|
+
Existing test patterns: {test_file_examples from auto-snapshot.txt, first 3}
|
|
200
281
|
|
|
201
|
-
|
|
282
|
+
--- END ---
|
|
283
|
+
Write thorough unit tests covering: happy paths, edge cases, error handling.
|
|
284
|
+
Follow existing test conventions in the codebase.
|
|
285
|
+
Commit as: test({spec}): wave-{N} unit tests
|
|
286
|
+
Do NOT modify implementation files. ONLY add/edit test files.
|
|
287
|
+
Last line of your response MUST be: TASK_STATUS:pass or TASK_STATUS:fail
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
**Spike:** `{task_id} [SPIKE]: {hypothesis}. Files+Spec. {reverted warnings}. Minimal spike. Commit as spike({spec}): {desc}. Last line: TASK_STATUS:pass or TASK_STATUS:fail`
|
|
202
291
|
|
|
203
292
|
**Optimize Task** (`Agent(model="opus")`):
|
|
204
293
|
```
|
|
@@ -210,6 +299,7 @@ CONSTRAINT: ONE atomic change.
|
|
|
210
299
|
Last 5 cycles + failed hypotheses + Impact/deps.
|
|
211
300
|
--- END ---
|
|
212
301
|
{Learnings}. ONE change + commit. No metric run, no multiple changes.
|
|
302
|
+
Last line of your response MUST be: TASK_STATUS:pass or TASK_STATUS:fail or TASK_STATUS:revert
|
|
213
303
|
```
|
|
214
304
|
|
|
215
305
|
**Optimize Probe** (`Agent(model="opus")`):
|
|
@@ -224,11 +314,68 @@ Current/Target. Role instruction:
|
|
|
224
314
|
Full history + all failed hypotheses.
|
|
225
315
|
--- END ---
|
|
226
316
|
ONE atomic change. Commit. STOP.
|
|
317
|
+
Last line of your response MUST be: TASK_STATUS:pass or TASK_STATUS:fail or TASK_STATUS:revert
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
**Final Test** (`Agent(model="opus")`):
|
|
321
|
+
```
|
|
322
|
+
--- START ---
|
|
323
|
+
You are an independent QA engineer. You have ONLY the spec and exported interfaces below.
|
|
324
|
+
You cannot read implementation files — you must treat the system as a black box.
|
|
325
|
+
Write integration tests that verify EACH acceptance criterion from the spec.
|
|
326
|
+
|
|
327
|
+
Spec:
|
|
328
|
+
{SPEC_CONTENT}
|
|
329
|
+
|
|
330
|
+
Exported interfaces:
|
|
331
|
+
{EXPORTED_INTERFACES}
|
|
332
|
+
|
|
333
|
+
--- END ---
|
|
334
|
+
Write integration tests covering every AC in the spec.
|
|
335
|
+
Test through public interfaces only — no internal imports, no implementation details.
|
|
336
|
+
If an AC cannot be tested through exports alone, write a test stub with a TODO comment explaining why.
|
|
337
|
+
Commit as: test({spec}): integration tests
|
|
338
|
+
Do NOT read or modify implementation files. ONLY add/edit test files.
|
|
339
|
+
Last line of your response MUST be: TASK_STATUS:pass or TASK_STATUS:fail
|
|
227
340
|
```
|
|
228
341
|
|
|
229
342
|
### 8. COMPLETE SPECS
|
|
230
343
|
|
|
344
|
+
<!-- AC-10: After all waves, Opus black-box test agent spawns with spec + exports only (no implementation) -->
|
|
345
|
+
<!-- AC-11: Final integration tests must all pass before merge proceeds; failure blocks merge -->
|
|
346
|
+
|
|
231
347
|
All tasks done for `doing-*` spec:
|
|
348
|
+
|
|
349
|
+
**8.1. Final Test Agent (black-box integration tests):**
|
|
350
|
+
|
|
351
|
+
Before merge, spawn an independent Opus QA agent that sees ONLY the spec and exported interfaces — never implementation source.
|
|
352
|
+
|
|
353
|
+
1. Extract exported interfaces from the worktree (public API surface):
|
|
354
|
+
```bash
|
|
355
|
+
# Collect exported symbols — adapt pattern to language
|
|
356
|
+
git -C ${WORKTREE_PATH} diff main --name-only | xargs -I{} grep -h '^\(export\|pub \|func \|def \)' "${WORKTREE_PATH}/{}" 2>/dev/null | head -100
|
|
357
|
+
```
|
|
358
|
+
Store result as `EXPORTED_INTERFACES`. Also load spec content: `cat specs/doing-{name}.md` → `SPEC_CONTENT`.
|
|
359
|
+
|
|
360
|
+
2. Spawn `Agent(model="opus")` with Final Test prompt (§6). `run_in_background=true`. End turn, wait.
|
|
361
|
+
|
|
362
|
+
3. On notification:
|
|
363
|
+
a. Run ratchet check (§5.5) — all integration tests must pass.
|
|
364
|
+
b. **Tests pass** → commit stands. Proceed to step 8.2 (merge).
|
|
365
|
+
c. **Tests fail** → **merge is blocked**. Do NOT retry. Report:
|
|
366
|
+
`"✗ Final integration tests failed for {spec} — merge blocked, requires human review"`
|
|
367
|
+
Leave worktree intact. Set all spec tasks back to `TaskUpdate(status: "pending")`.
|
|
368
|
+
Write failure details to `.deepflow/results/final-test-{spec}.yaml`:
|
|
369
|
+
```yaml
|
|
370
|
+
spec: {spec}
|
|
371
|
+
status: blocked
|
|
372
|
+
reason: "Final integration tests failed"
|
|
373
|
+
output: |
|
|
374
|
+
{truncated test output — last 30 lines}
|
|
375
|
+
```
|
|
376
|
+
STOP. Do not proceed to merge.
|
|
377
|
+
|
|
378
|
+
**8.2. Merge and cleanup:**
|
|
232
379
|
1. `skill: "df:verify", args: "doing-{name}"` — runs L0-L4 gates, merges, cleans worktree, renames doing→done, extracts decisions. Fail (fix tasks added) → stop; `--continue` picks them up.
|
|
233
380
|
2. Remove spec's ENTIRE section from PLAN.md. Recalculate Summary table.
|
|
234
381
|
|
|
@@ -280,4 +427,6 @@ Reverted task: `TaskUpdate(status: "pending")`, dependents stay blocked. Repeate
|
|
|
280
427
|
| Ratchet + metric both required | Keep only if both pass |
|
|
281
428
|
| Plateau → probes | 3 cycles <1% triggers probes |
|
|
282
429
|
| Circuit breaker = 3 reverts | Halts, needs human |
|
|
430
|
+
| Wave test after ratchet | Opus writes tests; 3 attempts then revert |
|
|
431
|
+
| Final test before merge | Opus black-box integration tests; failure blocks merge, no retry |
|
|
283
432
|
| Probe diversity | ≥1 contrarian + ≥1 naive |
|
|
@@ -96,6 +96,16 @@ quality:
|
|
|
96
96
|
# Timeout in seconds to wait for the dev server to become ready (default: 30)
|
|
97
97
|
browser_timeout: 30
|
|
98
98
|
|
|
99
|
+
# deepflow-dashboard team mode settings
|
|
100
|
+
# dashboard_url: URL of the shared team server for POST ingestion
|
|
101
|
+
# Leave blank (or omit) to use local-only mode (no data is pushed)
|
|
102
|
+
# Example: http://team-server:3334
|
|
103
|
+
dashboard_url: ""
|
|
104
|
+
|
|
105
|
+
# Port for `npx deepflow-dashboard serve` (team server mode)
|
|
106
|
+
# Default: 3334 (3333 is reserved for local mode)
|
|
107
|
+
dashboard_port: 3334
|
|
108
|
+
|
|
99
109
|
# Recommended .gitignore entries
|
|
100
110
|
# Add these entries to your .gitignore to exclude instrumentation artifacts
|
|
101
111
|
gitignore_entries:
|