npm - agileflow - Versions diffs - 3.1.0 → 3.2.1 - Mend

agileflow 3.1.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (106) hide show

package/CHANGELOG.md +10 -0
package/README.md +57 -85
package/lib/dashboard-automations.js +130 -0
package/lib/dashboard-git.js +254 -0
package/lib/dashboard-inbox.js +64 -0
package/lib/dashboard-protocol.js +1 -0
package/lib/dashboard-server.js +114 -924
package/lib/dashboard-session.js +136 -0
package/lib/dashboard-status.js +72 -0
package/lib/dashboard-terminal.js +354 -0
package/lib/dashboard-websocket.js +88 -0
package/lib/drivers/codex-driver.ts +4 -4
package/lib/logger.js +106 -0
package/package.json +4 -2
package/scripts/agileflow-configure.js +2 -2
package/scripts/agileflow-welcome.js +409 -434
package/scripts/claude-tmux.sh +80 -2
package/scripts/context-loader.js +4 -9
package/scripts/lib/browser-qa-evidence.js +409 -0
package/scripts/lib/browser-qa-status.js +192 -0
package/scripts/lib/command-prereqs.js +280 -0
package/scripts/lib/configure-detect.js +92 -2
package/scripts/lib/configure-features.js +295 -1
package/scripts/lib/context-formatter.js +468 -233
package/scripts/lib/context-loader.js +27 -15
package/scripts/lib/damage-control-utils.js +8 -1
package/scripts/lib/feature-catalog.js +321 -0
package/scripts/lib/portable-tasks-cli.js +274 -0
package/scripts/lib/portable-tasks.js +479 -0
package/scripts/lib/signal-detectors.js +1 -1
package/scripts/lib/team-events.js +86 -1
package/scripts/obtain-context.js +28 -4
package/scripts/smart-detect.js +17 -0
package/scripts/strip-ai-attribution.js +63 -0
package/scripts/team-manager.js +7 -2
package/scripts/welcome-deferred.js +437 -0
package/src/core/agents/browser-qa.md +328 -0
package/src/core/agents/perf-analyzer-assets.md +174 -0
package/src/core/agents/perf-analyzer-bundle.md +165 -0
package/src/core/agents/perf-analyzer-caching.md +160 -0
package/src/core/agents/perf-analyzer-compute.md +165 -0
package/src/core/agents/perf-analyzer-memory.md +182 -0
package/src/core/agents/perf-analyzer-network.md +157 -0
package/src/core/agents/perf-analyzer-queries.md +155 -0
package/src/core/agents/perf-analyzer-rendering.md +156 -0
package/src/core/agents/perf-consensus.md +280 -0
package/src/core/agents/security-analyzer-api.md +199 -0
package/src/core/agents/security-analyzer-auth.md +160 -0
package/src/core/agents/security-analyzer-authz.md +168 -0
package/src/core/agents/security-analyzer-deps.md +147 -0
package/src/core/agents/security-analyzer-infra.md +176 -0
package/src/core/agents/security-analyzer-injection.md +148 -0
package/src/core/agents/security-analyzer-input.md +191 -0
package/src/core/agents/security-analyzer-secrets.md +175 -0
package/src/core/agents/security-consensus.md +276 -0
package/src/core/agents/test-analyzer-assertions.md +181 -0
package/src/core/agents/test-analyzer-coverage.md +183 -0
package/src/core/agents/test-analyzer-fragility.md +185 -0
package/src/core/agents/test-analyzer-integration.md +155 -0
package/src/core/agents/test-analyzer-maintenance.md +173 -0
package/src/core/agents/test-analyzer-mocking.md +178 -0
package/src/core/agents/test-analyzer-patterns.md +189 -0
package/src/core/agents/test-analyzer-structure.md +177 -0
package/src/core/agents/test-consensus.md +294 -0
package/src/core/commands/{legal/audit.md → audit/legal.md} +13 -13
package/src/core/commands/{logic/audit.md → audit/logic.md} +12 -12
package/src/core/commands/audit/performance.md +443 -0
package/src/core/commands/audit/security.md +443 -0
package/src/core/commands/audit/test.md +442 -0
package/src/core/commands/babysit.md +505 -463
package/src/core/commands/browser-qa.md +240 -0
package/src/core/commands/configure.md +8 -8
package/src/core/commands/research/ask.md +42 -9
package/src/core/commands/research/import.md +14 -8
package/src/core/commands/research/list.md +17 -16
package/src/core/commands/research/synthesize.md +8 -8
package/src/core/commands/research/view.md +28 -4
package/src/core/commands/whats-new.md +2 -2
package/src/core/experts/devops/expertise.yaml +13 -2
package/src/core/experts/documentation/expertise.yaml +26 -4
package/src/core/profiles/COMPARISON.md +170 -0
package/src/core/profiles/README.md +178 -0
package/src/core/profiles/claude-code.yaml +111 -0
package/src/core/profiles/codex.yaml +103 -0
package/src/core/profiles/cursor.yaml +134 -0
package/src/core/profiles/examples.js +250 -0
package/src/core/profiles/loader.js +235 -0
package/src/core/profiles/windsurf.yaml +159 -0
package/src/core/teams/logic-audit.json +6 -0
package/src/core/teams/perf-audit.json +71 -0
package/src/core/teams/security-audit.json +71 -0
package/src/core/teams/test-audit.json +71 -0
package/src/core/templates/browser-qa-spec.yaml +94 -0
package/src/core/templates/command-prerequisites.yaml +169 -0
package/src/core/templates/damage-control-patterns.yaml +9 -0
package/tools/cli/installers/ide/_base-ide.js +33 -3
package/tools/cli/installers/ide/claude-code.js +2 -69
package/tools/cli/installers/ide/codex.js +9 -9
package/tools/cli/installers/ide/cursor.js +165 -4
package/tools/cli/installers/ide/windsurf.js +237 -6
package/tools/cli/lib/content-transformer.js +234 -9
package/tools/cli/lib/docs-setup.js +1 -1
package/tools/cli/lib/ide-generator.js +357 -0
package/tools/cli/lib/ide-registry.js +2 -2
package/scripts/tmux-task-name.sh +0 -105
package/scripts/tmux-task-watcher.sh +0 -344

package/scripts/claude-tmux.sh CHANGED Viewed

@@ -155,6 +155,81 @@ if command -v tmux &> /dev/null; then
   unset _TMUX_BASE _TMUX_SOCK_DIR
 fi
+# ══════════════════════════════════════════════════════════════════════════════
+# TAB FORMAT BUILDER — dynamic compaction based on window count & terminal width
+# Uses tmux 3.2+ #{e|...} numeric operators for cascading tier selection
+# Takes no arguments; prints the assembled status-format string on stdout.
+# ══════════════════════════════════════════════════════════════════════════════
+build_tab_format() {
+  # Chrome-like tab compaction: 14 tiers with threshold = width for minimal waste.
+  # Per-window budget (width/windows) picks the largest tier that fits.
+  # #{pN:#{=N:var}} = exactly N visible chars (truncate long + pad short names).
+  # ── Active tab: orange bg index + dark bg name ──────────────────────────
+  # "  I  " prefix = 5 visible chars (wide); " I " = 3 chars (narrow)
+  # Width = prefix + 1(space) + pN(name) + 1(space)
+  local a0='#[fg=#1a1b26 bg=#e8683a bold]  #I  #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{p33:#{=33:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
+  local a1='#[fg=#1a1b26 bg=#e8683a bold]  #I  #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{p20:#{=20:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
+  local a2='#[fg=#1a1b26 bg=#e8683a bold]  #I  #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{p13:#{=13:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
+  local a3='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{p11:#{=11:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
+  local a4='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{p8:#{=8:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
+  local a5='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{p6:#{=6:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
+  local a6='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e0e0e0 bg=#2d2f3a] #{p5:#{=5:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
+  local a7='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e0e0e0 bg=#2d2f3a] #{p4:#{=4:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
+  local a8='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e0e0e0 bg=#2d2f3a] #{p3:#{=3:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
+  local a9='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e0e0e0 bg=#2d2f3a] #{p2:#{=2:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
+  local a10='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e0e0e0 bg=#2d2f3a] #{p1:#{=1:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
+  local a11='#[fg=#1a1b26 bg=#e8683a bold]  #I  #[bg=#1a1b26 fg=#e8683a]'
+  local a12='#[fg=#1a1b26 bg=#e8683a bold] #I #[bg=#1a1b26 fg=#e8683a]'
+  local a13='#[fg=#e8683a bold]#I#[fg=default]'
+  # ── Inactive tab: gray text ─────────────────────────────────────────────
+  # "  I:" prefix = 4 visible chars (wide); " I:" = 3 chars (narrow)
+  # Width = prefix + pN(name) + 1(space)
+  local i0='#[fg=#8a8a8a]  #I:#{p35:#{=35:window_name}} '
+  local i1='#[fg=#8a8a8a]  #I:#{p22:#{=22:window_name}} '
+  local i2='#[fg=#8a8a8a]  #I:#{p15:#{=15:window_name}} '
+  local i3='#[fg=#8a8a8a] #I:#{p12:#{=12:window_name}} '
+  local i4='#[fg=#8a8a8a] #I:#{p9:#{=9:window_name}} '
+  local i5='#[fg=#8a8a8a] #I:#{p7:#{=7:window_name}} '
+  local i6='#[fg=#8a8a8a] #I:#{p6:#{=6:window_name}} '
+  local i7='#[fg=#8a8a8a] #I:#{p5:#{=5:window_name}} '
+  local i8='#[fg=#8a8a8a] #I:#{p4:#{=4:window_name}} '
+  local i9='#[fg=#8a8a8a] #I:#{p3:#{=3:window_name}} '
+  local i10='#[fg=#8a8a8a] #I:#{p2:#{=2:window_name}} '
+  local i11='#[fg=#8a8a8a] #I:#{p1:#{=1:window_name}} '
+  local i12='#[fg=#8a8a8a] #I '
+  local i13='#[fg=#565a6e]#I'
+  # ── Tier selection: budget = width / windows ─────────────────────────────
+  local budget='#{e|/|:#{client_width},#{session_windows}}'
+  # Building blocks for one "budget >= threshold" conditional:
+  #   cp = opens the conditional and the >= comparison (threshold is appended)
+  #   cm = closes the comparison and starts the "true" branch
+  #   cs = closes one conditional
+  local cp="#{?#{e|>=|:${budget},"
+  local cm='},'
+  local cs='}'
+  # 14 tiers: threshold = format width → minimal wasted space.
+  # 81-col fill: 2-11 wins >=95%, 12-16 wins >=86%.
+  #
+  #   Tier  >=Thr  Width  81-col example       Fill%
+  #   T0     40     40    2 wins (40ea)          98%
+  #   T1     27     27    3 wins (27ea)         100%
+  #   T2     20     20    4 wins (20ea)          98%
+  #   T3     16     16    5 wins (16ea)          98%
+  #   T4     13     13    6 wins (13ea)          96%
+  #   T5     11     11    7 wins (11ea)          95%
+  #   T6     10     10    8 wins (10ea)          98%
+  #   T7      9      9    9 wins (9ea)          100%
+  #   T8      8      8   10 wins (8ea)           98%
+  #   T9      7      7   11 wins (7ea)           95%
+  #   T10     6      6   12-13 wins            88-96%
+  #   T11     5      5   14-16 wins            86-98%
+  #   T12     3      3   17-27 wins
+  #   T13  fallback  1   28+ wins
+  # Each conditional's "false" branch is the next tier's conditional, so the
+  # 13 conditionals opened below are closed by the 13 trailing cs pieces; the
+  # last tier (a13 / i13) is the unconditional fallback.
+  local active="${cp}40${cm}${a0},${cp}27${cm}${a1},${cp}20${cm}${a2},${cp}16${cm}${a3},${cp}13${cm}${a4},${cp}11${cm}${a5},${cp}10${cm}${a6},${cp}9${cm}${a7},${cp}8${cm}${a8},${cp}7${cm}${a9},${cp}6${cm}${a10},${cp}5${cm}${a11},${cp}3${cm}${a12},${a13}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}"
+  local inactive="${cp}40${cm}${i0},${cp}27${cm}${i1},${cp}20${cm}${i2},${cp}16${cm}${i3},${cp}13${cm}${i4},${cp}11${cm}${i5},${cp}10${cm}${i6},${cp}9${cm}${i7},${cp}8${cm}${i8},${cp}7${cm}${i9},${cp}6${cm}${i10},${cp}5${cm}${i11},${cp}3${cm}${i12},${i13}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}"
+  # Wrap both chains in a per-window loop that picks the active or inactive
+  # chain from window_active, and print the result for the caller to capture.
+  echo "#[bg=#1a1b26]#{W:#{?window_active,${active},${inactive}}}"
+}
 # ══════════════════════════════════════════════════════════════════════════════
 # TMUX CONFIGURATION FUNCTION — applies theme, keybinds, and status bar
 # Defined early so --refresh can use it before any session logic
@@ -189,8 +264,11 @@ configure_tmux_session() {
   # Uses #() for live branch updates (runs on status-interval, every 30s)
   tmux set-option -t "$target_session" status-format[0] "#[bg=#1a1b26]  #[fg=#e8683a bold]#{s/claude-//:session_name}  #[fg=#3b4261]·  #[fg=#7aa2f7]󰘬 #(git -C #{pane_current_path} branch --show-current 2>/dev/null || echo '-')#[align=right]#[fg=#565a6e]Alt+h help  "
-  # Line 1 (bottom): Window tabs with smart truncation and brand color
-  tmux set-option -t "$target_session" status-format[1] "#[bg=#1a1b26]#{W:#{?window_active,#[fg=#1a1b26 bg=#e8683a bold]  #I  #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{=15:window_name} #[bg=#1a1b26 fg=#2d2f3a],#[fg=#8a8a8a]  #I:#{=|8|...:window_name}  }}"
+  # Line 1 (bottom): Window tabs with dynamic compaction
+  # Tabs auto-shrink based on window count and terminal width
+  local tab_format
+  tab_format=$(build_tab_format)
+  tmux set-option -t "$target_session" status-format[1] "$tab_format"
   # Pane border styling - blue inactive, orange active
   tmux set-option -t "$target_session" pane-border-style "fg=#3d59a1"

package/scripts/context-loader.js CHANGED Viewed

@@ -39,15 +39,10 @@ try {
   // Feature flags not available
 }
-// Colors for output
-const c = {
-  dim: '\x1b[2m',
-  cyan: '\x1b[36m',
-  yellow: '\x1b[33m',
-  green: '\x1b[32m',
-  red: '\x1b[31m',
-  reset: '\x1b[0m',
-};
+// Colors for output (use shared color utilities)
+const { c } = require('../lib/colors');
+const { createLogger } = require('../lib/logger');
+const log = createLogger('context-loader');
 /**
  * Find project root by looking for .agileflow or .git directory

package/scripts/lib/browser-qa-evidence.js ADDED Viewed

@@ -0,0 +1,409 @@
+/**
+ * browser-qa-evidence.js - Screenshot evidence trail management
+ *
+ * Manages organized storage for agentic browser test evidence including
+ * screenshots, result metadata, and automatic retention cleanup.
+ *
+ * Storage Structure:
+ *   .agileflow/ui-review/
+ *   ├── specs/              # YAML test scenario definitions
+ *   ├── runs/               # Timestamped test run evidence
+ *   │   └── YYYY-MM-DD_HH-MM-SS/
+ *   │       ├── summary.json        # Aggregated run results
+ *   │       └── AGENTIC-001/        # Per-scenario evidence
+ *   │           ├── results.json    # Scenario results + metadata
+ *   │           ├── step-1-navigate.png
+ *   │           ├── step-2-click.png
+ *   │           └── step-3-assert_FAILED.png
+ *   └── baselines/          # Reference screenshots for visual diff
+ *
+ * Usage:
+ *   const evidence = require('./lib/browser-qa-evidence');
+ *   const runDir = evidence.createRunDirectory(projectRoot);
+ *   const scenarioDir = evidence.createScenarioDirectory(runDir, 'AGENTIC-001');
+ *   evidence.saveStepResult(scenarioDir, stepResult);
+ *   evidence.saveRunSummary(runDir, results);
+ *   evidence.cleanupOldRuns(projectRoot, 30); // 30-day retention
+ */
+const fs = require('fs');
+const path = require('path');
+// Evidence root, relative to the project root (joined in getBaseDir).
+const UI_REVIEW_DIR = '.agileflow/ui-review';
+// Sub-directory names created under the evidence root.
+const RUNS_DIR = 'runs';
+const SPECS_DIR = 'specs';
+const BASELINES_DIR = 'baselines';
+// Retention window cleanupOldRuns falls back to when none is given.
+const DEFAULT_RETENTION_DAYS = 30;
+/**
+ * Resolve the evidence root for a project.
+ * @param {string} projectRoot - Project root directory
+ * @returns {string} projectRoot joined with UI_REVIEW_DIR
+ */
+function getBaseDir(projectRoot) {
+  const segments = [projectRoot, UI_REVIEW_DIR];
+  return path.join(...segments);
+}
+/**
+ * Create (when missing) the evidence root and its runs/specs/baselines
+ * sub-directories, then report where they live.
+ * @param {string} projectRoot - Project root directory
+ * @returns {{ base: string, runs: string, specs: string, baselines: string }}
+ */
+function ensureDirectoryStructure(projectRoot) {
+  const base = getBaseDir(projectRoot);
+  const layout = {
+    base,
+    runs: path.join(base, RUNS_DIR),
+    specs: path.join(base, SPECS_DIR),
+    baselines: path.join(base, BASELINES_DIR),
+  };
+  // Insertion order keeps the parent first, so directories are created top-down.
+  Object.values(layout).forEach(target => {
+    if (fs.existsSync(target)) return;
+    fs.mkdirSync(target, { recursive: true });
+  });
+  return layout;
+}
+/**
+ * Make the directory that holds the evidence of one test execution.
+ * The directory name is the formatted timestamp of the run.
+ * @param {string} projectRoot - Project root directory
+ * @param {Date} [timestamp] - Run time; the current time is used when omitted
+ * @returns {string} Path to the run directory (created if it did not exist)
+ */
+function createRunDirectory(projectRoot, timestamp) {
+  const { runs } = ensureDirectoryStructure(projectRoot);
+  const when = timestamp ? timestamp : new Date();
+  const runDir = path.join(runs, formatTimestamp(when));
+  const alreadyThere = fs.existsSync(runDir);
+  if (!alreadyThere) {
+    fs.mkdirSync(runDir, { recursive: true });
+  }
+  return runDir;
+}
+/**
+ * Make the per-scenario evidence directory inside a run directory.
+ * @param {string} runDir - Path to the run directory
+ * @param {string} testId - Test ID (e.g., 'AGENTIC-001'), used as the directory name
+ * @returns {string} Path to the scenario directory (created if it did not exist)
+ */
+function createScenarioDirectory(runDir, testId) {
+  const target = path.join(runDir, testId);
+  if (fs.existsSync(target)) return target;
+  fs.mkdirSync(target, { recursive: true });
+  return target;
+}
+/**
+ * Generate a screenshot filename for a test step
+ *
+ * The step name is lower-cased, every run of characters outside [a-z0-9] is
+ * collapsed to a single '-', and the result is capped at 40 characters.
+ * @param {number} stepIndex - Zero-based step index (the filename is one-based)
+ * @param {string} stepName - Human-readable step name
+ * @param {boolean} [failed=false] - Whether the step failed
+ * @returns {string} Filename like 'step-1-navigate.png' or 'step-1-navigate_FAILED.png'
+ */
+function getScreenshotFilename(stepIndex, stepName, failed) {
+  const slug = stepName
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-|-$/g, '')
+    .slice(0, 40)
+    // Truncation can cut right after a separator; drop it so the name never
+    // ends in '-.png' or '-_FAILED.png'.
+    .replace(/-$/, '');
+  const suffix = failed ? '_FAILED' : '';
+  return `step-${stepIndex + 1}-${slug}${suffix}.png`;
+}
+/**
+ * Append a step result to the scenario's results.json
+ *
+ * The file is created when missing. An existing file that cannot be parsed,
+ * or that does not hold a plain object, is replaced; an object without a
+ * `steps` array keeps its other fields and gets a fresh array.
+ * @param {string} scenarioDir - Path to the scenario directory
+ * @param {object} stepResult - Step execution result
+ * @param {number} stepResult.index - Zero-based step index
+ * @param {string} stepResult.name - Step name
+ * @param {'passed'|'failed'|'skipped'} stepResult.status - Step status
+ * @param {number} stepResult.duration_ms - Step duration in milliseconds
+ * @param {string} [stepResult.screenshot] - Screenshot filename (if captured)
+ * @param {string} [stepResult.error] - Error message (if failed)
+ * @param {'timeout'|'assertion'|'agent_error'|'infrastructure'} [stepResult.error_type] - Error classification
+ */
+function saveStepResult(scenarioDir, stepResult) {
+  const resultsPath = path.join(scenarioDir, 'results.json');
+  let results = { steps: [] };
+  if (fs.existsSync(resultsPath)) {
+    try {
+      const parsed = JSON.parse(fs.readFileSync(resultsPath, 'utf-8'));
+      // Valid JSON is not necessarily a results object (e.g. `null` or `[]`).
+      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
+        results = parsed;
+      }
+    } catch {
+      // Unreadable or corrupt file: start over with an empty step list.
+      results = { steps: [] };
+    }
+  }
+  // results.json may have been written without a steps array.
+  if (!Array.isArray(results.steps)) {
+    results.steps = [];
+  }
+  results.steps.push(stepResult);
+  fs.writeFileSync(resultsPath, JSON.stringify(results, null, 2));
+}
+/**
+ * Write the full result of one scenario to results.json, replacing any
+ * existing file in the scenario directory.
+ * @param {string} scenarioDir - Path to the scenario directory
+ * @param {object} scenarioResult - Complete scenario result
+ * @param {string} scenarioResult.test_id - Test ID
+ * @param {string} [scenarioResult.story_id] - Associated story ID
+ * @param {string} scenarioResult.name - Scenario name
+ * @param {string} scenarioResult.timestamp - ISO timestamp
+ * @param {'validated'|'warning'|'failed'} scenarioResult.status - Overall status
+ * @param {number} scenarioResult.pass_rate - Pass rate (0-1)
+ * @param {number} scenarioResult.attempts - Total attempts
+ * @param {number} scenarioResult.successful_attempts - Successful attempts
+ * @param {Array} scenarioResult.steps - Step results array
+ */
+function saveScenarioResult(scenarioDir, scenarioResult) {
+  const serialized = JSON.stringify(scenarioResult, null, 2);
+  fs.writeFileSync(path.join(scenarioDir, 'results.json'), serialized);
+}
+/**
+ * Write the aggregated summary of a run to summary.json, replacing any
+ * existing file in the run directory.
+ * @param {string} runDir - Path to the run directory
+ * @param {object} summary - Run summary
+ * @param {string} summary.timestamp - ISO timestamp
+ * @param {number} summary.total_scenarios - Total scenarios executed
+ * @param {number} summary.validated - Scenarios that passed (>=80%)
+ * @param {number} summary.warnings - Scenarios with warnings (70-79%)
+ * @param {number} summary.failed - Scenarios that failed (<70%)
+ * @param {Array} summary.scenarios - Individual scenario results
+ */
+function saveRunSummary(runDir, summary) {
+  const serialized = JSON.stringify(summary, null, 2);
+  fs.writeFileSync(path.join(runDir, 'summary.json'), serialized);
+}
+/**
+ * Read summary.json from a run directory.
+ * @param {string} runDir - Path to the run directory
+ * @returns {object|null} Parsed summary, or null when the file is missing,
+ *   unreadable or not valid JSON
+ */
+function loadRunSummary(runDir) {
+  const summaryPath = path.join(runDir, 'summary.json');
+  let summary = null;
+  if (fs.existsSync(summaryPath)) {
+    try {
+      summary = JSON.parse(fs.readFileSync(summaryPath, 'utf-8'));
+    } catch {
+      summary = null;
+    }
+  }
+  return summary;
+}
+/**
+ * Enumerate recorded test runs, newest first.
+ * Only directory entries named YYYY-MM-DD_HH-MM-SS are considered.
+ * @param {string} projectRoot - Project root directory
+ * @returns {Array<{ dir: string, timestamp: string, summary: object|null }>}
+ */
+function listRuns(projectRoot) {
+  const runsDir = path.join(getBaseDir(projectRoot), RUNS_DIR);
+  if (!fs.existsSync(runsDir)) return [];
+  const runNamePattern = /^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}$/;
+  const names = fs.readdirSync(runsDir).filter(name => runNamePattern.test(name));
+  // Zero-padded names sort chronologically; flip for newest first.
+  names.sort();
+  names.reverse();
+  const runs = [];
+  for (const name of names) {
+    const dir = path.join(runsDir, name);
+    runs.push({ dir, timestamp: name, summary: loadRunSummary(dir) });
+  }
+  return runs;
+}
+/**
+ * Collect every .yaml / .yml file under the specs directory, recursively.
+ * @param {string} projectRoot - Project root directory
+ * @returns {string[]} Array of spec file paths
+ */
+function listSpecs(projectRoot) {
+  const specsDir = path.join(getBaseDir(projectRoot), SPECS_DIR);
+  if (!fs.existsSync(specsDir)) return [];
+  const isSpecFile = name => name.endsWith('.yaml') || name.endsWith('.yml');
+  // Depth-first in readdir order: a directory's specs land where the
+  // directory itself was listed.
+  const collect = dir =>
+    fs.readdirSync(dir, { withFileTypes: true }).flatMap(entry => {
+      const fullPath = path.join(dir, entry.name);
+      if (entry.isDirectory()) return collect(fullPath);
+      return isSpecFile(entry.name) ? [fullPath] : [];
+    });
+  return collect(specsDir);
+}
+/**
+ * Clean up old test runs beyond the retention period
+ *
+ * Only entries named YYYY-MM-DD_HH-MM-SS are examined; a run is removed when
+ * the time encoded in its name is earlier than now minus the retention window.
+ * @param {string} projectRoot - Project root directory
+ * @param {number} [retentionDays=30] - Number of days to retain evidence;
+ *   zero, negative or non-numeric values fall back to the default
+ * @returns {{ removed: number, kept: number, errors: string[] }}
+ */
+function cleanupOldRuns(projectRoot, retentionDays) {
+  // A negative window would put the cutoff in the future and delete every
+  // run, including one created moments ago, so only positive finite values
+  // are honoured.
+  const requested = Number(retentionDays);
+  const days = Number.isFinite(requested) && requested > 0 ? requested : DEFAULT_RETENTION_DAYS;
+  const runsDir = path.join(getBaseDir(projectRoot), RUNS_DIR);
+  const cutoff = new Date();
+  cutoff.setDate(cutoff.getDate() - days);
+  const result = { removed: 0, kept: 0, errors: [] };
+  if (!fs.existsSync(runsDir)) return result;
+  const entries = fs.readdirSync(runsDir);
+  for (const name of entries) {
+    if (!/^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}$/.test(name)) continue;
+    const runDate = parseTimestamp(name);
+    if (!runDate) {
+      result.errors.push(`Invalid timestamp format: ${name}`);
+      continue;
+    }
+    if (runDate < cutoff) {
+      const runDir = path.join(runsDir, name);
+      try {
+        fs.rmSync(runDir, { recursive: true, force: true });
+        result.removed++;
+      } catch (err) {
+        // Keep going: one undeletable run should not block the others.
+        result.errors.push(`Failed to remove ${name}: ${err.message}`);
+      }
+    } else {
+      result.kept++;
+    }
+  }
+  return result;
+}
+/**
+ * Fraction of attempts that succeeded.
+ * @param {number} successful - Number of successful attempts
+ * @param {number} total - Total attempts
+ * @returns {number} successful / total, or 0 when total is 0
+ */
+function calculatePassRate(successful, total) {
+  return total === 0 ? 0 : successful / total;
+}
+/**
+ * Map a pass rate onto a status.
+ *
+ * Rates at or above the threshold are 'validated'; rates up to 0.10 below it
+ * are 'warning'; anything lower is 'failed'.
+ * @param {number} passRate - Pass rate between 0 and 1
+ * @param {number} [threshold=0.80] - Validation threshold (a falsy value uses 0.8)
+ * @returns {'validated'|'warning'|'failed'}
+ */
+function classifyPassRate(passRate, threshold) {
+  // Compare in whole per-mille units so float noise cannot flip a boundary.
+  const PER_MILLE = 1000;
+  const WARNING_BAND = 100;
+  const cutoff = Math.round((threshold || 0.8) * PER_MILLE);
+  const scaled = Math.round(passRate * PER_MILLE);
+  if (scaled >= cutoff) return 'validated';
+  return scaled >= cutoff - WARNING_BAND ? 'warning' : 'failed';
+}
+/**
+ * Classify an error for retry decisions
+ *
+ * Matching is by case-insensitive substring on the error text. A missing
+ * error, or one without a string message, is classified as 'agent_error'.
+ * @param {Error|string} error - The error that occurred
+ * @returns {'timeout'|'assertion'|'agent_error'|'infrastructure'}
+ */
+function classifyError(error) {
+  let msg = '';
+  if (typeof error === 'string') {
+    msg = error;
+  } else if (error != null && typeof error.message === 'string') {
+    msg = error.message;
+  }
+  const lower = msg.toLowerCase();
+  const mentions = needles => needles.some(needle => lower.includes(needle));
+  // 'timeout' also matches 'navigation timeout'.
+  if (mentions(['timeout', 'timed out'])) {
+    return 'timeout';
+  }
+  // Check infrastructure before assertion - "unexpectedly" contains "expected"
+  if (mentions(['econnrefused', 'enotfound', 'browser', 'chromium'])) {
+    return 'infrastructure';
+  }
+  if (mentions(['assert', 'expected', 'not found', 'mismatch'])) {
+    return 'assertion';
+  }
+  return 'agent_error';
+}
+/**
+ * Tell whether a failure of the given class should be retried.
+ * Only 'timeout' and 'agent_error' are retried.
+ * @param {'timeout'|'assertion'|'agent_error'|'infrastructure'} errorType
+ * @returns {boolean}
+ */
+function isRetryable(errorType) {
+  switch (errorType) {
+    case 'timeout':
+    case 'agent_error':
+      return true;
+    default:
+      return false;
+  }
+}
+// --- Internal helpers ---
+// Render a Date as 'YYYY-MM-DD_HH-MM-SS' from its local-time fields; every
+// field except the year is zero-padded to two digits.
+function formatTimestamp(date) {
+  const two = value => String(value).padStart(2, '0');
+  const day = [date.getFullYear(), two(date.getMonth() + 1), two(date.getDate())].join('-');
+  const time = [date.getHours(), date.getMinutes(), date.getSeconds()].map(two).join('-');
+  return `${day}_${time}`;
+}
+// Parse a 'YYYY-MM-DD_HH-MM-SS' name into a local-time Date.
+// Returns null when the text does not match the pattern or names an
+// impossible calendar value (month 13, Feb 30, hour 24, ...); without the
+// range check the Date constructor would silently roll such values over.
+function parseTimestamp(str) {
+  const match = /^(\d{4})-(\d{2})-(\d{2})_(\d{2})-(\d{2})-(\d{2})$/.exec(str);
+  if (!match) return null;
+  const [year, month, day, hour, minute, second] = match.slice(1).map(Number);
+  if (month < 1 || month > 12) return null;
+  // Day 0 of the following month is the last day of this one.
+  const daysInMonth = new Date(year, month, 0).getDate();
+  if (day < 1 || day > daysInMonth) return null;
+  if (hour > 23 || minute > 59 || second > 59) return null;
+  return new Date(year, month - 1, day, hour, minute, second);
+}
+module.exports = {
+  getBaseDir,
+  ensureDirectoryStructure,
+  createRunDirectory,
+  createScenarioDirectory,
+  getScreenshotFilename,
+  saveStepResult,
+  saveScenarioResult,
+  saveRunSummary,
+  loadRunSummary,
+  listRuns,
+  listSpecs,
+  cleanupOldRuns,
+  calculatePassRate,
+  classifyPassRate,
+  classifyError,
+  isRetryable,
+  // Constants
+  UI_REVIEW_DIR,
+  DEFAULT_RETENTION_DAYS,
+};