agileflow 3.1.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/CHANGELOG.md +10 -0
  2. package/README.md +57 -85
  3. package/lib/dashboard-automations.js +130 -0
  4. package/lib/dashboard-git.js +254 -0
  5. package/lib/dashboard-inbox.js +64 -0
  6. package/lib/dashboard-protocol.js +1 -0
  7. package/lib/dashboard-server.js +114 -924
  8. package/lib/dashboard-session.js +136 -0
  9. package/lib/dashboard-status.js +72 -0
  10. package/lib/dashboard-terminal.js +354 -0
  11. package/lib/dashboard-websocket.js +88 -0
  12. package/lib/drivers/codex-driver.ts +4 -4
  13. package/lib/logger.js +106 -0
  14. package/package.json +4 -2
  15. package/scripts/agileflow-configure.js +2 -2
  16. package/scripts/agileflow-welcome.js +409 -434
  17. package/scripts/claude-tmux.sh +80 -2
  18. package/scripts/context-loader.js +4 -9
  19. package/scripts/lib/browser-qa-evidence.js +409 -0
  20. package/scripts/lib/browser-qa-status.js +192 -0
  21. package/scripts/lib/command-prereqs.js +280 -0
  22. package/scripts/lib/configure-detect.js +92 -2
  23. package/scripts/lib/configure-features.js +295 -1
  24. package/scripts/lib/context-formatter.js +468 -233
  25. package/scripts/lib/context-loader.js +27 -15
  26. package/scripts/lib/damage-control-utils.js +8 -1
  27. package/scripts/lib/feature-catalog.js +321 -0
  28. package/scripts/lib/portable-tasks-cli.js +274 -0
  29. package/scripts/lib/portable-tasks.js +479 -0
  30. package/scripts/lib/signal-detectors.js +1 -1
  31. package/scripts/lib/team-events.js +86 -1
  32. package/scripts/obtain-context.js +28 -4
  33. package/scripts/smart-detect.js +17 -0
  34. package/scripts/strip-ai-attribution.js +63 -0
  35. package/scripts/team-manager.js +7 -2
  36. package/scripts/welcome-deferred.js +437 -0
  37. package/src/core/agents/browser-qa.md +328 -0
  38. package/src/core/agents/perf-analyzer-assets.md +174 -0
  39. package/src/core/agents/perf-analyzer-bundle.md +165 -0
  40. package/src/core/agents/perf-analyzer-caching.md +160 -0
  41. package/src/core/agents/perf-analyzer-compute.md +165 -0
  42. package/src/core/agents/perf-analyzer-memory.md +182 -0
  43. package/src/core/agents/perf-analyzer-network.md +157 -0
  44. package/src/core/agents/perf-analyzer-queries.md +155 -0
  45. package/src/core/agents/perf-analyzer-rendering.md +156 -0
  46. package/src/core/agents/perf-consensus.md +280 -0
  47. package/src/core/agents/security-analyzer-api.md +199 -0
  48. package/src/core/agents/security-analyzer-auth.md +160 -0
  49. package/src/core/agents/security-analyzer-authz.md +168 -0
  50. package/src/core/agents/security-analyzer-deps.md +147 -0
  51. package/src/core/agents/security-analyzer-infra.md +176 -0
  52. package/src/core/agents/security-analyzer-injection.md +148 -0
  53. package/src/core/agents/security-analyzer-input.md +191 -0
  54. package/src/core/agents/security-analyzer-secrets.md +175 -0
  55. package/src/core/agents/security-consensus.md +276 -0
  56. package/src/core/agents/test-analyzer-assertions.md +181 -0
  57. package/src/core/agents/test-analyzer-coverage.md +183 -0
  58. package/src/core/agents/test-analyzer-fragility.md +185 -0
  59. package/src/core/agents/test-analyzer-integration.md +155 -0
  60. package/src/core/agents/test-analyzer-maintenance.md +173 -0
  61. package/src/core/agents/test-analyzer-mocking.md +178 -0
  62. package/src/core/agents/test-analyzer-patterns.md +189 -0
  63. package/src/core/agents/test-analyzer-structure.md +177 -0
  64. package/src/core/agents/test-consensus.md +294 -0
  65. package/src/core/commands/{legal/audit.md → audit/legal.md} +13 -13
  66. package/src/core/commands/{logic/audit.md → audit/logic.md} +12 -12
  67. package/src/core/commands/audit/performance.md +443 -0
  68. package/src/core/commands/audit/security.md +443 -0
  69. package/src/core/commands/audit/test.md +442 -0
  70. package/src/core/commands/babysit.md +505 -463
  71. package/src/core/commands/browser-qa.md +240 -0
  72. package/src/core/commands/configure.md +8 -8
  73. package/src/core/commands/research/ask.md +42 -9
  74. package/src/core/commands/research/import.md +14 -8
  75. package/src/core/commands/research/list.md +17 -16
  76. package/src/core/commands/research/synthesize.md +8 -8
  77. package/src/core/commands/research/view.md +28 -4
  78. package/src/core/commands/whats-new.md +2 -2
  79. package/src/core/experts/devops/expertise.yaml +13 -2
  80. package/src/core/experts/documentation/expertise.yaml +26 -4
  81. package/src/core/profiles/COMPARISON.md +170 -0
  82. package/src/core/profiles/README.md +178 -0
  83. package/src/core/profiles/claude-code.yaml +111 -0
  84. package/src/core/profiles/codex.yaml +103 -0
  85. package/src/core/profiles/cursor.yaml +134 -0
  86. package/src/core/profiles/examples.js +250 -0
  87. package/src/core/profiles/loader.js +235 -0
  88. package/src/core/profiles/windsurf.yaml +159 -0
  89. package/src/core/teams/logic-audit.json +6 -0
  90. package/src/core/teams/perf-audit.json +71 -0
  91. package/src/core/teams/security-audit.json +71 -0
  92. package/src/core/teams/test-audit.json +71 -0
  93. package/src/core/templates/browser-qa-spec.yaml +94 -0
  94. package/src/core/templates/command-prerequisites.yaml +169 -0
  95. package/src/core/templates/damage-control-patterns.yaml +9 -0
  96. package/tools/cli/installers/ide/_base-ide.js +33 -3
  97. package/tools/cli/installers/ide/claude-code.js +2 -69
  98. package/tools/cli/installers/ide/codex.js +9 -9
  99. package/tools/cli/installers/ide/cursor.js +165 -4
  100. package/tools/cli/installers/ide/windsurf.js +237 -6
  101. package/tools/cli/lib/content-transformer.js +234 -9
  102. package/tools/cli/lib/docs-setup.js +1 -1
  103. package/tools/cli/lib/ide-generator.js +357 -0
  104. package/tools/cli/lib/ide-registry.js +2 -2
  105. package/scripts/tmux-task-name.sh +0 -105
  106. package/scripts/tmux-task-watcher.sh +0 -344
@@ -155,6 +155,81 @@ if command -v tmux &> /dev/null; then
155
155
  unset _TMUX_BASE _TMUX_SOCK_DIR
156
156
  fi
157
157
 
158
# ══════════════════════════════════════════════════════════════════════════════
# TAB FORMAT BUILDER — dynamic compaction based on window count & terminal width
# Uses tmux 3.2+ #{e|...} numeric operators for cascading tier selection
# ══════════════════════════════════════════════════════════════════════════════
#
# Takes no arguments and prints (via echo) one tmux format string for the
# window-tab status line. The shell only concatenates text here: the tier
# fragments are single-quoted, so every #{...} expression is left untouched for
# tmux to evaluate when it renders the status line.
build_tab_format() {
  # Chrome-like tab compaction: 14 tiers with threshold = width for minimal waste.
  # Per-window budget (width/windows) picks the largest tier that fits.
  # #{pN:#{=N:var}} = exactly N visible chars (truncate long + pad short names).

  # ── Active tab: orange bg index + dark bg name ──────────────────────────
  # " I " prefix = 5 visible chars (wide); " I " = 3 chars (narrow)
  # Width = prefix + 1(space) + pN(name) + 1(space)
  # a0..a10 shrink the name field from 33 chars down to 1; a11..a13 drop the
  # name and show only the window index (#I).
  local a0='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{p33:#{=33:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
  local a1='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{p20:#{=20:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
  local a2='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{p13:#{=13:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
  local a3='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{p11:#{=11:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
  local a4='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{p8:#{=8:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
  local a5='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{p6:#{=6:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
  local a6='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e0e0e0 bg=#2d2f3a] #{p5:#{=5:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
  local a7='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e0e0e0 bg=#2d2f3a] #{p4:#{=4:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
  local a8='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e0e0e0 bg=#2d2f3a] #{p3:#{=3:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
  local a9='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e0e0e0 bg=#2d2f3a] #{p2:#{=2:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
  local a10='#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e0e0e0 bg=#2d2f3a] #{p1:#{=1:window_name}} #[bg=#1a1b26 fg=#2d2f3a]'
  local a11='#[fg=#1a1b26 bg=#e8683a bold] #I #[bg=#1a1b26 fg=#e8683a]'
  local a12='#[fg=#1a1b26 bg=#e8683a bold] #I #[bg=#1a1b26 fg=#e8683a]'
  local a13='#[fg=#e8683a bold]#I#[fg=default]'

  # ── Inactive tab: gray text ─────────────────────────────────────────────
  # " I:" prefix = 4 visible chars (wide); " I:" = 3 chars (narrow)
  # Width = prefix + pN(name) + 1(space)
  # i0..i11 shrink the name field from 35 chars down to 1; i12 and i13 show
  # only the window index.
  local i0='#[fg=#8a8a8a] #I:#{p35:#{=35:window_name}} '
  local i1='#[fg=#8a8a8a] #I:#{p22:#{=22:window_name}} '
  local i2='#[fg=#8a8a8a] #I:#{p15:#{=15:window_name}} '
  local i3='#[fg=#8a8a8a] #I:#{p12:#{=12:window_name}} '
  local i4='#[fg=#8a8a8a] #I:#{p9:#{=9:window_name}} '
  local i5='#[fg=#8a8a8a] #I:#{p7:#{=7:window_name}} '
  local i6='#[fg=#8a8a8a] #I:#{p6:#{=6:window_name}} '
  local i7='#[fg=#8a8a8a] #I:#{p5:#{=5:window_name}} '
  local i8='#[fg=#8a8a8a] #I:#{p4:#{=4:window_name}} '
  local i9='#[fg=#8a8a8a] #I:#{p3:#{=3:window_name}} '
  local i10='#[fg=#8a8a8a] #I:#{p2:#{=2:window_name}} '
  local i11='#[fg=#8a8a8a] #I:#{p1:#{=1:window_name}} '
  local i12='#[fg=#8a8a8a] #I '
  local i13='#[fg=#565a6e]#I'

  # ── Tier selection: budget = width / windows ─────────────────────────────
  # budget is itself a tmux expression (single-quoted), so the division is
  # performed by tmux at render time, not by the shell.
  # NOTE(review): presumably integer division, since no float flag is passed — confirm against tmux(1).
  local budget='#{e|/|:#{client_width},#{session_windows}}'
  # cp opens one tier test: "#{?#{e|>=|:<budget>," — the threshold is appended next.
  # Double quotes so the shell expands ${budget} into the fragment.
  local cp="#{?#{e|>=|:${budget},"
  # cm closes the >= comparison and adds the comma that precedes the true-branch.
  local cm='},'
  # cs closes one #{?...} conditional; one is needed per opened tier test.
  local cs='}'

  # 14 tiers: threshold = format width → minimal wasted space.
  # 81-col fill: 2-11 wins >=95%, 12-16 wins >=86%.
  #
  # Tier >=Thr Width 81-col example Fill%
  # T0 40 40 2 wins (40ea) 98%
  # T1 27 27 3 wins (27ea) 100%
  # T2 20 20 4 wins (20ea) 98%
  # T3 16 16 5 wins (16ea) 98%
  # T4 13 13 6 wins (13ea) 96%
  # T5 11 11 7 wins (11ea) 95%
  # T6 10 10 8 wins (10ea) 98%
  # T7 9 9 9 wins (9ea) 100%
  # T8 8 8 10 wins (8ea) 98%
  # T9 7 7 11 wins (7ea) 95%
  # T10 6 6 12-13 wins 88-96%
  # T11 5 5 14-16 wins 86-98%
  # T12 3 3 17-27 wins
  # T13 fallback 1 28+ wins
  #
  # Each cascade nests 13 conditionals: every false-branch is the next tier's
  # test, the innermost false-branch is the T13 fallback, and the 13 trailing
  # ${cs} close the 13 conditionals opened by ${cp}.
  local active="${cp}40${cm}${a0},${cp}27${cm}${a1},${cp}20${cm}${a2},${cp}16${cm}${a3},${cp}13${cm}${a4},${cp}11${cm}${a5},${cp}10${cm}${a6},${cp}9${cm}${a7},${cp}8${cm}${a8},${cp}7${cm}${a9},${cp}6${cm}${a10},${cp}5${cm}${a11},${cp}3${cm}${a12},${a13}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}"
  local inactive="${cp}40${cm}${i0},${cp}27${cm}${i1},${cp}20${cm}${i2},${cp}16${cm}${i3},${cp}13${cm}${i4},${cp}11${cm}${i5},${cp}10${cm}${i6},${cp}9${cm}${i7},${cp}8${cm}${i8},${cp}7${cm}${i9},${cp}6${cm}${i10},${cp}5${cm}${i11},${cp}3${cm}${i12},${i13}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}${cs}"

  # #{W:...} applies the inner format to every window; window_active selects
  # the active or inactive cascade for each one.
  echo "#[bg=#1a1b26]#{W:#{?window_active,${active},${inactive}}}"
}
232
+
158
233
  # ══════════════════════════════════════════════════════════════════════════════
159
234
  # TMUX CONFIGURATION FUNCTION — applies theme, keybinds, and status bar
160
235
  # Defined early so --refresh can use it before any session logic
@@ -189,8 +264,11 @@ configure_tmux_session() {
189
264
  # Uses #() for live branch updates (runs on status-interval, every 30s)
190
265
  tmux set-option -t "$target_session" status-format[0] "#[bg=#1a1b26] #[fg=#e8683a bold]#{s/claude-//:session_name} #[fg=#3b4261]· #[fg=#7aa2f7]󰘬 #(git -C #{pane_current_path} branch --show-current 2>/dev/null || echo '-')#[align=right]#[fg=#565a6e]Alt+h help "
191
266
 
192
- # Line 1 (bottom): Window tabs with smart truncation and brand color
193
- tmux set-option -t "$target_session" status-format[1] "#[bg=#1a1b26]#{W:#{?window_active,#[fg=#1a1b26 bg=#e8683a bold] #I #[fg=#e8683a bg=#2d2f3a]#[fg=#e0e0e0] #{=15:window_name} #[bg=#1a1b26 fg=#2d2f3a],#[fg=#8a8a8a] #I:#{=|8|...:window_name} }}"
267
+ # Line 1 (bottom): Window tabs with dynamic compaction
268
+ # Tabs auto-shrink based on window count and terminal width
269
+ local tab_format
270
+ tab_format=$(build_tab_format)
271
+ tmux set-option -t "$target_session" status-format[1] "$tab_format"
194
272
 
195
273
  # Pane border styling - blue inactive, orange active
196
274
  tmux set-option -t "$target_session" pane-border-style "fg=#3d59a1"
@@ -39,15 +39,10 @@ try {
39
39
  // Feature flags not available
40
40
  }
41
41
 
42
- // Colors for output
43
- const c = {
44
- dim: '\x1b[2m',
45
- cyan: '\x1b[36m',
46
- yellow: '\x1b[33m',
47
- green: '\x1b[32m',
48
- red: '\x1b[31m',
49
- reset: '\x1b[0m',
50
- };
42
+ // Colors for output (use shared color utilities)
43
+ const { c } = require('../lib/colors');
44
+ const { createLogger } = require('../lib/logger');
45
+ const log = createLogger('context-loader');
51
46
 
52
47
  /**
53
48
  * Find project root by looking for .agileflow or .git directory
@@ -0,0 +1,409 @@
1
+ /**
2
+ * browser-qa-evidence.js - Screenshot evidence trail management
3
+ *
4
+ * Manages organized storage for agentic browser test evidence including
5
+ * screenshots, result metadata, and automatic retention cleanup.
6
+ *
7
+ * Storage Structure:
8
+ * .agileflow/ui-review/
9
+ * ├── specs/ # YAML test scenario definitions
10
+ * ├── runs/ # Timestamped test run evidence
11
+ * │ └── YYYY-MM-DD_HH-MM-SS/
12
+ * │ ├── summary.json # Aggregated run results
13
+ * │ └── AGENTIC-001/ # Per-scenario evidence
14
+ * │ ├── results.json # Scenario results + metadata
15
+ * │ ├── step-1-navigate.png
16
+ * │ ├── step-2-click.png
17
+ * │ └── step-3-assert_FAILED.png
18
+ * └── baselines/ # Reference screenshots for visual diff
19
+ *
20
+ * Usage:
21
+ * const evidence = require('./lib/browser-qa-evidence');
22
+ * const runDir = evidence.createRunDirectory(projectRoot);
23
+ * const scenarioDir = evidence.createScenarioDirectory(runDir, 'AGENTIC-001');
24
+ * evidence.saveStepResult(scenarioDir, stepResult);
25
+ * evidence.saveRunSummary(runDir, results);
26
+ * evidence.cleanupOldRuns(projectRoot, 30); // 30-day retention
27
+ */
28
+
29
+ const fs = require('fs');
30
+ const path = require('path');
31
+
32
// Evidence root, relative to the project root (joined by getBaseDir()).
const UI_REVIEW_DIR = '.agileflow/ui-review';
// Names of the three sub-folders created beneath the evidence root.
const RUNS_DIR = 'runs';
const SPECS_DIR = 'specs';
const BASELINES_DIR = 'baselines';
// Retention window, in days, used by cleanupOldRuns() when the caller passes none.
const DEFAULT_RETENTION_DAYS = 30;
37
+
38
/**
 * Resolve the folder that holds all browser-QA evidence for a project.
 * @param {string} projectRoot - Project root directory
 * @returns {string} Path to .agileflow/ui-review/ inside the project root
 */
function getBaseDir(projectRoot) {
  const segments = [projectRoot, UI_REVIEW_DIR];
  return path.join(...segments);
}
46
+
47
/**
 * Create the ui-review folder tree (base, runs, specs, baselines) if any part
 * of it is missing, and report where each folder lives.
 * @param {string} projectRoot - Project root directory
 * @returns {{ base: string, runs: string, specs: string, baselines: string }}
 */
function ensureDirectoryStructure(projectRoot) {
  const base = getBaseDir(projectRoot);
  const layout = {
    base,
    runs: path.join(base, RUNS_DIR),
    specs: path.join(base, SPECS_DIR),
    baselines: path.join(base, BASELINES_DIR),
  };

  // Insertion order puts the base folder first, then its children.
  Object.values(layout).forEach(dir => {
    if (fs.existsSync(dir)) return;
    fs.mkdirSync(dir, { recursive: true });
  });

  return layout;
}
66
+
67
/**
 * Make (or reuse) the run folder named after a timestamp.
 * The folder name has one-second resolution, so two calls within the same
 * second resolve to the same directory.
 * @param {string} projectRoot - Project root directory
 * @param {Date} [timestamp] - Optional timestamp (defaults to now)
 * @returns {string} Path to the run directory
 */
function createRunDirectory(projectRoot, timestamp) {
  const { runs } = ensureDirectoryStructure(projectRoot);
  const stamp = formatTimestamp(timestamp || new Date());
  const runDir = path.join(runs, stamp);

  const alreadyPresent = fs.existsSync(runDir);
  if (!alreadyPresent) {
    fs.mkdirSync(runDir, { recursive: true });
  }

  return runDir;
}
85
+
86
/**
 * Make (or reuse) the per-scenario evidence folder inside a run folder.
 * @param {string} runDir - Path to the run directory
 * @param {string} testId - Test ID (e.g., 'AGENTIC-001'), used as the folder name
 * @returns {string} Path to the scenario directory
 */
function createScenarioDirectory(runDir, testId) {
  const target = path.join(runDir, testId);
  if (fs.existsSync(target)) {
    return target;
  }
  fs.mkdirSync(target, { recursive: true });
  return target;
}
99
+
100
/**
 * Generate a screenshot filename for a test step
 *
 * The step name is lower-cased, every run of non-alphanumeric characters is
 * collapsed to a single dash, and the result is capped at 40 characters.
 * A null/undefined name is treated as an empty string.
 *
 * @param {number} stepIndex - Zero-based step index (rendered one-based)
 * @param {string} stepName - Human-readable step name
 * @param {boolean} [failed=false] - Whether the step failed
 * @returns {string} Filename like 'step-1-navigate.png' or 'step-1-navigate_FAILED.png'
 */
function getScreenshotFilename(stepIndex, stepName, failed) {
  const rawName = stepName == null ? '' : String(stepName);
  const slug = rawName
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-/, '')
    .slice(0, 40)
    // Trim the trailing dash AFTER truncating: cutting at 40 characters can
    // land on a separator, which would otherwise leave names like "foo-.png".
    .replace(/-$/, '');
  const suffix = failed ? '_FAILED' : '';
  return `step-${stepIndex + 1}-${slug}${suffix}.png`;
}
116
+
117
/**
 * Append a step result to results.json in the scenario directory
 *
 * The file is created on first use. If an existing file cannot be parsed, or
 * parses to something that is not a plain object, it is replaced by a fresh
 * `{ steps: [] }` document. If it is an object without a `steps` array (for
 * example one written by saveScenarioResult without steps), its other fields
 * are kept and a `steps` array is added, instead of throwing on `.push`.
 *
 * @param {string} scenarioDir - Path to the scenario directory
 * @param {object} stepResult - Step execution result
 * @param {number} stepResult.index - Zero-based step index
 * @param {string} stepResult.name - Step name
 * @param {'passed'|'failed'|'skipped'} stepResult.status - Step status
 * @param {number} stepResult.duration_ms - Step duration in milliseconds
 * @param {string} [stepResult.screenshot] - Screenshot filename (if captured)
 * @param {string} [stepResult.error] - Error message (if failed)
 * @param {'timeout'|'assertion'|'agent_error'|'infrastructure'} [stepResult.error_type] - Error classification
 */
function saveStepResult(scenarioDir, stepResult) {
  const resultsPath = path.join(scenarioDir, 'results.json');
  let results = null;

  if (fs.existsSync(resultsPath)) {
    try {
      results = JSON.parse(fs.readFileSync(resultsPath, 'utf-8'));
    } catch {
      // Unreadable or corrupt file: start over below.
      results = null;
    }
  }

  // Valid JSON is not necessarily the shape we need (null, array, number, ...).
  const isPlainObject =
    results !== null && typeof results === 'object' && !Array.isArray(results);
  if (!isPlainObject) {
    results = { steps: [] };
  } else if (!Array.isArray(results.steps)) {
    results.steps = [];
  }

  results.steps.push(stepResult);
  fs.writeFileSync(resultsPath, JSON.stringify(results, null, 2));
}
144
+
145
/**
 * Write the full result document for one scenario to results.json,
 * replacing whatever the file held before.
 * @param {string} scenarioDir - Path to the scenario directory
 * @param {object} scenarioResult - Complete scenario result
 * @param {string} scenarioResult.test_id - Test ID
 * @param {string} [scenarioResult.story_id] - Associated story ID
 * @param {string} scenarioResult.name - Scenario name
 * @param {string} scenarioResult.timestamp - ISO timestamp
 * @param {'validated'|'warning'|'failed'} scenarioResult.status - Overall status
 * @param {number} scenarioResult.pass_rate - Pass rate (0-1)
 * @param {number} scenarioResult.attempts - Total attempts
 * @param {number} scenarioResult.successful_attempts - Successful attempts
 * @param {Array} scenarioResult.steps - Step results array
 */
function saveScenarioResult(scenarioDir, scenarioResult) {
  const target = path.join(scenarioDir, 'results.json');
  const payload = JSON.stringify(scenarioResult, null, 2);
  fs.writeFileSync(target, payload);
}
163
+
164
/**
 * Write the aggregated summary for a whole run to summary.json,
 * replacing whatever the file held before.
 * @param {string} runDir - Path to the run directory
 * @param {object} summary - Run summary
 * @param {string} summary.timestamp - ISO timestamp
 * @param {number} summary.total_scenarios - Total scenarios executed
 * @param {number} summary.validated - Scenarios that passed (>=80%)
 * @param {number} summary.warnings - Scenarios with warnings (70-79%)
 * @param {number} summary.failed - Scenarios that failed (<70%)
 * @param {Array} summary.scenarios - Individual scenario results
 */
function saveRunSummary(runDir, summary) {
  const target = path.join(runDir, 'summary.json');
  const payload = JSON.stringify(summary, null, 2);
  fs.writeFileSync(target, payload);
}
179
+
180
/**
 * Read summary.json from a run folder.
 * @param {string} runDir - Path to the run directory
 * @returns {object|null} Parsed summary, or null when the file is missing,
 *   unreadable, or not valid JSON
 */
function loadRunSummary(runDir) {
  const summaryFile = path.join(runDir, 'summary.json');
  let parsed = null;

  if (fs.existsSync(summaryFile)) {
    try {
      const raw = fs.readFileSync(summaryFile, 'utf-8');
      parsed = JSON.parse(raw);
    } catch {
      parsed = null;
    }
  }

  return parsed;
}
194
+
195
/**
 * Enumerate recorded test runs, newest first.
 * Only entries named YYYY-MM-DD_HH-MM-SS are considered; because that format
 * sorts lexicographically in chronological order, a plain sort + reverse
 * yields newest-first.
 * @param {string} projectRoot - Project root directory
 * @returns {Array<{ dir: string, timestamp: string, summary: object|null }>}
 */
function listRuns(projectRoot) {
  const runsRoot = path.join(getBaseDir(projectRoot), RUNS_DIR);
  if (!fs.existsSync(runsRoot)) {
    return [];
  }

  const stampPattern = /^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}$/;
  const newestFirst = fs
    .readdirSync(runsRoot)
    .filter(entry => stampPattern.test(entry))
    .sort()
    .reverse();

  const runs = [];
  for (const stamp of newestFirst) {
    const dir = path.join(runsRoot, stamp);
    runs.push({ dir, timestamp: stamp, summary: loadRunSummary(dir) });
  }
  return runs;
}
218
+
219
/**
 * Collect every .yaml / .yml file beneath the specs folder, descending into
 * sub-folders in directory-listing order.
 * @param {string} projectRoot - Project root directory
 * @returns {string[]} Array of spec file paths
 */
function listSpecs(projectRoot) {
  const specsRoot = path.join(getBaseDir(projectRoot), SPECS_DIR);
  if (!fs.existsSync(specsRoot)) {
    return [];
  }

  const isSpecFile = fileName => fileName.endsWith('.yaml') || fileName.endsWith('.yml');

  // Depth-first: a sub-folder's files are emitted at the position where the
  // sub-folder appears in its parent's listing.
  const collect = dir =>
    fs.readdirSync(dir, { withFileTypes: true }).flatMap(entry => {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        return collect(fullPath);
      }
      return isSpecFile(entry.name) ? [fullPath] : [];
    });

  return collect(specsRoot);
}
242
+
243
/**
 * Delete run folders whose timestamped name is older than the retention window.
 * Entries not named YYYY-MM-DD_HH-MM-SS are ignored entirely (neither counted
 * nor removed). A falsy retentionDays (including 0) falls back to the default.
 * @param {string} projectRoot - Project root directory
 * @param {number} [retentionDays=30] - Number of days to retain evidence
 * @returns {{ removed: number, kept: number, errors: string[] }}
 */
function cleanupOldRuns(projectRoot, retentionDays) {
  const keepDays = retentionDays || DEFAULT_RETENTION_DAYS;
  const runsRoot = path.join(getBaseDir(projectRoot), RUNS_DIR);

  // Anything stamped before this instant is eligible for removal.
  const cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - keepDays);

  const tally = { removed: 0, kept: 0, errors: [] };
  if (!fs.existsSync(runsRoot)) {
    return tally;
  }

  const stampPattern = /^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}$/;
  const candidates = fs.readdirSync(runsRoot).filter(entry => stampPattern.test(entry));

  for (const stamp of candidates) {
    const runDate = parseTimestamp(stamp);
    if (!runDate) {
      tally.errors.push(`Invalid timestamp format: ${stamp}`);
      continue;
    }

    const expired = runDate < cutoff;
    if (!expired) {
      tally.kept++;
      continue;
    }

    try {
      fs.rmSync(path.join(runsRoot, stamp), { recursive: true, force: true });
      tally.removed++;
    } catch (err) {
      tally.errors.push(`Failed to remove ${stamp}: ${err.message}`);
    }
  }

  return tally;
}
284
+
285
/**
 * Turn attempt counts into a pass rate.
 * @param {number} successful - Number of successful attempts
 * @param {number} total - Total attempts
 * @returns {number} successful / total, or 0 when there were no attempts
 */
function calculatePassRate(successful, total) {
  // Guard the division: zero attempts counts as a 0 pass rate, not NaN.
  return total === 0 ? 0 : successful / total;
}
295
+
296
/**
 * Map a pass rate onto a status band.
 * At or above the threshold is 'validated'; up to 10 percentage points below
 * it is 'warning'; anything lower is 'failed'. A falsy threshold (including 0)
 * falls back to 0.8.
 * @param {number} passRate - Pass rate between 0 and 1
 * @param {number} [threshold=0.80] - Validation threshold
 * @returns {'validated'|'warning'|'failed'}
 */
function classifyPassRate(passRate, threshold) {
  // Compare in thousandths so float noise (e.g. 0.7999999) cannot flip a band.
  const SCALE = 1000;
  const scaledThreshold = Math.round((threshold || 0.8) * SCALE);
  const scaledRate = Math.round(passRate * SCALE);
  const warningFloor = scaledThreshold - 100;

  if (scaledRate >= scaledThreshold) {
    return 'validated';
  }
  return scaledRate >= warningFloor ? 'warning' : 'failed';
}
311
+
312
/**
 * Classify an error for retry decisions
 *
 * Classification is by case-insensitive keyword match on the message, tried in
 * this order: timeout, infrastructure, assertion. Anything else, including a
 * null/undefined error or one without a message, is 'agent_error'.
 *
 * @param {Error|string} error - The error that occurred
 * @returns {'timeout'|'assertion'|'agent_error'|'infrastructure'}
 */
function classifyError(error) {
  // Extract a message without assuming the caller passed a real Error.
  let msg = '';
  if (typeof error === 'string') {
    msg = error;
  } else if (error != null && error.message) {
    msg = String(error.message);
  }
  const lower = msg.toLowerCase();
  const mentions = keywords => keywords.some(keyword => lower.includes(keyword));

  // 'timeout' also covers messages such as "navigation timeout".
  if (mentions(['timeout', 'timed out'])) {
    return 'timeout';
  }
  // Check infrastructure before assertion: a message such as
  // "browser closed unexpectedly" contains "expected" and would otherwise be
  // misread as an assertion failure.
  if (mentions(['econnrefused', 'enotfound', 'browser', 'chromium'])) {
    return 'infrastructure';
  }
  if (mentions(['assert', 'expected', 'not found', 'mismatch'])) {
    return 'assertion';
  }
  return 'agent_error';
}
347
+
348
/**
 * Decide whether a classified error is worth retrying.
 * Only 'timeout' and 'agent_error' are retryable; every other value is not.
 * @param {'timeout'|'assertion'|'agent_error'|'infrastructure'} errorType
 * @returns {boolean}
 */
function isRetryable(errorType) {
  switch (errorType) {
    case 'timeout':
    case 'agent_error':
      return true;
    default:
      return false;
  }
}
356
+
357
+ // --- Internal helpers ---
358
+
359
/**
 * Render a Date as a run-folder name, using the local-time getters.
 * @param {Date} date - Moment to format
 * @returns {string} 'YYYY-MM-DD_HH-MM-SS' (month, day and time fields zero-padded)
 */
function formatTimestamp(date) {
  const two = value => `${value}`.padStart(2, '0');
  const day = `${date.getFullYear()}-${two(date.getMonth() + 1)}-${two(date.getDate())}`;
  const time = `${two(date.getHours())}-${two(date.getMinutes())}-${two(date.getSeconds())}`;
  return `${day}_${time}`;
}
375
+
376
/**
 * Parse a run-folder name back into a Date (interpreted as local time).
 * @param {string} str - Name in 'YYYY-MM-DD_HH-MM-SS' form
 * @returns {Date|null} The parsed date, or null when the input does not match
 *   the expected pattern (including non-string input)
 */
function parseTimestamp(str) {
  // RegExp#exec coerces its argument, so non-string input yields null rather
  // than throwing as str.match() would.
  const match = /^(\d{4})-(\d{2})-(\d{2})_(\d{2})-(\d{2})-(\d{2})$/.exec(str);
  if (!match) return null;

  // Always parse with an explicit radix of 10.
  const [year, month, day, hour, minute, second] = match
    .slice(1)
    .map(part => Number.parseInt(part, 10));

  // Date months are zero-based; the folder name uses 1-12.
  return new Date(year, month - 1, day, hour, minute, second);
}
388
+
389
// Public API. formatTimestamp and parseTimestamp stay module-private, as do
// the RUNS_DIR / SPECS_DIR / BASELINES_DIR names.
module.exports = {
  // Directory layout
  getBaseDir,
  ensureDirectoryStructure,
  createRunDirectory,
  createScenarioDirectory,
  getScreenshotFilename,
  // Writing and reading evidence
  saveStepResult,
  saveScenarioResult,
  saveRunSummary,
  loadRunSummary,
  listRuns,
  listSpecs,
  cleanupOldRuns,
  // Pass-rate and error classification
  calculatePassRate,
  classifyPassRate,
  classifyError,
  isRetryable,
  // Constants
  UI_REVIEW_DIR,
  DEFAULT_RETENTION_DAYS,
};