@nerviq/cli 1.8.5 → 1.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/benchmark.js CHANGED
@@ -1,346 +1,346 @@
1
- const fs = require('fs');
2
- const os = require('os');
3
- const path = require('path');
4
-
5
- const { version } = require('../package.json');
6
- const { audit } = require('./audit');
7
- const { setup } = require('./setup');
8
- const { analyzeProject } = require('./analyze');
9
- const { getGovernanceSummary } = require('./governance');
10
-
11
/**
 * Recursively copy a project tree into `targetDir`.
 *
 * Entries named `.git`, `node_modules`, or `__pycache__` are skipped at every
 * level. Directories are copied recursively and regular files are copied
 * as-is. Symbolic links are not copied; a note is written to stderr instead.
 * Any other entry type is ignored without output.
 *
 * @param {string} sourceDir - Directory to read entries from.
 * @param {string} targetDir - Directory to create (if missing) and populate.
 * @returns {void}
 */
function copyProject(sourceDir, targetDir) {
  const skippedNames = ['.git', 'node_modules', '__pycache__'];

  fs.mkdirSync(targetDir, { recursive: true });

  for (const entry of fs.readdirSync(sourceDir, { withFileTypes: true })) {
    if (skippedNames.includes(entry.name)) {
      continue;
    }

    const sourcePath = path.join(sourceDir, entry.name);
    const targetPath = path.join(targetDir, entry.name);

    if (entry.isDirectory()) {
      copyProject(sourcePath, targetPath);
      continue;
    }

    if (entry.isFile()) {
      fs.copyFileSync(sourcePath, targetPath);
      continue;
    }

    if (entry.isSymbolicLink && entry.isSymbolicLink()) {
      // Symlinks are left out of the copy; report the skip on stderr.
      process.stderr.write(` Note: symlink skipped in benchmark: ${entry.name}\n`);
    }
  }
}
30
-
31
/**
 * Reduce an audit result to the fields kept in the benchmark report.
 *
 * @param {Object} result - Audit result object.
 * @returns {{score: *, organicScore: *, passed: *, failed: *, checkCount: *, quickWins: *}}
 *   A new object holding only those six properties, copied from `result`.
 */
function summarizeAudit(result) {
  const { score, organicScore, passed, failed, checkCount, quickWins } = result;
  return { score, organicScore, passed, failed, checkCount, quickWins };
}
41
-
42
/**
 * Build the `maintainer-core` workflow evidence for a benchmark run.
 *
 * Each task records whether it passed plus a human-readable evidence string.
 * List-valued inputs (`before.quickWins`, `analysisReport.recommendedDomainPacks`,
 * `analysisReport.recommendedMcpPacks`) are treated as empty when they are not
 * arrays, so a sparse audit or analysis result produces a failed task instead
 * of a TypeError while the evidence text is being formatted.
 *
 * @param {Object} before - Baseline audit result (`score`, `failed`, `checkCount`, `quickWins`).
 * @param {Object} after - Follow-up audit result compared against `before` (`score`, `failed`).
 * @param {Object} analysisReport - Analysis output the pack recommendations are read from.
 * @param {Object} governanceSummary - Exposes `permissionProfiles` and `hookRegistry`, each with a `length`.
 * @returns {{taskPack: string, tasks: Object[], summary: {passed: number, total: number, coverageScore: number}}}
 */
function buildWorkflowEvidence(before, after, analysisReport, governanceSummary) {
  const hasQuickWinList = Array.isArray(before.quickWins);
  // Fall back to empty lists so the evidence strings below can always be built.
  const quickWins = hasQuickWinList ? before.quickWins : [];
  const domainPacks = Array.isArray(analysisReport.recommendedDomainPacks)
    ? analysisReport.recommendedDomainPacks
    : [];
  const mcpPacks = Array.isArray(analysisReport.recommendedMcpPacks)
    ? analysisReport.recommendedMcpPacks
    : [];
  const joinLabels = (packs) => packs.map((pack) => pack.label).join(', ');

  const tasks = [
    {
      key: 'discover-without-writes',
      label: 'Discover next actions without writing files',
      passed: before.checkCount > 0 && hasQuickWinList,
      evidence: `Baseline audit returned ${before.checkCount} applicable checks and ${quickWins.length} quick wins.`,
    },
    {
      key: 'starter-safe-improvement',
      label: 'Apply starter-safe improvements in isolation',
      passed: after.score >= before.score && after.failed <= before.failed,
      evidence: `Score moved ${before.score} -> ${after.score}; failed checks moved ${before.failed} -> ${after.failed}.`,
    },
    {
      key: 'governed-rollout-surface',
      label: 'Expose governed rollout controls',
      passed: governanceSummary.permissionProfiles.length >= 3 && governanceSummary.hookRegistry.length >= 1,
      evidence: `${governanceSummary.permissionProfiles.length} profiles and ${governanceSummary.hookRegistry.length} governed hooks available.`,
    },
    {
      key: 'domain-pack-guidance',
      label: 'Recommend a domain pack for the repo',
      passed: domainPacks.length > 0,
      evidence: joinLabels(domainPacks) || 'No domain pack recommendation generated.',
    },
    {
      key: 'mcp-pack-guidance',
      label: 'Recommend MCP packs when appropriate',
      passed: mcpPacks.length > 0,
      evidence: joinLabels(mcpPacks) || 'No MCP pack recommendation generated.',
    },
  ];

  const passed = tasks.filter((task) => task.passed).length;
  const total = tasks.length;
  return {
    taskPack: 'maintainer-core',
    tasks,
    summary: {
      passed,
      total,
      coverageScore: total > 0 ? Math.round((passed / total) * 100) : 0,
    },
  };
}
88
-
89
/**
 * Build the `codex-baseline` workflow evidence for a benchmark run.
 *
 * Each task records whether it passed plus a human-readable evidence string.
 * `before.quickWins` and `analysisReport.recommendedDomainPacks` are treated as
 * empty when they are not arrays, so a sparse audit or analysis result produces
 * a failed task instead of a TypeError while the evidence text is formatted.
 *
 * @param {Object} before - Baseline audit result (`score`, `failed`, `checkCount`, `quickWins`).
 * @param {Object} after - Follow-up audit result compared against `before` (`score`, `failed`).
 * @param {Object} applyResult - Setup result; `preservedFiles` and `rollbackArtifact` are read.
 * @param {Object} analysisReport - Analysis output the domain pack recommendations are read from.
 * @param {Object} governanceSummary - Exposes `permissionProfiles` and `hookRegistry`, each with a `length`.
 * @returns {{taskPack: string, tasks: Object[], summary: {passed: number, total: number, coverageScore: number}}}
 */
function buildCodexWorkflowEvidence(before, after, applyResult, analysisReport, governanceSummary) {
  const hasQuickWinList = Array.isArray(before.quickWins);
  // Fall back to empty lists so the evidence strings below can always be built.
  const quickWins = hasQuickWinList ? before.quickWins : [];
  const domainPacks = Array.isArray(analysisReport.recommendedDomainPacks)
    ? analysisReport.recommendedDomainPacks
    : [];

  const tasks = [
    {
      key: 'discover-without-writes',
      label: 'Discover next actions without writing files',
      passed: before.checkCount > 0 && hasQuickWinList,
      evidence: `Baseline audit returned ${before.checkCount} applicable checks and ${quickWins.length} quick wins.`,
    },
    {
      key: 'starter-safe-improvement',
      label: 'Apply starter-safe Codex baseline in isolation',
      passed: after.score >= before.score && after.failed <= before.failed,
      evidence: `Score moved ${before.score} -> ${after.score}; failed checks moved ${before.failed} -> ${after.failed}.`,
    },
    {
      key: 'preserve-existing-files',
      label: 'Preserve existing files instead of overwriting them',
      passed: Array.isArray(applyResult.preservedFiles),
      evidence: `${applyResult.preservedFiles ? applyResult.preservedFiles.length : 0} files were preserved instead of overwritten.`,
    },
    {
      key: 'governed-rollout-surface',
      label: 'Expose governed rollout controls',
      passed: governanceSummary.permissionProfiles.length >= 3 && governanceSummary.hookRegistry.length >= 1,
      evidence: `${governanceSummary.permissionProfiles.length} profiles and ${governanceSummary.hookRegistry.length} governance surfaces available.`,
    },
    {
      key: 'domain-pack-guidance',
      label: 'Recommend Codex domain packs for the repo',
      passed: domainPacks.length > 0,
      evidence: domainPacks.map((pack) => pack.label).join(', ') || 'No Codex domain pack recommendation generated.',
    },
    {
      key: 'rollback-surface',
      label: 'Emit rollback evidence for writes',
      passed: Boolean(applyResult.rollbackArtifact),
      evidence: applyResult.rollbackArtifact
        ? `Rollback artifact emitted at ${applyResult.rollbackArtifact}.`
        : 'No rollback artifact emitted.',
    },
  ];

  const passed = tasks.filter((task) => task.passed).length;
  const total = tasks.length;
  return {
    taskPack: 'codex-baseline',
    tasks,
    summary: {
      passed,
      total,
      coverageScore: total > 0 ? Math.round((passed / total) * 100) : 0,
    },
  };
}
143
-
144
/**
 * Derive the headline and rollout recommendation for a benchmark run.
 *
 * The headline depends on the sign of the score delta; when the delta is not
 * positive or negative it depends on the baseline score and workflow coverage.
 * The recommendation is tiered on the score delta (>= 20, >= 10) and otherwise
 * on whether the baseline was already >= 85 with coverage >= 80.
 *
 * @param {Object} before - Baseline audit result (`score`, `organicScore`).
 * @param {Object} after - Follow-up audit result (`score`, `organicScore`).
 * @param {Object} workflowEvidence - Object whose `summary.coverageScore` is read.
 * @returns {{headline: string, scoreDelta: number, organicDelta: number, decisionGuidance: string}}
 */
function buildExecutiveSummary(before, after, workflowEvidence) {
  const scoreDelta = after.score - before.score;
  const organicDelta = after.organicScore - before.organicScore;
  const workflowCoverage = workflowEvidence.summary.coverageScore;
  const strongBaseline = before.score >= 85 && workflowCoverage >= 80;

  let headline;
  if (scoreDelta < 0) {
    headline = `Warning: score decreased by ${Math.abs(scoreDelta)} points. Setup may have introduced a regression.`;
  } else if (scoreDelta > 0) {
    headline = `Benchmark improved readiness by ${scoreDelta} points without touching the original repo.`;
  } else if (strongBaseline && after.score >= before.score) {
    headline = 'Benchmark confirmed the repo already meets the starter-safe baseline without regression.';
  } else if (before.score >= 60) {
    headline = 'Setup is already applied — benchmark shows no additional improvement. Run benchmark on a project before running setup to see the full delta.';
  } else {
    headline = 'Benchmark did not improve the score in this run.';
  }

  let decisionGuidance;
  if (scoreDelta >= 20) {
    decisionGuidance = 'Strong pilot candidate';
  } else if (scoreDelta >= 10) {
    decisionGuidance = 'Promising but needs manual review';
  } else if (strongBaseline) {
    decisionGuidance = 'Use suggest-only mode, domain packs, or task-level benchmarks next';
  } else {
    decisionGuidance = 'Use suggest-only mode before rollout';
  }

  return { headline, scoreDelta, organicDelta, decisionGuidance };
}
173
-
174
/**
 * Summarize which artifacts a setup run wrote, based on the written file paths.
 *
 * Counts are substring matches on each written path (`hooks/`, `commands/`,
 * `agents/`, `skills/`, `rules/`); the yes/no flags are exact-path matches.
 *
 * @param {Object} before - Not read by this function; kept in the signature.
 * @param {Object} after - Not read by this function; kept in the signature.
 * @param {Object} applyResult - Setup result; `writtenFiles` and `preservedFiles`
 *   default to empty lists when falsy.
 * @returns {Object} Flags and counts describing the written and preserved files.
 */
function buildPracticalValue(before, after, applyResult) {
  const written = applyResult.writtenFiles || [];
  const wasWritten = (file) => (written.includes(file) ? 'yes' : 'no');
  const countUnder = (fragment) => written.filter((file) => file.includes(fragment)).length;

  return {
    denyRulesAdded: wasWritten('.claude/settings.json'),
    hooksCreated: countUnder('hooks/'),
    commandsCreated: countUnder('commands/'),
    agentsCreated: countUnder('agents/'),
    skillsCreated: countUnder('skills/'),
    rulesCreated: countUnder('rules/'),
    claudeMdCreated: wasWritten('CLAUDE.md'),
    totalFilesCreated: written.length,
    totalFilesPreserved: (applyResult.preservedFiles || []).length,
  };
}
188
-
189
/**
 * Assemble the case-study section of the benchmark report.
 *
 * @param {Object} before - Baseline audit result (`score`, `organicScore`, `passed`).
 * @param {Object} after - Follow-up audit result (`score`, `organicScore`, `passed`).
 * @param {Object} applyResult - Setup result; `writtenFiles` and `preservedFiles`
 *   are passed through unchanged (they may be undefined).
 * @returns {Object} Case study with initial state, mode, file lists, measured
 *   deltas, and the practical-value summary from `buildPracticalValue`.
 */
function buildCaseStudy(before, after, applyResult) {
  const { writtenFiles, preservedFiles } = applyResult;
  const measuredResults = {
    scoreDelta: after.score - before.score,
    organicDelta: after.organicScore - before.organicScore,
    passedDelta: after.passed - before.passed,
  };

  return {
    initialState: `Baseline score ${before.score}/100, organic ${before.organicScore}/100.`,
    chosenMode: 'benchmark-on-isolated-copy',
    whatChanged: writtenFiles,
    whatWasPreserved: preservedFiles,
    measuredResults,
    practicalValue: buildPracticalValue(before, after, applyResult),
  };
}
203
-
204
/**
 * Render a benchmark report as a Markdown document.
 *
 * The case-study file lists are rendered as `none` when they are empty or not
 * arrays: `caseStudy.whatChanged` / `whatWasPreserved` are copied straight from
 * the setup result and may be undefined, which previously made `.join` throw.
 * The title uses the `nerviq` product name, matching `generatedBy` and the
 * console output of this module.
 *
 * @param {Object} report - Report object as returned by `runBenchmark`.
 * @returns {string} Markdown text, terminated by a newline.
 */
function renderBenchmarkMarkdown(report) {
  const { before, after, delta, executiveSummary, workflowEvidence, caseStudy } = report;
  const { summary } = workflowEvidence;
  // Non-array values fall through to the same 'none' text as an empty list.
  const listOrNone = (files) => (Array.isArray(files) ? files.join(', ') : '') || 'none';

  return [
    '# NERVIQ CLI Benchmark Report',
    '',
    `- Generated by: ${report.generatedBy}`,
    `- Created at: ${report.createdAt}`,
    `- Source repo: ${report.directory}`,
    '',
    '## Methodology',
    ...report.methodology.map((item) => `- ${item}`),
    '',
    '## Before',
    `- Score: ${before.score}/100`,
    `- Organic score: ${before.organicScore}/100`,
    `- Passing checks: ${before.passed}/${before.checkCount}`,
    '',
    '## After',
    `- Score: ${after.score}/100`,
    `- Organic score: ${after.organicScore}/100`,
    `- Passing checks: ${after.passed}/${after.checkCount}`,
    '',
    '## Delta',
    `- Score delta: ${delta.score}`,
    `- Organic score delta: ${delta.organicScore}`,
    `- Passed checks delta: ${delta.passed}`,
    '',
    '## Executive Summary',
    `- ${executiveSummary.headline}`,
    `- Recommendation: ${executiveSummary.decisionGuidance}`,
    '',
    '## Workflow Evidence',
    `- Task pack: ${workflowEvidence.taskPack}`,
    `- Coverage: ${summary.passed}/${summary.total} (${summary.coverageScore}%)`,
    ...workflowEvidence.tasks.map((task) => `- ${task.label}: ${task.passed ? 'pass' : 'not yet'} — ${task.evidence}`),
    '',
    '## Case Study',
    `- Initial state: ${caseStudy.initialState}`,
    `- Chosen mode: ${caseStudy.chosenMode}`,
    `- What changed: ${listOrNone(caseStudy.whatChanged)}`,
    `- What was preserved: ${listOrNone(caseStudy.whatWasPreserved)}`,
    '',
  ].join('\n');
}
247
-
248
/**
 * Run a before/after benchmark on an isolated copy of the project.
 *
 * Steps: audit the source directory, copy it into a fresh temp directory, run
 * setup on the copy, audit and analyze the copy, then build the report. The
 * temp directory is removed in a `finally` block whether or not a step throws.
 *
 * @param {Object} options - Benchmark options.
 * @param {string} options.dir - Project directory to benchmark.
 * @param {string} [options.external] - External repo path to benchmark instead of `options.dir`.
 * @param {string} [options.platform] - Target platform; `'claude'` is used when falsy.
 * @param {string} [options.profile] - Permission profile passed to setup.
 * @param {string[]} [options.mcpPacks] - MCP pack keys passed to setup (empty list when falsy).
 * @returns {Promise<Object>} Benchmark report with before/after scores, delta, and workflow evidence.
 * @throws {Error} When `options.external` is set but the path does not exist.
 */
async function runBenchmark(options) {
  const platform = options.platform || 'claude';
  const sourceDir = options.external || options.dir;

  const externalMissing = options.external && !fs.existsSync(options.external);
  if (externalMissing) {
    throw new Error(`External repo path not found: ${options.external}`);
  }

  // Both audits share the same silent, platform-scoped options.
  const auditQuietly = (dir) => audit({ dir, silent: true, platform });

  const before = await auditQuietly(sourceDir);
  const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'claudex-benchmark-'));
  const sandboxDir = path.join(tempRoot, 'repo');

  try {
    copyProject(sourceDir, sandboxDir);

    const setupOptions = {
      dir: sandboxDir,
      auto: true,
      silent: true,
      profile: options.profile,
      mcpPacks: options.mcpPacks || [],
      platform,
    };
    const applyResult = await setup(setupOptions);
    const after = await auditQuietly(sandboxDir);
    const analysisReport = await analyzeProject({ dir: sandboxDir, mode: 'suggest-only', platform });
    const governanceSummary = getGovernanceSummary(platform);

    let workflowEvidence;
    if (platform === 'codex') {
      workflowEvidence = buildCodexWorkflowEvidence(before, after, applyResult, analysisReport, governanceSummary);
    } else {
      workflowEvidence = buildWorkflowEvidence(before, after, analysisReport, governanceSummary);
    }

    const artifactLabel = platform === 'codex' ? 'Codex' : 'Claude';

    return {
      schemaVersion: 1,
      generatedBy: `nerviq@${version}`,
      createdAt: new Date().toISOString(),
      directory: sourceDir,
      platform,
      methodology: [
        'Run a baseline audit on the source repo.',
        'Copy the repo into a temporary isolated workspace.',
        `Apply starter-safe ${artifactLabel} artifacts only on the isolated copy.`,
        'Re-run the audit and compare the results.',
      ],
      before: summarizeAudit(before),
      after: summarizeAudit(after),
      delta: {
        score: after.score - before.score,
        organicScore: after.organicScore - before.organicScore,
        passed: after.passed - before.passed,
        failed: after.failed - before.failed,
      },
      workflowEvidence,
      executiveSummary: buildExecutiveSummary(before, after, workflowEvidence),
      caseStudy: buildCaseStudy(before, after, applyResult),
    };
  } finally {
    // Always discard the sandbox, including when a step above throws.
    fs.rmSync(tempRoot, { recursive: true, force: true });
  }
}
312
-
313
/**
 * Print a benchmark report to stdout.
 *
 * With `options.json` truthy the whole report is printed as pretty JSON and
 * nothing else; otherwise a short human-readable summary is printed line by line.
 *
 * @param {Object} report - Report object as returned by `runBenchmark`.
 * @param {Object} [options] - Output options.
 * @param {boolean} [options.json] - Print the raw report as JSON instead of the summary.
 * @returns {void}
 */
function printBenchmark(report, options = {}) {
  if (options.json) {
    console.log(JSON.stringify(report, null, 2));
    return;
  }

  const emit = (line = '') => console.log(line);
  // Non-negative deltas get an explicit plus sign; negatives keep their own minus.
  const signed = (value) => `${value >= 0 ? '+' : ''}${value}`;

  emit();
  emit(' nerviq benchmark');
  emit(' ═══════════════════════════════════════');
  emit(' Runs in an isolated temp copy. Your current repo is not modified.');
  emit();
  emit(` Before: ${report.before.score}/100 (organic ${report.before.organicScore}/100)`);
  emit(` After: ${report.after.score}/100 (organic ${report.after.organicScore}/100)`);
  emit(` Delta: score ${signed(report.delta.score)}, organic ${signed(report.delta.organicScore)}`);
  emit();
  emit(` ${report.executiveSummary.headline}`);
  emit(` Recommendation: ${report.executiveSummary.decisionGuidance}`);

  const coverage = report.workflowEvidence.summary;
  emit(` Workflow evidence: ${coverage.passed}/${coverage.total} tasks (${coverage.coverageScore}%)`);
  emit();
}
333
-
334
/**
 * Write a benchmark report to disk, creating parent directories as needed.
 *
 * A `.md` extension (case-insensitive) selects the Markdown rendering; any
 * other extension gets pretty-printed JSON. The file is written as UTF-8.
 *
 * @param {Object} report - Report object as returned by `runBenchmark`.
 * @param {string} outFile - Destination file path.
 * @returns {void}
 */
function writeBenchmarkReport(report, outFile) {
  fs.mkdirSync(path.dirname(outFile), { recursive: true });

  const wantsMarkdown = path.extname(outFile).toLowerCase() === '.md';
  let content;
  if (wantsMarkdown) {
    content = renderBenchmarkMarkdown(report);
  } else {
    content = JSON.stringify(report, null, 2);
  }

  fs.writeFileSync(outFile, content, 'utf8');
}
341
-
342
// Public API of the benchmark module: run, print, and persist a benchmark report.
module.exports = { runBenchmark, printBenchmark, writeBenchmarkReport };
1
+ const fs = require('fs');
2
+ const os = require('os');
3
+ const path = require('path');
4
+
5
+ const { version } = require('../package.json');
6
+ const { audit } = require('./audit');
7
+ const { setup } = require('./setup');
8
+ const { analyzeProject } = require('./analyze');
9
+ const { getGovernanceSummary } = require('./governance');
10
+
11
/**
 * Recursively copy a project tree into `targetDir`.
 *
 * Entries named `.git`, `node_modules`, or `__pycache__` are skipped at every
 * level. Directories are copied recursively and regular files are copied
 * as-is. Symbolic links are not copied; a note is written to stderr instead.
 * Any other entry type is ignored without output.
 *
 * @param {string} sourceDir - Directory to read entries from.
 * @param {string} targetDir - Directory to create (if missing) and populate.
 * @returns {void}
 */
function copyProject(sourceDir, targetDir) {
  const skippedNames = ['.git', 'node_modules', '__pycache__'];

  fs.mkdirSync(targetDir, { recursive: true });

  for (const entry of fs.readdirSync(sourceDir, { withFileTypes: true })) {
    if (skippedNames.includes(entry.name)) {
      continue;
    }

    const sourcePath = path.join(sourceDir, entry.name);
    const targetPath = path.join(targetDir, entry.name);

    if (entry.isDirectory()) {
      copyProject(sourcePath, targetPath);
      continue;
    }

    if (entry.isFile()) {
      fs.copyFileSync(sourcePath, targetPath);
      continue;
    }

    if (entry.isSymbolicLink && entry.isSymbolicLink()) {
      // Symlinks are left out of the copy; report the skip on stderr.
      process.stderr.write(` Note: symlink skipped in benchmark: ${entry.name}\n`);
    }
  }
}
30
+
31
/**
 * Reduce an audit result to the fields kept in the benchmark report.
 *
 * @param {Object} result - Audit result object.
 * @returns {{score: *, organicScore: *, passed: *, failed: *, checkCount: *, quickWins: *}}
 *   A new object holding only those six properties, copied from `result`.
 */
function summarizeAudit(result) {
  const { score, organicScore, passed, failed, checkCount, quickWins } = result;
  return { score, organicScore, passed, failed, checkCount, quickWins };
}
41
+
42
/**
 * Build the `maintainer-core` workflow evidence for a benchmark run.
 *
 * Each task records whether it passed plus a human-readable evidence string.
 * List-valued inputs (`before.quickWins`, `analysisReport.recommendedDomainPacks`,
 * `analysisReport.recommendedMcpPacks`) are treated as empty when they are not
 * arrays, so a sparse audit or analysis result produces a failed task instead
 * of a TypeError while the evidence text is being formatted.
 *
 * @param {Object} before - Baseline audit result (`score`, `failed`, `checkCount`, `quickWins`).
 * @param {Object} after - Follow-up audit result compared against `before` (`score`, `failed`).
 * @param {Object} analysisReport - Analysis output the pack recommendations are read from.
 * @param {Object} governanceSummary - Exposes `permissionProfiles` and `hookRegistry`, each with a `length`.
 * @returns {{taskPack: string, tasks: Object[], summary: {passed: number, total: number, coverageScore: number}}}
 */
function buildWorkflowEvidence(before, after, analysisReport, governanceSummary) {
  const hasQuickWinList = Array.isArray(before.quickWins);
  // Fall back to empty lists so the evidence strings below can always be built.
  const quickWins = hasQuickWinList ? before.quickWins : [];
  const domainPacks = Array.isArray(analysisReport.recommendedDomainPacks)
    ? analysisReport.recommendedDomainPacks
    : [];
  const mcpPacks = Array.isArray(analysisReport.recommendedMcpPacks)
    ? analysisReport.recommendedMcpPacks
    : [];
  const joinLabels = (packs) => packs.map((pack) => pack.label).join(', ');

  const tasks = [
    {
      key: 'discover-without-writes',
      label: 'Discover next actions without writing files',
      passed: before.checkCount > 0 && hasQuickWinList,
      evidence: `Baseline audit returned ${before.checkCount} applicable checks and ${quickWins.length} quick wins.`,
    },
    {
      key: 'starter-safe-improvement',
      label: 'Apply starter-safe improvements in isolation',
      passed: after.score >= before.score && after.failed <= before.failed,
      evidence: `Score moved ${before.score} -> ${after.score}; failed checks moved ${before.failed} -> ${after.failed}.`,
    },
    {
      key: 'governed-rollout-surface',
      label: 'Expose governed rollout controls',
      passed: governanceSummary.permissionProfiles.length >= 3 && governanceSummary.hookRegistry.length >= 1,
      evidence: `${governanceSummary.permissionProfiles.length} profiles and ${governanceSummary.hookRegistry.length} governed hooks available.`,
    },
    {
      key: 'domain-pack-guidance',
      label: 'Recommend a domain pack for the repo',
      passed: domainPacks.length > 0,
      evidence: joinLabels(domainPacks) || 'No domain pack recommendation generated.',
    },
    {
      key: 'mcp-pack-guidance',
      label: 'Recommend MCP packs when appropriate',
      passed: mcpPacks.length > 0,
      evidence: joinLabels(mcpPacks) || 'No MCP pack recommendation generated.',
    },
  ];

  const passed = tasks.filter((task) => task.passed).length;
  const total = tasks.length;
  return {
    taskPack: 'maintainer-core',
    tasks,
    summary: {
      passed,
      total,
      coverageScore: total > 0 ? Math.round((passed / total) * 100) : 0,
    },
  };
}
88
+
89
/**
 * Build the `codex-baseline` workflow evidence for a benchmark run.
 *
 * Each task records whether it passed plus a human-readable evidence string.
 * `before.quickWins` and `analysisReport.recommendedDomainPacks` are treated as
 * empty when they are not arrays, so a sparse audit or analysis result produces
 * a failed task instead of a TypeError while the evidence text is formatted.
 *
 * @param {Object} before - Baseline audit result (`score`, `failed`, `checkCount`, `quickWins`).
 * @param {Object} after - Follow-up audit result compared against `before` (`score`, `failed`).
 * @param {Object} applyResult - Setup result; `preservedFiles` and `rollbackArtifact` are read.
 * @param {Object} analysisReport - Analysis output the domain pack recommendations are read from.
 * @param {Object} governanceSummary - Exposes `permissionProfiles` and `hookRegistry`, each with a `length`.
 * @returns {{taskPack: string, tasks: Object[], summary: {passed: number, total: number, coverageScore: number}}}
 */
function buildCodexWorkflowEvidence(before, after, applyResult, analysisReport, governanceSummary) {
  const hasQuickWinList = Array.isArray(before.quickWins);
  // Fall back to empty lists so the evidence strings below can always be built.
  const quickWins = hasQuickWinList ? before.quickWins : [];
  const domainPacks = Array.isArray(analysisReport.recommendedDomainPacks)
    ? analysisReport.recommendedDomainPacks
    : [];

  const tasks = [
    {
      key: 'discover-without-writes',
      label: 'Discover next actions without writing files',
      passed: before.checkCount > 0 && hasQuickWinList,
      evidence: `Baseline audit returned ${before.checkCount} applicable checks and ${quickWins.length} quick wins.`,
    },
    {
      key: 'starter-safe-improvement',
      label: 'Apply starter-safe Codex baseline in isolation',
      passed: after.score >= before.score && after.failed <= before.failed,
      evidence: `Score moved ${before.score} -> ${after.score}; failed checks moved ${before.failed} -> ${after.failed}.`,
    },
    {
      key: 'preserve-existing-files',
      label: 'Preserve existing files instead of overwriting them',
      passed: Array.isArray(applyResult.preservedFiles),
      evidence: `${applyResult.preservedFiles ? applyResult.preservedFiles.length : 0} files were preserved instead of overwritten.`,
    },
    {
      key: 'governed-rollout-surface',
      label: 'Expose governed rollout controls',
      passed: governanceSummary.permissionProfiles.length >= 3 && governanceSummary.hookRegistry.length >= 1,
      evidence: `${governanceSummary.permissionProfiles.length} profiles and ${governanceSummary.hookRegistry.length} governance surfaces available.`,
    },
    {
      key: 'domain-pack-guidance',
      label: 'Recommend Codex domain packs for the repo',
      passed: domainPacks.length > 0,
      evidence: domainPacks.map((pack) => pack.label).join(', ') || 'No Codex domain pack recommendation generated.',
    },
    {
      key: 'rollback-surface',
      label: 'Emit rollback evidence for writes',
      passed: Boolean(applyResult.rollbackArtifact),
      evidence: applyResult.rollbackArtifact
        ? `Rollback artifact emitted at ${applyResult.rollbackArtifact}.`
        : 'No rollback artifact emitted.',
    },
  ];

  const passed = tasks.filter((task) => task.passed).length;
  const total = tasks.length;
  return {
    taskPack: 'codex-baseline',
    tasks,
    summary: {
      passed,
      total,
      coverageScore: total > 0 ? Math.round((passed / total) * 100) : 0,
    },
  };
}
143
+
144
/**
 * Derive the headline and rollout recommendation for a benchmark run.
 *
 * The headline depends on the sign of the score delta; when the delta is not
 * positive or negative it depends on the baseline score and workflow coverage.
 * The recommendation is tiered on the score delta (>= 20, >= 10) and otherwise
 * on whether the baseline was already >= 85 with coverage >= 80.
 *
 * @param {Object} before - Baseline audit result (`score`, `organicScore`).
 * @param {Object} after - Follow-up audit result (`score`, `organicScore`).
 * @param {Object} workflowEvidence - Object whose `summary.coverageScore` is read.
 * @returns {{headline: string, scoreDelta: number, organicDelta: number, decisionGuidance: string}}
 */
function buildExecutiveSummary(before, after, workflowEvidence) {
  const scoreDelta = after.score - before.score;
  const organicDelta = after.organicScore - before.organicScore;
  const workflowCoverage = workflowEvidence.summary.coverageScore;
  const strongBaseline = before.score >= 85 && workflowCoverage >= 80;

  let headline;
  if (scoreDelta < 0) {
    headline = `Warning: score decreased by ${Math.abs(scoreDelta)} points. Setup may have introduced a regression.`;
  } else if (scoreDelta > 0) {
    headline = `Benchmark improved readiness by ${scoreDelta} points without touching the original repo.`;
  } else if (strongBaseline && after.score >= before.score) {
    headline = 'Benchmark confirmed the repo already meets the starter-safe baseline without regression.';
  } else if (before.score >= 60) {
    headline = 'Setup is already applied — benchmark shows no additional improvement. Run benchmark on a project before running setup to see the full delta.';
  } else {
    headline = 'Benchmark did not improve the score in this run.';
  }

  let decisionGuidance;
  if (scoreDelta >= 20) {
    decisionGuidance = 'Strong pilot candidate';
  } else if (scoreDelta >= 10) {
    decisionGuidance = 'Promising but needs manual review';
  } else if (strongBaseline) {
    decisionGuidance = 'Use suggest-only mode, domain packs, or task-level benchmarks next';
  } else {
    decisionGuidance = 'Use suggest-only mode before rollout';
  }

  return { headline, scoreDelta, organicDelta, decisionGuidance };
}
173
+
174
/**
 * Summarize which artifacts a setup run wrote, based on the written file paths.
 *
 * Counts are substring matches on each written path (`hooks/`, `commands/`,
 * `agents/`, `skills/`, `rules/`); the yes/no flags are exact-path matches.
 *
 * @param {Object} before - Not read by this function; kept in the signature.
 * @param {Object} after - Not read by this function; kept in the signature.
 * @param {Object} applyResult - Setup result; `writtenFiles` and `preservedFiles`
 *   default to empty lists when falsy.
 * @returns {Object} Flags and counts describing the written and preserved files.
 */
function buildPracticalValue(before, after, applyResult) {
  const written = applyResult.writtenFiles || [];
  const wasWritten = (file) => (written.includes(file) ? 'yes' : 'no');
  const countUnder = (fragment) => written.filter((file) => file.includes(fragment)).length;

  return {
    denyRulesAdded: wasWritten('.claude/settings.json'),
    hooksCreated: countUnder('hooks/'),
    commandsCreated: countUnder('commands/'),
    agentsCreated: countUnder('agents/'),
    skillsCreated: countUnder('skills/'),
    rulesCreated: countUnder('rules/'),
    claudeMdCreated: wasWritten('CLAUDE.md'),
    totalFilesCreated: written.length,
    totalFilesPreserved: (applyResult.preservedFiles || []).length,
  };
}
188
+
189
/**
 * Assemble the case-study section of the benchmark report.
 *
 * @param {Object} before - Baseline audit result (`score`, `organicScore`, `passed`).
 * @param {Object} after - Follow-up audit result (`score`, `organicScore`, `passed`).
 * @param {Object} applyResult - Setup result; `writtenFiles` and `preservedFiles`
 *   are passed through unchanged (they may be undefined).
 * @returns {Object} Case study with initial state, mode, file lists, measured
 *   deltas, and the practical-value summary from `buildPracticalValue`.
 */
function buildCaseStudy(before, after, applyResult) {
  const { writtenFiles, preservedFiles } = applyResult;
  const measuredResults = {
    scoreDelta: after.score - before.score,
    organicDelta: after.organicScore - before.organicScore,
    passedDelta: after.passed - before.passed,
  };

  return {
    initialState: `Baseline score ${before.score}/100, organic ${before.organicScore}/100.`,
    chosenMode: 'benchmark-on-isolated-copy',
    whatChanged: writtenFiles,
    whatWasPreserved: preservedFiles,
    measuredResults,
    practicalValue: buildPracticalValue(before, after, applyResult),
  };
}
203
+
204
/**
 * Render a benchmark report as a Markdown document.
 *
 * The case-study file lists are rendered as `none` when they are empty or not
 * arrays: `caseStudy.whatChanged` / `whatWasPreserved` are copied straight from
 * the setup result and may be undefined, which previously made `.join` throw.
 *
 * @param {Object} report - Report object as returned by `runBenchmark`.
 * @returns {string} Markdown text, terminated by a newline.
 */
function renderBenchmarkMarkdown(report) {
  const { before, after, delta, executiveSummary, workflowEvidence, caseStudy } = report;
  const { summary } = workflowEvidence;
  // Non-array values fall through to the same 'none' text as an empty list.
  const listOrNone = (files) => (Array.isArray(files) ? files.join(', ') : '') || 'none';

  return [
    '# NERVIQ CLI Benchmark Report',
    '',
    `- Generated by: ${report.generatedBy}`,
    `- Created at: ${report.createdAt}`,
    `- Source repo: ${report.directory}`,
    '',
    '## Methodology',
    ...report.methodology.map((item) => `- ${item}`),
    '',
    '## Before',
    `- Score: ${before.score}/100`,
    `- Organic score: ${before.organicScore}/100`,
    `- Passing checks: ${before.passed}/${before.checkCount}`,
    '',
    '## After',
    `- Score: ${after.score}/100`,
    `- Organic score: ${after.organicScore}/100`,
    `- Passing checks: ${after.passed}/${after.checkCount}`,
    '',
    '## Delta',
    `- Score delta: ${delta.score}`,
    `- Organic score delta: ${delta.organicScore}`,
    `- Passed checks delta: ${delta.passed}`,
    '',
    '## Executive Summary',
    `- ${executiveSummary.headline}`,
    `- Recommendation: ${executiveSummary.decisionGuidance}`,
    '',
    '## Workflow Evidence',
    `- Task pack: ${workflowEvidence.taskPack}`,
    `- Coverage: ${summary.passed}/${summary.total} (${summary.coverageScore}%)`,
    ...workflowEvidence.tasks.map((task) => `- ${task.label}: ${task.passed ? 'pass' : 'not yet'} — ${task.evidence}`),
    '',
    '## Case Study',
    `- Initial state: ${caseStudy.initialState}`,
    `- Chosen mode: ${caseStudy.chosenMode}`,
    `- What changed: ${listOrNone(caseStudy.whatChanged)}`,
    `- What was preserved: ${listOrNone(caseStudy.whatWasPreserved)}`,
    '',
  ].join('\n');
}
247
+
248
/**
 * Run a before/after benchmark on an isolated copy of the project.
 *
 * Steps: audit the source directory, copy it into a fresh temp directory, run
 * setup on the copy, audit and analyze the copy, then build the report. The
 * temp directory is removed in a `finally` block whether or not a step throws.
 *
 * @param {Object} options - Benchmark options.
 * @param {string} options.dir - Project directory to benchmark.
 * @param {string} [options.external] - External repo path to benchmark instead of `options.dir`.
 * @param {string} [options.platform] - Target platform; `'claude'` is used when falsy.
 * @param {string} [options.profile] - Permission profile passed to setup.
 * @param {string[]} [options.mcpPacks] - MCP pack keys passed to setup (empty list when falsy).
 * @returns {Promise<Object>} Benchmark report with before/after scores, delta, and workflow evidence.
 * @throws {Error} When `options.external` is set but the path does not exist.
 */
async function runBenchmark(options) {
  const platform = options.platform || 'claude';
  const sourceDir = options.external || options.dir;

  const externalMissing = options.external && !fs.existsSync(options.external);
  if (externalMissing) {
    throw new Error(`External repo path not found: ${options.external}`);
  }

  // Both audits share the same silent, platform-scoped options.
  const auditQuietly = (dir) => audit({ dir, silent: true, platform });

  const before = await auditQuietly(sourceDir);
  const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'nerviq-benchmark-'));
  const sandboxDir = path.join(tempRoot, 'repo');

  try {
    copyProject(sourceDir, sandboxDir);

    const setupOptions = {
      dir: sandboxDir,
      auto: true,
      silent: true,
      profile: options.profile,
      mcpPacks: options.mcpPacks || [],
      platform,
    };
    const applyResult = await setup(setupOptions);
    const after = await auditQuietly(sandboxDir);
    const analysisReport = await analyzeProject({ dir: sandboxDir, mode: 'suggest-only', platform });
    const governanceSummary = getGovernanceSummary(platform);

    let workflowEvidence;
    if (platform === 'codex') {
      workflowEvidence = buildCodexWorkflowEvidence(before, after, applyResult, analysisReport, governanceSummary);
    } else {
      workflowEvidence = buildWorkflowEvidence(before, after, analysisReport, governanceSummary);
    }

    const artifactLabel = platform === 'codex' ? 'Codex' : 'Claude';

    return {
      schemaVersion: 1,
      generatedBy: `nerviq@${version}`,
      createdAt: new Date().toISOString(),
      directory: sourceDir,
      platform,
      methodology: [
        'Run a baseline audit on the source repo.',
        'Copy the repo into a temporary isolated workspace.',
        `Apply starter-safe ${artifactLabel} artifacts only on the isolated copy.`,
        'Re-run the audit and compare the results.',
      ],
      before: summarizeAudit(before),
      after: summarizeAudit(after),
      delta: {
        score: after.score - before.score,
        organicScore: after.organicScore - before.organicScore,
        passed: after.passed - before.passed,
        failed: after.failed - before.failed,
      },
      workflowEvidence,
      executiveSummary: buildExecutiveSummary(before, after, workflowEvidence),
      caseStudy: buildCaseStudy(before, after, applyResult),
    };
  } finally {
    // Always discard the sandbox, including when a step above throws.
    fs.rmSync(tempRoot, { recursive: true, force: true });
  }
}
312
+
313
/**
 * Print a benchmark report to stdout.
 *
 * With `options.json` truthy the whole report is printed as pretty JSON and
 * nothing else; otherwise a short human-readable summary is printed line by line.
 *
 * @param {Object} report - Report object as returned by `runBenchmark`.
 * @param {Object} [options] - Output options.
 * @param {boolean} [options.json] - Print the raw report as JSON instead of the summary.
 * @returns {void}
 */
function printBenchmark(report, options = {}) {
  if (options.json) {
    console.log(JSON.stringify(report, null, 2));
    return;
  }

  const emit = (line = '') => console.log(line);
  // Non-negative deltas get an explicit plus sign; negatives keep their own minus.
  const signed = (value) => `${value >= 0 ? '+' : ''}${value}`;

  emit();
  emit(' nerviq benchmark');
  emit(' ═══════════════════════════════════════');
  emit(' Runs in an isolated temp copy. Your current repo is not modified.');
  emit();
  emit(` Before: ${report.before.score}/100 (organic ${report.before.organicScore}/100)`);
  emit(` After: ${report.after.score}/100 (organic ${report.after.organicScore}/100)`);
  emit(` Delta: score ${signed(report.delta.score)}, organic ${signed(report.delta.organicScore)}`);
  emit();
  emit(` ${report.executiveSummary.headline}`);
  emit(` Recommendation: ${report.executiveSummary.decisionGuidance}`);

  const coverage = report.workflowEvidence.summary;
  emit(` Workflow evidence: ${coverage.passed}/${coverage.total} tasks (${coverage.coverageScore}%)`);
  emit();
}
333
+
334
/**
 * Write a benchmark report to disk, creating parent directories as needed.
 *
 * A `.md` extension (case-insensitive) selects the Markdown rendering; any
 * other extension gets pretty-printed JSON. The file is written as UTF-8.
 *
 * @param {Object} report - Report object as returned by `runBenchmark`.
 * @param {string} outFile - Destination file path.
 * @returns {void}
 */
function writeBenchmarkReport(report, outFile) {
  fs.mkdirSync(path.dirname(outFile), { recursive: true });

  const wantsMarkdown = path.extname(outFile).toLowerCase() === '.md';
  let content;
  if (wantsMarkdown) {
    content = renderBenchmarkMarkdown(report);
  } else {
    content = JSON.stringify(report, null, 2);
  }

  fs.writeFileSync(outFile, content, 'utf8');
}
341
+
342
// Public API of the benchmark module: run, print, and persist a benchmark report.
module.exports = { runBenchmark, printBenchmark, writeBenchmarkReport };