@nerviq/cli 1.29.0 → 1.29.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1527 -1493
- package/README.md +550 -538
- package/SECURITY.md +82 -82
- package/bin/cli.js +2562 -2558
- package/docs/api-reference.md +356 -356
- package/docs/audit-fix.md +109 -0
- package/docs/autofix.md +3 -62
- package/docs/getting-started.md +1 -1
- package/docs/index.html +592 -592
- package/docs/integration-contracts.md +287 -287
- package/docs/maintenance.md +128 -128
- package/docs/new-platform-guide.md +202 -202
- package/docs/release-process.md +63 -0
- package/docs/shallow-risk.md +244 -244
- package/docs/why-nerviq.md +82 -82
- package/package.json +67 -67
- package/src/aider/activity.js +226 -226
- package/src/aider/context.js +162 -162
- package/src/aider/freshness.js +123 -123
- package/src/aider/techniques.js +3465 -3465
- package/src/audit/layers.js +180 -180
- package/src/audit.js +1032 -1032
- package/src/benchmark.js +299 -299
- package/src/codex/activity.js +324 -324
- package/src/codex/freshness.js +142 -142
- package/src/codex/techniques.js +4895 -4895
- package/src/context.js +326 -326
- package/src/continuous-ops.js +11 -1
- package/src/convert.js +340 -340
- package/src/copilot/config-parser.js +280 -280
- package/src/copilot/context.js +218 -218
- package/src/copilot/freshness.js +177 -177
- package/src/copilot/patch.js +238 -238
- package/src/copilot/techniques.js +3578 -3578
- package/src/cursor/freshness.js +194 -194
- package/src/cursor/patch.js +243 -243
- package/src/cursor/techniques.js +3735 -3735
- package/src/doctor.js +201 -201
- package/src/fix-engine.js +511 -8
- package/src/formatters/csv.js +86 -86
- package/src/formatters/junit.js +123 -123
- package/src/formatters/markdown.js +164 -164
- package/src/formatters/otel.js +151 -151
- package/src/freshness.js +156 -156
- package/src/gemini/activity.js +402 -402
- package/src/gemini/context.js +290 -290
- package/src/gemini/freshness.js +183 -183
- package/src/gemini/patch.js +229 -229
- package/src/gemini/techniques.js +3811 -3811
- package/src/governance.js +533 -533
- package/src/harmony/audit.js +306 -306
- package/src/i18n.js +63 -63
- package/src/insights.js +119 -119
- package/src/integrations.js +134 -134
- package/src/locales/en.json +33 -33
- package/src/locales/es.json +33 -33
- package/src/migrate.js +354 -354
- package/src/opencode/activity.js +286 -286
- package/src/opencode/freshness.js +137 -137
- package/src/opencode/techniques.js +3450 -3450
- package/src/setup/analysis.js +12 -12
- package/src/setup.js +7 -6
- package/src/shallow-risk/index.js +56 -56
- package/src/shallow-risk/patterns/agent-config-cross-platform-drift.js +50 -50
- package/src/shallow-risk/patterns/agent-config-dangerous-autoapprove.js +46 -46
- package/src/shallow-risk/patterns/agent-config-deprecated-keys.js +46 -46
- package/src/shallow-risk/patterns/agent-config-missing-file.js +317 -317
- package/src/shallow-risk/patterns/agent-config-secret-literal.js +49 -49
- package/src/shallow-risk/patterns/agent-config-stack-contradiction.js +34 -34
- package/src/shallow-risk/patterns/hook-script-missing.js +70 -70
- package/src/shallow-risk/patterns/mcp-server-no-allowlist.js +52 -52
- package/src/shallow-risk/shared.js +648 -648
- package/src/source-urls.js +295 -295
- package/src/state-paths.js +85 -85
- package/src/supplemental-checks.js +805 -805
- package/src/telemetry.js +160 -160
- package/src/windsurf/context.js +359 -359
- package/src/windsurf/freshness.js +194 -194
- package/src/windsurf/patch.js +231 -231
- package/src/windsurf/techniques.js +3779 -3779
package/src/benchmark.js
CHANGED
|
@@ -1,207 +1,207 @@
|
|
|
1
|
-
const fs = require('fs');
|
|
2
|
-
const os = require('os');
|
|
3
|
-
const path = require('path');
|
|
4
|
-
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const os = require('os');
|
|
3
|
+
const path = require('path');
|
|
4
|
+
|
|
5
5
|
const { version } = require('../package.json');
|
|
6
6
|
const { audit } = require('./audit');
|
|
7
7
|
const { setup } = require('./setup');
|
|
8
8
|
const { analyzeProject } = require('./analyze');
|
|
9
9
|
const { getGovernanceSummary } = require('./governance');
|
|
10
10
|
const { formatTerminologyLines } = require('./terminology');
|
|
11
|
-
|
|
12
|
-
function copyProject(sourceDir, targetDir) {
|
|
13
|
-
fs.mkdirSync(targetDir, { recursive: true });
|
|
14
|
-
const entries = fs.readdirSync(sourceDir, { withFileTypes: true });
|
|
15
|
-
for (const entry of entries) {
|
|
16
|
-
if (entry.name === '.git' || entry.name === 'node_modules' || entry.name === '__pycache__') {
|
|
17
|
-
continue;
|
|
18
|
-
}
|
|
19
|
-
const from = path.join(sourceDir, entry.name);
|
|
20
|
-
const to = path.join(targetDir, entry.name);
|
|
21
|
-
if (entry.isDirectory()) {
|
|
22
|
-
copyProject(from, to);
|
|
23
|
-
} else if (entry.isFile()) {
|
|
24
|
-
fs.copyFileSync(from, to);
|
|
25
|
-
} else if (entry.isSymbolicLink && entry.isSymbolicLink()) {
|
|
26
|
-
// Symlinks are skipped in benchmark sandbox — log for awareness
|
|
27
|
-
process.stderr.write(` Note: symlink skipped in benchmark: ${entry.name}\n`);
|
|
28
|
-
}
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
function summarizeAudit(result) {
|
|
33
|
-
return {
|
|
34
|
-
score: result.score,
|
|
35
|
-
organicScore: result.organicScore,
|
|
36
|
-
passed: result.passed,
|
|
37
|
-
failed: result.failed,
|
|
38
|
-
checkCount: result.checkCount,
|
|
39
|
-
quickWins: result.quickWins,
|
|
40
|
-
};
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
function buildWorkflowEvidence(before, after, analysisReport, governanceSummary) {
|
|
44
|
-
const tasks = [
|
|
45
|
-
{
|
|
46
|
-
key: 'discover-without-writes',
|
|
47
|
-
label: 'Discover next actions without writing files',
|
|
48
|
-
passed: before.checkCount > 0 && Array.isArray(before.quickWins),
|
|
49
|
-
evidence: `Baseline audit returned ${before.checkCount} applicable checks and ${before.quickWins.length} quick wins.`,
|
|
50
|
-
},
|
|
51
|
-
{
|
|
52
|
-
key: 'starter-safe-improvement',
|
|
53
|
-
label: 'Apply starter-safe improvements in isolation',
|
|
54
|
-
passed: after.score >= before.score && after.failed <= before.failed,
|
|
55
|
-
evidence: `Score moved ${before.score} -> ${after.score}; failed checks moved ${before.failed} -> ${after.failed}.`,
|
|
56
|
-
},
|
|
57
|
-
{
|
|
58
|
-
key: 'governed-rollout-surface',
|
|
59
|
-
label: 'Expose governed rollout controls',
|
|
60
|
-
passed: governanceSummary.permissionProfiles.length >= 3 && governanceSummary.hookRegistry.length >= 1,
|
|
61
|
-
evidence: `${governanceSummary.permissionProfiles.length} profiles and ${governanceSummary.hookRegistry.length} governed hooks available.`,
|
|
62
|
-
},
|
|
63
|
-
{
|
|
64
|
-
key: 'domain-pack-guidance',
|
|
65
|
-
label: 'Recommend a domain pack for the repo',
|
|
66
|
-
passed: analysisReport.recommendedDomainPacks.length > 0,
|
|
67
|
-
evidence: analysisReport.recommendedDomainPacks.map(pack => pack.label).join(', ') || 'No domain pack recommendation generated.',
|
|
68
|
-
},
|
|
69
|
-
{
|
|
70
|
-
key: 'mcp-pack-guidance',
|
|
71
|
-
label: 'Recommend MCP packs when appropriate',
|
|
72
|
-
passed: analysisReport.recommendedMcpPacks.length > 0,
|
|
73
|
-
evidence: analysisReport.recommendedMcpPacks.map(pack => pack.label).join(', ') || 'No MCP pack recommendation generated.',
|
|
74
|
-
},
|
|
75
|
-
];
|
|
76
|
-
|
|
77
|
-
const passed = tasks.filter(task => task.passed).length;
|
|
78
|
-
const total = tasks.length;
|
|
79
|
-
return {
|
|
80
|
-
taskPack: 'maintainer-core',
|
|
81
|
-
tasks,
|
|
82
|
-
summary: {
|
|
83
|
-
passed,
|
|
84
|
-
total,
|
|
85
|
-
coverageScore: total > 0 ? Math.round((passed / total) * 100) : 0,
|
|
86
|
-
},
|
|
87
|
-
};
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
function buildCodexWorkflowEvidence(before, after, applyResult, analysisReport, governanceSummary) {
|
|
91
|
-
const tasks = [
|
|
92
|
-
{
|
|
93
|
-
key: 'discover-without-writes',
|
|
94
|
-
label: 'Discover next actions without writing files',
|
|
95
|
-
passed: before.checkCount > 0 && Array.isArray(before.quickWins),
|
|
96
|
-
evidence: `Baseline audit returned ${before.checkCount} applicable checks and ${before.quickWins.length} quick wins.`,
|
|
97
|
-
},
|
|
98
|
-
{
|
|
99
|
-
key: 'starter-safe-improvement',
|
|
100
|
-
label: 'Apply starter-safe Codex baseline in isolation',
|
|
101
|
-
passed: after.score >= before.score && after.failed <= before.failed,
|
|
102
|
-
evidence: `Score moved ${before.score} -> ${after.score}; failed checks moved ${before.failed} -> ${after.failed}.`,
|
|
103
|
-
},
|
|
104
|
-
{
|
|
105
|
-
key: 'preserve-existing-files',
|
|
106
|
-
label: 'Preserve existing files instead of overwriting them',
|
|
107
|
-
passed: Array.isArray(applyResult.preservedFiles),
|
|
108
|
-
evidence: `${applyResult.preservedFiles ? applyResult.preservedFiles.length : 0} files were preserved instead of overwritten.`,
|
|
109
|
-
},
|
|
110
|
-
{
|
|
111
|
-
key: 'governed-rollout-surface',
|
|
112
|
-
label: 'Expose governed rollout controls',
|
|
113
|
-
passed: governanceSummary.permissionProfiles.length >= 3 && governanceSummary.hookRegistry.length >= 1,
|
|
114
|
-
evidence: `${governanceSummary.permissionProfiles.length} profiles and ${governanceSummary.hookRegistry.length} governance surfaces available.`,
|
|
115
|
-
},
|
|
116
|
-
{
|
|
117
|
-
key: 'domain-pack-guidance',
|
|
118
|
-
label: 'Recommend Codex domain packs for the repo',
|
|
119
|
-
passed: Array.isArray(analysisReport.recommendedDomainPacks) && analysisReport.recommendedDomainPacks.length > 0,
|
|
120
|
-
evidence: (analysisReport.recommendedDomainPacks || []).map((pack) => pack.label).join(', ') || 'No Codex domain pack recommendation generated.',
|
|
121
|
-
},
|
|
122
|
-
{
|
|
123
|
-
key: 'rollback-surface',
|
|
124
|
-
label: 'Emit rollback evidence for writes',
|
|
125
|
-
passed: Boolean(applyResult.rollbackArtifact),
|
|
126
|
-
evidence: applyResult.rollbackArtifact
|
|
127
|
-
? `Rollback artifact emitted at ${applyResult.rollbackArtifact}.`
|
|
128
|
-
: 'No rollback artifact emitted.',
|
|
129
|
-
},
|
|
130
|
-
];
|
|
131
|
-
|
|
132
|
-
const passed = tasks.filter((task) => task.passed).length;
|
|
133
|
-
const total = tasks.length;
|
|
134
|
-
return {
|
|
135
|
-
taskPack: 'codex-baseline',
|
|
136
|
-
tasks,
|
|
137
|
-
summary: {
|
|
138
|
-
passed,
|
|
139
|
-
total,
|
|
140
|
-
coverageScore: total > 0 ? Math.round((passed / total) * 100) : 0,
|
|
141
|
-
},
|
|
142
|
-
};
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
function buildExecutiveSummary(before, after, workflowEvidence) {
|
|
146
|
-
const scoreDelta = after.score - before.score;
|
|
147
|
-
const organicDelta = after.organicScore - before.organicScore;
|
|
148
|
-
const workflowCoverage = workflowEvidence.summary.coverageScore;
|
|
149
|
-
let headline = before.score >= 60
|
|
150
|
-
? 'Setup is already applied — benchmark shows no additional improvement. Run benchmark on a project before running setup to see the full delta.'
|
|
151
|
-
: 'Benchmark did not improve the score in this run.';
|
|
152
|
-
|
|
153
|
-
if (scoreDelta < 0) {
|
|
154
|
-
headline = `Warning: score decreased by ${Math.abs(scoreDelta)} points. Setup may have introduced a regression.`;
|
|
155
|
-
} else if (scoreDelta > 0) {
|
|
156
|
-
headline = `Benchmark improved readiness by ${scoreDelta} points without touching the original repo.`;
|
|
157
|
-
} else if (before.score >= 85 && after.score >= before.score && workflowCoverage >= 80) {
|
|
158
|
-
headline = 'Benchmark confirmed the repo already meets the starter-safe baseline without regression.';
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
return {
|
|
162
|
-
headline,
|
|
163
|
-
scoreDelta,
|
|
164
|
-
organicDelta,
|
|
165
|
-
decisionGuidance: scoreDelta >= 20
|
|
166
|
-
? 'Strong pilot candidate'
|
|
167
|
-
: scoreDelta >= 10
|
|
168
|
-
? 'Promising but needs manual review'
|
|
169
|
-
: (before.score >= 85 && workflowCoverage >= 80
|
|
170
|
-
? 'Use suggest-only mode, domain packs, or task-level benchmarks next'
|
|
171
|
-
: 'Use suggest-only mode before rollout'),
|
|
172
|
-
};
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
function buildPracticalValue(before, after, applyResult) {
|
|
176
|
-
const written = applyResult.writtenFiles || [];
|
|
177
|
-
return {
|
|
178
|
-
denyRulesAdded: written.includes('.claude/settings.json') ? 'yes' : 'no',
|
|
179
|
-
hooksCreated: written.filter(f => f.includes('hooks/')).length,
|
|
180
|
-
commandsCreated: written.filter(f => f.includes('commands/')).length,
|
|
181
|
-
agentsCreated: written.filter(f => f.includes('agents/')).length,
|
|
182
|
-
skillsCreated: written.filter(f => f.includes('skills/')).length,
|
|
183
|
-
rulesCreated: written.filter(f => f.includes('rules/')).length,
|
|
184
|
-
claudeMdCreated: written.includes('CLAUDE.md') ? 'yes' : 'no',
|
|
185
|
-
totalFilesCreated: written.length,
|
|
186
|
-
totalFilesPreserved: (applyResult.preservedFiles || []).length,
|
|
187
|
-
};
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
function buildCaseStudy(before, after, applyResult) {
|
|
191
|
-
return {
|
|
192
|
-
initialState: `Baseline score ${before.score}/100, organic ${before.organicScore}/100.`,
|
|
193
|
-
chosenMode: 'benchmark-on-isolated-copy',
|
|
194
|
-
whatChanged: applyResult.writtenFiles,
|
|
195
|
-
whatWasPreserved: applyResult.preservedFiles,
|
|
196
|
-
measuredResults: {
|
|
197
|
-
scoreDelta: after.score - before.score,
|
|
198
|
-
organicDelta: after.organicScore - before.organicScore,
|
|
199
|
-
passedDelta: after.passed - before.passed,
|
|
200
|
-
},
|
|
201
|
-
practicalValue: buildPracticalValue(before, after, applyResult),
|
|
202
|
-
};
|
|
203
|
-
}
|
|
204
|
-
|
|
11
|
+
|
|
12
|
+
/**
 * Recursively mirror a project tree into a sandbox directory.
 *
 * Entries named `.git`, `node_modules` or `__pycache__` are left out at every
 * level. Regular files are copied, directories are descended into, and
 * symbolic links are not followed or recreated (a note goes to stderr).
 * Any other entry type is silently ignored.
 *
 * @param {string} sourceDir - Directory to read from.
 * @param {string} targetDir - Directory to create (if needed) and fill.
 * @returns {void}
 */
function copyProject(sourceDir, targetDir) {
  const excludedNames = ['.git', 'node_modules', '__pycache__'];

  fs.mkdirSync(targetDir, { recursive: true });

  fs.readdirSync(sourceDir, { withFileTypes: true })
    .filter((dirent) => !excludedNames.includes(dirent.name))
    .forEach((dirent) => {
      const sourcePath = path.join(sourceDir, dirent.name);
      const destinationPath = path.join(targetDir, dirent.name);

      if (dirent.isDirectory()) {
        copyProject(sourcePath, destinationPath);
        return;
      }
      if (dirent.isFile()) {
        fs.copyFileSync(sourcePath, destinationPath);
        return;
      }
      if (dirent.isSymbolicLink && dirent.isSymbolicLink()) {
        // The sandbox never contains links; report the omission instead.
        process.stderr.write(` Note: symlink skipped in benchmark: ${dirent.name}\n`);
      }
    });
}
|
|
31
|
+
|
|
32
|
+
/**
 * Reduce a full audit result to the handful of fields the benchmark reports.
 *
 * @param {Object} result - Audit result object.
 * @returns {{score: *, organicScore: *, passed: *, failed: *, checkCount: *, quickWins: *}}
 *   A new object carrying only the listed fields, copied as-is.
 */
function summarizeAudit(result) {
  const { score, organicScore, passed, failed, checkCount, quickWins } = result;
  return { score, organicScore, passed, failed, checkCount, quickWins };
}
|
|
42
|
+
|
|
43
|
+
/**
 * Build the "maintainer-core" workflow evidence pack from a before/after audit pair.
 *
 * Each task records whether it passed plus a human-readable evidence string.
 * List-valued inputs (`before.quickWins`, `analysisReport.recommendedDomainPacks`,
 * `analysisReport.recommendedMcpPacks`) are treated as empty when they are not
 * arrays, so a missing list marks the task as not passed instead of throwing
 * while the evidence text is being formatted.
 *
 * @param {Object} before - Baseline audit summary (`checkCount`, `quickWins`, `score`, `failed`).
 * @param {Object} after - Post-setup audit summary (`score`, `failed`).
 * @param {Object} analysisReport - Analysis output carrying the recommended pack lists.
 * @param {Object} governanceSummary - Must expose `permissionProfiles` and `hookRegistry` lists.
 * @returns {{taskPack: string, tasks: Object[], summary: {passed: number, total: number, coverageScore: number}}}
 */
function buildWorkflowEvidence(before, after, analysisReport, governanceSummary) {
  const hasQuickWins = Array.isArray(before.quickWins);
  const quickWinCount = hasQuickWins ? before.quickWins.length : 0;
  const domainPacks = Array.isArray(analysisReport.recommendedDomainPacks)
    ? analysisReport.recommendedDomainPacks
    : [];
  const mcpPacks = Array.isArray(analysisReport.recommendedMcpPacks)
    ? analysisReport.recommendedMcpPacks
    : [];

  const tasks = [
    {
      key: 'discover-without-writes',
      label: 'Discover next actions without writing files',
      passed: before.checkCount > 0 && hasQuickWins,
      evidence: `Baseline audit returned ${before.checkCount} applicable checks and ${quickWinCount} quick wins.`,
    },
    {
      key: 'starter-safe-improvement',
      label: 'Apply starter-safe improvements in isolation',
      // A pass means "no regression": the score did not drop and failures did not grow.
      passed: after.score >= before.score && after.failed <= before.failed,
      evidence: `Score moved ${before.score} -> ${after.score}; failed checks moved ${before.failed} -> ${after.failed}.`,
    },
    {
      key: 'governed-rollout-surface',
      label: 'Expose governed rollout controls',
      passed: governanceSummary.permissionProfiles.length >= 3 && governanceSummary.hookRegistry.length >= 1,
      evidence: `${governanceSummary.permissionProfiles.length} profiles and ${governanceSummary.hookRegistry.length} governed hooks available.`,
    },
    {
      key: 'domain-pack-guidance',
      label: 'Recommend a domain pack for the repo',
      passed: domainPacks.length > 0,
      evidence: domainPacks.map((pack) => pack.label).join(', ') || 'No domain pack recommendation generated.',
    },
    {
      key: 'mcp-pack-guidance',
      label: 'Recommend MCP packs when appropriate',
      passed: mcpPacks.length > 0,
      evidence: mcpPacks.map((pack) => pack.label).join(', ') || 'No MCP pack recommendation generated.',
    },
  ];

  const passed = tasks.filter((task) => task.passed).length;
  const total = tasks.length;
  return {
    taskPack: 'maintainer-core',
    tasks,
    summary: {
      passed,
      total,
      coverageScore: total > 0 ? Math.round((passed / total) * 100) : 0,
    },
  };
}
|
|
89
|
+
|
|
90
|
+
/**
 * Build the "codex-baseline" workflow evidence pack from a before/after audit pair.
 *
 * Each task records whether it passed plus a human-readable evidence string.
 * `before.quickWins`, `applyResult.preservedFiles` and
 * `analysisReport.recommendedDomainPacks` are treated as empty when they are
 * not arrays, so the evidence text can always be produced and the counts agree
 * with the `Array.isArray` checks that decide `passed`.
 *
 * @param {Object} before - Baseline audit summary (`checkCount`, `quickWins`, `score`, `failed`).
 * @param {Object} after - Post-setup audit summary (`score`, `failed`).
 * @param {Object} applyResult - Setup result; `preservedFiles` and `rollbackArtifact` are read.
 * @param {Object} analysisReport - Analysis output carrying `recommendedDomainPacks`.
 * @param {Object} governanceSummary - Must expose `permissionProfiles` and `hookRegistry` lists.
 * @returns {{taskPack: string, tasks: Object[], summary: {passed: number, total: number, coverageScore: number}}}
 */
function buildCodexWorkflowEvidence(before, after, applyResult, analysisReport, governanceSummary) {
  const hasQuickWins = Array.isArray(before.quickWins);
  const quickWinCount = hasQuickWins ? before.quickWins.length : 0;
  const hasPreservedList = Array.isArray(applyResult.preservedFiles);
  const preservedCount = hasPreservedList ? applyResult.preservedFiles.length : 0;
  const domainPacks = Array.isArray(analysisReport.recommendedDomainPacks)
    ? analysisReport.recommendedDomainPacks
    : [];

  const tasks = [
    {
      key: 'discover-without-writes',
      label: 'Discover next actions without writing files',
      passed: before.checkCount > 0 && hasQuickWins,
      evidence: `Baseline audit returned ${before.checkCount} applicable checks and ${quickWinCount} quick wins.`,
    },
    {
      key: 'starter-safe-improvement',
      label: 'Apply starter-safe Codex baseline in isolation',
      // A pass means "no regression": the score did not drop and failures did not grow.
      passed: after.score >= before.score && after.failed <= before.failed,
      evidence: `Score moved ${before.score} -> ${after.score}; failed checks moved ${before.failed} -> ${after.failed}.`,
    },
    {
      key: 'preserve-existing-files',
      label: 'Preserve existing files instead of overwriting them',
      // Passing only requires that setup reported a preserved-files list, even an empty one.
      passed: hasPreservedList,
      evidence: `${preservedCount} files were preserved instead of overwritten.`,
    },
    {
      key: 'governed-rollout-surface',
      label: 'Expose governed rollout controls',
      passed: governanceSummary.permissionProfiles.length >= 3 && governanceSummary.hookRegistry.length >= 1,
      evidence: `${governanceSummary.permissionProfiles.length} profiles and ${governanceSummary.hookRegistry.length} governance surfaces available.`,
    },
    {
      key: 'domain-pack-guidance',
      label: 'Recommend Codex domain packs for the repo',
      passed: domainPacks.length > 0,
      evidence: domainPacks.map((pack) => pack.label).join(', ') || 'No Codex domain pack recommendation generated.',
    },
    {
      key: 'rollback-surface',
      label: 'Emit rollback evidence for writes',
      passed: Boolean(applyResult.rollbackArtifact),
      evidence: applyResult.rollbackArtifact
        ? `Rollback artifact emitted at ${applyResult.rollbackArtifact}.`
        : 'No rollback artifact emitted.',
    },
  ];

  const passed = tasks.filter((task) => task.passed).length;
  const total = tasks.length;
  return {
    taskPack: 'codex-baseline',
    tasks,
    summary: {
      passed,
      total,
      coverageScore: total > 0 ? Math.round((passed / total) * 100) : 0,
    },
  };
}
|
|
144
|
+
|
|
145
|
+
/**
 * Produce the headline and rollout recommendation for a benchmark run.
 *
 * The headline depends on the direction of the score change; when the score is
 * flat it depends on how high the baseline already was. The recommendation is
 * tiered by the size of the score gain.
 *
 * @param {Object} before - Baseline audit summary (`score`, `organicScore`).
 * @param {Object} after - Post-setup audit summary (`score`, `organicScore`).
 * @param {Object} workflowEvidence - Evidence pack; `summary.coverageScore` is read.
 * @returns {{headline: string, scoreDelta: number, organicDelta: number, decisionGuidance: string}}
 */
function buildExecutiveSummary(before, after, workflowEvidence) {
  const scoreDelta = after.score - before.score;
  const organicDelta = after.organicScore - before.organicScore;
  const workflowCoverage = workflowEvidence.summary.coverageScore;

  let headline;
  if (scoreDelta < 0) {
    headline = `Warning: score decreased by ${Math.abs(scoreDelta)} points. Setup may have introduced a regression.`;
  } else if (scoreDelta > 0) {
    headline = `Benchmark improved readiness by ${scoreDelta} points without touching the original repo.`;
  } else if (before.score >= 85 && after.score >= before.score && workflowCoverage >= 80) {
    headline = 'Benchmark confirmed the repo already meets the starter-safe baseline without regression.';
  } else if (before.score >= 60) {
    // Flat result on a mid-to-high baseline: setup most likely ran earlier.
    headline = 'Setup is already applied — benchmark shows no additional improvement. Run benchmark on a project before running setup to see the full delta.';
  } else {
    headline = 'Benchmark did not improve the score in this run.';
  }

  let decisionGuidance;
  if (scoreDelta >= 20) {
    decisionGuidance = 'Strong pilot candidate';
  } else if (scoreDelta >= 10) {
    decisionGuidance = 'Promising but needs manual review';
  } else if (before.score >= 85 && workflowCoverage >= 80) {
    decisionGuidance = 'Use suggest-only mode, domain packs, or task-level benchmarks next';
  } else {
    decisionGuidance = 'Use suggest-only mode before rollout';
  }

  return { headline, scoreDelta, organicDelta, decisionGuidance };
}
|
|
174
|
+
|
|
175
|
+
/**
 * Count what setup wrote, grouped by artifact kind.
 *
 * Written paths are compared after converting backslashes to forward slashes,
 * so entries such as `.claude\hooks\x.sh` are classified the same way as
 * `.claude/hooks/x.sh`.
 *
 * @param {Object} before - Unused; kept so the call signature stays stable.
 * @param {Object} after - Unused; kept so the call signature stays stable.
 * @param {Object} applyResult - Setup result; `writtenFiles` and `preservedFiles`
 *   are read and treated as empty when absent.
 * @returns {Object} Counts and yes/no flags describing the written artifacts.
 */
function buildPracticalValue(before, after, applyResult) {
  const written = (applyResult.writtenFiles || []).map((file) => file.replace(/\\/g, '/'));
  // Number of written paths that contain the given directory segment.
  const countUnder = (segment) => written.filter((file) => file.includes(segment)).length;

  return {
    denyRulesAdded: written.includes('.claude/settings.json') ? 'yes' : 'no',
    hooksCreated: countUnder('hooks/'),
    commandsCreated: countUnder('commands/'),
    agentsCreated: countUnder('agents/'),
    skillsCreated: countUnder('skills/'),
    rulesCreated: countUnder('rules/'),
    claudeMdCreated: written.includes('CLAUDE.md') ? 'yes' : 'no',
    totalFilesCreated: written.length,
    totalFilesPreserved: (applyResult.preservedFiles || []).length,
  };
}
|
|
189
|
+
|
|
190
|
+
/**
 * Assemble the case-study section of the benchmark report.
 *
 * `whatChanged` and `whatWasPreserved` fall back to empty arrays when setup did
 * not report them, so consumers that join these lists always receive arrays.
 *
 * @param {Object} before - Baseline audit summary (`score`, `organicScore`, `passed`).
 * @param {Object} after - Post-setup audit summary (`score`, `organicScore`, `passed`).
 * @param {Object} applyResult - Setup result; `writtenFiles` and `preservedFiles` are read.
 * @returns {Object} Case study with initial state, mode, file lists, deltas and practical value.
 */
function buildCaseStudy(before, after, applyResult) {
  return {
    initialState: `Baseline score ${before.score}/100, organic ${before.organicScore}/100.`,
    chosenMode: 'benchmark-on-isolated-copy',
    whatChanged: applyResult.writtenFiles || [],
    whatWasPreserved: applyResult.preservedFiles || [],
    measuredResults: {
      scoreDelta: after.score - before.score,
      organicDelta: after.organicScore - before.organicScore,
      passedDelta: after.passed - before.passed,
    },
    practicalValue: buildPracticalValue(before, after, applyResult),
  };
}
|
|
204
|
+
|
|
205
205
|
function renderBenchmarkMarkdown(report) {
|
|
206
206
|
return [
|
|
207
207
|
'# NERVIQ CLI Benchmark Report',
|
|
@@ -232,65 +232,65 @@ function renderBenchmarkMarkdown(report) {
|
|
|
232
232
|
`- Projected score delta: ${report.delta.score}`,
|
|
233
233
|
`- Projected organic score delta: ${report.delta.organicScore}`,
|
|
234
234
|
`- Passed checks delta: ${report.delta.passed}`,
|
|
235
|
-
'',
|
|
236
|
-
'## Executive Summary',
|
|
237
|
-
`- ${report.executiveSummary.headline}`,
|
|
238
|
-
`- Recommendation: ${report.executiveSummary.decisionGuidance}`,
|
|
239
|
-
'',
|
|
240
|
-
'## Workflow Evidence',
|
|
241
|
-
`- Task pack: ${report.workflowEvidence.taskPack}`,
|
|
242
|
-
`- Coverage: ${report.workflowEvidence.summary.passed}/${report.workflowEvidence.summary.total} (${report.workflowEvidence.summary.coverageScore}%)`,
|
|
243
|
-
...report.workflowEvidence.tasks.map(task => `- ${task.label}: ${task.passed ? 'pass' : 'not yet'} — ${task.evidence}`),
|
|
244
|
-
'',
|
|
245
|
-
'## Case Study',
|
|
246
|
-
`- Initial state: ${report.caseStudy.initialState}`,
|
|
247
|
-
`- Chosen mode: ${report.caseStudy.chosenMode}`,
|
|
248
|
-
`- What changed: ${report.caseStudy.whatChanged.join(', ') || 'none'}`,
|
|
249
|
-
`- What was preserved: ${report.caseStudy.whatWasPreserved.join(', ') || 'none'}`,
|
|
250
|
-
'',
|
|
251
|
-
].join('\n');
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
/**
|
|
255
|
-
* Run a before/after benchmark on an isolated copy of the project.
|
|
256
|
-
* @param {Object} options - Benchmark options.
|
|
257
|
-
* @param {string} options.dir - Project directory to benchmark.
|
|
258
|
-
* @param {string} [options.external] - External repo path to benchmark instead of cwd.
|
|
259
|
-
* @param {string} [options.profile] - Permission profile to use during setup.
|
|
260
|
-
* @param {string[]} [options.mcpPacks] - MCP pack keys to include in setup.
|
|
261
|
-
* @returns {Promise<Object>} Benchmark report with before/after scores, delta, and workflow evidence.
|
|
262
|
-
*/
|
|
263
|
-
async function runBenchmark(options) {
|
|
264
|
-
const platform = options.platform || 'claude';
|
|
265
|
-
const sourceDir = options.external || options.dir;
|
|
266
|
-
if (options.external && !fs.existsSync(options.external)) {
|
|
267
|
-
throw new Error(`External repo path not found: ${options.external}`);
|
|
268
|
-
}
|
|
269
|
-
const before = await audit({ dir: sourceDir, silent: true, platform });
|
|
270
|
-
const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'nerviq-benchmark-'));
|
|
271
|
-
const sandboxDir = path.join(tempRoot, 'repo');
|
|
272
|
-
|
|
273
|
-
try {
|
|
274
|
-
copyProject(sourceDir, sandboxDir);
|
|
275
|
-
const applyResult = await setup({
|
|
276
|
-
dir: sandboxDir,
|
|
277
|
-
auto: true,
|
|
278
|
-
silent: true,
|
|
279
|
-
profile: options.profile,
|
|
280
|
-
mcpPacks: options.mcpPacks || [],
|
|
281
|
-
platform,
|
|
282
|
-
});
|
|
283
|
-
const after = await audit({ dir: sandboxDir, silent: true, platform });
|
|
284
|
-
const analysisReport = await analyzeProject({ dir: sandboxDir, mode: 'suggest-only', platform });
|
|
285
|
-
const governanceSummary = getGovernanceSummary(platform);
|
|
286
|
-
const workflowEvidence = platform === 'codex'
|
|
287
|
-
? buildCodexWorkflowEvidence(before, after, applyResult, analysisReport, governanceSummary)
|
|
288
|
-
: buildWorkflowEvidence(before, after, analysisReport, governanceSummary);
|
|
289
|
-
|
|
290
|
-
return {
|
|
291
|
-
schemaVersion: 1,
|
|
292
|
-
generatedBy: `nerviq@${version}`,
|
|
293
|
-
createdAt: new Date().toISOString(),
|
|
235
|
+
'',
|
|
236
|
+
'## Executive Summary',
|
|
237
|
+
`- ${report.executiveSummary.headline}`,
|
|
238
|
+
`- Recommendation: ${report.executiveSummary.decisionGuidance}`,
|
|
239
|
+
'',
|
|
240
|
+
'## Workflow Evidence',
|
|
241
|
+
`- Task pack: ${report.workflowEvidence.taskPack}`,
|
|
242
|
+
`- Coverage: ${report.workflowEvidence.summary.passed}/${report.workflowEvidence.summary.total} (${report.workflowEvidence.summary.coverageScore}%)`,
|
|
243
|
+
...report.workflowEvidence.tasks.map(task => `- ${task.label}: ${task.passed ? 'pass' : 'not yet'} — ${task.evidence}`),
|
|
244
|
+
'',
|
|
245
|
+
'## Case Study',
|
|
246
|
+
`- Initial state: ${report.caseStudy.initialState}`,
|
|
247
|
+
`- Chosen mode: ${report.caseStudy.chosenMode}`,
|
|
248
|
+
`- What changed: ${report.caseStudy.whatChanged.join(', ') || 'none'}`,
|
|
249
|
+
`- What was preserved: ${report.caseStudy.whatWasPreserved.join(', ') || 'none'}`,
|
|
250
|
+
'',
|
|
251
|
+
].join('\n');
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
/**
|
|
255
|
+
* Run a before/after benchmark on an isolated copy of the project.
|
|
256
|
+
* @param {Object} options - Benchmark options.
|
|
257
|
+
* @param {string} options.dir - Project directory to benchmark.
|
|
258
|
+
* @param {string} [options.external] - External repo path to benchmark instead of cwd.
|
|
259
|
+
* @param {string} [options.profile] - Permission profile to use during setup.
|
|
260
|
+
* @param {string[]} [options.mcpPacks] - MCP pack keys to include in setup.
|
|
261
|
+
* @returns {Promise<Object>} Benchmark report with before/after scores, delta, and workflow evidence.
|
|
262
|
+
*/
|
|
263
|
+
async function runBenchmark(options) {
|
|
264
|
+
const platform = options.platform || 'claude';
|
|
265
|
+
const sourceDir = options.external || options.dir;
|
|
266
|
+
if (options.external && !fs.existsSync(options.external)) {
|
|
267
|
+
throw new Error(`External repo path not found: ${options.external}`);
|
|
268
|
+
}
|
|
269
|
+
const before = await audit({ dir: sourceDir, silent: true, platform });
|
|
270
|
+
const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'nerviq-benchmark-'));
|
|
271
|
+
const sandboxDir = path.join(tempRoot, 'repo');
|
|
272
|
+
|
|
273
|
+
try {
|
|
274
|
+
copyProject(sourceDir, sandboxDir);
|
|
275
|
+
const applyResult = await setup({
|
|
276
|
+
dir: sandboxDir,
|
|
277
|
+
auto: true,
|
|
278
|
+
silent: true,
|
|
279
|
+
profile: options.profile,
|
|
280
|
+
mcpPacks: options.mcpPacks || [],
|
|
281
|
+
platform,
|
|
282
|
+
});
|
|
283
|
+
const after = await audit({ dir: sandboxDir, silent: true, platform });
|
|
284
|
+
const analysisReport = await analyzeProject({ dir: sandboxDir, mode: 'suggest-only', platform });
|
|
285
|
+
const governanceSummary = getGovernanceSummary(platform);
|
|
286
|
+
const workflowEvidence = platform === 'codex'
|
|
287
|
+
? buildCodexWorkflowEvidence(before, after, applyResult, analysisReport, governanceSummary)
|
|
288
|
+
: buildWorkflowEvidence(before, after, analysisReport, governanceSummary);
|
|
289
|
+
|
|
290
|
+
return {
|
|
291
|
+
schemaVersion: 1,
|
|
292
|
+
generatedBy: `nerviq@${version}`,
|
|
293
|
+
createdAt: new Date().toISOString(),
|
|
294
294
|
directory: sourceDir,
|
|
295
295
|
platform,
|
|
296
296
|
scoreSemantics: {
|
|
@@ -299,34 +299,34 @@ async function runBenchmark(options) {
|
|
|
299
299
|
organic: 'repo-owned config quality excluding starter-generated Nerviq assets',
|
|
300
300
|
},
|
|
301
301
|
methodology: [
|
|
302
|
-
'Run a baseline audit on the source repo.',
|
|
303
|
-
'Copy the repo into a temporary isolated workspace.',
|
|
304
|
-
`Apply starter-safe ${platform === 'codex' ? 'Codex' : 'Claude'} artifacts only on the isolated copy.`,
|
|
305
|
-
'Re-run the audit and compare the results.',
|
|
306
|
-
],
|
|
307
|
-
before: summarizeAudit(before),
|
|
308
|
-
after: summarizeAudit(after),
|
|
309
|
-
delta: {
|
|
310
|
-
score: after.score - before.score,
|
|
311
|
-
organicScore: after.organicScore - before.organicScore,
|
|
312
|
-
passed: after.passed - before.passed,
|
|
313
|
-
failed: after.failed - before.failed,
|
|
314
|
-
},
|
|
315
|
-
workflowEvidence,
|
|
316
|
-
executiveSummary: buildExecutiveSummary(before, after, workflowEvidence),
|
|
317
|
-
caseStudy: buildCaseStudy(before, after, applyResult),
|
|
318
|
-
};
|
|
319
|
-
} finally {
|
|
320
|
-
fs.rmSync(tempRoot, { recursive: true, force: true });
|
|
321
|
-
}
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
function printBenchmark(report, options = {}) {
|
|
325
|
-
if (options.json) {
|
|
326
|
-
console.log(JSON.stringify(report, null, 2));
|
|
327
|
-
return;
|
|
328
|
-
}
|
|
329
|
-
|
|
302
|
+
'Run a baseline audit on the source repo.',
|
|
303
|
+
'Copy the repo into a temporary isolated workspace.',
|
|
304
|
+
`Apply starter-safe ${platform === 'codex' ? 'Codex' : 'Claude'} artifacts only on the isolated copy.`,
|
|
305
|
+
'Re-run the audit and compare the results.',
|
|
306
|
+
],
|
|
307
|
+
before: summarizeAudit(before),
|
|
308
|
+
after: summarizeAudit(after),
|
|
309
|
+
delta: {
|
|
310
|
+
score: after.score - before.score,
|
|
311
|
+
organicScore: after.organicScore - before.organicScore,
|
|
312
|
+
passed: after.passed - before.passed,
|
|
313
|
+
failed: after.failed - before.failed,
|
|
314
|
+
},
|
|
315
|
+
workflowEvidence,
|
|
316
|
+
executiveSummary: buildExecutiveSummary(before, after, workflowEvidence),
|
|
317
|
+
caseStudy: buildCaseStudy(before, after, applyResult),
|
|
318
|
+
};
|
|
319
|
+
} finally {
|
|
320
|
+
fs.rmSync(tempRoot, { recursive: true, force: true });
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
function printBenchmark(report, options = {}) {
|
|
325
|
+
if (options.json) {
|
|
326
|
+
console.log(JSON.stringify(report, null, 2));
|
|
327
|
+
return;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
330
|
console.log('');
|
|
331
331
|
console.log(' nerviq benchmark');
|
|
332
332
|
console.log(' ═══════════════════════════════════════');
|
|
@@ -350,17 +350,17 @@ function printBenchmark(report, options = {}) {
|
|
|
350
350
|
}
|
|
351
351
|
console.log('');
|
|
352
352
|
}
|
|
353
|
-
|
|
354
|
-
function writeBenchmarkReport(report, outFile) {
|
|
355
|
-
fs.mkdirSync(path.dirname(outFile), { recursive: true });
|
|
356
|
-
const content = path.extname(outFile).toLowerCase() === '.md'
|
|
357
|
-
? renderBenchmarkMarkdown(report)
|
|
358
|
-
: JSON.stringify(report, null, 2);
|
|
359
|
-
fs.writeFileSync(outFile, content, 'utf8');
|
|
360
|
-
}
|
|
361
|
-
|
|
362
|
-
module.exports = {
|
|
363
|
-
runBenchmark,
|
|
364
|
-
printBenchmark,
|
|
365
|
-
writeBenchmarkReport,
|
|
366
|
-
};
|
|
353
|
+
|
|
354
|
+
/**
 * Persist a benchmark report to disk, creating parent directories as needed.
 *
 * A target whose extension is `.md` (case-insensitive) is written as rendered
 * Markdown; every other target is written as pretty-printed JSON.
 *
 * @param {Object} report - Benchmark report to serialise.
 * @param {string} outFile - Destination file path.
 * @returns {void}
 */
function writeBenchmarkReport(report, outFile) {
  fs.mkdirSync(path.dirname(outFile), { recursive: true });

  const extension = path.extname(outFile).toLowerCase();
  let content;
  if (extension === '.md') {
    content = renderBenchmarkMarkdown(report);
  } else {
    content = JSON.stringify(report, null, 2);
  }

  fs.writeFileSync(outFile, content, 'utf8');
}
|
|
361
|
+
|
|
362
|
+
module.exports = {
|
|
363
|
+
runBenchmark,
|
|
364
|
+
printBenchmark,
|
|
365
|
+
writeBenchmarkReport,
|
|
366
|
+
};
|