claudex-setup 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.8.0] - 2026-03-31
4
+
5
+ ### Added
6
+ - domain pack recommendations for backend, frontend, data, infra, OSS, and enterprise-governed repos
7
+ - MCP pack recommendations and merge support for `context7-docs` and `next-devtools`
8
+ - workflow-evidence coverage in benchmark reports
9
+ - runtime settings overlays so `apply --plan` still respects current `--profile` and `--mcp-pack` flags
10
+
11
+ ### Changed
12
+ - benchmark now respects the selected profile and MCP pack options during isolated-copy runs
13
+ - governance and suggest-only outputs now expose domain packs and MCP packs directly
14
+ - README and docs clarify the local-vs-opt-in-network boundary for core flows vs `deep-review`
15
+ - audit output now frames `setup` as starter-safe generation instead of an automatic full fix
16
+
3
17
  ## [1.7.0] - 2026-03-31
4
18
 
5
19
  ### Added
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 CLAUDEX Project
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -10,9 +10,10 @@
10
10
 
11
11
  ```bash
12
12
  npx claudex-setup # Audit your project (10 seconds)
13
- npx claudex-setup setup # Auto-fix everything
13
+ npx claudex-setup setup # Create a starter-safe baseline
14
14
  npx claudex-setup augment # Repo-aware plan, no writes
15
15
  npx claudex-setup plan # Export proposal bundles with file previews
16
+ npx claudex-setup governance # See permission profiles, packs, and pilot guidance
16
17
  npx claudex-setup benchmark # Measure before/after in an isolated temp copy
17
18
  npx claudex-setup --threshold 60 # Fail CI if score is below 60
18
19
  ```
@@ -48,15 +49,15 @@ No install. No config. No dependencies.
48
49
  design: none (0/2)
49
50
  devops: none (0/4)
50
51
 
51
- 29/63 checks passing
52
- Run npx claudex-setup setup to fix
52
+ 29/62 checks passing
53
+ Run npx claudex-setup setup to create a starter-safe baseline
53
54
  ```
54
55
 
55
56
  ## All Commands
56
57
 
57
58
  | Command | What it does |
58
59
  |---------|-------------|
59
- | `npx claudex-setup` | **Discover** - Score 0-100 against 63 checks |
60
+ | `npx claudex-setup` | **Discover** - Score 0-100 against 62 checks |
60
61
  | `npx claudex-setup discover` | **Discover** - Alias for audit mode |
61
62
  | `npx claudex-setup setup` | **Starter** - Smart CLAUDE.md + hooks + commands + agents |
62
63
  | `npx claudex-setup starter` | **Starter** - Alias for setup mode |
@@ -81,6 +82,8 @@ No install. No config. No dependencies.
81
82
  | `--out FILE` | Write JSON or markdown output to a file |
82
83
  | `--plan FILE` | Load a previously exported plan file |
83
84
  | `--only A,B` | Limit plan/apply to selected proposal ids |
85
+ | `--profile NAME` | Choose a permission profile for write-capable flows |
86
+ | `--mcp-pack A,B` | Merge named MCP packs into generated or patched settings |
84
87
  | `--dry-run` | Preview apply without writing files |
85
88
  | `--verbose` | Show all recommendations (not just critical/high) |
86
89
  | `--json` | Machine-readable JSON output (for CI) |
@@ -121,7 +124,7 @@ Each proposal bundle includes:
121
124
 
122
125
  - trigger reasons tied to failed checks
123
126
  - file previews and diff-style output
124
- - `create` vs `manual-review` classification
127
+ - `create`, `patch`, or `manual-review` classification
125
128
  - risk/confidence labels
126
129
 
127
130
  Apply only the bundles you want:
@@ -130,7 +133,7 @@ Apply only the bundles you want:
130
133
  npx claudex-setup apply --plan claudex-plan.json --only claude-md,hooks
131
134
  ```
132
135
 
133
- `apply` creates rollback manifests and activity artifacts under `.claude/claudex-setup/`, so every applied batch has a paper trail and a delete-based rollback path.
136
+ `apply` creates rollback manifests and activity artifacts under `.claude/claudex-setup/`, so every applied batch has a paper trail and a create-or-patch rollback path.
134
137
 
135
138
  ## Governance And Pilot Readiness
136
139
 
@@ -145,8 +148,24 @@ It exposes:
145
148
  - permission profiles: `read-only`, `suggest-only`, `safe-write`, `power-user`, `internal-research`
146
149
  - hook registry with trigger point, purpose, side effects, risk, and rollback path
147
150
  - policy packs for baseline engineering, security-sensitive repos, OSS, and regulated-lite teams
151
+ - domain packs for backend, frontend, data, infra, OSS, and enterprise-governed repos
152
+ - MCP packs for live docs and framework-aware tooling such as Context7 and Next.js devtools
148
153
  - a pilot rollout kit with scope, approvals, success metrics, and rollback expectations
149
154
 
155
+ ## Domain Packs And MCP Packs
156
+
157
+ `augment` and `suggest-only` now recommend repo-shaped guidance instead of giving every project the same advice.
158
+
159
+ - domain packs identify the repo shape: `backend-api`, `frontend-ui`, `data-pipeline`, `infra-platform`, `oss-library`, `enterprise-governed`
160
+ - MCP packs recommend current-tooling companions: `context7-docs` for live docs, `next-devtools` for Next.js repos
161
+ - write-capable flows can merge MCP packs directly into `.claude/settings.json`
162
+
163
+ ```bash
164
+ npx claudex-setup suggest-only --json
165
+ npx claudex-setup setup --mcp-pack context7-docs
166
+ npx claudex-setup apply --plan claudex-plan.json --only hooks --mcp-pack context7-docs,next-devtools
167
+ ```
168
+
150
169
  ## Benchmark And Evidence
151
170
 
152
171
  Use `benchmark` to measure the impact of starter-safe improvements without modifying your working repo:
@@ -160,9 +179,11 @@ Benchmark mode:
160
179
  - runs a baseline audit on your repo
161
180
  - copies the repo to an isolated temp workspace
162
181
  - applies starter-safe artifacts only in the copy
163
- - reruns the audit and emits before/after deltas, a case-study summary, and an executive recommendation
182
+ - reruns the audit and emits before/after deltas, workflow-evidence coverage, a case-study summary, and an executive recommendation
183
+
184
+ ## 62 Checks Across 14 Categories
164
185
 
165
- ## 63 Checks Across 14 Categories
186
+ The exact applicable count can be lower on a given repo because stack-specific checks are skipped when they do not apply.
166
187
 
167
188
  | Category | Checks | Key items |
168
189
  |----------|-------:|-----------|
@@ -206,7 +227,7 @@ jobs:
206
227
  runs-on: ubuntu-latest
207
228
  steps:
208
229
  - uses: actions/checkout@v4
209
- - uses: DnaFin/claudex-setup@main
230
+ - uses: DnaFin/claudex-setup@v1.8.0
210
231
  with:
211
232
  threshold: 50
212
233
  ```
@@ -230,7 +251,7 @@ Already have a solid CLAUDE.md and hooks? Two things for you:
230
251
  npx claudex-setup deep-review
231
252
  ```
232
253
 
233
- Claude reads your actual config and gives specific feedback: what's strong, what has issues, what's missing for your stack. Not pattern matching real analysis. Your config goes to Anthropic API only, we never see it.
254
+ Claude reads your actual config and gives specific feedback: what's strong, what has issues, what's missing for your stack. This is an AI-assisted review, not a local heuristic audit. Your config goes to the Anthropic API only when you run this command; we do not receive it.
234
255
 
235
256
  ### Quality-Deep Checks
236
257
 
@@ -252,8 +273,9 @@ These checks evaluate **quality**, not just existence. A well-configured project
252
273
 
253
274
  ## Privacy
254
275
 
255
- - **Zero dependencies** - nothing to audit
256
- - **Runs 100% locally** - no cloud processing
276
+ - **Zero dependencies** - nothing extra to audit
277
+ - **Core flows run locally** - audit, setup, augment, plan, apply, governance, and benchmark run on your machine
278
+ - **Deep review is opt-in** - only `deep-review` sends selected config to Anthropic for analysis
257
279
  - **Benchmark uses an isolated temp copy** - your live repo is not touched
258
280
  - **Anonymous insights** - opt-in, no PII, no file contents (enable with `--insights`)
259
281
  - **MIT Licensed** - use anywhere
package/bin/cli.js CHANGED
@@ -4,7 +4,7 @@ const { audit } = require('../src/audit');
4
4
  const { setup } = require('../src/setup');
5
5
  const { analyzeProject, printAnalysis } = require('../src/analyze');
6
6
  const { buildProposalBundle, printProposalBundle, writePlanFile, applyProposalBundle, printApplyResult } = require('../src/plans');
7
- const { getGovernanceSummary, printGovernanceSummary } = require('../src/governance');
7
+ const { getGovernanceSummary, printGovernanceSummary, ensureWritableProfile } = require('../src/governance');
8
8
  const { runBenchmark, printBenchmark, writeBenchmarkReport } = require('../src/benchmark');
9
9
  const { version } = require('../package.json');
10
10
 
@@ -58,12 +58,14 @@ function parseArgs(rawArgs) {
58
58
  let out = null;
59
59
  let planFile = null;
60
60
  let only = [];
61
+ let profile = 'safe-write';
62
+ let mcpPacks = [];
61
63
  let commandSet = false;
62
64
 
63
65
  for (let i = 0; i < rawArgs.length; i++) {
64
66
  const arg = rawArgs[i];
65
67
 
66
- if (arg === '--threshold' || arg === '--out' || arg === '--plan' || arg === '--only') {
68
+ if (arg === '--threshold' || arg === '--out' || arg === '--plan' || arg === '--only' || arg === '--profile' || arg === '--mcp-pack') {
67
69
  const value = rawArgs[i + 1];
68
70
  if (!value || value.startsWith('--')) {
69
71
  throw new Error(`${arg} requires a value`);
@@ -72,6 +74,8 @@ function parseArgs(rawArgs) {
72
74
  if (arg === '--out') out = value;
73
75
  if (arg === '--plan') planFile = value;
74
76
  if (arg === '--only') only = value.split(',').map(item => item.trim()).filter(Boolean);
77
+ if (arg === '--profile') profile = value.trim();
78
+ if (arg === '--mcp-pack') mcpPacks = value.split(',').map(item => item.trim()).filter(Boolean);
75
79
  i++;
76
80
  continue;
77
81
  }
@@ -96,6 +100,16 @@ function parseArgs(rawArgs) {
96
100
  continue;
97
101
  }
98
102
 
103
+ if (arg.startsWith('--profile=')) {
104
+ profile = arg.split('=').slice(1).join('=').trim();
105
+ continue;
106
+ }
107
+
108
+ if (arg.startsWith('--mcp-pack=')) {
109
+ mcpPacks = arg.split('=').slice(1).join('=').split(',').map(item => item.trim()).filter(Boolean);
110
+ continue;
111
+ }
112
+
99
113
  if (arg.startsWith('--')) {
100
114
  flags.push(arg);
101
115
  continue;
@@ -109,13 +123,13 @@ function parseArgs(rawArgs) {
109
123
 
110
124
  const normalizedCommand = COMMAND_ALIASES[command] || command;
111
125
 
112
- return { flags, command, normalizedCommand, threshold, out, planFile, only };
126
+ return { flags, command, normalizedCommand, threshold, out, planFile, only, profile, mcpPacks };
113
127
  }
114
128
 
115
129
  const HELP = `
116
130
  claudex-setup v${version}
117
131
  Audit and optimize any project for Claude Code.
118
- Backed by research from 1,107 cataloged Claude Code entries.
132
+ Backed by CLAUDEX research and evidence.
119
133
 
120
134
  Usage:
121
135
  npx claudex-setup Run audit on current directory
@@ -140,6 +154,8 @@ const HELP = `
140
154
  --out FILE Write JSON or markdown output to a file
141
155
  --plan FILE Load a previously exported plan file
142
156
  --only A,B Limit plan/apply to selected proposal ids or technique keys
157
+ --profile NAME Choose permission profile (read-only, suggest-only, safe-write, power-user, internal-research)
158
+ --mcp-pack A,B Merge named MCP packs into generated settings (e.g. context7-docs,next-devtools)
143
159
  --dry-run Preview apply without writing files
144
160
  --verbose Show all recommendations (not just critical/high)
145
161
  --json Output as JSON (for CI pipelines)
@@ -153,7 +169,11 @@ const HELP = `
153
169
  npx claudex-setup augment
154
170
  npx claudex-setup suggest-only --json
155
171
  npx claudex-setup plan --out claudex-plan.json
172
+ npx claudex-setup plan --profile safe-write
173
+ npx claudex-setup setup --mcp-pack context7-docs
156
174
  npx claudex-setup apply --plan claudex-plan.json --only hooks,commands
175
+ npx claudex-setup apply --mcp-pack context7-docs,next-devtools --only hooks
176
+ npx claudex-setup apply --profile power-user --only claude-md,hooks
157
177
  npx claudex-setup governance --json
158
178
  npx claudex-setup benchmark --out benchmark.md
159
179
  npx claudex-setup --json --threshold 60
@@ -195,6 +215,8 @@ async function main() {
195
215
  out: parsed.out,
196
216
  planFile: parsed.planFile,
197
217
  only: parsed.only,
218
+ profile: parsed.profile,
219
+ mcpPacks: parsed.mcpPacks,
198
220
  dir: process.cwd()
199
221
  };
200
222
 
@@ -219,6 +241,15 @@ async function main() {
219
241
  process.exit(1);
220
242
  }
221
243
 
244
+ if (['setup', 'apply', 'benchmark'].includes(normalizedCommand)) {
245
+ try {
246
+ ensureWritableProfile(options.profile, normalizedCommand, options.dryRun);
247
+ } catch (err) {
248
+ console.error(`\n Error: ${err.message}\n`);
249
+ process.exit(1);
250
+ }
251
+ }
252
+
222
253
  try {
223
254
  if (normalizedCommand === 'badge') {
224
255
  const { getBadgeMarkdown } = require('../src/badge');
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "claudex-setup",
3
- "version": "1.7.0",
4
- "description": "Audit and optimize any project for Claude Code. Powered by 1107 verified techniques.",
3
+ "version": "1.8.0",
4
+ "description": "Audit and improve Claude Code readiness with discover, plan, apply, governance, and benchmark workflows.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
7
7
  "claudex-setup": "bin/cli.js"
@@ -14,6 +14,7 @@
14
14
  ],
15
15
  "scripts": {
16
16
  "start": "node bin/cli.js",
17
+ "build": "npm pack --dry-run",
17
18
  "test": "node test/run.js"
18
19
  },
19
20
  "keywords": [
package/src/analyze.js CHANGED
@@ -7,6 +7,8 @@ const path = require('path');
7
7
  const { audit } = require('./audit');
8
8
  const { ProjectContext } = require('./context');
9
9
  const { STACKS } = require('./techniques');
10
+ const { detectDomainPacks } = require('./domain-packs');
11
+ const { recommendMcpPacks } = require('./mcp-packs');
10
12
 
11
13
  const COLORS = {
12
14
  reset: '\x1b[0m',
@@ -275,6 +277,8 @@ async function analyzeProject(options) {
275
277
  const metadata = detectProjectMetadata(ctx);
276
278
  const maturity = detectMaturity(assets);
277
279
  const mainDirs = detectMainDirs(ctx);
280
+ const recommendedDomainPacks = detectDomainPacks(ctx, stacks, assets);
281
+ const recommendedMcpPacks = recommendMcpPacks(stacks, recommendedDomainPacks);
278
282
 
279
283
  const report = {
280
284
  mode,
@@ -284,6 +288,7 @@ async function analyzeProject(options) {
284
288
  description: metadata.description,
285
289
  directory: options.dir,
286
290
  stacks: stacks.map(s => s.label),
291
+ domains: recommendedDomainPacks.map(pack => pack.label),
287
292
  maturity,
288
293
  score: auditResult.score,
289
294
  organicScore: auditResult.organicScore,
@@ -308,6 +313,8 @@ async function analyzeProject(options) {
308
313
  gapsIdentified: toGaps(auditResult.results),
309
314
  topNextActions: auditResult.quickWins,
310
315
  recommendedImprovements: toRecommendations(auditResult),
316
+ recommendedDomainPacks,
317
+ recommendedMcpPacks,
311
318
  riskNotes: buildRiskNotes(auditResult, assets, maturity),
312
319
  optionalModules: buildOptionalModules(stacks, assets),
313
320
  };
@@ -332,6 +339,7 @@ function printAnalysis(report, options = {}) {
332
339
  console.log(c(' Project Summary', 'blue'));
333
340
  console.log(` ${report.projectSummary.name}${report.projectSummary.description ? ` — ${report.projectSummary.description}` : ''}`);
334
341
  console.log(c(` Stack: ${report.projectSummary.stacks.join(', ') || 'Unknown'}`, 'dim'));
342
+ console.log(c(` Domain packs: ${report.projectSummary.domains.join(', ') || 'Baseline General'}`, 'dim'));
335
343
  console.log(c(` Maturity: ${report.projectSummary.maturity} | Score: ${report.projectSummary.score}/100 | Organic: ${report.projectSummary.organicScore}/100`, 'dim'));
336
344
  console.log('');
337
345
 
@@ -371,6 +379,24 @@ function printAnalysis(report, options = {}) {
371
379
  console.log('');
372
380
  }
373
381
 
382
+ if (report.recommendedDomainPacks.length > 0) {
383
+ console.log(c(' Recommended Domain Packs', 'blue'));
384
+ for (const pack of report.recommendedDomainPacks) {
385
+ console.log(` - ${pack.label}`);
386
+ console.log(c(` ${pack.useWhen}`, 'dim'));
387
+ }
388
+ console.log('');
389
+ }
390
+
391
+ if (report.recommendedMcpPacks.length > 0) {
392
+ console.log(c(' Recommended MCP Packs', 'blue'));
393
+ for (const pack of report.recommendedMcpPacks) {
394
+ console.log(` - ${pack.label}`);
395
+ console.log(c(` ${pack.adoption}`, 'dim'));
396
+ }
397
+ console.log('');
398
+ }
399
+
374
400
  if (report.riskNotes.length > 0) {
375
401
  console.log(c(' Risk Notes', 'red'));
376
402
  for (const note of report.riskNotes) {
package/src/audit.js CHANGED
@@ -194,7 +194,7 @@ async function audit(options) {
194
194
  console.log(` ${colorize(`${passed.length}/${applicable.length}`, 'bold')} checks passing${skipped.length > 0 ? colorize(` (${skipped.length} not applicable)`, 'dim') : ''}`);
195
195
 
196
196
  if (failed.length > 0) {
197
- console.log(` Run ${colorize('npx claudex-setup setup', 'bold')} to fix automatically`);
197
+ console.log(` Run ${colorize('npx claudex-setup setup', 'bold')} to create starter-safe defaults`);
198
198
  }
199
199
 
200
200
  console.log('');
@@ -212,7 +212,7 @@ async function audit(options) {
212
212
  console.log('');
213
213
  }
214
214
 
215
- console.log(colorize(' Powered by CLAUDEX - 1,107 verified Claude Code techniques', 'dim'));
215
+ console.log(colorize(' Backed by CLAUDEX research and evidence', 'dim'));
216
216
  console.log(colorize(' https://github.com/DnaFin/claudex-setup', 'dim'));
217
217
  console.log('');
218
218
 
package/src/benchmark.js CHANGED
@@ -5,6 +5,8 @@ const path = require('path');
5
5
  const { version } = require('../package.json');
6
6
  const { audit } = require('./audit');
7
7
  const { setup } = require('./setup');
8
+ const { analyzeProject } = require('./analyze');
9
+ const { getGovernanceSummary } = require('./governance');
8
10
 
9
11
  function copyProject(sourceDir, targetDir) {
10
12
  fs.mkdirSync(targetDir, { recursive: true });
@@ -34,20 +36,76 @@ function summarizeAudit(result) {
34
36
  };
35
37
  }
36
38
 
37
- function buildExecutiveSummary(before, after) {
39
+ function buildWorkflowEvidence(before, after, analysisReport, governanceSummary) {
40
+ const tasks = [
41
+ {
42
+ key: 'discover-without-writes',
43
+ label: 'Discover next actions without writing files',
44
+ passed: before.checkCount > 0 && Array.isArray(before.quickWins),
45
+ evidence: `Baseline audit returned ${before.checkCount} applicable checks and ${before.quickWins.length} quick wins.`,
46
+ },
47
+ {
48
+ key: 'starter-safe-improvement',
49
+ label: 'Apply starter-safe improvements in isolation',
50
+ passed: after.score >= before.score && after.failed <= before.failed,
51
+ evidence: `Score moved ${before.score} -> ${after.score}; failed checks moved ${before.failed} -> ${after.failed}.`,
52
+ },
53
+ {
54
+ key: 'governed-rollout-surface',
55
+ label: 'Expose governed rollout controls',
56
+ passed: governanceSummary.permissionProfiles.length >= 3 && governanceSummary.hookRegistry.length >= 1,
57
+ evidence: `${governanceSummary.permissionProfiles.length} profiles and ${governanceSummary.hookRegistry.length} governed hooks available.`,
58
+ },
59
+ {
60
+ key: 'domain-pack-guidance',
61
+ label: 'Recommend a domain pack for the repo',
62
+ passed: analysisReport.recommendedDomainPacks.length > 0,
63
+ evidence: analysisReport.recommendedDomainPacks.map(pack => pack.label).join(', ') || 'No domain pack recommendation generated.',
64
+ },
65
+ {
66
+ key: 'mcp-pack-guidance',
67
+ label: 'Recommend MCP packs when appropriate',
68
+ passed: analysisReport.recommendedMcpPacks.length > 0,
69
+ evidence: analysisReport.recommendedMcpPacks.map(pack => pack.label).join(', ') || 'No MCP pack recommendation generated.',
70
+ },
71
+ ];
72
+
73
+ const passed = tasks.filter(task => task.passed).length;
74
+ const total = tasks.length;
75
+ return {
76
+ taskPack: 'maintainer-core',
77
+ tasks,
78
+ summary: {
79
+ passed,
80
+ total,
81
+ coverageScore: total > 0 ? Math.round((passed / total) * 100) : 0,
82
+ },
83
+ };
84
+ }
85
+
86
+ function buildExecutiveSummary(before, after, workflowEvidence) {
38
87
  const scoreDelta = after.score - before.score;
39
88
  const organicDelta = after.organicScore - before.organicScore;
89
+ const workflowCoverage = workflowEvidence.summary.coverageScore;
90
+ let headline = 'Benchmark did not improve the score in this run.';
91
+
92
+ if (scoreDelta > 0) {
93
+ headline = `Benchmark improved readiness by ${scoreDelta} points without touching the original repo.`;
94
+ } else if (before.score >= 85 && after.score >= before.score && workflowCoverage >= 80) {
95
+ headline = 'Benchmark confirmed the repo already meets the starter-safe baseline without regression.';
96
+ }
97
+
40
98
  return {
41
- headline: scoreDelta > 0
42
- ? `Benchmark improved readiness by ${scoreDelta} points without touching the original repo.`
43
- : 'Benchmark did not improve the score in this run.',
99
+ headline,
44
100
  scoreDelta,
45
101
  organicDelta,
46
102
  decisionGuidance: scoreDelta >= 20
47
103
  ? 'Strong pilot candidate'
48
104
  : scoreDelta >= 10
49
105
  ? 'Promising but needs manual review'
50
- : 'Use suggest-only mode before rollout',
106
+ : (before.score >= 85 && workflowCoverage >= 80
107
+ ? 'Use suggest-only mode, domain packs, or task-level benchmarks next'
108
+ : 'Use suggest-only mode before rollout'),
51
109
  };
52
110
  }
53
111
 
@@ -95,6 +153,11 @@ function renderBenchmarkMarkdown(report) {
95
153
  `- ${report.executiveSummary.headline}`,
96
154
  `- Recommendation: ${report.executiveSummary.decisionGuidance}`,
97
155
  '',
156
+ '## Workflow Evidence',
157
+ `- Task pack: ${report.workflowEvidence.taskPack}`,
158
+ `- Coverage: ${report.workflowEvidence.summary.passed}/${report.workflowEvidence.summary.total} (${report.workflowEvidence.summary.coverageScore}%)`,
159
+ ...report.workflowEvidence.tasks.map(task => `- ${task.label}: ${task.passed ? 'pass' : 'not yet'} — ${task.evidence}`),
160
+ '',
98
161
  '## Case Study',
99
162
  `- Initial state: ${report.caseStudy.initialState}`,
100
163
  `- Chosen mode: ${report.caseStudy.chosenMode}`,
@@ -111,8 +174,17 @@ async function runBenchmark(options) {
111
174
 
112
175
  try {
113
176
  copyProject(options.dir, sandboxDir);
114
- const applyResult = await setup({ dir: sandboxDir, auto: true, silent: true });
177
+ const applyResult = await setup({
178
+ dir: sandboxDir,
179
+ auto: true,
180
+ silent: true,
181
+ profile: options.profile,
182
+ mcpPacks: options.mcpPacks || [],
183
+ });
115
184
  const after = await audit({ dir: sandboxDir, silent: true });
185
+ const analysisReport = await analyzeProject({ dir: sandboxDir, mode: 'suggest-only' });
186
+ const governanceSummary = getGovernanceSummary();
187
+ const workflowEvidence = buildWorkflowEvidence(before, after, analysisReport, governanceSummary);
116
188
 
117
189
  return {
118
190
  schemaVersion: 1,
@@ -133,7 +205,8 @@ async function runBenchmark(options) {
133
205
  passed: after.passed - before.passed,
134
206
  failed: after.failed - before.failed,
135
207
  },
136
- executiveSummary: buildExecutiveSummary(before, after),
208
+ workflowEvidence,
209
+ executiveSummary: buildExecutiveSummary(before, after, workflowEvidence),
137
210
  caseStudy: buildCaseStudy(before, after, applyResult),
138
211
  };
139
212
  } finally {
@@ -158,6 +231,7 @@ function printBenchmark(report, options = {}) {
158
231
  console.log('');
159
232
  console.log(` ${report.executiveSummary.headline}`);
160
233
  console.log(` Recommendation: ${report.executiveSummary.decisionGuidance}`);
234
+ console.log(` Workflow evidence: ${report.workflowEvidence.summary.passed}/${report.workflowEvidence.summary.total} tasks (${report.workflowEvidence.summary.coverageScore}%)`);
161
235
  console.log('');
162
236
  }
163
237
 
package/src/context.js CHANGED
@@ -17,11 +17,12 @@ class ProjectContext {
17
17
  try {
18
18
  const entries = fs.readdirSync(this.dir, { withFileTypes: true });
19
19
  for (const entry of entries) {
20
- if (entry.name.startsWith('.') && entry.name !== '.claude' && entry.name !== '.gitignore') continue;
21
- if (entry.name === 'node_modules' || entry.name === '__pycache__') continue;
22
20
  if (entry.isFile()) {
21
+ if (entry.name === '.DS_Store') continue;
23
22
  this.files.push(entry.name);
24
23
  } else if (entry.isDirectory()) {
24
+ if (entry.name.startsWith('.') && entry.name !== '.claude') continue;
25
+ if (entry.name === 'node_modules' || entry.name === '__pycache__') continue;
25
26
  this.files.push(entry.name + '/');
26
27
  // Scan .claude/ subdirectories
27
28
  if (entry.name === '.claude') {