claude-launchpad 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -106
- package/dist/cli.js +93 -36
- package/dist/cli.js.map +1 -1
- package/package.json +2 -2
- package/scenarios/{common → conventions}/error-handling.yaml +3 -3
- package/scenarios/{common → conventions}/file-size.yaml +3 -3
- package/scenarios/{common → conventions}/immutability.yaml +3 -3
- package/scenarios/{common → conventions}/naming-conventions.yaml +5 -5
- package/scenarios/{common → conventions}/no-hardcoded-values.yaml +3 -3
- package/scenarios/{common → security}/env-protection.yaml +3 -3
- package/scenarios/{common → security}/input-validation.yaml +3 -3
- package/scenarios/{common → security}/secret-exposure.yaml +2 -2
- package/scenarios/{common → security}/sql-injection.yaml +2 -2
- package/scenarios/{common → workflow}/git-conventions.yaml +3 -3
- package/scenarios/{common → workflow}/session-continuity.yaml +3 -3
package/README.md
CHANGED
|
@@ -1,32 +1,62 @@
|
|
|
1
1
|
# Claude Launchpad
|
|
2
2
|
|
|
3
|
-
**
|
|
3
|
+
**A linter for your Claude Code configuration.** Scores your setup, auto-fixes issues, and tests if Claude actually follows your rules.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
Claude Launchpad is the first CLI that **diagnoses, scaffolds, enhances, and tests** Claude Code configurations. Think ESLint for your AI setup.
|
|
5
|
+
You write a `CLAUDE.md`, add some hooks, configure settings — but is any of it actually working? Claude Launchpad scans your config, gives you a score out of 100, fixes what's broken, and runs Claude against test scenarios to prove it.
|
|
8
6
|
|
|
9
7
|
```bash
|
|
10
8
|
npx claude-launchpad
|
|
11
9
|
```
|
|
12
10
|
|
|
13
|
-
|
|
11
|
+
That's it. One command. You get a score. You see what's wrong. You fix it.
|
|
12
|
+
|
|
13
|
+
## What It Does
|
|
14
|
+
|
|
15
|
+
| Command | What it does | Cost |
|
|
16
|
+
|---|---|---|
|
|
17
|
+
| `claude-launchpad` | Scans your config, scores it 0-100, lists issues | Free |
|
|
18
|
+
| `claude-launchpad doctor --fix` | Auto-fixes issues (adds hooks, rules, missing sections) | Free |
|
|
19
|
+
| `claude-launchpad doctor --watch` | Live score that updates when you edit config files | Free |
|
|
20
|
+
| `claude-launchpad init` | Detects your stack, generates config from scratch | Free |
|
|
21
|
+
| `claude-launchpad enhance` | Opens Claude to read your code and complete CLAUDE.md | Uses Claude |
|
|
22
|
+
| `claude-launchpad eval --suite security` | Runs Claude against test scenarios, proves your config works | Uses Claude |
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
14
25
|
|
|
15
26
|
```bash
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
27
|
+
# Install
|
|
28
|
+
npm i -g claude-launchpad
|
|
29
|
+
|
|
30
|
+
# Go to any project with Claude Code
|
|
31
|
+
cd your-project
|
|
32
|
+
|
|
33
|
+
# See your score
|
|
34
|
+
claude-launchpad
|
|
35
|
+
|
|
36
|
+
# Fix everything it found
|
|
37
|
+
claude-launchpad doctor --fix
|
|
38
|
+
|
|
39
|
+
# See your new score
|
|
40
|
+
claude-launchpad
|
|
21
41
|
```
|
|
22
42
|
|
|
23
|
-
|
|
43
|
+
That takes you from ~42% to ~86% with zero manual work.
|
|
24
44
|
|
|
25
|
-
##
|
|
45
|
+
## The Doctor
|
|
26
46
|
|
|
27
|
-
|
|
47
|
+
The core of the tool. Runs 7 analyzers against your `.claude/` directory and `CLAUDE.md`:
|
|
28
48
|
|
|
29
|
-
|
|
49
|
+
| Analyzer | What it catches |
|
|
50
|
+
|---|---|
|
|
51
|
+
| **Instruction Budget** | Too many instructions in CLAUDE.md — Claude starts ignoring rules past ~150 |
|
|
52
|
+
| **CLAUDE.md Quality** | Missing sections, vague instructions ("write good code"), hardcoded secrets |
|
|
53
|
+
| **Settings** | No hooks configured, dangerous tool access without safety nets |
|
|
54
|
+
| **Hooks** | Missing auto-format on save, no .env file protection, no security gates |
|
|
55
|
+
| **Rules** | Dead rule files, stale references, empty configs |
|
|
56
|
+
| **Permissions** | Bash auto-allowed without security hooks, no force-push protection |
|
|
57
|
+
| **MCP Servers** | Invalid transport configs, missing commands/URLs |
|
|
58
|
+
|
|
59
|
+
Output looks like this:
|
|
30
60
|
|
|
31
61
|
```
|
|
32
62
|
Instruction Budget ━━━━━━━━━━━━━━━━━━━━ 100%
|
|
@@ -42,40 +72,25 @@ Runs 7 static analyzers against your `.claude/` directory and `CLAUDE.md`. No AP
|
|
|
42
72
|
✓ No issues found. Your configuration looks solid.
|
|
43
73
|
```
|
|
44
74
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
**Flags:**
|
|
48
|
-
- `--fix` — Auto-apply deterministic fixes (42% → 86% in one command)
|
|
49
|
-
- `--watch` — Live score that updates every time you save a config file
|
|
50
|
-
- `--json` — JSON output for programmatic use
|
|
51
|
-
- `--min-score <n>` — Exit non-zero if score drops below threshold (for CI)
|
|
52
|
-
|
|
53
|
-
**What it checks:**
|
|
75
|
+
**All doctor flags:**
|
|
54
76
|
|
|
55
|
-
|
|
|
77
|
+
| Flag | What it does |
|
|
56
78
|
|---|---|
|
|
57
|
-
|
|
|
58
|
-
|
|
|
59
|
-
|
|
|
60
|
-
|
|
|
61
|
-
|
|
|
62
|
-
| **Permissions** | Bash auto-allowed without security hooks, no force-push protection |
|
|
63
|
-
| **MCP Servers** | Invalid transport configs, missing commands/URLs, broken executables |
|
|
79
|
+
| `--fix` | Auto-fixes issues: adds hooks, CLAUDE.md sections, rules, .claudeignore |
|
|
80
|
+
| `--watch` | Re-runs every second, updates when you save a config file |
|
|
81
|
+
| `--json` | Pure JSON output, no colors, no banner — for scripts and CI |
|
|
82
|
+
| `--min-score <n>` | Exit code 1 if score is below threshold — use in CI to block bad configs |
|
|
83
|
+
| `-p, --path <dir>` | Run on a different directory |
|
|
64
84
|
|
|
65
|
-
|
|
85
|
+
## Init
|
|
66
86
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
```bash
|
|
70
|
-
claude-launchpad init
|
|
71
|
-
```
|
|
87
|
+
Detects your project and generates Claude Code config that fits. No templates, no menus — it reads your manifest files and figures it out.
|
|
72
88
|
|
|
73
89
|
```
|
|
74
90
|
→ Detecting project...
|
|
75
91
|
✓ Found Next.js (TypeScript) project
|
|
76
92
|
· Package manager: pnpm
|
|
77
93
|
· Dev command: pnpm dev
|
|
78
|
-
· Test command: pnpm test
|
|
79
94
|
|
|
80
95
|
✓ Generated CLAUDE.md
|
|
81
96
|
✓ Generated TASKS.md
|
|
@@ -83,86 +98,87 @@ claude-launchpad init
|
|
|
83
98
|
✓ Generated .claudeignore
|
|
84
99
|
```
|
|
85
100
|
|
|
86
|
-
**
|
|
87
|
-
|
|
88
|
-
**Detects 20+ frameworks:** Next.js, FastAPI, Django, Rails, Laravel, Express, SvelteKit, Angular, NestJS, Hono, Astro, Remix, Nuxt, Symfony, and more.
|
|
89
|
-
|
|
90
|
-
**Detects package managers from lockfiles:** pnpm, yarn, npm, bun, uv, poetry, cargo, bundler, composer, go modules.
|
|
101
|
+
**Works with:** TypeScript, JavaScript, Python, Go, Ruby, Rust, Dart, PHP, Java, Kotlin, Swift, Elixir, C# — and detects frameworks (Next.js, FastAPI, Django, Rails, Laravel, Express, SvelteKit, Angular, NestJS, and 15+ more).
|
|
91
102
|
|
|
92
103
|
**What you get:**
|
|
93
|
-
- `CLAUDE.md`
|
|
94
|
-
- `TASKS.md`
|
|
95
|
-
- `.claude/settings.json`
|
|
96
|
-
- `.claudeignore`
|
|
104
|
+
- `CLAUDE.md` — your stack, commands, conventions, guardrails
|
|
105
|
+
- `TASKS.md` — session continuity across Claude Code sessions
|
|
106
|
+
- `.claude/settings.json` — auto-format hooks and .env file protection
|
|
107
|
+
- `.claudeignore` — keeps Claude from reading node_modules, dist, lockfiles, etc.
|
|
97
108
|
|
|
98
|
-
|
|
109
|
+
## Enhance
|
|
99
110
|
|
|
100
|
-
Init
|
|
111
|
+
Init detects your stack but can't understand your architecture. Enhance opens Claude to read your actual code and fill in the details.
|
|
101
112
|
|
|
102
113
|
```bash
|
|
103
114
|
claude-launchpad enhance
|
|
104
115
|
```
|
|
105
116
|
|
|
106
|
-
Claude
|
|
107
|
-
- **Architecture** — actual directory structure, data flow, key modules
|
|
108
|
-
- **Conventions** — patterns it observes in your code (naming, imports, state management)
|
|
109
|
-
- **Off-Limits** — guardrails based on what it sees (protected files, anti-patterns)
|
|
110
|
-
- **Key Decisions** — architectural decisions visible in the code
|
|
117
|
+
Claude reads your codebase and updates CLAUDE.md with real content — actual architecture, actual conventions, actual guardrails. Not boilerplate. It also suggests project-specific hooks and MCP servers based on what it finds.
|
|
111
118
|
|
|
112
|
-
|
|
119
|
+
Stays under the 120-instruction budget. Overflows detailed content to `.claude/rules/` files.
|
|
113
120
|
|
|
114
|
-
|
|
121
|
+
## Eval
|
|
115
122
|
|
|
116
|
-
|
|
123
|
+
The part nobody else has built. Runs Claude against real test scenarios and scores the results.
|
|
117
124
|
|
|
118
125
|
```bash
|
|
119
|
-
|
|
126
|
+
# Run only security tests (4 scenarios)
|
|
127
|
+
claude-launchpad eval --suite security
|
|
128
|
+
|
|
129
|
+
# Run only convention tests (5 scenarios)
|
|
130
|
+
claude-launchpad eval --suite conventions
|
|
131
|
+
|
|
132
|
+
# Run only workflow tests (2 scenarios)
|
|
133
|
+
claude-launchpad eval --suite workflow
|
|
134
|
+
|
|
135
|
+
# Run everything (11 scenarios)
|
|
136
|
+
claude-launchpad eval
|
|
137
|
+
|
|
138
|
+
# Use a cheaper model
|
|
139
|
+
claude-launchpad eval --suite security --model haiku
|
|
140
|
+
|
|
141
|
+
# One run per scenario (fastest)
|
|
142
|
+
claude-launchpad eval --suite security --runs 1
|
|
120
143
|
```
|
|
121
144
|
|
|
145
|
+
Each scenario creates an isolated sandbox, runs Claude with a task, and checks if Claude followed the rules:
|
|
146
|
+
|
|
122
147
|
```
|
|
123
148
|
✓ security/sql-injection 10/10 PASS
|
|
124
149
|
✓ security/env-protection 10/10 PASS
|
|
125
150
|
✓ security/secret-exposure 10/10 PASS
|
|
126
151
|
✓ security/input-validation 10/10 PASS
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
✓ conventions/no-hardcoded-values 10/10 PASS
|
|
130
|
-
✓ conventions/naming-conventions 10/10 PASS
|
|
131
|
-
✓ conventions/file-size 10/10 PASS
|
|
132
|
-
✓ workflow/git-conventions 10/10 PASS
|
|
133
|
-
✗ workflow/session-continuity 7/10 WARN
|
|
152
|
+
✗ conventions/file-size 5/10 FAIL
|
|
153
|
+
✗ Claude kept all generated files under 800 lines
|
|
134
154
|
|
|
135
155
|
Config Eval Score ━━━━━━━━━━━━━━━━━━━─ 95%
|
|
136
156
|
```
|
|
137
157
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
This is the part nobody else has built. Template repos scaffold. Audit tools diagnose. **Nobody tests whether your config actually makes Claude better.** Until now.
|
|
141
|
-
|
|
142
|
-
## How It Works Under the Hood
|
|
158
|
+
Results are saved to `.claude/eval/` as structured markdown — you can feed these reports back to Claude to fix the failures.
|
|
143
159
|
|
|
144
|
-
|
|
145
|
-
Reads your `CLAUDE.md`, `.claude/settings.json`, `.claude/rules/`, and `.claudeignore`. Runs 7 analyzers that check instruction count, section completeness, hook configuration, rule validity, permission safety, and MCP server configs. Pure static analysis — no API calls, no network, no cost.
|
|
160
|
+
**Suites:**
|
|
146
161
|
|
|
147
|
-
|
|
148
|
-
|
|
162
|
+
| Suite | Scenarios | What it tests |
|
|
163
|
+
|---|---|---|
|
|
164
|
+
| `security` | 4 | SQL injection, .env protection, secret exposure, input validation |
|
|
165
|
+
| `conventions` | 5 | Error handling, immutability, file size, naming, no hardcoded values |
|
|
166
|
+
| `workflow` | 2 | Git conventions, session continuity |
|
|
149
167
|
|
|
150
|
-
|
|
151
|
-
Spawns `claude "prompt"` as an interactive child process with `stdio: "inherit"` — you see Claude's full UI. The prompt instructs Claude to read the codebase and fill in CLAUDE.md sections. No data passes through the launchpad — it just launches Claude with a pre-loaded task.
|
|
168
|
+
**All eval flags:**
|
|
152
169
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
8. Cleans up the temp directory (or preserves it with `--debug`)
|
|
170
|
+
| Flag | What it does |
|
|
171
|
+
|---|---|
|
|
172
|
+
| `--suite <name>` | Run one suite: `security`, `conventions`, or `workflow` |
|
|
173
|
+
| `--model <model>` | Model to use: `haiku`, `sonnet`, `opus` |
|
|
174
|
+
| `--runs <n>` | Runs per scenario (default 3, median score used) |
|
|
175
|
+
| `--debug` | Keep sandbox directories so you can inspect what Claude wrote |
|
|
176
|
+
| `--json` | JSON output |
|
|
177
|
+
| `--timeout <ms>` | Timeout per run (default 120000) |
|
|
162
178
|
|
|
163
179
|
## Use in CI
|
|
164
180
|
|
|
165
|
-
|
|
181
|
+
Block PRs that degrade your Claude Code config quality:
|
|
166
182
|
|
|
167
183
|
```yaml
|
|
168
184
|
# .github/workflows/claude-config.yml
|
|
@@ -180,39 +196,35 @@ jobs:
|
|
|
180
196
|
- run: npx claude-launchpad@latest doctor --min-score 80 --json
|
|
181
197
|
```
|
|
182
198
|
|
|
183
|
-
|
|
199
|
+
Score below threshold = exit code 1 = PR blocked.
|
|
184
200
|
|
|
185
201
|
## Plugin (pending marketplace review)
|
|
186
202
|
|
|
187
|
-
The plugin has been submitted to the Claude Code marketplace. Once approved:
|
|
188
|
-
|
|
189
203
|
```bash
|
|
190
204
|
claude plugin install claude-launchpad
|
|
191
205
|
```
|
|
192
206
|
|
|
193
|
-
Then use `/launchpad:doctor`, `/launchpad:init`, `/launchpad:enhance`,
|
|
207
|
+
Then use `/launchpad:doctor`, `/launchpad:init`, `/launchpad:enhance`, `/launchpad:eval` inside Claude Code. The plugin nudges you to re-check your score when you edit config files.
|
|
194
208
|
|
|
195
|
-
##
|
|
209
|
+
## How It Works
|
|
196
210
|
|
|
197
|
-
|
|
211
|
+
**Doctor** reads your files and runs static analysis. No API calls. No network. No cost.
|
|
198
212
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
- **Nobody measures.** You can't improve what you can't measure.
|
|
213
|
+
**Init** scans manifest files (package.json, go.mod, pyproject.toml, etc.), detects your stack, and generates config with safe, hardcoded formatter hooks — never interpolates user-controlled strings.
|
|
214
|
+
|
|
215
|
+
**Enhance** spawns `claude "prompt"` as an interactive child process. You see Claude's full UI. No data passes through the tool — it just launches Claude with a task.
|
|
203
216
|
|
|
204
|
-
|
|
217
|
+
**Eval** creates a temp directory, writes seed files from the scenario YAML, initializes a git repo, runs Claude via the Agent SDK (or falls back to CLI), then checks the output with grep/file assertions. Sandbox is cleaned up after (or preserved with `--debug`).
|
|
205
218
|
|
|
206
|
-
##
|
|
219
|
+
## Why This Exists
|
|
220
|
+
|
|
221
|
+
- **CLAUDE.md is advisory.** ~80% compliance. Claude might ignore your rules.
|
|
222
|
+
- **Hooks are deterministic.** 100% compliance. But most people have zero hooks.
|
|
223
|
+
- **Instruction budget is real.** Past ~150, compliance drops. Most people don't know they're over.
|
|
224
|
+
- **Nobody measures.** You can't improve what you can't measure.
|
|
207
225
|
|
|
208
|
-
|
|
209
|
-
- **Doctor is free.** No API calls, no secrets, works offline and air-gapped.
|
|
210
|
-
- **Enhance uses Claude.** Spawns an interactive session to understand your codebase — costs tokens but produces a CLAUDE.md that actually knows your project.
|
|
211
|
-
- **Eval uses the Agent SDK.** Runs Claude headless in sandboxes with explicit tool permissions — proof that your config works.
|
|
212
|
-
- **Works with any stack.** Auto-detects your project. No fixed menu of supported frameworks.
|
|
213
|
-
- **57 tests.** The tool that tests configs is itself well-tested.
|
|
214
|
-
- **You never clone this repo.** It's a tool you run with `npx`, not a template you fork.
|
|
226
|
+
This tool gives you a number. Fix the issues, re-run, watch the number go up.
|
|
215
227
|
|
|
216
228
|
## License
|
|
217
229
|
|
|
218
|
-
MIT
|
|
230
|
+
MIT
|
package/dist/cli.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
// src/cli.ts
|
|
4
4
|
import { Command as Command5 } from "commander";
|
|
5
|
-
import { join as
|
|
5
|
+
import { join as join11 } from "path";
|
|
6
6
|
|
|
7
7
|
// src/commands/init/index.ts
|
|
8
8
|
import { Command } from "commander";
|
|
@@ -1515,9 +1515,11 @@ function createDoctorCommand() {
|
|
|
1515
1515
|
await watchConfig(opts.path);
|
|
1516
1516
|
return;
|
|
1517
1517
|
}
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1518
|
+
if (!opts.json) {
|
|
1519
|
+
printBanner();
|
|
1520
|
+
log.step("Scanning Claude Code configuration...");
|
|
1521
|
+
log.blank();
|
|
1522
|
+
}
|
|
1521
1523
|
const config = await parseClaudeConfig(opts.path);
|
|
1522
1524
|
if (config.claudeMdContent === null && config.settings === null) {
|
|
1523
1525
|
log.error("No Claude Code configuration found in this directory.");
|
|
@@ -1571,6 +1573,8 @@ function createDoctorCommand() {
|
|
|
1571
1573
|
import { Command as Command3 } from "commander";
|
|
1572
1574
|
import ora from "ora";
|
|
1573
1575
|
import chalk2 from "chalk";
|
|
1576
|
+
import { mkdir as mkdir4, writeFile as writeFile4 } from "fs/promises";
|
|
1577
|
+
import { join as join9 } from "path";
|
|
1574
1578
|
|
|
1575
1579
|
// src/commands/eval/loader.ts
|
|
1576
1580
|
import { readFile as readFile5, readdir as readdir3, access as access5 } from "fs/promises";
|
|
@@ -1745,7 +1749,7 @@ async function runScenario(scenario, options) {
|
|
|
1745
1749
|
const sandboxDir = join8(tmpdir(), `claude-eval-${randomUUID()}`);
|
|
1746
1750
|
try {
|
|
1747
1751
|
await setupSandbox(sandboxDir, scenario);
|
|
1748
|
-
await runClaudeInSandbox(sandboxDir, scenario.prompt, options.timeout);
|
|
1752
|
+
await runClaudeInSandbox(sandboxDir, scenario.prompt, options.timeout, options.model);
|
|
1749
1753
|
return await scoreResults(scenario, sandboxDir);
|
|
1750
1754
|
} finally {
|
|
1751
1755
|
if (options.debug) {
|
|
@@ -1794,7 +1798,7 @@ ${scenario.setup.instructions}
|
|
|
1794
1798
|
"eval setup"
|
|
1795
1799
|
], { cwd: sandboxDir });
|
|
1796
1800
|
}
|
|
1797
|
-
async function runClaudeInSandbox(cwd, prompt, timeout) {
|
|
1801
|
+
async function runClaudeInSandbox(cwd, prompt, timeout, model) {
|
|
1798
1802
|
try {
|
|
1799
1803
|
const sdk = await import("@anthropic-ai/claude-agent-sdk");
|
|
1800
1804
|
const controller = new AbortController();
|
|
@@ -1808,7 +1812,8 @@ async function runClaudeInSandbox(cwd, prompt, timeout) {
|
|
|
1808
1812
|
permissionMode: "dontAsk",
|
|
1809
1813
|
settingSources: [],
|
|
1810
1814
|
maxTurns: 20,
|
|
1811
|
-
abortController: controller
|
|
1815
|
+
abortController: controller,
|
|
1816
|
+
...model ? { model } : {}
|
|
1812
1817
|
}
|
|
1813
1818
|
})) {
|
|
1814
1819
|
}
|
|
@@ -1816,31 +1821,29 @@ async function runClaudeInSandbox(cwd, prompt, timeout) {
|
|
|
1816
1821
|
clearTimeout(timeoutId);
|
|
1817
1822
|
}
|
|
1818
1823
|
} catch {
|
|
1819
|
-
await runClaudeCli(cwd, prompt, timeout);
|
|
1824
|
+
await runClaudeCli(cwd, prompt, timeout, model);
|
|
1820
1825
|
}
|
|
1821
1826
|
}
|
|
1822
|
-
async function runClaudeCli(cwd, prompt, timeout) {
|
|
1827
|
+
async function runClaudeCli(cwd, prompt, timeout, model) {
|
|
1823
1828
|
try {
|
|
1824
|
-
|
|
1825
|
-
"
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
1837
|
-
|
|
1838
|
-
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
{ cwd, timeout, maxBuffer: 10 * 1024 * 1024 }
|
|
1843
|
-
);
|
|
1829
|
+
const args = [
|
|
1830
|
+
"-p",
|
|
1831
|
+
prompt,
|
|
1832
|
+
"--output-format",
|
|
1833
|
+
"text",
|
|
1834
|
+
"--max-turns",
|
|
1835
|
+
"20",
|
|
1836
|
+
"--dangerously-skip-permissions",
|
|
1837
|
+
"--allowedTools",
|
|
1838
|
+
"Bash",
|
|
1839
|
+
"Read",
|
|
1840
|
+
"Write",
|
|
1841
|
+
"Edit",
|
|
1842
|
+
"Glob",
|
|
1843
|
+
"Grep"
|
|
1844
|
+
];
|
|
1845
|
+
if (model) args.push("--model", model);
|
|
1846
|
+
await exec("claude", args, { cwd, timeout, maxBuffer: 10 * 1024 * 1024 });
|
|
1844
1847
|
} catch (error) {
|
|
1845
1848
|
if (error && typeof error === "object" && "stdout" in error) {
|
|
1846
1849
|
return;
|
|
@@ -1949,7 +1952,7 @@ async function listAllFiles(dir) {
|
|
|
1949
1952
|
|
|
1950
1953
|
// src/commands/eval/index.ts
|
|
1951
1954
|
function createEvalCommand() {
|
|
1952
|
-
return new Command3("eval").description("Test your Claude Code config against eval scenarios").option("-s, --suite <suite>", "Eval suite to run (e.g., security, conventions, workflow)").option("-p, --path <path>", "Project root path", process.cwd()).option("--scenarios <path>", "Custom scenarios directory").option("--runs <n>", "Runs per scenario (default: 3)", "3").option("--timeout <ms>", "Timeout per run in ms (default: 120000)", "120000").option("--json", "Output as JSON").option("--debug", "Keep sandbox directories for inspection").action(async (opts) => {
|
|
1955
|
+
return new Command3("eval").description("Test your Claude Code config against eval scenarios").option("-s, --suite <suite>", "Eval suite to run (e.g., security, conventions, workflow)").option("-p, --path <path>", "Project root path", process.cwd()).option("--scenarios <path>", "Custom scenarios directory").option("--runs <n>", "Runs per scenario (default: 3)", "3").option("--timeout <ms>", "Timeout per run in ms (default: 120000)", "120000").option("--json", "Output as JSON").option("--debug", "Keep sandbox directories for inspection").option("--model <model>", "Model to use for eval (e.g., sonnet, haiku, opus)").action(async (opts) => {
|
|
1953
1956
|
printBanner();
|
|
1954
1957
|
const claudeAvailable = await checkClaudeCli();
|
|
1955
1958
|
if (!claudeAvailable) {
|
|
@@ -1970,6 +1973,9 @@ function createEvalCommand() {
|
|
|
1970
1973
|
return;
|
|
1971
1974
|
}
|
|
1972
1975
|
log.success(`Loaded ${scenarios.length} scenario(s)`);
|
|
1976
|
+
if (opts.model) {
|
|
1977
|
+
log.info(`Model: ${opts.model}`);
|
|
1978
|
+
}
|
|
1973
1979
|
log.blank();
|
|
1974
1980
|
const runs = parseInt(opts.runs, 10);
|
|
1975
1981
|
const timeout = parseInt(opts.timeout, 10);
|
|
@@ -1982,7 +1988,7 @@ function createEvalCommand() {
|
|
|
1982
1988
|
try {
|
|
1983
1989
|
const result = await runScenarioWithRetries(
|
|
1984
1990
|
{ ...scenario, runs },
|
|
1985
|
-
{ projectRoot: opts.path, timeout, debug: opts.debug }
|
|
1991
|
+
{ projectRoot: opts.path, timeout, debug: opts.debug, model: opts.model }
|
|
1986
1992
|
);
|
|
1987
1993
|
results.push(result);
|
|
1988
1994
|
if (result.passed) {
|
|
@@ -2017,6 +2023,7 @@ function createEvalCommand() {
|
|
|
2017
2023
|
return;
|
|
2018
2024
|
}
|
|
2019
2025
|
renderEvalReport(results);
|
|
2026
|
+
await saveEvalReport(results, opts.path, opts.suite, opts.model);
|
|
2020
2027
|
});
|
|
2021
2028
|
}
|
|
2022
2029
|
function renderEvalReport(results) {
|
|
@@ -2027,7 +2034,7 @@ function renderEvalReport(results) {
|
|
|
2027
2034
|
console.log(` ${icon} ${chalk2.bold(result.scenario)} ${score} ${status}`);
|
|
2028
2035
|
const failedChecks = result.checks.filter((c) => !c.passed);
|
|
2029
2036
|
for (const check of failedChecks) {
|
|
2030
|
-
console.log(` ${chalk2.
|
|
2037
|
+
console.log(` ${chalk2.red("\u2717")} ${chalk2.dim(check.label)}`);
|
|
2031
2038
|
}
|
|
2032
2039
|
}
|
|
2033
2040
|
log.blank();
|
|
@@ -2044,6 +2051,56 @@ function renderEvalReport(results) {
|
|
|
2044
2051
|
log.warn(`${passed} passed, ${failed} failed out of ${results.length} scenario(s).`);
|
|
2045
2052
|
}
|
|
2046
2053
|
}
|
|
2054
|
+
/**
 * Write a markdown summary of an eval run to `<projectRoot>/.claude/eval/`.
 *
 * The report contains the aggregate score, the suite/model/date, one section
 * per scenario listing its passed and failed checks, and (only when at least
 * one scenario failed) a "Recommendations" section per failed scenario.
 *
 * @param {Array<{scenario: string, score: number, maxScore: number, passed: boolean,
 *   checks: Array<{label: string, points: number, passed: boolean}>}>} results
 *   Scenario results to summarize.
 * @param {string} projectRoot Directory that receives the `.claude/eval/` folder.
 * @param {string} [suite] Suite name; reported (and used in the filename) as "all" when nullish.
 * @param {string} [model] Model name; reported as "default" when nullish.
 * @returns {Promise<void>} Resolves once the file is written and the path is logged.
 *   Rejects with the underlying fs error if the directory or file cannot be written.
 */
async function saveEvalReport(results, projectRoot, suite, model) {
  const totalScore = results.reduce((sum, r) => sum + r.score, 0);
  const totalMax = results.reduce((sum, r) => sum + r.maxScore, 0);
  const pct = totalMax > 0 ? Math.round(totalScore / totalMax * 100) : 0;
  const passed = results.filter((r) => r.passed).length;
  const failed = results.length - passed;

  // Sample the clock once so the heading timestamp and the "Date:" line can
  // never disagree (e.g. when the run finishes right at midnight UTC).
  const isoNow = new Date().toISOString();
  const timestamp = isoNow.replace(/[:.]/g, "-").slice(0, 19);

  const suiteLabel = suite ?? "all";
  // The suite comes from the command line; strip anything that is not safe in
  // a single path segment so the report always lands inside `.claude/eval/`.
  // Built-in suite names ("security", "conventions", "workflow") are unchanged.
  const fileSuite = String(suiteLabel).replace(/[^\w.-]+/g, "-");

  const lines = [
    `# Eval Report \u2014 ${timestamp}`,
    "",
    `**Score: ${pct}%** (${passed} passed, ${failed} failed out of ${results.length} scenarios)`,
    "",
    `- Suite: ${suiteLabel}`,
    `- Model: ${model ?? "default"}`,
    `- Date: ${isoNow.split("T")[0]}`,
    "",
    "## Results",
    ""
  ];

  for (const result of results) {
    const status = result.passed ? "PASS" : "FAIL";
    lines.push(`### ${result.scenario} \u2014 ${result.score}/${result.maxScore} ${status}`);
    // Passed checks are listed first, then failed ones.
    for (const check of result.checks.filter((c) => c.passed)) {
      lines.push(`- PASSED: ${check.label} (${check.points} pts)`);
    }
    for (const check of result.checks.filter((c) => !c.passed)) {
      lines.push(`- FAILED: ${check.label} (${check.points} pts)`);
    }
    lines.push("");
  }

  if (failed > 0) {
    lines.push("## Recommendations");
    lines.push("");
    for (const result of results.filter((r) => !r.passed)) {
      lines.push(`### Fix: ${result.scenario}`);
      for (const check of result.checks.filter((c) => !c.passed)) {
        lines.push(`- ${check.label} \u2014 update CLAUDE.md instructions or add hooks to enforce this behavior`);
      }
      lines.push("");
    }
  }

  const evalDir = join9(projectRoot, ".claude", "eval");
  await mkdir4(evalDir, { recursive: true });
  const filename = `eval-${fileSuite}-${timestamp}.md`;
  await writeFile4(join9(evalDir, filename), lines.join("\n"));
  log.success(`Report saved to .claude/eval/${filename}`);
}
|
|
2047
2104
|
async function checkClaudeCli() {
|
|
2048
2105
|
const { execFile: execFile3 } = await import("child_process");
|
|
2049
2106
|
const { promisify: promisify3 } = await import("util");
|
|
@@ -2061,7 +2118,7 @@ import { Command as Command4 } from "commander";
|
|
|
2061
2118
|
import { spawn, execFile as execFile2 } from "child_process";
|
|
2062
2119
|
import { promisify as promisify2 } from "util";
|
|
2063
2120
|
import { access as access6 } from "fs/promises";
|
|
2064
|
-
import { join as
|
|
2121
|
+
import { join as join10 } from "path";
|
|
2065
2122
|
var execAsync = promisify2(execFile2);
|
|
2066
2123
|
var ENHANCE_PROMPT = `Read CLAUDE.md and the project's codebase, then update CLAUDE.md to fill in missing or incomplete sections.
|
|
2067
2124
|
|
|
@@ -2094,7 +2151,7 @@ function createEnhanceCommand() {
|
|
|
2094
2151
|
return new Command4("enhance").description("Use Claude to analyze your codebase and complete CLAUDE.md").option("-p, --path <path>", "Project root path", process.cwd()).action(async (opts) => {
|
|
2095
2152
|
printBanner();
|
|
2096
2153
|
const root = opts.path;
|
|
2097
|
-
const claudeMdPath =
|
|
2154
|
+
const claudeMdPath = join10(root, "CLAUDE.md");
|
|
2098
2155
|
try {
|
|
2099
2156
|
await access6(claudeMdPath);
|
|
2100
2157
|
} catch {
|
|
@@ -2123,8 +2180,8 @@ function createEnhanceCommand() {
|
|
|
2123
2180
|
}
|
|
2124
2181
|
|
|
2125
2182
|
// src/cli.ts
|
|
2126
|
-
var program = new Command5().name("claude-launchpad").description("CLI toolkit that makes Claude Code setups measurably good").version("0.3.
|
|
2127
|
-
const hasConfig = await fileExists(
|
|
2183
|
+
var program = new Command5().name("claude-launchpad").description("CLI toolkit that makes Claude Code setups measurably good").version("0.3.4", "-v, --version").action(async () => {
|
|
2184
|
+
const hasConfig = await fileExists(join11(process.cwd(), "CLAUDE.md")) || await fileExists(join11(process.cwd(), ".claude", "settings.json"));
|
|
2128
2185
|
if (hasConfig) {
|
|
2129
2186
|
await program.commands.find((c) => c.name() === "doctor")?.parseAsync([], { from: "user" });
|
|
2130
2187
|
} else {
|