claude-launchpad 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -106
- package/dist/cli.js +88 -33
- package/dist/cli.js.map +1 -1
- package/package.json +2 -2
- package/scenarios/{common → conventions}/error-handling.yaml +3 -3
- package/scenarios/{common → conventions}/file-size.yaml +3 -3
- package/scenarios/{common → conventions}/immutability.yaml +3 -3
- package/scenarios/{common → conventions}/naming-conventions.yaml +5 -5
- package/scenarios/{common → conventions}/no-hardcoded-values.yaml +3 -3
- package/scenarios/{common → security}/env-protection.yaml +3 -3
- package/scenarios/{common → security}/input-validation.yaml +3 -3
- package/scenarios/{common → security}/secret-exposure.yaml +2 -2
- package/scenarios/{common → security}/sql-injection.yaml +2 -2
- package/scenarios/{common → workflow}/git-conventions.yaml +3 -3
- package/scenarios/{common → workflow}/session-continuity.yaml +3 -3
package/README.md
CHANGED
|
@@ -1,32 +1,62 @@
|
|
|
1
1
|
# Claude Launchpad
|
|
2
2
|
|
|
3
|
-
**
|
|
3
|
+
**A linter for your Claude Code configuration.** Scores your setup, auto-fixes issues, and tests if Claude actually follows your rules.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
Claude Launchpad is the first CLI that **diagnoses, scaffolds, enhances, and tests** Claude Code configurations. Think ESLint for your AI setup.
|
|
5
|
+
You write a `CLAUDE.md`, add some hooks, configure settings — but is any of it actually working? Claude Launchpad scans your config, gives you a score out of 100, fixes what's broken, and runs Claude against test scenarios to prove it.
|
|
8
6
|
|
|
9
7
|
```bash
|
|
10
8
|
npx claude-launchpad
|
|
11
9
|
```
|
|
12
10
|
|
|
13
|
-
|
|
11
|
+
That's it. One command. You get a score. You see what's wrong. You fix it.
|
|
12
|
+
|
|
13
|
+
## What It Does
|
|
14
|
+
|
|
15
|
+
| Command | What it does | Cost |
|
|
16
|
+
|---|---|---|
|
|
17
|
+
| `claude-launchpad` | Scans your config, scores it 0-100, lists issues | Free |
|
|
18
|
+
| `claude-launchpad doctor --fix` | Auto-fixes issues (adds hooks, rules, missing sections) | Free |
|
|
19
|
+
| `claude-launchpad doctor --watch` | Live score that updates when you edit config files | Free |
|
|
20
|
+
| `claude-launchpad init` | Detects your stack, generates config from scratch | Free |
|
|
21
|
+
| `claude-launchpad enhance` | Opens Claude to read your code and complete CLAUDE.md | Uses Claude |
|
|
22
|
+
| `claude-launchpad eval --suite security` | Runs Claude against test scenarios, proves your config works | Uses Claude |
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
14
25
|
|
|
15
26
|
```bash
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
27
|
+
# Install
|
|
28
|
+
npm i -g claude-launchpad
|
|
29
|
+
|
|
30
|
+
# Go to any project with Claude Code
|
|
31
|
+
cd your-project
|
|
32
|
+
|
|
33
|
+
# See your score
|
|
34
|
+
claude-launchpad
|
|
35
|
+
|
|
36
|
+
# Fix everything it found
|
|
37
|
+
claude-launchpad doctor --fix
|
|
38
|
+
|
|
39
|
+
# See your new score
|
|
40
|
+
claude-launchpad
|
|
21
41
|
```
|
|
22
42
|
|
|
23
|
-
|
|
43
|
+
That takes you from ~42% to ~86% with zero manual work.
|
|
24
44
|
|
|
25
|
-
##
|
|
45
|
+
## The Doctor
|
|
26
46
|
|
|
27
|
-
|
|
47
|
+
The core of the tool. Runs 7 analyzers against your `.claude/` directory and `CLAUDE.md`:
|
|
28
48
|
|
|
29
|
-
|
|
49
|
+
| Analyzer | What it catches |
|
|
50
|
+
|---|---|
|
|
51
|
+
| **Instruction Budget** | Too many instructions in CLAUDE.md — Claude starts ignoring rules past ~150 |
|
|
52
|
+
| **CLAUDE.md Quality** | Missing sections, vague instructions ("write good code"), hardcoded secrets |
|
|
53
|
+
| **Settings** | No hooks configured, dangerous tool access without safety nets |
|
|
54
|
+
| **Hooks** | Missing auto-format on save, no .env file protection, no security gates |
|
|
55
|
+
| **Rules** | Dead rule files, stale references, empty configs |
|
|
56
|
+
| **Permissions** | Bash auto-allowed without security hooks, no force-push protection |
|
|
57
|
+
| **MCP Servers** | Invalid transport configs, missing commands/URLs |
|
|
58
|
+
|
|
59
|
+
Output looks like this:
|
|
30
60
|
|
|
31
61
|
```
|
|
32
62
|
Instruction Budget ━━━━━━━━━━━━━━━━━━━━ 100%
|
|
@@ -42,40 +72,25 @@ Runs 7 static analyzers against your `.claude/` directory and `CLAUDE.md`. No AP
|
|
|
42
72
|
✓ No issues found. Your configuration looks solid.
|
|
43
73
|
```
|
|
44
74
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
**Flags:**
|
|
48
|
-
- `--fix` — Auto-apply deterministic fixes (42% → 86% in one command)
|
|
49
|
-
- `--watch` — Live score that updates every time you save a config file
|
|
50
|
-
- `--json` — JSON output for programmatic use
|
|
51
|
-
- `--min-score <n>` — Exit non-zero if score drops below threshold (for CI)
|
|
52
|
-
|
|
53
|
-
**What it checks:**
|
|
75
|
+
**All doctor flags:**
|
|
54
76
|
|
|
55
|
-
|
|
|
77
|
+
| Flag | What it does |
|
|
56
78
|
|---|---|
|
|
57
|
-
|
|
|
58
|
-
|
|
|
59
|
-
|
|
|
60
|
-
|
|
|
61
|
-
|
|
|
62
|
-
| **Permissions** | Bash auto-allowed without security hooks, no force-push protection |
|
|
63
|
-
| **MCP Servers** | Invalid transport configs, missing commands/URLs, broken executables |
|
|
79
|
+
| `--fix` | Auto-fixes issues: adds hooks, CLAUDE.md sections, rules, .claudeignore |
|
|
80
|
+
| `--watch` | Re-runs every second, updates when you save a config file |
|
|
81
|
+
| `--json` | Pure JSON output, no colors, no banner — for scripts and CI |
|
|
82
|
+
| `--min-score <n>` | Exit code 1 if score is below threshold — use in CI to block bad configs |
|
|
83
|
+
| `-p, --path <dir>` | Run on a different directory |
|
|
64
84
|
|
|
65
|
-
|
|
85
|
+
## Init
|
|
66
86
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
```bash
|
|
70
|
-
claude-launchpad init
|
|
71
|
-
```
|
|
87
|
+
Detects your project and generates Claude Code config that fits. No templates, no menus — it reads your manifest files and figures it out.
|
|
72
88
|
|
|
73
89
|
```
|
|
74
90
|
→ Detecting project...
|
|
75
91
|
✓ Found Next.js (TypeScript) project
|
|
76
92
|
· Package manager: pnpm
|
|
77
93
|
· Dev command: pnpm dev
|
|
78
|
-
· Test command: pnpm test
|
|
79
94
|
|
|
80
95
|
✓ Generated CLAUDE.md
|
|
81
96
|
✓ Generated TASKS.md
|
|
@@ -83,86 +98,87 @@ claude-launchpad init
|
|
|
83
98
|
✓ Generated .claudeignore
|
|
84
99
|
```
|
|
85
100
|
|
|
86
|
-
**
|
|
87
|
-
|
|
88
|
-
**Detects 20+ frameworks:** Next.js, FastAPI, Django, Rails, Laravel, Express, SvelteKit, Angular, NestJS, Hono, Astro, Remix, Nuxt, Symfony, and more.
|
|
89
|
-
|
|
90
|
-
**Detects package managers from lockfiles:** pnpm, yarn, npm, bun, uv, poetry, cargo, bundler, composer, go modules.
|
|
101
|
+
**Works with:** TypeScript, JavaScript, Python, Go, Ruby, Rust, Dart, PHP, Java, Kotlin, Swift, Elixir, C# — and detects frameworks (Next.js, FastAPI, Django, Rails, Laravel, Express, SvelteKit, Angular, NestJS, and 15+ more).
|
|
91
102
|
|
|
92
103
|
**What you get:**
|
|
93
|
-
- `CLAUDE.md`
|
|
94
|
-
- `TASKS.md`
|
|
95
|
-
- `.claude/settings.json`
|
|
96
|
-
- `.claudeignore`
|
|
104
|
+
- `CLAUDE.md` — your stack, commands, conventions, guardrails
|
|
105
|
+
- `TASKS.md` — session continuity across Claude Code sessions
|
|
106
|
+
- `.claude/settings.json` — auto-format hooks and .env file protection
|
|
107
|
+
- `.claudeignore` — keeps Claude from reading node_modules, dist, lockfiles, etc.
|
|
97
108
|
|
|
98
|
-
|
|
109
|
+
## Enhance
|
|
99
110
|
|
|
100
|
-
Init
|
|
111
|
+
Init detects your stack but can't understand your architecture. Enhance opens Claude to read your actual code and fill in the details.
|
|
101
112
|
|
|
102
113
|
```bash
|
|
103
114
|
claude-launchpad enhance
|
|
104
115
|
```
|
|
105
116
|
|
|
106
|
-
Claude
|
|
107
|
-
- **Architecture** — actual directory structure, data flow, key modules
|
|
108
|
-
- **Conventions** — patterns it observes in your code (naming, imports, state management)
|
|
109
|
-
- **Off-Limits** — guardrails based on what it sees (protected files, anti-patterns)
|
|
110
|
-
- **Key Decisions** — architectural decisions visible in the code
|
|
117
|
+
Claude reads your codebase and updates CLAUDE.md with real content — actual architecture, actual conventions, actual guardrails. Not boilerplate. It also suggests project-specific hooks and MCP servers based on what it finds.
|
|
111
118
|
|
|
112
|
-
|
|
119
|
+
Stays under the 120-instruction budget. Overflows detailed content to `.claude/rules/` files.
|
|
113
120
|
|
|
114
|
-
|
|
121
|
+
## Eval
|
|
115
122
|
|
|
116
|
-
|
|
123
|
+
The part nobody else has built. Runs Claude against real test scenarios and scores the results.
|
|
117
124
|
|
|
118
125
|
```bash
|
|
119
|
-
|
|
126
|
+
# Run only security tests (4 scenarios)
|
|
127
|
+
claude-launchpad eval --suite security
|
|
128
|
+
|
|
129
|
+
# Run only convention tests (5 scenarios)
|
|
130
|
+
claude-launchpad eval --suite conventions
|
|
131
|
+
|
|
132
|
+
# Run only workflow tests (2 scenarios)
|
|
133
|
+
claude-launchpad eval --suite workflow
|
|
134
|
+
|
|
135
|
+
# Run everything (11 scenarios)
|
|
136
|
+
claude-launchpad eval
|
|
137
|
+
|
|
138
|
+
# Use a cheaper model
|
|
139
|
+
claude-launchpad eval --suite security --model haiku
|
|
140
|
+
|
|
141
|
+
# One run per scenario (fastest)
|
|
142
|
+
claude-launchpad eval --suite security --runs 1
|
|
120
143
|
```
|
|
121
144
|
|
|
145
|
+
Each scenario creates an isolated sandbox, runs Claude with a task, and checks if Claude followed the rules:
|
|
146
|
+
|
|
122
147
|
```
|
|
123
148
|
✓ security/sql-injection 10/10 PASS
|
|
124
149
|
✓ security/env-protection 10/10 PASS
|
|
125
150
|
✓ security/secret-exposure 10/10 PASS
|
|
126
151
|
✓ security/input-validation 10/10 PASS
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
✓ conventions/no-hardcoded-values 10/10 PASS
|
|
130
|
-
✓ conventions/naming-conventions 10/10 PASS
|
|
131
|
-
✓ conventions/file-size 10/10 PASS
|
|
132
|
-
✓ workflow/git-conventions 10/10 PASS
|
|
133
|
-
✗ workflow/session-continuity 7/10 WARN
|
|
152
|
+
✗ conventions/file-size 5/10 FAIL
|
|
153
|
+
✗ Claude kept all generated files under 800 lines
|
|
134
154
|
|
|
135
155
|
Config Eval Score ━━━━━━━━━━━━━━━━━━━─ 95%
|
|
136
156
|
```
|
|
137
157
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
This is the part nobody else has built. Template repos scaffold. Audit tools diagnose. **Nobody tests whether your config actually makes Claude better.** Until now.
|
|
141
|
-
|
|
142
|
-
## How It Works Under the Hood
|
|
158
|
+
Results are saved to `.claude/eval/` as structured markdown — you can feed these reports back to Claude to fix the failures.
|
|
143
159
|
|
|
144
|
-
|
|
145
|
-
Reads your `CLAUDE.md`, `.claude/settings.json`, `.claude/rules/`, and `.claudeignore`. Runs 7 analyzers that check instruction count, section completeness, hook configuration, rule validity, permission safety, and MCP server configs. Pure static analysis — no API calls, no network, no cost.
|
|
160
|
+
**Suites:**
|
|
146
161
|
|
|
147
|
-
|
|
148
|
-
|
|
162
|
+
| Suite | Scenarios | What it tests |
|
|
163
|
+
|---|---|---|
|
|
164
|
+
| `security` | 4 | SQL injection, .env protection, secret exposure, input validation |
|
|
165
|
+
| `conventions` | 5 | Error handling, immutability, file size, naming, no hardcoded values |
|
|
166
|
+
| `workflow` | 2 | Git conventions, session continuity |
|
|
149
167
|
|
|
150
|
-
|
|
151
|
-
Spawns `claude "prompt"` as an interactive child process with `stdio: "inherit"` — you see Claude's full UI. The prompt instructs Claude to read the codebase and fill in CLAUDE.md sections. No data passes through the launchpad — it just launches Claude with a pre-loaded task.
|
|
168
|
+
**All eval flags:**
|
|
152
169
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
8. Cleans up the temp directory (or preserves it with `--debug`)
|
|
170
|
+
| Flag | What it does |
|
|
171
|
+
|---|---|
|
|
172
|
+
| `--suite <name>` | Run one suite: `security`, `conventions`, or `workflow` |
|
|
173
|
+
| `--model <model>` | Model to use: `haiku`, `sonnet`, `opus` |
|
|
174
|
+
| `--runs <n>` | Runs per scenario (default 3, median score used) |
|
|
175
|
+
| `--debug` | Keep sandbox directories so you can inspect what Claude wrote |
|
|
176
|
+
| `--json` | JSON output |
|
|
177
|
+
| `--timeout <ms>` | Timeout per run (default 120000) |
|
|
162
178
|
|
|
163
179
|
## Use in CI
|
|
164
180
|
|
|
165
|
-
|
|
181
|
+
Block PRs that degrade your Claude Code config quality:
|
|
166
182
|
|
|
167
183
|
```yaml
|
|
168
184
|
# .github/workflows/claude-config.yml
|
|
@@ -180,39 +196,35 @@ jobs:
|
|
|
180
196
|
- run: npx claude-launchpad@latest doctor --min-score 80 --json
|
|
181
197
|
```
|
|
182
198
|
|
|
183
|
-
|
|
199
|
+
Score below threshold = exit code 1 = PR blocked.
|
|
184
200
|
|
|
185
201
|
## Plugin (pending marketplace review)
|
|
186
202
|
|
|
187
|
-
The plugin has been submitted to the Claude Code marketplace. Once approved:
|
|
188
|
-
|
|
189
203
|
```bash
|
|
190
204
|
claude plugin install claude-launchpad
|
|
191
205
|
```
|
|
192
206
|
|
|
193
|
-
Then use `/launchpad:doctor`, `/launchpad:init`, `/launchpad:enhance`,
|
|
207
|
+
Then use `/launchpad:doctor`, `/launchpad:init`, `/launchpad:enhance`, `/launchpad:eval` inside Claude Code. The plugin nudges you to re-check your score when you edit config files.
|
|
194
208
|
|
|
195
|
-
##
|
|
209
|
+
## How It Works
|
|
196
210
|
|
|
197
|
-
|
|
211
|
+
**Doctor** reads your files and runs static analysis. No API calls. No network. No cost.
|
|
198
212
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
- **Nobody measures.** You can't improve what you can't measure.
|
|
213
|
+
**Init** scans manifest files (package.json, go.mod, pyproject.toml, etc.), detects your stack, and generates config with safe, hardcoded formatter hooks — never interpolates user-controlled strings.
|
|
214
|
+
|
|
215
|
+
**Enhance** spawns `claude "prompt"` as an interactive child process. You see Claude's full UI. No data passes through the tool — it just launches Claude with a task.
|
|
203
216
|
|
|
204
|
-
|
|
217
|
+
**Eval** creates a temp directory, writes seed files from the scenario YAML, initializes a git repo, runs Claude via the Agent SDK (or falls back to the CLI), then checks the output with grep/file assertions. The sandbox is cleaned up afterward (or preserved with `--debug`).
|
|
205
218
|
|
|
206
|
-
##
|
|
219
|
+
## Why This Exists
|
|
220
|
+
|
|
221
|
+
- **CLAUDE.md is advisory.** ~80% compliance. Claude might ignore your rules.
|
|
222
|
+
- **Hooks are deterministic.** 100% compliance. But most people have zero hooks.
|
|
223
|
+
- **Instruction budget is real.** Past ~150, compliance drops. Most people don't know they're over.
|
|
224
|
+
- **Nobody measures.** You can't improve what you can't measure.
|
|
207
225
|
|
|
208
|
-
|
|
209
|
-
- **Doctor is free.** No API calls, no secrets, works offline and air-gapped.
|
|
210
|
-
- **Enhance uses Claude.** Spawns an interactive session to understand your codebase — costs tokens but produces a CLAUDE.md that actually knows your project.
|
|
211
|
-
- **Eval uses the Agent SDK.** Runs Claude headless in sandboxes with explicit tool permissions — proof that your config works.
|
|
212
|
-
- **Works with any stack.** Auto-detects your project. No fixed menu of supported frameworks.
|
|
213
|
-
- **57 tests.** The tool that tests configs is itself well-tested.
|
|
214
|
-
- **You never clone this repo.** It's a tool you run with `npx`, not a template you fork.
|
|
226
|
+
This tool gives you a number. Fix the issues, re-run, watch the number go up.
|
|
215
227
|
|
|
216
228
|
## License
|
|
217
229
|
|
|
218
|
-
MIT
|
|
230
|
+
MIT
|
package/dist/cli.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
// src/cli.ts
|
|
4
4
|
import { Command as Command5 } from "commander";
|
|
5
|
-
import { join as
|
|
5
|
+
import { join as join11 } from "path";
|
|
6
6
|
|
|
7
7
|
// src/commands/init/index.ts
|
|
8
8
|
import { Command } from "commander";
|
|
@@ -1573,6 +1573,8 @@ function createDoctorCommand() {
|
|
|
1573
1573
|
import { Command as Command3 } from "commander";
|
|
1574
1574
|
import ora from "ora";
|
|
1575
1575
|
import chalk2 from "chalk";
|
|
1576
|
+
import { mkdir as mkdir4, writeFile as writeFile4 } from "fs/promises";
|
|
1577
|
+
import { join as join9 } from "path";
|
|
1576
1578
|
|
|
1577
1579
|
// src/commands/eval/loader.ts
|
|
1578
1580
|
import { readFile as readFile5, readdir as readdir3, access as access5 } from "fs/promises";
|
|
@@ -1747,7 +1749,7 @@ async function runScenario(scenario, options) {
|
|
|
1747
1749
|
const sandboxDir = join8(tmpdir(), `claude-eval-${randomUUID()}`);
|
|
1748
1750
|
try {
|
|
1749
1751
|
await setupSandbox(sandboxDir, scenario);
|
|
1750
|
-
await runClaudeInSandbox(sandboxDir, scenario.prompt, options.timeout);
|
|
1752
|
+
await runClaudeInSandbox(sandboxDir, scenario.prompt, options.timeout, options.model);
|
|
1751
1753
|
return await scoreResults(scenario, sandboxDir);
|
|
1752
1754
|
} finally {
|
|
1753
1755
|
if (options.debug) {
|
|
@@ -1796,7 +1798,7 @@ ${scenario.setup.instructions}
|
|
|
1796
1798
|
"eval setup"
|
|
1797
1799
|
], { cwd: sandboxDir });
|
|
1798
1800
|
}
|
|
1799
|
-
async function runClaudeInSandbox(cwd, prompt, timeout) {
|
|
1801
|
+
async function runClaudeInSandbox(cwd, prompt, timeout, model) {
|
|
1800
1802
|
try {
|
|
1801
1803
|
const sdk = await import("@anthropic-ai/claude-agent-sdk");
|
|
1802
1804
|
const controller = new AbortController();
|
|
@@ -1810,7 +1812,8 @@ async function runClaudeInSandbox(cwd, prompt, timeout) {
|
|
|
1810
1812
|
permissionMode: "dontAsk",
|
|
1811
1813
|
settingSources: [],
|
|
1812
1814
|
maxTurns: 20,
|
|
1813
|
-
abortController: controller
|
|
1815
|
+
abortController: controller,
|
|
1816
|
+
...model ? { model } : {}
|
|
1814
1817
|
}
|
|
1815
1818
|
})) {
|
|
1816
1819
|
}
|
|
@@ -1818,31 +1821,29 @@ async function runClaudeInSandbox(cwd, prompt, timeout) {
|
|
|
1818
1821
|
clearTimeout(timeoutId);
|
|
1819
1822
|
}
|
|
1820
1823
|
} catch {
|
|
1821
|
-
await runClaudeCli(cwd, prompt, timeout);
|
|
1824
|
+
await runClaudeCli(cwd, prompt, timeout, model);
|
|
1822
1825
|
}
|
|
1823
1826
|
}
|
|
1824
|
-
async function runClaudeCli(cwd, prompt, timeout) {
|
|
1827
|
+
async function runClaudeCli(cwd, prompt, timeout, model) {
|
|
1825
1828
|
try {
|
|
1826
|
-
|
|
1827
|
-
"
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
1837
|
-
|
|
1838
|
-
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
1844
|
-
{ cwd, timeout, maxBuffer: 10 * 1024 * 1024 }
|
|
1845
|
-
);
|
|
1829
|
+
const args = [
|
|
1830
|
+
"-p",
|
|
1831
|
+
prompt,
|
|
1832
|
+
"--output-format",
|
|
1833
|
+
"text",
|
|
1834
|
+
"--max-turns",
|
|
1835
|
+
"20",
|
|
1836
|
+
"--dangerously-skip-permissions",
|
|
1837
|
+
"--allowedTools",
|
|
1838
|
+
"Bash",
|
|
1839
|
+
"Read",
|
|
1840
|
+
"Write",
|
|
1841
|
+
"Edit",
|
|
1842
|
+
"Glob",
|
|
1843
|
+
"Grep"
|
|
1844
|
+
];
|
|
1845
|
+
if (model) args.push("--model", model);
|
|
1846
|
+
await exec("claude", args, { cwd, timeout, maxBuffer: 10 * 1024 * 1024 });
|
|
1846
1847
|
} catch (error) {
|
|
1847
1848
|
if (error && typeof error === "object" && "stdout" in error) {
|
|
1848
1849
|
return;
|
|
@@ -1951,7 +1952,7 @@ async function listAllFiles(dir) {
|
|
|
1951
1952
|
|
|
1952
1953
|
// src/commands/eval/index.ts
|
|
1953
1954
|
function createEvalCommand() {
|
|
1954
|
-
return new Command3("eval").description("Test your Claude Code config against eval scenarios").option("-s, --suite <suite>", "Eval suite to run (e.g., security, conventions, workflow)").option("-p, --path <path>", "Project root path", process.cwd()).option("--scenarios <path>", "Custom scenarios directory").option("--runs <n>", "Runs per scenario (default: 3)", "3").option("--timeout <ms>", "Timeout per run in ms (default: 120000)", "120000").option("--json", "Output as JSON").option("--debug", "Keep sandbox directories for inspection").action(async (opts) => {
|
|
1955
|
+
return new Command3("eval").description("Test your Claude Code config against eval scenarios").option("-s, --suite <suite>", "Eval suite to run (e.g., security, conventions, workflow)").option("-p, --path <path>", "Project root path", process.cwd()).option("--scenarios <path>", "Custom scenarios directory").option("--runs <n>", "Runs per scenario (default: 3)", "3").option("--timeout <ms>", "Timeout per run in ms (default: 120000)", "120000").option("--json", "Output as JSON").option("--debug", "Keep sandbox directories for inspection").option("--model <model>", "Model to use for eval (e.g., sonnet, haiku, opus)").action(async (opts) => {
|
|
1955
1956
|
printBanner();
|
|
1956
1957
|
const claudeAvailable = await checkClaudeCli();
|
|
1957
1958
|
if (!claudeAvailable) {
|
|
@@ -1972,6 +1973,9 @@ function createEvalCommand() {
|
|
|
1972
1973
|
return;
|
|
1973
1974
|
}
|
|
1974
1975
|
log.success(`Loaded ${scenarios.length} scenario(s)`);
|
|
1976
|
+
if (opts.model) {
|
|
1977
|
+
log.info(`Model: ${opts.model}`);
|
|
1978
|
+
}
|
|
1975
1979
|
log.blank();
|
|
1976
1980
|
const runs = parseInt(opts.runs, 10);
|
|
1977
1981
|
const timeout = parseInt(opts.timeout, 10);
|
|
@@ -1984,7 +1988,7 @@ function createEvalCommand() {
|
|
|
1984
1988
|
try {
|
|
1985
1989
|
const result = await runScenarioWithRetries(
|
|
1986
1990
|
{ ...scenario, runs },
|
|
1987
|
-
{ projectRoot: opts.path, timeout, debug: opts.debug }
|
|
1991
|
+
{ projectRoot: opts.path, timeout, debug: opts.debug, model: opts.model }
|
|
1988
1992
|
);
|
|
1989
1993
|
results.push(result);
|
|
1990
1994
|
if (result.passed) {
|
|
@@ -2019,6 +2023,7 @@ function createEvalCommand() {
|
|
|
2019
2023
|
return;
|
|
2020
2024
|
}
|
|
2021
2025
|
renderEvalReport(results);
|
|
2026
|
+
await saveEvalReport(results, opts.path, opts.suite, opts.model);
|
|
2022
2027
|
});
|
|
2023
2028
|
}
|
|
2024
2029
|
function renderEvalReport(results) {
|
|
@@ -2029,7 +2034,7 @@ function renderEvalReport(results) {
|
|
|
2029
2034
|
console.log(` ${icon} ${chalk2.bold(result.scenario)} ${score} ${status}`);
|
|
2030
2035
|
const failedChecks = result.checks.filter((c) => !c.passed);
|
|
2031
2036
|
for (const check of failedChecks) {
|
|
2032
|
-
console.log(` ${chalk2.
|
|
2037
|
+
console.log(` ${chalk2.red("\u2717")} ${chalk2.dim(check.label)}`);
|
|
2033
2038
|
}
|
|
2034
2039
|
}
|
|
2035
2040
|
log.blank();
|
|
@@ -2046,6 +2051,56 @@ function renderEvalReport(results) {
|
|
|
2046
2051
|
log.warn(`${passed} passed, ${failed} failed out of ${results.length} scenario(s).`);
|
|
2047
2052
|
}
|
|
2048
2053
|
}
|
|
2054
|
+
/**
 * Write a markdown summary of an eval run to `<projectRoot>/.claude/eval/`.
 *
 * The report contains the aggregate score, run metadata (suite, model, date),
 * a per-scenario list of passed and failed checks, and, when at least one
 * scenario failed, a "Recommendations" section listing every failed check.
 *
 * @param {Array<{scenario: string, score: number, maxScore: number, passed: boolean,
 *   checks: Array<{label: string, points: number, passed: boolean}>}>} results
 *   Scenario results to summarise.
 * @param {string} projectRoot Directory under which `.claude/eval/` is created.
 * @param {string} [suite] Suite name from the CLI; reported as "all" when nullish.
 * @param {string} [model] Model name from the CLI; reported as "default" when nullish.
 * @returns {Promise<void>} Resolves once the file is written. Rejects with the
 *   underlying `mkdir`/`writeFile` error if the report cannot be saved.
 */
async function saveEvalReport(results, projectRoot, suite, model) {
  const totalScore = results.reduce((sum, r) => sum + r.score, 0);
  const totalMax = results.reduce((sum, r) => sum + r.maxScore, 0);
  // Avoid dividing by zero when nothing was scorable (e.g. empty results).
  const pct = totalMax > 0 ? Math.round(totalScore / totalMax * 100) : 0;
  const failedResults = results.filter((r) => !r.passed);
  const failed = failedResults.length;
  const passed = results.length - failed;

  // Read the clock once so the heading, the "Date" line and the filename can
  // never disagree (two separate reads could straddle a UTC midnight).
  const isoNow = new Date().toISOString();
  // "2024-01-02T03:04:05.678Z" -> "2024-01-02T03-04-05" (filesystem-safe).
  const timestamp = isoNow.replace(/[:.]/g, "-").slice(0, 19);
  const suiteLabel = suite ?? "all";

  const lines = [
    `# Eval Report \u2014 ${timestamp}`,
    "",
    `**Score: ${pct}%** (${passed} passed, ${failed} failed out of ${results.length} scenarios)`,
    "",
    `- Suite: ${suiteLabel}`,
    `- Model: ${model ?? "default"}`,
    `- Date: ${isoNow.split("T")[0]}`,
    "",
    "## Results",
    ""
  ];

  for (const result of results) {
    const status = result.passed ? "PASS" : "FAIL";
    lines.push(`### ${result.scenario} \u2014 ${result.score}/${result.maxScore} ${status}`);
    // Passed checks are listed before failed ones within each scenario.
    for (const check of result.checks.filter((c) => c.passed)) {
      lines.push(`- PASSED: ${check.label} (${check.points} pts)`);
    }
    for (const check of result.checks.filter((c) => !c.passed)) {
      lines.push(`- FAILED: ${check.label} (${check.points} pts)`);
    }
    lines.push("");
  }

  if (failed > 0) {
    lines.push("## Recommendations");
    lines.push("");
    for (const result of failedResults) {
      lines.push(`### Fix: ${result.scenario}`);
      for (const check of result.checks.filter((c) => !c.passed)) {
        lines.push(`- ${check.label} \u2014 update CLAUDE.md instructions or add hooks to enforce this behavior`);
      }
      lines.push("");
    }
  }

  const evalDir = join9(projectRoot, ".claude", "eval");
  await mkdir4(evalDir, { recursive: true });
  // The suite name comes straight from the command line. Collapse anything
  // that is not a plain filename character so a value containing path
  // separators cannot make the report land outside `.claude/eval/`.
  const fileSuite = String(suiteLabel).replace(/[^\w.-]+/g, "-");
  const filename = `eval-${fileSuite}-${timestamp}.md`;
  await writeFile4(join9(evalDir, filename), lines.join("\n"));
  log.success(`Report saved to .claude/eval/${filename}`);
}
|
|
2049
2104
|
async function checkClaudeCli() {
|
|
2050
2105
|
const { execFile: execFile3 } = await import("child_process");
|
|
2051
2106
|
const { promisify: promisify3 } = await import("util");
|
|
@@ -2063,7 +2118,7 @@ import { Command as Command4 } from "commander";
|
|
|
2063
2118
|
import { spawn, execFile as execFile2 } from "child_process";
|
|
2064
2119
|
import { promisify as promisify2 } from "util";
|
|
2065
2120
|
import { access as access6 } from "fs/promises";
|
|
2066
|
-
import { join as
|
|
2121
|
+
import { join as join10 } from "path";
|
|
2067
2122
|
var execAsync = promisify2(execFile2);
|
|
2068
2123
|
var ENHANCE_PROMPT = `Read CLAUDE.md and the project's codebase, then update CLAUDE.md to fill in missing or incomplete sections.
|
|
2069
2124
|
|
|
@@ -2096,7 +2151,7 @@ function createEnhanceCommand() {
|
|
|
2096
2151
|
return new Command4("enhance").description("Use Claude to analyze your codebase and complete CLAUDE.md").option("-p, --path <path>", "Project root path", process.cwd()).action(async (opts) => {
|
|
2097
2152
|
printBanner();
|
|
2098
2153
|
const root = opts.path;
|
|
2099
|
-
const claudeMdPath =
|
|
2154
|
+
const claudeMdPath = join10(root, "CLAUDE.md");
|
|
2100
2155
|
try {
|
|
2101
2156
|
await access6(claudeMdPath);
|
|
2102
2157
|
} catch {
|
|
@@ -2125,8 +2180,8 @@ function createEnhanceCommand() {
|
|
|
2125
2180
|
}
|
|
2126
2181
|
|
|
2127
2182
|
// src/cli.ts
|
|
2128
|
-
var program = new Command5().name("claude-launchpad").description("CLI toolkit that makes Claude Code setups measurably good").version("0.3.
|
|
2129
|
-
const hasConfig = await fileExists(
|
|
2183
|
+
var program = new Command5().name("claude-launchpad").description("CLI toolkit that makes Claude Code setups measurably good").version("0.3.4", "-v, --version").action(async () => {
|
|
2184
|
+
const hasConfig = await fileExists(join11(process.cwd(), "CLAUDE.md")) || await fileExists(join11(process.cwd(), ".claude", "settings.json"));
|
|
2130
2185
|
if (hasConfig) {
|
|
2131
2186
|
await program.commands.find((c) => c.name() === "doctor")?.parseAsync([], { from: "user" });
|
|
2132
2187
|
} else {
|