claude-launchpad 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -30
- package/dist/cli.js +119 -35
- package/dist/cli.js.map +1 -1
- package/package.json +4 -2
- package/scenarios/CONTRIBUTING.md +62 -0
- package/scenarios/common/git-conventions.yaml +36 -0
- package/scenarios/common/no-hardcoded-values.yaml +33 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-launchpad",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "CLI toolkit that makes Claude Code setups measurably good — scaffold, diagnose, evaluate",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -41,13 +41,15 @@
|
|
|
41
41
|
},
|
|
42
42
|
"packageManager": "pnpm@10.26.1",
|
|
43
43
|
"dependencies": {
|
|
44
|
-
"@anthropic-ai/claude-agent-sdk": "^0.2.86",
|
|
45
44
|
"@inquirer/prompts": "^8.3.2",
|
|
46
45
|
"chalk": "^5.6.2",
|
|
47
46
|
"commander": "^14.0.3",
|
|
48
47
|
"ora": "^9.3.0",
|
|
49
48
|
"yaml": "^2.8.3"
|
|
50
49
|
},
|
|
50
|
+
"optionalDependencies": {
|
|
51
|
+
"@anthropic-ai/claude-agent-sdk": "^0.2.86"
|
|
52
|
+
},
|
|
51
53
|
"devDependencies": {
|
|
52
54
|
"@types/node": "^25.5.0",
|
|
53
55
|
"tsup": "^8.5.1",
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Writing Eval Scenarios
|
|
2
|
+
|
|
3
|
+
Eval scenarios test whether Claude Code follows instructions when writing code. Each scenario creates an isolated sandbox, runs Claude with a prompt, and checks the output.
|
|
4
|
+
|
|
5
|
+
## YAML Format
|
|
6
|
+
|
|
7
|
+
```yaml
|
|
8
|
+
name: category/scenario-name # Unique ID (e.g., security/sql-injection)
|
|
9
|
+
description: What this scenario tests
|
|
10
|
+
setup:
|
|
11
|
+
files: # Seed files placed in the sandbox
|
|
12
|
+
- path: src/example.ts
|
|
13
|
+
content: |
|
|
14
|
+
// Starter code with a TODO
|
|
15
|
+
instructions: | # Written to CLAUDE.md in the sandbox
|
|
16
|
+
Rules Claude should follow during this scenario.
|
|
17
|
+
prompt: "The task Claude is asked to do"
|
|
18
|
+
checks: # Assertions on Claude's output
|
|
19
|
+
- type: grep
|
|
20
|
+
pattern: "regex pattern"
|
|
21
|
+
target: src/example.ts
|
|
22
|
+
expect: present # or "absent"
|
|
23
|
+
points: 5
|
|
24
|
+
label: Human-readable check name
|
|
25
|
+
passingScore: 8 # Minimum points to pass
|
|
26
|
+
runs: 3 # Number of runs (median score used)
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Check Types
|
|
30
|
+
|
|
31
|
+
| Type | What it does | Pattern field |
|
|
32
|
+
|------|-------------|---------------|
|
|
33
|
+
| `grep` | Regex match on file content | Regex pattern |
|
|
34
|
+
| `file-exists` | Check if file was created | — |
|
|
35
|
+
| `file-absent` | Check file was NOT created | — |
|
|
36
|
+
| `max-lines` | No file in directory exceeds N lines | Max line count (e.g., "800") |
|
|
37
|
+
|
|
38
|
+
## Guidelines
|
|
39
|
+
|
|
40
|
+
1. **One behavior per scenario.** Don't test SQL injection AND error handling in the same scenario.
|
|
41
|
+
2. **Instructions should be specific.** "Always use parameterized queries" not "write secure code."
|
|
42
|
+
3. **Checks should be grep-able.** If you can't verify it with a regex, it's too subjective.
|
|
43
|
+
4. **Seed files should have TODOs.** Give Claude a clear starting point.
|
|
44
|
+
5. **3 runs minimum.** Claude is non-deterministic. Median score smooths variance.
|
|
45
|
+
6. **Points should reflect importance.** A security check is worth more than a style check.
|
|
46
|
+
7. **passingScore should be achievable.** Set it to ~80% of total points.
|
|
47
|
+
|
|
48
|
+
## Testing Your Scenario
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
# Run just your scenario
|
|
52
|
+
claude-launchpad eval --scenarios ./my-scenarios/ --runs 1 --debug
|
|
53
|
+
|
|
54
|
+
# The --debug flag preserves sandboxes so you can inspect what Claude wrote
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Submitting
|
|
58
|
+
|
|
59
|
+
Add your YAML file to `scenarios/common/` (stack-agnostic) and open a PR. Include:
|
|
60
|
+
- The scenario YAML
|
|
61
|
+
- Why this behavior matters
|
|
62
|
+
- Expected pass rate on a well-configured project
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: workflow/git-conventions
|
|
2
|
+
description: Tests if Claude follows conventional commit format when making changes
|
|
3
|
+
setup:
|
|
4
|
+
files:
|
|
5
|
+
- path: src/utils.ts
|
|
6
|
+
content: |
|
|
7
|
+
// Utility functions
|
|
8
|
+
|
|
9
|
+
export function add(a: number, b: number): number {
|
|
10
|
+
return a + b;
|
|
11
|
+
}
|
|
12
|
+
instructions: |
|
|
13
|
+
Use conventional commits: feat:, fix:, docs:, refactor:, test:, chore:
|
|
14
|
+
Always write a descriptive commit message that explains WHY, not just WHAT.
|
|
15
|
+
prompt: "Add a multiply function to src/utils.ts and commit the change"
|
|
16
|
+
checks:
|
|
17
|
+
- type: grep
|
|
18
|
+
pattern: "multiply|mul"
|
|
19
|
+
target: src/utils.ts
|
|
20
|
+
expect: present
|
|
21
|
+
points: 4
|
|
22
|
+
label: Added multiply function
|
|
23
|
+
- type: grep
|
|
24
|
+
pattern: "export function"
|
|
25
|
+
target: src/utils.ts
|
|
26
|
+
expect: present
|
|
27
|
+
points: 3
|
|
28
|
+
label: Function is exported
|
|
29
|
+
- type: grep
|
|
30
|
+
pattern: "number"
|
|
31
|
+
target: src/utils.ts
|
|
32
|
+
expect: present
|
|
33
|
+
points: 3
|
|
34
|
+
label: Has type annotations
|
|
35
|
+
passingScore: 7
|
|
36
|
+
runs: 3
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: conventions/no-hardcoded-values
|
|
2
|
+
description: Tests if Claude uses constants or config instead of magic numbers and strings
|
|
3
|
+
setup:
|
|
4
|
+
files:
|
|
5
|
+
- path: src/pagination.ts
|
|
6
|
+
content: |
|
|
7
|
+
// Pagination module
|
|
8
|
+
// TODO: Add a paginate function that takes an array and page number
|
|
9
|
+
instructions: |
|
|
10
|
+
Never hardcode values — use named constants or configuration.
|
|
11
|
+
Define constants at the top of the file with UPPER_SNAKE_CASE names.
|
|
12
|
+
prompt: "Add a paginate function to src/pagination.ts that returns a page of items. Use page size of 20 and support offset-based pagination."
|
|
13
|
+
checks:
|
|
14
|
+
- type: grep
|
|
15
|
+
pattern: "(PAGE_SIZE|ITEMS_PER_PAGE|DEFAULT_.*SIZE|LIMIT|const.*=.*20)"
|
|
16
|
+
target: src/pagination.ts
|
|
17
|
+
expect: present
|
|
18
|
+
points: 5
|
|
19
|
+
label: Uses named constant for page size
|
|
20
|
+
- type: grep
|
|
21
|
+
pattern: "function paginate"
|
|
22
|
+
target: src/pagination.ts
|
|
23
|
+
expect: present
|
|
24
|
+
points: 3
|
|
25
|
+
label: Paginate function exists
|
|
26
|
+
- type: grep
|
|
27
|
+
pattern: "return"
|
|
28
|
+
target: src/pagination.ts
|
|
29
|
+
expect: present
|
|
30
|
+
points: 2
|
|
31
|
+
label: Returns paginated results
|
|
32
|
+
passingScore: 7
|
|
33
|
+
runs: 3
|