claude-launchpad 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-launchpad",
3
- "version": "0.2.0",
3
+ "version": "0.2.1",
4
4
  "description": "CLI toolkit that makes Claude Code setups measurably good — scaffold, diagnose, evaluate",
5
5
  "type": "module",
6
6
  "bin": {
@@ -41,13 +41,15 @@
41
41
  },
42
42
  "packageManager": "pnpm@10.26.1",
43
43
  "dependencies": {
44
- "@anthropic-ai/claude-agent-sdk": "^0.2.86",
45
44
  "@inquirer/prompts": "^8.3.2",
46
45
  "chalk": "^5.6.2",
47
46
  "commander": "^14.0.3",
48
47
  "ora": "^9.3.0",
49
48
  "yaml": "^2.8.3"
50
49
  },
50
+ "optionalDependencies": {
51
+ "@anthropic-ai/claude-agent-sdk": "^0.2.86"
52
+ },
51
53
  "devDependencies": {
52
54
  "@types/node": "^25.5.0",
53
55
  "tsup": "^8.5.1",
@@ -0,0 +1,62 @@
1
+ # Writing Eval Scenarios
2
+
3
+ Eval scenarios test whether Claude Code follows instructions when writing code. Each scenario creates an isolated sandbox, runs Claude with a prompt, and checks the output.
4
+
5
+ ## YAML Format
6
+
7
+ ```yaml
8
+ name: category/scenario-name # Unique ID (e.g., security/sql-injection)
9
+ description: What this scenario tests
10
+ setup:
11
+ files: # Seed files placed in the sandbox
12
+ - path: src/example.ts
13
+ content: |
14
+ // Starter code with a TODO
15
+ instructions: | # Written to CLAUDE.md in the sandbox
16
+ Rules Claude should follow during this scenario.
17
+ prompt: "The task Claude is asked to do"
18
+ checks: # Assertions on Claude's output
19
+ - type: grep
20
+ pattern: "regex pattern"
21
+ target: src/example.ts
22
+ expect: present # or "absent"
23
+ points: 5
24
+ label: Human-readable check name
25
+ passingScore: 4 # Minimum total points to pass (~80% of the points available)
26
+ runs: 3 # Number of runs (median score used)
27
+ ```
28
+
29
+ ## Check Types
30
+
31
+ | Type | What it does | Pattern field |
32
+ |------|-------------|---------------|
33
+ | `grep` | Regex match on file content | Regex pattern |
34
+ | `file-exists` | Check if file was created | — |
35
+ | `file-absent` | Check file was NOT created | — |
36
+ | `max-lines` | No file in directory exceeds N lines | Max line count (e.g., "800") |
37
+
38
+ ## Guidelines
39
+
40
+ 1. **One behavior per scenario.** Don't test SQL injection AND error handling in the same scenario.
41
+ 2. **Instructions should be specific.** "Always use parameterized queries" not "write secure code."
42
+ 3. **Checks should be grep-able.** If you can't verify it with a regex (or one of the file-based check types above), it's too subjective.
43
+ 4. **Seed files should have TODOs.** Give Claude a clear starting point.
44
+ 5. **3 runs minimum.** Claude is non-deterministic, so a single run can be misleading; taking the median score across runs smooths out the variance.
45
+ 6. **Points should reflect importance.** A security check is worth more than a style check.
46
+ 7. **passingScore should be achievable.** Set it to ~80% of total points.
47
+
48
+ ## Testing Your Scenario
49
+
50
+ ```bash
51
+ # Run just your scenario
52
+ claude-launchpad eval --scenarios ./my-scenarios/ --runs 1 --debug
53
+
54
+ # The --debug flag preserves sandboxes so you can inspect what Claude wrote
55
+ ```
56
+
57
+ ## Submitting
58
+
59
+ Add your YAML file to `scenarios/common/` (stack-agnostic) and open a PR. Include:
60
+ - The scenario YAML
61
+ - Why this behavior matters
62
+ - Expected pass rate on a well-configured project
@@ -0,0 +1,36 @@
1
+ name: workflow/git-conventions
2
+ description: Tests if Claude follows conventional commit format when making changes
3
+ setup:
4
+ files:
5
+ - path: src/utils.ts
6
+ content: |
7
+ // Utility functions
8
+
9
+ export function add(a: number, b: number): number {
10
+ return a + b;
11
+ }
12
+ instructions: |
13
+ Use conventional commits: feat:, fix:, docs:, refactor:, test:, chore:
14
+ Always write a descriptive commit message that explains WHY, not just WHAT.
15
+ prompt: "Add a multiply function to src/utils.ts and commit the change"
16
+ checks:
17
+ - type: grep
18
+ pattern: "multiply|mul"
19
+ target: src/utils.ts
20
+ expect: present
21
+ points: 4
22
+ label: Added multiply function
23
+ - type: grep
24
+ pattern: "export function"
25
+ target: src/utils.ts
26
+ expect: present
27
+ points: 3
28
+ label: Function is exported
29
+ - type: grep
30
+ pattern: "number"
31
+ target: src/utils.ts
32
+ expect: present
33
+ points: 3
34
+ label: Has type annotations
35
+ passingScore: 7
36
+ runs: 3
@@ -0,0 +1,33 @@
1
+ name: conventions/no-hardcoded-values
2
+ description: Tests if Claude uses constants or config instead of magic numbers and strings
3
+ setup:
4
+ files:
5
+ - path: src/pagination.ts
6
+ content: |
7
+ // Pagination module
8
+ // TODO: Add a paginate function that takes an array and page number
9
+ instructions: |
10
+ Never hardcode values — use named constants or configuration.
11
+ Define constants at the top of the file with UPPER_SNAKE_CASE names.
12
+ prompt: "Add a paginate function to src/pagination.ts that returns a page of items. Use page size of 20 and support offset-based pagination."
13
+ checks:
14
+ - type: grep
15
+ pattern: "(PAGE_SIZE|ITEMS_PER_PAGE|DEFAULT_.*SIZE|LIMIT|const.*=.*20)"
16
+ target: src/pagination.ts
17
+ expect: present
18
+ points: 5
19
+ label: Uses named constant for page size
20
+ - type: grep
21
+ pattern: "function paginate"
22
+ target: src/pagination.ts
23
+ expect: present
24
+ points: 3
25
+ label: Paginate function exists
26
+ - type: grep
27
+ pattern: "return"
28
+ target: src/pagination.ts
29
+ expect: present
30
+ points: 2
31
+ label: Returns paginated results
32
+ passingScore: 7
33
+ runs: 3