@auto-engineer/component-implementor-react 1.98.0 → 1.100.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/.turbo/turbo-test.log +6 -6
- package/.turbo/turbo-type-check.log +1 -1
- package/CHANGELOG.md +92 -0
- package/dist/src/commands/implement-component.d.ts +19 -0
- package/dist/src/commands/implement-component.d.ts.map +1 -1
- package/dist/src/commands/implement-component.js +109 -30
- package/dist/src/commands/implement-component.js.map +1 -1
- package/dist/src/commands/implement-component.test.js +259 -69
- package/dist/src/commands/implement-component.test.js.map +1 -1
- package/dist/src/extract-exports.d.ts +6 -0
- package/dist/src/extract-exports.d.ts.map +1 -0
- package/dist/src/extract-exports.js +46 -0
- package/dist/src/extract-exports.js.map +1 -0
- package/dist/src/generate-story-deterministic.d.ts +30 -0
- package/dist/src/generate-story-deterministic.d.ts.map +1 -0
- package/dist/src/generate-story-deterministic.js +229 -0
- package/dist/src/generate-story-deterministic.js.map +1 -0
- package/dist/src/index.d.ts +4 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +3 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/pipeline/run-pipeline.d.ts +69 -0
- package/dist/src/pipeline/run-pipeline.d.ts.map +1 -0
- package/dist/src/pipeline/run-pipeline.js +78 -0
- package/dist/src/pipeline/run-pipeline.js.map +1 -0
- package/dist/src/pipeline/run-pipeline.test.d.ts +2 -0
- package/dist/src/pipeline/run-pipeline.test.d.ts.map +1 -0
- package/dist/src/pipeline/run-pipeline.test.js +247 -0
- package/dist/src/pipeline/run-pipeline.test.js.map +1 -0
- package/dist/src/pipeline/steps/generate-component.d.ts +4 -0
- package/dist/src/pipeline/steps/generate-component.d.ts.map +1 -0
- package/dist/src/pipeline/steps/generate-component.js +50 -0
- package/dist/src/pipeline/steps/generate-component.js.map +1 -0
- package/dist/src/pipeline/steps/generate-component.test.d.ts.map +1 -0
- package/dist/src/pipeline/steps/generate-component.test.js +106 -0
- package/dist/src/pipeline/steps/generate-component.test.js.map +1 -0
- package/dist/src/pipeline/steps/generate-story.d.ts +3 -0
- package/dist/src/pipeline/steps/generate-story.d.ts.map +1 -0
- package/dist/src/pipeline/steps/generate-story.js +14 -0
- package/dist/src/pipeline/steps/generate-story.js.map +1 -0
- package/dist/src/pipeline/steps/generate-story.test.d.ts.map +1 -0
- package/dist/src/pipeline/steps/generate-story.test.js +41 -0
- package/dist/src/pipeline/steps/generate-story.test.js.map +1 -0
- package/dist/src/pipeline/steps/generate-test.d.ts +4 -0
- package/dist/src/pipeline/steps/generate-test.d.ts.map +1 -0
- package/dist/src/pipeline/steps/generate-test.js +19 -0
- package/dist/src/pipeline/steps/generate-test.js.map +1 -0
- package/dist/src/pipeline/steps/generate-test.test.d.ts.map +1 -0
- package/dist/src/pipeline/steps/generate-test.test.js +60 -0
- package/dist/src/pipeline/steps/generate-test.test.js.map +1 -0
- package/dist/src/pipeline/steps/lint-fix-loop.d.ts +4 -0
- package/dist/src/pipeline/steps/lint-fix-loop.d.ts.map +1 -0
- package/dist/src/pipeline/steps/lint-fix-loop.js +45 -0
- package/dist/src/pipeline/steps/lint-fix-loop.js.map +1 -0
- package/dist/src/pipeline/steps/lint-fix-loop.test.d.ts +2 -0
- package/dist/src/pipeline/steps/lint-fix-loop.test.d.ts.map +1 -0
- package/dist/src/pipeline/steps/lint-fix-loop.test.js +119 -0
- package/dist/src/pipeline/steps/lint-fix-loop.test.js.map +1 -0
- package/dist/src/pipeline/steps/story-fix-loop.d.ts +4 -0
- package/dist/src/pipeline/steps/story-fix-loop.d.ts.map +1 -0
- package/dist/src/pipeline/steps/story-fix-loop.js +34 -0
- package/dist/src/pipeline/steps/story-fix-loop.js.map +1 -0
- package/dist/src/pipeline/steps/story-fix-loop.test.d.ts +2 -0
- package/dist/src/pipeline/steps/story-fix-loop.test.d.ts.map +1 -0
- package/dist/src/pipeline/steps/story-fix-loop.test.js +94 -0
- package/dist/src/pipeline/steps/story-fix-loop.test.js.map +1 -0
- package/dist/src/pipeline/steps/storybook-test.d.ts +3 -0
- package/dist/src/pipeline/steps/storybook-test.d.ts.map +1 -0
- package/dist/src/pipeline/steps/storybook-test.js +22 -0
- package/dist/src/pipeline/steps/storybook-test.js.map +1 -0
- package/dist/src/pipeline/steps/storybook-test.test.d.ts +2 -0
- package/dist/src/pipeline/steps/storybook-test.test.d.ts.map +1 -0
- package/dist/src/pipeline/steps/storybook-test.test.js +66 -0
- package/dist/src/pipeline/steps/storybook-test.test.js.map +1 -0
- package/dist/src/pipeline/steps/test-fix-loop.d.ts +4 -0
- package/dist/src/pipeline/steps/test-fix-loop.d.ts.map +1 -0
- package/dist/src/pipeline/steps/test-fix-loop.js +44 -0
- package/dist/src/pipeline/steps/test-fix-loop.js.map +1 -0
- package/dist/src/pipeline/steps/test-fix-loop.test.d.ts +2 -0
- package/dist/src/pipeline/steps/test-fix-loop.test.d.ts.map +1 -0
- package/dist/src/pipeline/steps/test-fix-loop.test.js +168 -0
- package/dist/src/pipeline/steps/test-fix-loop.test.js.map +1 -0
- package/dist/src/pipeline/steps/type-fix-loop.d.ts +4 -0
- package/dist/src/pipeline/steps/type-fix-loop.d.ts.map +1 -0
- package/dist/src/pipeline/steps/type-fix-loop.js +43 -0
- package/dist/src/pipeline/steps/type-fix-loop.js.map +1 -0
- package/dist/src/pipeline/steps/type-fix-loop.test.d.ts +2 -0
- package/dist/src/pipeline/steps/type-fix-loop.test.d.ts.map +1 -0
- package/dist/src/pipeline/steps/type-fix-loop.test.js +112 -0
- package/dist/src/pipeline/steps/type-fix-loop.test.js.map +1 -0
- package/dist/src/pipeline/steps/visual-test.d.ts +3 -0
- package/dist/src/pipeline/steps/visual-test.d.ts.map +1 -0
- package/dist/src/pipeline/steps/visual-test.js +4 -0
- package/dist/src/pipeline/steps/visual-test.js.map +1 -0
- package/dist/src/pipeline/steps/visual-test.test.d.ts +2 -0
- package/dist/src/pipeline/steps/visual-test.test.d.ts.map +1 -0
- package/dist/src/pipeline/steps/visual-test.test.js +9 -0
- package/dist/src/pipeline/steps/visual-test.test.js.map +1 -0
- package/dist/src/project-context.d.ts +10 -0
- package/dist/src/project-context.d.ts.map +1 -0
- package/dist/src/project-context.js +178 -0
- package/dist/src/project-context.js.map +1 -0
- package/dist/src/prompt.d.ts +39 -7
- package/dist/src/prompt.d.ts.map +1 -1
- package/dist/src/prompt.js +233 -23
- package/dist/src/prompt.js.map +1 -1
- package/dist/src/prompt.test.js +154 -9
- package/dist/src/prompt.test.js.map +1 -1
- package/dist/src/scaffold.d.ts +49 -0
- package/dist/src/scaffold.d.ts.map +1 -0
- package/dist/src/scaffold.js +208 -0
- package/dist/src/scaffold.js.map +1 -0
- package/dist/src/tools/lint-runner.d.ts +7 -0
- package/dist/src/tools/lint-runner.d.ts.map +1 -0
- package/dist/src/tools/lint-runner.js +48 -0
- package/dist/src/tools/lint-runner.js.map +1 -0
- package/dist/src/tools/lint-runner.test.d.ts +2 -0
- package/dist/src/tools/lint-runner.test.d.ts.map +1 -0
- package/dist/src/tools/lint-runner.test.js +90 -0
- package/dist/src/tools/lint-runner.test.js.map +1 -0
- package/dist/src/tools/storybook-runner.d.ts +6 -0
- package/dist/src/tools/storybook-runner.d.ts.map +1 -0
- package/dist/src/tools/storybook-runner.js +25 -0
- package/dist/src/tools/storybook-runner.js.map +1 -0
- package/dist/src/tools/storybook-runner.test.d.ts +2 -0
- package/dist/src/tools/storybook-runner.test.d.ts.map +1 -0
- package/dist/src/tools/storybook-runner.test.js +43 -0
- package/dist/src/tools/storybook-runner.test.js.map +1 -0
- package/dist/src/tools/test-runner.d.ts +9 -0
- package/dist/src/tools/test-runner.d.ts.map +1 -0
- package/dist/src/tools/test-runner.js +74 -0
- package/dist/src/tools/test-runner.js.map +1 -0
- package/dist/src/tools/test-runner.test.d.ts +2 -0
- package/dist/src/tools/test-runner.test.d.ts.map +1 -0
- package/dist/src/tools/test-runner.test.js +177 -0
- package/dist/src/tools/test-runner.test.js.map +1 -0
- package/dist/src/tools/type-checker.d.ts +6 -0
- package/dist/src/tools/type-checker.d.ts.map +1 -0
- package/dist/src/tools/type-checker.js +36 -0
- package/dist/src/tools/type-checker.js.map +1 -0
- package/dist/src/tools/type-checker.test.d.ts +2 -0
- package/dist/src/tools/type-checker.test.d.ts.map +1 -0
- package/dist/src/tools/type-checker.test.js +96 -0
- package/dist/src/tools/type-checker.test.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/inputs/model-a/spec-deltas.json +1460 -0
- package/inputs/model-b/spec-deltas.json +1424 -0
- package/inputs/model-c/spec-deltas.json +1432 -0
- package/inputs/model-d/spec-deltas.json +967 -0
- package/inputs/model-e/spec-deltas.json +2292 -0
- package/ketchup-plan.md +43 -8
- package/package.json +3 -3
- package/scoring-heuristic.md +138 -0
- package/scripts/improve.ts +23 -18
- package/src/commands/implement-component.test.ts +309 -76
- package/src/commands/implement-component.ts +155 -31
- package/src/extract-exports.ts +53 -0
- package/src/generate-story-deterministic.ts +267 -0
- package/src/index.ts +12 -0
- package/src/pipeline/run-pipeline.test.ts +292 -0
- package/src/pipeline/run-pipeline.ts +160 -0
- package/src/pipeline/steps/generate-component.test.ts +130 -0
- package/src/pipeline/steps/generate-component.ts +60 -0
- package/src/pipeline/steps/generate-story.test.ts +54 -0
- package/src/pipeline/steps/generate-story.ts +17 -0
- package/src/pipeline/steps/generate-test.test.ts +75 -0
- package/src/pipeline/steps/generate-test.ts +25 -0
- package/src/pipeline/steps/lint-fix-loop.test.ts +155 -0
- package/src/pipeline/steps/lint-fix-loop.ts +59 -0
- package/src/pipeline/steps/story-fix-loop.test.ts +123 -0
- package/src/pipeline/steps/story-fix-loop.ts +47 -0
- package/src/pipeline/steps/storybook-test.test.ts +82 -0
- package/src/pipeline/steps/storybook-test.ts +27 -0
- package/src/pipeline/steps/test-fix-loop.test.ts +201 -0
- package/src/pipeline/steps/test-fix-loop.ts +56 -0
- package/src/pipeline/steps/type-fix-loop.test.ts +145 -0
- package/src/pipeline/steps/type-fix-loop.ts +55 -0
- package/src/pipeline/steps/visual-test.test.ts +10 -0
- package/src/pipeline/steps/visual-test.ts +5 -0
- package/src/project-context.ts +205 -0
- package/src/prompt.test.ts +174 -8
- package/src/prompt.ts +301 -23
- package/src/scaffold.ts +281 -0
- package/src/tools/lint-runner.test.ts +112 -0
- package/src/tools/lint-runner.ts +52 -0
- package/src/tools/storybook-runner.test.ts +53 -0
- package/src/tools/storybook-runner.ts +29 -0
- package/src/tools/test-runner.test.ts +213 -0
- package/src/tools/test-runner.ts +84 -0
- package/src/tools/type-checker.test.ts +120 -0
- package/src/tools/type-checker.ts +42 -0
- package/vitest.config.ts +9 -1
- package/dist/src/generate-component.d.ts +0 -4
- package/dist/src/generate-component.d.ts.map +0 -1
- package/dist/src/generate-component.js +0 -14
- package/dist/src/generate-component.js.map +0 -1
- package/dist/src/generate-component.test.d.ts.map +0 -1
- package/dist/src/generate-component.test.js +0 -73
- package/dist/src/generate-component.test.js.map +0 -1
- package/dist/src/generate-story.d.ts +0 -4
- package/dist/src/generate-story.d.ts.map +0 -1
- package/dist/src/generate-story.js +0 -14
- package/dist/src/generate-story.js.map +0 -1
- package/dist/src/generate-story.test.d.ts.map +0 -1
- package/dist/src/generate-story.test.js +0 -58
- package/dist/src/generate-story.test.js.map +0 -1
- package/dist/src/generate-test.d.ts +0 -4
- package/dist/src/generate-test.d.ts.map +0 -1
- package/dist/src/generate-test.js +0 -14
- package/dist/src/generate-test.js.map +0 -1
- package/dist/src/generate-test.test.d.ts.map +0 -1
- package/dist/src/generate-test.test.js +0 -77
- package/dist/src/generate-test.test.js.map +0 -1
- package/dist/src/reconcile.d.ts +0 -8
- package/dist/src/reconcile.d.ts.map +0 -1
- package/dist/src/reconcile.js +0 -18
- package/dist/src/reconcile.js.map +0 -1
- package/dist/src/reconcile.test.d.ts +0 -2
- package/dist/src/reconcile.test.d.ts.map +0 -1
- package/dist/src/reconcile.test.js +0 -108
- package/dist/src/reconcile.test.js.map +0 -1
- package/src/generate-component.test.ts +0 -89
- package/src/generate-component.ts +0 -16
- package/src/generate-story.test.ts +0 -71
- package/src/generate-story.ts +0 -16
- package/src/generate-test.test.ts +0 -93
- package/src/generate-test.ts +0 -16
- package/src/reconcile.test.ts +0 -127
- package/src/reconcile.ts +0 -27
- /package/dist/src/{generate-component.test.d.ts → pipeline/steps/generate-component.test.d.ts} +0 -0
- /package/dist/src/{generate-story.test.d.ts → pipeline/steps/generate-story.test.d.ts} +0 -0
- /package/dist/src/{generate-test.test.d.ts → pipeline/steps/generate-test.test.d.ts} +0 -0
package/ketchup-plan.md
CHANGED
|
@@ -1,13 +1,48 @@
|
|
|
1
|
-
# Ketchup Plan:
|
|
1
|
+
# Ketchup Plan: TDD Pipeline Overhaul
|
|
2
2
|
|
|
3
3
|
## TODO
|
|
4
4
|
|
|
5
|
+
### Phase 6: Wire into ui-runner
|
|
6
|
+
- [ ] Burst 21: Update ui-runner/src/run.ts to pass per-step model config [depends: 20]
|
|
7
|
+
|
|
5
8
|
## DONE
|
|
6
9
|
|
|
7
|
-
|
|
8
|
-
- [x] Burst
|
|
9
|
-
- [x] Burst
|
|
10
|
-
- [x] Burst
|
|
11
|
-
- [x] Burst
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
### Phase 5: Integration
|
|
11
|
+
- [x] Burst 17: Rewrite commands/implement-component.ts [depends: 16]
|
|
12
|
+
- [x] Burst 18: Rewrite commands/implement-component.test.ts [depends: 17]
|
|
13
|
+
- [x] Burst 19: Update index.ts exports + delete old files [depends: 18]
|
|
14
|
+
- [x] Burst 20: Coverage verification — 123 tests, 100% coverage [depends: 19]
|
|
15
|
+
|
|
16
|
+
### Phase 4: Pipeline Runner
|
|
17
|
+
- [x] Burst 16: pipeline/run-pipeline.ts + test [depends: 7-15]
|
|
18
|
+
|
|
19
|
+
### Phase 3: Pipeline Steps
|
|
20
|
+
- [x] Burst 7: pipeline/steps/generate-test.ts + test [depends: 1]
|
|
21
|
+
- [x] Burst 8: pipeline/steps/generate-component.ts + test [depends: 1]
|
|
22
|
+
- [x] Burst 9: pipeline/steps/type-fix-loop.ts + test [depends: 1, 2, 6]
|
|
23
|
+
- [x] Burst 10: pipeline/steps/test-fix-loop.ts + test [depends: 1, 3, 6]
|
|
24
|
+
- [x] Burst 11: pipeline/steps/lint-fix-loop.ts + test [depends: 1, 4, 6]
|
|
25
|
+
- [x] Burst 12: pipeline/steps/generate-story.ts + test [depends: 1]
|
|
26
|
+
- [x] Burst 13: pipeline/steps/story-fix-loop.ts + test [depends: 1, 2, 6]
|
|
27
|
+
- [x] Burst 14: pipeline/steps/storybook-test.ts + test [depends: 1, 2, 5]
|
|
28
|
+
- [x] Burst 15: pipeline/steps/visual-test.ts + test [depends: 1]
|
|
29
|
+
|
|
30
|
+
### Phase 2: Fixer Prompts
|
|
31
|
+
- [x] Burst 6: Add 4 fixer prompt builders to prompt.ts + tests [depends: none]
|
|
32
|
+
|
|
33
|
+
### Phase 1: Foundation
|
|
34
|
+
- [x] Burst 1: pipeline/types.ts — PipelineContext, StepResult, ModelConfig, PipelineConfig [depends: none]
|
|
35
|
+
- [x] Burst 2: tools/type-checker.ts + test — runTypeCheck wrapping tsc [depends: none]
|
|
36
|
+
- [x] Burst 3: tools/test-runner.ts + test — runTests wrapping vitest [depends: none]
|
|
37
|
+
- [x] Burst 4: tools/lint-runner.ts + test — runLint/runLintFix wrapping biome [depends: none]
|
|
38
|
+
- [x] Burst 5: tools/storybook-runner.ts + test — runStorybookTest [depends: none]
|
|
39
|
+
|
|
40
|
+
## DONE (previous)
|
|
41
|
+
|
|
42
|
+
- [x] Burst: Infrastructure — package.json, tsconfig.json, vitest.config.ts (8d0ef1f1)
|
|
43
|
+
- [x] Burst: extract-code-block.ts with test (c272077c)
|
|
44
|
+
- [x] Burst: generate-test.ts with test (ca284764)
|
|
45
|
+
- [x] Burst: generate-component.ts with test (2263623a)
|
|
46
|
+
- [x] Burst: generate-story.ts with test (ceaa70df)
|
|
47
|
+
- [x] Burst: implement-component command handler with test (150fcb02)
|
|
48
|
+
- [x] Burst: index.ts exports + type-check (a54bd956)
|
package/package.json
CHANGED
|
@@ -6,13 +6,13 @@
|
|
|
6
6
|
"dependencies": {
|
|
7
7
|
"ai": "^6.0.0",
|
|
8
8
|
"debug": "^4.4.1",
|
|
9
|
-
"@auto-engineer/message-bus": "1.
|
|
10
|
-
"@auto-engineer/model-factory": "1.
|
|
9
|
+
"@auto-engineer/message-bus": "1.100.0",
|
|
10
|
+
"@auto-engineer/model-factory": "1.100.0"
|
|
11
11
|
},
|
|
12
12
|
"devDependencies": {
|
|
13
13
|
"vitest": "^3.2.1"
|
|
14
14
|
},
|
|
15
|
-
"version": "1.98.0",
|
|
15
|
+
"version": "1.100.0",
|
|
16
16
|
"publishConfig": {
|
|
17
17
|
"access": "public"
|
|
18
18
|
},
|
|
package/scoring-heuristic.md
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# Scoring Heuristic: component-implementor-react
|
|
2
|
+
|
|
3
|
+
Total score: **100 points** across 5 categories.
|
|
4
|
+
|
|
5
|
+
Scored **per component**, then aggregated across the batch.
|
|
6
|
+
|
|
7
|
+
## 1. First-Pass Success (30 pts)
|
|
8
|
+
|
|
9
|
+
Measures how often the LLM gets it right without fix loops. This is the strongest quality signal — it reflects upstream spec quality AND model capability.
|
|
10
|
+
|
|
11
|
+
| Check | Points | How to measure |
|
|
12
|
+
|---|---|---|
|
|
13
|
+
| Type check passes on first try (0 type-fix iterations) | 10 | `typeFixIterations === 0` |
|
|
14
|
+
| Tests pass on first try (0 test-fix iterations) | 10 | `testFixIterations === 0` |
|
|
15
|
+
| Lint passes on first try (0 lint-fix iterations) | 5 | `lintFixIterations === 0` |
|
|
16
|
+
| Story compiles on first try (0 story-fix iterations) | 5 | `storyFixIterations === 0` |
|
|
17
|
+
|
|
18
|
+
**Partial credit:** Each fix-loop iteration reduces the points awarded for that check according to the following schedule.
|
|
19
|
+
- 1 iteration: 60% of points
|
|
20
|
+
- 2 iterations: 30% of points
|
|
21
|
+
- 3 iterations (max): 0 points
|
|
22
|
+
|
|
23
|
+
## 2. Test Quality (25 pts)
|
|
24
|
+
|
|
25
|
+
| Check | Points | How to measure |
|
|
26
|
+
|---|---|---|
|
|
27
|
+
| Test-to-spec ratio (full credit at ≥ 1.0; see tiers below) | 8 | `testCount / totalSpecBullets`. Count `it()` blocks in test file vs total spec strings (structure + rendering + interaction + styling) |
|
|
28
|
+
| Tests exercise all spec categories | 6 | For each non-empty spec category (structure, rendering, interaction, styling), at least one `it()` block's assertion touches that behavior. Measure by matching test assertion targets (CSS queries, text content, event triggers, class checks) against spec category keywords — not by describe block naming |
|
|
29
|
+
| Low weak-assertion ratio | 5 | Grep test file for `toBeDefined`, `toBeTruthy`, `toBeFalsy`, `not.toBeNull`. Score = `max(0, 1 - (weakCount / totalAssertions))` * 5. One weak assertion in 50 total = ~5 pts; 10 weak in 12 total = ~0.8 pts |
|
|
30
|
+
| All tests pass | 6 | Final test run: `passedTests === totalTests` |
|
|
31
|
+
|
|
32
|
+
**Test-to-spec ratio scoring:**
|
|
33
|
+
- ≥ 1.0: 8 pts (1:1 or better)
|
|
34
|
+
- 0.8-0.99: 6 pts
|
|
35
|
+
- 0.5-0.79: 3 pts
|
|
36
|
+
- < 0.5: 0 pts
|
|
37
|
+
|
|
38
|
+
## 3. Implementation Quality (20 pts)
|
|
39
|
+
|
|
40
|
+
| Check | Points | How to measure |
|
|
41
|
+
|---|---|---|
|
|
42
|
+
| Named function export (not default) | 3 | Grep for `export function ComponentName` |
|
|
43
|
+
| Props type exported | 3 | Grep for `export type ComponentNameProps` or `export interface` |
|
|
44
|
+
| No TypeScript errors in final output | 4 | Final type check passes |
|
|
45
|
+
| No lint violations in final output | 3 | Final biome check passes |
|
|
46
|
+
| No `// @ts-ignore` or `// @ts-expect-error` | 3 | Grep for these patterns; 0 = pass |
|
|
47
|
+
| No `any` type in component code | 4 | Grep for `: any` or `as any`; 0 = pass |
|
|
48
|
+
|
|
49
|
+
## 4. Fix Loop Efficiency (15 pts)
|
|
50
|
+
|
|
51
|
+
Measures total cost/effort to produce one component.
|
|
52
|
+
|
|
53
|
+
| Metric | Points | Formula |
|
|
54
|
+
|---|---|---|
|
|
55
|
+
| Total LLM calls ≤ 3 (generate test + component + story) | 8 | Ideal = 3 calls. Score = `max(0, 8 - (totalCalls - 3) * 2)` |
|
|
56
|
+
| Total fix iterations = 0 | 7 | Score = `max(0, 7 - totalFixIterations * 2)` |
|
|
57
|
+
|
|
58
|
+
**Breakdown of ideal vs degraded:**
|
|
59
|
+
|
|
60
|
+
| Total LLM Calls | Fix Iterations | Score (of 15) | Interpretation |
|
|
61
|
+
|---|---|---|---|
|
|
62
|
+
| 3 | 0 | 15 | Perfect — no fixes needed |
|
|
63
|
+
| 4 | 1 | 11 | Good — one minor fix |
|
|
64
|
+
| 5 | 2 | 7 | Acceptable — some churn |
|
|
65
|
+
| 6 | 3 | 3 | Concerning — spec may be poor |
|
|
66
|
+
| 7+ | 4+ | 0 | Bad — upstream spec issue |
|
|
67
|
+
|
|
68
|
+
## 5. Story Quality (10 pts)
|
|
69
|
+
|
|
70
|
+
| Check | Points | How to measure |
|
|
71
|
+
|---|---|---|
|
|
72
|
+
| Story file compiles (no type errors) | 4 | Type check on story file passes |
|
|
73
|
+
| Has Default story export | 2 | Grep for `export const Default` |
|
|
74
|
+
| ≥ 2 named story exports | 2 | Count `export const` in story file |
|
|
75
|
+
| No `export default` other than meta in story (CSF3) | 2 | The only default export should be the `meta` object |
|
|
76
|
+
|
|
77
|
+
## Aggregation: Batch Score
|
|
78
|
+
|
|
79
|
+
When running multiple components in a batch:
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
batchScore = sum(componentScores) / componentCount
|
|
83
|
+
|
|
84
|
+
additionalMetrics:
|
|
85
|
+
- totalLLMCalls across batch
|
|
86
|
+
- totalFixIterations across batch
|
|
87
|
+
- totalTokensUsed
|
|
88
|
+
- estimatedCost
|
|
89
|
+
- avgTimePerComponent
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Grading
|
|
93
|
+
|
|
94
|
+
### Per Component
|
|
95
|
+
|
|
96
|
+
| Score | Grade | Interpretation |
|
|
97
|
+
|---|---|---|
|
|
98
|
+
| 90-100 | A | Clean implementation, excellent specs |
|
|
99
|
+
| 75-89 | B | Minor fixes needed, good result |
|
|
100
|
+
| 60-74 | C | Multiple fix loops, but final output works |
|
|
101
|
+
| < 60 | F | Component may need manual intervention |
|
|
102
|
+
|
|
103
|
+
### Batch
|
|
104
|
+
|
|
105
|
+
| Avg Score | Grade | Interpretation |
|
|
106
|
+
|---|---|---|
|
|
107
|
+
| 85+ | A | Pipeline is healthy, specs are good |
|
|
108
|
+
| 70-84 | B | Some specs need improvement |
|
|
109
|
+
| 55-69 | C | Systematic spec quality issue — review architect output |
|
|
110
|
+
| < 55 | F | Pipeline problem — check architect or model selection |
|
|
111
|
+
|
|
112
|
+
## Pipeline Gate
|
|
113
|
+
|
|
114
|
+
**Per component: minimum 60.** Below this, flag the component for manual review. With a threshold of 50, a component could pass with failing tests as long as other categories compensate; a threshold of 60 ensures the core quality checks (types + tests passing) cannot be bypassed.
|
|
115
|
+
|
|
116
|
+
**Batch average: minimum 65.** Below this, consider re-running the frontend-architect-agent with different instructions before re-implementing.
|
|
117
|
+
|
|
118
|
+
## Feedback Loop Signals
|
|
119
|
+
|
|
120
|
+
| Pattern | Root Cause | Fix |
|
|
121
|
+
|---|---|---|
|
|
122
|
+
| High type-fix iterations across many components | Props specs are wrong or incomplete | Improve architect props generation |
|
|
123
|
+
| High test-fix iterations | Tests assert things not in the component | Improve test agent prompt OR spec clarity |
|
|
124
|
+
| High lint-fix iterations | Code style issues | Consider adding lint rules to component agent prompt |
|
|
125
|
+
| Story failures across batch | Missing prop types or wrong exports | Check architect story variant quality |
|
|
126
|
+
| First-pass success < 30% batch-wide | Model capability issue | Try a stronger model for generation |
|
|
127
|
+
|
|
128
|
+
## Cross-Stage Correlation
|
|
129
|
+
|
|
130
|
+
Track upstream scores alongside implementor outcomes to pinpoint root causes:
|
|
131
|
+
|
|
132
|
+
| Upstream Score | Implementor Outcome | Root Cause | Action |
|
|
133
|
+
|---|---|---|---|
|
|
134
|
+
| ID coverage < 90% | Missing components in batch | Interaction model dropped narratives | Re-run interaction-designer |
|
|
135
|
+
| Architect styling prescriptiveness < 10 | High test-fix on styling assertions | Vague specs → ambiguous implementations | Re-run architect visual-design pass |
|
|
136
|
+
| Architect props quality < 12 | High type-fix iterations | Bad prop types cascade to component + story | Re-run architect architecture pass |
|
|
137
|
+
| Architect story completeness < 8 | High story-fix iterations | Missing required props in story args | Fix architect story variant generation |
|
|
138
|
+
| Architect composition score < 8 | Import errors across batch | Missing or wrong `composes` references | Fix architect composition logic |
|
package/scripts/improve.ts
CHANGED
|
@@ -195,7 +195,7 @@ console.log(' -> Wrote component.tsx, test.tsx, story.tsx');
|
|
|
195
195
|
}
|
|
196
196
|
|
|
197
197
|
// ─── Evaluate one scenario with Claude CLI ───────────────────────────────────
|
|
198
|
-
function evaluateScenario(scenario: string): { totalScore: number; raw:
|
|
198
|
+
function evaluateScenario(scenario: string): { totalScore: number; raw: Record<string, unknown> } | null {
|
|
199
199
|
log(` Evaluating ${scenario}...`);
|
|
200
200
|
|
|
201
201
|
const improvementPrompt = readFileSync(IMPROVEMENT_PROMPT_FILE, 'utf-8');
|
|
@@ -251,8 +251,11 @@ function evaluateScenario(scenario: string): { totalScore: number; raw: any } |
|
|
|
251
251
|
encoding: 'utf-8',
|
|
252
252
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
253
253
|
});
|
|
254
|
-
} catch (err:
|
|
255
|
-
console.error(
|
|
254
|
+
} catch (err: unknown) {
|
|
255
|
+
console.error(
|
|
256
|
+
` Claude CLI evaluation failed for ${scenario}:`,
|
|
257
|
+
err instanceof Error ? err.message : String(err),
|
|
258
|
+
);
|
|
256
259
|
return null;
|
|
257
260
|
}
|
|
258
261
|
|
|
@@ -265,15 +268,17 @@ function evaluateScenario(scenario: string): { totalScore: number; raw: any } |
|
|
|
265
268
|
try {
|
|
266
269
|
const parsed = JSON.parse(jsonMatch[0]);
|
|
267
270
|
return { totalScore: parsed.totalScore ?? 0, raw: parsed };
|
|
268
|
-
} catch (err:
|
|
269
|
-
console.error(` JSON parse error for ${scenario}:`, err.message);
|
|
271
|
+
} catch (err: unknown) {
|
|
272
|
+
console.error(` JSON parse error for ${scenario}:`, err instanceof Error ? err.message : String(err));
|
|
270
273
|
return null;
|
|
271
274
|
}
|
|
272
275
|
}
|
|
273
276
|
|
|
274
277
|
// ─── Aggregate improvements across scenarios ─────────────────────────────────
|
|
275
|
-
function aggregateImprovements(
|
|
276
|
-
|
|
278
|
+
function aggregateImprovements(
|
|
279
|
+
evaluations: { scenario: string; eval: Record<string, unknown> }[],
|
|
280
|
+
): Record<string, unknown>[] {
|
|
281
|
+
const allImprovements: { improvement: Record<string, unknown>; scenario: string }[] = [];
|
|
277
282
|
for (const e of evaluations) {
|
|
278
283
|
for (const imp of e.eval.promptImprovements ?? []) {
|
|
279
284
|
allImprovements.push({ improvement: imp, scenario: e.scenario });
|
|
@@ -281,7 +286,7 @@ function aggregateImprovements(evaluations: { scenario: string; eval: any }[]):
|
|
|
281
286
|
}
|
|
282
287
|
|
|
283
288
|
// Deduplicate by category + priority — prefer improvements that appear in multiple scenarios
|
|
284
|
-
const seen = new Map<string, { improvement:
|
|
289
|
+
const seen = new Map<string, { improvement: Record<string, unknown>; count: number; scenarios: string[] }>();
|
|
285
290
|
|
|
286
291
|
for (const { improvement, scenario } of allImprovements) {
|
|
287
292
|
const key = `${improvement.category}::${improvement.priority}::${improvement.desiredBehavior?.slice(0, 80)}`;
|
|
@@ -323,7 +328,7 @@ function priorityOrder(p: string): number {
|
|
|
323
328
|
}
|
|
324
329
|
|
|
325
330
|
// ─── Apply improvements ─────────────────────────────────────────────────────
|
|
326
|
-
function applyImprovements(improvements:
|
|
331
|
+
function applyImprovements(improvements: Record<string, unknown>[]): boolean {
|
|
327
332
|
if (improvements.length === 0) {
|
|
328
333
|
log('No improvements to apply.');
|
|
329
334
|
return false;
|
|
@@ -340,7 +345,7 @@ function applyImprovements(improvements: any[]): boolean {
|
|
|
340
345
|
const currentPrompt = readFileSync(PROMPT_FILE, 'utf-8');
|
|
341
346
|
|
|
342
347
|
const improvementList = improvements
|
|
343
|
-
.map((imp:
|
|
348
|
+
.map((imp: Record<string, unknown>, i: number) => {
|
|
344
349
|
const freq =
|
|
345
350
|
imp._frequency > 1
|
|
346
351
|
? ` (appeared in ${imp._frequency} scenarios: ${imp._appearsInScenarios?.join(', ')})`
|
|
@@ -402,8 +407,8 @@ function applyImprovements(improvements: any[]): boolean {
|
|
|
402
407
|
encoding: 'utf-8',
|
|
403
408
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
404
409
|
});
|
|
405
|
-
} catch (err:
|
|
406
|
-
console.error(' Claude CLI prompt-improvement failed:', err.message);
|
|
410
|
+
} catch (err: unknown) {
|
|
411
|
+
console.error(' Claude CLI prompt-improvement failed:', err instanceof Error ? err.message : String(err));
|
|
407
412
|
return false;
|
|
408
413
|
}
|
|
409
414
|
|
|
@@ -424,7 +429,7 @@ function applyImprovements(improvements: any[]): boolean {
|
|
|
424
429
|
|
|
425
430
|
// ─── Print scorecard ─────────────────────────────────────────────────────────
|
|
426
431
|
function printAggregateScorecard(
|
|
427
|
-
results: { scenario: string; totalScore: number; eval:
|
|
432
|
+
results: { scenario: string; totalScore: number; eval: Record<string, unknown> }[],
|
|
428
433
|
avgScore: number,
|
|
429
434
|
): void {
|
|
430
435
|
console.log('');
|
|
@@ -443,7 +448,7 @@ function printAggregateScorecard(
|
|
|
443
448
|
const categories = new Map<string, { total: number; max: number; count: number }>();
|
|
444
449
|
for (const r of results) {
|
|
445
450
|
for (const [key, val] of Object.entries(r.eval.categories ?? {})) {
|
|
446
|
-
const v = val as
|
|
451
|
+
const v = val as Record<string, number>;
|
|
447
452
|
const existing = categories.get(key) ?? { total: 0, max: v.maxScore, count: 0 };
|
|
448
453
|
existing.total += v.score;
|
|
449
454
|
existing.count++;
|
|
@@ -463,7 +468,7 @@ function printAggregateScorecard(
|
|
|
463
468
|
// ─── Save iteration artifacts ────────────────────────────────────────────────
|
|
464
469
|
function saveIteration(
|
|
465
470
|
iteration: number,
|
|
466
|
-
results: { scenario: string; totalScore: number; eval:
|
|
471
|
+
results: { scenario: string; totalScore: number; eval: Record<string, unknown> }[],
|
|
467
472
|
avgScore: number,
|
|
468
473
|
): void {
|
|
469
474
|
const iterDir = resolve(OUTPUTS_DIR, `iteration-${iteration}`);
|
|
@@ -525,14 +530,14 @@ async function main() {
|
|
|
525
530
|
for (const scenario of scenarios) {
|
|
526
531
|
try {
|
|
527
532
|
runAgentForScenario(scenario);
|
|
528
|
-
} catch (err:
|
|
529
|
-
console.error(` Agent failed on ${scenario}: ${err.message}`);
|
|
533
|
+
} catch (err: unknown) {
|
|
534
|
+
console.error(` Agent failed on ${scenario}: ${err instanceof Error ? err.message : String(err)}`);
|
|
530
535
|
}
|
|
531
536
|
}
|
|
532
537
|
|
|
533
538
|
// 3. Evaluate all scenarios
|
|
534
539
|
log('\nEvaluating all scenarios...');
|
|
535
|
-
const results: { scenario: string; totalScore: number; eval:
|
|
540
|
+
const results: { scenario: string; totalScore: number; eval: Record<string, unknown> }[] = [];
|
|
536
541
|
|
|
537
542
|
for (const scenario of scenarios) {
|
|
538
543
|
const outputPath = resolve(OUTPUTS_DIR, scenario, 'component.tsx');
|