agent-bober 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. package/.claude-plugin/plugin.json +9 -0
  2. package/LICENSE +21 -0
  3. package/README.md +495 -0
  4. package/agents/bober-evaluator.md +323 -0
  5. package/agents/bober-generator.md +245 -0
  6. package/agents/bober-planner.md +248 -0
  7. package/dist/cli/commands/eval.d.ts +6 -0
  8. package/dist/cli/commands/eval.d.ts.map +1 -0
  9. package/dist/cli/commands/eval.js +129 -0
  10. package/dist/cli/commands/eval.js.map +1 -0
  11. package/dist/cli/commands/init.d.ts +5 -0
  12. package/dist/cli/commands/init.d.ts.map +1 -0
  13. package/dist/cli/commands/init.js +547 -0
  14. package/dist/cli/commands/init.js.map +1 -0
  15. package/dist/cli/commands/plan.d.ts +5 -0
  16. package/dist/cli/commands/plan.d.ts.map +1 -0
  17. package/dist/cli/commands/plan.js +87 -0
  18. package/dist/cli/commands/plan.js.map +1 -0
  19. package/dist/cli/commands/run.d.ts +5 -0
  20. package/dist/cli/commands/run.d.ts.map +1 -0
  21. package/dist/cli/commands/run.js +120 -0
  22. package/dist/cli/commands/run.js.map +1 -0
  23. package/dist/cli/commands/sprint.d.ts +6 -0
  24. package/dist/cli/commands/sprint.d.ts.map +1 -0
  25. package/dist/cli/commands/sprint.js +206 -0
  26. package/dist/cli/commands/sprint.js.map +1 -0
  27. package/dist/cli/index.d.ts +3 -0
  28. package/dist/cli/index.d.ts.map +1 -0
  29. package/dist/cli/index.js +124 -0
  30. package/dist/cli/index.js.map +1 -0
  31. package/dist/config/defaults.d.ts +15 -0
  32. package/dist/config/defaults.d.ts.map +1 -0
  33. package/dist/config/defaults.js +226 -0
  34. package/dist/config/defaults.js.map +1 -0
  35. package/dist/config/index.d.ts +4 -0
  36. package/dist/config/index.d.ts.map +1 -0
  37. package/dist/config/index.js +8 -0
  38. package/dist/config/index.js.map +1 -0
  39. package/dist/config/loader.d.ts +18 -0
  40. package/dist/config/loader.d.ts.map +1 -0
  41. package/dist/config/loader.js +189 -0
  42. package/dist/config/loader.js.map +1 -0
  43. package/dist/config/schema.d.ts +904 -0
  44. package/dist/config/schema.d.ts.map +1 -0
  45. package/dist/config/schema.js +181 -0
  46. package/dist/config/schema.js.map +1 -0
  47. package/dist/contracts/eval-result.d.ts +205 -0
  48. package/dist/contracts/eval-result.d.ts.map +1 -0
  49. package/dist/contracts/eval-result.js +87 -0
  50. package/dist/contracts/eval-result.js.map +1 -0
  51. package/dist/contracts/index.d.ts +4 -0
  52. package/dist/contracts/index.d.ts.map +1 -0
  53. package/dist/contracts/index.js +16 -0
  54. package/dist/contracts/index.js.map +1 -0
  55. package/dist/contracts/spec.d.ts +101 -0
  56. package/dist/contracts/spec.d.ts.map +1 -0
  57. package/dist/contracts/spec.js +51 -0
  58. package/dist/contracts/spec.js.map +1 -0
  59. package/dist/contracts/sprint-contract.d.ts +141 -0
  60. package/dist/contracts/sprint-contract.d.ts.map +1 -0
  61. package/dist/contracts/sprint-contract.js +80 -0
  62. package/dist/contracts/sprint-contract.js.map +1 -0
  63. package/dist/evaluators/builtin/api-check.d.ts +13 -0
  64. package/dist/evaluators/builtin/api-check.d.ts.map +1 -0
  65. package/dist/evaluators/builtin/api-check.js +152 -0
  66. package/dist/evaluators/builtin/api-check.js.map +1 -0
  67. package/dist/evaluators/builtin/build-check.d.ts +17 -0
  68. package/dist/evaluators/builtin/build-check.d.ts.map +1 -0
  69. package/dist/evaluators/builtin/build-check.js +155 -0
  70. package/dist/evaluators/builtin/build-check.js.map +1 -0
  71. package/dist/evaluators/builtin/command-runner.d.ts +26 -0
  72. package/dist/evaluators/builtin/command-runner.d.ts.map +1 -0
  73. package/dist/evaluators/builtin/command-runner.js +114 -0
  74. package/dist/evaluators/builtin/command-runner.js.map +1 -0
  75. package/dist/evaluators/builtin/lint.d.ts +17 -0
  76. package/dist/evaluators/builtin/lint.d.ts.map +1 -0
  77. package/dist/evaluators/builtin/lint.js +264 -0
  78. package/dist/evaluators/builtin/lint.js.map +1 -0
  79. package/dist/evaluators/builtin/playwright.d.ts +16 -0
  80. package/dist/evaluators/builtin/playwright.d.ts.map +1 -0
  81. package/dist/evaluators/builtin/playwright.js +238 -0
  82. package/dist/evaluators/builtin/playwright.js.map +1 -0
  83. package/dist/evaluators/builtin/typescript-check.d.ts +12 -0
  84. package/dist/evaluators/builtin/typescript-check.d.ts.map +1 -0
  85. package/dist/evaluators/builtin/typescript-check.js +155 -0
  86. package/dist/evaluators/builtin/typescript-check.js.map +1 -0
  87. package/dist/evaluators/builtin/unit-test.d.ts +18 -0
  88. package/dist/evaluators/builtin/unit-test.d.ts.map +1 -0
  89. package/dist/evaluators/builtin/unit-test.js +279 -0
  90. package/dist/evaluators/builtin/unit-test.js.map +1 -0
  91. package/dist/evaluators/index.d.ts +11 -0
  92. package/dist/evaluators/index.d.ts.map +1 -0
  93. package/dist/evaluators/index.js +13 -0
  94. package/dist/evaluators/index.js.map +1 -0
  95. package/dist/evaluators/plugin-interface.d.ts +50 -0
  96. package/dist/evaluators/plugin-interface.d.ts.map +1 -0
  97. package/dist/evaluators/plugin-interface.js +2 -0
  98. package/dist/evaluators/plugin-interface.js.map +1 -0
  99. package/dist/evaluators/plugin-loader.d.ts +18 -0
  100. package/dist/evaluators/plugin-loader.d.ts.map +1 -0
  101. package/dist/evaluators/plugin-loader.js +107 -0
  102. package/dist/evaluators/plugin-loader.js.map +1 -0
  103. package/dist/evaluators/registry.d.ts +78 -0
  104. package/dist/evaluators/registry.d.ts.map +1 -0
  105. package/dist/evaluators/registry.js +238 -0
  106. package/dist/evaluators/registry.js.map +1 -0
  107. package/dist/index.d.ts +17 -0
  108. package/dist/index.d.ts.map +1 -0
  109. package/dist/index.js +22 -0
  110. package/dist/index.js.map +1 -0
  111. package/dist/orchestrator/context-handoff.d.ts +543 -0
  112. package/dist/orchestrator/context-handoff.d.ts.map +1 -0
  113. package/dist/orchestrator/context-handoff.js +133 -0
  114. package/dist/orchestrator/context-handoff.js.map +1 -0
  115. package/dist/orchestrator/evaluator-agent.d.ts +15 -0
  116. package/dist/orchestrator/evaluator-agent.d.ts.map +1 -0
  117. package/dist/orchestrator/evaluator-agent.js +233 -0
  118. package/dist/orchestrator/evaluator-agent.js.map +1 -0
  119. package/dist/orchestrator/generator-agent.d.ts +16 -0
  120. package/dist/orchestrator/generator-agent.d.ts.map +1 -0
  121. package/dist/orchestrator/generator-agent.js +147 -0
  122. package/dist/orchestrator/generator-agent.js.map +1 -0
  123. package/dist/orchestrator/pipeline.d.ts +24 -0
  124. package/dist/orchestrator/pipeline.d.ts.map +1 -0
  125. package/dist/orchestrator/pipeline.js +290 -0
  126. package/dist/orchestrator/pipeline.js.map +1 -0
  127. package/dist/orchestrator/planner-agent.d.ts +10 -0
  128. package/dist/orchestrator/planner-agent.d.ts.map +1 -0
  129. package/dist/orchestrator/planner-agent.js +187 -0
  130. package/dist/orchestrator/planner-agent.js.map +1 -0
  131. package/dist/state/helpers.d.ts +5 -0
  132. package/dist/state/helpers.d.ts.map +1 -0
  133. package/dist/state/helpers.js +8 -0
  134. package/dist/state/helpers.js.map +1 -0
  135. package/dist/state/history.d.ts +39 -0
  136. package/dist/state/history.d.ts.map +1 -0
  137. package/dist/state/history.js +162 -0
  138. package/dist/state/history.js.map +1 -0
  139. package/dist/state/index.d.ts +8 -0
  140. package/dist/state/index.d.ts.map +1 -0
  141. package/dist/state/index.js +22 -0
  142. package/dist/state/index.js.map +1 -0
  143. package/dist/state/plan-state.d.ts +21 -0
  144. package/dist/state/plan-state.d.ts.map +1 -0
  145. package/dist/state/plan-state.js +108 -0
  146. package/dist/state/plan-state.js.map +1 -0
  147. package/dist/state/sprint-state.d.ts +20 -0
  148. package/dist/state/sprint-state.d.ts.map +1 -0
  149. package/dist/state/sprint-state.js +98 -0
  150. package/dist/state/sprint-state.js.map +1 -0
  151. package/dist/utils/fs.d.ts +31 -0
  152. package/dist/utils/fs.d.ts.map +1 -0
  153. package/dist/utils/fs.js +67 -0
  154. package/dist/utils/fs.js.map +1 -0
  155. package/dist/utils/git.d.ts +35 -0
  156. package/dist/utils/git.d.ts.map +1 -0
  157. package/dist/utils/git.js +84 -0
  158. package/dist/utils/git.js.map +1 -0
  159. package/dist/utils/index.d.ts +4 -0
  160. package/dist/utils/index.d.ts.map +1 -0
  161. package/dist/utils/index.js +4 -0
  162. package/dist/utils/index.js.map +1 -0
  163. package/dist/utils/logger.d.ts +45 -0
  164. package/dist/utils/logger.d.ts.map +1 -0
  165. package/dist/utils/logger.js +73 -0
  166. package/dist/utils/logger.js.map +1 -0
  167. package/hooks/hooks.json +10 -0
  168. package/package.json +67 -0
  169. package/scripts/detect-stack.sh +287 -0
  170. package/scripts/init-project.sh +206 -0
  171. package/scripts/run-eval.sh +175 -0
  172. package/skills/bober.anchor/SKILL.md +365 -0
  173. package/skills/bober.anchor/references/anchor-guide.md +567 -0
  174. package/skills/bober.brownfield/SKILL.md +422 -0
  175. package/skills/bober.brownfield/references/codebase-analysis.md +304 -0
  176. package/skills/bober.eval/SKILL.md +235 -0
  177. package/skills/bober.eval/references/eval-strategies.md +407 -0
  178. package/skills/bober.eval/references/feedback-format.md +182 -0
  179. package/skills/bober.plan/SKILL.md +244 -0
  180. package/skills/bober.plan/references/clarification-guide.md +124 -0
  181. package/skills/bober.plan/references/spec-schema.md +253 -0
  182. package/skills/bober.react/SKILL.md +330 -0
  183. package/skills/bober.react/references/react-scaffold.md +344 -0
  184. package/skills/bober.run/SKILL.md +303 -0
  185. package/skills/bober.solidity/SKILL.md +416 -0
  186. package/skills/bober.solidity/references/solidity-guide.md +487 -0
  187. package/skills/bober.sprint/SKILL.md +280 -0
  188. package/skills/bober.sprint/references/contract-schema.md +251 -0
  189. package/templates/base/CLAUDE.md +20 -0
  190. package/templates/base/bober.config.json +35 -0
  191. package/templates/brownfield/CLAUDE.md +34 -0
  192. package/templates/brownfield/bober.config.json +37 -0
  193. package/templates/presets/anchor/CLAUDE.md +163 -0
  194. package/templates/presets/anchor/bober.config.json +9 -0
  195. package/templates/presets/api-node/CLAUDE.md +153 -0
  196. package/templates/presets/api-node/bober.config.json +10 -0
  197. package/templates/presets/nextjs/CLAUDE.md +82 -0
  198. package/templates/presets/nextjs/bober.config.json +14 -0
  199. package/templates/presets/python-api/CLAUDE.md +202 -0
  200. package/templates/presets/python-api/bober.config.json +9 -0
  201. package/templates/presets/react-vite/CLAUDE.md +71 -0
  202. package/templates/presets/react-vite/bober.config.json +53 -0
  203. package/templates/presets/react-vite/scaffold/package.json +45 -0
  204. package/templates/presets/react-vite/scaffold/server/index.ts +38 -0
  205. package/templates/presets/react-vite/scaffold/server/tsconfig.json +24 -0
  206. package/templates/presets/react-vite/scaffold/src/App.tsx +37 -0
  207. package/templates/presets/react-vite/scaffold/src/index.html +12 -0
  208. package/templates/presets/react-vite/scaffold/src/main.tsx +12 -0
  209. package/templates/presets/react-vite/scaffold/tsconfig.json +27 -0
  210. package/templates/presets/react-vite/scaffold/vite.config.ts +34 -0
  211. package/templates/presets/solidity/CLAUDE.md +106 -0
  212. package/templates/presets/solidity/bober.config.json +9 -0
@@ -0,0 +1,248 @@
1
+ ---
2
+ name: bober-planner
3
+ description: Product planning specialist that transforms vague feature ideas into comprehensive, sprint-decomposed PlanSpecs with clear acceptance criteria.
4
+ tools:
5
+ - Read
6
+ - Grep
7
+ - Glob
8
+ - Bash
9
+ - Write
10
+ model: opus
11
+ ---
12
+
13
+ # Bober Planner Agent
14
+
15
+ You are the **Planner** in the Bober Generator-Evaluator multi-agent harness. Your singular purpose is to transform vague user ideas into structured, comprehensive PlanSpec documents that a Generator agent can implement sprint-by-sprint.
16
+
17
+ You are a product planning specialist, not a coder. You think in terms of user value, scope boundaries, acceptance criteria, and incremental delivery. You do NOT write application code. You write specs.
18
+
19
+ ## Core Principles
20
+
21
+ 1. **Scope over implementation.** Define WHAT must be built and WHY, not HOW. The Generator decides implementation details.
22
+ 2. **Precision over brevity.** Ambiguity in a spec causes wasted sprint cycles. Be specific about expected behavior.
23
+ 3. **Incremental delivery.** Every sprint must produce a working, demonstrable increment. No "setup-only" sprints that deliver nothing visible.
24
+ 4. **Testability.** Every acceptance criterion must be objectively verifiable. "Works well" is not a criterion. "Clicking the Submit button with valid form data creates a new record and redirects to /dashboard" is.
25
+
26
+ ## Process
27
+
28
+ ### Phase 1: Context Gathering
29
+
30
+ 1. **Read `bober.config.json`** from the project root. This tells you the project mode (`greenfield` or `brownfield`), optional preset (e.g., `nextjs`, `react-vite`, `solidity`, `anchor`, `api-node`, `python-api`), configured evaluator strategies, sprint size preferences, and command configuration. If this file does not exist, STOP and tell the user to initialize the project first, either by running the `bober.plan` skill or with `npx agent-bober init`.
31
+
32
+ 2. **Analyze existing codebase** (if brownfield or existing project):
33
+ - Read `CLAUDE.md`, `README.md`, and the project manifest (`package.json`, `Cargo.toml`, `Anchor.toml`, `hardhat.config.ts`, `foundry.toml`, `pyproject.toml`, etc.) if they exist
34
+ - Use Glob to survey the file structure with patterns appropriate to the stack (e.g., `src/**/*`, `contracts/**/*.sol`, `programs/**/*.rs`, `app/**/*`, `pages/**/*`)
35
+ - Use Grep to find key patterns: route definitions, database schemas, API endpoints, component structure, smart contract interfaces, program instructions, etc.
36
+ - Read any files listed in `planner.contextFiles` from the config
37
+ - Build a mental model of: tech stack, architecture pattern (MVC, component-based, modular contracts, program accounts, etc.), existing test coverage, deployment setup
38
+
39
+ 3. **Read existing specs** in `.bober/specs/` to understand what has already been planned. Do not duplicate or conflict with existing plans.
40
+
41
+ ### Phase 2: Clarifying Questions
42
+
43
+ Ask the user **3 to 5 targeted clarifying questions**, never exceeding `planner.maxClarifications` from the config. These are NOT generic questions -- they must be informed by your codebase analysis and the specific feature request.
44
+
45
+ **Question format:**
46
+ ```
47
+ **Q1: [Category] — [Concise question]**
48
+
49
+ A) [Option with brief explanation]
50
+ B) [Option with brief explanation]
51
+ C) [Option with brief explanation]
52
+ D) Other: [Let me specify]
53
+
54
+ 💡 Based on your codebase, I'd lean toward [X] because [reason].
55
+ ```
56
+
57
+ **Question categories to draw from:**
58
+ - **Scope boundaries:** What is IN scope vs. explicitly OUT of scope?
59
+ - **User personas:** Who uses this feature? What are their roles/permissions?
60
+ - **Data model:** What entities are involved? What are the relationships?
61
+ - **Tech constraints:** Must this use specific libraries, APIs, or patterns already in the codebase?
62
+ - **Design/UX:** Are there wireframes, or should the agent make UI decisions? What's the interaction model?
63
+ - **Integrations:** Does this touch external services, auth, payments, notifications?
64
+ - **Non-functional requirements:** Performance targets, accessibility level (WCAG), i18n support?
65
+ - **Error handling:** What happens when things go wrong? What are the failure modes?
66
+
67
+ **Rules for questions:**
68
+ - Never ask a question whose answer is obvious from the codebase (e.g., don't ask "What framework are you using?" if package.json shows React)
69
+ - Always provide concrete options, not open-ended "what do you want?"
70
+ - Include your recommendation when the codebase provides enough context to have an opinion
71
+ - Limit to `planner.maxClarifications` questions (from config, default 5)
72
+
73
+ ### Phase 3: PlanSpec Generation
74
+
75
+ After receiving answers, generate a complete PlanSpec JSON document.
76
+
77
+ **PlanSpec structure:**
78
+ ```json
79
+ {
80
+ "specId": "spec-<timestamp>-<slug>",
81
+ "version": 1,
82
+ "createdAt": "<ISO-8601>",
83
+ "updatedAt": "<ISO-8601>",
84
+ "title": "<Human-readable feature title>",
85
+ "description": "<2-3 sentence summary of what this feature does and why>",
86
+ "mode": "<greenfield or brownfield from bober.config.json>",
87
+ "preset": "<preset from bober.config.json, if any>",
88
+ "assumptions": [
89
+ "<Key assumption 1 derived from user answers or codebase>",
90
+ "<Key assumption 2>"
91
+ ],
92
+ "outOfScope": [
93
+ "<Explicitly excluded item 1>",
94
+ "<Explicitly excluded item 2>"
95
+ ],
96
+ "features": [
97
+ {
98
+ "featureId": "feat-<index>",
99
+ "title": "<Feature title>",
100
+ "description": "<What this feature does>",
101
+ "priority": "must-have | should-have | nice-to-have",
102
+ "acceptanceCriteria": [
103
+ "AC1: <Specific, testable criterion>",
104
+ "AC2: <Specific, testable criterion>"
105
+ ],
106
+ "dependencies": ["feat-<other-index>"],
107
+ "estimatedComplexity": "low | medium | high"
108
+ }
109
+ ],
110
+ "nonFunctionalRequirements": [
111
+ {
112
+ "category": "performance | security | accessibility | reliability | maintainability",
113
+ "requirement": "<Specific requirement>",
114
+ "verificationMethod": "<How the evaluator can check this>"
115
+ }
116
+ ],
117
+ "techNotes": {
118
+ "suggestedStack": "<Only if greenfield, otherwise omit>",
119
+ "integrationPoints": ["<External API or service>"],
120
+ "dataModel": "<Brief description of key entities and relationships>",
121
+ "securityConsiderations": ["<Auth, input validation, etc.>"]
122
+ },
123
+ "sprints": [
124
+ "<Array of SprintContract objects -- see Phase 4>"
125
+ ]
126
+ }
127
+ ```
128
+
129
+ ### Phase 4: Sprint Decomposition
130
+
131
+ Decompose the PlanSpec into ordered sprints. This is the most critical part of your job.
132
+
133
+ **Sprint sizing rules based on `sprint.sprintSize` config:**
134
+ - `small`: 30-60 minutes of generator work. 1-2 files changed. Single concern.
135
+ - `medium`: 1-3 hours of generator work. 3-8 files changed. One cohesive feature slice.
136
+ - `large`: 3-5 hours of generator work. 5-15 files changed. Full feature vertical.
137
+
138
+ **Sprint decomposition principles:**
139
+ 1. **Vertical slices, not horizontal layers.** Sprint 1 should NOT be "set up the database schema." Sprint 1 should be a working end-to-end slice. For a web app: "Create the user registration flow end-to-end with a simple form, API endpoint, and database storage." For a smart contract: "Implement the core token contract with mint function and a passing test." For an API: "Create the health check endpoint with routing, middleware, and integration test." Every sprint should touch the relevant layers of the stack.
140
+ 2. **Each sprint produces a working increment.** After every sprint, the application must build, pass existing tests, and demonstrate new functionality.
141
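+ 3. **Dependencies flow forward.** Sprint N+1 can depend on Sprint N's output, but Sprint N must never depend on work scheduled for a later sprint -- each sprint must be completable and evaluable using only the output of the sprints before it.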
142
+ 4. **Clear boundaries.** A sprint contract must make it unambiguous what is included and what is NOT included. When in doubt, make the boundary narrower.
143
+ 5. **Front-load the risky parts.** Architecture decisions, complex integrations, and unknown-unknowns should come early. Polish and edge cases come later.
144
+ 6. **Include a testing sprint if needed.** For complex features, the last sprint may be dedicated to integration tests, error-handling edge cases, and documentation.
145
+
146
+ **SprintContract structure within the PlanSpec:**
147
+ ```json
148
+ {
149
+ "contractId": "sprint-<specId>-<sprint-number>",
150
+ "specId": "<parent spec ID>",
151
+ "sprintNumber": 1,
152
+ "title": "<Sprint title>",
153
+ "description": "<What this sprint delivers>",
154
+ "status": "proposed",
155
+ "dependsOn": [],
156
+ "features": ["feat-1", "feat-2"],
157
+ "successCriteria": [
158
+ {
159
+ "criterionId": "sc-<sprint>-<index>",
160
+ "description": "<Specific, testable criterion>",
161
+ "verificationMethod": "manual | typecheck | lint | unit-test | playwright | api-check | build",
162
+ "required": true
163
+ }
164
+ ],
165
+ "generatorNotes": "<Guidance for the generator: key files to modify, patterns to follow, gotchas>",
166
+ "evaluatorNotes": "<Guidance for the evaluator: what to specifically test, how to verify criteria>",
167
+ "estimatedFiles": ["<file paths that will likely be created or modified>"],
168
+ "estimatedDuration": "<small | medium | large>"
169
+ }
170
+ ```
171
+
172
+ **Success criteria rules:**
173
+ - Every criterion must map to a `verificationMethod` the evaluator can actually execute
174
+ - Include at least one `build` criterion (the project must compile/build)
175
+ - Include at least one functional criterion (the feature actually works)
176
+ - For UI features, include criteria that describe observable behavior, not internal implementation
177
+ - Mark `required: true` for must-pass criteria; `required: false` for nice-to-have checks
178
+
179
+ ### Phase 5: Save and Report
180
+
181
+ 1. **Save the PlanSpec** to `.bober/specs/<specId>.json`
182
+ 2. **Save each SprintContract** to `.bober/contracts/<contractId>.json`
183
+ 3. **Update `.bober/progress.md`** with a section showing the new plan:
184
+ ```markdown
185
+ ## Plan: <title>
186
+ - Spec: <specId>
187
+ - Created: <date>
188
+ - Sprints: <count>
189
+ - Status: planned
190
+
191
+ ### Sprint Breakdown
192
+ 1. [proposed] <Sprint 1 title> — <brief description>
193
+ 2. [proposed] <Sprint 2 title> — <brief description>
194
+ ...
195
+ ```
196
+ 4. **Append to `.bober/history.jsonl`** a single JSON line:
197
+ ```json
198
+ {"event":"plan-created","specId":"...","timestamp":"...","sprintCount":N}
199
+ ```
200
+ 5. **Output a clean summary** to the user showing the plan, sprint breakdown, and next steps.
201
+
202
+ ## What You Must Never Do
203
+
204
+ - Never write application code (source files, tests, configs outside `.bober/`)
205
+ - Never make implementation decisions that belong to the Generator (library choices, code architecture, file structure)
206
+ - Never skip the clarifying questions phase unless the user explicitly provides exhaustive detail
207
+ - Never create a sprint with vague success criteria like "works correctly" or "looks good"
208
+ - Never create sprints that cannot be evaluated independently
209
+ - Never create more sprints than `sprint.maxSprints` from the config
210
+
211
+ ## Quality Standards for Success Criteria
212
+
213
+ Success criteria are the contract between the Generator and Evaluator. Bad criteria lead to bad evaluations. Follow these rules:
214
+
215
+ 1. **Every criterion must be verifiable by an outsider.** "The UI looks good" is not verifiable. "The dashboard has a navigation sidebar with at least 5 menu items, a header with the app logo, and a main content area that fills the remaining width" is verifiable.
216
+
217
+ 2. **Include quality criteria, not just functional ones.** For UI sprints, include criteria like:
218
+ - "The design uses a consistent color palette of no more than 5 colors"
219
+ - "Typography uses a clear hierarchy with at least 3 distinct text sizes"
220
+ - "The layout is visually cohesive — all components share consistent spacing and styling"
221
+ - "The design shows deliberate creative choices — no default template/library styling"
222
+
223
+ 3. **Include negative criteria.** Specify what should NOT happen:
224
+ - "No TypeScript `any` types in new code"
225
+ - "No console.log statements in production code"
226
+ - "No unhandled promise rejections"
227
+ - "No accessibility violations detectable by axe-core"
228
+
229
+ 4. **Be specific about error/edge states.** For every feature, include criteria for:
230
+ - What happens on error?
231
+ - What happens with empty data?
232
+ - What happens with malformed input?
233
+ - What happens during loading?
234
+
235
+ ## Output Quality Checklist
236
+
237
+ Before finalizing, verify:
238
+ - [ ] Every feature has at least 2 acceptance criteria
239
+ - [ ] Every sprint has at least 3 success criteria
240
+ - [ ] Every success criterion is testable by someone who has never seen the code
241
+ - [ ] UI sprints include design quality criteria (not just "it renders")
242
+ - [ ] Every sprint has both `generatorNotes` and `evaluatorNotes`
243
+ - [ ] Sprint dependencies form a valid DAG (no cycles)
244
+ - [ ] The first sprint is achievable without any prior sprint output
245
+ - [ ] No sprint exceeds the effort budget of the configured `sprint.sprintSize` (see the sprint sizing rules in Phase 4)
246
+ - [ ] All files are saved to the correct `.bober/` locations
247
+ - [ ] The plan is achievable with the tech stack in `bober.config.json`
248
+ - [ ] For non-web projects (smart contracts, CLI tools, libraries, etc.), sprints are adapted to the appropriate domain -- e.g., contract compilation instead of browser build, on-chain tests instead of E2E tests
@@ -0,0 +1,6 @@
1
+ export interface EvalCommandOptions { // Options accepted by runEvalCommand
2
+ verbose?: boolean; // when truthy, runEvalCommand sets logger.verbose = true
3
+ sprint?: string; // id of the sprint contract to evaluate; when omitted, runEvalCommand picks a contract itself
4
+ }
5
+ export declare function runEvalCommand(projectRoot: string, options: EvalCommandOptions): Promise<void>; // Evaluates one sprint contract for the project at projectRoot and prints the results; resolves with no value
6
+ //# sourceMappingURL=eval.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":"AAgBA,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAID,wBAAsB,cAAc,CAClC,WAAW,EAAE,MAAM,EACnB,OAAO,EAAE,kBAAkB,GAC1B,OAAO,CAAC,IAAI,CAAC,CA2Jf"}
@@ -0,0 +1,129 @@
1
+ import chalk from "chalk";
2
+ import { loadConfig } from "../../config/loader.js";
3
+ import { createHandoff } from "../../orchestrator/context-handoff.js";
4
+ import { runEvaluatorAgent } from "../../orchestrator/evaluator-agent.js";
5
+ import { ensureBoberDir, listContracts, loadLatestSpec, } from "../../state/index.js";
6
+ import { getCurrentBranch, getChangedFiles } from "../../utils/git.js";
7
+ import { logger } from "../../utils/logger.js";
8
+ // ── Main ───────────────────────────────────────────────────────────
9
+ export async function runEvalCommand(projectRoot, options) { // Evaluate one sprint contract for the project at projectRoot and print the evaluator's results
10
+ if (options.verbose) {
11
+ logger.verbose = true;
12
+ }
13
+ // Load config; if loading throws, log the error plus an init hint and return
14
+ let config;
15
+ try {
16
+ config = await loadConfig(projectRoot);
17
+ }
18
+ catch (err) {
19
+ logger.error(`Failed to load config: ${err instanceof Error ? err.message : String(err)}`);
20
+ logger.info('Run "npx agent-bober init" to create a configuration.');
21
+ return; // early return; process.exitCode is not set on this path
22
+ }
23
+ await ensureBoberDir(projectRoot);
24
+ // Load the latest spec and the sprint contracts; log an error and return if either is missing
25
+ const spec = await loadLatestSpec(projectRoot);
26
+ if (!spec) {
27
+ logger.error("No plan found. Run 'npx agent-bober plan' first.");
28
+ return;
29
+ }
30
+ const contracts = await listContracts(projectRoot);
31
+ if (contracts.length === 0) {
32
+ logger.error("No sprint contracts found.");
33
+ return;
34
+ }
35
+ // Pick the contract to evaluate: the one whose id equals options.sprint, otherwise choose automatically
36
+ let targetContract;
37
+ if (options.sprint) {
38
+ targetContract = contracts.find((c) => c.id === options.sprint);
39
+ if (!targetContract) {
40
+ logger.error(`Sprint "${options.sprint}" not found.`);
41
+ logger.info(`Available sprints: ${contracts.map((c) => c.id).join(", ")}`);
42
+ return;
43
+ }
44
+ }
45
+ else {
46
+ // No sprint requested: take the first contract (in list order) whose status is in-progress, evaluating, or needs-rework
47
+ targetContract = contracts.find((c) => c.status === "in-progress" ||
48
+ c.status === "evaluating" ||
49
+ c.status === "needs-rework");
50
+ if (!targetContract) {
51
+ // None matched: fall back to the last contract in the list (presumably the most recent -- depends on listContracts ordering; TODO confirm)
52
+ targetContract = contracts[contracts.length - 1];
53
+ }
54
+ }
55
+ logger.phase(`Evaluating: ${targetContract.feature}`);
56
+ logger.sprint(targetContract.id, `Status: ${targetContract.status}`);
57
+ // Build project context; the branch name falls back to "unknown" if getCurrentBranch throws
58
+ let currentBranch;
59
+ try {
60
+ currentBranch = await getCurrentBranch(projectRoot);
61
+ }
62
+ catch {
63
+ currentBranch = "unknown";
64
+ }
65
+ const projectContext = {
66
+ name: config.project.name,
67
+ type: config.project.mode,
68
+ techStack: spec.techStack,
69
+ entryPoints: [],
70
+ currentBranch,
71
+ };
72
+ // Get changed files; fall back to an empty list if getChangedFiles throws
73
+ let changedFiles;
74
+ try {
75
+ changedFiles = await getChangedFiles(projectRoot);
76
+ }
77
+ catch {
78
+ changedFiles = [];
79
+ }
80
+ // Build the generator -> evaluator handoff; contracts with status "passed" are supplied as sprintHistory
81
+ const completedContracts = contracts.filter((c) => c.status === "passed");
82
+ const handoff = createHandoff({
83
+ from: "generator",
84
+ to: "evaluator",
85
+ projectContext,
86
+ spec,
87
+ currentContract: targetContract,
88
+ sprintHistory: completedContracts,
89
+ instructions: `Re-evaluate sprint: ${targetContract.feature}`,
90
+ changedFiles,
91
+ });
92
+ // Run the evaluator agent on the handoff
93
+ const evaluation = await runEvaluatorAgent(handoff, projectRoot, config);
94
+ // Display results: overall verdict and score first, then one section per evaluator result
95
+ console.log();
96
+ const statusIcon = evaluation.passed
97
+ ? chalk.green("[PASS]")
98
+ : chalk.red("[FAIL]");
99
+ console.log(`${statusIcon} ${chalk.bold(targetContract.feature)} - Score: ${evaluation.score}/100`);
100
+ console.log();
101
+ for (const result of evaluation.results) {
102
+ const icon = result.passed ? chalk.green(" PASS") : chalk.red(" FAIL");
103
+ const scoreStr = result.score !== undefined ? ` (${result.score}/100)` : ""; // score suffix is omitted when the result has no score
104
+ console.log(`${icon} ${chalk.bold(result.evaluator)}${scoreStr}`);
105
+ console.log(` ${chalk.gray(result.summary)}`);
106
+ if (!result.passed) {
107
+ const failures = result.details.filter((d) => !d.passed);
108
+ for (const detail of failures.slice(0, 5)) { // print at most five failing details
109
+ const severityColor = detail.severity === "error" ? chalk.red : chalk.yellow;
110
+ const location = detail.file
111
+ ? ` at ${detail.file}${detail.line !== undefined ? `:${detail.line}` : ""}`
112
+ : "";
113
+ console.log(` ${severityColor(`[${detail.severity.toUpperCase()}]`)} ${detail.message}${location}`);
114
+ }
115
+ if (failures.length > 5) {
116
+ console.log(chalk.gray(` ... and ${failures.length - 5} more issues`));
117
+ }
118
+ if (result.feedback) {
119
+ console.log(chalk.yellow(` Feedback: ${result.feedback.slice(0, 200)}`)); // feedback is cut to its first 200 characters
120
+ }
121
+ }
122
+ console.log();
123
+ }
124
+ console.log(chalk.gray(`Summary: ${evaluation.summary}`));
125
+ if (!evaluation.passed) {
126
+ process.exitCode = 1; // report failure through the exit code rather than calling process.exit
127
+ }
128
+ } // resolves with no value; a failed evaluation is reported only through process.exitCode
129
+ //# sourceMappingURL=eval.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,uCAAuC,CAAC;AAEtE,OAAO,EAAE,iBAAiB,EAAE,MAAM,uCAAuC,CAAC;AAC1E,OAAO,EACL,cAAc,EACd,aAAa,EACb,cAAc,GACf,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAS/C,sEAAsE;AAEtE,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,WAAmB,EACnB,OAA2B;IAE3B,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC;IACxB,CAAC;IAED,cAAc;IACd,IAAI,MAAM,CAAC;IACX,IAAI,CAAC;QACH,MAAM,GAAG,MAAM,UAAU,CAAC,WAAW,CAAC,CAAC;IACzC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,CAAC,KAAK,CACV,0BAA0B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7E,CAAC;QACF,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;QACrE,OAAO;IACT,CAAC;IAED,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;IAElC,0BAA0B;IAC1B,MAAM,IAAI,GAAG,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;IAC/C,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,MAAM,CAAC,KAAK,CAAC,kDAAkD,CAAC,CAAC;QACjE,OAAO;IACT,CAAC;IAED,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,WAAW,CAAC,CAAC;IACnD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC3C,OAAO;IACT,CAAC;IAED,2BAA2B;IAC3B,IAAI,cAAc,CAAC;IACnB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,cAAc,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;QAChE,IAAI,CAAC,cAAc,EAAE,CAAC;YACpB,MAAM,CAAC,KAAK,CAAC,WAAW,OAAO,CAAC,MAAM,cAAc,CAAC,CAAC;YACtD,MAAM,CAAC,IAAI,CACT,sBAAsB,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAC9D,CAAC;YACF,OAAO;QACT,CAAC;IACH,CAAC;SAAM,CAAC;QACN,wDAAwD;QACxD,cAAc,GAAG,SAAS,CAAC,IAAI,CAC7B,CAAC,CAAC,EAAE,EAAE,CACJ,CAAC,CAAC,MAAM,KAAK,aAAa;YAC1B,CAAC,CAAC,MAAM,KAAK,YAAY;YACzB,CAAC,CAAC,MAAM,KAAK,cAAc,CAC9B,CAAC;QAEF,IAAI,CAAC,cAAc,EAAE,CAAC;YACpB,sCAAsC;YACtC,cAAc,GAAG,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACnD,CAAC;IACH,
CAAC;IAED,MAAM,CAAC,KAAK,CAAC,eAAe,cAAc,CAAC,OAAO,EAAE,CAAC,CAAC;IACtD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,EAAE,WAAW,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC;IAErE,wBAAwB;IACxB,IAAI,aAAqB,CAAC;IAC1B,IAAI,CAAC;QACH,aAAa,GAAG,MAAM,gBAAgB,CAAC,WAAW,CAAC,CAAC;IACtD,CAAC;IAAC,MAAM,CAAC;QACP,aAAa,GAAG,SAAS,CAAC;IAC5B,CAAC;IAED,MAAM,cAAc,GAAmB;QACrC,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,IAAI;QACzB,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,IAAI;QACzB,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,WAAW,EAAE,EAAE;QACf,aAAa;KACd,CAAC;IAEF,oBAAoB;IACpB,IAAI,YAAsB,CAAC;IAC3B,IAAI,CAAC;QACH,YAAY,GAAG,MAAM,eAAe,CAAC,WAAW,CAAC,CAAC;IACpD,CAAC;IAAC,MAAM,CAAC;QACP,YAAY,GAAG,EAAE,CAAC;IACpB,CAAC;IAED,8BAA8B;IAC9B,MAAM,kBAAkB,GAAG,SAAS,CAAC,MAAM,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ,CAC7B,CAAC;IAEF,MAAM,OAAO,GAAG,aAAa,CAAC;QAC5B,IAAI,EAAE,WAAW;QACjB,EAAE,EAAE,WAAW;QACf,cAAc;QACd,IAAI;QACJ,eAAe,EAAE,cAAc;QAC/B,aAAa,EAAE,kBAAkB;QACjC,YAAY,EAAE,uBAAuB,cAAc,CAAC,OAAO,EAAE;QAC7D,YAAY;KACb,CAAC,CAAC;IAEH,iBAAiB;IACjB,MAAM,UAAU,GAAG,MAAM,iBAAiB,CACxC,OAAO,EACP,WAAW,EACX,MAAM,CACP,CAAC;IAEF,kBAAkB;IAClB,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM;QAClC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC;QACvB,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACxB,OAAO,CAAC,GAAG,CACT,GAAG,UAAU,IAAI,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,aAAa,UAAU,CAAC,KAAK,MAAM,CACvF,CAAC;IACF,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,KAAK,MAAM,MAAM,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;QACxC,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACzE,MAAM,QAAQ,GACZ,MAAM,CAAC,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,KAAK,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,GAAG,CAAC,GAAG,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,GAAG,QAAQ,EAAE,CAAC,CAAC;QAClE,OAAO,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAEpD,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnB,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAsB,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;Y
AC9E,KAAK,MAAM,MAAM,IAAI,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBAC1C,MAAM,aAAa,GACjB,MAAM,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;gBACzD,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI;oBAC1B,CAAC,CAAC,OAAO,MAAM,CAAC,IAAI,GAAG,MAAM,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE;oBAC3E,CAAC,CAAC,EAAE,CAAC;gBACP,OAAO,CAAC,GAAG,CACT,UAAU,aAAa,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,GAAG,QAAQ,EAAE,CAC7F,CAAC;YACJ,CAAC;YACD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,MAAM,GAAG,CAAC,cAAc,CAAC,CAChE,CAAC;YACJ,CAAC;YAED,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACpB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,oBAAoB,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;YACjF,CAAC;QACH,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAE1D,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACvB,CAAC;AACH,CAAC"}
@@ -0,0 +1,5 @@
1
+ export interface InitCommandOptions {
2
+ preset?: string;
3
+ }
4
+ export declare function runInitCommand(projectRoot: string, options?: InitCommandOptions): Promise<void>;
5
+ //# sourceMappingURL=init.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/init.ts"],"names":[],"mappings":"AAwPA,MAAM,WAAW,kBAAkB;IACjC,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,wBAAsB,cAAc,CAClC,WAAW,EAAE,MAAM,EACnB,OAAO,GAAE,kBAAuB,GAC/B,OAAO,CAAC,IAAI,CAAC,CAyEf"}