agent-bober 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +159 -28
- package/dist/cli/commands/eval.d.ts +2 -0
- package/dist/cli/commands/eval.d.ts.map +1 -1
- package/dist/cli/commands/eval.js +10 -0
- package/dist/cli/commands/eval.js.map +1 -1
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +181 -61
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/plan.d.ts +2 -0
- package/dist/cli/commands/plan.d.ts.map +1 -1
- package/dist/cli/commands/plan.js +10 -0
- package/dist/cli/commands/plan.js.map +1 -1
- package/dist/cli/commands/run.d.ts +2 -0
- package/dist/cli/commands/run.d.ts.map +1 -1
- package/dist/cli/commands/run.js +10 -0
- package/dist/cli/commands/run.js.map +1 -1
- package/dist/cli/commands/sprint.d.ts +2 -0
- package/dist/cli/commands/sprint.d.ts.map +1 -1
- package/dist/cli/commands/sprint.js +10 -0
- package/dist/cli/commands/sprint.js.map +1 -1
- package/dist/cli/index.js +22 -2
- package/dist/cli/index.js.map +1 -1
- package/dist/config/schema.d.ts +160 -43
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +11 -7
- package/dist/config/schema.js.map +1 -1
- package/dist/contracts/sprint-contract.d.ts +8 -8
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/dist/mcp/index.d.ts +4 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +4 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/run-manager.d.ts +47 -0
- package/dist/mcp/run-manager.d.ts.map +1 -0
- package/dist/mcp/run-manager.js +79 -0
- package/dist/mcp/run-manager.js.map +1 -0
- package/dist/mcp/server.d.ts +15 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +107 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/tools/config.d.ts +2 -0
- package/dist/mcp/tools/config.d.ts.map +1 -0
- package/dist/mcp/tools/config.js +153 -0
- package/dist/mcp/tools/config.js.map +1 -0
- package/dist/mcp/tools/contracts.d.ts +2 -0
- package/dist/mcp/tools/contracts.d.ts.map +1 -0
- package/dist/mcp/tools/contracts.js +61 -0
- package/dist/mcp/tools/contracts.js.map +1 -0
- package/dist/mcp/tools/eval.d.ts +2 -0
- package/dist/mcp/tools/eval.d.ts.map +1 -0
- package/dist/mcp/tools/eval.js +157 -0
- package/dist/mcp/tools/eval.js.map +1 -0
- package/dist/mcp/tools/index.d.ts +20 -0
- package/dist/mcp/tools/index.d.ts.map +1 -0
- package/dist/mcp/tools/index.js +47 -0
- package/dist/mcp/tools/index.js.map +1 -0
- package/dist/mcp/tools/init.d.ts +2 -0
- package/dist/mcp/tools/init.d.ts.map +1 -0
- package/dist/mcp/tools/init.js +121 -0
- package/dist/mcp/tools/init.js.map +1 -0
- package/dist/mcp/tools/plan.d.ts +2 -0
- package/dist/mcp/tools/plan.d.ts.map +1 -0
- package/dist/mcp/tools/plan.js +97 -0
- package/dist/mcp/tools/plan.js.map +1 -0
- package/dist/mcp/tools/principles.d.ts +2 -0
- package/dist/mcp/tools/principles.d.ts.map +1 -0
- package/dist/mcp/tools/principles.js +66 -0
- package/dist/mcp/tools/principles.js.map +1 -0
- package/dist/mcp/tools/registry.d.ts +45 -0
- package/dist/mcp/tools/registry.d.ts.map +1 -0
- package/dist/mcp/tools/registry.js +23 -0
- package/dist/mcp/tools/registry.js.map +1 -0
- package/dist/mcp/tools/run.d.ts +2 -0
- package/dist/mcp/tools/run.d.ts.map +1 -0
- package/dist/mcp/tools/run.js +66 -0
- package/dist/mcp/tools/run.js.map +1 -0
- package/dist/mcp/tools/spec.d.ts +2 -0
- package/dist/mcp/tools/spec.d.ts.map +1 -0
- package/dist/mcp/tools/spec.js +32 -0
- package/dist/mcp/tools/spec.js.map +1 -0
- package/dist/mcp/tools/sprint.d.ts +2 -0
- package/dist/mcp/tools/sprint.d.ts.map +1 -0
- package/dist/mcp/tools/sprint.js +243 -0
- package/dist/mcp/tools/sprint.js.map +1 -0
- package/dist/mcp/tools/status.d.ts +2 -0
- package/dist/mcp/tools/status.d.ts.map +1 -0
- package/dist/mcp/tools/status.js +76 -0
- package/dist/mcp/tools/status.js.map +1 -0
- package/dist/orchestrator/agentic-loop.d.ts +7 -6
- package/dist/orchestrator/agentic-loop.d.ts.map +1 -1
- package/dist/orchestrator/agentic-loop.js +33 -40
- package/dist/orchestrator/agentic-loop.js.map +1 -1
- package/dist/orchestrator/context-handoff.d.ts +20 -20
- package/dist/orchestrator/evaluator-agent.d.ts.map +1 -1
- package/dist/orchestrator/evaluator-agent.js +2 -2
- package/dist/orchestrator/evaluator-agent.js.map +1 -1
- package/dist/orchestrator/generator-agent.d.ts.map +1 -1
- package/dist/orchestrator/generator-agent.js +2 -2
- package/dist/orchestrator/generator-agent.js.map +1 -1
- package/dist/orchestrator/model-resolver.d.ts +35 -4
- package/dist/orchestrator/model-resolver.d.ts.map +1 -1
- package/dist/orchestrator/model-resolver.js +68 -15
- package/dist/orchestrator/model-resolver.js.map +1 -1
- package/dist/orchestrator/planner-agent.d.ts.map +1 -1
- package/dist/orchestrator/planner-agent.js +2 -2
- package/dist/orchestrator/planner-agent.js.map +1 -1
- package/dist/orchestrator/tools/index.d.ts +3 -4
- package/dist/orchestrator/tools/index.d.ts.map +1 -1
- package/dist/orchestrator/tools/index.js.map +1 -1
- package/dist/orchestrator/tools/schemas.d.ts +11 -12
- package/dist/orchestrator/tools/schemas.d.ts.map +1 -1
- package/dist/orchestrator/tools/schemas.js +3 -2
- package/dist/orchestrator/tools/schemas.js.map +1 -1
- package/dist/providers/anthropic.d.ts +15 -0
- package/dist/providers/anthropic.d.ts.map +1 -0
- package/dist/providers/anthropic.js +133 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/factory.d.ts +37 -0
- package/dist/providers/factory.d.ts.map +1 -0
- package/dist/providers/factory.js +119 -0
- package/dist/providers/factory.js.map +1 -0
- package/dist/providers/google.d.ts +39 -0
- package/dist/providers/google.d.ts.map +1 -0
- package/dist/providers/google.js +195 -0
- package/dist/providers/google.js.map +1 -0
- package/dist/providers/index.d.ts +7 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +6 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/openai-compat.d.ts +39 -0
- package/dist/providers/openai-compat.d.ts.map +1 -0
- package/dist/providers/openai-compat.js +42 -0
- package/dist/providers/openai-compat.js.map +1 -0
- package/dist/providers/openai.d.ts +41 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +205 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/types.d.ts +144 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +8 -0
- package/dist/providers/types.js.map +1 -0
- package/package.json +26 -4
package/README.md
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
# agent-bober
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[npm package](https://www.npmjs.com/package/agent-bober)
|
|
4
|
+
[License](https://github.com/BOBER3r/agent-bober/blob/main/LICENSE)
|
|
4
5
|
|
|
5
|
-
|
|
6
|
+
**Generator-Evaluator multi-agent harness for building applications autonomously with any LLM.**
|
|
7
|
+
|
|
8
|
+
Inspired by Anthropic's engineering publication [**"Harness design for long-running application development"**](https://www.anthropic.com/engineering/harness-design-long-running-apps), agent-bober implements the Generator-Evaluator multi-agent pattern as a reusable, installable workflow. It orchestrates AI agents in a structured loop: a **Planner** decomposes your idea into sprint contracts, a **Generator** writes the code, and an **Evaluator** independently verifies each sprint against its contract before moving on. The result is autonomous, high-quality software development with built-in guardrails, context resets, and brutally honest evaluation.
|
|
9
|
+
|
|
10
|
+
Works with **Claude, GPT, Gemini, Ollama**, and any OpenAI-compatible endpoint. Mix and match providers per agent role.
|
|
6
11
|
|
|
7
12
|
```
|
|
8
13
|
You describe a feature
|
|
@@ -36,7 +41,12 @@ npm install -g agent-bober
|
|
|
36
41
|
npx agent-bober init
|
|
37
42
|
```
|
|
38
43
|
|
|
39
|
-
agent-bober
|
|
44
|
+
agent-bober works in multiple environments:
|
|
45
|
+
|
|
46
|
+
- **Claude Code** -- Plugin with 10 slash commands (`/bober-plan`, `/bober-run`, etc.)
|
|
47
|
+
- **Cursor / Windsurf** -- MCP server with 10 tools in the chat interface
|
|
48
|
+
- **Any MCP-compatible IDE** -- MCP server via stdio transport
|
|
49
|
+
- **Any terminal** -- CLI commands (`npx agent-bober run "feature"`)
|
|
40
50
|
|
|
41
51
|
## Quick Start
|
|
42
52
|
|
|
@@ -45,7 +55,7 @@ agent-bober also works as a **Claude Code plugin**. If you install it as a depen
|
|
|
45
55
|
npx agent-bober init
|
|
46
56
|
```
|
|
47
57
|
|
|
48
|
-
Interactive setup --
|
|
58
|
+
Interactive setup -- pick your AI provider, choose a preset, describe what you want to build.
|
|
49
59
|
|
|
50
60
|
### With a Preset
|
|
51
61
|
```bash
|
|
@@ -83,14 +93,113 @@ Specialized workflows:
|
|
|
83
93
|
|
|
84
94
|
---
|
|
85
95
|
|
|
96
|
+
## Multi-Provider Support
|
|
97
|
+
|
|
98
|
+
agent-bober is **provider-agnostic**. Use any LLM provider for any agent role. Mix and match -- Opus for planning, GPT-4.1 for generation, local Ollama for evaluation.
|
|
99
|
+
|
|
100
|
+
### Supported Providers
|
|
101
|
+
|
|
102
|
+
| Provider | Models | API Key |
|
|
103
|
+
|----------|--------|---------|
|
|
104
|
+
| **Anthropic** (default) | `opus`, `sonnet`, `haiku` | `ANTHROPIC_API_KEY` |
|
|
105
|
+
| **OpenAI** | `gpt-4.1`, `gpt-4.1-mini`, `o3`, `o4-mini` | `OPENAI_API_KEY` |
|
|
106
|
+
| **Google Gemini** | `gemini-pro`, `gemini-flash` | `GOOGLE_API_KEY` or `GEMINI_API_KEY` |
|
|
107
|
+
| **OpenAI-Compatible** | Any model (Ollama, LM Studio, Groq, DeepSeek, etc.) | Optional |
|
|
108
|
+
|
|
109
|
+
### Configuration
|
|
110
|
+
|
|
111
|
+
Set providers per agent role in `bober.config.json`:
|
|
112
|
+
|
|
113
|
+
```jsonc
|
|
114
|
+
{
|
|
115
|
+
"planner": {
|
|
116
|
+
"provider": "anthropic",
|
|
117
|
+
"model": "opus"
|
|
118
|
+
},
|
|
119
|
+
"generator": {
|
|
120
|
+
"provider": "openai",
|
|
121
|
+
"model": "gpt-4.1"
|
|
122
|
+
},
|
|
123
|
+
"evaluator": {
|
|
124
|
+
"provider": "openai-compat",
|
|
125
|
+
"model": "llama3.1:70b",
|
|
126
|
+
"endpoint": "http://localhost:11434/v1"
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Model shorthands auto-resolve to the correct provider:
|
|
132
|
+
- `"opus"` / `"sonnet"` / `"haiku"` -- Anthropic
|
|
133
|
+
- `"gpt-4.1"` / `"o3"` / `"o4-mini"` -- OpenAI
|
|
134
|
+
- `"gemini-pro"` / `"gemini-flash"` -- Google
|
|
135
|
+
- `"ollama/llama3"` -- OpenAI-compatible at localhost:11434
|
|
136
|
+
|
|
137
|
+
Override provider for all roles from the CLI:
|
|
138
|
+
```bash
|
|
139
|
+
npx agent-bober run "feature" --provider openai
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Provider SDKs (`openai`, `@google/generative-ai`) are **optional peer dependencies** -- install only what you use. Only `@anthropic-ai/sdk` is required by default.
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## MCP Server (Cursor, Windsurf, etc.)
|
|
147
|
+
|
|
148
|
+
agent-bober includes an MCP (Model Context Protocol) server that exposes all functionality as tools in any MCP-compatible IDE.
|
|
149
|
+
|
|
150
|
+
### Setup for Cursor
|
|
151
|
+
|
|
152
|
+
Add to `.cursor/mcp.json`:
|
|
153
|
+
```json
|
|
154
|
+
{
|
|
155
|
+
"mcpServers": {
|
|
156
|
+
"bober": {
|
|
157
|
+
"command": "npx",
|
|
158
|
+
"args": ["agent-bober", "mcp"]
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Setup for Windsurf
|
|
165
|
+
|
|
166
|
+
Add to your Windsurf MCP configuration:
|
|
167
|
+
```json
|
|
168
|
+
{
|
|
169
|
+
"mcpServers": {
|
|
170
|
+
"bober": {
|
|
171
|
+
"command": "npx",
|
|
172
|
+
"args": ["agent-bober", "mcp"]
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Available MCP Tools
|
|
179
|
+
|
|
180
|
+
| Tool | Type | Description |
|
|
181
|
+
|------|------|-------------|
|
|
182
|
+
| `bober_init` | sync | Initialize project config and `.bober/` directory |
|
|
183
|
+
| `bober_plan` | sync | Plan a feature, create sprint contracts |
|
|
184
|
+
| `bober_sprint` | sync | Execute the next sprint (generator + evaluator loop) |
|
|
185
|
+
| `bober_eval` | sync | Evaluate a sprint independently |
|
|
186
|
+
| `bober_run` | async | Full autonomous pipeline (returns immediately, poll with status) |
|
|
187
|
+
| `bober_status` | poll | Check pipeline progress or read current status |
|
|
188
|
+
| `bober_contracts` | read | List all sprint contracts or read a specific one |
|
|
189
|
+
| `bober_spec` | read | Read the current PlanSpec |
|
|
190
|
+
| `bober_principles` | read/write | Read or set project principles |
|
|
191
|
+
| `bober_config` | read/write | Read or update `bober.config.json` |
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
86
195
|
## Commands
|
|
87
196
|
|
|
88
197
|
### Slash Commands (Claude Code)
|
|
89
198
|
|
|
90
199
|
| Command | Description |
|
|
91
200
|
|---|---|
|
|
92
|
-
| `/bober-principles` | Define project principles
|
|
93
|
-
| `/bober-plan` | Plan any feature
|
|
201
|
+
| `/bober-principles` | Define project principles -- AI expands your rough notes into standards |
|
|
202
|
+
| `/bober-plan` | Plan any feature -- stack-agnostic, sprint-decomposed |
|
|
94
203
|
| `/bober-sprint` | Execute the next sprint contract |
|
|
95
204
|
| `/bober-eval` | Evaluate current sprint output |
|
|
96
205
|
| `/bober-run` | Full autonomous pipeline (plan + sprint + eval loop) |
|
|
@@ -103,11 +212,12 @@ Specialized workflows:
|
|
|
103
212
|
### CLI
|
|
104
213
|
|
|
105
214
|
```bash
|
|
106
|
-
npx agent-bober init [preset] # Initialize project
|
|
215
|
+
npx agent-bober init [preset] # Initialize project (with provider selection)
|
|
107
216
|
npx agent-bober plan "feature" # Run the planner
|
|
108
217
|
npx agent-bober sprint # Execute next sprint
|
|
109
218
|
npx agent-bober eval # Evaluate current sprint
|
|
110
219
|
npx agent-bober run "feature" # Full autonomous loop
|
|
220
|
+
npx agent-bober mcp # Start MCP server (Cursor/Windsurf)
|
|
111
221
|
```
|
|
112
222
|
|
|
113
223
|
### Fully Autonomous Mode (no human in the loop)
|
|
@@ -135,7 +245,16 @@ agent-bober init nextjs
|
|
|
135
245
|
agent-bober run "Build a complete dashboard with auth, CRUD, and charts"
|
|
136
246
|
```
|
|
137
247
|
|
|
138
|
-
The CLI uses the Anthropic SDK directly
|
|
248
|
+
The CLI uses the Anthropic SDK directly -- no approval prompts at all.
|
|
249
|
+
|
|
250
|
+
**Option C: With a different provider**
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
export OPENAI_API_KEY=sk-...
|
|
254
|
+
cd your-project
|
|
255
|
+
agent-bober init nextjs
|
|
256
|
+
agent-bober run "Build a complete dashboard with auth, CRUD, and charts" --provider openai
|
|
257
|
+
```
|
|
139
258
|
|
|
140
259
|
---
|
|
141
260
|
|
|
@@ -147,7 +266,7 @@ All configuration lives in `bober.config.json` at your project root. The `init`
|
|
|
147
266
|
|
|
148
267
|
```jsonc
|
|
149
268
|
{
|
|
150
|
-
//
|
|
269
|
+
// -- Project -----------------------------------------
|
|
151
270
|
"project": {
|
|
152
271
|
"name": "my-app", // Project name
|
|
153
272
|
"mode": "greenfield", // "greenfield" | "brownfield"
|
|
@@ -155,26 +274,35 @@ All configuration lives in `bober.config.json` at your project root. The `init`
|
|
|
155
274
|
"description": "A task management app with real-time collaboration"
|
|
156
275
|
},
|
|
157
276
|
|
|
158
|
-
//
|
|
277
|
+
// -- Planner -----------------------------------------
|
|
159
278
|
"planner": {
|
|
279
|
+
"provider": "anthropic", // "anthropic" | "openai" | "google" | "openai-compat"
|
|
280
|
+
"model": "opus", // Any model string or shorthand
|
|
281
|
+
"endpoint": null, // Custom base URL (for openai-compat)
|
|
282
|
+
"providerConfig": {}, // Provider-specific settings
|
|
160
283
|
"maxClarifications": 5, // Max clarifying questions (0 to skip)
|
|
161
|
-
"model": "opus", // Model for planning: "opus" | "sonnet" | "haiku"
|
|
162
284
|
"contextFiles": [ // Extra files the planner should read
|
|
163
285
|
"docs/architecture.md"
|
|
164
286
|
]
|
|
165
287
|
},
|
|
166
288
|
|
|
167
|
-
//
|
|
289
|
+
// -- Generator ---------------------------------------
|
|
168
290
|
"generator": {
|
|
169
|
-
"
|
|
291
|
+
"provider": "anthropic", // "anthropic" | "openai" | "google" | "openai-compat"
|
|
292
|
+
"model": "sonnet", // Any model string or shorthand
|
|
293
|
+
"endpoint": null, // Custom base URL (for openai-compat)
|
|
294
|
+
"providerConfig": {}, // Provider-specific settings
|
|
170
295
|
"maxTurnsPerSprint": 50, // Max tool-use turns per sprint
|
|
171
296
|
"autoCommit": true, // Auto-commit after each sprint
|
|
172
297
|
"branchPattern": "bober/{feature-name}" // Git branch naming
|
|
173
298
|
},
|
|
174
299
|
|
|
175
|
-
//
|
|
300
|
+
// -- Evaluator ---------------------------------------
|
|
176
301
|
"evaluator": {
|
|
177
|
-
"
|
|
302
|
+
"provider": "anthropic", // "anthropic" | "openai" | "google" | "openai-compat"
|
|
303
|
+
"model": "sonnet", // Any model string or shorthand
|
|
304
|
+
"endpoint": null, // Custom base URL (for openai-compat)
|
|
305
|
+
"providerConfig": {}, // Provider-specific settings
|
|
178
306
|
"strategies": [ // Evaluation strategies to run
|
|
179
307
|
{ "type": "typecheck", "required": true },
|
|
180
308
|
{ "type": "lint", "required": true },
|
|
@@ -186,21 +314,21 @@ All configuration lives in `bober.config.json` at your project root. The `init`
|
|
|
186
314
|
"plugins": [] // Custom evaluator plugin paths
|
|
187
315
|
},
|
|
188
316
|
|
|
189
|
-
//
|
|
317
|
+
// -- Sprint ------------------------------------------
|
|
190
318
|
"sprint": {
|
|
191
319
|
"maxSprints": 10, // Max sprints per plan
|
|
192
320
|
"requireContracts": true, // Require contract agreement before coding
|
|
193
321
|
"sprintSize": "medium" // "small" | "medium" | "large"
|
|
194
322
|
},
|
|
195
323
|
|
|
196
|
-
//
|
|
324
|
+
// -- Pipeline ----------------------------------------
|
|
197
325
|
"pipeline": {
|
|
198
326
|
"maxIterations": 20, // Max total iterations across all sprints
|
|
199
327
|
"requireApproval": false, // Pause for user approval between sprints
|
|
200
328
|
"contextReset": "always" // "always" | "on-threshold" | "never"
|
|
201
329
|
},
|
|
202
330
|
|
|
203
|
-
//
|
|
331
|
+
// -- Commands ----------------------------------------
|
|
204
332
|
"commands": {
|
|
205
333
|
"install": "npm install",
|
|
206
334
|
"build": "npm run build",
|
|
@@ -245,7 +373,7 @@ All configuration lives in `bober.config.json` at your project root. The `init`
|
|
|
245
373
|
|
|
246
374
|
### Inline Command Evaluators
|
|
247
375
|
|
|
248
|
-
The strategy type is **open**
|
|
376
|
+
The strategy type is **open** -- you can use any name and provide a shell command directly. No plugin file needed:
|
|
249
377
|
|
|
250
378
|
```json
|
|
251
379
|
{
|
|
@@ -449,18 +577,18 @@ To debug failing E2E tests:
|
|
|
449
577
|
|
|
450
578
|
### The Generator-Evaluator Pattern
|
|
451
579
|
|
|
452
|
-
This architecture implements the patterns described in Anthropic's [**"Harness design for long-running application development"**](https://www.anthropic.com/engineering/harness-design-long-running-apps) by Prithvi Rajasekaran. The key insight from that research: separating code generation from code evaluation creates a feedback loop that catches errors early and dramatically improves output quality. In their tests, a solo agent produced broken output in 20 minutes, while the full harness produced a polished, working application
|
|
580
|
+
This architecture implements the patterns described in Anthropic's [**"Harness design for long-running application development"**](https://www.anthropic.com/engineering/harness-design-long-running-apps) by Prithvi Rajasekaran. The key insight from that research: separating code generation from code evaluation creates a feedback loop that catches errors early and dramatically improves output quality. In their tests, a solo agent produced broken output in 20 minutes, while the full harness produced a polished, working application -- demonstrating that multi-agent orchestration with honest evaluation is worth the investment.
|
|
453
581
|
|
|
454
|
-
###
|
|
582
|
+
### Provider-Agnostic Architecture
|
|
455
583
|
|
|
456
|
-
Each agent runs as a **multi-turn agentic loop** with tool access via the Anthropic
|
|
584
|
+
Each agent runs as a **multi-turn agentic loop** with tool access via the unified `LLMClient` interface. The provider layer abstracts away the differences between Anthropic, OpenAI, Google, and OpenAI-compatible APIs. System prompts are loaded from the detailed agent definitions in `agents/bober-*.md` (300-600 lines of role-specific instructions, anti-leniency protocols, and evaluation criteria).
|
|
457
585
|
|
|
458
|
-
- **Planner** (Claude Opus): Explores the codebase via read-only tools (`read_file`, `glob`, `grep`), then produces sprint-decomposed plans. Thinks about scope, dependencies, and risk.
|
|
459
|
-
- **Generator** (Claude Sonnet): Full tool access (`bash`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`). Reads existing code, writes implementation, runs tests, and commits
|
|
460
|
-
- **Evaluator** (Claude Sonnet): Read-only + bash tools (`bash`, `read_file`, `glob`, `grep`
|
|
586
|
+
- **Planner** (default: Claude Opus): Explores the codebase via read-only tools (`read_file`, `glob`, `grep`), then produces sprint-decomposed plans. Thinks about scope, dependencies, and risk.
|
|
587
|
+
- **Generator** (default: Claude Sonnet): Full tool access (`bash`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`). Reads existing code, writes implementation, runs tests, and commits -- all autonomously within the sprint contract boundaries.
|
|
588
|
+
- **Evaluator** (default: Claude Sonnet): Read-only + bash tools (`bash`, `read_file`, `glob`, `grep` -- deliberately NO write/edit). Independently verifies by running the dev server, taking Playwright screenshots, executing tests, and inspecting code. Cannot fix bugs -- only report them with precise feedback.
|
|
461
589
|
|
|
462
590
|
The separation ensures that:
|
|
463
|
-
1. The Generator cannot "mark its own homework"
|
|
591
|
+
1. The Generator cannot "mark its own homework" -- an independent evaluation step with its own tool access catches issues through actual runtime verification, not just reading the generator's self-report.
|
|
464
592
|
2. Sprint contracts provide clear scope boundaries, preventing feature creep.
|
|
465
593
|
3. Automated checks (programmatic evaluators) + agent-based qualitative evaluation run after every sprint.
|
|
466
594
|
4. Context resets between sprints keep the Generator focused and prevent context degradation.
|
|
@@ -474,8 +602,8 @@ All bober state lives in the `.bober/` directory:
|
|
|
474
602
|
.bober/
|
|
475
603
|
specs/ PlanSpec JSON files
|
|
476
604
|
contracts/ SprintContract JSON files
|
|
477
|
-
|
|
478
|
-
|
|
605
|
+
eval-results/ Evaluation result logs
|
|
606
|
+
handoffs/ Context handoff documents
|
|
479
607
|
progress.md Human-readable progress tracker
|
|
480
608
|
history.jsonl Machine-readable event log
|
|
481
609
|
```
|
|
@@ -527,8 +655,11 @@ agent-bober/
|
|
|
527
655
|
config/ Config schema, loader, defaults
|
|
528
656
|
contracts/ Sprint contract and eval result types
|
|
529
657
|
evaluators/ Built-in evaluator plugins
|
|
658
|
+
mcp/ MCP server and tool definitions
|
|
659
|
+
tools/ 10 MCP tools (init, plan, sprint, eval, run, status, etc.)
|
|
530
660
|
orchestrator/ Agent runners, agentic loop, tool infrastructure
|
|
531
661
|
tools/ Tool schemas, sandboxed handlers, role-based sets
|
|
662
|
+
providers/ LLM provider adapters (Anthropic, OpenAI, Google, OpenAI-compat)
|
|
532
663
|
state/ State management for .bober/ directory
|
|
533
664
|
utils/ Shared utilities
|
|
534
665
|
agents/ Agent system prompts (.md files, loaded at runtime)
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
export interface EvalCommandOptions {
|
|
2
2
|
verbose?: boolean;
|
|
3
3
|
sprint?: string;
|
|
4
|
+
/** Override AI provider for all roles. Overrides config.planner/generator/evaluator.provider. */
|
|
5
|
+
provider?: string;
|
|
4
6
|
}
|
|
5
7
|
export declare function runEvalCommand(projectRoot: string, options: EvalCommandOptions): Promise<void>;
|
|
6
8
|
//# sourceMappingURL=eval.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":"AAgBA,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":"AAgBA,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,iGAAiG;IACjG,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAID,wBAAsB,cAAc,CAClC,WAAW,EAAE,MAAM,EACnB,OAAO,EAAE,kBAAkB,GAC1B,OAAO,CAAC,IAAI,CAAC,CAsKf"}
|
|
@@ -20,6 +20,16 @@ export async function runEvalCommand(projectRoot, options) {
|
|
|
20
20
|
logger.info('Run "npx agent-bober init" to create a configuration.');
|
|
21
21
|
return;
|
|
22
22
|
}
|
|
23
|
+
// Apply --provider override for all roles
|
|
24
|
+
if (options.provider) {
|
|
25
|
+
config = {
|
|
26
|
+
...config,
|
|
27
|
+
planner: { ...config.planner, provider: options.provider },
|
|
28
|
+
generator: { ...config.generator, provider: options.provider },
|
|
29
|
+
evaluator: { ...config.evaluator, provider: options.provider },
|
|
30
|
+
};
|
|
31
|
+
logger.info(`Provider override: ${options.provider}`);
|
|
32
|
+
}
|
|
23
33
|
await ensureBoberDir(projectRoot);
|
|
24
34
|
// Load spec and contracts
|
|
25
35
|
const spec = await loadLatestSpec(projectRoot);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,uCAAuC,CAAC;AAEtE,OAAO,EAAE,iBAAiB,EAAE,MAAM,uCAAuC,CAAC;AAC1E,OAAO,EACL,cAAc,EACd,aAAa,EACb,cAAc,GACf,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,uCAAuC,CAAC;AAEtE,OAAO,EAAE,iBAAiB,EAAE,MAAM,uCAAuC,CAAC;AAC1E,OAAO,EACL,cAAc,EACd,aAAa,EACb,cAAc,GACf,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAW/C,sEAAsE;AAEtE,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,WAAmB,EACnB,OAA2B;IAE3B,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC;IACxB,CAAC;IAED,cAAc;IACd,IAAI,MAAM,CAAC;IACX,IAAI,CAAC;QACH,MAAM,GAAG,MAAM,UAAU,CAAC,WAAW,CAAC,CAAC;IACzC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,CAAC,KAAK,CACV,0BAA0B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7E,CAAC;QACF,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;QACrE,OAAO;IACT,CAAC;IAED,0CAA0C;IAC1C,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACrB,MAAM,GAAG;YACP,GAAG,MAAM;YACT,OAAO,EAAE,EAAE,GAAG,MAAM,CAAC,OAAO,EAAE,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE;YAC1D,SAAS,EAAE,EAAE,GAAG,MAAM,CAAC,SAAS,EAAE,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE;YAC9D,SAAS,EAAE,EAAE,GAAG,MAAM,CAAC,SAAS,EAAE,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE;SAC/D,CAAC;QACF,MAAM,CAAC,IAAI,CAAC,sBAAsB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;IAElC,0BAA0B;IAC1B,MAAM,IAAI,GAAG,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;IAC/C,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,MAAM,CAAC,KAAK,CAAC,kDAAkD,CAAC,CAAC;QACjE,OAAO;IACT,CAAC;IAED,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,WAAW,CAAC,CAAC;IACnD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC3C,OAAO;IACT,CAAC;IAED,2BAA2B;IAC3B,IAAI,cAAc,CAAC;IACnB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,cAAc,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;QAChE,IAAI,CAAC,cAAc,EAAE,CAAC;YACpB,MAAM,CAAC,KAAK,CAAC,WAAW,OAAO,CAAC,MAAM,cAAc,CAAC,CAAC;YACtD,MAAM,CAAC,IAAI,CACT,sBAAsB,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,
CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAC9D,CAAC;YACF,OAAO;QACT,CAAC;IACH,CAAC;SAAM,CAAC;QACN,wDAAwD;QACxD,cAAc,GAAG,SAAS,CAAC,IAAI,CAC7B,CAAC,CAAC,EAAE,EAAE,CACJ,CAAC,CAAC,MAAM,KAAK,aAAa;YAC1B,CAAC,CAAC,MAAM,KAAK,YAAY;YACzB,CAAC,CAAC,MAAM,KAAK,cAAc,CAC9B,CAAC;QAEF,IAAI,CAAC,cAAc,EAAE,CAAC;YACpB,sCAAsC;YACtC,cAAc,GAAG,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,eAAe,cAAc,CAAC,OAAO,EAAE,CAAC,CAAC;IACtD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,EAAE,WAAW,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC;IAErE,wBAAwB;IACxB,IAAI,aAAqB,CAAC;IAC1B,IAAI,CAAC;QACH,aAAa,GAAG,MAAM,gBAAgB,CAAC,WAAW,CAAC,CAAC;IACtD,CAAC;IAAC,MAAM,CAAC;QACP,aAAa,GAAG,SAAS,CAAC;IAC5B,CAAC;IAED,MAAM,cAAc,GAAmB;QACrC,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,IAAI;QACzB,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,IAAI;QACzB,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,WAAW,EAAE,EAAE;QACf,aAAa;KACd,CAAC;IAEF,oBAAoB;IACpB,IAAI,YAAsB,CAAC;IAC3B,IAAI,CAAC;QACH,YAAY,GAAG,MAAM,eAAe,CAAC,WAAW,CAAC,CAAC;IACpD,CAAC;IAAC,MAAM,CAAC;QACP,YAAY,GAAG,EAAE,CAAC;IACpB,CAAC;IAED,8BAA8B;IAC9B,MAAM,kBAAkB,GAAG,SAAS,CAAC,MAAM,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ,CAC7B,CAAC;IAEF,MAAM,OAAO,GAAG,aAAa,CAAC;QAC5B,IAAI,EAAE,WAAW;QACjB,EAAE,EAAE,WAAW;QACf,cAAc;QACd,IAAI;QACJ,eAAe,EAAE,cAAc;QAC/B,aAAa,EAAE,kBAAkB;QACjC,YAAY,EAAE,uBAAuB,cAAc,CAAC,OAAO,EAAE;QAC7D,YAAY;KACb,CAAC,CAAC;IAEH,iBAAiB;IACjB,MAAM,UAAU,GAAG,MAAM,iBAAiB,CACxC,OAAO,EACP,WAAW,EACX,MAAM,CACP,CAAC;IAEF,kBAAkB;IAClB,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM;QAClC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC;QACvB,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACxB,OAAO,CAAC,GAAG,CACT,GAAG,UAAU,IAAI,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,aAAa,UAAU,CAAC,KAAK,MAAM,CACvF,CAAC;IACF,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,KAAK,MAAM,MAAM,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;QACxC,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACzE,MAAM,QAAQ,GACZ,MAAM,CAAC,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,KAA
K,MAAM,CAAC,KAAK,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,GAAG,CAAC,GAAG,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,GAAG,QAAQ,EAAE,CAAC,CAAC;QAClE,OAAO,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAEpD,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnB,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAsB,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YAC9E,KAAK,MAAM,MAAM,IAAI,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBAC1C,MAAM,aAAa,GACjB,MAAM,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;gBACzD,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI;oBAC1B,CAAC,CAAC,OAAO,MAAM,CAAC,IAAI,GAAG,MAAM,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE;oBAC3E,CAAC,CAAC,EAAE,CAAC;gBACP,OAAO,CAAC,GAAG,CACT,UAAU,aAAa,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,GAAG,QAAQ,EAAE,CAC7F,CAAC;YACJ,CAAC;YACD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,MAAM,GAAG,CAAC,cAAc,CAAC,CAChE,CAAC;YACJ,CAAC;YAED,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACpB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,oBAAoB,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;YACjF,CAAC;QACH,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAE1D,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACvB,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/init.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/init.ts"],"names":[],"mappings":"AA6ZA,MAAM,WAAW,kBAAkB;IACjC,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,wBAAsB,cAAc,CAClC,WAAW,EAAE,MAAM,EACnB,OAAO,GAAE,kBAAuB,GAC/B,OAAO,CAAC,IAAI,CAAC,CAyEf"}
|