agent-bober 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -28
- package/dist/cli/commands/eval.d.ts +2 -0
- package/dist/cli/commands/eval.d.ts.map +1 -1
- package/dist/cli/commands/eval.js +10 -0
- package/dist/cli/commands/eval.js.map +1 -1
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +181 -61
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/plan.d.ts +2 -0
- package/dist/cli/commands/plan.d.ts.map +1 -1
- package/dist/cli/commands/plan.js +10 -0
- package/dist/cli/commands/plan.js.map +1 -1
- package/dist/cli/commands/run.d.ts +2 -0
- package/dist/cli/commands/run.d.ts.map +1 -1
- package/dist/cli/commands/run.js +10 -0
- package/dist/cli/commands/run.js.map +1 -1
- package/dist/cli/commands/sprint.d.ts +2 -0
- package/dist/cli/commands/sprint.d.ts.map +1 -1
- package/dist/cli/commands/sprint.js +10 -0
- package/dist/cli/commands/sprint.js.map +1 -1
- package/dist/cli/index.js +22 -2
- package/dist/cli/index.js.map +1 -1
- package/dist/config/schema.d.ts +160 -43
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +11 -7
- package/dist/config/schema.js.map +1 -1
- package/dist/contracts/sprint-contract.d.ts +8 -8
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/dist/mcp/index.d.ts +4 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +4 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/run-manager.d.ts +47 -0
- package/dist/mcp/run-manager.d.ts.map +1 -0
- package/dist/mcp/run-manager.js +79 -0
- package/dist/mcp/run-manager.js.map +1 -0
- package/dist/mcp/server.d.ts +15 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +107 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/tools/config.d.ts +2 -0
- package/dist/mcp/tools/config.d.ts.map +1 -0
- package/dist/mcp/tools/config.js +153 -0
- package/dist/mcp/tools/config.js.map +1 -0
- package/dist/mcp/tools/contracts.d.ts +2 -0
- package/dist/mcp/tools/contracts.d.ts.map +1 -0
- package/dist/mcp/tools/contracts.js +61 -0
- package/dist/mcp/tools/contracts.js.map +1 -0
- package/dist/mcp/tools/eval.d.ts +2 -0
- package/dist/mcp/tools/eval.d.ts.map +1 -0
- package/dist/mcp/tools/eval.js +157 -0
- package/dist/mcp/tools/eval.js.map +1 -0
- package/dist/mcp/tools/index.d.ts +20 -0
- package/dist/mcp/tools/index.d.ts.map +1 -0
- package/dist/mcp/tools/index.js +47 -0
- package/dist/mcp/tools/index.js.map +1 -0
- package/dist/mcp/tools/init.d.ts +2 -0
- package/dist/mcp/tools/init.d.ts.map +1 -0
- package/dist/mcp/tools/init.js +121 -0
- package/dist/mcp/tools/init.js.map +1 -0
- package/dist/mcp/tools/plan.d.ts +2 -0
- package/dist/mcp/tools/plan.d.ts.map +1 -0
- package/dist/mcp/tools/plan.js +97 -0
- package/dist/mcp/tools/plan.js.map +1 -0
- package/dist/mcp/tools/principles.d.ts +2 -0
- package/dist/mcp/tools/principles.d.ts.map +1 -0
- package/dist/mcp/tools/principles.js +66 -0
- package/dist/mcp/tools/principles.js.map +1 -0
- package/dist/mcp/tools/registry.d.ts +45 -0
- package/dist/mcp/tools/registry.d.ts.map +1 -0
- package/dist/mcp/tools/registry.js +23 -0
- package/dist/mcp/tools/registry.js.map +1 -0
- package/dist/mcp/tools/run.d.ts +2 -0
- package/dist/mcp/tools/run.d.ts.map +1 -0
- package/dist/mcp/tools/run.js +66 -0
- package/dist/mcp/tools/run.js.map +1 -0
- package/dist/mcp/tools/spec.d.ts +2 -0
- package/dist/mcp/tools/spec.d.ts.map +1 -0
- package/dist/mcp/tools/spec.js +32 -0
- package/dist/mcp/tools/spec.js.map +1 -0
- package/dist/mcp/tools/sprint.d.ts +2 -0
- package/dist/mcp/tools/sprint.d.ts.map +1 -0
- package/dist/mcp/tools/sprint.js +243 -0
- package/dist/mcp/tools/sprint.js.map +1 -0
- package/dist/mcp/tools/status.d.ts +2 -0
- package/dist/mcp/tools/status.d.ts.map +1 -0
- package/dist/mcp/tools/status.js +76 -0
- package/dist/mcp/tools/status.js.map +1 -0
- package/dist/orchestrator/agentic-loop.d.ts +7 -6
- package/dist/orchestrator/agentic-loop.d.ts.map +1 -1
- package/dist/orchestrator/agentic-loop.js +33 -40
- package/dist/orchestrator/agentic-loop.js.map +1 -1
- package/dist/orchestrator/context-handoff.d.ts +20 -20
- package/dist/orchestrator/evaluator-agent.d.ts.map +1 -1
- package/dist/orchestrator/evaluator-agent.js +2 -2
- package/dist/orchestrator/evaluator-agent.js.map +1 -1
- package/dist/orchestrator/generator-agent.d.ts.map +1 -1
- package/dist/orchestrator/generator-agent.js +2 -2
- package/dist/orchestrator/generator-agent.js.map +1 -1
- package/dist/orchestrator/model-resolver.d.ts +35 -4
- package/dist/orchestrator/model-resolver.d.ts.map +1 -1
- package/dist/orchestrator/model-resolver.js +68 -15
- package/dist/orchestrator/model-resolver.js.map +1 -1
- package/dist/orchestrator/planner-agent.d.ts.map +1 -1
- package/dist/orchestrator/planner-agent.js +2 -2
- package/dist/orchestrator/planner-agent.js.map +1 -1
- package/dist/orchestrator/tools/index.d.ts +3 -4
- package/dist/orchestrator/tools/index.d.ts.map +1 -1
- package/dist/orchestrator/tools/index.js.map +1 -1
- package/dist/orchestrator/tools/schemas.d.ts +11 -12
- package/dist/orchestrator/tools/schemas.d.ts.map +1 -1
- package/dist/orchestrator/tools/schemas.js +3 -2
- package/dist/orchestrator/tools/schemas.js.map +1 -1
- package/dist/providers/anthropic.d.ts +15 -0
- package/dist/providers/anthropic.d.ts.map +1 -0
- package/dist/providers/anthropic.js +133 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/factory.d.ts +37 -0
- package/dist/providers/factory.d.ts.map +1 -0
- package/dist/providers/factory.js +119 -0
- package/dist/providers/factory.js.map +1 -0
- package/dist/providers/google.d.ts +39 -0
- package/dist/providers/google.d.ts.map +1 -0
- package/dist/providers/google.js +195 -0
- package/dist/providers/google.js.map +1 -0
- package/dist/providers/index.d.ts +7 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +6 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/openai-compat.d.ts +39 -0
- package/dist/providers/openai-compat.d.ts.map +1 -0
- package/dist/providers/openai-compat.js +42 -0
- package/dist/providers/openai-compat.js.map +1 -0
- package/dist/providers/openai.d.ts +41 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +205 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/types.d.ts +144 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +8 -0
- package/dist/providers/types.js.map +1 -0
- package/package.json +21 -3
package/README.md
CHANGED
|
@@ -3,9 +3,11 @@
|
|
|
3
3
|
[npm package](https://www.npmjs.com/package/agent-bober)
|
|
4
4
|
[License](https://github.com/BOBER3r/agent-bober/blob/main/LICENSE)
|
|
5
5
|
|
|
6
|
-
**Generator-Evaluator multi-agent harness for building applications autonomously with
|
|
6
|
+
**Generator-Evaluator multi-agent harness for building applications autonomously with any LLM.**
|
|
7
7
|
|
|
8
|
-
Inspired by Anthropic's engineering publication [**"Harness design for long-running application development"**](https://www.anthropic.com/engineering/harness-design-long-running-apps), agent-bober implements the Generator-Evaluator multi-agent pattern as a reusable, installable workflow. It orchestrates
|
|
8
|
+
Inspired by Anthropic's engineering publication [**"Harness design for long-running application development"**](https://www.anthropic.com/engineering/harness-design-long-running-apps), agent-bober implements the Generator-Evaluator multi-agent pattern as a reusable, installable workflow. It orchestrates AI agents in a structured loop: a **Planner** decomposes your idea into sprint contracts, a **Generator** writes the code, and an **Evaluator** independently verifies each sprint against its contract before moving on. The result is autonomous, high-quality software development with built-in guardrails, context resets, and brutally honest evaluation.
|
|
9
|
+
|
|
10
|
+
Works with **Claude, GPT, Gemini, Ollama**, and any OpenAI-compatible endpoint. Mix and match providers per agent role.
|
|
9
11
|
|
|
10
12
|
```
|
|
11
13
|
You describe a feature
|
|
@@ -39,7 +41,12 @@ npm install -g agent-bober
|
|
|
39
41
|
npx agent-bober init
|
|
40
42
|
```
|
|
41
43
|
|
|
42
|
-
agent-bober
|
|
44
|
+
agent-bober works in multiple environments:
|
|
45
|
+
|
|
46
|
+
- **Claude Code** -- Plugin with 10 slash commands (`/bober-plan`, `/bober-run`, etc.)
|
|
47
|
+
- **Cursor / Windsurf** -- MCP server with 10 tools in the chat interface
|
|
48
|
+
- **Any MCP-compatible IDE** -- MCP server via stdio transport
|
|
49
|
+
- **Any terminal** -- CLI commands (`npx agent-bober run "feature"`)
|
|
43
50
|
|
|
44
51
|
## Quick Start
|
|
45
52
|
|
|
@@ -48,7 +55,7 @@ agent-bober also works as a **Claude Code plugin**. If you install it as a depen
|
|
|
48
55
|
npx agent-bober init
|
|
49
56
|
```
|
|
50
57
|
|
|
51
|
-
Interactive setup --
|
|
58
|
+
Interactive setup -- pick your AI provider, choose a preset, describe what you want to build.
|
|
52
59
|
|
|
53
60
|
### With a Preset
|
|
54
61
|
```bash
|
|
@@ -86,14 +93,113 @@ Specialized workflows:
|
|
|
86
93
|
|
|
87
94
|
---
|
|
88
95
|
|
|
96
|
+
## Multi-Provider Support
|
|
97
|
+
|
|
98
|
+
agent-bober is **provider-agnostic**. Use any LLM provider for any agent role. Mix and match -- Opus for planning, GPT-4.1 for generation, local Ollama for evaluation.
|
|
99
|
+
|
|
100
|
+
### Supported Providers
|
|
101
|
+
|
|
102
|
+
| Provider | Models | API Key |
|
|
103
|
+
|----------|--------|---------|
|
|
104
|
+
| **Anthropic** (default) | `opus`, `sonnet`, `haiku` | `ANTHROPIC_API_KEY` |
|
|
105
|
+
| **OpenAI** | `gpt-4.1`, `gpt-4.1-mini`, `o3`, `o4-mini` | `OPENAI_API_KEY` |
|
|
106
|
+
| **Google Gemini** | `gemini-pro`, `gemini-flash` | `GOOGLE_API_KEY` or `GEMINI_API_KEY` |
|
|
107
|
+
| **OpenAI-Compatible** | Any model (Ollama, LM Studio, Groq, DeepSeek, etc.) | Optional |
|
|
108
|
+
|
|
109
|
+
### Configuration
|
|
110
|
+
|
|
111
|
+
Set providers per agent role in `bober.config.json`:
|
|
112
|
+
|
|
113
|
+
```jsonc
|
|
114
|
+
{
|
|
115
|
+
"planner": {
|
|
116
|
+
"provider": "anthropic",
|
|
117
|
+
"model": "opus"
|
|
118
|
+
},
|
|
119
|
+
"generator": {
|
|
120
|
+
"provider": "openai",
|
|
121
|
+
"model": "gpt-4.1"
|
|
122
|
+
},
|
|
123
|
+
"evaluator": {
|
|
124
|
+
"provider": "openai-compat",
|
|
125
|
+
"model": "llama3.1:70b",
|
|
126
|
+
"endpoint": "http://localhost:11434/v1"
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Model shorthands auto-resolve to the correct provider:
|
|
132
|
+
- `"opus"` / `"sonnet"` / `"haiku"` -- Anthropic
|
|
133
|
+
- `"gpt-4.1"` / `"o3"` / `"o4-mini"` -- OpenAI
|
|
134
|
+
- `"gemini-pro"` / `"gemini-flash"` -- Google
|
|
135
|
+
- `"ollama/llama3"` -- OpenAI-compatible at localhost:11434
|
|
136
|
+
|
|
137
|
+
Override provider for all roles from the CLI:
|
|
138
|
+
```bash
|
|
139
|
+
npx agent-bober run "feature" --provider openai
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Provider SDKs (`openai`, `@google/generative-ai`) are **optional peer dependencies** -- install only what you use. Only `@anthropic-ai/sdk` is required by default.
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## MCP Server (Cursor, Windsurf, etc.)
|
|
147
|
+
|
|
148
|
+
agent-bober includes an MCP (Model Context Protocol) server that exposes all functionality as tools in any MCP-compatible IDE.
|
|
149
|
+
|
|
150
|
+
### Setup for Cursor
|
|
151
|
+
|
|
152
|
+
Add to `.cursor/mcp.json`:
|
|
153
|
+
```json
|
|
154
|
+
{
|
|
155
|
+
"mcpServers": {
|
|
156
|
+
"bober": {
|
|
157
|
+
"command": "npx",
|
|
158
|
+
"args": ["agent-bober", "mcp"]
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Setup for Windsurf
|
|
165
|
+
|
|
166
|
+
Add to your Windsurf MCP configuration:
|
|
167
|
+
```json
|
|
168
|
+
{
|
|
169
|
+
"mcpServers": {
|
|
170
|
+
"bober": {
|
|
171
|
+
"command": "npx",
|
|
172
|
+
"args": ["agent-bober", "mcp"]
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Available MCP Tools
|
|
179
|
+
|
|
180
|
+
| Tool | Type | Description |
|
|
181
|
+
|------|------|-------------|
|
|
182
|
+
| `bober_init` | sync | Initialize project config and `.bober/` directory |
|
|
183
|
+
| `bober_plan` | sync | Plan a feature, create sprint contracts |
|
|
184
|
+
| `bober_sprint` | sync | Execute the next sprint (generator + evaluator loop) |
|
|
185
|
+
| `bober_eval` | sync | Evaluate a sprint independently |
|
|
186
|
+
| `bober_run` | async | Full autonomous pipeline (returns immediately; poll progress with `bober_status`) |
|
|
187
|
+
| `bober_status` | poll | Check pipeline progress or read current status |
|
|
188
|
+
| `bober_contracts` | read | List all sprint contracts or read a specific one |
|
|
189
|
+
| `bober_spec` | read | Read the current PlanSpec |
|
|
190
|
+
| `bober_principles` | read/write | Read or set project principles |
|
|
191
|
+
| `bober_config` | read/write | Read or update `bober.config.json` |
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
89
195
|
## Commands
|
|
90
196
|
|
|
91
197
|
### Slash Commands (Claude Code)
|
|
92
198
|
|
|
93
199
|
| Command | Description |
|
|
94
200
|
|---|---|
|
|
95
|
-
| `/bober-principles` | Define project principles
|
|
96
|
-
| `/bober-plan` | Plan any feature
|
|
201
|
+
| `/bober-principles` | Define project principles -- AI expands your rough notes into standards |
|
|
202
|
+
| `/bober-plan` | Plan any feature -- stack-agnostic, sprint-decomposed |
|
|
97
203
|
| `/bober-sprint` | Execute the next sprint contract |
|
|
98
204
|
| `/bober-eval` | Evaluate current sprint output |
|
|
99
205
|
| `/bober-run` | Full autonomous pipeline (plan + sprint + eval loop) |
|
|
@@ -106,11 +212,12 @@ Specialized workflows:
|
|
|
106
212
|
### CLI
|
|
107
213
|
|
|
108
214
|
```bash
|
|
109
|
-
npx agent-bober init [preset] # Initialize project
|
|
215
|
+
npx agent-bober init [preset] # Initialize project (with provider selection)
|
|
110
216
|
npx agent-bober plan "feature" # Run the planner
|
|
111
217
|
npx agent-bober sprint # Execute next sprint
|
|
112
218
|
npx agent-bober eval # Evaluate current sprint
|
|
113
219
|
npx agent-bober run "feature" # Full autonomous loop
|
|
220
|
+
npx agent-bober mcp # Start MCP server (Cursor/Windsurf)
|
|
114
221
|
```
|
|
115
222
|
|
|
116
223
|
### Fully Autonomous Mode (no human in the loop)
|
|
@@ -138,7 +245,16 @@ agent-bober init nextjs
|
|
|
138
245
|
agent-bober run "Build a complete dashboard with auth, CRUD, and charts"
|
|
139
246
|
```
|
|
140
247
|
|
|
141
|
-
The CLI uses the Anthropic SDK directly
|
|
248
|
+
The CLI uses the Anthropic SDK directly -- no approval prompts at all.
|
|
249
|
+
|
|
250
|
+
**Option C: With a different provider**
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
export OPENAI_API_KEY=sk-...
|
|
254
|
+
cd your-project
|
|
255
|
+
agent-bober init nextjs
|
|
256
|
+
agent-bober run "Build a complete dashboard with auth, CRUD, and charts" --provider openai
|
|
257
|
+
```
|
|
142
258
|
|
|
143
259
|
---
|
|
144
260
|
|
|
@@ -150,7 +266,7 @@ All configuration lives in `bober.config.json` at your project root. The `init`
|
|
|
150
266
|
|
|
151
267
|
```jsonc
|
|
152
268
|
{
|
|
153
|
-
//
|
|
269
|
+
// -- Project -----------------------------------------
|
|
154
270
|
"project": {
|
|
155
271
|
"name": "my-app", // Project name
|
|
156
272
|
"mode": "greenfield", // "greenfield" | "brownfield"
|
|
@@ -158,26 +274,35 @@ All configuration lives in `bober.config.json` at your project root. The `init`
|
|
|
158
274
|
"description": "A task management app with real-time collaboration"
|
|
159
275
|
},
|
|
160
276
|
|
|
161
|
-
//
|
|
277
|
+
// -- Planner -----------------------------------------
|
|
162
278
|
"planner": {
|
|
279
|
+
"provider": "anthropic", // "anthropic" | "openai" | "google" | "openai-compat"
|
|
280
|
+
"model": "opus", // Any model string or shorthand
|
|
281
|
+
"endpoint": null, // Custom base URL (for openai-compat)
|
|
282
|
+
"providerConfig": {}, // Provider-specific settings
|
|
163
283
|
"maxClarifications": 5, // Max clarifying questions (0 to skip)
|
|
164
|
-
"model": "opus", // Model for planning: "opus" | "sonnet" | "haiku"
|
|
165
284
|
"contextFiles": [ // Extra files the planner should read
|
|
166
285
|
"docs/architecture.md"
|
|
167
286
|
]
|
|
168
287
|
},
|
|
169
288
|
|
|
170
|
-
//
|
|
289
|
+
// -- Generator ---------------------------------------
|
|
171
290
|
"generator": {
|
|
172
|
-
"
|
|
291
|
+
"provider": "anthropic", // "anthropic" | "openai" | "google" | "openai-compat"
|
|
292
|
+
"model": "sonnet", // Any model string or shorthand
|
|
293
|
+
"endpoint": null, // Custom base URL (for openai-compat)
|
|
294
|
+
"providerConfig": {}, // Provider-specific settings
|
|
173
295
|
"maxTurnsPerSprint": 50, // Max tool-use turns per sprint
|
|
174
296
|
"autoCommit": true, // Auto-commit after each sprint
|
|
175
297
|
"branchPattern": "bober/{feature-name}" // Git branch naming
|
|
176
298
|
},
|
|
177
299
|
|
|
178
|
-
//
|
|
300
|
+
// -- Evaluator ---------------------------------------
|
|
179
301
|
"evaluator": {
|
|
180
|
-
"
|
|
302
|
+
"provider": "anthropic", // "anthropic" | "openai" | "google" | "openai-compat"
|
|
303
|
+
"model": "sonnet", // Any model string or shorthand
|
|
304
|
+
"endpoint": null, // Custom base URL (for openai-compat)
|
|
305
|
+
"providerConfig": {}, // Provider-specific settings
|
|
181
306
|
"strategies": [ // Evaluation strategies to run
|
|
182
307
|
{ "type": "typecheck", "required": true },
|
|
183
308
|
{ "type": "lint", "required": true },
|
|
@@ -189,21 +314,21 @@ All configuration lives in `bober.config.json` at your project root. The `init`
|
|
|
189
314
|
"plugins": [] // Custom evaluator plugin paths
|
|
190
315
|
},
|
|
191
316
|
|
|
192
|
-
//
|
|
317
|
+
// -- Sprint ------------------------------------------
|
|
193
318
|
"sprint": {
|
|
194
319
|
"maxSprints": 10, // Max sprints per plan
|
|
195
320
|
"requireContracts": true, // Require contract agreement before coding
|
|
196
321
|
"sprintSize": "medium" // "small" | "medium" | "large"
|
|
197
322
|
},
|
|
198
323
|
|
|
199
|
-
//
|
|
324
|
+
// -- Pipeline ----------------------------------------
|
|
200
325
|
"pipeline": {
|
|
201
326
|
"maxIterations": 20, // Max total iterations across all sprints
|
|
202
327
|
"requireApproval": false, // Pause for user approval between sprints
|
|
203
328
|
"contextReset": "always" // "always" | "on-threshold" | "never"
|
|
204
329
|
},
|
|
205
330
|
|
|
206
|
-
//
|
|
331
|
+
// -- Commands ----------------------------------------
|
|
207
332
|
"commands": {
|
|
208
333
|
"install": "npm install",
|
|
209
334
|
"build": "npm run build",
|
|
@@ -248,7 +373,7 @@ All configuration lives in `bober.config.json` at your project root. The `init`
|
|
|
248
373
|
|
|
249
374
|
### Inline Command Evaluators
|
|
250
375
|
|
|
251
|
-
The strategy type is **open**
|
|
376
|
+
The strategy type is **open** -- you can use any name and provide a shell command directly. No plugin file needed:
|
|
252
377
|
|
|
253
378
|
```json
|
|
254
379
|
{
|
|
@@ -452,18 +577,18 @@ To debug failing E2E tests:
|
|
|
452
577
|
|
|
453
578
|
### The Generator-Evaluator Pattern
|
|
454
579
|
|
|
455
|
-
This architecture implements the patterns described in Anthropic's [**"Harness design for long-running application development"**](https://www.anthropic.com/engineering/harness-design-long-running-apps) by Prithvi Rajasekaran. The key insight from that research: separating code generation from code evaluation creates a feedback loop that catches errors early and dramatically improves output quality. In their tests, a solo agent produced broken output in 20 minutes, while the full harness produced a polished, working application
|
|
580
|
+
This architecture implements the patterns described in Anthropic's [**"Harness design for long-running application development"**](https://www.anthropic.com/engineering/harness-design-long-running-apps) by Prithvi Rajasekaran. The key insight from that research: separating code generation from code evaluation creates a feedback loop that catches errors early and dramatically improves output quality. In their tests, a solo agent produced broken output in 20 minutes, while the full harness produced a polished, working application -- demonstrating that multi-agent orchestration with honest evaluation is worth the investment.
|
|
456
581
|
|
|
457
|
-
###
|
|
582
|
+
### Provider-Agnostic Architecture
|
|
458
583
|
|
|
459
|
-
Each agent runs as a **multi-turn agentic loop** with tool access via the Anthropic
|
|
584
|
+
Each agent runs as a **multi-turn agentic loop** with tool access via the unified `LLMClient` interface. The provider layer abstracts away the differences between Anthropic, OpenAI, Google, and OpenAI-compatible APIs. System prompts are loaded from the detailed agent definitions in `agents/bober-*.md` (300-600 lines of role-specific instructions, anti-leniency protocols, and evaluation criteria).
|
|
460
585
|
|
|
461
|
-
- **Planner** (Claude Opus): Explores the codebase via read-only tools (`read_file`, `glob`, `grep`), then produces sprint-decomposed plans. Thinks about scope, dependencies, and risk.
|
|
462
|
-
- **Generator** (Claude Sonnet): Full tool access (`bash`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`). Reads existing code, writes implementation, runs tests, and commits
|
|
463
|
-
- **Evaluator** (Claude Sonnet): Read-only + bash tools (`bash`, `read_file`, `glob`, `grep`
|
|
586
|
+
- **Planner** (default: Claude Opus): Explores the codebase via read-only tools (`read_file`, `glob`, `grep`), then produces sprint-decomposed plans. Thinks about scope, dependencies, and risk.
|
|
587
|
+
- **Generator** (default: Claude Sonnet): Full tool access (`bash`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`). Reads existing code, writes implementation, runs tests, and commits -- all autonomously within the sprint contract boundaries.
|
|
588
|
+
- **Evaluator** (default: Claude Sonnet): Read-only + bash tools (`bash`, `read_file`, `glob`, `grep` -- deliberately NO write/edit). Independently verifies by running the dev server, taking Playwright screenshots, executing tests, and inspecting code. Cannot fix bugs -- only report them with precise feedback.
|
|
464
589
|
|
|
465
590
|
The separation ensures that:
|
|
466
|
-
1. The Generator cannot "mark its own homework"
|
|
591
|
+
1. The Generator cannot "mark its own homework" -- an independent evaluation step with its own tool access catches issues through actual runtime verification, not just reading the generator's self-report.
|
|
467
592
|
2. Sprint contracts provide clear scope boundaries, preventing feature creep.
|
|
468
593
|
3. Automated checks (programmatic evaluators) + agent-based qualitative evaluation run after every sprint.
|
|
469
594
|
4. Context resets between sprints keep the Generator focused and prevent context degradation.
|
|
@@ -477,8 +602,8 @@ All bober state lives in the `.bober/` directory:
|
|
|
477
602
|
.bober/
|
|
478
603
|
specs/ PlanSpec JSON files
|
|
479
604
|
contracts/ SprintContract JSON files
|
|
480
|
-
|
|
481
|
-
|
|
605
|
+
eval-results/ Evaluation result logs
|
|
606
|
+
handoffs/ Context handoff documents
|
|
482
607
|
progress.md Human-readable progress tracker
|
|
483
608
|
history.jsonl Machine-readable event log
|
|
484
609
|
```
|
|
@@ -530,8 +655,11 @@ agent-bober/
|
|
|
530
655
|
config/ Config schema, loader, defaults
|
|
531
656
|
contracts/ Sprint contract and eval result types
|
|
532
657
|
evaluators/ Built-in evaluator plugins
|
|
658
|
+
mcp/ MCP server and tool definitions
|
|
659
|
+
tools/ 10 MCP tools (init, plan, sprint, eval, run, status, etc.)
|
|
533
660
|
orchestrator/ Agent runners, agentic loop, tool infrastructure
|
|
534
661
|
tools/ Tool schemas, sandboxed handlers, role-based sets
|
|
662
|
+
providers/ LLM provider adapters (Anthropic, OpenAI, Google, OpenAI-compat)
|
|
535
663
|
state/ State management for .bober/ directory
|
|
536
664
|
utils/ Shared utilities
|
|
537
665
|
agents/ Agent system prompts (.md files, loaded at runtime)
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
export interface EvalCommandOptions {
|
|
2
2
|
verbose?: boolean;
|
|
3
3
|
sprint?: string;
|
|
4
|
+
/** Override AI provider for all roles. Overrides config.planner/generator/evaluator.provider. */
|
|
5
|
+
provider?: string;
|
|
4
6
|
}
|
|
5
7
|
export declare function runEvalCommand(projectRoot: string, options: EvalCommandOptions): Promise<void>;
|
|
6
8
|
//# sourceMappingURL=eval.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":"AAgBA,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":"AAgBA,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,iGAAiG;IACjG,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAID,wBAAsB,cAAc,CAClC,WAAW,EAAE,MAAM,EACnB,OAAO,EAAE,kBAAkB,GAC1B,OAAO,CAAC,IAAI,CAAC,CAsKf"}
|
|
@@ -20,6 +20,16 @@ export async function runEvalCommand(projectRoot, options) {
|
|
|
20
20
|
logger.info('Run "npx agent-bober init" to create a configuration.');
|
|
21
21
|
return;
|
|
22
22
|
}
|
|
23
|
+
// Apply --provider override for all roles
|
|
24
|
+
if (options.provider) {
|
|
25
|
+
config = {
|
|
26
|
+
...config,
|
|
27
|
+
planner: { ...config.planner, provider: options.provider },
|
|
28
|
+
generator: { ...config.generator, provider: options.provider },
|
|
29
|
+
evaluator: { ...config.evaluator, provider: options.provider },
|
|
30
|
+
};
|
|
31
|
+
logger.info(`Provider override: ${options.provider}`);
|
|
32
|
+
}
|
|
23
33
|
await ensureBoberDir(projectRoot);
|
|
24
34
|
// Load spec and contracts
|
|
25
35
|
const spec = await loadLatestSpec(projectRoot);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,uCAAuC,CAAC;AAEtE,OAAO,EAAE,iBAAiB,EAAE,MAAM,uCAAuC,CAAC;AAC1E,OAAO,EACL,cAAc,EACd,aAAa,EACb,cAAc,GACf,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/cli/commands/eval.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,uCAAuC,CAAC;AAEtE,OAAO,EAAE,iBAAiB,EAAE,MAAM,uCAAuC,CAAC;AAC1E,OAAO,EACL,cAAc,EACd,aAAa,EACb,cAAc,GACf,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAW/C,sEAAsE;AAEtE,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,WAAmB,EACnB,OAA2B;IAE3B,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC;IACxB,CAAC;IAED,cAAc;IACd,IAAI,MAAM,CAAC;IACX,IAAI,CAAC;QACH,MAAM,GAAG,MAAM,UAAU,CAAC,WAAW,CAAC,CAAC;IACzC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,CAAC,KAAK,CACV,0BAA0B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7E,CAAC;QACF,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;QACrE,OAAO;IACT,CAAC;IAED,0CAA0C;IAC1C,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACrB,MAAM,GAAG;YACP,GAAG,MAAM;YACT,OAAO,EAAE,EAAE,GAAG,MAAM,CAAC,OAAO,EAAE,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE;YAC1D,SAAS,EAAE,EAAE,GAAG,MAAM,CAAC,SAAS,EAAE,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE;YAC9D,SAAS,EAAE,EAAE,GAAG,MAAM,CAAC,SAAS,EAAE,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE;SAC/D,CAAC;QACF,MAAM,CAAC,IAAI,CAAC,sBAAsB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;IAElC,0BAA0B;IAC1B,MAAM,IAAI,GAAG,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;IAC/C,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,MAAM,CAAC,KAAK,CAAC,kDAAkD,CAAC,CAAC;QACjE,OAAO;IACT,CAAC;IAED,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,WAAW,CAAC,CAAC;IACnD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC3C,OAAO;IACT,CAAC;IAED,2BAA2B;IAC3B,IAAI,cAAc,CAAC;IACnB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,cAAc,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;QAChE,IAAI,CAAC,cAAc,EAAE,CAAC;YACpB,MAAM,CAAC,KAAK,CAAC,WAAW,OAAO,CAAC,MAAM,cAAc,CAAC,CAAC;YACtD,MAAM,CAAC,IAAI,CACT,sBAAsB,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,
CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAC9D,CAAC;YACF,OAAO;QACT,CAAC;IACH,CAAC;SAAM,CAAC;QACN,wDAAwD;QACxD,cAAc,GAAG,SAAS,CAAC,IAAI,CAC7B,CAAC,CAAC,EAAE,EAAE,CACJ,CAAC,CAAC,MAAM,KAAK,aAAa;YAC1B,CAAC,CAAC,MAAM,KAAK,YAAY;YACzB,CAAC,CAAC,MAAM,KAAK,cAAc,CAC9B,CAAC;QAEF,IAAI,CAAC,cAAc,EAAE,CAAC;YACpB,sCAAsC;YACtC,cAAc,GAAG,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,eAAe,cAAc,CAAC,OAAO,EAAE,CAAC,CAAC;IACtD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,EAAE,WAAW,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC;IAErE,wBAAwB;IACxB,IAAI,aAAqB,CAAC;IAC1B,IAAI,CAAC;QACH,aAAa,GAAG,MAAM,gBAAgB,CAAC,WAAW,CAAC,CAAC;IACtD,CAAC;IAAC,MAAM,CAAC;QACP,aAAa,GAAG,SAAS,CAAC;IAC5B,CAAC;IAED,MAAM,cAAc,GAAmB;QACrC,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,IAAI;QACzB,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,IAAI;QACzB,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,WAAW,EAAE,EAAE;QACf,aAAa;KACd,CAAC;IAEF,oBAAoB;IACpB,IAAI,YAAsB,CAAC;IAC3B,IAAI,CAAC;QACH,YAAY,GAAG,MAAM,eAAe,CAAC,WAAW,CAAC,CAAC;IACpD,CAAC;IAAC,MAAM,CAAC;QACP,YAAY,GAAG,EAAE,CAAC;IACpB,CAAC;IAED,8BAA8B;IAC9B,MAAM,kBAAkB,GAAG,SAAS,CAAC,MAAM,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ,CAC7B,CAAC;IAEF,MAAM,OAAO,GAAG,aAAa,CAAC;QAC5B,IAAI,EAAE,WAAW;QACjB,EAAE,EAAE,WAAW;QACf,cAAc;QACd,IAAI;QACJ,eAAe,EAAE,cAAc;QAC/B,aAAa,EAAE,kBAAkB;QACjC,YAAY,EAAE,uBAAuB,cAAc,CAAC,OAAO,EAAE;QAC7D,YAAY;KACb,CAAC,CAAC;IAEH,iBAAiB;IACjB,MAAM,UAAU,GAAG,MAAM,iBAAiB,CACxC,OAAO,EACP,WAAW,EACX,MAAM,CACP,CAAC;IAEF,kBAAkB;IAClB,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM;QAClC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC;QACvB,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACxB,OAAO,CAAC,GAAG,CACT,GAAG,UAAU,IAAI,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,aAAa,UAAU,CAAC,KAAK,MAAM,CACvF,CAAC;IACF,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,KAAK,MAAM,MAAM,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;QACxC,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACzE,MAAM,QAAQ,GACZ,MAAM,CAAC,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,KAA
K,MAAM,CAAC,KAAK,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,GAAG,CAAC,GAAG,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,GAAG,QAAQ,EAAE,CAAC,CAAC;QAClE,OAAO,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAEpD,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnB,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAsB,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YAC9E,KAAK,MAAM,MAAM,IAAI,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBAC1C,MAAM,aAAa,GACjB,MAAM,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;gBACzD,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI;oBAC1B,CAAC,CAAC,OAAO,MAAM,CAAC,IAAI,GAAG,MAAM,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE;oBAC3E,CAAC,CAAC,EAAE,CAAC;gBACP,OAAO,CAAC,GAAG,CACT,UAAU,aAAa,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,GAAG,QAAQ,EAAE,CAC7F,CAAC;YACJ,CAAC;YACD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,MAAM,GAAG,CAAC,cAAc,CAAC,CAChE,CAAC;YACJ,CAAC;YAED,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACpB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,oBAAoB,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;YACjF,CAAC;QACH,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAE1D,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACvB,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/init.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/init.ts"],"names":[],"mappings":"AA6ZA,MAAM,WAAW,kBAAkB;IACjC,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,wBAAsB,cAAc,CAClC,WAAW,EAAE,MAAM,EACnB,OAAO,GAAE,kBAAuB,GAC/B,OAAO,CAAC,IAAI,CAAC,CAyEf"}
|