agent-bober 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -7
- package/agents/bober-evaluator.md +62 -54
- package/agents/bober-generator.md +4 -0
- package/dist/contracts/eval-result.d.ts +339 -0
- package/dist/contracts/eval-result.d.ts.map +1 -1
- package/dist/contracts/eval-result.js +36 -0
- package/dist/contracts/eval-result.js.map +1 -1
- package/dist/evaluators/builtin/playwright.d.ts.map +1 -1
- package/dist/evaluators/builtin/playwright.js +50 -15
- package/dist/evaluators/builtin/playwright.js.map +1 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/orchestrator/agent-loader.d.ts +26 -0
- package/dist/orchestrator/agent-loader.d.ts.map +1 -0
- package/dist/orchestrator/agent-loader.js +125 -0
- package/dist/orchestrator/agent-loader.js.map +1 -0
- package/dist/orchestrator/agentic-loop.d.ts +53 -0
- package/dist/orchestrator/agentic-loop.d.ts.map +1 -0
- package/dist/orchestrator/agentic-loop.js +145 -0
- package/dist/orchestrator/agentic-loop.js.map +1 -0
- package/dist/orchestrator/evaluator-agent.d.ts +4 -1
- package/dist/orchestrator/evaluator-agent.d.ts.map +1 -1
- package/dist/orchestrator/evaluator-agent.js +107 -84
- package/dist/orchestrator/evaluator-agent.js.map +1 -1
- package/dist/orchestrator/generator-agent.d.ts +14 -2
- package/dist/orchestrator/generator-agent.d.ts.map +1 -1
- package/dist/orchestrator/generator-agent.js +96 -73
- package/dist/orchestrator/generator-agent.js.map +1 -1
- package/dist/orchestrator/model-resolver.d.ts +9 -0
- package/dist/orchestrator/model-resolver.d.ts.map +1 -0
- package/dist/orchestrator/model-resolver.js +21 -0
- package/dist/orchestrator/model-resolver.js.map +1 -0
- package/dist/orchestrator/pipeline.d.ts.map +1 -1
- package/dist/orchestrator/pipeline.js +21 -4
- package/dist/orchestrator/pipeline.js.map +1 -1
- package/dist/orchestrator/planner-agent.d.ts +3 -2
- package/dist/orchestrator/planner-agent.d.ts.map +1 -1
- package/dist/orchestrator/planner-agent.js +39 -75
- package/dist/orchestrator/planner-agent.js.map +1 -1
- package/dist/orchestrator/tools/handlers.d.ts +9 -0
- package/dist/orchestrator/tools/handlers.d.ts.map +1 -0
- package/dist/orchestrator/tools/handlers.js +279 -0
- package/dist/orchestrator/tools/handlers.js.map +1 -0
- package/dist/orchestrator/tools/index.d.ts +21 -0
- package/dist/orchestrator/tools/index.d.ts.map +1 -0
- package/dist/orchestrator/tools/index.js +33 -0
- package/dist/orchestrator/tools/index.js.map +1 -0
- package/dist/orchestrator/tools/schemas.d.ts +16 -0
- package/dist/orchestrator/tools/schemas.d.ts.map +1 -0
- package/dist/orchestrator/tools/schemas.js +138 -0
- package/dist/orchestrator/tools/schemas.js.map +1 -0
- package/package.json +1 -1
- package/templates/presets/nextjs/bober.config.json +1 -1
- package/templates/presets/react-vite/bober.config.json +1 -1
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import { logger } from "../utils/logger.js";
|
|
2
|
+
// ── Helpers ─────────────────────────────────────────────────────────
|
|
3
|
+
/**
 * Concatenate the text of every `text` content block, in order.
 *
 * Blocks of any other type (for example `tool_use`) are ignored.
 *
 * @param content Array of content blocks, each carrying a `type` field.
 * @returns The joined text, or an empty string when there are no text blocks.
 */
function extractText(content) {
    const pieces = [];
    for (const block of content) {
        if (block.type === "text") {
            pieces.push(block.text);
        }
    }
    return pieces.join("");
}
|
|
9
|
+
// ── Main loop ──────────────────────────────────────────────────────
|
|
10
|
+
/**
 * Run a multi-turn agentic conversation loop.
 *
 * The loop sends the initial user message, then iterates: if the model
 * responds with tool_use, we execute the tools and feed results back.
 * This continues until the model stops requesting tools, the API call
 * fails, or maxTurns is exceeded.
 *
 * Text the model emits alongside tool requests is remembered, so when the
 * loop ends early (API error or turn budget exhausted) the most recent
 * assistant text is returned as the partial result. The generic fallback
 * message is used only when the model produced no text at all.
 *
 * @param params Loop configuration:
 *   - `client`: object exposing `messages.create(request)`.
 *   - `model`, `systemPrompt`: forwarded as `model` / `system`.
 *   - `userMessage`: content of the first user message.
 *   - `tools`: tool schemas; omitted from the request when the array is empty.
 *   - `toolHandlers`: Map-like (`get`, `keys`) from tool name to a handler
 *     whose awaited result provides `output` and `isError`.
 *   - `maxTurns`: maximum number of model calls.
 *   - `maxTokens`: per-call `max_tokens` (default 16384).
 *   - `onToolUse(name, input)`: optional hook called before each tool runs.
 *   - `onTurnComplete(turn, toolNames)`: optional hook called after each
 *     tool-use turn.
 * @returns The final text response and metadata about the conversation:
 *   `{ finalText, turnsUsed, toolsCalled, usage, stopReason }`. `stopReason`
 *   is the model's stop reason, or `"error"` / `"max_turns_exceeded"`.
 */
export async function runAgenticLoop(params) {
    const { client, model, systemPrompt, userMessage, tools, toolHandlers, maxTurns, maxTokens = 16384, onToolUse, onTurnComplete, } = params;
    const messages = [
        { role: "user", content: userMessage },
    ];
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    const allToolsCalled = [];
    // Latest non-empty assistant text; doubles as the partial result when the
    // loop ends early (API error or turn budget exhausted).
    let finalText = "";
    for (let turn = 1; turn <= maxTurns; turn++) {
        logger.debug(`Agentic loop turn ${turn}/${maxTurns}...`);
        let response;
        try {
            response = await client.messages.create({
                model,
                max_tokens: maxTokens,
                system: systemPrompt,
                tools: tools.length > 0 ? tools : undefined,
                messages,
            });
        }
        catch (err) {
            // Handle context window exhaustion or other API errors
            const message = err instanceof Error ? err.message : String(err);
            logger.warn(`Agentic loop API error on turn ${turn}: ${message}`);
            return {
                // Prefer text gathered on earlier turns over the bare error string.
                finalText: finalText || `Error on turn ${turn}: ${message}`,
                // The failed call does not count as a completed turn.
                turnsUsed: turn - 1,
                toolsCalled: allToolsCalled,
                usage: {
                    inputTokens: totalInputTokens,
                    outputTokens: totalOutputTokens,
                },
                stopReason: "error",
            };
        }
        // Accumulate usage
        totalInputTokens += response.usage?.input_tokens ?? 0;
        totalOutputTokens += response.usage?.output_tokens ?? 0;
        // Extract text from this response
        const turnText = extractText(response.content);
        const turnStopReason = response.stop_reason ?? "unknown";
        // If the model is done (no more tool use), return
        if (response.stop_reason !== "tool_use") {
            finalText = turnText;
            return {
                finalText,
                turnsUsed: turn,
                toolsCalled: allToolsCalled,
                usage: {
                    inputTokens: totalInputTokens,
                    outputTokens: totalOutputTokens,
                },
                stopReason: turnStopReason,
            };
        }
        // Keep any text that accompanied the tool requests. Previously this was
        // dropped, so the `finalText ||` fallbacks could never return partial output.
        if (turnText) {
            finalText = turnText;
        }
        // Model wants to use tools — process tool_use blocks
        // Append the assistant's full response (including tool_use blocks)
        messages.push({
            role: "assistant",
            content: response.content,
        });
        // Execute each tool and collect results
        const toolResults = [];
        const turnTools = [];
        for (const block of response.content) {
            if (block.type !== "tool_use")
                continue;
            const toolName = block.name;
            const toolInput = block.input;
            turnTools.push(toolName);
            allToolsCalled.push(toolName);
            onToolUse?.(toolName, toolInput);
            const handler = toolHandlers.get(toolName);
            if (!handler) {
                // Report the problem back to the model instead of aborting the loop.
                logger.warn(`Unknown tool requested: "${toolName}"`);
                toolResults.push({
                    type: "tool_result",
                    tool_use_id: block.id,
                    content: `Error: Unknown tool "${toolName}". Available tools: ${[...toolHandlers.keys()].join(", ")}`,
                    is_error: true,
                });
                continue;
            }
            try {
                const result = await handler(toolInput);
                toolResults.push({
                    type: "tool_result",
                    tool_use_id: block.id,
                    content: result.output,
                    is_error: result.isError,
                });
            }
            catch (err) {
                // A throwing handler becomes an error tool_result so the model can react.
                const message = err instanceof Error ? err.message : String(err);
                logger.warn(`Tool "${toolName}" threw: ${message}`);
                toolResults.push({
                    type: "tool_result",
                    tool_use_id: block.id,
                    content: `Error: Tool execution failed: ${message}`,
                    is_error: true,
                });
            }
        }
        // Append tool results as a user message
        messages.push({
            role: "user",
            content: toolResults,
        });
        onTurnComplete?.(turn, turnTools);
    }
    // Max turns exceeded — return what we have
    logger.warn(`Agentic loop exceeded max turns (${maxTurns}). Returning partial result.`);
    return {
        finalText: finalText ||
            "Max turns exceeded. The agent ran out of tool-use budget before completing.",
        turnsUsed: maxTurns,
        toolsCalled: allToolsCalled,
        usage: {
            inputTokens: totalInputTokens,
            outputTokens: totalOutputTokens,
        },
        stopReason: "max_turns_exceeded",
    };
}
|
|
145
|
+
//# sourceMappingURL=agentic-loop.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agentic-loop.js","sourceRoot":"","sources":["../../src/orchestrator/agentic-loop.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAgD5C,uEAAuE;AAEvE,SAAS,WAAW,CAClB,OAA0C;IAE1C,OAAO,OAAO;SACX,MAAM,CAAC,CAAC,CAAC,EAAqC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC;SACnE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;SAClB,IAAI,CAAC,EAAE,CAAC,CAAC;AACd,CAAC;AAED,sEAAsE;AAEtE;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,MAAyB;IAEzB,MAAM,EACJ,MAAM,EACN,KAAK,EACL,YAAY,EACZ,WAAW,EACX,KAAK,EACL,YAAY,EACZ,QAAQ,EACR,SAAS,GAAG,KAAK,EACjB,SAAS,EACT,cAAc,GACf,GAAG,MAAM,CAAC;IAEX,MAAM,QAAQ,GAAmB;QAC/B,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE;KACvC,CAAC;IAEF,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,IAAI,iBAAiB,GAAG,CAAC,CAAC;IAC1B,MAAM,cAAc,GAAa,EAAE,CAAC;IACpC,IAAI,SAAS,GAAG,EAAE,CAAC;IAEnB,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,IAAI,QAAQ,EAAE,IAAI,EAAE,EAAE,CAAC;QAC5C,MAAM,CAAC,KAAK,CAAC,qBAAqB,IAAI,IAAI,QAAQ,KAAK,CAAC,CAAC;QAEzD,IAAI,QAAoC,CAAC;QACzC,IAAI,CAAC;YACH,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;gBACtC,KAAK;gBACL,UAAU,EAAE,SAAS;gBACrB,MAAM,EAAE,YAAY;gBACpB,KAAK,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBAC3C,QAAQ;aACT,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,uDAAuD;YACvD,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACjE,MAAM,CAAC,IAAI,CAAC,kCAAkC,IAAI,KAAK,OAAO,EAAE,CAAC,CAAC;YAElE,OAAO;gBACL,SAAS,EAAE,SAAS,IAAI,iBAAiB,IAAI,KAAK,OAAO,EAAE;gBAC3D,SAAS,EAAE,IAAI,GAAG,CAAC;gBACnB,WAAW,EAAE,cAAc;gBAC3B,KAAK,EAAE;oBACL,WAAW,EAAE,gBAAgB;oBAC7B,YAAY,EAAE,iBAAiB;iBAChC;gBACD,UAAU,EAAE,OAAO;aACpB,CAAC;QACJ,CAAC;QAED,mBAAmB;QACnB,gBAAgB,IAAI,QAAQ,CAAC,KAAK,EAAE,YAAY,IAAI,CAAC,CAAC;QACtD,iBAAiB,IAAI,QAAQ,CAAC,KAAK,EAAE,aAAa,IAAI,CAAC,CAAC;QAExD,kCAAkC;QAClC,MAAM,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,cAAc,GAAG,QAAQ,CAAC,WAAW,IAAI,SAAS,CAAC;QAEzD,kDAAkD;QAClD,IAAI,QAAQ,CAAC,WAAW,KAAK,UAAU,EAAE,CAAC;YACxC,SAAS,GAAG,QAAQ,CAAC;Y
AErB,OAAO;gBACL,SAAS;gBACT,SAAS,EAAE,IAAI;gBACf,WAAW,EAAE,cAAc;gBAC3B,KAAK,EAAE;oBACL,WAAW,EAAE,gBAAgB;oBAC7B,YAAY,EAAE,iBAAiB;iBAChC;gBACD,UAAU,EAAE,cAAc;aAC3B,CAAC;QACJ,CAAC;QAED,qDAAqD;QACrD,mEAAmE;QACnE,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,WAAW;YACjB,OAAO,EAAE,QAAQ,CAAC,OAAyC;SAC5D,CAAC,CAAC;QAEH,wCAAwC;QACxC,MAAM,WAAW,GAA2B,EAAE,CAAC;QAC/C,MAAM,SAAS,GAAa,EAAE,CAAC;QAE/B,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;YACrC,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU;gBAAE,SAAS;YAExC,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC;YAC5B,MAAM,SAAS,GAAG,KAAK,CAAC,KAAgC,CAAC;YACzD,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACzB,cAAc,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAE9B,SAAS,EAAE,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YAEjC,MAAM,OAAO,GAAG,YAAY,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YAC3C,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,MAAM,CAAC,IAAI,CAAC,4BAA4B,QAAQ,GAAG,CAAC,CAAC;gBACrD,WAAW,CAAC,IAAI,CAAC;oBACf,IAAI,EAAE,aAAa;oBACnB,WAAW,EAAE,KAAK,CAAC,EAAE;oBACrB,OAAO,EAAE,wBAAwB,QAAQ,uBAAuB,CAAC,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;oBACrG,QAAQ,EAAE,IAAI;iBACf,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,SAAS,CAAC,CAAC;gBACxC,WAAW,CAAC,IAAI,CAAC;oBACf,IAAI,EAAE,aAAa;oBACnB,WAAW,EAAE,KAAK,CAAC,EAAE;oBACrB,OAAO,EAAE,MAAM,CAAC,MAAM;oBACtB,QAAQ,EAAE,MAAM,CAAC,OAAO;iBACzB,CAAC,CAAC;YACL,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACjE,MAAM,CAAC,IAAI,CAAC,SAAS,QAAQ,YAAY,OAAO,EAAE,CAAC,CAAC;gBACpD,WAAW,CAAC,IAAI,CAAC;oBACf,IAAI,EAAE,aAAa;oBACnB,WAAW,EAAE,KAAK,CAAC,EAAE;oBACrB,OAAO,EAAE,iCAAiC,OAAO,EAAE;oBACnD,QAAQ,EAAE,IAAI;iBACf,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,wCAAwC;QACxC,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,WAAkC;SAC5C,CAAC,CAAC;QAEH,cAAc,EAAE,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACpC,CAAC;IAED,2CAA2C;IAC3C,MAAM,CAAC,IAAI,CACT,oCAAoC,QAAQ,8BAA8B,CAC3E,CAAC;IAEF,OAAO;QACL,SAAS,EACP,SAAS;YACT,6EAA6E;QAC/E,SAAS,EAAE,QAAQ;QACnB,WAAW,EAAE,cAAc;QAC3B,KAAK,EAAE;YACL,WAAW,EAAE,gBAAg
B;YAC7B,YAAY,EAAE,iBAAiB;SAChC;QACD,UAAU,EAAE,oBAAoB;KACjC,CAAC;AACJ,CAAC"}
|
|
@@ -4,7 +4,10 @@ import type { EvaluationRunResult } from "../evaluators/registry.js";
|
|
|
4
4
|
export type { EvaluationRunResult } from "../evaluators/registry.js";
|
|
5
5
|
/**
|
|
6
6
|
* Run the evaluator agent, combining programmatic evaluation (plugins)
|
|
7
|
-
* with agent-based qualitative evaluation.
|
|
7
|
+
* with agent-based qualitative evaluation using tools.
|
|
8
|
+
*
|
|
9
|
+
* The evaluator agent can read files, run bash commands (tests, dev server,
|
|
10
|
+
* screenshots), and search the codebase — but CANNOT write or edit files.
|
|
8
11
|
*
|
|
9
12
|
* @param handoff Context handoff for the current sprint.
|
|
10
13
|
* @param projectRoot Absolute path to the project.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evaluator-agent.d.ts","sourceRoot":"","sources":["../../src/orchestrator/evaluator-agent.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAO3D,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;
|
|
1
|
+
{"version":3,"file":"evaluator-agent.d.ts","sourceRoot":"","sources":["../../src/orchestrator/evaluator-agent.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAO3D,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AAQrE,YAAY,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AAQrE;;;;;;;;;;;GAWG;AACH,wBAAsB,iBAAiB,CACrC,OAAO,EAAE,cAAc,EACvB,WAAW,EAAE,MAAM,EACnB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,mBAAmB,CAAC,CA4E9B"}
|
|
@@ -3,58 +3,19 @@ import { serializeHandoff } from "./context-handoff.js";
|
|
|
3
3
|
import { createDefaultRegistry, runEvaluation, } from "../evaluators/registry.js";
|
|
4
4
|
import { getChangedFiles } from "../utils/git.js";
|
|
5
5
|
import { logger } from "../utils/logger.js";
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
return "claude-sonnet-4-20250514";
|
|
13
|
-
case "haiku":
|
|
14
|
-
return "claude-haiku-4-20250414";
|
|
15
|
-
default:
|
|
16
|
-
return "claude-sonnet-4-20250514";
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
// ── Agent Evaluation System Prompt ─────────────────────────────────
|
|
20
|
-
const EVALUATOR_SYSTEM_PROMPT = `You are the Bober Evaluator agent. Your job is to qualitatively assess whether a sprint's implementation meets its contract criteria.
|
|
21
|
-
|
|
22
|
-
You will receive:
|
|
23
|
-
- The sprint contract with success criteria
|
|
24
|
-
- The context handoff with implementation notes
|
|
25
|
-
- Results from automated checks (typecheck, lint, tests, etc.)
|
|
26
|
-
|
|
27
|
-
For each success criterion that cannot be automatically verified, assess whether it has been met based on the implementation description and changed files.
|
|
28
|
-
|
|
29
|
-
Output format — respond with a JSON object:
|
|
30
|
-
{
|
|
31
|
-
"evaluator": "Agent Evaluation",
|
|
32
|
-
"passed": true/false,
|
|
33
|
-
"score": 0-100,
|
|
34
|
-
"details": [
|
|
35
|
-
{
|
|
36
|
-
"criterion": "criterion id or description",
|
|
37
|
-
"passed": true/false,
|
|
38
|
-
"message": "explanation",
|
|
39
|
-
"severity": "error" | "warning" | "info"
|
|
40
|
-
}
|
|
41
|
-
],
|
|
42
|
-
"summary": "Overall assessment",
|
|
43
|
-
"feedback": "Actionable feedback for the generator if anything needs fixing",
|
|
44
|
-
"timestamp": "<ISO datetime>"
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
Guidelines:
|
|
48
|
-
- Be thorough but fair. If the implementation reasonably meets a criterion, mark it as passed.
|
|
49
|
-
- If automated checks already cover a criterion, you can defer to their results.
|
|
50
|
-
- Focus on criteria that require human-like judgment: code quality, architectural decisions, completeness.
|
|
51
|
-
- Provide specific, actionable feedback when something fails.
|
|
52
|
-
|
|
53
|
-
Output ONLY the JSON object. No markdown fences, no explanation.`;
|
|
6
|
+
import { resolveModel } from "./model-resolver.js";
|
|
7
|
+
import { loadAgentDefinition } from "./agent-loader.js";
|
|
8
|
+
import { buildToolSet } from "./tools/index.js";
|
|
9
|
+
import { runAgenticLoop } from "./agentic-loop.js";
|
|
10
|
+
// ── Constants ──────────────────────────────────────────────────────
|
|
11
|
+
const EVALUATOR_MAX_TURNS = 25;
|
|
54
12
|
// ── Main ───────────────────────────────────────────────────────────
|
|
55
13
|
/**
|
|
56
14
|
* Run the evaluator agent, combining programmatic evaluation (plugins)
|
|
57
|
-
* with agent-based qualitative evaluation.
|
|
15
|
+
* with agent-based qualitative evaluation using tools.
|
|
16
|
+
*
|
|
17
|
+
* The evaluator agent can read files, run bash commands (tests, dev server,
|
|
18
|
+
* screenshots), and search the codebase — but CANNOT write or edit files.
|
|
58
19
|
*
|
|
59
20
|
* @param handoff Context handoff for the current sprint.
|
|
60
21
|
* @param projectRoot Absolute path to the project.
|
|
@@ -73,9 +34,10 @@ export async function runEvaluatorAgent(handoff, projectRoot, config) {
|
|
|
73
34
|
const registry = await createDefaultRegistry(config);
|
|
74
35
|
let changedFiles;
|
|
75
36
|
try {
|
|
76
|
-
changedFiles =
|
|
77
|
-
|
|
78
|
-
|
|
37
|
+
changedFiles =
|
|
38
|
+
handoff.changedFiles.length > 0
|
|
39
|
+
? handoff.changedFiles
|
|
40
|
+
: await getChangedFiles(projectRoot);
|
|
79
41
|
}
|
|
80
42
|
catch {
|
|
81
43
|
changedFiles = handoff.changedFiles;
|
|
@@ -85,10 +47,10 @@ export async function runEvaluatorAgent(handoff, projectRoot, config) {
|
|
|
85
47
|
const icon = result.passed ? "PASS" : "FAIL";
|
|
86
48
|
logger.debug(` [${icon}] ${result.evaluator}: ${result.summary}`);
|
|
87
49
|
}
|
|
88
|
-
// 2. Agent evaluation — qualitative assessment via
|
|
50
|
+
// 2. Agent evaluation — qualitative assessment via agentic loop with tools
|
|
89
51
|
logger.info("Running agent evaluation...");
|
|
90
|
-
const agentResult = await runAgentEvaluation(handoff, programmaticEval.results, config);
|
|
91
|
-
// 3. Combine results
|
|
52
|
+
const agentResult = await runAgentEvaluation(handoff, programmaticEval.results, projectRoot, config);
|
|
53
|
+
// 3. Combine results
|
|
92
54
|
const allResults = [...programmaticEval.results, agentResult];
|
|
93
55
|
const scoredResults = allResults.filter((r) => r.score !== undefined);
|
|
94
56
|
const avgScore = scoredResults.length > 0
|
|
@@ -111,45 +73,105 @@ export async function runEvaluatorAgent(handoff, projectRoot, config) {
|
|
|
111
73
|
logger.sprint(sprintId, `Evaluation ${statusLabel}`);
|
|
112
74
|
return evaluation;
|
|
113
75
|
}
|
|
76
|
+
// ── Agent evaluation with tools ────────────────────────────────────
|
|
114
77
|
/**
|
|
115
|
-
* Run the agent-based qualitative evaluation
|
|
78
|
+
* Run the agent-based qualitative evaluation using a multi-turn agentic
|
|
79
|
+
* loop with bash, read_file, glob, and grep tools.
|
|
80
|
+
*
|
|
81
|
+
* The evaluator can run commands, take screenshots, inspect code, start
|
|
82
|
+
* dev servers, and curl endpoints — but CANNOT write or edit files.
|
|
116
83
|
*/
|
|
117
|
-
async function runAgentEvaluation(handoff, programmaticResults, config) {
|
|
118
|
-
const model = resolveModel(config.evaluator.model);
|
|
119
|
-
const client = new Anthropic();
|
|
84
|
+
async function runAgentEvaluation(handoff, programmaticResults, projectRoot, config) {
|
|
120
85
|
const timestamp = new Date().toISOString();
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
.
|
|
125
|
-
|
|
86
|
+
try {
|
|
87
|
+
// Load agent definition (system prompt from .md file)
|
|
88
|
+
const agentDef = await loadAgentDefinition("bober-evaluator", projectRoot);
|
|
89
|
+
const model = resolveModel(config.evaluator.model);
|
|
90
|
+
// Build tool set (evaluator: bash, read_file, glob, grep — NO write/edit)
|
|
91
|
+
const toolSet = buildToolSet("evaluator", projectRoot);
|
|
92
|
+
const client = new Anthropic();
|
|
93
|
+
const handoffJson = serializeHandoff(handoff);
|
|
94
|
+
// Format programmatic results for context
|
|
95
|
+
const programmaticSummary = programmaticResults
|
|
96
|
+
.map((r) => {
|
|
97
|
+
const lines = [`[${r.passed ? "PASS" : "FAIL"}] ${r.evaluator}: ${r.summary}`];
|
|
98
|
+
if (!r.passed && r.feedback) {
|
|
99
|
+
lines.push(` Feedback: ${r.feedback}`);
|
|
100
|
+
}
|
|
101
|
+
for (const detail of r.details) {
|
|
102
|
+
if (!detail.passed) {
|
|
103
|
+
const loc = detail.file
|
|
104
|
+
? ` at ${detail.file}${detail.line !== undefined ? `:${detail.line}` : ""}`
|
|
105
|
+
: "";
|
|
106
|
+
lines.push(` [${detail.severity.toUpperCase()}] ${detail.message}${loc}`);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
return lines.join("\n");
|
|
110
|
+
})
|
|
111
|
+
.join("\n\n");
|
|
112
|
+
const contract = handoff.currentContract;
|
|
113
|
+
const criteriaList = contract?.successCriteria
|
|
114
|
+
?.map((c, i) => `${i + 1}. [${c.id}] ${c.description} (verification: ${c.verificationMethod})`)
|
|
115
|
+
.join("\n") ?? "No criteria found.";
|
|
116
|
+
const userMessage = `# Context Handoff
|
|
126
117
|
${handoffJson}
|
|
127
118
|
|
|
128
|
-
#
|
|
119
|
+
# Project Root
|
|
120
|
+
${projectRoot}
|
|
121
|
+
|
|
122
|
+
# Automated Check Results (already completed)
|
|
129
123
|
${programmaticSummary}
|
|
130
124
|
|
|
131
|
-
|
|
125
|
+
# Success Criteria to Verify
|
|
126
|
+
${criteriaList}
|
|
132
127
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
128
|
+
# Your Task
|
|
129
|
+
Evaluate whether the sprint contract criteria have been met. Use your tools to:
|
|
130
|
+
1. Read the relevant source files to verify implementation
|
|
131
|
+
2. Run the dev server and test the application if applicable
|
|
132
|
+
3. Take Playwright screenshots if applicable: \`npx playwright screenshot http://localhost:3000 /tmp/bober-eval.png --full-page\`
|
|
133
|
+
4. Run any additional verification commands
|
|
134
|
+
5. Check for regressions
|
|
135
|
+
|
|
136
|
+
Be skeptical. Verify independently. Do not trust the generator's self-report alone.
|
|
137
|
+
|
|
138
|
+
Your final response must contain ONLY a JSON object matching this schema (no markdown fences):
|
|
139
|
+
{
|
|
140
|
+
"evaluator": "Agent Evaluation",
|
|
141
|
+
"passed": true/false,
|
|
142
|
+
"score": 0-100,
|
|
143
|
+
"details": [
|
|
144
|
+
{
|
|
145
|
+
"criterion": "criterion id or description",
|
|
146
|
+
"passed": true/false,
|
|
147
|
+
"message": "explanation with evidence",
|
|
148
|
+
"severity": "error" | "warning" | "info",
|
|
149
|
+
"file": "file path if applicable",
|
|
150
|
+
"line": 123
|
|
151
|
+
}
|
|
152
|
+
],
|
|
153
|
+
"summary": "Overall assessment",
|
|
154
|
+
"feedback": "Actionable feedback for the generator if anything needs fixing",
|
|
155
|
+
"timestamp": "${timestamp}"
|
|
156
|
+
}`;
|
|
157
|
+
logger.info(`Calling evaluator model (${config.evaluator.model} → ${model})...`);
|
|
158
|
+
const result = await runAgenticLoop({
|
|
159
|
+
client,
|
|
136
160
|
model,
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
161
|
+
systemPrompt: agentDef.systemPrompt,
|
|
162
|
+
userMessage,
|
|
163
|
+
tools: toolSet.schemas,
|
|
164
|
+
toolHandlers: toolSet.handlers,
|
|
165
|
+
maxTurns: EVALUATOR_MAX_TURNS,
|
|
166
|
+
maxTokens: 16384,
|
|
167
|
+
onToolUse: (name, input) => {
|
|
168
|
+
const inp = input;
|
|
169
|
+
const inputStr = JSON.stringify(inp).slice(0, 120);
|
|
170
|
+
logger.debug(` [evaluator] ${name}(${inputStr})`);
|
|
171
|
+
},
|
|
145
172
|
});
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
if (block.type === "text") {
|
|
149
|
-
responseText += block.text;
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
return parseEvalResult(responseText, timestamp);
|
|
173
|
+
logger.debug(`Evaluator completed in ${result.turnsUsed} turns (${result.toolsCalled.length} tool calls)`);
|
|
174
|
+
return parseEvalResult(result.finalText, timestamp);
|
|
153
175
|
}
|
|
154
176
|
catch (err) {
|
|
155
177
|
logger.warn(`Agent evaluation failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
@@ -164,6 +186,7 @@ Output ONLY a JSON object matching the EvalResult schema. No markdown fences.`;
|
|
|
164
186
|
};
|
|
165
187
|
}
|
|
166
188
|
}
|
|
189
|
+
// ── JSON parser ────────────────────────────────────────────────────
|
|
167
190
|
/**
|
|
168
191
|
* Parse the evaluator agent's response into an EvalResult.
|
|
169
192
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evaluator-agent.js","sourceRoot":"","sources":["../../src/orchestrator/evaluator-agent.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAI1C,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAExD,OAAO,EACL,qBAAqB,EACrB,aAAa,GACd,MAAM,2BAA2B,CAAC;AAEnC,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"evaluator-agent.js","sourceRoot":"","sources":["../../src/orchestrator/evaluator-agent.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAI1C,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAExD,OAAO,EACL,qBAAqB,EACrB,aAAa,GACd,MAAM,2BAA2B,CAAC;AAEnC,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAInD,sEAAsE;AAEtE,MAAM,mBAAmB,GAAG,EAAE,CAAC;AAE/B,sEAAsE;AAEtE;;;;;;;;;;;GAWG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,OAAuB,EACvB,WAAmB,EACnB,MAAmB;IAEnB,MAAM,QAAQ,GAAG,OAAO,CAAC,eAAe,CAAC;IACzC,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;IACpE,CAAC;IAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,EAAE,CAAC;IAE7B,MAAM,CAAC,MAAM,CAAC,QAAQ,EAAE,eAAe,QAAQ,CAAC,OAAO,EAAE,CAAC,CAAC;IAE3D,gEAAgE;IAChE,MAAM,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;IACnD,MAAM,QAAQ,GAAG,MAAM,qBAAqB,CAAC,MAAM,CAAC,CAAC;IAErD,IAAI,YAAsB,CAAC;IAC3B,IAAI,CAAC;QACH,YAAY;YACV,OAAO,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC;gBAC7B,CAAC,CAAC,OAAO,CAAC,YAAY;gBACtB,CAAC,CAAC,MAAM,eAAe,CAAC,WAAW,CAAC,CAAC;IAC3C,CAAC;IAAC,MAAM,CAAC;QACP,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IACtC,CAAC;IAED,MAAM,gBAAgB,GAAG,MAAM,aAAa,CAC1C,QAAQ,EACR,WAAW,EACX,MAAM,EACN,QAAQ,EACR,YAAY,CACb,CAAC;IAEF,KAAK,MAAM,MAAM,IAAI,gBAAgB,CAAC,OAAO,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;QAC7C,MAAM,CAAC,KAAK,CAAC,MAAM,IAAI,KAAK,MAAM,CAAC,SAAS,KAAK,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;IACrE,CAAC;IAED,2EAA2E;IAC3E,MAAM,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAC3C,MAAM,WAAW,GAAG,MAAM,kBAAkB,CAC1C,OAAO,EACP,gBAAgB,CAAC,OAAO,EACxB,WAAW,EACX,MAAM,CACP,CAAC;IAEF,qBAAqB;IACrB,MAAM,UAAU,GAAG,CAAC,GAAG,gBAAgB,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IAE9D,MAAM,aAAa,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK,SAAS,CAAC,CAAC;IACtE,MAAM,QAAQ,GACZ,aAAa,CAAC,MAAM,GAAG,CAAC;QACtB,CAAC,CAAC,IAAI,CAAC,KAAK,C
ACR,aAAa,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC;YACvD,aAAa,CAAC,MAAM,CACvB;QACH,CAAC,CAAC,CAAC,CAAC;IAER,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC9D,MAAM,YAAY,GAAG;QACnB,wBAAwB,WAAW,IAAI,UAAU,CAAC,MAAM,oBAAoB;QAC5E,UAAU,QAAQ,MAAM;KACzB,CAAC;IAEF,MAAM,UAAU,GAAwB;QACtC,MAAM,EAAE,gBAAgB,CAAC,MAAM,IAAI,WAAW,CAAC,MAAM;QACrD,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,UAAU;QACnB,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;QAChC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACpC,CAAC;IAEF,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC;IAC5D,MAAM,CAAC,MAAM,CAAC,QAAQ,EAAE,cAAc,WAAW,EAAE,CAAC,CAAC;IAErD,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,sEAAsE;AAEtE;;;;;;GAMG;AACH,KAAK,UAAU,kBAAkB,CAC/B,OAAuB,EACvB,mBAAiC,EACjC,WAAmB,EACnB,MAAmB;IAEnB,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAE3C,IAAI,CAAC;QACH,sDAAsD;QACtD,MAAM,QAAQ,GAAG,MAAM,mBAAmB,CAAC,iBAAiB,EAAE,WAAW,CAAC,CAAC;QAC3E,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAEnD,0EAA0E;QAC1E,MAAM,OAAO,GAAG,YAAY,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC;QAEvD,MAAM,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;QAC/B,MAAM,WAAW,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;QAE9C,0CAA0C;QAC1C,MAAM,mBAAmB,GAAG,mBAAmB;aAC5C,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YACT,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;YAC/E,IAAI,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;YAC1C,CAAC;YACD,KAAK,MAAM,MAAM,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC;gBAC/B,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;oBACnB,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI;wBACrB,CAAC,CAAC,OAAO,MAAM,CAAC,IAAI,GAAG,MAAM,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE;wBAC3E,CAAC,CAAC,EAAE,CAAC;oBACP,KAAK,CAAC,IAAI,CACR,MAAM,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,KAAK,MAAM,CAAC,OAAO,GAAG,GAAG,EAAE,CAC/D,CAAC;gBACJ,CAAC;YACH,CAAC
;YACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC,CAAC;aACD,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,MAAM,QAAQ,GAAG,OAAO,CAAC,eAAe,CAAC;QACzC,MAAM,YAAY,GAAG,QAAQ,EAAE,eAAe;YAC5C,EAAE,GAAG,CACH,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACP,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,WAAW,mBAAmB,CAAC,CAAC,kBAAkB,GAAG,CACjF;aACA,IAAI,CAAC,IAAI,CAAC,IAAI,oBAAoB,CAAC;QAEtC,MAAM,WAAW,GAAG;EACtB,WAAW;;;EAGX,WAAW;;;EAGX,mBAAmB;;;EAGnB,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kBA6BI,SAAS;EACzB,CAAC;QAEC,MAAM,CAAC,IAAI,CACT,4BAA4B,MAAM,CAAC,SAAS,CAAC,KAAK,MAAM,KAAK,MAAM,CACpE,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC;YAClC,MAAM;YACN,KAAK;YACL,YAAY,EAAE,QAAQ,CAAC,YAAY;YACnC,WAAW;YACX,KAAK,EAAE,OAAO,CAAC,OAAO;YACtB,YAAY,EAAE,OAAO,CAAC,QAAQ;YAC9B,QAAQ,EAAE,mBAAmB;YAC7B,SAAS,EAAE,KAAK;YAChB,SAAS,EAAE,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;gBACzB,MAAM,GAAG,GAAG,KAAgC,CAAC;gBAC7C,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBACnD,MAAM,CAAC,KAAK,CAAC,iBAAiB,IAAI,IAAI,QAAQ,GAAG,CAAC,CAAC;YACrD,CAAC;SACF,CAAC,CAAC;QAEH,MAAM,CAAC,KAAK,CACV,0BAA0B,MAAM,CAAC,SAAS,WAAW,MAAM,CAAC,WAAW,CAAC,MAAM,cAAc,CAC7F,CAAC;QAEF,OAAO,eAAe,CAAC,MAAM,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;IACtD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,CAAC,IAAI,CACT,4BAA4B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC/E,CAAC;QAEF,OAAO;YACL,SAAS,EAAE,kBAAkB;YAC7B,MAAM,EAAE,IAAI,EAAE,oCAAoC;YAClD,KAAK,EAAE,SAAS;YAChB,OAAO,EAAE,EAAE;YACX,OAAO,EAAE,0CAA0C;YACnD,QAAQ,EAAE,2BAA2B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;YACvF,SAAS;SACV,CAAC;IACJ,CAAC;AACH,CAAC;AAED,sEAAsE;AAEtE;;GAEG;AACH,SAAS,eAAe,CAAC,IAAY,EAAE,iBAAyB;IAC9D,IAAI,MAAe,CAAC;IAEpB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;IACnC,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,UAAU,GAAG,uCAAuC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtE,IAAI,UAAU,EAAE,CAAC;YACf,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5C,CAAC;Y
AAC,MAAM,CAAC;gBACP,eAAe;YACjB,CAAC;QACH,CAAC;QAED,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;YACvC,IAAI,UAAU,KAAK,CAAC,CAAC,IAAI,QAAQ,GAAG,UAAU,EAAE,CAAC;gBAC/C,IAAI,CAAC;oBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC;gBAC5D,CAAC;gBAAC,MAAM,CAAC;oBACP,eAAe;gBACjB,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;QAC5D,MAAM,GAAG,GAAG,MAAiC,CAAC;QAE9C,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC;YACxC,CAAC,CAAE,GAAG,CAAC,OAAqB;iBACvB,MAAM,CACL,CAAC,CAAC,EAAgC,EAAE,CAClC,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,IAAI,CACtC;iBACA,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACX,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,IAAI,SAAS,CAAC;gBAC3C,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;gBACzB,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC;gBAChC,QAAQ,EAAE,CAAC,CAAC,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC,QAAQ,CAC9C,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CACnB;oBACC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;oBACpB,CAAC,CAAC,MAAM,CAAiC;gBAC3C,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBACvD,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aACxD,CAAC,CAAC;YACP,CAAC,CAAC,EAAE,CAAC;QAEP,OAAO;YACL,SAAS,EAAE,MAAM,CAAC,GAAG,CAAC,SAAS,IAAI,kBAAkB,CAAC;YACtD,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC;YAC3B,KAAK,EAAE,OAAO,GAAG,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;YAC5D,OAAO;YACP,OAAO,EAAE,MAAM,CAAC,GAAG,CAAC,OAAO,IAAI,sBAAsB,CAAC;YACtD,QAAQ,EAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,IAAI,uBAAuB,CAAC;YACzD,SAAS,EACP,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,iBAAiB;SACxE,CAAC;IACJ,CAAC;IAED,OAAO;QACL,SAAS,EAAE,kBAAkB;QAC7B,MAAM,EAAE,KAAK;QACb,OAAO,EAAE,EAAE;QACX,OAAO,EAAE,4CAA4C;QACrD,QAAQ,EAAE,kBAAkB,IAAI,CAAC,KAAK,CAAC,CAAC,EA
AE,GAAG,CAAC,EAAE;QAChD,SAAS,EAAE,iBAAiB;KAC7B,CAAC;AACJ,CAAC"}
|
|
@@ -5,12 +5,24 @@ export interface GeneratorResult {
|
|
|
5
5
|
notes: string;
|
|
6
6
|
filesChanged: string[];
|
|
7
7
|
commitHash?: string;
|
|
8
|
+
/** Number of agentic loop turns used. */
|
|
9
|
+
turnsUsed?: number;
|
|
10
|
+
/** Tools called during generation. */
|
|
11
|
+
toolsCalled?: string[];
|
|
12
|
+
/** Token usage. */
|
|
13
|
+
usage?: {
|
|
14
|
+
inputTokens: number;
|
|
15
|
+
outputTokens: number;
|
|
16
|
+
};
|
|
8
17
|
}
|
|
9
18
|
/**
|
|
10
19
|
* Run the generator agent to implement changes for a sprint.
|
|
11
20
|
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
21
|
+
* Uses a multi-turn agentic loop with full tool access (bash, read/write/edit
|
|
22
|
+
* files, glob, grep). The agent actually reads the codebase, writes code,
|
|
23
|
+
* runs tests, and commits — all via tools.
|
|
24
|
+
*
|
|
25
|
+
* The system prompt is loaded from `agents/bober-generator.md`.
|
|
14
26
|
*/
|
|
15
27
|
export declare function runGenerator(handoff: ContextHandoff, projectRoot: string, config: BoberConfig): Promise<GeneratorResult>;
|
|
16
28
|
//# sourceMappingURL=generator-agent.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generator-agent.d.ts","sourceRoot":"","sources":["../../src/orchestrator/generator-agent.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;
|
|
1
|
+
{"version":3,"file":"generator-agent.d.ts","sourceRoot":"","sources":["../../src/orchestrator/generator-agent.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAU3D,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,yCAAyC;IACzC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,sCAAsC;IACtC,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,mBAAmB;IACnB,KAAK,CAAC,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;CACvD;AAID;;;;;;;;GAQG;AACH,wBAAsB,YAAY,CAChC,OAAO,EAAE,cAAc,EACvB,WAAW,EAAE,MAAM,EACnB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,CAAC,CAyE1B"}
|