karajan-code 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agents/model-registry.js +83 -33
- package/src/hu/auto-generator.js +48 -38
- package/src/orchestrator/brain-coordinator.js +21 -0
- package/src/orchestrator/post-loop-stages.js +16 -1
- package/src/orchestrator/solomon-escalation.js +1 -1
- package/src/orchestrator/stages/coder-stage.js +28 -6
- package/src/orchestrator/stages/reviewer-stage.js +23 -6
- package/src/orchestrator/stages/sonar-stage.js +35 -7
- package/src/orchestrator.js +18 -9
- package/src/utils/budget.js +1 -0
- package/src/utils/pricing.js +13 -2
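package/package.json
CHANGED
(The +1 -1 diff body for package/package.json was not captured in this extract. The hunk that follows, @@ -1,64 +1,114 @@, belongs to package/src/agents/model-registry.js.)
package/src/agents/model-registry.js
CHANGED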
|
@@ -1,64 +1,114 @@
|
|
|
1
1
|
// Module-level registry: maps a model name (or alias) to its
// { name, provider, pricing, deprecated } record.
const modelRegistry = new Map();
|
|
2
2
|
|
|
3
3
|
/**
 * Register a model in the module-level registry, replacing any entry that
 * already uses the same name.
 *
 * @param {string} name - Registry key; must be a non-empty string.
 * @param {object} [options]
 * @param {string} [options.provider] - Provider id. When omitted, the part of
 *   `name` before the first "/" is used (the whole name if there is no "/").
 * @param {{input_per_million: number, output_per_million: number}} options.pricing
 *   Rates per million tokens. Both must be finite, non-negative numbers.
 * @param {*} [options.deprecated] - Stored as-is, or as `null` when falsy.
 * @throws {Error} If `name` is not a non-empty string or `pricing` is missing/invalid.
 */
export function registerModel(name, { provider, pricing, deprecated } = {}) {
  if (!name || typeof name !== "string") {
    throw new Error("Model name must be a non-empty string");
  }
  // `typeof NaN === "number"`, so a bare typeof check would let NaN, Infinity
  // and negative rates into the registry and poison later cost arithmetic.
  const isValidRate = (rate) => Number.isFinite(rate) && rate >= 0;
  if (!pricing || !isValidRate(pricing.input_per_million) || !isValidRate(pricing.output_per_million)) {
    throw new Error(`Model "${name}" requires pricing with input_per_million and output_per_million`);
  }
  modelRegistry.set(name, {
    name,
    provider: provider || name.split("/")[0],
    // Copy the two known fields so later mutation of the caller's object
    // cannot change the registered rates.
    pricing: { input_per_million: pricing.input_per_million, output_per_million: pricing.output_per_million },
    deprecated: deprecated || null,
  });
}
|
|
17
|
+
|
|
18
|
+
/**
 * Register `alias` as an additional registry key that carries the same
 * pricing and deprecation data as an already-registered `target` model.
 *
 * @param {string} alias - New registry key; must be a non-empty string.
 * @param {string} target - Name of an existing registry entry.
 * @param {object} [options]
 * @param {string} [options.provider] - Provider override; defaults to the
 *   target entry's provider.
 * @throws {Error} If `alias` is not a non-empty string or `target` is not registered.
 */
export function registerModelAlias(alias, target, { provider } = {}) {
  // Same key validation registerModel applies to `name`.
  if (!alias || typeof alias !== "string") {
    throw new Error("Model alias must be a non-empty string");
  }
  const entry = modelRegistry.get(target);
  if (!entry) {
    throw new Error(`Target model "${target}" for alias "${alias}" not found`);
  }
  modelRegistry.set(alias, {
    ...entry,
    // Snapshot the rates so the alias record does not share a mutable
    // pricing object with the target record.
    pricing: { ...entry.pricing },
    name: alias,
    provider: provider || entry.provider,
  });
}
|
|
16
29
|
|
|
17
30
|
/**
 * Look up the pricing of a registered model or alias.
 *
 * @param {string} name - Registry key.
 * @returns {object|null} A shallow copy of the entry's pricing object, or
 *   `null` when no entry exists for `name`.
 */
export function getModelPricing(name) {
  const found = modelRegistry.get(name);
  if (!found) {
    return null;
  }
  return { ...found.pricing };
}
|
|
21
34
|
|
|
22
35
|
/**
 * Report whether a registered model's deprecation date has been reached.
 *
 * @param {string} name - Registry key.
 * @returns {boolean} `false` when the entry is missing or has no `deprecated`
 *   value; otherwise whether `new Date(deprecated)` is at or before now.
 */
export function isModelDeprecated(name) {
  const deprecatedSince = modelRegistry.get(name)?.deprecated;
  if (!deprecatedSince) {
    return false;
  }
  // An unparseable value produces NaN here, and NaN <= n is false.
  return new Date(deprecatedSince).getTime() <= Date.now();
}
|
|
27
40
|
|
|
28
41
|
/**
 * Return a detached description of a registered model or alias.
 *
 * @param {string} name - Registry key.
 * @returns {{name: *, provider: *, pricing: object, deprecated: *}|null}
 *   A new object with a shallow copy of the pricing, or `null` when no entry
 *   exists for `name`.
 */
export function getModelInfo(name) {
  const found = modelRegistry.get(name);
  if (!found) {
    return null;
  }
  const { name: modelName, provider, pricing, deprecated } = found;
  return { name: modelName, provider, pricing: { ...pricing }, deprecated };
}
|
|
33
46
|
|
|
34
47
|
/**
 * List every registry entry (models and aliases) in insertion order.
 *
 * @returns {Array<{name: *, provider: *, pricing: object, deprecated: *}>}
 *   New objects, each with a shallow copy of the entry's pricing.
 */
export function getRegisteredModels() {
  const models = [];
  for (const record of modelRegistry.values()) {
    models.push({
      name: record.name,
      provider: record.provider,
      pricing: { ...record.pricing },
      deprecated: record.deprecated,
    });
  }
  return models;
}
|
|
42
55
|
|
|
43
56
|
/**
 * Build a plain-object pricing table keyed by registry key.
 *
 * @returns {Object<string, object>} One own property per registry entry
 *   (models and aliases, in insertion order), each a shallow copy of that
 *   entry's pricing.
 */
export function buildDefaultPricingTable() {
  // Object.fromEntries defines own data properties, so a key such as
  // "__proto__" becomes a normal entry. Assigning `table[name] = …` on a `{}`
  // literal would instead set the prototype and drop that entry.
  return Object.fromEntries(
    Array.from(modelRegistry, ([name, entry]) => [name, { ...entry.pricing }]),
  );
}
|
|
50
63
|
|
|
51
|
-
|
|
52
|
-
|
|
64
|
+
/**
 * Anthropic Claude Family
 * Pricing & Info: https://platform.claude.com/docs/en/about-claude/pricing
 *
 * Rows are [name, input_per_million, output_per_million]. The last four rows
 * are the default models & general aliases.
 */
for (const [name, input_per_million, output_per_million] of [
  ["claude-opus-4.6", 5.0, 25.0],
  ["claude-sonnet-4.6", 3.0, 15.0],
  ["claude-haiku-4.5", 1.0, 5.0],
  ["claude", 3, 15],
  ["sonnet", 3, 15],
  ["opus", 15, 75],
  ["haiku", 0.25, 1.25],
]) {
  registerModel(name, { provider: "anthropic", pricing: { input_per_million, output_per_million } });
}

/**
 * OpenAI GPT Family
 * Pricing: https://developers.openai.com/api/docs/pricing
 *
 * The last three rows are the default models & general aliases.
 */
for (const [name, input_per_million, output_per_million] of [
  ["gpt-5.4-standard", 2.5, 15.0],
  ["gpt-5.4-thinking", 2.5, 15.0],
  ["gpt-5.4-pro", 30.0, 180.0],
  ["gpt-5.4-mini", 0.75, 4.5],
  ["codex", 1.5, 4],
  ["o4-mini", 1.5, 4],
  ["o3", 10, 40],
]) {
  registerModel(name, { provider: "openai", pricing: { input_per_million, output_per_million } });
}

/**
 * Google Gemini Family
 * Pricing: https://ai.google.dev/gemini-api/docs/pricing
 */
for (const [name, input_per_million, output_per_million] of [
  ["gemini-3.1-pro-preview", 2.0, 12.0],
  ["gemini-3.1-flash-lite", 0.25, 1.5],
  ["gemini-3-flash-preview", 0.5, 3.0],
  ["gemini-2.5-pro", 1.25, 5],
  ["gemini-2.0-flash", 0.075, 0.3],
]) {
  registerModel(name, { provider: "google", pricing: { input_per_million, output_per_million } });
}

// Default models & General aliases
registerModelAlias("gemini", "gemini-2.5-pro");

/**
 * Other Providers & CLI Aliases
 *
 * For these two entries the provider id equals the model name.
 */
for (const [name, input_per_million, output_per_million] of [
  ["aider", 3, 15],
  ["opencode", 0, 0],
]) {
  registerModel(name, { provider: name, pricing: { input_per_million, output_per_million } });
}

// Common CLI Aliases (with provider overrides): [alias, target, provider]
for (const [alias, target, provider] of [
  ["aider/claude-3-7-sonnet", "claude-sonnet-4.6", "aider"],
  ["aider/gpt-4o", "gpt-5.4-standard", "aider"],
  ["opencode/minimax-m2.5", "opencode", "opencode"],
]) {
  registerModelAlias(alias, target, { provider });
}
|
package/src/hu/auto-generator.js
CHANGED
|
@@ -16,9 +16,11 @@ export function deriveProjectName(originalTask) {
|
|
|
16
16
|
if (!originalTask || typeof originalTask !== "string") return "Untitled Project";
|
|
17
17
|
const STOPWORDS = new Set([
|
|
18
18
|
"a", "an", "the", "and", "or", "with", "for", "to", "of", "in", "on",
|
|
19
|
+
"is", "it", "its", "this", "that", "these", "those", "be", "been", "being",
|
|
19
20
|
"build", "create", "implement", "make", "develop", "add", "set", "up",
|
|
20
|
-
"setup", "write", "code", "new", "complete", "
|
|
21
|
-
"application", "app", "tool", "system", "project", "using", "use"
|
|
21
|
+
"setup", "write", "code", "new", "complete", "from", "scratch",
|
|
22
|
+
"application", "app", "tool", "system", "project", "using", "use",
|
|
23
|
+
"full", "full-stack", "fullstack", "stack", "based", "simple", "basic"
|
|
22
24
|
]);
|
|
23
25
|
const words = originalTask
|
|
24
26
|
.toLowerCase()
|
|
@@ -65,54 +67,63 @@ export function needsSetupHu({ isNewProject = false, stackHints = [], subtasks =
|
|
|
65
67
|
}
|
|
66
68
|
|
|
67
69
|
/**
 * Build a MINIMAL setup HU — project structure + deps only.
 * NEVER includes the full original task. The coder must only do setup.
 *
 * @param {object} [options]
 * @param {string[]} [options.stackHints] - Stack hints, each rendered as one
 *   "- hint" bullet under "Stack hints:". When missing, not an array, or
 *   empty, a single "auto-detect" placeholder bullet is rendered instead.
 * @returns {object} The "HU-01" story: id, title, task_type, status,
 *   blocked_by, certified.text and acceptance_criteria.
 */
function buildSetupHu({ stackHints } = {}) {
  // Tolerate a missing or non-array value instead of throwing on `.length`.
  const hints = Array.isArray(stackHints) ? stackHints : [];
  const deps = hints.length > 0
    ? hints.map((hint) => `- ${hint}`).join("\n")
    : "- (auto-detect from subsequent HUs)";
  const certifiedText = [
    "**Setup: initialize project structure and install dependencies.**",
    "",
    "SCOPE (do ONLY this, nothing else):",
    "- Create package.json (with workspaces if monorepo detected from stack hints)",
    "- Install all runtime + dev dependencies listed in stack hints",
    "- Configure test framework so `npm test` runs (even with 0 tests)",
    "- Create .env.example with placeholder variables",
    "- Verify: `npm install` succeeds, `npm test` runs without error",
    "",
    "DO NOT implement any business logic, API routes, components, or features.",
    "DO NOT add security middleware, auth, or any application code.",
    "This HU is ONLY project scaffolding.",
    "",
    "Stack hints:",
    deps,
  ].join("\n");
  return {
    id: "HU-01",
    title: "Setup: project structure + dependencies",
    task_type: "infra",
    status: "certified",
    blocked_by: [],
    certified: { text: certifiedText },
    acceptance_criteria: [
      "npm install succeeds without errors",
      "npm test runs (even with 0 tests)",
      ".env.example exists",
      "No business logic or application code added",
    ],
  };
}
|
|
104
109
|
|
|
105
110
|
/**
|
|
106
|
-
* Build a task HU
|
|
111
|
+
* Build a MINIMAL task HU — one specific, focused piece of work.
|
|
112
|
+
* Includes a short goal reference (max 80 chars) NOT the full task.
|
|
107
113
|
*/
|
|
108
|
-
function buildTaskHu({ id, subtask,
|
|
114
|
+
function buildTaskHu({ id, subtask, projectName, blockedBy }) {
|
|
109
115
|
const taskType = classifyTaskType(subtask);
|
|
110
116
|
const certifiedText = [
|
|
111
117
|
`**${subtask}**`,
|
|
112
|
-
|
|
113
|
-
`
|
|
114
|
-
|
|
115
|
-
|
|
118
|
+
"",
|
|
119
|
+
`Project: ${projectName}`,
|
|
120
|
+
"",
|
|
121
|
+
"SCOPE (do ONLY this, nothing else):",
|
|
122
|
+
`- Implement: ${subtask}`,
|
|
123
|
+
"- Add unit tests for the new code",
|
|
124
|
+
"- Do NOT touch code outside this subtask's scope",
|
|
125
|
+
"- Do NOT refactor or 'improve' unrelated files",
|
|
126
|
+
"- Target: <200 lines changed (like an atomic PR)"
|
|
116
127
|
].join("\n");
|
|
117
128
|
return {
|
|
118
129
|
id,
|
|
@@ -122,9 +133,9 @@ function buildTaskHu({ id, subtask, originalTask, blockedBy }) {
|
|
|
122
133
|
blocked_by: blockedBy,
|
|
123
134
|
certified: { text: certifiedText },
|
|
124
135
|
acceptance_criteria: [
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
136
|
+
`${subtask} is implemented and working`,
|
|
137
|
+
"Unit tests cover the new code",
|
|
138
|
+
"No changes to files outside this subtask's scope"
|
|
128
139
|
]
|
|
129
140
|
};
|
|
130
141
|
}
|
|
@@ -160,23 +171,22 @@ export function generateHuBatch({
|
|
|
160
171
|
const needsSetup = needsSetupHu({ isNewProject, stackHints, subtasks });
|
|
161
172
|
let nextId = 1;
|
|
162
173
|
|
|
174
|
+
const projectName = deriveProjectName(originalTask);
|
|
175
|
+
|
|
163
176
|
if (needsSetup) {
|
|
164
|
-
stories.push(buildSetupHu({ stackHints
|
|
177
|
+
stories.push(buildSetupHu({ stackHints }));
|
|
165
178
|
nextId = 2;
|
|
166
179
|
}
|
|
167
180
|
|
|
168
181
|
// Task HUs: linear dependency chain after setup (conservative default).
|
|
169
|
-
// Architect context could later inform parallel-safe groupings.
|
|
170
182
|
const setupId = needsSetup ? "HU-01" : null;
|
|
171
183
|
let previousId = setupId;
|
|
172
184
|
for (const subtask of subtasks) {
|
|
173
185
|
const id = `HU-${String(nextId).padStart(2, "0")}`;
|
|
174
186
|
const blockedBy = [];
|
|
175
187
|
if (setupId) blockedBy.push(setupId);
|
|
176
|
-
// Conservative: also depend on previous task HU to enforce linear execution.
|
|
177
|
-
// Later phases can relax this with architect-informed graph.
|
|
178
188
|
if (previousId && previousId !== setupId) blockedBy.push(previousId);
|
|
179
|
-
stories.push(buildTaskHu({ id, subtask,
|
|
189
|
+
stories.push(buildTaskHu({ id, subtask, projectName, blockedBy }));
|
|
180
190
|
previousId = id;
|
|
181
191
|
nextId += 1;
|
|
182
192
|
}
|
|
@@ -23,6 +23,7 @@ export function createBrainContext({ enabled = false } = {}) {
|
|
|
23
23
|
feedbackQueue: queue.createQueue(),
|
|
24
24
|
verificationTracker: new VerificationTracker(),
|
|
25
25
|
compressionStats: { totalSaved: 0, perRole: {} },
|
|
26
|
+
extensionCount: 0,
|
|
26
27
|
enabled
|
|
27
28
|
};
|
|
28
29
|
}
|
|
@@ -99,6 +100,16 @@ function extractFeedbackEntries(roleName, output, iteration) {
|
|
|
99
100
|
iteration
|
|
100
101
|
});
|
|
101
102
|
}
|
|
103
|
+
// Catch-all: tester failed but no structured detail → still record the failure
|
|
104
|
+
if (entries.length === 0) {
|
|
105
|
+
entries.push({
|
|
106
|
+
source: "tester",
|
|
107
|
+
severity: "high",
|
|
108
|
+
category: "tests",
|
|
109
|
+
description: output.summary || "Tester failed (no structured detail available)",
|
|
110
|
+
iteration
|
|
111
|
+
});
|
|
112
|
+
}
|
|
102
113
|
} else if (roleName === "security" && output.verdict === "fail") {
|
|
103
114
|
for (const vuln of output.vulnerabilities || []) {
|
|
104
115
|
entries.push({
|
|
@@ -112,6 +123,16 @@ function extractFeedbackEntries(roleName, output, iteration) {
|
|
|
112
123
|
iteration
|
|
113
124
|
});
|
|
114
125
|
}
|
|
126
|
+
// Catch-all: security failed but no structured vulnerabilities → still record
|
|
127
|
+
if (entries.length === 0) {
|
|
128
|
+
entries.push({
|
|
129
|
+
source: "security",
|
|
130
|
+
severity: "high",
|
|
131
|
+
category: "security",
|
|
132
|
+
description: output.summary || "Security gate failed (no structured detail available)",
|
|
133
|
+
iteration
|
|
134
|
+
});
|
|
135
|
+
}
|
|
115
136
|
}
|
|
116
137
|
return entries;
|
|
117
138
|
}
|
|
@@ -158,7 +158,7 @@ export async function runTesterStage({ config, logger, emitter, eventBase, sessi
|
|
|
158
158
|
return { action: "ok", stageResult: { ok: true, summary: testerOutput.summary || "All tests passed" } };
|
|
159
159
|
}
|
|
160
160
|
|
|
161
|
-
export async function runSecurityStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget, iteration, task, diff, askQuestion }) {
|
|
161
|
+
export async function runSecurityStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget, iteration, task, diff, askQuestion, brainCtx }) {
|
|
162
162
|
logger.setContext({ iteration, stage: "security" });
|
|
163
163
|
emitProgress(
|
|
164
164
|
emitter,
|
|
@@ -209,6 +209,21 @@ export async function runSecurityStage({ config, logger, emitter, eventBase, ses
|
|
|
209
209
|
const isCritical = criticalPatterns.some((p) => summary.includes(p));
|
|
210
210
|
|
|
211
211
|
if (isCritical) {
|
|
212
|
+
// Brain: when enabled, skip Solomon — Brain handles via feedback queue
|
|
213
|
+
if (brainCtx?.enabled) {
|
|
214
|
+
logger.warn(`Brain: critical security finding — Brain will handle (Solomon bypassed): ${securityOutput.summary}`);
|
|
215
|
+
const { processRoleOutput } = await import("./brain-coordinator.js");
|
|
216
|
+
processRoleOutput(brainCtx, { roleName: "security", output: { verdict: "fail", summary: securityOutput.summary, critical: true }, iteration });
|
|
217
|
+
emitProgress(emitter, makeEvent("brain:security-critical", { ...eventBase, stage: "security" }, {
|
|
218
|
+
message: `Critical security finding — Brain handling: ${securityOutput.summary.slice(0, 200)}`,
|
|
219
|
+
detail: { summary: securityOutput.summary }
|
|
220
|
+
}));
|
|
221
|
+
return {
|
|
222
|
+
action: "continue",
|
|
223
|
+
stageResult: { ...securityOutput.result, summary: securityOutput.summary, provider: securityProvider }
|
|
224
|
+
};
|
|
225
|
+
}
|
|
226
|
+
|
|
212
227
|
// Critical security issue — escalate to Solomon/human
|
|
213
228
|
logger.warn(`Critical security finding — escalating: ${securityOutput.summary}`);
|
|
214
229
|
const solomonResult = await invokeSolomon({
|
|
@@ -103,7 +103,7 @@ export async function invokeSolomon({ config, logger, emitter, eventBase, stage,
|
|
|
103
103
|
: null;
|
|
104
104
|
|
|
105
105
|
const solomon = new SolomonRole({ config, logger, emitter });
|
|
106
|
-
await solomon.init({ task: conflict.task
|
|
106
|
+
await solomon.init({ task: conflict.task, iteration });
|
|
107
107
|
let ruling;
|
|
108
108
|
try {
|
|
109
109
|
// Inject Solomon's own history so it doesn't repeat failed strategies
|
|
@@ -249,18 +249,28 @@ function handleSolomonAction(solomonResult, session, contextPrefix) {
|
|
|
249
249
|
return null;
|
|
250
250
|
}
|
|
251
251
|
|
|
252
|
-
async function handleSolomonContinue(solomonResult, session, counterField) {
|
|
252
|
+
/**
 * Apply a Solomon "continue" ruling to the session.
 *
 * When the ruling carries human guidance, it is appended to
 * `session.last_reviewer_feedback` and, if Brain is enabled, also forwarded
 * through `processRoleOutput` under the role name "solomon". The named counter
 * is then reset to 0 and the session is saved.
 *
 * @param {object} solomonResult - Ruling; reads `action` and `humanGuidance`.
 * @param {object} session - Session object, mutated in place.
 * @param {string} counterField - Name of the session counter to reset.
 * @param {object} [brainCtx] - Brain context; only used when `enabled` is truthy.
 * @returns {Promise<boolean>} `false` (with no side effects) when the action is
 *   not "continue"; `true` after the session has been updated and saved.
 */
async function handleSolomonContinue(solomonResult, session, counterField, brainCtx) {
  if (solomonResult.action !== "continue") return false;

  const { humanGuidance } = solomonResult;
  if (humanGuidance) {
    // A bare `+=` on an unset field would produce "undefined\nUser guidance: …".
    session.last_reviewer_feedback = `${session.last_reviewer_feedback ?? ""}\nUser guidance: ${humanGuidance}`;
    // Brain: also push user guidance into feedback queue when enabled
    if (brainCtx?.enabled) {
      const { processRoleOutput } = await import("../brain-coordinator.js");
      // No iteration number is passed to this helper, so 0 is recorded.
      processRoleOutput(brainCtx, { roleName: "solomon", output: { verdict: "continue", summary: humanGuidance }, iteration: 0 });
    }
  }

  session[counterField] = 0;
  await saveSession(session);
  return true;
}
|
|
261
266
|
|
|
262
|
-
async function handleTddFailure({ tddEval, config, logger, emitter, eventBase, session, iteration, askQuestion }) {
|
|
267
|
+
async function handleTddFailure({ tddEval, config, logger, emitter, eventBase, session, iteration, askQuestion, task, brainCtx }) {
|
|
263
268
|
session.last_reviewer_feedback = tddEval.message;
|
|
269
|
+
// Brain: push TDD failure into feedback queue when enabled
|
|
270
|
+
if (brainCtx?.enabled) {
|
|
271
|
+
const { processRoleOutput } = await import("../brain-coordinator.js");
|
|
272
|
+
processRoleOutput(brainCtx, { roleName: "tdd", output: { verdict: "fail", summary: tddEval.message }, iteration });
|
|
273
|
+
}
|
|
264
274
|
session.repeated_issue_count += 1;
|
|
265
275
|
await saveSession(session);
|
|
266
276
|
|
|
@@ -268,6 +278,18 @@ async function handleTddFailure({ tddEval, config, logger, emitter, eventBase, s
|
|
|
268
278
|
return { action: "continue" };
|
|
269
279
|
}
|
|
270
280
|
|
|
281
|
+
// Brain: when enabled, skip Solomon — Brain handles via max_iterations
|
|
282
|
+
if (brainCtx?.enabled) {
|
|
283
|
+
logger.info("Brain: TDD sub-loop limit reached — Brain will handle via max_iterations (Solomon bypassed)");
|
|
284
|
+
emitProgress(emitter, makeEvent("brain:tdd-retry-limit", { ...eventBase, stage: "tdd" }, {
|
|
285
|
+
message: `TDD sub-loop limit reached (${session.repeated_issue_count}/${config.session.fail_fast_repeats}) — Brain handling`,
|
|
286
|
+
detail: { subloop: "tdd", retryCount: session.repeated_issue_count, reason: tddEval.reason }
|
|
287
|
+
}));
|
|
288
|
+
session.repeated_issue_count = 0;
|
|
289
|
+
await saveSession(session);
|
|
290
|
+
return { action: "continue" };
|
|
291
|
+
}
|
|
292
|
+
|
|
271
293
|
emitProgress(
|
|
272
294
|
emitter,
|
|
273
295
|
makeEvent("solomon:escalate", { ...eventBase, stage: "tdd" }, {
|
|
@@ -280,7 +302,7 @@ async function handleTddFailure({ tddEval, config, logger, emitter, eventBase, s
|
|
|
280
302
|
config, logger, emitter, eventBase, stage: "tdd", askQuestion, session, iteration,
|
|
281
303
|
conflict: {
|
|
282
304
|
stage: "tdd",
|
|
283
|
-
task: session.task,
|
|
305
|
+
task: task || session.task,
|
|
284
306
|
iterationCount: session.repeated_issue_count,
|
|
285
307
|
maxIterations: config.session.fail_fast_repeats,
|
|
286
308
|
reason: tddEval.reason,
|
|
@@ -292,13 +314,13 @@ async function handleTddFailure({ tddEval, config, logger, emitter, eventBase, s
|
|
|
292
314
|
|
|
293
315
|
const actionResult = handleSolomonAction(solomonResult, session, "tdd");
|
|
294
316
|
if (actionResult) return actionResult;
|
|
295
|
-
const continued = await handleSolomonContinue(solomonResult, session, "repeated_issue_count");
|
|
317
|
+
const continued = await handleSolomonContinue(solomonResult, session, "repeated_issue_count", brainCtx);
|
|
296
318
|
if (continued) return { action: "continue" };
|
|
297
319
|
|
|
298
320
|
return { action: "continue" };
|
|
299
321
|
}
|
|
300
322
|
|
|
301
|
-
export async function runTddCheckStage({ config, logger, emitter, eventBase, session, trackBudget, iteration, askQuestion }) {
|
|
323
|
+
export async function runTddCheckStage({ config, logger, emitter, eventBase, session, trackBudget, iteration, askQuestion, task, brainCtx }) {
|
|
302
324
|
logger.setContext({ iteration, stage: "tdd" });
|
|
303
325
|
let tddDiff, untrackedFiles;
|
|
304
326
|
try {
|
|
@@ -335,7 +357,7 @@ export async function runTddCheckStage({ config, logger, emitter, eventBase, ses
|
|
|
335
357
|
);
|
|
336
358
|
|
|
337
359
|
if (!tddEval.ok) {
|
|
338
|
-
return handleTddFailure({ tddEval, config, logger, emitter, eventBase, session, iteration, askQuestion });
|
|
360
|
+
return handleTddFailure({ tddEval, config, logger, emitter, eventBase, session, iteration, askQuestion, task, brainCtx });
|
|
339
361
|
}
|
|
340
362
|
|
|
341
363
|
return { action: "ok" };
|
|
@@ -36,7 +36,7 @@ function buildReviewHistory(session) {
|
|
|
36
36
|
.map(cp => ({ iteration: cp.iteration, note: cp.note || "" }));
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
async function handleReviewerStalledSolomon({ review, repeatCounts, repeatState, config, logger, emitter, eventBase, session, iteration, task, askQuestion, budgetSummary, repeatDetector }) {
|
|
39
|
+
async function handleReviewerStalledSolomon({ review, repeatCounts, repeatState, config, logger, emitter, eventBase, session, iteration, task, askQuestion, budgetSummary, repeatDetector, brainCtx }) {
|
|
40
40
|
// DETERMINISTIC GUARD: security issues NEVER go to Solomon — always return to coder
|
|
41
41
|
const categories = categorizeIssues(review.blocking_issues);
|
|
42
42
|
if (categories.security > 0) {
|
|
@@ -48,6 +48,22 @@ async function handleReviewerStalledSolomon({ review, repeatCounts, repeatState,
|
|
|
48
48
|
return { review, solomonApproved: false };
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
+
// Brain: when enabled, ALL paths go through Brain, not Solomon
|
|
52
|
+
if (brainCtx?.enabled) {
|
|
53
|
+
const logPrefix = repeatState.stalled
|
|
54
|
+
? `Reviewer stalled (${repeatCounts.reviewer} repeats)`
|
|
55
|
+
: `Reviewer rejected (first rejection)`;
|
|
56
|
+
logger.info(`Brain: ${logPrefix} — Brain will handle (Solomon bypassed)`);
|
|
57
|
+
emitProgress(emitter, makeEvent("brain:escalate", { ...eventBase, stage: "reviewer" }, {
|
|
58
|
+
message: `${logPrefix} — Brain handling`,
|
|
59
|
+
detail: { repeats: repeatCounts.reviewer || 1, reason: repeatState.reason || "first_rejection" }
|
|
60
|
+
}));
|
|
61
|
+
// Push reviewer feedback into Brain queue
|
|
62
|
+
const { processRoleOutput } = await import("../brain-coordinator.js");
|
|
63
|
+
processRoleOutput(brainCtx, { roleName: "reviewer", output: review, iteration });
|
|
64
|
+
return { review, solomonApproved: false };
|
|
65
|
+
}
|
|
66
|
+
|
|
51
67
|
const logPrefix = repeatState.stalled
|
|
52
68
|
? `Reviewer stalled (${repeatCounts.reviewer} repeats)`
|
|
53
69
|
: `Reviewer rejected (first rejection)`;
|
|
@@ -156,22 +172,23 @@ async function handleReviewerRejection({ review, repeatDetector, config, logger,
|
|
|
156
172
|
return handleReviewerStalledSolomon({
|
|
157
173
|
review, repeatCounts, repeatState, config, logger, emitter,
|
|
158
174
|
eventBase, session, iteration, task, askQuestion,
|
|
159
|
-
budgetSummary, repeatDetector
|
|
175
|
+
budgetSummary, repeatDetector, brainCtx
|
|
160
176
|
});
|
|
161
177
|
}
|
|
162
178
|
|
|
163
179
|
// Solomon evaluates EVERY rejection
|
|
164
180
|
const repeatCounts = repeatDetector.getRepeatCounts();
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
181
|
+
const evaluateEventName = brainCtx?.enabled ? "brain:evaluate" : "solomon:evaluate";
|
|
182
|
+
logger.info(`Reviewer rejected — ${brainCtx?.enabled ? "Brain" : "Solomon"} evaluating ${review.blocking_issues.length} blocking issue(s)`);
|
|
183
|
+
emitProgress(emitter, makeEvent(evaluateEventName, { ...eventBase, stage: brainCtx?.enabled ? "brain" : "solomon" }, {
|
|
184
|
+
message: `${brainCtx?.enabled ? "Brain" : "Solomon"} evaluating reviewer rejection`,
|
|
168
185
|
detail: { blockingCount: review.blocking_issues.length, isRepeat: repeatState.stalled }
|
|
169
186
|
}));
|
|
170
187
|
|
|
171
188
|
return handleReviewerStalledSolomon({
|
|
172
189
|
review, repeatCounts, repeatState, config, logger, emitter,
|
|
173
190
|
eventBase, session, iteration, task, askQuestion,
|
|
174
|
-
budgetSummary, repeatDetector
|
|
191
|
+
budgetSummary, repeatDetector, brainCtx
|
|
175
192
|
});
|
|
176
193
|
}
|
|
177
194
|
|
|
@@ -26,7 +26,19 @@ async function handleSonarStalled({ repeatDetector, logger, emitter, eventBase,
|
|
|
26
26
|
return { action: "stalled", result: { approved: false, sessionId: session.id, reason: "stalled" } };
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
async function handleSonarRetryLimit({ config, logger, emitter, eventBase, session, iteration, askQuestion, task, maxSonarRetries, sonarResult }) {
|
|
29
|
+
async function handleSonarRetryLimit({ config, logger, emitter, eventBase, session, iteration, askQuestion, task, maxSonarRetries, sonarResult, brainCtx }) {
|
|
30
|
+
// Brain: when enabled, skip Solomon — Brain handles via max_iterations
|
|
31
|
+
if (brainCtx?.enabled) {
|
|
32
|
+
logger.info("Brain: sonar retry limit reached — Brain will handle via max_iterations (Solomon bypassed)");
|
|
33
|
+
emitProgress(emitter, makeEvent("brain:sonar-retry-limit", { ...eventBase, stage: "sonar" }, {
|
|
34
|
+
message: `Sonar sub-loop limit reached (${session.sonar_retry_count}/${maxSonarRetries}) — Brain handling`,
|
|
35
|
+
detail: { subloop: "sonar", retryCount: session.sonar_retry_count, limit: maxSonarRetries, gateStatus: sonarResult.gateStatus }
|
|
36
|
+
}));
|
|
37
|
+
session.sonar_retry_count = 0;
|
|
38
|
+
await saveSession(session);
|
|
39
|
+
return { action: "continue" };
|
|
40
|
+
}
|
|
41
|
+
|
|
30
42
|
emitProgress(
|
|
31
43
|
emitter,
|
|
32
44
|
makeEvent("solomon:escalate", { ...eventBase, stage: "sonar" }, {
|
|
@@ -64,7 +76,7 @@ async function handleSonarRetryLimit({ config, logger, emitter, eventBase, sessi
|
|
|
64
76
|
return null;
|
|
65
77
|
}
|
|
66
78
|
|
|
67
|
-
async function handleSonarBlocking({ sonarResult, config, logger, emitter, eventBase, session, iteration, repeatDetector, budgetSummary, askQuestion, task }) {
|
|
79
|
+
async function handleSonarBlocking({ sonarResult, config, logger, emitter, eventBase, session, iteration, repeatDetector, budgetSummary, askQuestion, task, brainCtx }) {
|
|
68
80
|
// If the ONLY quality gate failure is coverage, treat as non-blocking warning
|
|
69
81
|
if (sonarResult.conditions) {
|
|
70
82
|
const failedConditions = sonarResult.conditions.filter(c => c.status === "ERROR");
|
|
@@ -88,20 +100,26 @@ async function handleSonarBlocking({ sonarResult, config, logger, emitter, event
|
|
|
88
100
|
return handleSonarStalled({ repeatDetector, logger, emitter, eventBase, session, budgetSummary });
|
|
89
101
|
}
|
|
90
102
|
|
|
91
|
-
|
|
103
|
+
const summary = `Sonar gate blocking (${sonarResult.gateStatus}). Resolve critical findings first.`;
|
|
104
|
+
session.last_reviewer_feedback = summary;
|
|
105
|
+
// Brain: push sonar feedback into queue when enabled
|
|
106
|
+
if (brainCtx?.enabled) {
|
|
107
|
+
const { processRoleOutput } = await import("../brain-coordinator.js");
|
|
108
|
+
processRoleOutput(brainCtx, { roleName: "sonar", output: { verdict: "fail", summary }, iteration });
|
|
109
|
+
}
|
|
92
110
|
session.sonar_retry_count = (session.sonar_retry_count || 0) + 1;
|
|
93
111
|
await saveSession(session);
|
|
94
112
|
const maxSonarRetries = config.session.max_sonar_retries ?? config.session.fail_fast_repeats;
|
|
95
113
|
|
|
96
114
|
if (session.sonar_retry_count >= maxSonarRetries) {
|
|
97
|
-
const result = await handleSonarRetryLimit({ config, logger, emitter, eventBase, session, iteration, askQuestion, task, maxSonarRetries, sonarResult });
|
|
115
|
+
const result = await handleSonarRetryLimit({ config, logger, emitter, eventBase, session, iteration, askQuestion, task, maxSonarRetries, sonarResult, brainCtx });
|
|
98
116
|
if (result) return result;
|
|
99
117
|
}
|
|
100
118
|
|
|
101
119
|
return { action: "continue" };
|
|
102
120
|
}
|
|
103
121
|
|
|
104
|
-
export async function runSonarStage({ config, logger, emitter, eventBase, session, trackBudget, iteration, repeatDetector, budgetSummary, sonarState, askQuestion, task }) {
|
|
122
|
+
export async function runSonarStage({ config, logger, emitter, eventBase, session, trackBudget, iteration, repeatDetector, budgetSummary, sonarState, askQuestion, task, brainCtx }) {
|
|
105
123
|
logger.setContext({ iteration, stage: "sonar" });
|
|
106
124
|
emitProgress(
|
|
107
125
|
emitter,
|
|
@@ -170,12 +188,22 @@ export async function runSonarStage({ config, logger, emitter, eventBase, sessio
|
|
|
170
188
|
})
|
|
171
189
|
);
|
|
172
190
|
|
|
191
|
+
// Brain: when enabled, skip Solomon for sonar errors — Brain handles via max_iterations
|
|
192
|
+
if (brainCtx?.enabled) {
|
|
193
|
+
logger.info("Brain: sonar error — Brain will handle (Solomon bypassed)");
|
|
194
|
+
emitProgress(emitter, makeEvent("brain:sonar-error", { ...eventBase, stage: "sonar" }, {
|
|
195
|
+
message: `Sonar error — Brain handling: ${errorMessage.slice(0, 200)}`,
|
|
196
|
+
detail: { error: errorMessage }
|
|
197
|
+
}));
|
|
198
|
+
return { action: "continue" };
|
|
199
|
+
}
|
|
200
|
+
|
|
173
201
|
// Let Solomon decide: continue without sonar or stop
|
|
174
202
|
const solomonResult = await invokeSolomon({
|
|
175
203
|
config, logger, emitter, eventBase, stage: "sonar_error", askQuestion, session, iteration,
|
|
176
204
|
conflict: {
|
|
177
205
|
stage: "sonar_error",
|
|
178
|
-
task
|
|
206
|
+
task,
|
|
179
207
|
iterationCount: iteration,
|
|
180
208
|
maxIterations: config.max_iterations,
|
|
181
209
|
history: [{ agent: "sonar", feedback: errorMessage }]
|
|
@@ -223,7 +251,7 @@ export async function runSonarStage({ config, logger, emitter, eventBase, sessio
|
|
|
223
251
|
);
|
|
224
252
|
|
|
225
253
|
if (sonarResult.blocking) {
|
|
226
|
-
return handleSonarBlocking({ sonarResult, config, logger, emitter, eventBase, session, iteration, repeatDetector, budgetSummary, askQuestion, task });
|
|
254
|
+
return handleSonarBlocking({ sonarResult, config, logger, emitter, eventBase, session, iteration, repeatDetector, budgetSummary, askQuestion, task, brainCtx });
|
|
227
255
|
}
|
|
228
256
|
|
|
229
257
|
// Sonar passed — reset retry counter
|
package/src/orchestrator.js
CHANGED
|
@@ -351,7 +351,7 @@ async function handlePostLoopStages({ config, session, emitter, eventBase, coder
|
|
|
351
351
|
if (securityEnabled) {
|
|
352
352
|
const securityResult = await runSecurityStage({
|
|
353
353
|
config, logger, emitter, eventBase, session, coderRole, trackBudget,
|
|
354
|
-
iteration: i, task, diff: postLoopDiff, askQuestion
|
|
354
|
+
iteration: i, task, diff: postLoopDiff, askQuestion, brainCtx
|
|
355
355
|
});
|
|
356
356
|
if (securityResult.action === "pause") return { action: "return", result: securityResult.result };
|
|
357
357
|
if (securityResult.action === "continue") {
|
|
@@ -918,8 +918,8 @@ async function runGuardStages({ config, logger, emitter, eventBase, session, ite
|
|
|
918
918
|
return { action: "ok" };
|
|
919
919
|
}
|
|
920
920
|
|
|
921
|
-
async function runQualityGateStages({ config, logger, emitter, eventBase, session, trackBudget, i, askQuestion, repeatDetector, budgetSummary, sonarState, task, stageResults, coderRole, pipelineFlags }) {
|
|
922
|
-
const tddResult = await runTddCheckStage({ config, logger, emitter, eventBase, session, trackBudget, iteration: i, askQuestion });
|
|
921
|
+
async function runQualityGateStages({ config, logger, emitter, eventBase, session, trackBudget, i, askQuestion, repeatDetector, budgetSummary, sonarState, task, stageResults, coderRole, pipelineFlags, brainCtx }) {
|
|
922
|
+
const tddResult = await runTddCheckStage({ config, logger, emitter, eventBase, session, trackBudget, iteration: i, askQuestion, task, brainCtx });
|
|
923
923
|
if (tddResult.action === "pause") return { action: "return", result: tddResult.result };
|
|
924
924
|
if (tddResult.action === "continue") return { action: "continue" };
|
|
925
925
|
|
|
@@ -928,7 +928,7 @@ async function runQualityGateStages({ config, logger, emitter, eventBase, sessio
|
|
|
928
928
|
if (config.sonarqube.enabled && !skipSonarForTaskType.has(effectiveTaskType)) {
|
|
929
929
|
const sonarResult = await runSonarStage({
|
|
930
930
|
config, logger, emitter, eventBase, session, trackBudget, iteration: i,
|
|
931
|
-
repeatDetector, budgetSummary, sonarState, askQuestion, task
|
|
931
|
+
repeatDetector, budgetSummary, sonarState, askQuestion, task, brainCtx
|
|
932
932
|
});
|
|
933
933
|
if (sonarResult.action === "stalled" || sonarResult.action === "pause") return { action: "return", result: sonarResult.result };
|
|
934
934
|
if (sonarResult.action === "continue") return { action: "continue" };
|
|
@@ -1033,8 +1033,14 @@ async function handleMaxIterationsReached({ session, budgetSummary, emitter, eve
|
|
|
1033
1033
|
}
|
|
1034
1034
|
|
|
1035
1035
|
if (hasCorrectness) {
|
|
1036
|
-
// Brain: correctness/test issues pending
|
|
1037
|
-
|
|
1036
|
+
// Brain: correctness/test issues pending. Cap at MAX_EXTENSIONS to avoid infinite extensions.
|
|
1037
|
+
const MAX_EXTENSIONS = 2;
|
|
1038
|
+
if (brainCtx.extensionCount >= MAX_EXTENSIONS) {
|
|
1039
|
+
logger.warn(`Brain: ${brainCtx.extensionCount} extensions exhausted with correctness issues still pending — escalating to human`);
|
|
1040
|
+
return { paused: true, sessionId: session.id, question: `Brain exhausted ${MAX_EXTENSIONS} extensions with correctness/tests still pending. Manual intervention needed.`, context: "brain_extension_cap", pending };
|
|
1041
|
+
}
|
|
1042
|
+
brainCtx.extensionCount += 1;
|
|
1043
|
+
logger.info(`Brain: max_iterations reached with ${entries.filter(e => ["correctness", "tests"].includes(e.category)).length} correctness issue(s) pending — extending iterations (extension ${brainCtx.extensionCount}/${MAX_EXTENSIONS})`);
|
|
1038
1044
|
session.reviewer_retry_count = 0;
|
|
1039
1045
|
await saveSession(session);
|
|
1040
1046
|
return { approved: false, sessionId: session.id, reason: "max_iterations_extended", extraIterations: Math.ceil(config.max_iterations / 2) };
|
|
@@ -1311,7 +1317,10 @@ async function initFlowContext({ task, config, logger, emitter, askQuestion, pgT
|
|
|
1311
1317
|
}
|
|
1312
1318
|
|
|
1313
1319
|
async function runSingleIteration(ctx) {
|
|
1314
|
-
|
|
1320
|
+
// Use plannedTask (HU-scoped or planner-enriched) over the raw original task.
|
|
1321
|
+
// When running per-HU sub-pipelines, plannedTask is the HU's text, not the full spec.
|
|
1322
|
+
const { config, logger, emitter, eventBase, session, iteration: i } = ctx;
|
|
1323
|
+
const task = ctx.plannedTask || ctx.task;
|
|
1315
1324
|
|
|
1316
1325
|
const iterStart = Date.now();
|
|
1317
1326
|
const ciEnabled = Boolean(config.ci?.enabled) && ctx.gitCtx?.enabled;
|
|
@@ -1341,7 +1350,7 @@ async function runSingleIteration(ctx) {
|
|
|
1341
1350
|
config, logger, emitter, eventBase, session, trackBudget: ctx.trackBudget, i,
|
|
1342
1351
|
askQuestion: ctx.askQuestion, repeatDetector: ctx.repeatDetector, budgetSummary: ctx.budgetSummary,
|
|
1343
1352
|
sonarState: ctx.sonarState, task, stageResults: ctx.stageResults, coderRole: ctx.coderRole,
|
|
1344
|
-
pipelineFlags: ctx.pipelineFlags
|
|
1353
|
+
pipelineFlags: ctx.pipelineFlags, brainCtx: ctx.brainCtx
|
|
1345
1354
|
});
|
|
1346
1355
|
if (qgResult.action === "return" || qgResult.action === "continue") return qgResult;
|
|
1347
1356
|
|
|
@@ -1604,7 +1613,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
1604
1613
|
const securityResult = await runSecurityStage({
|
|
1605
1614
|
config: ctx.config, logger, emitter, eventBase: ctx.eventBase, session: ctx.session,
|
|
1606
1615
|
coderRole: ctx.coderRole, trackBudget: ctx.trackBudget,
|
|
1607
|
-
iteration: 1, task: ctx.plannedTask, diff: postLoopDiff, askQuestion
|
|
1616
|
+
iteration: 1, task: ctx.plannedTask, diff: postLoopDiff, askQuestion, brainCtx: ctx.brainCtx
|
|
1608
1617
|
});
|
|
1609
1618
|
if (securityResult.stageResult) analysisStageResults.security = securityResult.stageResult;
|
|
1610
1619
|
}
|
package/src/utils/budget.js
CHANGED
|
@@ -100,6 +100,7 @@ export class BudgetTracker {
|
|
|
100
100
|
const hasExplicitCost = cost_usd !== undefined && cost_usd !== null && cost_usd !== "";
|
|
101
101
|
const modelName = model || provider || null;
|
|
102
102
|
const computedCost = calculateUsageCostUsd({
|
|
103
|
+
provider: provider,
|
|
103
104
|
model: modelName,
|
|
104
105
|
tokens_in: safeTokensIn,
|
|
105
106
|
tokens_out: safeTokensOut,
|
package/src/utils/pricing.js
CHANGED
|
@@ -2,9 +2,20 @@ import { buildDefaultPricingTable } from "../agents/model-registry.js";
|
|
|
2
2
|
|
|
3
3
|
export const DEFAULT_MODEL_PRICING = buildDefaultPricingTable();
|
|
4
4
|
|
|
5
|
-
export function calculateUsageCostUsd({ model, tokens_in, tokens_out, pricing }) {
|
|
5
|
+
export function calculateUsageCostUsd({ provider, model, tokens_in, tokens_out, pricing }) {
|
|
6
6
|
const table = pricing || DEFAULT_MODEL_PRICING;
|
|
7
|
-
|
|
7
|
+
|
|
8
|
+
let entry = table[model];
|
|
9
|
+
|
|
10
|
+
if (!entry && provider && model) {
|
|
11
|
+
entry = table[`${provider}/${model}`];
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
if (!entry && model && model.includes("/")) {
|
|
15
|
+
const [, actualModel] = model.split("/");
|
|
16
|
+
entry = table[actualModel];
|
|
17
|
+
}
|
|
18
|
+
|
|
8
19
|
if (!entry) return 0;
|
|
9
20
|
|
|
10
21
|
const inputCost = (tokens_in * entry.input_per_million) / 1_000_000;
|