karajan-code 1.25.2 → 1.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "karajan-code",
3
- "version": "1.25.2",
3
+ "version": "1.26.0",
4
4
  "description": "Local multi-agent coding orchestrator with TDD, SonarQube, and code review pipeline",
5
5
  "type": "module",
6
6
  "license": "AGPL-3.0",
@@ -100,9 +100,10 @@ export async function runCoderStage({ coderRoleInstance, coderRole, config, logg
100
100
  action: "standby",
101
101
  standbyInfo: {
102
102
  agent: coderRole.provider,
103
- cooldownMs: rateLimitCheck.cooldownMs,
103
+ cooldownMs: rateLimitCheck.cooldownMs || (rateLimitCheck.isProviderOutage ? 30000 : null),
104
104
  cooldownUntil: rateLimitCheck.cooldownUntil,
105
- message: rateLimitCheck.message
105
+ message: rateLimitCheck.message,
106
+ isProviderOutage: rateLimitCheck.isProviderOutage || false
106
107
  }
107
108
  };
108
109
  }
@@ -169,9 +170,10 @@ export async function runRefactorerStage({ refactorerRole, config, logger, emitt
169
170
  action: "standby",
170
171
  standbyInfo: {
171
172
  agent: refactorerRole.provider,
172
- cooldownMs: rateLimitCheck.cooldownMs,
173
+ cooldownMs: rateLimitCheck.cooldownMs || (rateLimitCheck.isProviderOutage ? 30000 : null),
173
174
  cooldownUntil: rateLimitCheck.cooldownUntil,
174
- message: rateLimitCheck.message
175
+ message: rateLimitCheck.message,
176
+ isProviderOutage: rateLimitCheck.isProviderOutage || false
175
177
  }
176
178
  };
177
179
  }
@@ -738,9 +740,10 @@ export async function runReviewerStage({ reviewerRole, config, logger, emitter,
738
740
  action: "standby",
739
741
  standbyInfo: {
740
742
  agent: reviewerRole.provider,
741
- cooldownMs: rateLimitCheck.cooldownMs,
743
+ cooldownMs: rateLimitCheck.cooldownMs || (rateLimitCheck.isProviderOutage ? 30000 : null),
742
744
  cooldownUntil: rateLimitCheck.cooldownUntil,
743
- message: rateLimitCheck.message
745
+ message: rateLimitCheck.message,
746
+ isProviderOutage: rateLimitCheck.isProviderOutage || false
744
747
  }
745
748
  };
746
749
  }
@@ -5,6 +5,93 @@ import { addCheckpoint, saveSession } from "../session-store.js";
5
5
  import { emitProgress, makeEvent } from "../utils/events.js";
6
6
  import { invokeSolomon } from "./solomon-escalation.js";
7
7
 
8
// Agents that can stand in for a role's primary provider, listed in the order
// they are appended to a fallback chain. Frozen so this shared, module-level
// list cannot be reordered or extended by accident at runtime.
const KNOWN_AGENTS = Object.freeze(["claude", "codex", "gemini"]);
9
+
10
/**
 * Compute the ordered list of agents to try for a role.
 *
 * The primary provider is the first truthy value among the role's own
 * provider, the coder role's provider, the top-level `config.coder` value and
 * finally "claude". It heads the chain; every other entry of KNOWN_AGENTS
 * follows in list order, and the primary is never repeated.
 *
 * @param {object} [config] - Karajan config; may be nullish.
 * @param {string} roleName - Key under `config.roles` for the role being run.
 * @returns {string[]} Provider names, primary first.
 */
function buildFallbackChain(config, roleName) {
  const roles = config?.roles;
  const candidates = [roles?.[roleName]?.provider, roles?.coder?.provider, config?.coder, "claude"];
  const primary = candidates.find(Boolean);

  const chain = [primary];
  for (const agent of KNOWN_AGENTS) {
    if (agent !== primary) {
      chain.push(agent);
    }
  }
  return chain;
}
22
+
23
/**
 * Tell an agent/spawn failure apart from a genuine evaluation failure.
 *
 * A role output counts as an agent failure when it is present, not `ok`,
 * carries a truthy `result.error` and has no truthy `result.verdict`.
 *
 * @param {object} [output] - Role output (`{ ok, result, summary }`).
 * @returns {boolean} True only for agent failures.
 */
function isAgentFailure(output) {
  const failed = Boolean(output) && !output.ok;
  if (!failed) {
    return false;
  }
  const { error, verdict } = output.result ?? {};
  return Boolean(error) && !verdict;
}
31
+
32
/**
 * Run a role (TesterRole or SecurityRole) with an agent fallback chain.
 *
 * Providers from buildFallbackChain() are tried in order. An attempt whose
 * `run()` throws or resolves to no output is recorded as a failed attempt with
 * `result.error` set, so it is eligible for fallback. The first output that is
 * `ok`, or that isAgentFailure() does not classify as an agent failure (a
 * genuine evaluation result), is returned immediately and is NOT retried.
 *
 * @param {Function} RoleClass - Role constructor, called with `{ config, logger, emitter }`.
 * @param {object} opts
 * @param {string} opts.roleName - Role key in `config.roles`; also used as the event stage.
 * @param {object} [opts.config] - Karajan config; never mutated, may be nullish.
 * @param {object} opts.logger - Logger exposing `warn` and `error`.
 * @param {object|null} opts.emitter - Event emitter handed to emitProgress.
 * @param {object} opts.eventBase - Base event data.
 * @param {*} opts.task - Forwarded to `role.init` and `role.run`.
 * @param {*} opts.iteration - Forwarded to `role.init`.
 * @param {*} opts.diff - Forwarded to `role.run`.
 * @returns {Promise<{ output: object, provider: (string|undefined), attempts: object[] }>}
 *   The accepted output (or a synthetic "all agents failed" output), the
 *   provider that produced it (the last one tried when all failed), and one
 *   `{ provider, ok, duration, summary }` entry per attempt.
 */
async function runRoleWithFallback(RoleClass, { roleName, config, logger, emitter, eventBase, task, iteration, diff }) {
  const chain = buildFallbackChain(config, roleName);
  const attempts = [];

  for (const provider of chain) {
    // Copy-on-write: only this role's provider is swapped; the caller's config
    // is left untouched. Optional chaining mirrors buildFallbackChain, which
    // also accepts a nullish config.
    const overrideConfig = {
      ...config,
      roles: { ...config?.roles, [roleName]: { ...config?.roles?.[roleName], provider } }
    };

    const role = new RoleClass({ config: overrideConfig, logger, emitter });
    await role.init({ task, iteration });

    const start = Date.now();
    let output;
    try {
      output = await role.run({ task, diff });
    } catch (err) {
      // Non-Error throws (strings, null) have no usable `.message`.
      const reason = err?.message || String(err);
      output = {
        ok: false,
        result: { error: reason, provider },
        summary: `${roleName} threw: ${reason}`
      };
    }
    if (!output) {
      // A role that resolves to nothing produced no evaluation; record it as a
      // failed attempt instead of crashing on `output.ok` below.
      const reason = `${roleName} returned no output`;
      output = { ok: false, result: { error: reason, provider }, summary: reason };
    }
    const duration = Date.now() - start;

    attempts.push({ provider, ok: output.ok, duration, summary: output.summary });

    if (output.ok || !isAgentFailure(output)) {
      return { output, provider, attempts };
    }

    const remaining = chain.length - attempts.length;
    // Do not announce a retry that will not happen on the last provider.
    const nextStep = remaining > 0 ? "trying next agent" : "no agents left to try";
    logger.warn(`${roleName} agent "${provider}" failed (${duration}ms): ${output.summary} — ${nextStep}`);
    emitProgress(emitter, makeEvent(`${roleName}:fallback`, { ...eventBase, stage: roleName }, {
      status: "warn",
      message: `Agent "${provider}" failed, falling back`,
      detail: { provider, duration, summary: output.summary, remaining }
    }));
  }

  // Every provider in the chain failed at the agent level.
  const lastAttempt = attempts[attempts.length - 1];
  const allProviders = attempts.map((a) => a.provider).join(", ");
  logger.error(`${roleName}: all agents failed (${allProviders})`);

  return {
    output: {
      ok: false,
      result: { error: `All agents failed: ${allProviders}`, attempts },
      summary: `All ${roleName} agents failed (${allProviders}) — check agent installation and configuration`
    },
    provider: lastAttempt?.provider,
    attempts
  };
}
94
+
8
95
  export async function runTesterStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget, iteration, task, diff, askQuestion }) {
9
96
  logger.setContext({ iteration, stage: "tester" });
10
97
  emitProgress(
@@ -14,30 +101,28 @@ export async function runTesterStage({ config, logger, emitter, eventBase, sessi
14
101
  })
15
102
  );
16
103
 
17
- const tester = new TesterRole({ config, logger, emitter });
18
- await tester.init({ task, iteration });
19
104
  const testerStart = Date.now();
20
- let testerOutput;
21
- try {
22
- testerOutput = await tester.run({ task, diff });
23
- } catch (err) {
24
- logger.warn(`Tester threw: ${err.message}`);
25
- testerOutput = { ok: false, summary: `Tester error: ${err.message}`, result: { error: err.message } };
26
- }
105
+ const { output: testerOutput, provider, attempts } = await runRoleWithFallback(
106
+ TesterRole,
107
+ { roleName: "tester", config, logger, emitter, eventBase, task, iteration, diff }
108
+ );
109
+ const totalDuration = Date.now() - testerStart;
110
+
27
111
  trackBudget({
28
112
  role: "tester",
29
- provider: config?.roles?.tester?.provider || coderRole.provider,
113
+ provider: provider || coderRole.provider,
30
114
  model: config?.roles?.tester?.model || coderRole.model,
31
115
  result: testerOutput,
32
- duration_ms: Date.now() - testerStart
116
+ duration_ms: totalDuration
33
117
  });
34
118
 
35
119
  await addCheckpoint(session, {
36
120
  stage: "tester",
37
121
  iteration,
38
122
  ok: testerOutput.ok,
39
- provider: config?.roles?.tester?.provider || coderRole.provider,
40
- model: config?.roles?.tester?.model || coderRole.model || null
123
+ provider: provider || coderRole.provider,
124
+ model: config?.roles?.tester?.model || coderRole.model || null,
125
+ attempts: attempts.length > 1 ? attempts : undefined
41
126
  });
42
127
 
43
128
  emitProgress(
@@ -94,30 +179,28 @@ export async function runSecurityStage({ config, logger, emitter, eventBase, ses
94
179
  })
95
180
  );
96
181
 
97
- const security = new SecurityRole({ config, logger, emitter });
98
- await security.init({ task, iteration });
99
182
  const securityStart = Date.now();
100
- let securityOutput;
101
- try {
102
- securityOutput = await security.run({ task, diff });
103
- } catch (err) {
104
- logger.warn(`Security threw: ${err.message}`);
105
- securityOutput = { ok: false, summary: `Security error: ${err.message}`, result: { error: err.message } };
106
- }
183
+ const { output: securityOutput, provider, attempts } = await runRoleWithFallback(
184
+ SecurityRole,
185
+ { roleName: "security", config, logger, emitter, eventBase, task, iteration, diff }
186
+ );
187
+ const totalDuration = Date.now() - securityStart;
188
+
107
189
  trackBudget({
108
190
  role: "security",
109
- provider: config?.roles?.security?.provider || coderRole.provider,
191
+ provider: provider || coderRole.provider,
110
192
  model: config?.roles?.security?.model || coderRole.model,
111
193
  result: securityOutput,
112
- duration_ms: Date.now() - securityStart
194
+ duration_ms: totalDuration
113
195
  });
114
196
 
115
197
  await addCheckpoint(session, {
116
198
  stage: "security",
117
199
  iteration,
118
200
  ok: securityOutput.ok,
119
- provider: config?.roles?.security?.provider || coderRole.provider,
120
- model: config?.roles?.security?.model || coderRole.model || null
201
+ provider: provider || coderRole.provider,
202
+ model: config?.roles?.security?.model || coderRole.model || null,
203
+ attempts: attempts.length > 1 ? attempts : undefined
121
204
  });
122
205
 
123
206
  emitProgress(
@@ -214,3 +297,6 @@ export async function runImpeccableStage({ config, logger, emitter, eventBase, s
214
297
  // Impeccable is advisory — failures do not block the pipeline
215
298
  return { action: "ok", stageResult: { ok: impeccableOutput.ok, verdict, summary: impeccableOutput.summary || "No frontend design issues found" } };
216
299
  }
300
+
301
+ // Exported for testing
302
+ export { buildFallbackChain, isAgentFailure, runRoleWithFallback };
@@ -0,0 +1,266 @@
1
+ /**
2
+ * Preflight environment checks for kj_run.
3
+ *
4
+ * Runs AFTER policy resolution (so we know which stages are active)
5
+ * and BEFORE session iteration loop (so we fail fast or degrade gracefully).
6
+ *
7
+ * Design: always returns ok:true (graceful degradation, never hard-fail).
8
+ * Disabled stages are auto-disabled via configOverrides instead of blocking.
9
+ */
10
+
11
+ import { checkBinary } from "../utils/agent-detect.js";
12
+ import { isSonarReachable, sonarUp } from "../sonar/manager.js";
13
+ import { runCommand } from "../utils/process.js";
14
+ import { emitProgress, makeEvent } from "../utils/events.js";
15
+
16
/**
 * Normalize the configured SonarQube host into a base URL usable from the
 * machine running Karajan.
 *
 * - Falls back to "http://localhost:9000" when the value is missing or blank.
 * - Rewrites "host.docker.internal" to "localhost".
 * - Strips surrounding whitespace and trailing slashes, so callers that append
 *   "/api/..." never produce a "//api/..." path.
 *
 * @param {*} rawHost - Host value from config; any falsy value selects the default.
 * @returns {string} Base URL without a trailing slash.
 */
function normalizeApiHost(rawHost) {
  const host = String(rawHost || "").trim() || "http://localhost:9000";
  return host
    .replace(/host\.docker\.internal/g, "localhost")
    .replace(/\/+$/, "");
}
19
+
20
/**
 * Parse JSON without throwing.
 *
 * @param {string} text - Candidate JSON text.
 * @returns {*} The parsed value, or null when JSON.parse rejects the input.
 */
function parseJsonSafe(text) {
  let parsed = null;
  try {
    parsed = JSON.parse(text);
  } catch {
    // Unparseable input is an expected outcome here; it is reported as null.
  }
  return parsed;
}
27
+
28
/**
 * Preflight check: is the `docker` binary available?
 *
 * @returns {Promise<{ name: string, ok: boolean, detail: string }>} Check
 *   record whose `detail` carries the version reported by checkBinary when
 *   the binary was found.
 */
async function checkDocker() {
  const { ok, version } = await checkBinary("docker");
  const detail = ok ? `Docker ${version}` : "Docker not found";
  return { name: "docker", ok, detail };
}
36
+
37
/**
 * Preflight check: is SonarQube reachable at `host`?
 *
 * When the first probe fails, one auto-remediation is attempted: sonarUp() is
 * called and, if it exits with code 0, reachability is probed once more.
 * Errors raised during remediation are treated as "still unreachable".
 *
 * @param {string} host - SonarQube base URL.
 * @returns {Promise<object>} Check record; `remediated` is true only when the
 *   server became reachable after the auto-start.
 */
async function checkSonarReachable(host) {
  const name = "sonar-reachable";

  if (await isSonarReachable(host)) {
    return { name, ok: true, detail: `SonarQube reachable at ${host}`, remediated: false };
  }

  // Auto-remediation is best-effort: any failure leaves the flag false.
  let reachableAfterStart = false;
  try {
    const { exitCode } = await sonarUp(host);
    // Only trust the start-up if the server actually answers afterwards.
    reachableAfterStart = exitCode === 0 && (await isSonarReachable(host));
  } catch {
    // sonarUp (or the follow-up probe) failed; report as unreachable below.
  }

  if (reachableAfterStart) {
    return { name, ok: true, detail: `SonarQube started and reachable at ${host}`, remediated: true };
  }
  return { name, ok: false, detail: `SonarQube not reachable at ${host} (auto-start failed)` };
}
59
+
60
/**
 * Preflight check: resolve a working SonarQube token.
 *
 * 1. If a token is supplied (KJ_SONAR_TOKEN, SONAR_TOKEN or
 *    `config.sonarqube.token`), validate it against
 *    `/api/authentication/validate`. The token is accepted only when the
 *    request returns a 2xx status AND the response body reports `valid`,
 *    the same flag the admin-credential branch inspects. A 2xx status on its
 *    own does not show that the credentials were accepted.
 * 2. Otherwise (or if validation fails), try admin credentials and generate a
 *    fresh user token via `/api/user_tokens/generate`.
 *
 * Every curl call is bounded by `--max-time` so an unresponsive server cannot
 * stall preflight.
 *
 * NOTE(review): credentials are passed on curl's command line, where they may
 * be visible to other local users in the process list; consider feeding them
 * through a curl config on stdin instead.
 *
 * @param {object} config - Karajan config; `config.sonarqube` is optional.
 * @returns {Promise<{ name: string, ok: boolean, detail: string, token?: string }>}
 *   Check record; `token` is present only when `ok` is true.
 */
async function checkSonarAuth(config) {
  const host = normalizeApiHost(config.sonarqube?.host);
  const validateUrl = `${host}/api/authentication/validate`;
  const timeoutArgs = ["--max-time", "5"];

  // Check explicit token first
  const explicitToken = process.env.KJ_SONAR_TOKEN || process.env.SONAR_TOKEN || config.sonarqube?.token;
  if (explicitToken) {
    // Ask curl for the body followed by the status code on its own last line.
    const res = await runCommand("curl", [
      "-sS", "-w", "\n%{http_code}",
      "-H", `Authorization: Bearer ${explicitToken}`,
      ...timeoutArgs,
      validateUrl
    ]);
    if (res.exitCode === 0) {
      const raw = String(res.stdout ?? "").trim();
      const split = raw.lastIndexOf("\n");
      const status = raw.slice(split + 1).trim();
      const body = split === -1 ? "" : raw.slice(0, split);
      if (status.startsWith("2") && parseJsonSafe(body)?.valid) {
        return { name: "sonar-auth", ok: true, detail: "Sonar token valid", token: explicitToken };
      }
    }
  }

  // Try admin credentials to generate a token
  const adminUser = process.env.KJ_SONAR_ADMIN_USER || config.sonarqube?.admin_user || "admin";
  const candidates = [
    process.env.KJ_SONAR_ADMIN_PASSWORD,
    config.sonarqube?.admin_password,
    "admin"
  ].filter(Boolean);

  // De-duplicate so the same password is never tried twice.
  for (const password of new Set(candidates)) {
    const credentials = `${adminUser}:${password}`;

    const validateRes = await runCommand("curl", [
      "-sS", "-u", credentials,
      ...timeoutArgs,
      validateUrl
    ]);
    if (validateRes.exitCode !== 0) continue;
    if (!parseJsonSafe(validateRes.stdout)?.valid) continue;

    // Generate a user token; the timestamp keeps the token name unique.
    const tokenName = `karajan-preflight-${Date.now()}`;
    const tokenRes = await runCommand("curl", [
      "-sS", "-u", credentials,
      "-X", "POST",
      "--data-urlencode", `name=${tokenName}`,
      ...timeoutArgs,
      `${host}/api/user_tokens/generate`
    ]);
    if (tokenRes.exitCode !== 0) continue;
    const tokenParsed = parseJsonSafe(tokenRes.stdout);
    if (tokenParsed?.token) {
      return { name: "sonar-auth", ok: true, detail: "Sonar token generated", token: tokenParsed.token };
    }
  }

  return { name: "sonar-auth", ok: false, detail: "Could not validate or generate Sonar token" };
}
112
+
113
/**
 * Preflight check: is the binary for the security role's agent available?
 *
 * The provider is the first truthy value among the security role's provider,
 * the coder role's provider and `config.coder`, defaulting to "claude".
 *
 * @param {object} config - Karajan config.
 * @returns {Promise<{ name: string, ok: boolean, detail: string, provider: string }>}
 *   Check record including the provider that was probed.
 */
async function checkSecurityAgent(config) {
  const roles = config.roles;
  const configured = [roles?.security?.provider, roles?.coder?.provider, config.coder];
  const provider = configured.find(Boolean) || "claude";

  const { ok, version } = await checkBinary(provider);
  const detail = ok
    ? `Security agent "${provider}" available (${version})`
    : `Security agent "${provider}" not found`;

  return { name: "security-agent", ok, detail, provider };
}
127
+
128
/**
 * Run preflight environment checks.
 *
 * Checks run in a fixed order (Docker, SonarQube reachability, SonarQube
 * auth, security agent), and each is skipped when its stage is not enabled or
 * an earlier check already disabled SonarQube. A failing check never fails
 * the run: it sets a flag in `configOverrides` and adds a warning.
 *
 * A resolved Sonar token is cached in `process.env.KJ_SONAR_TOKEN` only. It is
 * stripped from the check record stored in `checks`, because the returned
 * result is also attached to the `preflight:end` event as its detail.
 *
 * @param {object} opts
 * @param {object} opts.config - Karajan config
 * @param {object} opts.logger - Logger instance
 * @param {object|null} opts.emitter - Event emitter
 * @param {object} opts.eventBase - Base event data
 * @param {object} [opts.resolvedPolicies] - Output from applyPolicies(); a missing value is treated as "no sonar policy"
 * @param {boolean} opts.securityEnabled - Whether security stage is enabled
 * @returns {Promise<{ ok: boolean, checks: object[], remediations: string[], configOverrides: object, warnings: string[] }>}
 */
export async function runPreflightChecks({ config, logger, emitter, eventBase, resolvedPolicies, securityEnabled }) {
  // Optional chaining: undefined policies must not crash a stage designed to degrade gracefully.
  const sonarEnabled = Boolean(config.sonarqube?.enabled) && resolvedPolicies?.sonar !== false;
  const isExternalSonar = Boolean(config.sonarqube?.external);
  const sonarHost = normalizeApiHost(config.sonarqube?.host);

  const result = {
    ok: true,
    checks: [],
    remediations: [],
    configOverrides: {},
    warnings: [],
  };

  // All preflight events share the same stage tag; a fresh base is built per event.
  const emit = (type, payload) =>
    emitProgress(emitter, makeEvent(type, { ...eventBase, stage: "preflight" }, payload));

  // Short-circuit: nothing to check
  if (!sonarEnabled && !securityEnabled) {
    logger.info("Preflight: skipped (no sonar, no security)");
    emit("preflight:end", {
      message: "Preflight skipped (no checks needed)",
      detail: result
    });
    return result;
  }

  emit("preflight:start", {
    message: "Running preflight environment checks",
    detail: { sonarEnabled, securityEnabled }
  });

  // --- 1. Docker (only if sonar enabled and not external) ---
  if (sonarEnabled && !isExternalSonar) {
    const dockerCheck = await checkDocker();
    result.checks.push(dockerCheck);

    emit("preflight:check", {
      status: dockerCheck.ok ? "ok" : "warn",
      message: `Docker: ${dockerCheck.detail}`,
      detail: dockerCheck
    });

    if (!dockerCheck.ok) {
      result.configOverrides.sonarDisabled = true;
      result.warnings.push("Docker not available — SonarQube auto-disabled");
      logger.warn("Preflight: Docker not found, disabling SonarQube");

      // Skip remaining sonar checks, continue to security
      if (!securityEnabled) {
        emit("preflight:end", {
          status: "warn", message: "Preflight completed with warnings", detail: result
        });
        return result;
      }
    }
  }

  // --- 2. SonarQube reachable ---
  if (sonarEnabled && !result.configOverrides.sonarDisabled) {
    const reachableCheck = await checkSonarReachable(sonarHost);
    result.checks.push(reachableCheck);

    if (reachableCheck.remediated) {
      result.remediations.push("SonarQube auto-started via docker compose");
    }

    emit("preflight:check", {
      status: reachableCheck.ok ? "ok" : "warn",
      message: `SonarQube reachability: ${reachableCheck.detail}`,
      detail: reachableCheck
    });

    if (!reachableCheck.ok) {
      result.configOverrides.sonarDisabled = true;
      result.warnings.push("SonarQube not reachable — auto-disabled");
      logger.warn("Preflight: SonarQube not reachable after remediation, disabling");
    }
  }

  // --- 3. SonarQube auth/token ---
  if (sonarEnabled && !result.configOverrides.sonarDisabled) {
    const authCheck = await checkSonarAuth(config);
    // Keep the secret out of the result object: only the redacted record is
    // stored, matching the redacted detail sent with the check event below.
    const { token, ...redactedAuthCheck } = authCheck;
    result.checks.push(redactedAuthCheck);

    emit("preflight:check", {
      status: authCheck.ok ? "ok" : "warn",
      message: `SonarQube auth: ${authCheck.detail}`,
      detail: { name: authCheck.name, ok: authCheck.ok, detail: authCheck.detail }
    });

    if (authCheck.ok && token) {
      process.env.KJ_SONAR_TOKEN = token;
      result.remediations.push("Sonar token resolved and cached in KJ_SONAR_TOKEN");
      logger.info("Preflight: Sonar token resolved and cached");
    } else if (!authCheck.ok) {
      result.configOverrides.sonarDisabled = true;
      result.warnings.push("SonarQube auth failed — auto-disabled");
      logger.warn("Preflight: Sonar auth failed, disabling SonarQube");
    }
  }

  // --- 4. Security agent ---
  if (securityEnabled) {
    const secCheck = await checkSecurityAgent(config);
    result.checks.push(secCheck);

    emit("preflight:check", {
      status: secCheck.ok ? "ok" : "warn",
      message: `Security agent: ${secCheck.detail}`,
      detail: secCheck
    });

    if (!secCheck.ok) {
      result.configOverrides.securityDisabled = true;
      result.warnings.push(`Security agent "${secCheck.provider}" not found — security stage auto-disabled`);
      logger.warn(`Preflight: Security agent "${secCheck.provider}" not found, disabling security stage`);
    }
  }

  const hasWarnings = result.warnings.length > 0;
  emit("preflight:end", {
    status: hasWarnings ? "warn" : "ok",
    message: hasWarnings
      ? `Preflight completed with ${result.warnings.length} warning(s)`
      : "Preflight passed — all checks OK",
    detail: result
  });

  return result;
}
@@ -34,6 +34,7 @@ import { runCoderStage, runRefactorerStage, runTddCheckStage, runSonarStage, run
34
34
  import { runTesterStage, runSecurityStage, runImpeccableStage } from "./orchestrator/post-loop-stages.js";
35
35
  import { waitForCooldown, MAX_STANDBY_RETRIES } from "./orchestrator/standby.js";
36
36
  import { detectTestFramework } from "./utils/project-detect.js";
37
+ import { runPreflightChecks } from "./orchestrator/preflight-checks.js";
37
38
 
38
39
 
39
40
  // --- Extracted helper functions (pure refactoring, zero behavior change) ---
@@ -485,10 +486,18 @@ async function handleStandbyResult({ stageResult, session, emitter, eventBase, i
485
486
  }
486
487
 
487
488
  const standbyRetries = session.standby_retry_count || 0;
489
+ const isOutage = stageResult.standbyInfo.isProviderOutage;
490
+ const pauseReason = isOutage
491
+ ? `Provider outage (${stageResult.standbyInfo.message || "5xx/connection error"}) — retried ${standbyRetries} times. This is NOT a KJ or code problem.`
492
+ : `Rate limit standby exhausted after ${standbyRetries} retries. Agent: ${stageResult.standbyInfo.agent}`;
493
+
488
494
  if (standbyRetries >= MAX_STANDBY_RETRIES) {
495
+ session.last_reviewer_feedback = isOutage
496
+ ? "IMPORTANT: The previous interruption was caused by a provider outage (API 500 error), NOT by a problem in your code or in Karajan. Continue from where you left off."
497
+ : session.last_reviewer_feedback;
489
498
  await pauseSession(session, {
490
- question: `Rate limit standby exhausted after ${standbyRetries} retries. Agent: ${stageResult.standbyInfo.agent}`,
491
- context: { iteration: i, stage, reason: "standby_exhausted" }
499
+ question: pauseReason,
500
+ context: { iteration: i, stage, reason: isOutage ? "provider_outage" : "standby_exhausted" }
492
501
  });
493
502
  emitProgress(emitter, makeEvent(`${stage}:rate_limit`, { ...eventBase, stage }, {
494
503
  status: "paused",
@@ -849,7 +858,23 @@ async function runPreLoopStages({ config, logger, emitter, eventBase, session, f
849
858
  }));
850
859
  }
851
860
 
852
- const updatedConfig = resolvePipelinePolicies({ flags, config, stageResults, emitter, eventBase, session, pipelineFlags });
861
+ let updatedConfig = resolvePipelinePolicies({ flags, config, stageResults, emitter, eventBase, session, pipelineFlags });
862
+
863
+ // --- Preflight environment checks ---
864
+ const preflightResult = await runPreflightChecks({
865
+ config: updatedConfig, logger, emitter, eventBase,
866
+ resolvedPolicies: session.resolved_policies,
867
+ securityEnabled: pipelineFlags.securityEnabled
868
+ });
869
+ session.preflight = preflightResult;
870
+ await saveSession(session);
871
+
872
+ if (preflightResult.configOverrides.sonarDisabled) {
873
+ updatedConfig = { ...updatedConfig, sonarqube: { ...updatedConfig.sonarqube, enabled: false } };
874
+ }
875
+ if (preflightResult.configOverrides.securityDisabled) {
876
+ pipelineFlags.securityEnabled = false;
877
+ }
853
878
 
854
879
  // --- Researcher → Planner ---
855
880
  const { plannedTask } = await runPlanningPhases({ config: updatedConfig, logger, emitter, eventBase, session, stageResults, pipelineFlags, coderRole, trackBudget, task, askQuestion });
@@ -77,22 +77,35 @@ const RATE_LIMIT_PATTERNS = [
77
77
  { pattern: /resource exhausted/i, agent: "gemini" },
78
78
  { pattern: /quota exceeded/i, agent: "gemini" },
79
79
 
80
- // Generic (match any agent)
80
+ // Generic rate limits (match any agent)
81
81
  { pattern: /rate limit/i, agent: "unknown" },
82
82
  { pattern: /token limit reached/i, agent: "unknown" },
83
83
  { pattern: /\b429\b/, agent: "unknown" },
84
84
  { pattern: /too many requests/i, agent: "unknown" },
85
85
  { pattern: /throttl/i, agent: "unknown" },
86
+
87
+ // Provider outages / transient errors (treat like rate limits → retry)
88
+ { pattern: /\b500\b.*(?:internal server error|error)/i, agent: "unknown" },
89
+ { pattern: /\b502\b|bad gateway/i, agent: "unknown" },
90
+ { pattern: /\b503\b|service unavailable/i, agent: "unknown" },
91
+ { pattern: /\b504\b|gateway timeout/i, agent: "unknown" },
92
+ { pattern: /overloaded/i, agent: "unknown" },
93
+ { pattern: /ECONNREFUSED|ECONNRESET|ETIMEDOUT|socket hang up/i, agent: "unknown" },
94
+ { pattern: /network error|fetch failed/i, agent: "unknown" },
86
95
  ];
87
96
 
88
97
  export function detectRateLimit({ stderr = "", stdout = "" }) {
89
98
  const combined = `${stderr}\n${stdout}`;
90
99
 
100
+ const PROVIDER_OUTAGE_PATTERNS = /\b50[0-4]\b|bad gateway|service unavailable|gateway timeout|overloaded|ECONNREFUSED|ECONNRESET|ETIMEDOUT|socket hang up|network error|fetch failed/i;
101
+
91
102
  for (const { pattern, agent } of RATE_LIMIT_PATTERNS) {
92
103
  if (pattern.test(combined)) {
93
104
  const matchedLine = combined.split("\n").find((l) => pattern.test(l)) || combined.trim();
105
+ const isProviderOutage = PROVIDER_OUTAGE_PATTERNS.test(matchedLine);
94
106
  return {
95
107
  isRateLimit: true,
108
+ isProviderOutage,
96
109
  agent,
97
110
  message: matchedLine.trim(),
98
111
  ...parseCooldown(matchedLine)
@@ -100,5 +113,5 @@ export function detectRateLimit({ stderr = "", stdout = "" }) {
100
113
  }
101
114
  }
102
115
 
103
- return { isRateLimit: false, agent: "", message: "", cooldownUntil: null, cooldownMs: null };
116
+ return { isRateLimit: false, isProviderOutage: false, agent: "", message: "", cooldownUntil: null, cooldownMs: null };
104
117
  }