@virtengine/openfleet 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/.env.example +914 -0
  2. package/LICENSE +190 -0
  3. package/README.md +500 -0
  4. package/agent-endpoint.mjs +918 -0
  5. package/agent-hook-bridge.mjs +230 -0
  6. package/agent-hooks.mjs +1188 -0
  7. package/agent-pool.mjs +2403 -0
  8. package/agent-prompts.mjs +689 -0
  9. package/agent-sdk.mjs +141 -0
  10. package/anomaly-detector.mjs +1195 -0
  11. package/autofix.mjs +1294 -0
  12. package/claude-shell.mjs +708 -0
  13. package/cli.mjs +906 -0
  14. package/codex-config.mjs +1274 -0
  15. package/codex-model-profiles.mjs +135 -0
  16. package/codex-shell.mjs +762 -0
  17. package/config-doctor.mjs +613 -0
  18. package/config.mjs +1720 -0
  19. package/conflict-resolver.mjs +248 -0
  20. package/container-runner.mjs +450 -0
  21. package/copilot-shell.mjs +827 -0
  22. package/daemon-restart-policy.mjs +56 -0
  23. package/diff-stats.mjs +282 -0
  24. package/error-detector.mjs +829 -0
  25. package/fetch-runtime.mjs +34 -0
  26. package/fleet-coordinator.mjs +838 -0
  27. package/get-telegram-chat-id.mjs +71 -0
  28. package/git-safety.mjs +170 -0
  29. package/github-reconciler.mjs +403 -0
  30. package/hook-profiles.mjs +651 -0
  31. package/kanban-adapter.mjs +4491 -0
  32. package/lib/logger.mjs +645 -0
  33. package/maintenance.mjs +828 -0
  34. package/merge-strategy.mjs +1171 -0
  35. package/monitor.mjs +12207 -0
  36. package/openfleet.config.example.json +115 -0
  37. package/openfleet.schema.json +465 -0
  38. package/package.json +203 -0
  39. package/postinstall.mjs +187 -0
  40. package/pr-cleanup-daemon.mjs +978 -0
  41. package/preflight.mjs +408 -0
  42. package/prepublish-check.mjs +90 -0
  43. package/presence.mjs +328 -0
  44. package/primary-agent.mjs +282 -0
  45. package/publish.mjs +151 -0
  46. package/repo-root.mjs +29 -0
  47. package/restart-controller.mjs +100 -0
  48. package/review-agent.mjs +557 -0
  49. package/rotate-agent-logs.sh +133 -0
  50. package/sdk-conflict-resolver.mjs +973 -0
  51. package/session-tracker.mjs +880 -0
  52. package/setup.mjs +3937 -0
  53. package/shared-knowledge.mjs +410 -0
  54. package/shared-state-manager.mjs +841 -0
  55. package/shared-workspace-cli.mjs +199 -0
  56. package/shared-workspace-registry.mjs +537 -0
  57. package/shared-workspaces.json +18 -0
  58. package/startup-service.mjs +1070 -0
  59. package/sync-engine.mjs +1063 -0
  60. package/task-archiver.mjs +801 -0
  61. package/task-assessment.mjs +550 -0
  62. package/task-claims.mjs +924 -0
  63. package/task-complexity.mjs +581 -0
  64. package/task-executor.mjs +5111 -0
  65. package/task-store.mjs +753 -0
  66. package/telegram-bot.mjs +9281 -0
  67. package/telegram-sentinel.mjs +2010 -0
  68. package/ui/app.js +867 -0
  69. package/ui/app.legacy.js +1464 -0
  70. package/ui/app.monolith.js +2488 -0
  71. package/ui/components/charts.js +226 -0
  72. package/ui/components/chat-view.js +567 -0
  73. package/ui/components/command-palette.js +587 -0
  74. package/ui/components/diff-viewer.js +190 -0
  75. package/ui/components/forms.js +327 -0
  76. package/ui/components/kanban-board.js +451 -0
  77. package/ui/components/session-list.js +305 -0
  78. package/ui/components/shared.js +473 -0
  79. package/ui/index.html +70 -0
  80. package/ui/modules/api.js +297 -0
  81. package/ui/modules/icons.js +461 -0
  82. package/ui/modules/router.js +81 -0
  83. package/ui/modules/settings-schema.js +261 -0
  84. package/ui/modules/state.js +679 -0
  85. package/ui/modules/telegram.js +331 -0
  86. package/ui/modules/utils.js +270 -0
  87. package/ui/styles/animations.css +140 -0
  88. package/ui/styles/base.css +98 -0
  89. package/ui/styles/components.css +1915 -0
  90. package/ui/styles/kanban.css +286 -0
  91. package/ui/styles/layout.css +809 -0
  92. package/ui/styles/sessions.css +827 -0
  93. package/ui/styles/variables.css +188 -0
  94. package/ui/styles.css +141 -0
  95. package/ui/styles.monolith.css +1046 -0
  96. package/ui/tabs/agents.js +1417 -0
  97. package/ui/tabs/chat.js +74 -0
  98. package/ui/tabs/control.js +887 -0
  99. package/ui/tabs/dashboard.js +515 -0
  100. package/ui/tabs/infra.js +537 -0
  101. package/ui/tabs/logs.js +783 -0
  102. package/ui/tabs/settings.js +1487 -0
  103. package/ui/tabs/tasks.js +1385 -0
  104. package/ui-server.mjs +4073 -0
  105. package/update-check.mjs +465 -0
  106. package/utils.mjs +172 -0
  107. package/ve-kanban.mjs +654 -0
  108. package/ve-kanban.ps1 +1365 -0
  109. package/ve-kanban.sh +18 -0
  110. package/ve-orchestrator.mjs +340 -0
  111. package/ve-orchestrator.ps1 +6546 -0
  112. package/ve-orchestrator.sh +18 -0
  113. package/vibe-kanban-wrapper.mjs +41 -0
  114. package/vk-error-resolver.mjs +470 -0
  115. package/vk-log-stream.mjs +914 -0
  116. package/whatsapp-channel.mjs +520 -0
  117. package/workspace-monitor.mjs +581 -0
  118. package/workspace-reaper.mjs +405 -0
  119. package/workspace-registry.mjs +238 -0
  120. package/worktree-manager.mjs +1266 -0
package/autofix.mjs ADDED
@@ -0,0 +1,1294 @@
1
+ /**
2
+ * autofix.mjs — Self-healing engine for openfleet.
3
+ *
4
+ * Two operating modes determined by `isDevMode()`:
5
+ *
6
+ * DEV MODE (running from source repo):
7
+ * - Actually applies fixes via `codex exec --full-auto`
8
+ * - Writes changes to disk, file watcher restarts orchestrator
9
+ *
10
+ * NPM MODE (installed as npm package):
11
+ * - Analysis-only: diagnoses the issue and suggests fixes
12
+ * - Sends suggestions to Telegram / logs — never modifies files
13
+ * - User must apply suggested fixes manually
14
+ *
15
+ * Safety guardrails:
16
+ * - Max 3 attempts per unique error signature
17
+ * - 1-minute cooldown between fix attempts (prevents rapid crash loops)
18
+ * - Tracks all attempts for audit (autofix-*.log in log dir)
19
+ * - Won't retry the same error more than 3 times (gives up → Telegram alert)
20
+ * - Timeout guard on codex exec (30 min default, lets the agent finish its work)
21
+ *
22
+ * Error formats handled:
23
+ * - Standard PS errors: ErrorType: filepath:line:col
24
+ * - ParserError format: ParserError: filepath:line (no column)
25
+ * - Method invocation errors
26
+ * - Generic PS error blocks with "Line |" markers
27
+ * - Raw log fallback: when no structured errors found, feeds raw tail to Codex
28
+ */
29
+
30
+ import { spawn, execSync } from "node:child_process";
31
+ import { existsSync, mkdirSync, createWriteStream } from "node:fs";
32
+ import { readFile, writeFile } from "node:fs/promises";
33
+ import { resolve, dirname } from "node:path";
34
+ import { fileURLToPath } from "node:url";
35
+ import { getConsoleLevel, LogLevel } from "./lib/logger.mjs";
36
+ import { isBenignErrorMention } from "./utils.mjs";
37
+ import { resolvePromptTemplate } from "./agent-prompts.mjs";
38
+ import { resolveCodexProfileRuntime } from "./codex-model-profiles.mjs";
39
+
40
+ const __dirname = dirname(fileURLToPath(import.meta.url));
41
+
42
+ // ── Dev mode detection ──────────────────────────────────────────────────────
43
+
44
+ /**
45
+ * Detect whether openfleet is running from its source repo (dev mode)
46
+ * or from an npm install (npm mode).
47
+ *
48
+ * Dev mode indicators:
49
+ * - Running from a path that contains the source repo structure
50
+ * - The parent directory has go.mod, Makefile, etc. (monorepo root)
51
+ * - AUTOFIX_MODE env var is set to "execute" (explicit override)
52
+ *
53
+ * npm mode indicators:
54
+ * - Running from node_modules/
55
+ * - No monorepo markers in parent directories
56
+ * - AUTOFIX_MODE env var is set to "analyze" (explicit override)
57
+ */
58
+ // ── Error extraction ────────────────────────────────────────────────────────
59
+
60
/**
 * Extract structured PowerShell errors from crash log text.
 *
 * The log is scanned line by line (rather than with one multi-line regex) so
 * that a missing trailing newline or an unusual block layout cannot derail
 * the whole parse. Four header shapes are recognised:
 *
 *   - `ErrorType: C:\path\file.ps1:line:col`   followed by a "Line |" block
 *   - `SomethingError: C:\path\file.ps1:line`  (no column, e.g. ParserError)
 *   - `At C:\path\file.ps1:line char:col`      followed by a "+ " block
 *   - `TerminatingError(Command): "message"`   (no file/line information)
 *
 * Only Windows drive-letter paths ending in `.ps1` are matched by the
 * file-based patterns.
 *
 * Results are de-duplicated by `signature`; the first occurrence wins.
 *
 * @param {string} logText - Raw crash log text. Null/undefined is treated as
 *   an empty log.
 * @returns {Array<{errorType: string, file: string, line: number,
 *   column: (number|null), message: string, signature: string,
 *   codeLine?: string}>} Errors in the order they first appear in the log.
 */
export function extractErrors(logText) {
  const errors = [];
  const seen = new Set();

  function addError(err) {
    if (!err || !err.file) return;
    // `line` must be a real line number, but 0 is legitimate: TerminatingError
    // entries carry no location and are recorded with line 0. A plain
    // truthiness check here would silently discard every one of them.
    if (!Number.isInteger(err.line) || err.line < 0) return;
    if (seen.has(err.signature)) return;
    seen.add(err.signature);
    errors.push(err);
  }

  const lines = String(logText ?? "").split(/\r?\n/);

  // Pattern A: "ErrorType: filepath:line:col" or "ErrorType: filepath:line",
  // followed by a "Line |" block.
  const errorHeaderWithCol =
    /^(\w[\w.-]+):\s+([A-Za-z]:\\[^\n:]+\.ps1):(\d+):(\d+)\s*$/;
  const errorHeaderNoCol =
    /^(\w[\w.-]*Error):\s+([A-Za-z]:\\[^\n:]+\.ps1):(\d+)\s*$/;

  // Pattern B: "At filepath:line char:col"
  const atLineHeader = /^At\s+([A-Za-z]:\\[^\n:]+\.ps1):(\d+)\s+char:(\d+)/;

  // Pattern C: TerminatingError(X): "message"
  const terminatingPattern = /TerminatingError\(([^)]+)\):\s*"(.+?)"/;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    // Pattern A with column.
    const withCol = line.match(errorHeaderWithCol);
    if (withCol) {
      const parsed = parseLineBlock(lines, i + 1);
      addError({
        errorType: withCol[1],
        file: withCol[2],
        line: Number(withCol[3]),
        column: Number(withCol[4]),
        codeLine: parsed.codeLine,
        message: parsed.message,
        signature: `${withCol[2]}:${withCol[3]}:${withCol[1]}`,
      });
      continue;
    }

    // Pattern A without column (ParserError and friends).
    const noCol = line.match(errorHeaderNoCol);
    if (noCol) {
      const parsed = parseLineBlock(lines, i + 1);
      addError({
        errorType: noCol[1],
        file: noCol[2],
        line: Number(noCol[3]),
        column: null,
        codeLine: parsed.codeLine,
        message: parsed.message,
        signature: `${noCol[2]}:${noCol[3]}:${noCol[1]}`,
      });
      continue;
    }

    // Pattern B: "At file:line char:col" stack-trace header. The error type
    // comes from the "+ " block when present, else defaults.
    const atLine = line.match(atLineHeader);
    if (atLine) {
      const parsed = parsePlusBlock(lines, i + 1);
      const errorType = parsed.errorType || "RuntimeException";
      addError({
        errorType,
        file: atLine[1],
        line: Number(atLine[2]),
        column: Number(atLine[3]),
        codeLine: parsed.codeLine,
        message: parsed.message,
        signature: `${atLine[1]}:${atLine[2]}:${errorType}`,
      });
      continue;
    }

    // Pattern C: TerminatingError — no location, keyed by the command name.
    const terminating = line.match(terminatingPattern);
    if (terminating) {
      addError({
        errorType: "TerminatingError",
        file: "unknown",
        line: 0,
        column: null,
        message: `${terminating[1]}: ${terminating[2].trim()}`,
        signature: `TerminatingError:${terminating[1]}`,
      });
    }
  }

  return errors;
}
165
+
166
/**
 * Read the "Line |" block that PowerShell prints beneath an error header.
 *
 * Expected layout, beginning at `startIdx`:
 *
 *   Line |
 *    123 | $code = Something
 *        | ~~~~~~
 *        | Error message here
 *
 * The "Line |" header and the numbered code row are each optional. Of the
 * trailing "| ..." rows, the last one that is neither empty nor a pure
 * `~~~~` underline is taken as the message.
 *
 * @param {string[]} lines - All log lines.
 * @param {number} startIdx - Index of the first line after the error header.
 * @returns {{codeLine: string, message: string}} `message` falls back to
 *   `codeLine` when no message row is present.
 */
function parseLineBlock(lines, startIdx) {
  let cursor = startIdx;

  // Optional "Line |" header row.
  if (cursor < lines.length && /^\s*Line\s*\|\s*$/.test(lines[cursor])) {
    cursor += 1;
  }

  // Optional " NNN | code..." row.
  let codeLine = "";
  if (cursor < lines.length && /^\s*\d+\s*\|/.test(lines[cursor])) {
    const codeHit = /^\s*\d+\s*\|\s*(.*)$/.exec(lines[cursor]);
    if (codeHit) codeLine = codeHit[1].trim();
    cursor += 1;
  }

  // Walk the run of "| ..." rows; remember the last meaningful one.
  let pipeMessage = "";
  for (; cursor < lines.length; cursor += 1) {
    const pipeHit = /^\s*\|\s*(.*)$/.exec(lines[cursor]);
    if (pipeHit === null) break;
    const text = pipeHit[1].trim();
    const isUnderline = /^~+$/.test(text);
    if (text && !isUnderline) pipeMessage = text;
  }

  return { codeLine, message: pipeMessage || codeLine };
}
210
+
211
/**
 * Read the "+ " block that follows an "At file:line char:col" header.
 *
 * Expected layout, beginning at `startIdx`:
 *
 *   + $result = Something
 *   + ~~~~~~~~~~
 *   + ErrorType: explanation here
 *
 * The first "+" row is treated as the offending code (unless it is only an
 * underline). Every later "+" row is checked for an "ErrorType: message"
 * shape; when several match, the last one wins.
 *
 * @param {string[]} lines - All log lines.
 * @param {number} startIdx - Index of the first line after the "At" header.
 * @returns {{codeLine: string, message: string, errorType: string}} Fields
 *   are empty strings when the corresponding row is absent.
 */
function parsePlusBlock(lines, startIdx) {
  const isPlusRow = (text) => /^\s*\+\s*/.test(text);
  const plusContent = (text) => {
    const hit = /^\s*\+\s*(.*)$/.exec(text);
    return hit ? hit[1].trim() : null;
  };
  const isUnderline = (text) => /^~+$/.test(text);

  const block = { codeLine: "", message: "", errorType: "" };

  let cursor = startIdx;
  if (cursor >= lines.length || !isPlusRow(lines[cursor])) {
    return block;
  }

  // First "+" row: the code that failed.
  const firstRow = plusContent(lines[cursor]);
  if (firstRow !== null && !isUnderline(firstRow)) {
    block.codeLine = firstRow;
  }
  cursor += 1;

  // Remaining "+" rows: ignore underlines, pick up "ErrorType: message".
  for (; cursor < lines.length && isPlusRow(lines[cursor]); cursor += 1) {
    const row = plusContent(lines[cursor]);
    if (row === null || isUnderline(row)) continue;
    const typed = /^(\w[\w.-]+):\s*(.+)$/.exec(row);
    if (typed) {
      block.errorType = typed[1];
      block.message = typed[2].trim();
    }
  }

  return block;
}
258
+
259
/**
 * Build a "generic crash" descriptor for logs in which no structured error
 * could be found, so the raw tail can be handed to Codex instead.
 *
 * Blank lines are discarded first. From what remains:
 *   - `tail` is the last 80 lines joined with "\n";
 *   - `errorLines` holds up to 10 of the most recent lines (taken from the
 *     last 40) that contain an error-like keyword and are not classified as
 *     benign by `isBenignErrorMention`.
 *
 * @param {string} logText - Raw crash log text.
 * @param {string} reason - Crash reason, passed through unchanged.
 * @returns {{tail: string, errorLines: string[], reason: string,
 *   lineCount: number}} `lineCount` is the number of non-blank lines.
 */
export function extractFallbackContext(logText, reason) {
  const nonBlank = logText
    .split(/\r?\n/)
    .filter((entry) => entry.trim().length > 0);

  // Keywords that suggest a line is reporting a failure.
  const suspiciousPatterns = [
    /error/i,
    /exception/i,
    /failed/i,
    /cannot /i,
    /unexpected/i,
    /invalid/i,
    /denied/i,
    /terminated/i,
  ];
  const looksLikeError = (entry) =>
    suspiciousPatterns.some((pattern) => pattern.test(entry));

  const flagged = [];
  for (const entry of nonBlank.slice(-40)) {
    if (looksLikeError(entry) && !isBenignErrorMention(entry)) {
      flagged.push(entry);
    }
  }

  return {
    tail: nonBlank.slice(-80).join("\n"),
    errorLines: flagged.slice(-10),
    reason,
    lineCount: nonBlank.length,
  };
}
292
+
293
+ // ── Source context reader ────────────────────────────────────────────────────
294
+
295
/**
 * Read the source lines surrounding an error location.
 *
 * Produces a numbered excerpt covering up to `contextLines` lines before and
 * after `errorLine` (clamped to the file bounds). The error line itself is
 * prefixed with a ">>>" marker.
 *
 * @param {string} filePath - Path of the source file to read.
 * @param {number} errorLine - 1-based line number of the error.
 * @param {number} [contextLines=30] - Lines of context on each side.
 * @returns {Promise<string>} The excerpt, or `(could not read <filePath>)`
 *   when the file cannot be read. Never rejects.
 */
async function readSourceContext(filePath, errorLine, contextLines = 30) {
  try {
    const source = await readFile(filePath, "utf8");
    const lines = source.split(/\r?\n/);
    // errorLine is 1-based while slice() indexes from 0; convert first so the
    // window is symmetric (previously it showed one line fewer before the
    // error than after it).
    const errorIdx = errorLine - 1;
    const start = Math.max(0, errorIdx - contextLines);
    const end = Math.min(lines.length, errorIdx + contextLines + 1);
    return lines
      .slice(start, end)
      .map((line, i) => {
        const lineNum = start + i + 1;
        const marker = lineNum === errorLine ? " >>>" : " ";
        return `${marker}${String(lineNum).padStart(5)} | ${line}`;
      })
      .join("\n");
  } catch {
    // Best effort: the excerpt only enriches the prompt, so an unreadable
    // file yields a placeholder rather than failing the fix attempt.
    return `(could not read ${filePath})`;
  }
}
317
+
318
+ // ── Fix tracking ────────────────────────────────────────────────────────────
319
+
320
+ // ── Dev mode detection ───────────────────────────────────────────────────────
321
+
322
/**
 * Memoised answer of `isDevMode()`; `null` means "not decided yet".
 * @type {boolean|null}
 */
let _devModeCache = null;

/**
 * Decide whether openfleet runs from its source repo (dev mode) or from an
 * npm install (npm mode). The answer is computed once and then cached.
 *
 * Decision order:
 *   1. `AUTOFIX_MODE` env var (case-insensitive): "execute"/"dev" force dev
 *      mode; "analyze"/"npm"/"suggest" force npm mode.
 *   2. A module directory under `node_modules/` means npm mode.
 *   3. Otherwise dev mode if any monorepo marker (go.mod, Makefile,
 *      AGENTS.md, x) exists two directories above this module.
 *
 * @returns {boolean} `true` for dev mode, `false` for npm mode.
 */
export function isDevMode() {
  if (_devModeCache !== null) return _devModeCache;

  // Store the decision and hand it back in one step.
  const remember = (decision) => {
    _devModeCache = decision;
    return decision;
  };

  const requested = (process.env.AUTOFIX_MODE || "").toLowerCase();
  switch (requested) {
    case "execute":
    case "dev":
      return remember(true);
    case "analyze":
    case "npm":
    case "suggest":
      return remember(false);
    default:
      break;
  }

  // Installed packages live somewhere beneath node_modules/.
  const here = __dirname.replace(/\\/g, "/").toLowerCase();
  if (here.includes("/node_modules/")) {
    return remember(false);
  }

  // Source checkouts have recognisable files at the monorepo root.
  const candidateRoot = resolve(__dirname, "..", "..");
  for (const marker of ["go.mod", "Makefile", "AGENTS.md", "x"]) {
    if (existsSync(resolve(candidateRoot, marker))) {
      return remember(true);
    }
  }
  return remember(false);
}

/** Forget the cached dev-mode decision so the next call re-evaluates (for testing). */
export function resetDevModeCache() {
  _devModeCache = null;
}
366
+
367
/**
 * Per-signature bookkeeping of fix attempts made during this process.
 * @type {Map<string, {count: number, lastAt: number}>}
 */
const fixAttempts = new Map();
// Upper bound on attempts for any single error signature.
const MAX_FIX_ATTEMPTS = 3;
// 1 min cooldown prevents rapid-fire crash loop while keeping retry cadence short.
const FIX_COOLDOWN_MS = 60_000;

/**
 * Tell whether another fix attempt is allowed for `signature`.
 *
 * @param {string} signature - Error signature.
 * @returns {boolean} `false` once the attempt limit is reached or while the
 *   cooldown since the last attempt is still running; `true` otherwise.
 */
function canAttemptFix(signature) {
  const entry = fixAttempts.get(signature);
  if (!entry) return true;

  const exhausted = entry.count >= MAX_FIX_ATTEMPTS;
  const coolingDown = Date.now() - entry.lastAt < FIX_COOLDOWN_MS;
  return !(exhausted || coolingDown);
}

/**
 * Note that a fix attempt for `signature` starts now.
 *
 * @param {string} signature - Error signature.
 */
function recordFixAttempt(signature) {
  let entry = fixAttempts.get(signature);
  if (!entry) {
    entry = { count: 0, lastAt: 0 };
    fixAttempts.set(signature, entry);
  }
  entry.count += 1;
  entry.lastAt = Date.now();
}

/**
 * Number of fix attempts recorded so far for `signature`.
 *
 * @param {string} signature - Error signature.
 * @returns {number} Attempt count; 0 when none were recorded.
 */
export function getFixAttemptCount(signature) {
  const entry = fixAttempts.get(signature);
  return entry ? entry.count || 0 : 0;
}
391
+
392
+ // ── Codex exec runner ───────────────────────────────────────────────────────
393
+
394
/**
 * Run `codex exec --full-auto` with a fix prompt supplied on stdin.
 *
 * Codex writes its fixes directly to disk inside `cwd`. Everything the child
 * prints is captured and mirrored into a per-run log file under
 * `<logDir>/codex-sdk/` (or `<module dir>/logs/codex-sdk/` when `logDir` is
 * not given).
 *
 * The returned promise always resolves (it never rejects) and resolves exactly
 * once, whichever of these happens first:
 *   - the child exits            → success reflects the exit code
 *   - the child emits "error"    → e.g. ENOENT when the binary is missing
 *   - `timeoutMs` elapses        → the child is sent SIGTERM
 *   - setup fails                → log dir cannot be created / spawn throws
 *
 * @param {string} prompt - Prompt text written to the child's stdin.
 * @param {string} cwd - Working directory; also passed to codex via `-C`.
 * @param {number} [timeoutMs=1800000] - Time allowed before the child is
 *   terminated.
 * @param {string|null} [logDir=null] - Base directory for the run log.
 * @returns {Promise<{success: boolean, output: string,
 *   error: (string|null), logPath: string}>}
 */
export function runCodexExec(
  prompt,
  cwd,
  timeoutMs = 1_800_000,
  logDir = null,
) {
  // Capture path.resolve under a name that cannot be confused with the
  // promise's own resolve callback.
  const pathResolve = resolve;
  return new Promise((promiseResolve) => {
    // ── Setup Codex SDK log directory ──────────────────────────────────
    const codexLogDir = logDir
      ? pathResolve(logDir, "codex-sdk")
      : pathResolve(__dirname, "logs", "codex-sdk");
    const stamp = new Date().toISOString().replace(/[:.]/g, "-");
    const logPath = pathResolve(codexLogDir, `codex-exec-${stamp}.log`);

    try {
      // recursive: true makes this a no-op when the directory already exists.
      mkdirSync(codexLogDir, { recursive: true });
    } catch (err) {
      // Report through the normal result shape instead of rejecting.
      return promiseResolve({
        success: false,
        output: "",
        error: `failed to create log dir: ${err.message}`,
        logPath,
      });
    }

    // The prompt goes via stdin (no positional arg) to avoid any word-splitting.
    const args = [
      "exec",
      "--full-auto",
      "-a",
      "auto-edit",
      "--sandbox",
      "workspace-write",
      "-C",
      cwd,
    ];

    let child;
    try {
      // Build a clean env for the codex CLI binary.
      // Auto-detect Azure: if OPENAI_BASE_URL contains .openai.azure.com,
      // configure the CLI for Azure via -c overrides and AZURE_OPENAI_API_KEY.
      const { env: codexEnv } = resolveCodexProfileRuntime(process.env);
      const baseUrl = codexEnv.OPENAI_BASE_URL || "";
      const isAzure = baseUrl.includes(".openai.azure.com");
      if (isAzure) {
        // Map OPENAI_API_KEY → AZURE_OPENAI_API_KEY for Azure auth header
        if (codexEnv.OPENAI_API_KEY && !codexEnv.AZURE_OPENAI_API_KEY) {
          codexEnv.AZURE_OPENAI_API_KEY = codexEnv.OPENAI_API_KEY;
        }
        // Inject Azure provider config via -c overrides
        args.push(
          "-c", 'model_provider="azure"',
          "-c", 'model_providers.azure.name="Azure OpenAI"',
          "-c", `model_providers.azure.base_url="${baseUrl}"`,
          "-c", 'model_providers.azure.env_key="AZURE_OPENAI_API_KEY"',
          "-c", 'model_providers.azure.wire_api="responses"',
        );
        // Azure deployment names often differ from the configured model, so
        // an explicit CODEX_MODEL takes precedence.
        const azureModel = codexEnv.CODEX_MODEL;
        if (azureModel) {
          args.push("-m", azureModel);
        }
      }
      // Always strip OPENAI_BASE_URL — for Azure we use -c overrides above,
      // for non-Azure the CLI should use its built-in endpoint.
      delete codexEnv.OPENAI_BASE_URL;

      // The binary is invoked directly with an argument array on every
      // platform (no shell), so nothing in `args` is shell-interpreted.
      // No spawn-level timeout: our own timer below owns termination.
      child = spawn("codex", args, {
        cwd,
        stdio: ["pipe", "pipe", "pipe"],
        env: codexEnv,
        shell: false,
      });
    } catch (err) {
      return promiseResolve({
        success: false,
        output: "",
        error: `spawn failed: ${err.message}`,
        logPath,
      });
    }

    // A child that dies before reading its input makes the stdin stream emit
    // "error" (EPIPE). Without a listener that would crash the process; the
    // failure itself is reported through the exit/error handlers below.
    child.stdin.on("error", () => {});
    try {
      child.stdin.write(prompt);
      child.stdin.end();
    } catch {
      /* stdin may already be closed */
    }

    let stdout = "";
    let stderr = "";
    const stream = createWriteStream(logPath, { flags: "w" });
    // Logging is best effort: a failing log file must not take the process down.
    stream.on("error", (err) => {
      console.warn(`[autofix] codex log write failed: ${err.message}`);
    });
    // Writing to an ended/destroyed stream emits "error", so guard every write.
    const log = (text) => {
      if (!stream.writableEnded && !stream.destroyed) stream.write(text);
    };

    log(
      [
        `# Codex SDK execution log`,
        `# Timestamp: ${new Date().toISOString()}`,
        `# Working directory: ${cwd}`,
        `# Command: codex ${args.join(" ")}`,
        `# Timeout: ${timeoutMs}ms`,
        ``,
        `## Prompt sent to Codex:`,
        prompt,
        ``,
        `## Codex SDK output stream:`,
        ``,
      ].join("\n"),
    );

    child.stdout.on("data", (chunk) => {
      const text = chunk.toString();
      stdout += text;
      log(text);
      // Only echo live agent output when --verbose or --trace is used
      if (getConsoleLevel() <= LogLevel.DEBUG) process.stdout.write(text);
    });
    child.stderr.on("data", (chunk) => {
      const text = chunk.toString();
      stderr += text;
      log(`[stderr] ${text}`);
      // Only echo live stderr when --verbose or --trace is used
      if (getConsoleLevel() <= LogLevel.DEBUG) process.stderr.write(text);
    });

    // "error" may be followed by "exit", and a timed-out child still emits
    // "exit" once the signal lands. Only the first outcome may close the log
    // and resolve the promise.
    let settled = false;
    let timer = null;
    const settle = (result, trailer) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      log(trailer);
      stream.end();
      promiseResolve({ ...result, logPath });
    };

    timer = setTimeout(() => {
      if (settled) return;
      try {
        child.kill("SIGTERM");
      } catch {
        /* best effort */
      }
      settle(
        {
          success: false,
          output: stdout,
          error: "timeout after " + timeoutMs + "ms",
        },
        `\n\n## TIMEOUT after ${timeoutMs}ms\n`,
      );
    }, timeoutMs);

    child.on("error", (err) => {
      settle(
        { success: false, output: stdout, error: err.message },
        `\n\n## ERROR: ${err.message}\n`,
      );
    });

    child.on("exit", (code) => {
      settle(
        {
          success: code === 0,
          output: stdout + (stderr ? "\n" + stderr : ""),
          error: code !== 0 ? `exit code ${code}` : null,
        },
        `\n\n## Exit code: ${code}\n\n## stderr:\n${stderr}\n`,
      );
    });
  });
}
595
+
596
+ // ── Main auto-fix function ──────────────────────────────────────────────────
597
+
598
/**
 * List the files `git diff --name-only` currently reports for a repository.
 * Callers take one snapshot before and one after a fix run and compare them.
 *
 * @param {string} repoRoot - Directory in which git is executed.
 * @returns {string[]} Trimmed, non-empty paths; an empty array when git
 *   fails or does not answer within 10 seconds.
 */
function detectChangedFiles(repoRoot) {
  let raw;
  try {
    raw = execSync("git diff --name-only", {
      cwd: repoRoot,
      encoding: "utf8",
      timeout: 10_000,
    });
  } catch {
    // Not a repo, git missing, timeout — treated as "nothing changed".
    return [];
  }

  const names = [];
  for (const entry of raw.split(/\r?\n/)) {
    const name = entry.trim();
    if (name) names.push(name);
  }
  return names;
}
617
+
618
/**
 * Produce a short, Telegram-friendly summary of changes.
 *
 * @param {string} repoRoot - Directory in which git is executed.
 * @param {string[]} files - Changed file paths.
 * @returns {string} The trimmed `git diff --stat` output when available;
 *   otherwise the file list joined with ", ". A fixed placeholder is
 *   returned when `files` is empty.
 */
function getChangeSummary(repoRoot, files) {
  if (!files.length) return "(no file changes detected)";

  const fileList = files.join(", ");
  let stat;
  try {
    stat = execSync("git diff --stat", {
      cwd: repoRoot,
      encoding: "utf8",
      timeout: 10_000,
    }).trim();
  } catch {
    // git unavailable or timed out — the plain file list is still useful.
    return fileList;
  }
  return stat || fileList;
}
634
+
635
+ /**
636
+ * Attempt to auto-fix errors found in a crash log.
637
+ *
638
+ * In DEV MODE: extracts errors → runs codex exec → applies fixes to disk.
639
+ * In NPM MODE: extracts errors → runs codex exec in read-only → sends
640
+ * suggested fix to Telegram/logs. Never modifies files.
641
+ *
642
+ * @param {object} opts
643
+ * @param {string} opts.logText — tail of the crash log
644
+ * @param {string} opts.reason — crash reason (signal/exit code)
645
+ * @param {string} opts.repoRoot — repository root directory
646
+ * @param {string} opts.logDir — directory for fix audit logs
647
+ * @param {function} [opts.onTelegram] — optional callback to send Telegram message
648
+ * @param {string[]} [opts.recentMessages] — recent Telegram messages for context
649
+ * @param {object} [opts.promptTemplates] — optional prompt template overrides
650
+ * @returns {Promise<{fixed: boolean, errors: object[], skipped: string[], outcome: string}>}
651
+ */
652
+ export async function attemptAutoFix(opts) {
653
+ const {
654
+ logText,
655
+ reason,
656
+ repoRoot,
657
+ logDir,
658
+ onTelegram,
659
+ recentMessages,
660
+ promptTemplates = {},
661
+ } = opts;
662
+
663
+ const errors = extractErrors(logText);
664
+
665
+ // ── Fallback: no structured errors → feed raw log to Codex ────────────
666
+ if (errors.length === 0) {
667
+ console.log(
668
+ "[autofix] no structured errors found — trying raw log fallback",
669
+ );
670
+
671
+ const fallback = extractFallbackContext(logText, reason);
672
+
673
+ // Don't attempt fallback on empty logs or clean exits
674
+ if (
675
+ fallback.lineCount < 3 &&
676
+ !fallback.errorLines.length &&
677
+ reason === "exit 0"
678
+ ) {
679
+ console.log("[autofix] clean exit with minimal log — skipping fallback");
680
+ return {
681
+ fixed: false,
682
+ errors: [],
683
+ skipped: [],
684
+ outcome: "clean-exit-skip",
685
+ };
686
+ }
687
+
688
+ const fallbackSig = `raw-fallback:${reason}`;
689
+ if (!canAttemptFix(fallbackSig)) {
690
+ const count = getFixAttemptCount(fallbackSig);
691
+ console.warn(
692
+ `[autofix] raw fallback exhausted (${count}/${MAX_FIX_ATTEMPTS})`,
693
+ );
694
+ if (onTelegram) {
695
+ onTelegram(
696
+ `🔧 Auto-fix gave up on raw crash (${reason}) after ${MAX_FIX_ATTEMPTS} attempts.\nManual intervention required.`,
697
+ );
698
+ }
699
+ return {
700
+ fixed: false,
701
+ errors: [],
702
+ skipped: [fallbackSig],
703
+ outcome: "fallback-exhausted",
704
+ };
705
+ }
706
+
707
+ recordFixAttempt(fallbackSig);
708
+ const attemptNum = getFixAttemptCount(fallbackSig);
709
+ const devMode = isDevMode();
710
+ const modeLabel = devMode ? "execute" : "analyze-only";
711
+
712
+ if (onTelegram) {
713
+ onTelegram(
714
+ `🔧 Auto-fix starting [${modeLabel}] (raw fallback, attempt #${attemptNum}):\nCrash: ${reason}\nError indicators: ${fallback.errorLines.length} suspicious lines`,
715
+ );
716
+ }
717
+
718
+ const prompt = buildFallbackPrompt(
719
+ fallback,
720
+ recentMessages,
721
+ promptTemplates.autofixFallback,
722
+ );
723
+
724
+ // Audit log
725
+ const stamp = new Date().toISOString().replace(/[:.]/g, "-");
726
+ const auditPath = resolve(
727
+ logDir,
728
+ `autofix-fallback-${stamp}-attempt${attemptNum}.log`,
729
+ );
730
+ await writeFile(
731
+ auditPath,
732
+ [
733
+ `# Auto-fix FALLBACK attempt #${attemptNum} [${modeLabel}]`,
734
+ `# Reason: ${reason}`,
735
+ `# Error lines found: ${fallback.errorLines.length}`,
736
+ `# Timestamp: ${new Date().toISOString()}`,
737
+ "",
738
+ "## Prompt sent to Codex:",
739
+ prompt,
740
+ "",
741
+ ].join("\n"),
742
+ "utf8",
743
+ );
744
+
745
+ // ── NPM mode: analyze only, suggest fix to user ──────────────────
746
+ if (!devMode) {
747
+ console.log("[autofix] npm mode — skipping execution, sending analysis");
748
+
749
+ const suggestion =
750
+ `📋 *Auto-fix analysis* (raw fallback, attempt #${attemptNum}):\n` +
751
+ `Crash: ${reason}\n\n` +
752
+ `**Error indicators found:**\n` +
753
+ (fallback.errorLines.length > 0
754
+ ? fallback.errorLines
755
+ .slice(0, 10)
756
+ .map((l) => `• ${l}`)
757
+ .join("\n")
758
+ : "(no explicit error lines — possible SIGKILL/OOM)") +
759
+ `\n\n**Suggested action:** Review the error indicators above. ` +
760
+ `The main orchestrator script is \`scripts/openfleet/ve-orchestrator.ps1\`. ` +
761
+ `Check for PowerShell syntax errors, null references, or infinite retry loops.`;
762
+
763
+ await writeFile(
764
+ auditPath,
765
+ [
766
+ "",
767
+ `## Mode: ANALYZE-ONLY (npm mode)`,
768
+ `## Suggestion sent to user (no files modified)`,
769
+ suggestion,
770
+ ].join("\n"),
771
+ { flag: "a" },
772
+ );
773
+
774
+ if (onTelegram) onTelegram(suggestion);
775
+
776
+ return {
777
+ fixed: false,
778
+ errors: [],
779
+ skipped: [],
780
+ outcome: suggestion,
781
+ };
782
+ }
783
+
784
+ // ── DEV mode: execute fix via Codex ──────────────────────────────
785
+ const filesBefore = detectChangedFiles(repoRoot);
786
+ const result = await runCodexExec(prompt, repoRoot, 1_800_000, logDir);
787
+ const filesAfter = detectChangedFiles(repoRoot);
788
+
789
+ // Detect new changes
790
+ const newChanges = filesAfter.filter((f) => !filesBefore.includes(f));
791
+ const changeSummary = getChangeSummary(repoRoot, newChanges);
792
+
793
+ await writeFile(
794
+ auditPath,
795
+ [
796
+ "",
797
+ `## Mode: EXECUTE (dev mode)`,
798
+ `## Codex SDK full log: ${result.logPath || "N/A"}`,
799
+ `## Codex result (success=${result.success}):`,
800
+ result.output || "(no output)",
801
+ result.error ? `## Error: ${result.error}` : "",
802
+ `## Files changed: ${newChanges.join(", ") || "none"}`,
803
+ ].join("\n"),
804
+ { flag: "a" },
805
+ );
806
+
807
+ if (result.success && newChanges.length > 0) {
808
+ const outcomeMsg =
809
+ `🔧 Auto-fix applied (raw fallback, attempt #${attemptNum}):\n` +
810
+ `Crash: ${reason}\n` +
811
+ `Changes:\n${changeSummary}\n` +
812
+ `Codex SDK log: ${result.logPath}`;
813
+ console.log(`[autofix] fallback fix applied: ${newChanges.join(", ")}`);
814
+ console.log(`[autofix] Codex SDK full log: ${result.logPath}`);
815
+ if (onTelegram) onTelegram(outcomeMsg);
816
+ return {
817
+ fixed: true,
818
+ errors: [],
819
+ skipped: [],
820
+ outcome: outcomeMsg,
821
+ };
822
+ } else {
823
+ const outcomeMsg =
824
+ `🔧 Auto-fix fallback failed (attempt #${attemptNum}):\n` +
825
+ `Crash: ${reason}\n` +
826
+ `Codex: ${result.error || "no changes written"}\n` +
827
+ `Codex SDK log: ${result.logPath}`;
828
+ console.warn(`[autofix] fallback codex exec failed: ${result.error}`);
829
+ console.log(`[autofix] Codex SDK full log: ${result.logPath}`);
830
+ if (onTelegram) onTelegram(outcomeMsg);
831
+ return {
832
+ fixed: false,
833
+ errors: [],
834
+ skipped: [],
835
+ outcome: outcomeMsg,
836
+ };
837
+ }
838
+ }
839
+
840
+ // ── Structured errors found ───────────────────────────────────────────
841
+ console.log(`[autofix] found ${errors.length} error(s) in crash log`);
842
+
843
+ const devMode = isDevMode();
844
+ const modeLabel = devMode ? "execute" : "analyze-only";
845
+
846
+ if (onTelegram) {
847
+ const errorSummary = errors
848
+ .map((e) => `• ${e.errorType}: ${e.file}:${e.line}`)
849
+ .join("\n");
850
+ onTelegram(
851
+ `🔧 Auto-fix starting [${modeLabel}]:\nFound ${errors.length} error(s):\n${errorSummary}`,
852
+ );
853
+ }
854
+
855
+ const skipped = [];
856
+ let anyFixed = false;
857
+ const outcomes = [];
858
+
859
+ for (const error of errors) {
860
+ // Dedup: skip if we've already tried this fix too many times
861
+ if (!canAttemptFix(error.signature)) {
862
+ const count = getFixAttemptCount(error.signature);
863
+ console.warn(
864
+ `[autofix] skipping ${error.signature} (${count}/${MAX_FIX_ATTEMPTS} attempts exhausted or cooldown)`,
865
+ );
866
+ skipped.push(error.signature);
867
+
868
+ if (count >= MAX_FIX_ATTEMPTS && onTelegram) {
869
+ onTelegram(
870
+ `🔧 Auto-fix gave up on ${error.file}:${error.line} after ${MAX_FIX_ATTEMPTS} attempts.\n` +
871
+ `Error: ${error.message}\nManual intervention required.`,
872
+ );
873
+ }
874
+ continue;
875
+ }
876
+
877
+ recordFixAttempt(error.signature);
878
+ const attemptNum = getFixAttemptCount(error.signature);
879
+
880
+ console.log(
881
+ `[autofix] attempting fix #${attemptNum} [${modeLabel}] for ${error.file}:${error.line} — ${error.errorType}`,
882
+ );
883
+
884
+ // Read source context around the error
885
+ const sourceContext =
886
+ error.file !== "unknown"
887
+ ? await readSourceContext(error.file, error.line)
888
+ : "(file unknown — error extracted from log)";
889
+
890
+ // Build a focused fix prompt
891
+ const prompt = buildFixPrompt(
892
+ error,
893
+ sourceContext,
894
+ reason,
895
+ recentMessages,
896
+ promptTemplates.autofixFix,
897
+ );
898
+
899
+ // Write prompt to audit log
900
+ const stamp = new Date().toISOString().replace(/[:.]/g, "-");
901
+ const auditPath = resolve(
902
+ logDir,
903
+ `autofix-${stamp}-attempt${attemptNum}.log`,
904
+ );
905
+ await writeFile(
906
+ auditPath,
907
+ [
908
+ `# Auto-fix attempt #${attemptNum} [${modeLabel}]`,
909
+ `# Error: ${error.errorType} at ${error.file}:${error.line}`,
910
+ `# Message: ${error.message}`,
911
+ `# Reason: ${reason}`,
912
+ `# Timestamp: ${new Date().toISOString()}`,
913
+ "",
914
+ "## Prompt sent to Codex:",
915
+ prompt,
916
+ "",
917
+ ].join("\n"),
918
+ "utf8",
919
+ );
920
+
921
+ // ── NPM mode: analyze only, suggest fix to user ──────────────────
922
+ if (!devMode) {
923
+ const suggestion =
924
+ `📋 *Auto-fix analysis* (attempt #${attemptNum}):\n` +
925
+ `**${error.errorType}** at \`${error.file}:${error.line}\`\n` +
926
+ `Message: ${error.message}\n` +
927
+ (error.codeLine ? `Failing code: \`${error.codeLine}\`\n` : "") +
928
+ `\n**Source context:**\n\`\`\`\n${sourceContext.slice(0, 800)}\n\`\`\`\n` +
929
+ `\n**Suggested fix:** Check line ${error.line} for the ${error.errorType}. ` +
930
+ `Common causes: null references, array/object type mismatches, ` +
931
+ `missing variable declarations, or scope issues.`;
932
+
933
+ await writeFile(
934
+ auditPath,
935
+ [
936
+ "",
937
+ `## Mode: ANALYZE-ONLY (npm mode)`,
938
+ `## Suggestion sent to user (no files modified)`,
939
+ suggestion,
940
+ ].join("\n"),
941
+ { flag: "a" },
942
+ );
943
+
944
+ outcomes.push(suggestion);
945
+ if (onTelegram) onTelegram(suggestion);
946
+ continue;
947
+ }
948
+
949
+ // ── DEV mode: execute fix via Codex ──────────────────────────────
950
+
951
+ // Snapshot files before
952
+ const filesBefore = detectChangedFiles(repoRoot);
953
+
954
+ // Run Codex
955
+ const result = await runCodexExec(prompt, repoRoot);
956
+
957
+ // Detect what changed
958
+ const filesAfter = detectChangedFiles(repoRoot);
959
+ const newChanges = filesAfter.filter((f) => !filesBefore.includes(f));
960
+ const changeSummary = getChangeSummary(repoRoot, newChanges);
961
+
962
+ // Append result to audit log
963
+ await writeFile(
964
+ auditPath,
965
+ [
966
+ "",
967
+ `## Mode: EXECUTE (dev mode)`,
968
+ `## Codex result (success=${result.success}):`,
969
+ result.output || "(no output)",
970
+ result.error ? `## Error: ${result.error}` : "",
971
+ `## Files changed: ${newChanges.join(", ") || "none"}`,
972
+ ].join("\n"),
973
+ { flag: "a" },
974
+ );
975
+
976
+ if (result.success) {
977
+ const outcomeMsg =
978
+ `🔧 Auto-fix applied (attempt #${attemptNum}):\n` +
979
+ `${error.errorType} at ${error.file}:${error.line}\n` +
980
+ `"${error.message}"\n` +
981
+ `Changes:\n${changeSummary}`;
982
+
983
+ console.log(
984
+ `[autofix] fix applied for ${error.file}:${error.line} — file watcher will restart orchestrator`,
985
+ );
986
+ anyFixed = true;
987
+ outcomes.push(outcomeMsg);
988
+
989
+ if (onTelegram) onTelegram(outcomeMsg);
990
+ } else {
991
+ const outcomeMsg =
992
+ `🔧 Auto-fix failed (attempt #${attemptNum}):\n` +
993
+ `${error.errorType} at ${error.file}:${error.line}\n` +
994
+ `Codex: ${result.error || "no changes written"}`;
995
+
996
+ console.warn(
997
+ `[autofix] codex exec failed: ${result.error || "unknown error"}`,
998
+ );
999
+ outcomes.push(outcomeMsg);
1000
+
1001
+ if (onTelegram) onTelegram(outcomeMsg);
1002
+ }
1003
+ }
1004
+
1005
+ return {
1006
+ fixed: anyFixed,
1007
+ errors,
1008
+ skipped,
1009
+ outcome: outcomes.join("\n---\n"),
1010
+ };
1011
+ }
1012
+
1013
+ // ── Prompt builders ─────────────────────────────────────────────────────────
1014
+
1015
/**
 * Render the trailing monitor notifications as a Markdown section that the
 * auto-fix prompts embed for extra context.
 *
 * @param {string[]} [recentMessages] - notifications; the last `limit` entries
 *   are kept (presumably ordered oldest → newest — confirm against the caller)
 * @param {number} [limit=15] - maximum number of trailing messages to include
 * @returns {string} the section, wrapped in a leading and trailing newline, or
 *   "" when there is nothing to show
 */
function buildRecentMessagesContext(recentMessages, limit = 15) {
  // Anything that is not a non-empty array (undefined, null, a stray string)
  // yields no section instead of throwing on `.map`.
  if (!Array.isArray(recentMessages) || recentMessages.length === 0) return "";

  // slice(-0) would return the whole array, so reject non-positive limits.
  const count = Math.floor(Number(limit));
  if (!(count > 0)) return "";

  const numbered = recentMessages
    .slice(-count)
    .map((message, index) => `[${index + 1}] ${message}`)
    .join("\n");

  return `
## Recent monitor notifications (for context — shows what led to this crash)
${numbered}
`;
}
1023
+
1024
/**
 * Build the Codex prompt for fixing one structured error extracted from the
 * crash log.
 *
 * The built-in prompt is only the fallback: `resolvePromptTemplate` receives
 * the caller's template, the placeholder values, and the built-in text.
 *
 * @param {object} error - parsed error; reads `errorType`, `file`, `line`,
 *   `message` and the optional `column` / `codeLine`
 * @param {string} sourceContext - source excerpt around the failing line
 * @param {string} reason - crash reason reported by the monitor
 * @param {string[]} [recentMessages] - recent notifications for context
 * @param {string} [promptTemplate=""] - optional custom prompt template
 * @returns {*} whatever `resolvePromptTemplate` returns (presumably the final
 *   prompt string — confirm against that helper)
 */
function buildFixPrompt(
  error,
  sourceContext,
  reason,
  recentMessages,
  promptTemplate = "",
) {
  const recentContext = buildRecentMessagesContext(recentMessages);

  // Optional detail lines; empty when the error carries no such detail.
  const columnLine = error.column ? `Column: ${error.column}` : "";
  const codeLine = error.codeLine ? `Failing code: ${error.codeLine}` : "";
  // Optional lines are appended on their own line only when present.
  const onOwnLine = (text) => (text ? `\n${text}` : "");

  // Interpolated entries stay inside template literals so that undefined/null
  // values stringify exactly as they would in a single template literal.
  const builtInPrompt = [
    "You are a PowerShell expert fixing a crash in a running orchestrator script.",
    "",
    "## Error",
    `Type: ${error.errorType}`,
    `File: ${error.file}`,
    `Line: ${error.line}${onOwnLine(columnLine)}`,
    `Message: ${error.message}${onOwnLine(codeLine)}`,
    `Crash reason: ${reason}`,
    "",
    `## Source context around line ${error.line}`,
    "```powershell",
    `${sourceContext}`,
    "```",
    `${recentContext}`,
    "## Instructions",
    `1. Read the file "${error.file}"`,
    `2. Identify the root cause of the error at line ${error.line}`,
    "3. Fix ONLY the bug — minimal change, don't refactor unrelated code",
    "4. Common PowerShell pitfalls:",
    "- `+=` on arrays with single items fails — use [List[object]] or @() wrapping",
    "- `$a + $b` on PSObjects fails — iterate and add individually",
    "- Pipeline output can be a single object, not an array — always wrap with @()",
    "- `$null.Method()` crashes — add null guards",
    '- Named mutex with "Global\\\\" prefix fails on non-elevated Windows — use plain names',
    "- `$Var:` is treated as a scope-qualified variable — use `${Var}:` to embed colon in string",
    "- ParserError: check for syntax issues like unclosed brackets, bad string interpolation",
    "5. Write the fix to the file. Do NOT create new files or refactor other functions.",
    "6. Keep all existing functionality intact.",
  ].join("\n");

  const placeholders = {
    ERROR_TYPE: error.errorType,
    ERROR_FILE: error.file,
    ERROR_LINE: error.line,
    ERROR_COLUMN_LINE: columnLine,
    ERROR_MESSAGE: error.message,
    ERROR_CODE_LINE: codeLine,
    CRASH_REASON: reason,
    SOURCE_CONTEXT: sourceContext,
    RECENT_MESSAGES_CONTEXT: recentContext,
  };

  return resolvePromptTemplate(promptTemplate, placeholders, builtInPrompt);
}
1077
+
1078
/**
 * Build the Codex prompt used when no structured error could be extracted
 * from the crash log and only raw log evidence is available.
 *
 * @param {object} fallback - raw crash evidence; reads `reason`, `errorLines`
 *   (array), `lineCount` and `tail`
 * @param {string[]} [recentMessages] - recent notifications for context
 * @param {string} [promptTemplate=""] - optional custom prompt template
 * @returns {*} whatever `resolvePromptTemplate` returns (presumably the final
 *   prompt string — confirm against that helper)
 */
function buildFallbackPrompt(fallback, recentMessages, promptTemplate = "") {
  const recentContext = buildRecentMessagesContext(recentMessages);

  // Shared between the built-in prompt and the template placeholders.
  const errorIndicators =
    fallback.errorLines.length > 0
      ? fallback.errorLines.join("\n")
      : "(no explicit error lines detected — possible SIGKILL, OOM, or silent crash)";
  // The heading never advertises more than 80 lines.
  const shownLineCount = Math.min(80, fallback.lineCount);

  // Interpolated entries stay inside template literals so that undefined/null
  // values stringify exactly as they would in a single template literal.
  const builtInPrompt = [
    "You are a PowerShell expert analyzing an orchestrator script crash.",
    `No structured error was extracted — the process terminated with: ${fallback.reason}`,
    "",
    "## Error indicators from log tail",
    `${errorIndicators}`,
    "",
    `## Last ${shownLineCount} lines of crash log`,
    "```",
    `${fallback.tail}`,
    "```",
    `${recentContext}`,
    "## Instructions",
    "1. Analyze the log for the root cause of the crash",
    "2. The main orchestrator script is: scripts/openfleet/ve-orchestrator.ps1",
    "3. If you can identify a fixable bug, apply a minimal fix to the file",
    "4. Common crash causes:",
    "- PowerShell syntax errors ($Var: treated as scope, missing brackets)",
    "- Array/object operation errors (+=, +, pipeline single-item issues)",
    "- Null reference errors on optional API responses",
    "- Infinite loops or stack overflow from recursive calls",
    "- Exit code 4294967295 = unsigned overflow from uncaught exception",
    "5. If the crash is external (SIGKILL, OOM) with no code bug, do nothing",
    "6. Write any fix directly to the file. Keep existing functionality intact.",
  ].join("\n");

  const placeholders = {
    FALLBACK_REASON: fallback.reason,
    FALLBACK_ERROR_LINES: errorIndicators,
    FALLBACK_LINE_COUNT: shownLineCount,
    FALLBACK_TAIL: fallback.tail,
    RECENT_MESSAGES_CONTEXT: recentContext,
  };

  return resolvePromptTemplate(promptTemplate, placeholders, builtInPrompt);
}
1119
+
1120
+ // ── Repeating error (loop) fixer ────────────────────────────────────────────
1121
+
1122
/**
 * Fix a looping/repeating error detected while the orchestrator is still running.
 * Unlike attemptAutoFix (which handles crashes), this runs proactively when the
 * monitor detects the same error line appearing repeatedly.
 *
 * In DEV MODE: applies fix via Codex exec.
 * In NPM MODE: analyzes and sends fix suggestion to user.
 *
 * Attempts are tracked under the signature `loop:<first 120 chars of the line>`.
 * When `canAttemptFix` refuses the signature, the user is only told that the
 * fixer "gave up" once the attempt count has reached MAX_FIX_ATTEMPTS; a
 * refusal below that count is treated as a cooldown and is logged and returned
 * without a Telegram notification (same rule attemptAutoFix applies).
 *
 * A fix counts as applied only when Codex reports success AND the set of
 * changed files gained at least one path that was not already listed before
 * the run.
 *
 * @param {object} opts
 * @param {string} opts.errorLine — the repeating error line
 * @param {number} opts.repeatCount — how many times it has repeated
 * @param {string} opts.repoRoot — repository root
 * @param {string} opts.logDir — log directory
 * @param {function} [opts.onTelegram] — Telegram callback
 * @param {string[]} [opts.recentMessages] — recent Telegram messages for context
 * @param {string} [opts.promptTemplate] — optional loop-fix prompt template
 * @returns {Promise<{fixed: boolean, outcome: string}>}
 */
export async function fixLoopingError(opts) {
  const {
    errorLine,
    repeatCount,
    repoRoot,
    logDir,
    onTelegram,
    recentMessages,
    promptTemplate = "",
  } = opts;

  const signature = `loop:${errorLine.slice(0, 120)}`;

  if (!canAttemptFix(signature)) {
    const count = getFixAttemptCount(signature);

    if (count >= MAX_FIX_ATTEMPTS) {
      // Attempts exhausted: this is the only refusal worth paging the user for.
      const outcome = `🔁 Loop fix gave up on repeating error after ${count} attempts.\n"${errorLine.slice(0, 200)}"\nManual intervention required.`;
      console.warn(`[autofix] loop fix exhausted for: ${errorLine.slice(0, 80)}`);
      if (onTelegram) onTelegram(outcome);
      return { fixed: false, outcome };
    }

    // Refused below the attempt limit: treated as a cooldown. Do not claim
    // that the fixer gave up, and do not notify the user on every repetition.
    const outcome = `🔁 Loop fix skipped (cooldown active, ${count}/${MAX_FIX_ATTEMPTS} attempts used).\n"${errorLine.slice(0, 200)}"`;
    console.warn(
      `[autofix] loop fix skipped (cooldown) for: ${errorLine.slice(0, 80)}`,
    );
    return { fixed: false, outcome };
  }

  recordFixAttempt(signature);
  const attemptNum = getFixAttemptCount(signature);
  const devMode = isDevMode();
  const modeLabel = devMode ? "execute" : "analyze-only";

  if (onTelegram) {
    onTelegram(
      `🔁 Repeating error detected [${modeLabel}] (${repeatCount}x, fix attempt #${attemptNum}):\n"${errorLine.slice(0, 200)}"`,
    );
  }

  const messagesCtx = buildRecentMessagesContext(recentMessages);

  // Built-in prompt; only used when no custom template is supplied.
  const defaultPrompt = `You are a PowerShell expert fixing a loop bug in a running orchestrator script.

## Problem
The following error line is repeating ${repeatCount} times in the orchestrator output,
indicating an infinite retry loop that needs to be fixed:

"${errorLine}"

${messagesCtx}

## Instructions
1. The main script is: scripts/openfleet/ve-orchestrator.ps1
2. Search for the code that produces this error message
3. Identify why it loops (missing break/continue/return, no state change between iterations, etc.)
4. Fix the loop by adding proper exit conditions, error handling, or state tracking
5. Common loop-causing patterns in this codebase:
- \`gh pr create\` failing with "No commits between" but caller retries every cycle
- API calls returning the same error repeatedly with no backoff or give-up logic
- Status not updated after failure → next cycle tries the same thing
- Missing \`continue\` or state change in foreach loops over tracked attempts
6. Apply a minimal fix. Do NOT refactor unrelated code.
7. Write the fix directly to the file.`;
  const prompt = resolvePromptTemplate(
    promptTemplate,
    {
      REPEAT_COUNT: repeatCount,
      ERROR_LINE: errorLine,
      RECENT_MESSAGES_CONTEXT: messagesCtx,
    },
    defaultPrompt,
  );

  // Audit log: one file per attempt; ":" and "." are replaced so the ISO
  // timestamp is usable as part of a file name.
  const stamp = new Date().toISOString().replace(/[:.]/g, "-");
  const auditPath = resolve(
    logDir,
    `autofix-loop-${stamp}-attempt${attemptNum}.log`,
  );
  await writeFile(
    auditPath,
    [
      `# Loop fix attempt #${attemptNum} [${modeLabel}]`,
      `# Error line: ${errorLine}`,
      `# Repeat count: ${repeatCount}`,
      `# Timestamp: ${new Date().toISOString()}`,
      "",
      "## Prompt sent to Codex:",
      prompt,
      "",
    ].join("\n"),
    "utf8",
  );

  // ── NPM mode: analyze only, suggest fix to user ──────────────────────
  if (!devMode) {
    console.log("[autofix] npm mode — loop fix: analysis only");

    const suggestion =
      `📋 *Loop fix analysis* (attempt #${attemptNum}):\n` +
      `**Repeating error** (${repeatCount}x):\n` +
      `\`${errorLine.slice(0, 300)}\`\n\n` +
      `**Likely cause:** This error is repeating in a loop, likely because:\n` +
      `• No break/continue/return after the error condition\n` +
      `• Status not updated after failure → retries the same operation\n` +
      `• Missing backoff or give-up logic after repeated failures\n\n` +
      `**Suggested fix:** Check \`scripts/openfleet/ve-orchestrator.ps1\` for the code ` +
      `that produces this error message and add proper exit conditions.`;

    // Append (flag "a") so the prompt written above stays in the same file.
    await writeFile(
      auditPath,
      [
        "",
        `## Mode: ANALYZE-ONLY (npm mode)`,
        `## Suggestion sent to user (no files modified)`,
        suggestion,
      ].join("\n"),
      { flag: "a" },
    );

    if (onTelegram) onTelegram(suggestion);
    return { fixed: false, outcome: suggestion };
  }

  // ── DEV mode: execute fix via Codex ────────────────────────────────────
  // Snapshot the changed-file list around the run; only paths that appear
  // after the run but not before are attributed to this fix.
  const filesBefore = detectChangedFiles(repoRoot);
  const result = await runCodexExec(prompt, repoRoot);
  const filesAfter = detectChangedFiles(repoRoot);
  const newChanges = filesAfter.filter((f) => !filesBefore.includes(f));
  const changeSummary = getChangeSummary(repoRoot, newChanges);

  await writeFile(
    auditPath,
    [
      "",
      `## Mode: EXECUTE (dev mode)`,
      `## Codex result (success=${result.success}):`,
      result.output || "(no output)",
      result.error ? `## Error: ${result.error}` : "",
      `## Files changed: ${newChanges.join(", ") || "none"}`,
    ].join("\n"),
    { flag: "a" },
  );

  if (result.success && newChanges.length > 0) {
    const outcome =
      `🔁 Loop fix applied (attempt #${attemptNum}):\n` +
      `Error: "${errorLine.slice(0, 150)}"\n` +
      `Changes:\n${changeSummary}`;
    console.log(`[autofix] loop fix applied: ${newChanges.join(", ")}`);
    if (onTelegram) onTelegram(outcome);
    return { fixed: true, outcome };
  }

  // Reached both when Codex failed and when it succeeded without touching any
  // new file; in the latter case result.error is empty, so fall back to the
  // same wording the outcome message uses instead of logging "undefined".
  const failureDetail = result.error || "no changes written";
  const outcome =
    `🔁 Loop fix failed (attempt #${attemptNum}):\n` +
    `Error: "${errorLine.slice(0, 150)}"\n` +
    `Codex: ${failureDetail}`;
  console.warn(`[autofix] loop fix codex exec failed: ${failureDetail}`);
  if (onTelegram) onTelegram(outcome);
  return { fixed: false, outcome };
}