bosun 0.42.2 → 0.42.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/.env.example +9 -0
  2. package/agent/agent-event-bus.mjs +10 -0
  3. package/agent/agent-supervisor.mjs +20 -0
  4. package/bosun-tui.mjs +107 -105
  5. package/cli.mjs +10 -0
  6. package/config/config.mjs +25 -0
  7. package/config/executor-config.mjs +124 -1
  8. package/infra/container-runner.mjs +565 -1
  9. package/infra/monitor.mjs +18 -0
  10. package/infra/tracing.mjs +544 -240
  11. package/infra/tui-bridge.mjs +13 -1
  12. package/kanban/kanban-adapter.mjs +128 -4
  13. package/lib/repo-map.mjs +114 -3
  14. package/package.json +11 -4
  15. package/server/ui-server.mjs +3 -0
  16. package/task/task-archiver.mjs +18 -6
  17. package/task/task-attachments.mjs +14 -10
  18. package/task/task-cli.mjs +24 -4
  19. package/task/task-executor.mjs +19 -0
  20. package/task/task-store.mjs +194 -37
  21. package/telegram/telegram-bot.mjs +4 -1
  22. package/tui/app.mjs +131 -171
  23. package/tui/components/status-header.mjs +178 -75
  24. package/tui/lib/header-config.mjs +68 -0
  25. package/tui/lib/ws-bridge.mjs +61 -9
  26. package/tui/screens/agents.mjs +127 -0
  27. package/tui/screens/tasks.mjs +1 -48
  28. package/ui/app.js +8 -5
  29. package/ui/components/kanban-board.js +65 -3
  30. package/ui/components/session-list.js +18 -32
  31. package/ui/demo-defaults.js +52 -2
  32. package/ui/modules/session-api.js +100 -0
  33. package/ui/modules/state.js +71 -15
  34. package/ui/tabs/workflows.js +25 -1
  35. package/ui/tui/App.js +298 -0
  36. package/ui/tui/TasksScreen.js +564 -0
  37. package/ui/tui/constants.js +55 -0
  38. package/ui/tui/tasks-screen-helpers.js +301 -0
  39. package/ui/tui/useTasks.js +61 -0
  40. package/ui/tui/useWebSocket.js +166 -0
  41. package/ui/tui/useWorkflows.js +30 -0
  42. package/workflow/workflow-engine.mjs +412 -7
  43. package/workflow/workflow-nodes.mjs +616 -75
  44. package/workflow-templates/agents.mjs +3 -0
  45. package/workflow-templates/planning.mjs +7 -0
  46. package/workflow-templates/sub-workflows.mjs +5 -0
  47. package/workflow-templates/task-execution.mjs +3 -0
  48. package/workspace/command-diagnostics.mjs +1 -1
  49. package/workspace/context-cache.mjs +182 -9
@@ -18,7 +18,7 @@
18
18
 
19
19
  import { spawn, spawnSync, execSync } from "node:child_process";
20
20
  import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
21
- import { resolve, basename } from "node:path";
21
+ import { resolve, basename, join } from "node:path";
22
22
 
23
23
  // ── Configuration ────────────────────────────────────────────────────────────
24
24
 
@@ -51,7 +51,36 @@ const OUTPUT_END_MARKER = "---CODEXMON_OUTPUT_END---";
51
51
  // ── State ────────────────────────────────────────────────────────────────────
52
52
 
53
53
  const activeContainers = new Map(); // containerName → { proc, startTime, taskId }
54
/**
 * Parses an integer runner setting from an environment-style value.
 *
 * A bare `Math.max(min, parseInt(raw, 10))` yields `NaN` for non-numeric input,
 * which silently disables capacity checks and retry loops downstream, so
 * unparsable values fall back to the documented default instead.
 *
 * @param {unknown} rawValue - Raw setting (usually a `process.env` entry).
 * @param {number} fallback - Value used when `rawValue` is missing or not an integer.
 * @param {number} minimum - Lower bound applied to the final value.
 * @returns {number} A finite integer that is at least `minimum`.
 */
function parseRunnerIntSetting(rawValue, fallback, minimum) {
  const parsed = Number.parseInt(String(rawValue ?? ""), 10);
  const safeFallback = Number.isFinite(fallback) ? fallback : minimum;
  return Math.max(minimum, Number.isFinite(parsed) ? parsed : safeFallback);
}

// The pool is enabled unless HEAVY_RUNNER_POOL_ENABLED is one of the explicit "off" spellings.
const isolatedRunnerPoolEnabled = !["0", "false", "no", "off"].includes(
  String(process.env.HEAVY_RUNNER_POOL_ENABLED || "1").toLowerCase(),
);
// Default execution backend; resolveRunnerProvider() also understands "auto" and "container".
const isolatedRunnerProvider =
  String(process.env.HEAVY_RUNNER_PROVIDER || "process").trim().toLowerCase() || "process";
// Reported through getIsolatedRunnerPoolStatus(); never below one second.
const isolatedRunnerLeaseTimeoutMs = parseRunnerIntSetting(
  process.env.HEAVY_RUNNER_LEASE_TIMEOUT_MS,
  30000,
  1000,
);
// Number of retries after the first attempt (0 disables retrying).
const isolatedRunnerRetryLimit = parseRunnerIntSetting(
  process.env.HEAVY_RUNNER_RETRY_LIMIT,
  1,
  0,
);
// Pause between attempts, in milliseconds.
const isolatedRunnerRetryDelayMs = parseRunnerIntSetting(
  process.env.HEAVY_RUNNER_RETRY_DELAY_MS,
  750,
  0,
);
// Upper bound on simultaneously held leases; defaults to the container concurrency limit.
const isolatedRunnerMaxConcurrent = parseRunnerIntSetting(
  process.env.HEAVY_RUNNER_MAX_CONCURRENT,
  Number.parseInt(String(maxConcurrentContainers), 10),
  1,
);
// Directory (resolved against the run's cwd) that receives per-lease evidence files.
const isolatedRunnerArtifactDirName = String(
  process.env.HEAVY_RUNNER_ARTIFACT_DIR || join(".bosun", "artifacts", "isolated-runs"),
);
const activeRunnerLeases = new Map(); // leaseId → lease metadata
54
82
  let containerIdCounter = 0;
83
+ let runnerLeaseCounter = 0;
55
84
 
56
85
  function runContainerRuntimeSync(args, options = {}) {
57
86
  const res = spawnSync(containerRuntime, args, {
@@ -72,6 +101,293 @@ function runContainerRuntimeSync(args, options = {}) {
72
101
  return String(res.stdout || "");
73
102
  }
74
103
 
104
/**
 * Pauses between lease acquisition attempts.
 *
 * @param {number} delayMs - Delay in milliseconds; falsy or non-positive values skip the wait.
 * @returns {Promise<void>} Settles after the delay, or immediately when no wait is needed.
 */
function waitForLeaseRetry(delayMs) {
  const skipWait = !delayMs || delayMs <= 0;
  if (skipWait) {
    return Promise.resolve();
  }
  return new Promise((done) => {
    setTimeout(done, delayMs);
  });
}
108
+
109
/**
 * Builds a copy-pasteable shell command that prints an artifact file.
 *
 * @param {string} filePath - Path of the artifact; falsy values become an empty path.
 * @param {string} [platform=process.platform] - Target platform identifier.
 * @returns {string} `Get-Content -Raw "<path>"` on win32 (double quotes doubled),
 *   otherwise `cat '<path>'` with embedded single quotes escaped.
 */
export function formatArtifactRetrieveCommand(filePath, platform = process.platform) {
  const target = String(filePath || "");
  if (platform !== "win32") {
    const shellSafe = target.replace(/'/g, `'"'"'`);
    return `cat '${shellSafe}'`;
  }
  const powershellSafe = target.replace(/"/g, '""');
  return `Get-Content -Raw "${powershellSafe}"`;
}
116
+
117
/**
 * Resolves the directory that holds isolated-run artifacts for a workspace.
 *
 * @param {string} [cwd] - Workspace directory; defaults to the current working directory.
 * @returns {string} Absolute path of the artifact root.
 */
function buildIsolatedArtifactRoot(cwd) {
  const baseDir = cwd || process.cwd();
  return resolve(baseDir, isolatedRunnerArtifactDirName);
}
120
+
121
/**
 * Writes the evidence files for one isolated run and describes them.
 *
 * Creates `<artifact root>/<leaseId>/` and stores `stdout.log`, `stderr.log`
 * and `metadata.json` in it (in that order), then appends any caller-supplied
 * extra artifacts that carry a `path`.
 *
 * @param {object} [params]
 * @param {string} [params.cwd] - Workspace the artifact root is resolved against.
 * @param {string} [params.leaseId] - Directory name; a timestamped name is used when missing.
 * @param {string} [params.stdout=""] - Captured standard output.
 * @param {string} [params.stderr=""] - Captured standard error.
 * @param {object} [params.metadata={}] - Serialized as pretty-printed JSON.
 * @param {Array<object>} [params.extraArtifacts=[]] - Additional `{ path, label?, kind?, retrieveCommand? }` entries.
 * @returns {{ artifactRoot: string, artifacts: Array<object> }} Directory and artifact descriptors.
 */
function persistIsolatedRunArtifacts({
  cwd,
  leaseId,
  stdout = "",
  stderr = "",
  metadata = {},
  extraArtifacts = [],
} = {}) {
  const runDirName = leaseId || `lease-${Date.now()}`;
  const artifactRoot = resolve(buildIsolatedArtifactRoot(cwd), runDirName);
  mkdirSync(artifactRoot, { recursive: true });

  // Content is rendered lazily so files are written in order, each only after
  // the previous one succeeded.
  const standardFiles = [
    {
      label: "stdout",
      kind: "log",
      path: resolve(artifactRoot, "stdout.log"),
      render: () => String(stdout || ""),
    },
    {
      label: "stderr",
      kind: "log",
      path: resolve(artifactRoot, "stderr.log"),
      render: () => String(stderr || ""),
    },
    {
      label: "metadata",
      kind: "json",
      path: resolve(artifactRoot, "metadata.json"),
      render: () => JSON.stringify(metadata || {}, null, 2),
    },
  ];
  for (const file of standardFiles) {
    writeFileSync(file.path, file.render(), "utf8");
  }

  const artifacts = standardFiles.map(({ label, kind, path }) => ({
    label,
    kind,
    path,
    retrieveCommand: formatArtifactRetrieveCommand(path),
  }));

  const extras = Array.isArray(extraArtifacts) ? extraArtifacts : [];
  for (const extra of extras) {
    if (extra?.path) {
      artifacts.push({
        label: extra.label || basename(extra.path),
        kind: extra.kind || "artifact",
        path: extra.path,
        retrieveCommand:
          extra.retrieveCommand || formatArtifactRetrieveCommand(extra.path),
      });
    }
  }

  return { artifactRoot, artifacts };
}
176
+
177
/**
 * Tries to reserve a runner slot and registers the lease in `activeRunnerLeases`.
 *
 * @param {object} [options]
 * @param {string} [options.taskId] - Owning task; defaults to "validation".
 * @param {string} [options.requestType] - Request category (falls back to `commandType`, then "validation").
 * @param {string} [options.provider] - Execution backend; defaults to the configured provider.
 * @param {string} [options.cwd] - Working directory, resolved to an absolute path.
 * @param {object} [options.metadata] - Shallow-copied onto the lease.
 * @returns {{ ok: true, lease: object } | { ok: false, reason: string }}
 *   The new lease, or the reason none could be granted (pool disabled or at capacity).
 */
function acquireLeaseRecord(options = {}) {
  const deny = (reason) => ({ ok: false, reason });

  if (!isolatedRunnerPoolEnabled) {
    return deny("runner_pool_disabled");
  }
  const atCapacity = activeRunnerLeases.size >= isolatedRunnerMaxConcurrent;
  if (atCapacity) {
    return deny(`lease_capacity_reached:${isolatedRunnerMaxConcurrent}`);
  }

  const createdStamp = Date.now();
  runnerLeaseCounter += 1;
  const leaseId = `runner-${createdStamp}-${runnerLeaseCounter}`;
  const hasMetadata = options.metadata && typeof options.metadata === "object";

  const lease = {
    leaseId,
    taskId: String(options.taskId || "validation"),
    requestType: String(options.requestType || options.commandType || "validation"),
    provider: String(options.provider || isolatedRunnerProvider || "process"),
    cwd: resolve(options.cwd || process.cwd()),
    acquiredAt: Date.now(),
    metadata: hasMetadata ? { ...options.metadata } : {},
  };
  activeRunnerLeases.set(leaseId, lease);
  return { ok: true, lease };
}
204
+
205
/**
 * Removes a lease from `activeRunnerLeases` and reports how long it was held.
 *
 * @param {string|{leaseId?: string}} leaseOrId - Lease id or a lease object.
 * @param {object} [options] - Extra fields merged last into the returned record
 *   (for example `status` or `exitCode`).
 * @returns {object|null} The lease plus `releasedAt`, `durationMs` and `options`,
 *   or `null` when the id is missing or the lease was not tracked.
 */
function releaseLeaseRecord(leaseOrId, options = {}) {
  const leaseId = typeof leaseOrId === "string" ? leaseOrId : leaseOrId?.leaseId;
  if (!leaseId) {
    return null;
  }

  const tracked = activeRunnerLeases.get(leaseId) || null;
  activeRunnerLeases.delete(leaseId);
  if (tracked === null) {
    return null;
  }

  const releasedAt = Date.now();
  const timing = { releasedAt, durationMs: releasedAt - tracked.acquiredAt };
  return { ...tracked, ...timing, ...options };
}
219
+
220
/**
 * Wraps a value in single quotes so a POSIX shell treats it as one literal word.
 *
 * Embedded single quotes are emitted as `'\''` (close quote, escaped quote,
 * reopen quote). Only `null`/`undefined` collapse to an empty argument; other
 * falsy values such as `0` or `false` keep their string form, matching the
 * `String(arg)` conversion used when arguments are passed as an argv array.
 *
 * @param {unknown} value - Argument to quote.
 * @returns {string} The single-quoted argument.
 */
function quotePosixArg(value) {
  const text = String(value ?? "");
  return `'${text.replace(/'/g, `'\\''`)}'`;
}
223
+
224
/**
 * Renders a command and its arguments as a single POSIX shell command line.
 *
 * @param {string} command - Executable or shell snippet; used as-is after trimming.
 * @param {Array<unknown>} [args=[]] - Arguments, each quoted with `quotePosixArg`.
 * @returns {string} Space-separated command line with empty tokens dropped.
 */
function joinCommandArgs(command, args = []) {
  const executable = String(command || "").trim();
  const quotedArgs = args.map((arg) => quotePosixArg(arg));
  const tokens = [executable, ...quotedArgs].filter((token) => Boolean(token));
  return tokens.join(" ").trim();
}
231
+
232
/**
 * Decides which backend executes an isolated run.
 *
 * @param {string} [provider] - Explicitly requested provider.
 * @param {object} [options] - Run options; `options.provider` is the second choice and
 *   a function in `options.execute` forces the "custom" provider.
 * @returns {string} "custom", the normalized requested provider, or for "auto" either
 *   "container" (containers enabled and runtime available) or "process".
 */
function resolveRunnerProvider(provider, options = {}) {
  const rawChoice =
    provider || options.provider || isolatedRunnerProvider || "process";
  const requested = String(rawChoice).trim().toLowerCase();

  if (typeof options.execute === "function") {
    return "custom";
  }
  if (requested !== "auto") {
    return requested || "process";
  }

  const containerUsable = containerEnabled && checkContainerRuntime().available;
  return containerUsable ? "container" : "process";
}
246
+
247
/**
 * Tells whether failure output looks like a sandbox or permission restriction.
 *
 * @param {unknown} text - Failure output to inspect; falsy values count as empty.
 * @returns {boolean} `true` when a known sandbox/permission phrase appears (case-insensitive).
 */
function isSandboxFailureText(text) {
  const sandboxSignals =
    /(?:sandbox|operation not permitted|permission denied|access is denied|read-only file system|EPERM|EACCES|denied by policy|seccomp)/i;
  const haystack = String(text || "");
  return sandboxSignals.test(haystack);
}
252
+
253
/**
 * Tells whether failure output looks like the command never started.
 *
 * @param {unknown} text - Failure output to inspect; falsy values count as empty.
 * @returns {boolean} `true` when a known "missing executable / failed to start"
 *   phrase appears (case-insensitive).
 */
function isBootstrapFailureText(text) {
  const bootstrapSignals =
    /(?:\bENOENT\b|spawn\s+.+\s+ENOENT|not recognized as an internal or external command|is not recognized as a name of a cmdlet|command not found|executable file not found|no such file or directory|cannot find the file|failed to start|startup failure)/i;
  const haystack = String(text || "");
  return bootstrapSignals.test(haystack);
}
258
+
259
/**
 * Reduces failure output to a short single-line detail.
 *
 * @param {unknown} value - Raw failure output; falsy values yield an empty string.
 * @returns {string} The first non-blank line of the trimmed text, capped at 400 characters.
 */
function summarizeFailureDetail(value) {
  const text = String(value || "").trim();
  if (text === "") {
    return "";
  }
  const lines = text.split(/\r?\n/);
  const firstMeaningful = lines.find((line) => Boolean(String(line || "").trim()));
  const headline = firstMeaningful || text;
  return headline.slice(0, 400);
}
265
+
266
/**
 * Classifies the outcome of an isolated run into a structured failure diagnostic.
 *
 * Categories are checked in this order: blocked runner, timeout, sandbox
 * restriction, bootstrap failure; anything else is a plain command failure.
 * The text inspected is the first non-empty of `stderr`, `error`, `stdout`.
 *
 * @param {object} [result] - Run result (`status`, `exitCode`, `blocked`, `stderr`,
 *   `error`, `stdout`, `duration`).
 * @param {object} [options] - Context echoed into the diagnostic (`provider`,
 *   `command`, `args`, `attempts`, `timeoutMs`).
 * @returns {object|null} `null` for a successful, unblocked run with exit code 0;
 *   otherwise the diagnostic record.
 */
function buildIsolatedFailureDiagnostic(result = {}, options = {}) {
  const status = String(result?.status || "unknown").trim().toLowerCase();
  const exitCode = result?.exitCode ?? null;
  const blocked = result?.blocked === true || status === "blocked";
  const outputCandidates = [result?.stderr, result?.error, result?.stdout].map(
    (value) => String(value || "").trim(),
  );
  const errorText = outputCandidates.find(Boolean) || "";

  const succeeded = !blocked && status === "success" && Number(exitCode ?? 0) === 0;
  if (succeeded) {
    return null;
  }

  // Picks the first matching category; key order here defines the output order.
  const classify = () => {
    if (blocked) {
      return {
        category: "runner_unavailable",
        retryable: true,
        summary: "Isolated runner was unavailable before the validation command started.",
      };
    }
    if (status === "timeout") {
      const timeoutMs = Number(options.timeoutMs);
      const hasUsableTimeout = Number.isFinite(timeoutMs) && timeoutMs > 0;
      const timeoutDescription = hasUsableTimeout
        ? `${timeoutMs}ms`
        : "the configured timeout";
      return {
        category: "timeout",
        retryable: true,
        summary: `Validation timed out after ${timeoutDescription}.`,
      };
    }
    if (isSandboxFailureText(errorText)) {
      return {
        category: "sandbox_error",
        retryable: false,
        summary: "Validation was blocked by sandbox or filesystem restrictions.",
      };
    }
    const exitedWithoutCode =
      status === "error" && (exitCode == null || Number(exitCode) < 0);
    if (isBootstrapFailureText(errorText) || exitedWithoutCode) {
      return {
        category: "bootstrap_failure",
        retryable: true,
        summary: "Validation could not start cleanly in the isolated runner.",
      };
    }
    return {
      category: "command_failure",
      retryable: false,
      summary: `Validation command exited with code ${exitCode ?? "unknown"}.`,
    };
  };

  return {
    ...classify(),
    detail: summarizeFailureDetail(errorText),
    status,
    exitCode,
    blocked,
    provider: options.provider || null,
    command: options.command || null,
    args: Array.isArray(options.args) ? [...options.args] : [],
    attempts: Number(options.attempts || 1),
    durationMs: result?.duration ?? null,
  };
}
317
+
318
/**
 * Runs a command as a local child process and captures its output.
 *
 * With a non-empty `args` array the command is spawned directly (no shell);
 * otherwise `command` is handed to the platform shell as a single string.
 * The process is killed with SIGKILL once `timeoutMs` elapses.
 *
 * @param {object} [options]
 * @param {string} options.command - Executable, or a shell command line when `args` is empty.
 * @param {Array<unknown>} [options.args=[]] - Arguments, stringified before spawning.
 * @param {string} [options.cwd=process.cwd()] - Working directory.
 * @param {object} [options.env={}] - Variables layered over `process.env`.
 * @param {number} [options.timeoutMs=containerTimeout] - Kill deadline in milliseconds.
 * @param {(chunk: string) => void} [options.onStdout] - Called with each stdout chunk.
 * @param {(chunk: string) => void} [options.onStderr] - Called with each stderr chunk.
 * @returns {Promise<{status: string, stdout: string, stderr: string, exitCode: number|null, duration: number}>}
 *   Resolves (never rejects for process failures) with status "success", "error" or
 *   "timeout"; a spawn error yields exit code -1 and the error message as stderr.
 */
async function runIsolatedProcess(options = {}) {
  const {
    command,
    args = [],
    cwd = process.cwd(),
    env = {},
    timeoutMs = containerTimeout,
    onStdout,
    onStderr,
  } = options;

  return new Promise((resolvePromise) => {
    const baseSpawnOptions = {
      cwd,
      env: { ...process.env, ...env },
      stdio: ["ignore", "pipe", "pipe"],
      windowsHide: true,
    };
    const useArgv = Array.isArray(args) && args.length > 0;
    const proc = useArgv
      ? spawn(String(command || ""), args.map((arg) => String(arg)), baseSpawnOptions)
      : spawn(String(command || ""), { ...baseSpawnOptions, shell: true });

    const startedAt = Date.now();
    let stdout = "";
    let stderr = "";
    let timedOut = false;
    let settled = false;

    // Decode at the stream level: converting each Buffer chunk separately
    // corrupts multi-byte UTF-8 characters that straddle a chunk boundary.
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");

    proc.stdout.on("data", (chunk) => {
      stdout += chunk;
      if (typeof onStdout === "function") onStdout(chunk);
    });
    proc.stderr.on("data", (chunk) => {
      stderr += chunk;
      if (typeof onStderr === "function") onStderr(chunk);
    });

    const timer = setTimeout(() => {
      timedOut = true;
      proc.kill("SIGKILL");
    }, timeoutMs);

    // "error" and "close" can both fire for one process; only the first one
    // determines the outcome.
    const settle = (outcome) => {
      clearTimeout(timer);
      if (settled) return;
      settled = true;
      resolvePromise(outcome);
    };

    proc.on("close", (code) => {
      settle({
        status: timedOut ? "timeout" : code === 0 ? "success" : "error",
        stdout,
        stderr,
        exitCode: code,
        duration: Date.now() - startedAt,
      });
    });

    proc.on("error", (error) => {
      settle({
        status: "error",
        stdout,
        stderr: error?.message || "spawn failed",
        exitCode: -1,
        duration: Date.now() - startedAt,
      });
    });
  });
}
390
+
75
391
  // ── Public API ───────────────────────────────────────────────────────────────
76
392
 
77
393
  /**
@@ -247,6 +563,8 @@ export async function runInContainer(options) {
247
563
  timeout = containerTimeout,
248
564
  mountOptions = {},
249
565
  onOutput,
566
+ onStdout,
567
+ onStderr,
250
568
  } = options;
251
569
 
252
570
  // Create scratch directory for container writes
@@ -313,6 +631,7 @@ export async function runInContainer(options) {
313
631
  if (stdout.length + chunk.length <= containerMaxOutput) {
314
632
  stdout += chunk;
315
633
  }
634
+ if (typeof onStdout === "function") onStdout(chunk);
316
635
 
317
636
  // Stream-parse for output markers
318
637
  if (onOutput) {
@@ -340,6 +659,7 @@ export async function runInContainer(options) {
340
659
  if (stderr.length + chunk.length <= containerMaxOutput) {
341
660
  stderr += chunk;
342
661
  }
662
+ if (typeof onStderr === "function") onStderr(chunk);
343
663
  });
344
664
 
345
665
  const timer = setTimeout(() => {
@@ -471,3 +791,247 @@ export function cleanupOrphanedContainers() {
471
791
  /* no orphans or runtime not available */
472
792
  }
473
793
  }
794
+
795
+
796
+
797
+
798
/**
 * Reports whether the isolated runner pool is switched on.
 *
 * @returns {boolean} The module-level `isolatedRunnerPoolEnabled` flag.
 */
export function isIsolatedRunnerPoolEnabled() {
  const enabled = isolatedRunnerPoolEnabled;
  return enabled;
}
801
+
802
/**
 * Produces a snapshot of the runner pool configuration and its active leases.
 *
 * @returns {{enabled: boolean, provider: string, maxConcurrent: number,
 *   leaseTimeoutMs: number, retryLimit: number, active: number, leases: Array<object>}}
 *   Configuration values plus one `{ leaseId, taskId, requestType, provider, ageMs }`
 *   entry per active lease.
 */
export function getIsolatedRunnerPoolStatus() {
  const summarizeLease = ({ leaseId, taskId, requestType, provider, acquiredAt }) => ({
    leaseId,
    taskId,
    requestType,
    provider,
    ageMs: Date.now() - acquiredAt,
  });

  return {
    enabled: isolatedRunnerPoolEnabled,
    provider: isolatedRunnerProvider,
    maxConcurrent: isolatedRunnerMaxConcurrent,
    leaseTimeoutMs: isolatedRunnerLeaseTimeoutMs,
    retryLimit: isolatedRunnerRetryLimit,
    active: activeRunnerLeases.size,
    leases: Array.from(activeRunnerLeases.values(), (lease) => summarizeLease(lease)),
  };
}
819
+
820
/**
 * Public wrapper around `acquireLeaseRecord` that hides the failure reason.
 *
 * @param {object} [options] - Lease options forwarded unchanged.
 * @returns {object|null} The granted lease, or `null` when none is available.
 */
export function acquireRunnerLease(options = {}) {
  const outcome = acquireLeaseRecord(options);
  if (!outcome.ok) {
    return null;
  }
  return outcome.lease;
}
824
+
825
/**
 * Public wrapper around `releaseLeaseRecord`.
 *
 * @param {string|{leaseId?: string}} leaseOrId - Lease id or lease object to release.
 * @param {object} [options] - Extra fields merged into the release record.
 * @returns {object|null} Whatever `releaseLeaseRecord` returns for these arguments.
 */
export function releaseRunnerLease(leaseOrId, options = {}) {
  const released = releaseLeaseRecord(leaseOrId, options);
  return released;
}
828
+
829
/**
 * Runs a command under a runner lease and records evidence artifacts.
 *
 * Each attempt acquires a lease, executes the command through the resolved
 * provider (`options.execute` callback, container, or local process), releases
 * the lease and writes stdout/stderr/metadata artifacts. An attempt is retried
 * (after the configured delay) only when no lease is available or the execution
 * itself throws; a command that runs and fails is reported, not retried.
 *
 * @param {object} [options]
 * @param {string} options.command - Command to run (required, non-empty after trimming).
 * @param {Array<unknown>} [options.args] - Command arguments.
 * @param {string} [options.cwd] - Working directory for the run and its artifacts.
 * @param {object} [options.env] - Extra environment variables.
 * @param {string} [options.provider] - Requested provider ("process", "container", "auto", ...).
 * @param {Function} [options.execute] - Custom executor; receives the options plus
 *   `command`, `cwd`, `timeoutMs`, `lease` and `provider`.
 * @param {number} [options.timeoutMs] - Execution timeout (`options.timeout` is also accepted).
 * @param {number} [options.maxAttempts] - Total attempts; defaults to the retry limit plus one.
 * @param {object} [options.metadata] - Extra metadata stored on each lease.
 * @param {Function} [options.onStdout] - Streaming stdout callback.
 * @param {Function} [options.onStderr] - Streaming stderr callback.
 * @returns {Promise<object>} The run result extended with `attempts`, `provider`,
 *   `leaseId`, `artifactRoot`, `artifacts` and `failureDiagnostic`; status is
 *   "blocked" when no attempt could execute.
 * @throws {Error} When `command` is empty, or when writing the artifacts fails.
 */
export async function runInIsolatedRunner(options = {}) {
  const command = String(options.command || "").trim();
  if (!command) {
    throw new Error("runInIsolatedRunner requires a non-empty command");
  }

  const args = Array.isArray(options.args) ? options.args : [];
  const baseMetadata =
    options.metadata && typeof options.metadata === "object" ? options.metadata : {};

  // A non-finite or fractional attempt budget would let the loop end without
  // ever returning, so prefer the first finite candidate and round it down.
  const attemptBudget = [
    Number(options.maxAttempts ?? Number.NaN),
    isolatedRunnerRetryLimit + 1,
  ].find((candidate) => Number.isFinite(candidate));
  const maxAttempts = Math.max(1, Math.floor(attemptBudget ?? 1));

  // A non-finite or non-positive timeout would fire the kill timer almost
  // immediately; fall back to the module default instead.
  const requestedTimeout = Number(
    options.timeoutMs || options.timeout || containerTimeout,
  );
  const timeoutMs =
    Number.isFinite(requestedTimeout) && requestedTimeout > 0
      ? requestedTimeout
      : containerTimeout;

  const provider = resolveRunnerProvider(options.provider, options);
  let lastFailure = null;

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    const leaseResult = acquireLeaseRecord({
      ...options,
      provider,
      metadata: { ...baseMetadata, attempt },
    });

    if (!leaseResult.ok) {
      lastFailure = new Error(leaseResult.reason || "runner_lease_unavailable");
      if (attempt < maxAttempts) {
        await waitForLeaseRetry(isolatedRunnerRetryDelayMs);
        continue;
      }
      // Out of attempts without ever holding a lease: report a blocked run.
      const failedLeaseId = `blocked-${Date.now()}-${attempt}`;
      const blockedResult = {
        status: "blocked",
        blocked: true,
        error: lastFailure.message,
        exitCode: null,
      };
      const failureDiagnostic = buildIsolatedFailureDiagnostic(blockedResult, {
        command,
        args,
        provider,
        attempts: attempt,
      });
      const evidence = persistIsolatedRunArtifacts({
        cwd: options.cwd || process.cwd(),
        leaseId: failedLeaseId,
        metadata: {
          status: "blocked",
          reason: lastFailure.message,
          attempts: attempt,
          provider,
          command,
          args,
          failureDiagnostic,
        },
      });
      return {
        ...blockedResult,
        attempts: attempt,
        provider,
        leaseId: failedLeaseId,
        artifactRoot: evidence.artifactRoot,
        artifacts: evidence.artifacts,
        failureDiagnostic,
      };
    }

    const lease = leaseResult.lease;
    const startedAt = Date.now();
    let result;

    // Only the execution is guarded: a failure while recording artifacts after
    // the command has finished must not cause the command to run again.
    try {
      if (typeof options.execute === "function") {
        result = await options.execute({
          ...options,
          command,
          cwd: lease.cwd,
          timeoutMs,
          lease,
          provider,
        });
      } else if (provider === "container") {
        result = await runInContainer({
          workspacePath: lease.cwd,
          command: "/bin/sh",
          args: ["-lc", joinCommandArgs(command, args)],
          env: options.env || {},
          taskId: lease.taskId,
          timeout: timeoutMs,
          onStdout: options.onStdout,
          onStderr: options.onStderr,
        });
      } else {
        result = await runIsolatedProcess({
          command,
          args,
          cwd: lease.cwd,
          env: options.env || {},
          timeoutMs,
          onStdout: options.onStdout,
          onStderr: options.onStderr,
        });
      }
    } catch (error) {
      releaseLeaseRecord(lease, {
        status: "error",
        error: error?.message || String(error),
      });
      lastFailure = error;
      if (attempt < maxAttempts) {
        await waitForLeaseRetry(isolatedRunnerRetryDelayMs);
        continue;
      }
      const failedLeaseId = lease.leaseId || `failed-${Date.now()}-${attempt}`;
      const blockedResult = {
        status: "blocked",
        blocked: true,
        error: error?.message || String(error),
        exitCode: null,
      };
      const failureDiagnostic = buildIsolatedFailureDiagnostic(blockedResult, {
        command,
        args,
        provider,
        attempts: attempt,
      });
      const evidence = persistIsolatedRunArtifacts({
        cwd: lease.cwd,
        leaseId: failedLeaseId,
        stderr: error?.stack || error?.message || String(error),
        metadata: {
          status: "blocked",
          reason: error?.message || String(error),
          provider,
          command,
          args,
          attempts: attempt,
          failureDiagnostic,
        },
      });
      return {
        ...blockedResult,
        attempts: attempt,
        provider,
        isolated: true,
        leaseId: failedLeaseId,
        artifactRoot: evidence.artifactRoot,
        artifacts: evidence.artifacts,
        failureDiagnostic,
      };
    }

    // Free the slot before any further work so it is never held by bookkeeping.
    const releaseInfo = releaseLeaseRecord(lease, {
      status: result?.status || "unknown",
      exitCode: result?.exitCode ?? null,
    });
    const failureDiagnostic = buildIsolatedFailureDiagnostic(result, {
      command,
      args,
      provider,
      attempts: attempt,
      timeoutMs,
    });
    const extraArtifacts = [];
    if (result?.scratchDir) {
      extraArtifacts.push({
        label: "scratch",
        kind: "directory",
        path: result.scratchDir,
      });
    }
    const evidence = persistIsolatedRunArtifacts({
      cwd: lease.cwd,
      leaseId: lease.leaseId,
      stdout: result?.stdout || "",
      stderr: result?.stderr || "",
      metadata: {
        lease: releaseInfo,
        provider,
        command,
        args,
        attempts: attempt,
        durationMs: result?.duration ?? Date.now() - startedAt,
        status: result?.status || "unknown",
        exitCode: result?.exitCode ?? null,
        failureDiagnostic,
      },
      extraArtifacts,
    });
    return {
      ...result,
      attempts: attempt,
      provider,
      isolated: true,
      leaseId: lease.leaseId,
      artifactRoot: evidence.artifactRoot,
      artifacts: evidence.artifacts,
      failureDiagnostic,
    };
  }

  throw lastFailure || new Error("runInIsolatedRunner failed unexpectedly");
}
package/infra/monitor.mjs CHANGED
@@ -98,6 +98,8 @@ import {
98
98
  ensureContainerRuntime,
99
99
  stopAllContainers,
100
100
  cleanupOrphanedContainers,
101
+ getIsolatedRunnerPoolStatus,
102
+ runInIsolatedRunner,
101
103
  } from "./container-runner.mjs";
102
104
  import { ensureCodexConfig, printConfigSummary } from "../shell/codex-config.mjs";
103
105
  import { RestartController } from "./restart-controller.mjs";
@@ -899,6 +901,11 @@ async function ensureWorkflowAutomationEngine() {
899
901
  meeting: meetingService,
900
902
  prompts: Object.keys(promptServices).length > 0 ? promptServices : null,
901
903
  anomalyDetector: anomalyDetector || null,
904
+ scheduler: executorScheduler,
905
+ isolatedRunner: {
906
+ run: runInIsolatedRunner,
907
+ getStatus: getIsolatedRunnerPoolStatus,
908
+ },
902
909
  };
903
910
 
904
911
  const engine = getWorkflowEngine({
@@ -16312,6 +16319,15 @@ injectMonitorFunctions({
16312
16319
  getReviewAgentEnabled: () => isReviewAgentEnabled(),
16313
16320
  getSyncEngine: () => syncEngine,
16314
16321
  getErrorDetector: () => errorDetector,
16322
+ getTuiMonitorStats: () => {
16323
+ try {
16324
+ return typeof internalTaskExecutor?.getTuiStats === "function"
16325
+ ? (internalTaskExecutor.getTuiStats() || {})
16326
+ : {};
16327
+ } catch {
16328
+ return {};
16329
+ }
16330
+ },
16315
16331
  getWorkspaceMonitor: () => workspaceMonitor,
16316
16332
  getTaskStoreStats: () => {
16317
16333
  try {
@@ -16469,3 +16485,5 @@ export {
16469
16485
  // Workflow event bridge — for fleet/kanban modules to emit events
16470
16486
  queueWorkflowEvent,
16471
16487
  };
16488
+
16489
+