@covibes/zeroshot 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/CHANGELOG.md +167 -0
  2. package/LICENSE +21 -0
  3. package/README.md +364 -0
  4. package/cli/index.js +3990 -0
  5. package/cluster-templates/base-templates/debug-workflow.json +181 -0
  6. package/cluster-templates/base-templates/full-workflow.json +455 -0
  7. package/cluster-templates/base-templates/single-worker.json +48 -0
  8. package/cluster-templates/base-templates/worker-validator.json +131 -0
  9. package/cluster-templates/conductor-bootstrap.json +122 -0
  10. package/cluster-templates/conductor-junior-bootstrap.json +69 -0
  11. package/docker/zeroshot-cluster/Dockerfile +132 -0
  12. package/lib/completion.js +174 -0
  13. package/lib/id-detector.js +53 -0
  14. package/lib/settings.js +97 -0
  15. package/lib/stream-json-parser.js +236 -0
  16. package/package.json +121 -0
  17. package/src/agent/agent-config.js +121 -0
  18. package/src/agent/agent-context-builder.js +241 -0
  19. package/src/agent/agent-hook-executor.js +329 -0
  20. package/src/agent/agent-lifecycle.js +555 -0
  21. package/src/agent/agent-stuck-detector.js +256 -0
  22. package/src/agent/agent-task-executor.js +1034 -0
  23. package/src/agent/agent-trigger-evaluator.js +67 -0
  24. package/src/agent-wrapper.js +459 -0
  25. package/src/agents/git-pusher-agent.json +20 -0
  26. package/src/attach/attach-client.js +438 -0
  27. package/src/attach/attach-server.js +543 -0
  28. package/src/attach/index.js +35 -0
  29. package/src/attach/protocol.js +220 -0
  30. package/src/attach/ring-buffer.js +121 -0
  31. package/src/attach/socket-discovery.js +242 -0
  32. package/src/claude-task-runner.js +468 -0
  33. package/src/config-router.js +80 -0
  34. package/src/config-validator.js +598 -0
  35. package/src/github.js +103 -0
  36. package/src/isolation-manager.js +1042 -0
  37. package/src/ledger.js +429 -0
  38. package/src/logic-engine.js +223 -0
  39. package/src/message-bus-bridge.js +139 -0
  40. package/src/message-bus.js +202 -0
  41. package/src/name-generator.js +232 -0
  42. package/src/orchestrator.js +1938 -0
  43. package/src/schemas/sub-cluster.js +156 -0
  44. package/src/sub-cluster-wrapper.js +545 -0
  45. package/src/task-runner.js +28 -0
  46. package/src/template-resolver.js +347 -0
  47. package/src/tui/CHANGES.txt +133 -0
  48. package/src/tui/LAYOUT.md +261 -0
  49. package/src/tui/README.txt +192 -0
  50. package/src/tui/TWO-LEVEL-NAVIGATION.md +186 -0
  51. package/src/tui/data-poller.js +325 -0
  52. package/src/tui/demo.js +208 -0
  53. package/src/tui/formatters.js +123 -0
  54. package/src/tui/index.js +193 -0
  55. package/src/tui/keybindings.js +383 -0
  56. package/src/tui/layout.js +317 -0
  57. package/src/tui/renderer.js +194 -0
@@ -0,0 +1,1034 @@
1
+ // @ts-nocheck
2
+ /**
3
+ * AgentTaskExecutor - Claude CLI spawning and monitoring
4
+ *
5
+ * Provides:
6
+ * - Claude CLI task spawning (normal and isolated modes)
7
+ * - Log streaming and real-time output broadcasting
8
+ * - Task lifecycle management (wait, kill)
9
+ * - Output parsing and validation
10
+ * - Vibe-specific Claude config with AskUserQuestion blocked
11
+ */
12
+
13
+ const { spawn } = require('child_process');
14
+ const path = require('path');
15
+ const fs = require('fs');
16
+ const os = require('os');
17
+
18
/**
 * Validate and sanitize an error message before it is reported.
 *
 * Detects strings that look like TypeScript type annotations (for example
 * "string | null") rather than real error text. Such values are replaced by a
 * generic failure message that embeds the original text, and a warning is
 * logged.
 *
 * Non-string input is tolerated instead of throwing on `.trim()`: an Error
 * instance contributes its `message`, any other value is converted with
 * String().
 *
 * @param {string|null} error - Error message to validate
 * @returns {string|null} null for empty input, a replacement message when the
 *   input looks like a type annotation, otherwise the message itself
 */
function sanitizeErrorMessage(error) {
  if (!error) return null;

  // Normalize to a string so the pattern checks below can never throw.
  let message;
  if (typeof error === 'string') {
    message = error;
  } else if (error instanceof Error) {
    message = error.message;
  } else {
    message = String(error);
  }
  if (!message) return null;

  // Patterns that look like TypeScript type annotations (not real error messages)
  const typeAnnotationPatterns = [
    /^string\s*\|\s*null$/i,
    /^number\s*\|\s*undefined$/i,
    /^boolean\s*\|\s*null$/i,
    /^any$/i,
    /^unknown$/i,
    /^void$/i,
    /^never$/i,
    /^[A-Z][a-zA-Z]*\s*\|\s*(null|undefined)$/, // e.g., "Error | null"
    /^[a-z]+(\s*\|\s*[a-z]+)+$/i, // e.g., "string | number | boolean"
  ];

  const candidate = message.trim();
  const looksLikeTypeAnnotation = typeAnnotationPatterns.some((pattern) =>
    pattern.test(candidate)
  );

  if (looksLikeTypeAnnotation) {
    console.warn(
      `[agent-task-executor] WARNING: Error message looks like a TypeScript type annotation: "${message}". ` +
        `This indicates corrupted data. Replacing with generic error.`
    );
    return `Task failed with corrupted error data (original: "${message}")`;
  }

  return message;
}
53
+
54
// Track if we've already ensured the AskUserQuestion hook is installed
let askUserQuestionHookInstalled = false;

/**
 * Ensure the AskUserQuestion blocking hook is registered in the user's Claude config.
 *
 * Steps:
 *  1. Resolve the config directory (CLAUDE_CONFIG_DIR, else ~/.claude).
 *  2. Create <config>/hooks and copy the bundled hook script into it when the
 *     bundled script exists (always overwritten so the latest version is used).
 *  3. Add a PreToolUse entry with matcher "AskUserQuestion" to settings.json
 *     unless an equivalent entry is already present.
 *
 * settings.json belongs to the user, so it is never replaced wholesale: if it
 * cannot be parsed, or its `hooks` / `hooks.PreToolUse` values have an
 * unexpected shape, a warning is logged and the file is left untouched.
 *
 * Safe to call multiple times - the work is done at most once per process.
 * File system errors (mkdir/copy/write) propagate to the caller.
 */
function ensureAskUserQuestionHook() {
  if (askUserQuestionHookInstalled) {
    return; // Already handled this session
  }

  const userClaudeDir = process.env.CLAUDE_CONFIG_DIR || path.join(os.homedir(), '.claude');
  const hooksDir = path.join(userClaudeDir, 'hooks');
  const settingsPath = path.join(userClaudeDir, 'settings.json');
  const hookScriptName = 'block-ask-user-question.py';
  const hookScriptDst = path.join(hooksDir, hookScriptName);

  // Recursive mkdir is a no-op when the directory already exists
  fs.mkdirSync(hooksDir, { recursive: true });

  // Always copy (when the bundled script is available) to ensure latest version
  const hookScriptSrc = path.join(__dirname, '..', '..', 'hooks', hookScriptName);
  if (fs.existsSync(hookScriptSrc)) {
    fs.copyFileSync(hookScriptSrc, hookScriptDst);
    fs.chmodSync(hookScriptDst, 0o755);
  }

  const isPlainObject = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  // Read existing settings or start from an empty object when none exist
  let settings = {};
  if (fs.existsSync(settingsPath)) {
    let problem = null;
    try {
      settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8'));
    } catch (e) {
      problem = `could not parse file (${e.message})`;
    }

    if (!problem && !isPlainObject(settings)) {
      problem = 'top-level value is not an object';
    } else if (!problem && settings.hooks && !isPlainObject(settings.hooks)) {
      problem = '"hooks" is not an object';
    } else if (
      !problem &&
      settings.hooks &&
      settings.hooks.PreToolUse &&
      !Array.isArray(settings.hooks.PreToolUse)
    ) {
      problem = '"hooks.PreToolUse" is not an array';
    }

    if (problem) {
      // Overwriting here would destroy the user's configuration, so back off.
      console.warn(
        `[AgentTaskExecutor] Leaving ${settingsPath} untouched: ${problem}. ` +
          `AskUserQuestion blocking hook was not registered.`
      );
      askUserQuestionHookInstalled = true; // Do not retry (and re-warn) on every spawn
      return;
    }
  }

  // Ensure hooks structure exists
  if (!settings.hooks) {
    settings.hooks = {};
  }
  if (!settings.hooks.PreToolUse) {
    settings.hooks.PreToolUse = [];
  }

  // Check if AskUserQuestion hook already exists (by matcher or by script name)
  const hasHook = settings.hooks.PreToolUse.some(
    (entry) =>
      isPlainObject(entry) &&
      (entry.matcher === 'AskUserQuestion' ||
        (Array.isArray(entry.hooks) &&
          entry.hooks.some(
            (h) => typeof h?.command === 'string' && h.command.includes(hookScriptName)
          )))
  );

  if (!hasHook) {
    settings.hooks.PreToolUse.push({
      matcher: 'AskUserQuestion',
      hooks: [
        {
          type: 'command',
          command: hookScriptDst,
        },
      ],
    });

    // Write updated settings
    fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
    console.log(`[AgentTaskExecutor] Installed AskUserQuestion blocking hook in ${settingsPath}`);
  }

  askUserQuestionHookInstalled = true;
}
133
+
134
/**
 * Spawn a `zeroshot task run` process and stream its output via the message bus.
 *
 * Flow:
 *  1. Build the CLI arguments (output format, optional -v, optional JSON schema, prompt).
 *  2. Short-circuit to `agent.mockSpawnFn` when a mock is injected; throw when
 *     `agent.testMode` is set without a mock.
 *  3. Delegate to spawnClaudeTaskIsolated() when `agent.isolation.enabled` is set.
 *  4. Otherwise spawn the CLI, parse the task ID it prints, wait until the task
 *     is visible, then follow its log until completion.
 *
 * @param {Object} agent - Agent instance
 * @param {String} context - Context to pass to Claude
 * @returns {Promise<Object>} Result object { success, output, error }
 * @throws {Error} When testMode is set without a mock. The returned promise also
 *   rejects when the CLI cannot be spawned, is killed by a signal, exits
 *   non-zero, or prints no parsable task ID.
 */
async function spawnClaudeTask(agent, context) {
  const ctPath = getClaudeTasksPath();
  const cwd = agent.config.cwd || process.cwd();

  // Build zeroshot task run args.
  // CRITICAL: Default to strict schema validation to prevent cluster crashes from parse failures
  // strictSchema=true uses Claude CLI's native --json-schema enforcement (no streaming but guaranteed structure)
  // strictSchema=false uses stream-json with post-run validation (live logs but fragile)
  const desiredOutputFormat = agent.config.outputFormat || 'json';
  const strictSchema = agent.config.strictSchema !== false; // DEFAULT TO TRUE
  const runOutputFormat =
    agent.config.jsonSchema && desiredOutputFormat === 'json' && !strictSchema
      ? 'stream-json'
      : desiredOutputFormat;
  const args = ['task', 'run', '--output-format', runOutputFormat];

  // Add verification mode flag if configured
  if (agent.config.verificationMode) {
    args.push('-v');
  }

  // NOTE: maxRetries is handled by the agent wrapper's internal retry loop,
  // not passed to the CLI. See _handleTrigger() for retry logic.

  // Add JSON schema if specified in agent config.
  // If we are running stream-json for live logs (strictSchema=false), do NOT pass schema to CLI.
  if (agent.config.jsonSchema) {
    if (runOutputFormat === 'json') {
      // strictSchema=true OR no schema conflict: pass schema to CLI for native enforcement
      const schema = JSON.stringify(agent.config.jsonSchema);
      args.push('--json-schema', schema);
    } else if (!agent.quiet) {
      agent._log(
        `[Agent ${agent.id}] jsonSchema configured; running stream-json for live logs (strictSchema=false). Schema will be validated after completion.`
      );
    }
  }

  // If schema enforcement is desired but we had to run stream-json for live logs,
  // add explicit output instructions so the model still knows the required shape.
  let finalContext = context;
  if (
    agent.config.jsonSchema &&
    desiredOutputFormat === 'json' &&
    runOutputFormat === 'stream-json'
  ) {
    finalContext += `\n\n## Output Format (REQUIRED)\n\nReturn a JSON object that matches this schema exactly.\n\nSchema:\n\`\`\`json\n${JSON.stringify(
      agent.config.jsonSchema,
      null,
      2
    )}\n\`\`\`\n`;
  }

  args.push(finalContext);

  // MOCK SUPPORT: Use injected mock function if provided
  if (agent.mockSpawnFn) {
    return agent.mockSpawnFn(args, { context });
  }

  // SAFETY: Fail hard if testMode=true but no mock (should be caught in constructor)
  if (agent.testMode) {
    throw new Error(
      `AgentWrapper: testMode=true but attempting real Claude API call for agent '${agent.id}'. ` +
        `This is a bug - mock should be set in constructor.`
    );
  }

  // ISOLATION MODE: Run inside Docker container.
  // The raw context is passed; the isolated path appends its own schema instructions.
  if (agent.isolation?.enabled) {
    return spawnClaudeTaskIsolated(agent, context);
  }

  // NON-ISOLATION MODE: Use user's existing Claude config (preserves Keychain auth)
  // AskUserQuestion blocking handled via:
  // 1. Prompt injection (see agent-context-builder) - tells agent not to ask
  // 2. PreToolUse hook (defense-in-depth) - activated by ZEROSHOT_BLOCK_ASK_USER env var
  // DO NOT override CLAUDE_CONFIG_DIR - it breaks authentication on Claude CLI 2.x
  try {
    ensureAskUserQuestionHook();
  } catch (hookError) {
    // The hook is only the second line of defense; an unwritable config
    // directory must not prevent the task itself from running.
    console.warn(
      `[AgentTaskExecutor] Could not install AskUserQuestion blocking hook: ${hookError.message}`
    );
  }

  const taskId = await new Promise((resolve, reject) => {
    const proc = spawn(ctPath, args, {
      cwd,
      stdio: ['ignore', 'pipe', 'pipe'],
      env: {
        ...process.env,
        ANTHROPIC_MODEL: agent._selectModel(),
        // Activate AskUserQuestion blocking hook (see hooks/block-ask-user-question.py)
        ZEROSHOT_BLOCK_ASK_USER: '1',
      },
    });
    // Track PID for resource monitoring
    agent.processPid = proc.pid;
    agent._publishLifecycle('PROCESS_SPAWNED', { pid: proc.pid });

    let stdout = '';
    let stderr = '';

    proc.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    proc.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    proc.on('close', (code, signal) => {
      // Handle process killed by signal (e.g., SIGTERM, SIGKILL, SIGSTOP)
      if (signal) {
        reject(new Error(`Process killed by signal ${signal}${stderr ? `: ${stderr}` : ''}`));
        return;
      }

      if (code !== 0) {
        reject(new Error(`zeroshot task run failed with code ${code}: ${stderr}`));
        return;
      }

      // Parse task ID from output: "✓ Task spawned: xxx-yyy-nn"
      // Format: <adjective>-<noun>-<digits> (may or may not have task- prefix)
      const match = stdout.match(/Task spawned: ((?:task-)?[a-z]+-[a-z]+-[a-z0-9]+)/);
      if (!match) {
        reject(new Error(`Could not parse task ID from output: ${stdout}`));
        return;
      }

      const spawnedTaskId = match[1];
      agent.currentTaskId = spawnedTaskId; // Track for resume capability
      agent._publishLifecycle('TASK_ID_ASSIGNED', {
        pid: agent.processPid,
        taskId: spawnedTaskId,
      });

      // Start liveness monitoring
      if (agent.enableLivenessCheck) {
        agent.lastOutputTime = Date.now(); // Initialize to spawn time
        agent._startLivenessCheck();
      }

      resolve(spawnedTaskId);
    });

    proc.on('error', (error) => {
      reject(error);
    });
  });

  agent._log(`📋 Agent ${agent.id}: Following zeroshot logs for ${taskId}`);

  // Wait for task to be registered in zeroshot storage (race condition fix)
  await waitForTaskReady(agent, taskId);

  // Now follow the logs and stream output
  return followClaudeTaskLogs(agent, taskId);
}
293
+
294
/**
 * Poll `zeroshot status <taskId>` until the task shows up in task storage.
 *
 * A probe counts as "ready" when the status command succeeds and its stdout
 * does not contain "Task not found". After `maxRetries` unsuccessful probes a
 * warning is printed and the function returns normally - it never rejects.
 *
 * @param {Object} agent - Agent instance (not used by the probe itself)
 * @param {String} taskId - Task ID to wait for
 * @param {Number} maxRetries - Max number of probes (default 10)
 * @param {Number} delayMs - Pause after each failed probe in ms (default 200)
 * @returns {Promise<void>}
 */
async function waitForTaskReady(agent, taskId, maxRetries = 10, delayMs = 200) {
  const childProcess = require('child_process');
  const statusCommand = `${getClaudeTasksPath()} status ${taskId}`;

  // Single probe: resolves true only when the task is reported as known
  const isRegistered = () =>
    new Promise((done) => {
      childProcess.exec(statusCommand, (err, out) => {
        if (err) {
          done(false);
          return;
        }
        done(!out.includes('Task not found'));
      });
    });

  const pause = () => new Promise((wake) => setTimeout(wake, delayMs));

  let attempt = 0;
  while (attempt < maxRetries) {
    if (await isRegistered()) {
      return;
    }
    await pause();
    attempt += 1;
  }

  // Give up waiting but let the caller proceed - the task may still work
  console.warn(`⚠️ Task ${taskId} not yet visible after ${maxRetries} retries, continuing anyway`);
}
323
+
324
/**
 * Follow a zeroshot task's log file until the task finishes, streaming each
 * JSON line to the message bus.
 *
 * Two timers cooperate:
 *  - every 300ms the log file is read from the last known offset and every
 *    complete line is handed to the broadcaster;
 *  - every 1000ms `zeroshot status <taskId>` is run; once it reports
 *    "completed" or "failed" the log is drained and the promise resolves.
 *
 * Only lines that parse as JSON objects are accumulated in `output` and
 * published as AGENT_OUTPUT. An optional `[<13-digit epoch ms>]` prefix on a
 * line supplies the message timestamp.
 *
 * The returned promise never rejects. `agent.currentTask.kill(reason)` stops
 * both timers and resolves it with `{ success: false, output, error: reason }`.
 * There is no built-in timeout: a task runs until completion or explicit kill.
 *
 * @param {Object} agent - Agent instance
 * @param {String} taskId - Task ID to follow
 * @returns {Promise<Object>} Result object { success, output, error }
 */
function followClaudeTaskLogs(agent, taskId) {
  const fsModule = require('fs');
  const { execSync, exec } = require('child_process');
  const ctPath = getClaudeTasksPath();

  return new Promise((resolve) => {
    let output = '';
    let logFilePath = null;
    let lastSize = 0; // Byte offset of the next unread byte in the log file
    let pollInterval = null;
    let statusCheckInterval = null;
    let resolved = false;

    // Streaming decoder: a multi-byte UTF-8 character split across two reads is
    // held back until its remaining bytes arrive instead of becoming U+FFFD.
    const decoder = new TextDecoder('utf-8', { ignoreBOM: true });

    // Get log file path from ct
    try {
      logFilePath = execSync(`${ctPath} get-log-path ${taskId}`, {
        encoding: 'utf-8',
      }).trim();
      agent._log(`📋 Agent ${agent.id}: Following ct logs for ${taskId}`);
    } catch {
      // Task might not have log file yet, wait and retry
      agent._log(`⏳ Agent ${agent.id}: Waiting for log file...`);
    }

    // Buffer for incomplete lines across polls
    let lineBuffer = '';

    // Broadcast a complete JSON line as one message
    // Lines may be prefixed with timestamps: [1733301234567]{json...}
    const broadcastLine = (line) => {
      if (!line.trim()) return;

      // Parse timestamp prefix if present: [epochMs]content
      // IMPORTANT: Trim \r from CRLF line endings before matching
      let timestamp = Date.now();
      let content = line.replace(/\r$/, '');

      const timestampMatch = content.match(/^\[(\d{13})\](.*)$/);
      if (timestampMatch) {
        timestamp = parseInt(timestampMatch[1], 10);
        content = timestampMatch[2];
      }

      // Skip known non-JSON patterns (footer, separators, metadata)
      if (
        content.startsWith('===') ||
        content.startsWith('Finished:') ||
        content.startsWith('Exit code:') ||
        (content.includes('"type":"system"') && content.includes('"subtype":"init"'))
      ) {
        return;
      }

      // Only parse lines that start with { (likely JSON)
      if (!content.trim().startsWith('{')) {
        return;
      }

      // Validate it's valid JSON before broadcasting
      try {
        JSON.parse(content);
      } catch {
        // Not valid JSON, skip silently
        return;
      }

      output += content + '\n';

      // Update liveness timestamp
      agent.lastOutputTime = Date.now();

      agent._publish({
        topic: 'AGENT_OUTPUT',
        receiver: 'broadcast',
        timestamp, // Use the actual timestamp from when output was produced
        content: {
          text: content,
          data: {
            type: 'stdout',
            line: content,
            agent: agent.id,
            role: agent.role,
            iteration: agent.iteration,
          },
        },
      });
    };

    // Process new content by splitting into complete lines
    const processNewContent = (content) => {
      lineBuffer += content;
      const lines = lineBuffer.split('\n');

      // Keep last segment in buffer (might be an incomplete line)
      lineBuffer = lines.pop();

      for (const line of lines) {
        broadcastLine(line);
      }
    };

    // Emit whatever is still buffered; used once the task has finished so a
    // final line without a trailing newline is not lost.
    const flushPendingLine = () => {
      const tail = lineBuffer + decoder.decode();
      lineBuffer = '';
      if (tail) {
        broadcastLine(tail);
      }
    };

    // Poll the log file for new content
    const pollLogFile = () => {
      // If we don't have log path yet, try to get it
      if (!logFilePath) {
        try {
          logFilePath = execSync(`${ctPath} get-log-path ${taskId}`, {
            encoding: 'utf-8',
          }).trim();
          agent._log(`📋 Agent ${agent.id}: Found log file: ${logFilePath}`);
        } catch {
          return; // Not ready yet
        }
      }

      // Check if file exists
      if (!fsModule.existsSync(logFilePath)) {
        return; // File not created yet
      }

      try {
        const currentSize = fsModule.statSync(logFilePath).size;
        if (currentSize <= lastSize) {
          return; // Nothing new
        }

        // Read new content
        const chunk = Buffer.alloc(currentSize - lastSize);
        const fd = fsModule.openSync(logFilePath, 'r');
        let bytesRead = 0;
        try {
          bytesRead = fsModule.readSync(fd, chunk, 0, chunk.length, lastSize);
        } finally {
          // Always release the descriptor, even when the read throws
          fsModule.closeSync(fd);
        }

        if (bytesRead > 0) {
          // Only decode and advance by what was actually read
          lastSize += bytesRead;
          processNewContent(decoder.decode(chunk.subarray(0, bytesRead), { stream: true }));
        }
      } catch (err) {
        // File might have been deleted or locked
        console.warn(`⚠️ Agent ${agent.id}: Error reading log: ${err.message}`);
      }
    };

    // Start polling log file (every 300ms for responsive streaming)
    pollInterval = setInterval(pollLogFile, 300);

    // Poll ct status to know when task is complete
    // Track consecutive failures for debugging stuck clusters
    let consecutiveExecFailures = 0;
    const MAX_CONSECUTIVE_FAILURES = 30; // 30 seconds of failures = log warning

    statusCheckInterval = setInterval(() => {
      exec(`${ctPath} status ${taskId}`, (error, stdout, stderr) => {
        if (resolved) return;

        // Track exec failures - if status command keeps failing, something is wrong
        if (error) {
          consecutiveExecFailures++;
          if (consecutiveExecFailures === MAX_CONSECUTIVE_FAILURES) {
            console.error(
              `[Agent ${agent.id}] ⚠️ Status polling failed ${MAX_CONSECUTIVE_FAILURES} times consecutively!`
            );
            console.error(`  Command: ${ctPath} status ${taskId}`);
            console.error(`  Error: ${error.message}`);
            console.error(`  Stderr: ${stderr || 'none'}`);
            console.error(`  This may indicate zeroshot is not in PATH or task storage is corrupted.`);
          }
          return; // Keep polling - might be transient
        }

        // Reset failure counter on success
        consecutiveExecFailures = 0;

        // Check for completion/failure status
        // Strip ANSI codes in case chalk outputs them (shouldn't in non-TTY, but be safe)
        // Use RegExp constructor to avoid ESLint no-control-regex false positive
        const ansiPattern = new RegExp(String.fromCharCode(27) + '\\[[0-9;]*m', 'g');
        const cleanStdout = stdout.replace(ansiPattern, '');
        // Use flexible whitespace matching in case spacing changes
        const isCompleted = /Status:\s+completed/i.test(cleanStdout);
        const isFailed = /Status:\s+failed/i.test(cleanStdout);

        if (!isCompleted && !isFailed) {
          return; // Still running
        }

        const success = isCompleted;

        // Read any final content
        pollLogFile();

        // Give the log writer a short grace period, then clean up and resolve
        setTimeout(() => {
          if (resolved) return;
          resolved = true;

          clearInterval(pollInterval);
          clearInterval(statusCheckInterval);

          // Drain anything written during the grace period, including a last
          // line that has no trailing newline.
          pollLogFile();
          flushPendingLine();

          agent.currentTask = null;

          // Extract meaningful error context when task fails
          let errorContext = null;
          if (!success) {
            // Try the (ANSI-stripped) status output first
            const statusErrorMatch = cleanStdout.match(/Error:\s*(.+)/);
            const lastOutput = output.slice(-500).trim();

            if (statusErrorMatch) {
              errorContext = statusErrorMatch[1].trim();
            } else if (lastOutput) {
              // Fall back to the last 500 chars of output (likely contains the failure reason)
              const errorPatterns = [
                /error:\s*(.+)/i,
                /failed:\s*(.+)/i,
                /Exception:\s*(.+)/i,
                /panic:\s*(.+)/i,
              ];
              for (const pattern of errorPatterns) {
                const match = lastOutput.match(pattern);
                if (match) {
                  errorContext = match[1].slice(0, 200);
                  break;
                }
              }
              // If no pattern matched, include last portion of output
              if (!errorContext) {
                errorContext = `Task failed. Last output: ${lastOutput.slice(-200)}`;
              }
            } else {
              errorContext =
                'Task failed with no output (check if task was interrupted or timed out)';
            }
          }

          resolve({
            success,
            output,
            error: sanitizeErrorMessage(errorContext),
          });
        }, 500);
      });
    }, 1000);

    // Store cleanup function for kill.
    // The promise is RESOLVED with a failure result (not rejected, not left
    // pending) so the caller can handle the kill gracefully.
    agent.currentTask = {
      kill: (reason = 'Task killed') => {
        if (resolved) return;
        resolved = true;
        clearInterval(pollInterval);
        clearInterval(statusCheckInterval);
        agent._stopLivenessCheck();
        resolve({
          success: false,
          output,
          error: reason,
        });
      },
    };

    // NOTE: There is intentionally no task timeout here. Tasks run until
    // completion, explicit kill, or an external error (rate limit, API failure).
  });
}
616
+
617
/**
 * Resolve the command used to invoke the zeroshot CLI.
 *
 * The bare command name is returned, so the executable is expected to be
 * installed globally and resolvable through PATH.
 *
 * @returns {String} Command name of the unified zeroshot CLI
 */
function getClaudeTasksPath() {
  const unifiedCliCommand = 'zeroshot';
  return unifiedCliCommand;
}
625
+
626
/**
 * Run the Claude CLI inside the cluster's container (isolation mode) and
 * stream its stdout to the message bus.
 *
 * The command is `claude --print --dangerously-skip-permissions --output-format <fmt>`
 * plus, depending on config, `--verbose --include-partial-messages` (stream-json),
 * `--json-schema <schema>` (json with a configured schema) and `--model <model>`.
 * The prompt is always the last argument.
 *
 * Stdout is decoded as UTF-8 and split into lines across chunk boundaries, so a
 * JSON line (or a multi-byte character) that spans two `data` events is still
 * published as one message. Every line that parses as JSON is published as
 * AGENT_OUTPUT; the complete decoded stdout is returned in `output`.
 *
 * @param {Object} agent - Agent instance
 * @param {String} context - Context to pass to Claude
 * @returns {Promise<Object>} Result object { success, output, error }. Resolves
 *   when the process closes; rejects only when the process emits 'error'.
 */
function spawnClaudeTaskIsolated(agent, context) {
  const { manager, clusterId } = agent.isolation;

  agent._log(`📦 Agent ${agent.id}: Running task in isolated container...`);

  // Build command to run inside container
  // Use claude directly inside container (installed in base image)
  // CRITICAL: Default to strict schema validation (same as spawnClaudeTask)
  const desiredOutputFormat = agent.config.outputFormat || 'json';
  const strictSchema = agent.config.strictSchema !== false; // DEFAULT TO TRUE
  const runOutputFormat =
    agent.config.jsonSchema && desiredOutputFormat === 'json' && !strictSchema
      ? 'stream-json'
      : desiredOutputFormat;
  // NOTE: --dangerously-skip-permissions is REQUIRED for non-interactive (--print) mode
  // Without it, Claude can't write files, run commands, etc. in the isolated container
  const command = [
    'claude',
    '--print',
    '--dangerously-skip-permissions',
    '--output-format',
    runOutputFormat,
  ];

  // stream-json with --print requires --verbose and partial messages for live output
  if (runOutputFormat === 'stream-json') {
    command.push('--verbose');
    command.push('--include-partial-messages');
  }

  // Add JSON schema if specified in agent config (enforces structured output)
  if (agent.config.jsonSchema) {
    if (runOutputFormat === 'json') {
      // strictSchema=true OR no schema conflict: pass schema to CLI for native enforcement
      const schema = JSON.stringify(agent.config.jsonSchema);
      command.push('--json-schema', schema);
    } else if (!agent.quiet) {
      agent._log(
        `[Agent ${agent.id}] jsonSchema configured; running stream-json for live logs (strictSchema=false). Schema will be validated after completion.`
      );
    }
  }

  // Add model if specified
  const selectedModel = agent._selectModel();
  if (selectedModel) {
    command.push('--model', selectedModel);
  }

  // Add explicit output instructions when we run stream-json for a jsonSchema agent.
  let finalContext = context;
  if (
    agent.config.jsonSchema &&
    desiredOutputFormat === 'json' &&
    runOutputFormat === 'stream-json'
  ) {
    finalContext += `\n\n## Output Format (REQUIRED)\n\nReturn a JSON object that matches this schema exactly.\n\nSchema:\n\`\`\`json\n${JSON.stringify(
      agent.config.jsonSchema,
      null,
      2
    )}\n\`\`\`\n`;
  }

  // Add the context as the prompt
  command.push(finalContext);

  return new Promise((resolve, reject) => {
    let output = '';
    let resolved = false;

    // Text after the last newline seen so far; completed by a later chunk
    let pendingLine = '';
    // Streaming decoder keeps the bytes of a split multi-byte character until
    // the rest arrives, instead of emitting U+FFFD for each half.
    const decoder = new TextDecoder('utf-8', { ignoreBOM: true });

    // Publish one complete stdout line if it is a JSON object
    const publishLine = (line) => {
      if (!line.trim() || !line.trim().startsWith('{')) return;

      // Validate JSON
      try {
        JSON.parse(line);
      } catch {
        return; // Not valid JSON
      }

      agent._publish({
        topic: 'AGENT_OUTPUT',
        receiver: 'broadcast',
        content: {
          text: line,
          data: {
            type: 'stdout',
            line,
            agent: agent.id,
            role: agent.role,
            iteration: agent.iteration,
            isolated: true,
          },
        },
      });
    };

    // Record decoded text and publish every line completed by it
    const consume = (text) => {
      if (!text) return;
      output += text;

      const pieces = (pendingLine + text).split('\n');
      pendingLine = pieces.pop(); // Last piece has no newline yet
      for (const piece of pieces) {
        publishLine(piece);
      }
    };

    // Spawn process inside container
    const proc = manager.spawnInContainer(clusterId, command, {
      env: {
        ANTHROPIC_MODEL: selectedModel,
        // Activate AskUserQuestion blocking hook (see hooks/block-ask-user-question.py)
        ZEROSHOT_BLOCK_ASK_USER: '1',
      },
    });

    // Stream stdout to message bus
    proc.stdout.on('data', (data) => {
      consume(typeof data === 'string' ? data : decoder.decode(data, { stream: true }));
    });

    proc.stderr.on('data', (data) => {
      const text = data.toString();
      console.error(`[${agent.id}] stderr: ${text}`);
    });

    proc.on('close', (code, signal) => {
      if (resolved) return;
      resolved = true;

      // Flush bytes still held by the decoder and a final line that was not
      // terminated by a newline.
      consume(decoder.decode());
      if (pendingLine) {
        publishLine(pendingLine);
        pendingLine = '';
      }

      agent.currentTask = null;

      // Handle process killed by signal (e.g., SIGTERM, SIGKILL, SIGSTOP)
      if (signal) {
        resolve({
          success: false,
          output,
          error: `Process killed by signal ${signal}`,
        });
        return;
      }

      resolve({
        success: code === 0,
        output,
        error: code !== 0 ? `Process exited with code ${code}` : null,
      });
    });

    proc.on('error', (err) => {
      if (resolved) return;
      resolved = true;

      agent.currentTask = null;
      reject(err);
    });

    // Store cleanup function; the promise settles through the 'close' handler
    agent.currentTask = {
      kill: () => {
        if (!resolved) {
          proc.kill('SIGTERM');
        }
      },
    };

    // NOTE: There is intentionally no task timeout here. Tasks run until
    // completion, explicit kill, or an external error (rate limit, API failure).
  });
}
812
+
/**
 * Retry fenced-JSON extraction against the decoded `result` string of a CLI wrapper.
 *
 * When `text` is a JSON document such as `{"type":"result","result":"```json\n{...}\n```"}`,
 * the fenced payload is JSON-escaped inside the raw text and cannot be parsed directly.
 * Decoding the wrapper first restores the real newlines and quotes.
 *
 * @param {String} text - Candidate JSON document (single line of agent output)
 * @param {RegExp} fencePattern - Pattern capturing the contents of a ```json fence
 * @returns {*} Parsed fenced payload, or undefined when it cannot be recovered
 */
function extractFencedJsonFromResultField(text, fencePattern) {
  try {
    const wrapper = JSON.parse(text);
    if (wrapper === null || typeof wrapper !== 'object' || typeof wrapper.result !== 'string') {
      return undefined;
    }
    const fenced = wrapper.result.match(fencePattern);
    return fenced ? JSON.parse(fenced[1].trim()) : undefined;
  } catch {
    // Not a JSON wrapper, or the decoded fence is still invalid JSON
    return undefined;
  }
}

/**
 * Parse agent output to extract structured result data
 * GENERIC - returns whatever structured output the agent provides
 * Works with any agent schema (planner, validator, worker, etc.)
 *
 * Extraction order:
 *   1. CLI JSON structure (structured_output / result wrapper), only when the agent is
 *      configured with outputFormat 'json' and a jsonSchema
 *   2. A ```json fenced block (also looked up inside a JSON-encoded `result` string)
 *   3. The selected output line parsed directly as a JSON object
 *
 * @param {Object} agent - Agent instance (reads id, role, iteration, config, _publish)
 * @param {String} output - Raw output from agent (plain text or NDJSON stream log)
 * @returns {Object} Parsed result data
 * @throws {Error} When output is empty/marked as failed, when no JSON can be extracted,
 *   when a fenced block holds invalid JSON, or when a validator's output violates its schema
 */
function parseResultOutput(agent, output) {
  // Empty or error outputs = FAIL
  if (!output || output.includes('Task not found') || output.includes('Process terminated')) {
    throw new Error('Task execution failed - no output');
  }

  // Captures the contents of a ```json fenced block (non-global, so it carries no state)
  const fencedJsonPattern = /```json\s*([\s\S]*?)```/;

  let parsed;
  let trimmedOutput = output.trim();

  // IMPORTANT: Output is NDJSON (one JSON object per line) from streaming log
  // Find the line with "type":"result" which contains the actual result
  const lines = trimmedOutput.split('\n');
  const resultLine = lines.find((line) => {
    try {
      const obj = JSON.parse(line.trim());
      return obj.type === 'result';
    } catch {
      return false;
    }
  });

  // Use the result line if found, otherwise use last non-empty line
  if (resultLine) {
    trimmedOutput = resultLine.trim();
  } else if (lines.length > 1) {
    // Fallback: use last non-empty line
    for (let i = lines.length - 1; i >= 0; i--) {
      if (lines[i].trim()) {
        trimmedOutput = lines[i].trim();
        break;
      }
    }
  }

  // Strategy 1: If agent uses JSON output format, try CLI JSON structure first
  if (agent.config.outputFormat === 'json' && agent.config.jsonSchema) {
    try {
      const claudeOutput = JSON.parse(trimmedOutput);

      // Try structured_output field first (standard CLI format)
      if (claudeOutput.structured_output && typeof claudeOutput.structured_output === 'object') {
        parsed = claudeOutput.structured_output;
      }
      // Check if it's a direct object (not a primitive)
      else if (
        typeof claudeOutput === 'object' &&
        claudeOutput !== null &&
        !Array.isArray(claudeOutput)
      ) {
        // Check for result wrapper
        if (claudeOutput.result && typeof claudeOutput.result === 'object') {
          parsed = claudeOutput.result;
        }
        // IMPORTANT: Handle case where result is a string containing markdown-wrapped JSON
        // Claude CLI with --output-format json returns { result: "```json\n{...}\n```" }
        else if (claudeOutput.result && typeof claudeOutput.result === 'string') {
          const resultStr = claudeOutput.result;
          // Try extracting JSON from markdown code block
          const jsonMatch = resultStr.match(fencedJsonPattern);
          if (jsonMatch) {
            try {
              parsed = JSON.parse(jsonMatch[1].trim());
            } catch {
              // Fall through to other strategies
            }
          }
          // If no markdown block, try parsing result string directly as JSON
          if (!parsed) {
            try {
              parsed = JSON.parse(resultStr);
            } catch {
              // Fall through to other strategies
            }
          }
        }
        // Use directly if it has meaningful keys (and we haven't found a better parse)
        if (!parsed) {
          const keys = Object.keys(claudeOutput);
          if (keys.length > 0 && keys.some((k) => !['type', 'subtype', 'is_error'].includes(k))) {
            parsed = claudeOutput;
          }
        }
      }
    } catch {
      // JSON parse failed - fall through to markdown extraction
    }
  }

  // Strategy 2: Extract JSON from markdown code block (legacy or fallback)
  if (!parsed) {
    const jsonMatch = trimmedOutput.match(fencedJsonPattern);
    if (jsonMatch) {
      try {
        parsed = JSON.parse(jsonMatch[1].trim());
      } catch (e) {
        // The fence may live inside the JSON-encoded `result` string of a CLI result line,
        // where newlines and quotes are still escaped. Decode the wrapper and retry before
        // reporting the block as malformed.
        parsed = extractFencedJsonFromResultField(trimmedOutput, fencedJsonPattern);
        if (!parsed) {
          throw new Error(`JSON parse failed in markdown block: ${e.message}`);
        }
      }
    }
  }

  // Strategy 3: Try parsing the whole output as JSON
  if (!parsed) {
    try {
      const directParse = JSON.parse(trimmedOutput);
      if (typeof directParse === 'object' && directParse !== null) {
        parsed = directParse;
      }
    } catch {
      // Not valid JSON, fall through to error
    }
  }

  // No strategy worked
  if (!parsed) {
    console.error(`\n${'='.repeat(80)}`);
    console.error(`🔴 AGENT OUTPUT MISSING REQUIRED JSON BLOCK`);
    console.error(`${'='.repeat(80)}`);
    console.error(`Agent: ${agent.id}, Role: ${agent.role}`);
    console.error(`Output (last 500 chars): ${trimmedOutput.slice(-500)}`);
    console.error(`${'='.repeat(80)}\n`);
    throw new Error(`Agent ${agent.id} output missing required JSON block`);
  }

  // If a JSON schema is configured, validate parsed output locally.
  // This preserves schema enforcement even when we run stream-json for live logs.
  // IMPORTANT: For non-validator agents we warn but do not fail the cluster.
  if (agent.config.jsonSchema) {
    const Ajv = require('ajv');
    // NOTE: useDefaults and removeAdditional make validation mutate `parsed` in place
    const ajv = new Ajv({
      allErrors: true,
      strict: false,
      coerceTypes: false, // STRICT: Reject type mismatches (e.g., null instead of array)
      useDefaults: true,
      removeAdditional: true,
    });
    const validate = ajv.compile(agent.config.jsonSchema);
    const valid = validate(parsed);
    if (!valid) {
      // Keep the message short: report at most the first five schema errors
      const errorList = (validate.errors || [])
        .slice(0, 5)
        .map((e) => `${e.instancePath || e.schemaPath} ${e.message}`)
        .join('; ');
      const msg =
        `Agent ${agent.id} output failed JSON schema validation: ` +
        (errorList || 'unknown schema error');

      // Validators stay strict (they already have auto-approval fallback on crash).
      if (agent.role === 'validator') {
        throw new Error(msg);
      }

      // Non-validators: emit warning and continue with best-effort parsed data.
      console.warn(`⚠️ ${msg}`);
      agent._publish({
        topic: 'AGENT_SCHEMA_WARNING',
        receiver: 'broadcast',
        content: {
          text: msg,
          data: {
            agent: agent.id,
            role: agent.role,
            iteration: agent.iteration,
            errors: validate.errors || [],
          },
        },
      });
    }
  }

  // Return whatever the agent produced - no hardcoded field requirements
  // Template substitution will validate that required fields exist
  return parsed;
}
994
+
995
+ /**
996
+ * Kill current task
997
+ * @param {Object} agent - Agent instance
998
+ */
999
+ function killTask(agent) {
1000
+ if (agent.currentTask) {
1001
+ // currentTask may be either a ChildProcess or our custom { kill } object
1002
+ if (typeof agent.currentTask.kill === 'function') {
1003
+ agent.currentTask.kill('SIGTERM');
1004
+ }
1005
+ agent.currentTask = null;
1006
+ }
1007
+
1008
+ // Also kill the underlying zeroshot task if we have a task ID
1009
+ // This ensures the task process is stopped, not just our polling intervals
1010
+ if (agent.currentTaskId) {
1011
+ const { exec } = require('child_process');
1012
+ const ctPath = getClaudeTasksPath();
1013
+ exec(`${ctPath} task kill ${agent.currentTaskId}`, (error) => {
1014
+ if (error) {
1015
+ // Task may have already completed or been killed, ignore errors
1016
+ agent._log(`Note: Could not kill task ${agent.currentTaskId}: ${error.message}`);
1017
+ } else {
1018
+ agent._log(`Killed task ${agent.currentTaskId}`);
1019
+ }
1020
+ });
1021
+ agent.currentTaskId = null;
1022
+ }
1023
+ }
1024
+
1025
+ module.exports = {
1026
+ ensureAskUserQuestionHook,
1027
+ spawnClaudeTask,
1028
+ followClaudeTaskLogs,
1029
+ waitForTaskReady,
1030
+ spawnClaudeTaskIsolated,
1031
+ getClaudeTasksPath,
1032
+ parseResultOutput,
1033
+ killTask,
1034
+ };