@covibes/zeroshot 5.2.1 → 5.3.0

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/CHANGELOG.md +174 -189
  2. package/README.md +199 -248
  3. package/cli/commands/providers.js +150 -0
  4. package/cli/index.js +214 -58
  5. package/cli/lib/first-run.js +40 -3
  6. package/cluster-templates/base-templates/debug-workflow.json +24 -78
  7. package/cluster-templates/base-templates/full-workflow.json +44 -145
  8. package/cluster-templates/base-templates/single-worker.json +23 -15
  9. package/cluster-templates/base-templates/worker-validator.json +47 -34
  10. package/cluster-templates/conductor-bootstrap.json +7 -5
  11. package/lib/docker-config.js +6 -1
  12. package/lib/provider-detection.js +59 -0
  13. package/lib/provider-names.js +56 -0
  14. package/lib/settings.js +191 -6
  15. package/lib/stream-json-parser.js +4 -238
  16. package/package.json +21 -5
  17. package/scripts/validate-templates.js +100 -0
  18. package/src/agent/agent-config.js +37 -13
  19. package/src/agent/agent-context-builder.js +64 -2
  20. package/src/agent/agent-hook-executor.js +82 -9
  21. package/src/agent/agent-lifecycle.js +53 -14
  22. package/src/agent/agent-task-executor.js +196 -194
  23. package/src/agent/output-extraction.js +200 -0
  24. package/src/agent/output-reformatter.js +175 -0
  25. package/src/agent/schema-utils.js +111 -0
  26. package/src/agent-wrapper.js +102 -30
  27. package/src/agents/git-pusher-agent.json +1 -1
  28. package/src/claude-task-runner.js +80 -30
  29. package/src/config-router.js +13 -13
  30. package/src/config-validator.js +231 -10
  31. package/src/github.js +36 -0
  32. package/src/isolation-manager.js +243 -154
  33. package/src/ledger.js +28 -6
  34. package/src/orchestrator.js +391 -96
  35. package/src/preflight.js +85 -82
  36. package/src/providers/anthropic/cli-builder.js +45 -0
  37. package/src/providers/anthropic/index.js +134 -0
  38. package/src/providers/anthropic/models.js +23 -0
  39. package/src/providers/anthropic/output-parser.js +159 -0
  40. package/src/providers/base-provider.js +181 -0
  41. package/src/providers/capabilities.js +51 -0
  42. package/src/providers/google/cli-builder.js +55 -0
  43. package/src/providers/google/index.js +116 -0
  44. package/src/providers/google/models.js +24 -0
  45. package/src/providers/google/output-parser.js +92 -0
  46. package/src/providers/index.js +75 -0
  47. package/src/providers/openai/cli-builder.js +122 -0
  48. package/src/providers/openai/index.js +135 -0
  49. package/src/providers/openai/models.js +21 -0
  50. package/src/providers/openai/output-parser.js +129 -0
  51. package/src/sub-cluster-wrapper.js +18 -3
  52. package/src/task-runner.js +8 -6
  53. package/src/tui/layout.js +20 -3
  54. package/task-lib/attachable-watcher.js +80 -78
  55. package/task-lib/claude-recovery.js +119 -0
  56. package/task-lib/commands/list.js +1 -1
  57. package/task-lib/commands/resume.js +3 -2
  58. package/task-lib/commands/run.js +12 -3
  59. package/task-lib/runner.js +59 -38
  60. package/task-lib/scheduler.js +2 -2
  61. package/task-lib/store.js +43 -30
  62. package/task-lib/watcher.js +81 -62
@@ -14,6 +14,10 @@ const { spawn } = require('child_process');
14
14
  const path = require('path');
15
15
  const fs = require('fs');
16
16
  const os = require('os');
17
+ const { getProvider, parseChunkWithProvider } = require('../providers');
18
+
19
+ // Schema utilities for normalizing LLM output
20
+ const { normalizeEnumValues } = require('./schema-utils');
17
21
 
18
22
  /**
19
23
  * Validate and sanitize error messages.
@@ -51,21 +55,6 @@ function sanitizeErrorMessage(error) {
51
55
  return error;
52
56
  }
53
57
 
54
- /**
55
- * Strip timestamp prefix from log lines.
56
- * Log lines may have format: [epochMs]{json...} or [epochMs]text
57
- *
58
- * @param {string} line - Raw log line
59
- * @returns {string} Line content without timestamp prefix, empty string for invalid input
60
- */
61
- function stripTimestampPrefix(line) {
62
- if (!line || typeof line !== 'string') return '';
63
- const trimmed = line.trim().replace(/\r$/, '');
64
- if (!trimmed) return '';
65
- const match = trimmed.match(/^\[(\d{13})\](.*)$/);
66
- return match ? match[2] : trimmed;
67
- }
68
-
69
58
  /**
70
59
  * Extract error context from task output.
71
60
  * Shared by both isolated and non-isolated modes.
@@ -91,13 +80,47 @@ function extractErrorContext({ output, statusOutput, taskId, isNotFound = false
91
80
  }
92
81
  }
93
82
 
83
+ // KNOWN CLAUDE CODE LIMITATIONS - detect and provide actionable guidance
84
+ const fullOutput = output || '';
85
+
86
+ // 256KB file limit error
87
+ if (fullOutput.includes('exceeds maximum allowed size') || fullOutput.includes('256KB')) {
88
+ return sanitizeErrorMessage(
89
+ `FILE TOO LARGE (Claude Code 256KB limit). ` +
90
+ `Use offset and limit parameters when reading large files. ` +
91
+ `Example: Read tool with offset=0, limit=1000 to read first 1000 lines.`
92
+ );
93
+ }
94
+
95
+ // Streaming mode error (interactive tools in non-interactive mode)
96
+ if (fullOutput.includes('only prompt commands are supported in streaming mode')) {
97
+ return sanitizeErrorMessage(
98
+ `STREAMING MODE ERROR: Agent tried to use interactive tools in streaming mode. ` +
99
+ `This usually happens with AskUserQuestion or interactive prompts. ` +
100
+ `Zeroshot agents must run non-interactively.`
101
+ );
102
+ }
103
+
94
104
  // Fall back to extracting from output (last 500 chars)
95
- const lastOutput = (output || '').slice(-500).trim();
105
+ const lastOutput = fullOutput.slice(-500).trim();
96
106
  if (!lastOutput) {
97
- return sanitizeErrorMessage('Task failed with no output (check if task was interrupted or timed out)');
107
+ return sanitizeErrorMessage(
108
+ 'Task failed with no output (check if task was interrupted or timed out)'
109
+ );
98
110
  }
99
111
 
100
- // Common error patterns
112
+ // Extract non-JSON lines only (JSON lines contain "is_error": true which falsely matches)
113
+ const nonJsonLines = lastOutput
114
+ .split('\n')
115
+ .filter((line) => {
116
+ const trimmed = line.trim();
117
+ // Skip JSON objects and JSON-like content
118
+ return trimmed && !trimmed.startsWith('{') && !trimmed.startsWith('"');
119
+ })
120
+ .join('\n');
121
+
122
+ // Common error patterns - match against non-JSON content
123
+ const textToSearch = nonJsonLines || lastOutput;
101
124
  const errorPatterns = [
102
125
  /Error:\s*(.+)/i,
103
126
  /error:\s*(.+)/i,
@@ -107,7 +130,7 @@ function extractErrorContext({ output, statusOutput, taskId, isNotFound = false
107
130
  ];
108
131
 
109
132
  for (const pattern of errorPatterns) {
110
- const match = lastOutput.match(pattern);
133
+ const match = textToSearch.match(pattern);
111
134
  if (match) {
112
135
  return sanitizeErrorMessage(match[1].slice(0, 200));
113
136
  }
@@ -130,36 +153,26 @@ let dangerousGitHookInstalled = false;
130
153
  * @param {string} output - Full NDJSON output from Claude CLI
131
154
  * @returns {Object|null} Token usage data or null if not found
132
155
  */
133
- function extractTokenUsage(output) {
156
+ function extractTokenUsage(output, providerName = 'claude') {
134
157
  if (!output) return null;
135
158
 
136
- const lines = output.split('\n');
137
-
138
- // Find the result line containing usage data
139
- for (const line of lines) {
140
- const content = stripTimestampPrefix(line);
141
- if (!content) continue;
159
+ const provider = getProvider(providerName);
160
+ const events = parseChunkWithProvider(provider, output);
161
+ const resultEvent = events.find((event) => event.type === 'result');
142
162
 
143
- try {
144
- const event = JSON.parse(content);
145
- if (event.type === 'result') {
146
- const usage = event.usage || {};
147
- return {
148
- inputTokens: usage.input_tokens || 0,
149
- outputTokens: usage.output_tokens || 0,
150
- cacheReadInputTokens: usage.cache_read_input_tokens || 0,
151
- cacheCreationInputTokens: usage.cache_creation_input_tokens || 0,
152
- totalCostUsd: event.total_cost_usd || null,
153
- durationMs: event.duration_ms || null,
154
- modelUsage: event.modelUsage || null,
155
- };
156
- }
157
- } catch {
158
- // Not valid JSON, continue
159
- }
163
+ if (!resultEvent) {
164
+ return null;
160
165
  }
161
166
 
162
- return null;
167
+ return {
168
+ inputTokens: resultEvent.inputTokens || 0,
169
+ outputTokens: resultEvent.outputTokens || 0,
170
+ cacheReadInputTokens: resultEvent.cacheReadInputTokens || 0,
171
+ cacheCreationInputTokens: resultEvent.cacheCreationInputTokens || 0,
172
+ totalCostUsd: resultEvent.cost || null,
173
+ durationMs: resultEvent.duration || null,
174
+ modelUsage: resultEvent.modelUsage || null,
175
+ };
163
176
  }
164
177
 
165
178
  /**
@@ -325,6 +338,11 @@ function ensureDangerousGitHook() {
325
338
  * @returns {Promise<Object>} Result object { success, output, error }
326
339
  */
327
340
  async function spawnClaudeTask(agent, context) {
341
+ const providerName = agent._resolveProvider ? agent._resolveProvider() : 'claude';
342
+ const modelSpec = agent._resolveModelSpec
343
+ ? agent._resolveModelSpec()
344
+ : { model: agent._selectModel() };
345
+
328
346
  const ctPath = getClaudeTasksPath();
329
347
  const cwd = agent.config.cwd || process.cwd();
330
348
 
@@ -338,7 +356,15 @@ async function spawnClaudeTask(agent, context) {
338
356
  agent.config.jsonSchema && desiredOutputFormat === 'json' && !strictSchema
339
357
  ? 'stream-json'
340
358
  : desiredOutputFormat;
341
- const args = ['task', 'run', '--output-format', runOutputFormat];
359
+ const args = ['task', 'run', '--output-format', runOutputFormat, '--provider', providerName];
360
+
361
+ if (modelSpec?.model) {
362
+ args.push('--model', modelSpec.model);
363
+ }
364
+
365
+ if (modelSpec?.reasoningEffort) {
366
+ args.push('--reasoning-effort', modelSpec.reasoningEffort);
367
+ }
342
368
 
343
369
  // Add verification mode flag if configured
344
370
  if (agent.config.verificationMode) {
@@ -397,30 +423,35 @@ async function spawnClaudeTask(agent, context) {
397
423
  return spawnClaudeTaskIsolated(agent, context);
398
424
  }
399
425
 
400
- // NON-ISOLATION MODE: Use user's existing Claude config (preserves Keychain auth)
426
+ // NON-ISOLATION MODE: For Claude, use user's existing Claude config
401
427
  // AskUserQuestion blocking handled via:
402
- // 1. Prompt injection (see agent-context-builder) - tells agent not to ask
428
+ // 1. Prompt injection (see agent-context-builder)
403
429
  // 2. PreToolUse hook (defense-in-depth) - activated by ZEROSHOT_BLOCK_ASK_USER env var
404
- // DO NOT override CLAUDE_CONFIG_DIR - it breaks authentication on Claude CLI 2.x
405
- ensureAskUserQuestionHook();
430
+ if (providerName === 'claude') {
431
+ ensureAskUserQuestionHook();
406
432
 
407
- // WORKTREE MODE: Install git safety hook (blocks dangerous git commands)
408
- if (agent.worktree?.enabled) {
409
- ensureDangerousGitHook();
433
+ // WORKTREE MODE: Install git safety hook (blocks dangerous git commands)
434
+ if (agent.worktree?.enabled) {
435
+ ensureDangerousGitHook();
436
+ }
410
437
  }
411
438
 
412
439
  // Build environment for spawn
413
440
  const spawnEnv = {
414
441
  ...process.env,
415
- ANTHROPIC_MODEL: agent._selectModel(),
416
- // Activate AskUserQuestion blocking hook (see hooks/block-ask-user-question.py)
417
- ZEROSHOT_BLOCK_ASK_USER: '1',
418
442
  };
419
443
 
420
- // WORKTREE MODE: Activate git safety hook via environment variable
421
- // The hook only activates when ZEROSHOT_WORKTREE=1 is set
422
- if (agent.worktree?.enabled) {
423
- spawnEnv.ZEROSHOT_WORKTREE = '1';
444
+ if (providerName === 'claude') {
445
+ if (modelSpec?.model) {
446
+ spawnEnv.ANTHROPIC_MODEL = modelSpec.model;
447
+ }
448
+ // Activate AskUserQuestion blocking hook (see hooks/block-ask-user-question.py)
449
+ spawnEnv.ZEROSHOT_BLOCK_ASK_USER = '1';
450
+
451
+ // WORKTREE MODE: Activate git safety hook via environment variable
452
+ if (agent.worktree?.enabled) {
453
+ spawnEnv.ZEROSHOT_WORKTREE = '1';
454
+ }
424
455
  }
425
456
 
426
457
  const taskId = await new Promise((resolve, reject) => {
@@ -533,6 +564,7 @@ function followClaudeTaskLogs(agent, taskId) {
533
564
  const fsModule = require('fs');
534
565
  const { execSync, exec } = require('child_process');
535
566
  const ctPath = getClaudeTasksPath();
567
+ const providerName = agent._resolveProvider ? agent._resolveProvider() : 'claude';
536
568
 
537
569
  return new Promise((resolve, _reject) => {
538
570
  let output = '';
@@ -613,6 +645,7 @@ function followClaudeTaskLogs(agent, taskId) {
613
645
  agent: agent.id,
614
646
  role: agent.role,
615
647
  iteration: agent.iteration,
648
+ provider: providerName,
616
649
  },
617
650
  },
618
651
  });
@@ -698,7 +731,9 @@ function followClaudeTaskLogs(agent, taskId) {
698
731
  console.error(` Command: ${ctPath} status ${taskId}`);
699
732
  console.error(` Error: ${error.message}`);
700
733
  console.error(` Stderr: ${stderr || 'none'}`);
701
- console.error(` This may indicate zeroshot is not in PATH or task storage is corrupted.`);
734
+ console.error(
735
+ ` This may indicate zeroshot is not in PATH or task storage is corrupted.`
736
+ );
702
737
 
703
738
  // Stop polling and resolve with failure
704
739
  if (!resolved) {
@@ -745,13 +780,36 @@ function followClaudeTaskLogs(agent, taskId) {
745
780
  // Use flexible whitespace matching in case spacing changes
746
781
  const isCompleted = /Status:\s+completed/i.test(cleanStdout);
747
782
  const isFailed = /Status:\s+failed/i.test(cleanStdout);
783
+ // BUGFIX: Handle "stale (process died)" status - watcher died before updating status
784
+ // Check if task produced a successful result (structured_output in log file)
785
+ const isStale = /Status:\s+stale/i.test(cleanStdout);
748
786
 
749
- if (isCompleted || isFailed) {
750
- const success = isCompleted;
751
-
752
- // Read any final content
787
+ if (isCompleted || isFailed || isStale) {
788
+ // CRITICAL: Read final log content BEFORE checking output
789
+ // Fixes race where status flips to stale before log polling catches up
753
790
  pollLogFile();
754
791
 
792
+ // For stale tasks, check log file for successful result
793
+ let success = isCompleted;
794
+ if (isStale && output) {
795
+ // Look for structured_output in accumulated output - indicates success
796
+ const hasStructuredOutput = /"structured_output"\s*:/.test(output);
797
+ const hasSuccessResult = /"subtype"\s*:\s*"success"/.test(output);
798
+ let hasParsedOutput = false;
799
+ try {
800
+ const { extractJsonFromOutput } = require('./output-extraction');
801
+ hasParsedOutput = !!extractJsonFromOutput(output, providerName);
802
+ } catch {
803
+ // Ignore extraction errors - fallback to other signals
804
+ }
805
+ success = hasStructuredOutput || hasSuccessResult || hasParsedOutput;
806
+ if (!agent.quiet) {
807
+ agent._log(
808
+ `[Agent ${agent.id}] Task ${taskId} is stale - recovered as ${success ? 'SUCCESS' : 'FAILURE'} based on output analysis`
809
+ );
810
+ }
811
+ }
812
+
755
813
  // Clean up and resolve
756
814
  setTimeout(() => {
757
815
  if (resolved) return;
@@ -770,7 +828,7 @@ function followClaudeTaskLogs(agent, taskId) {
770
828
  success,
771
829
  output,
772
830
  error: errorContext,
773
- tokenUsage: extractTokenUsage(output),
831
+ tokenUsage: extractTokenUsage(output, providerName),
774
832
  });
775
833
  }, 500);
776
834
  }
@@ -792,7 +850,7 @@ function followClaudeTaskLogs(agent, taskId) {
792
850
  success: false,
793
851
  output,
794
852
  error: reason,
795
- tokenUsage: extractTokenUsage(output),
853
+ tokenUsage: extractTokenUsage(output, providerName),
796
854
  });
797
855
  },
798
856
  };
@@ -835,6 +893,10 @@ function getClaudeTasksPath() {
835
893
  */
836
894
  async function spawnClaudeTaskIsolated(agent, context) {
837
895
  const { manager, clusterId } = agent.isolation;
896
+ const providerName = agent._resolveProvider ? agent._resolveProvider() : 'claude';
897
+ const modelSpec = agent._resolveModelSpec
898
+ ? agent._resolveModelSpec()
899
+ : { model: agent._selectModel() };
838
900
 
839
901
  agent._log(`📦 Agent ${agent.id}: Running task in isolated container using zeroshot task run...`);
840
902
 
@@ -847,7 +909,23 @@ async function spawnClaudeTaskIsolated(agent, context) {
847
909
  ? 'stream-json'
848
910
  : desiredOutputFormat;
849
911
 
850
- const command = ['zeroshot', 'task', 'run', '--output-format', runOutputFormat];
912
+ const command = [
913
+ 'zeroshot',
914
+ 'task',
915
+ 'run',
916
+ '--output-format',
917
+ runOutputFormat,
918
+ '--provider',
919
+ providerName,
920
+ ];
921
+
922
+ if (modelSpec?.model) {
923
+ command.push('--model', modelSpec.model);
924
+ }
925
+
926
+ if (modelSpec?.reasoningEffort) {
927
+ command.push('--reasoning-effort', modelSpec.reasoningEffort);
928
+ }
851
929
 
852
930
  // Add verification mode flag if configured
853
931
  if (agent.config.verificationMode) {
@@ -886,13 +964,15 @@ async function spawnClaudeTaskIsolated(agent, context) {
886
964
 
887
965
  // STEP 1: Spawn task and extract task ID (same as non-isolated mode)
888
966
  const taskId = await new Promise((resolve, reject) => {
889
- const selectedModel = agent._selectModel();
890
967
  const proc = manager.spawnInContainer(clusterId, command, {
891
- env: {
892
- ANTHROPIC_MODEL: selectedModel,
893
- // Activate AskUserQuestion blocking hook (see hooks/block-ask-user-question.py)
894
- ZEROSHOT_BLOCK_ASK_USER: '1',
895
- },
968
+ env:
969
+ providerName === 'claude'
970
+ ? {
971
+ ANTHROPIC_MODEL: modelSpec?.model,
972
+ // Activate AskUserQuestion blocking hook (see hooks/block-ask-user-question.py)
973
+ ZEROSHOT_BLOCK_ASK_USER: '1',
974
+ }
975
+ : {},
896
976
  });
897
977
 
898
978
  // Track PID for resource monitoring
@@ -985,6 +1065,7 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
985
1065
 
986
1066
  const manager = isolation.manager;
987
1067
  const clusterId = isolation.clusterId;
1068
+ const providerName = agent._resolveProvider ? agent._resolveProvider() : 'claude';
988
1069
 
989
1070
  return new Promise((resolve, reject) => {
990
1071
  let taskExited = false;
@@ -1012,9 +1093,7 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
1012
1093
  // Broadcast line helper (same as non-isolated mode)
1013
1094
  const broadcastLine = (line) => {
1014
1095
  const timestampMatch = line.match(/^\[(\d{4}-\d{2}-\d{2}T[^\]]+)\]\s*(.*)$/);
1015
- const timestamp = timestampMatch
1016
- ? new Date(timestampMatch[1]).getTime()
1017
- : Date.now();
1096
+ const timestamp = timestampMatch ? new Date(timestampMatch[1]).getTime() : Date.now();
1018
1097
  const content = timestampMatch ? timestampMatch[2] : line;
1019
1098
 
1020
1099
  agent.messageBus.publish({
@@ -1026,6 +1105,7 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
1026
1105
  line: content,
1027
1106
  taskId,
1028
1107
  iteration: agent.iteration,
1108
+ provider: providerName,
1029
1109
  },
1030
1110
  },
1031
1111
  timestamp,
@@ -1058,9 +1138,7 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
1058
1138
  if (code !== 0) {
1059
1139
  cleanup();
1060
1140
  return reject(
1061
- new Error(
1062
- `Failed to get log path for ${taskId} inside container: ${stderr || stdout}`
1063
- )
1141
+ new Error(`Failed to get log path for ${taskId} inside container: ${stderr || stdout}`)
1064
1142
  );
1065
1143
  }
1066
1144
 
@@ -1161,8 +1239,8 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
1161
1239
  ? extractErrorContext({ output: fullOutput, taskId, isNotFound })
1162
1240
  : null;
1163
1241
 
1164
- // Parse result from output
1165
- const parsedResult = agent._parseResultOutput(fullOutput);
1242
+ // Parse result from output (async - may trigger reformatting)
1243
+ const parsedResult = await agent._parseResultOutput(fullOutput);
1166
1244
 
1167
1245
  resolve({
1168
1246
  success,
@@ -1170,7 +1248,7 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
1170
1248
  taskId,
1171
1249
  result: parsedResult,
1172
1250
  error: errorContext,
1173
- tokenUsage: extractTokenUsage(fullOutput),
1251
+ tokenUsage: extractTokenUsage(fullOutput, providerName),
1174
1252
  });
1175
1253
  }
1176
1254
  } catch (statusErr) {
@@ -1184,11 +1262,7 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
1184
1262
  setTimeout(() => {
1185
1263
  if (!taskExited) {
1186
1264
  cleanup();
1187
- reject(
1188
- new Error(
1189
- `Task ${taskId} timeout after ${agent.timeout}ms (isolated mode)`
1190
- )
1191
- );
1265
+ reject(new Error(`Task ${taskId} timeout after ${agent.timeout}ms (isolated mode)`));
1192
1266
  }
1193
1267
  }, agent.timeout);
1194
1268
  }
@@ -1204,133 +1278,57 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
1204
1278
  * Parse agent output to extract structured result data
1205
1279
  * GENERIC - returns whatever structured output the agent provides
1206
1280
  * Works with any agent schema (planner, validator, worker, etc.)
1281
+ *
1282
+ * Uses clean extraction pipeline from output-extraction.js
1283
+ * Falls back to reformatting if extraction fails and schema is available
1284
+ *
1207
1285
  * @param {Object} agent - Agent instance
1208
1286
  * @param {String} output - Raw output from agent
1209
- * @returns {Object} Parsed result data
1287
+ * @returns {Promise<Object>} Parsed result data
1210
1288
  */
1211
- function parseResultOutput(agent, output) {
1289
+ async function parseResultOutput(agent, output) {
1212
1290
  // Empty or error outputs = FAIL
1213
1291
  if (!output || output.includes('Task not found') || output.includes('Process terminated')) {
1214
1292
  throw new Error('Task execution failed - no output');
1215
1293
  }
1216
1294
 
1217
- let parsed;
1218
- let trimmedOutput = output.trim();
1295
+ const providerName = agent._resolveProvider ? agent._resolveProvider() : 'claude';
1296
+ const { extractJsonFromOutput } = require('./output-extraction');
1219
1297
 
1220
- // IMPORTANT: Output is NDJSON (one JSON object per line) from streaming log
1221
- // Lines may have timestamp prefix: [epochMs]{json...}
1222
- // Find the line with "type":"result" which contains the actual result
1223
- const lines = trimmedOutput.split('\n');
1224
- const resultLine = lines.find((line) => {
1225
- try {
1226
- const content = stripTimestampPrefix(line);
1227
- if (!content.startsWith('{')) return false;
1228
- const obj = JSON.parse(content);
1229
- return obj.type === 'result';
1230
- } catch {
1231
- return false;
1232
- }
1233
- });
1298
+ // Use clean extraction pipeline
1299
+ let parsed = extractJsonFromOutput(output, providerName);
1234
1300
 
1235
- // Use the result line if found, otherwise use last non-empty line
1236
- // CRITICAL: Strip timestamp prefix before assigning to trimmedOutput
1237
- if (resultLine) {
1238
- trimmedOutput = stripTimestampPrefix(resultLine);
1239
- } else if (lines.length > 1) {
1240
- // Fallback: use last non-empty line (also strip timestamp)
1241
- for (let i = lines.length - 1; i >= 0; i--) {
1242
- const content = stripTimestampPrefix(lines[i]);
1243
- if (content) {
1244
- trimmedOutput = content;
1245
- break;
1246
- }
1247
- }
1248
- }
1301
+ // If extraction failed but we have a schema, attempt reformatting
1302
+ if (!parsed && agent.config.jsonSchema) {
1303
+ const { reformatOutput } = require('./output-reformatter');
1249
1304
 
1250
- // Strategy 1: If agent uses JSON output format, try CLI JSON structure first
1251
- if (agent.config.outputFormat === 'json' && agent.config.jsonSchema) {
1252
1305
  try {
1253
- const claudeOutput = JSON.parse(trimmedOutput);
1254
-
1255
- // Try structured_output field first (standard CLI format)
1256
- if (claudeOutput.structured_output && typeof claudeOutput.structured_output === 'object') {
1257
- parsed = claudeOutput.structured_output;
1258
- }
1259
- // Check if it's a direct object (not a primitive)
1260
- else if (
1261
- typeof claudeOutput === 'object' &&
1262
- claudeOutput !== null &&
1263
- !Array.isArray(claudeOutput)
1264
- ) {
1265
- // Check for result wrapper
1266
- if (claudeOutput.result && typeof claudeOutput.result === 'object') {
1267
- parsed = claudeOutput.result;
1268
- }
1269
- // IMPORTANT: Handle case where result is a string containing markdown-wrapped JSON
1270
- // Claude CLI with --output-format json returns { result: "```json\n{...}\n```" }
1271
- else if (claudeOutput.result && typeof claudeOutput.result === 'string') {
1272
- const resultStr = claudeOutput.result;
1273
- // Try extracting JSON from markdown code block
1274
- const jsonMatch = resultStr.match(/```json\s*([\s\S]*?)```/);
1275
- if (jsonMatch) {
1276
- try {
1277
- parsed = JSON.parse(jsonMatch[1].trim());
1278
- } catch {
1279
- // Fall through to other strategies
1280
- }
1281
- }
1282
- // If no markdown block, try parsing result string directly as JSON
1283
- if (!parsed) {
1284
- try {
1285
- parsed = JSON.parse(resultStr);
1286
- } catch {
1287
- // Fall through to other strategies
1288
- }
1289
- }
1290
- }
1291
- // Use directly if it has meaningful keys (and we haven't found a better parse)
1292
- if (!parsed) {
1293
- const keys = Object.keys(claudeOutput);
1294
- if (keys.length > 0 && keys.some((k) => !['type', 'subtype', 'is_error'].includes(k))) {
1295
- parsed = claudeOutput;
1306
+ parsed = await reformatOutput({
1307
+ rawOutput: output,
1308
+ schema: agent.config.jsonSchema,
1309
+ providerName,
1310
+ onAttempt: (attempt, lastError) => {
1311
+ if (lastError) {
1312
+ console.warn(`[Agent ${agent.id}] Reformat attempt ${attempt}: ${lastError}`);
1313
+ } else {
1314
+ console.warn(
1315
+ `[Agent ${agent.id}] JSON extraction failed, reformatting (attempt ${attempt})...`
1316
+ );
1296
1317
  }
1297
- }
1298
- }
1299
- } catch {
1300
- // JSON parse failed - fall through to markdown extraction
1301
- }
1302
- }
1303
-
1304
- // Strategy 2: Extract JSON from markdown code block (legacy or fallback)
1305
- if (!parsed) {
1306
- const jsonMatch = trimmedOutput.match(/```json\s*([\s\S]*?)```/);
1307
- if (jsonMatch) {
1308
- try {
1309
- parsed = JSON.parse(jsonMatch[1].trim());
1310
- } catch (e) {
1311
- throw new Error(`JSON parse failed in markdown block: ${e.message}`);
1312
- }
1313
- }
1314
- }
1315
-
1316
- // Strategy 3: Try parsing the whole output as JSON
1317
- if (!parsed) {
1318
- try {
1319
- const directParse = JSON.parse(trimmedOutput);
1320
- if (typeof directParse === 'object' && directParse !== null) {
1321
- parsed = directParse;
1322
- }
1323
- } catch {
1324
- // Not valid JSON, fall through to error
1318
+ },
1319
+ });
1320
+ } catch (reformatError) {
1321
+ // Reformatting failed - fall through to error below
1322
+ console.error(`[Agent ${agent.id}] Reformatting failed: ${reformatError.message}`);
1325
1323
  }
1326
1324
  }
1327
1325
 
1328
- // No strategy worked
1329
1326
  if (!parsed) {
1327
+ const trimmedOutput = output.trim();
1330
1328
  console.error(`\n${'='.repeat(80)}`);
1331
1329
  console.error(`🔴 AGENT OUTPUT MISSING REQUIRED JSON BLOCK`);
1332
1330
  console.error(`${'='.repeat(80)}`);
1333
- console.error(`Agent: ${agent.id}, Role: ${agent.role}`);
1331
+ console.error(`Agent: ${agent.id}, Role: ${agent.role}, Provider: ${providerName}`);
1334
1332
  console.error(`Output (last 500 chars): ${trimmedOutput.slice(-500)}`);
1335
1333
  console.error(`${'='.repeat(80)}\n`);
1336
1334
  throw new Error(`Agent ${agent.id} output missing required JSON block`);
@@ -1340,6 +1338,10 @@ function parseResultOutput(agent, output) {
1340
1338
  // This preserves schema enforcement even when we run stream-json for live logs.
1341
1339
  // IMPORTANT: For non-validator agents we warn but do not fail the cluster.
1342
1340
  if (agent.config.jsonSchema) {
1341
+ // Normalize enum values BEFORE validation (handles case mismatches, common variations)
1342
+ // This is provider-agnostic - works for Claude CLI, Gemini, Codex, etc.
1343
+ normalizeEnumValues(parsed, agent.config.jsonSchema);
1344
+
1343
1345
  const Ajv = require('ajv');
1344
1346
  const ajv = new Ajv({
1345
1347
  allErrors: true,