@covibes/zeroshot 5.2.1 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/CHANGELOG.md +174 -189
  2. package/README.md +199 -248
  3. package/cli/commands/providers.js +150 -0
  4. package/cli/index.js +214 -58
  5. package/cli/lib/first-run.js +40 -3
  6. package/cluster-templates/base-templates/debug-workflow.json +24 -78
  7. package/cluster-templates/base-templates/full-workflow.json +44 -145
  8. package/cluster-templates/base-templates/single-worker.json +23 -15
  9. package/cluster-templates/base-templates/worker-validator.json +47 -34
  10. package/cluster-templates/conductor-bootstrap.json +7 -5
  11. package/lib/docker-config.js +6 -1
  12. package/lib/provider-detection.js +59 -0
  13. package/lib/provider-names.js +56 -0
  14. package/lib/settings.js +191 -6
  15. package/lib/stream-json-parser.js +4 -238
  16. package/package.json +21 -5
  17. package/scripts/validate-templates.js +100 -0
  18. package/src/agent/agent-config.js +37 -13
  19. package/src/agent/agent-context-builder.js +64 -2
  20. package/src/agent/agent-hook-executor.js +82 -9
  21. package/src/agent/agent-lifecycle.js +53 -14
  22. package/src/agent/agent-task-executor.js +196 -194
  23. package/src/agent/output-extraction.js +200 -0
  24. package/src/agent/output-reformatter.js +175 -0
  25. package/src/agent/schema-utils.js +111 -0
  26. package/src/agent-wrapper.js +102 -30
  27. package/src/agents/git-pusher-agent.json +1 -1
  28. package/src/claude-task-runner.js +80 -30
  29. package/src/config-router.js +13 -13
  30. package/src/config-validator.js +231 -10
  31. package/src/github.js +36 -0
  32. package/src/isolation-manager.js +243 -154
  33. package/src/ledger.js +28 -6
  34. package/src/orchestrator.js +391 -96
  35. package/src/preflight.js +85 -82
  36. package/src/providers/anthropic/cli-builder.js +45 -0
  37. package/src/providers/anthropic/index.js +134 -0
  38. package/src/providers/anthropic/models.js +23 -0
  39. package/src/providers/anthropic/output-parser.js +159 -0
  40. package/src/providers/base-provider.js +181 -0
  41. package/src/providers/capabilities.js +51 -0
  42. package/src/providers/google/cli-builder.js +55 -0
  43. package/src/providers/google/index.js +116 -0
  44. package/src/providers/google/models.js +24 -0
  45. package/src/providers/google/output-parser.js +92 -0
  46. package/src/providers/index.js +75 -0
  47. package/src/providers/openai/cli-builder.js +122 -0
  48. package/src/providers/openai/index.js +135 -0
  49. package/src/providers/openai/models.js +21 -0
  50. package/src/providers/openai/output-parser.js +129 -0
  51. package/src/sub-cluster-wrapper.js +18 -3
  52. package/src/task-runner.js +8 -6
  53. package/src/tui/layout.js +20 -3
  54. package/task-lib/attachable-watcher.js +80 -78
  55. package/task-lib/claude-recovery.js +119 -0
  56. package/task-lib/commands/list.js +1 -1
  57. package/task-lib/commands/resume.js +3 -2
  58. package/task-lib/commands/run.js +12 -3
  59. package/task-lib/runner.js +59 -38
  60. package/task-lib/scheduler.js +2 -2
  61. package/task-lib/store.js +43 -30
  62. package/task-lib/watcher.js +81 -62
package/task-lib/store.js CHANGED
@@ -1,31 +1,37 @@
1
- import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
1
+ import { existsSync, mkdirSync, readFileSync, writeFileSync, unlinkSync, statSync } from 'fs';
2
2
  import { TASKS_DIR, TASKS_FILE, LOGS_DIR, SCHEDULES_FILE } from './config.js';
3
3
  import { generateName } from './name-generator.js';
4
4
  import lockfile from 'proper-lockfile';
5
5
 
6
- // Lock options for sync API (no retries allowed)
6
+ // Stale lock timeout - 5 seconds is plenty for JSON read/write
7
+ const LOCK_STALE_MS = 5000;
8
+
9
+ // Lock options with async retry support
7
10
  const LOCK_OPTIONS = {
8
- stale: 30000, // Consider lock stale after 30s
11
+ stale: LOCK_STALE_MS,
12
+ retries: {
13
+ retries: 20,
14
+ minTimeout: 100,
15
+ maxTimeout: 200,
16
+ randomize: true,
17
+ },
9
18
  };
10
19
 
11
- // Retry wrapper for sync lock acquisition
12
- function lockWithRetry(file, options, maxRetries = 100, delayMs = 100) {
13
- for (let i = 0; i < maxRetries; i++) {
14
- try {
15
- return lockfile.lockSync(file, options);
16
- } catch (err) {
17
- if (err.code === 'ELOCKED' && i < maxRetries - 1) {
18
- // File is locked, wait and retry
19
- const start = Date.now();
20
- while (Date.now() - start < delayMs) {
21
- // Busy wait (sync)
22
- }
23
- continue;
20
+ /**
21
+ * Remove lock file if it's stale (older than LOCK_STALE_MS)
22
+ */
23
+ function cleanStaleLock(filePath) {
24
+ const lockPath = filePath + '.lock';
25
+ try {
26
+ if (existsSync(lockPath)) {
27
+ const age = Date.now() - statSync(lockPath).mtimeMs;
28
+ if (age > LOCK_STALE_MS) {
29
+ unlinkSync(lockPath);
24
30
  }
25
- throw err;
26
31
  }
32
+ } catch {
33
+ // Ignore - another process may have cleaned it
27
34
  }
28
- throw new Error(`Failed to acquire lock after ${maxRetries} retries`);
29
35
  }
30
36
 
31
37
  export function ensureDirs() {
@@ -60,9 +66,9 @@ export function saveTasks(tasks) {
60
66
  /**
61
67
  * Atomic read-modify-write with file locking
62
68
  * @param {Function} modifier - Function that receives tasks object and returns modified tasks
63
- * @returns {any} - Return value from modifier function
69
+ * @returns {Promise<any>} - Return value from modifier function
64
70
  */
65
- export function withTasksLock(modifier) {
71
+ export async function withTasksLock(modifier) {
66
72
  ensureDirs();
67
73
 
68
74
  // Create file if it doesn't exist (needed for locking)
@@ -72,8 +78,11 @@ export function withTasksLock(modifier) {
72
78
 
73
79
  let release;
74
80
  try {
75
- // Acquire lock (blocks until available)
76
- release = lockWithRetry(TASKS_FILE, LOCK_OPTIONS);
81
+ // Clean stale locks from crashed processes
82
+ cleanStaleLock(TASKS_FILE);
83
+
84
+ // Acquire lock with async API (proper retries without CPU spin-wait)
85
+ release = await lockfile.lock(TASKS_FILE, LOCK_OPTIONS);
77
86
 
78
87
  // Read current state
79
88
  const content = readFileSync(TASKS_FILE, 'utf-8');
@@ -93,7 +102,7 @@ export function withTasksLock(modifier) {
93
102
  return result;
94
103
  } finally {
95
104
  if (release) {
96
- release();
105
+ await release();
97
106
  }
98
107
  }
99
108
  }
@@ -122,8 +131,8 @@ export function addTask(task) {
122
131
  });
123
132
  }
124
133
 
125
- export function removeTask(id) {
126
- withTasksLock((tasks) => {
134
+ export async function removeTask(id) {
135
+ await withTasksLock((tasks) => {
127
136
  delete tasks[id];
128
137
  });
129
138
  }
@@ -138,7 +147,7 @@ export function generateScheduleId() {
138
147
 
139
148
  // Schedule management - same pattern with locking
140
149
 
141
- function withSchedulesLock(modifier) {
150
+ async function withSchedulesLock(modifier) {
142
151
  ensureDirs();
143
152
 
144
153
  if (!existsSync(SCHEDULES_FILE)) {
@@ -147,7 +156,11 @@ function withSchedulesLock(modifier) {
147
156
 
148
157
  let release;
149
158
  try {
150
- release = lockWithRetry(SCHEDULES_FILE, LOCK_OPTIONS);
159
+ // Clean stale locks from crashed processes
160
+ cleanStaleLock(SCHEDULES_FILE);
161
+
162
+ // Acquire lock with async API (proper retries without CPU spin-wait)
163
+ release = await lockfile.lock(SCHEDULES_FILE, LOCK_OPTIONS);
151
164
 
152
165
  const content = readFileSync(SCHEDULES_FILE, 'utf-8');
153
166
  let schedules;
@@ -163,7 +176,7 @@ function withSchedulesLock(modifier) {
163
176
  return result;
164
177
  } finally {
165
178
  if (release) {
166
- release();
179
+ await release();
167
180
  }
168
181
  }
169
182
  }
@@ -210,8 +223,8 @@ export function updateSchedule(id, updates) {
210
223
  });
211
224
  }
212
225
 
213
- export function removeSchedule(id) {
214
- withSchedulesLock((schedules) => {
226
+ export async function removeSchedule(id) {
227
+ await withSchedulesLock((schedules) => {
215
228
  delete schedules[id];
216
229
  });
217
230
  }
package/task-lib/watcher.js CHANGED
@@ -1,24 +1,18 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  /**
4
- * Watcher process - spawns and monitors a claude process
4
+ * Watcher process - spawns and monitors a CLI process
5
5
  * Runs detached from parent, updates task status on completion
6
- *
7
- * Uses regular spawn (not PTY) - Claude CLI with --print is non-interactive
8
- * PTY causes EIO errors when processes are killed/OOM'd
9
6
  */
10
7
 
11
8
  import { spawn } from 'child_process';
12
9
  import { appendFileSync } from 'fs';
13
- import { dirname } from 'path';
14
- import { fileURLToPath } from 'url';
15
10
  import { updateTask } from './store.js';
11
+ import { detectStreamingModeError, recoverStructuredOutput } from './claude-recovery.js';
16
12
  import { createRequire } from 'module';
17
13
 
18
14
  const require = createRequire(import.meta.url);
19
- const { getClaudeCommand } = require('../lib/settings.js');
20
-
21
- const __dirname = dirname(fileURLToPath(import.meta.url));
15
+ const { normalizeProviderName } = require('../lib/provider-names');
22
16
 
23
17
  const [, , taskId, cwd, logFile, argsJson, configJson] = process.argv;
24
18
  const args = JSON.parse(argsJson);
@@ -28,80 +22,76 @@ function log(msg) {
28
22
  appendFileSync(logFile, msg);
29
23
  }
30
24
 
31
- // Build environment - inherit user's auth method (API key or subscription)
32
- const env = { ...process.env };
33
-
34
- // Add model flag - priority: config.model > ANTHROPIC_MODEL env var
35
- const claudeArgs = [...args];
36
- const model = config.model || env.ANTHROPIC_MODEL;
37
- if (model && !claudeArgs.includes('--model')) {
38
- claudeArgs.unshift('--model', model);
39
- }
25
+ const providerName = normalizeProviderName(config.provider || 'claude');
26
+ const enableRecovery = providerName === 'claude';
40
27
 
41
- // Get configured Claude command (supports custom commands like 'ccr code')
42
- const { command: claudeCommand, args: claudeExtraArgs } = getClaudeCommand();
43
- const finalArgs = [...claudeExtraArgs, ...claudeArgs];
28
+ const env = { ...process.env, ...(config.env || {}) };
29
+ const command = config.command || 'claude';
30
+ const finalArgs = [...args];
44
31
 
45
- // Spawn claude using regular child_process (not PTY)
46
- // --print mode is non-interactive, PTY adds overhead and causes EIO on OOM
47
- const child = spawn(claudeCommand, finalArgs, {
32
+ const child = spawn(command, finalArgs, {
48
33
  cwd,
49
34
  env,
50
35
  stdio: ['ignore', 'pipe', 'pipe'],
51
36
  });
52
37
 
53
- // Update task with PID
54
38
  updateTask(taskId, { pid: child.pid });
55
39
 
56
- // For JSON schema output with silent mode, capture ONLY the structured_output JSON
57
40
  const silentJsonMode =
58
- config.outputFormat === 'json' && config.jsonSchema && config.silentJsonOutput;
41
+ config.outputFormat === 'json' && config.jsonSchema && config.silentJsonOutput && enableRecovery;
42
+
59
43
  let finalResultJson = null;
44
+ let streamingModeError = null;
60
45
 
61
- // Buffer for incomplete lines (need complete lines to add timestamps)
62
46
  let stdoutBuffer = '';
63
47
 
64
- // Process stdout data
65
- // CRITICAL: Prepend timestamp to each line for real-time tracking in cluster
66
- // Format: [1733301234567]{json...} - consumers parse timestamp for accurate timing
67
48
  child.stdout.on('data', (data) => {
68
49
  const chunk = data.toString();
69
50
  const timestamp = Date.now();
70
51
 
71
52
  if (silentJsonMode) {
72
- // Parse each line to find the one with structured_output
73
53
  stdoutBuffer += chunk;
74
54
  const lines = stdoutBuffer.split('\n');
75
- stdoutBuffer = lines.pop() || ''; // Keep incomplete line in buffer
55
+ stdoutBuffer = lines.pop() || '';
76
56
 
77
57
  for (const line of lines) {
78
58
  if (!line.trim()) continue;
59
+ if (enableRecovery) {
60
+ const detectedError = detectStreamingModeError(line);
61
+ if (detectedError) {
62
+ streamingModeError = { ...detectedError, timestamp };
63
+ continue;
64
+ }
65
+ }
79
66
  try {
80
67
  const json = JSON.parse(line);
81
68
  if (json.structured_output) {
82
69
  finalResultJson = line;
83
70
  }
84
71
  } catch {
85
- // Not JSON or incomplete, skip
72
+ // Not JSON, skip
86
73
  }
87
74
  }
88
75
  } else {
89
- // Normal mode - stream with timestamps on each complete line
90
76
  stdoutBuffer += chunk;
91
77
  const lines = stdoutBuffer.split('\n');
92
- stdoutBuffer = lines.pop() || ''; // Keep incomplete line in buffer
78
+ stdoutBuffer = lines.pop() || '';
93
79
 
94
80
  for (const line of lines) {
95
- // Timestamp each line: [epochMs]originalContent
81
+ if (enableRecovery) {
82
+ const detectedError = detectStreamingModeError(line);
83
+ if (detectedError) {
84
+ streamingModeError = { ...detectedError, timestamp };
85
+ continue;
86
+ }
87
+ }
96
88
  log(`[${timestamp}]${line}\n`);
97
89
  }
98
90
  }
99
91
  });
100
92
 
101
- // Buffer for stderr incomplete lines
102
93
  let stderrBuffer = '';
103
94
 
104
- // Stream stderr to log with timestamps
105
95
  child.stderr.on('data', (data) => {
106
96
  const chunk = data.toString();
107
97
  const timestamp = Date.now();
@@ -115,55 +105,84 @@ child.stderr.on('data', (data) => {
115
105
  }
116
106
  });
117
107
 
118
- // Handle process exit
119
- child.on('close', (code, signal) => {
108
+ child.on('close', async (code, signal) => {
120
109
  const timestamp = Date.now();
121
110
 
122
- // Flush any remaining buffered stdout
123
111
  if (stdoutBuffer.trim()) {
124
- if (silentJsonMode) {
125
- try {
126
- const json = JSON.parse(stdoutBuffer);
127
- if (json.structured_output) {
128
- finalResultJson = stdoutBuffer;
112
+ if (enableRecovery) {
113
+ const detectedError = detectStreamingModeError(stdoutBuffer);
114
+ if (detectedError) {
115
+ streamingModeError = { ...detectedError, timestamp };
116
+ } else if (silentJsonMode) {
117
+ try {
118
+ const json = JSON.parse(stdoutBuffer);
119
+ if (json.structured_output) {
120
+ finalResultJson = stdoutBuffer;
121
+ }
122
+ } catch {
123
+ // Not valid JSON
129
124
  }
130
- } catch {
131
- // Not valid JSON
125
+ } else {
126
+ log(`[${timestamp}]${stdoutBuffer}\n`);
132
127
  }
133
- } else {
128
+ } else if (!silentJsonMode) {
134
129
  log(`[${timestamp}]${stdoutBuffer}\n`);
135
130
  }
136
131
  }
137
132
 
138
- // Flush any remaining buffered stderr
139
133
  if (stderrBuffer.trim()) {
140
134
  log(`[${timestamp}]${stderrBuffer}\n`);
141
135
  }
142
136
 
143
- // In silent JSON mode, log ONLY the final structured_output JSON
137
+ let recovered = null;
138
+ if (enableRecovery && code !== 0 && streamingModeError?.sessionId) {
139
+ recovered = recoverStructuredOutput(streamingModeError.sessionId);
140
+ if (recovered?.payload) {
141
+ const recoveredLine = JSON.stringify(recovered.payload);
142
+ if (silentJsonMode) {
143
+ finalResultJson = recoveredLine;
144
+ } else {
145
+ log(`[${timestamp}]${recoveredLine}\n`);
146
+ }
147
+ } else if (streamingModeError.line) {
148
+ if (silentJsonMode) {
149
+ log(streamingModeError.line + '\n');
150
+ } else {
151
+ log(`[${streamingModeError.timestamp}]${streamingModeError.line}\n`);
152
+ }
153
+ }
154
+ }
155
+
144
156
  if (silentJsonMode && finalResultJson) {
145
157
  log(finalResultJson + '\n');
146
158
  }
147
159
 
148
- // Skip footer for pure JSON output
149
160
  if (config.outputFormat !== 'json') {
150
161
  log(`\n${'='.repeat(50)}\n`);
151
162
  log(`Finished: ${new Date().toISOString()}\n`);
152
163
  log(`Exit code: ${code}, Signal: ${signal}\n`);
153
164
  }
154
165
 
155
- // Simple status: completed if exit 0, failed otherwise
156
- const status = code === 0 ? 'completed' : 'failed';
157
- updateTask(taskId, {
158
- status,
159
- exitCode: code,
160
- error: signal ? `Killed by ${signal}` : null,
161
- });
166
+ const resolvedCode = recovered?.payload ? 0 : code;
167
+ const status = resolvedCode === 0 ? 'completed' : 'failed';
168
+ try {
169
+ await updateTask(taskId, {
170
+ status,
171
+ exitCode: resolvedCode,
172
+ error: resolvedCode === 0 ? null : signal ? `Killed by ${signal}` : null,
173
+ });
174
+ } catch (updateError) {
175
+ log(`[${Date.now()}][ERROR] Failed to update task status: ${updateError.message}\n`);
176
+ }
162
177
  process.exit(0);
163
178
  });
164
179
 
165
- child.on('error', (err) => {
180
+ child.on('error', async (err) => {
166
181
  log(`\nError: ${err.message}\n`);
167
- updateTask(taskId, { status: 'failed', error: err.message });
182
+ try {
183
+ await updateTask(taskId, { status: 'failed', error: err.message });
184
+ } catch (updateError) {
185
+ log(`[${Date.now()}][ERROR] Failed to update task status: ${updateError.message}\n`);
186
+ }
168
187
  process.exit(1);
169
188
  });