@covibes/zeroshot 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +63 -0
- package/README.md +20 -6
- package/cli/index.js +513 -194
- package/cli/lib/first-run.js +174 -0
- package/cli/lib/update-checker.js +234 -0
- package/cli/message-formatters-normal.js +77 -38
- package/cluster-templates/base-templates/debug-workflow.json +11 -2
- package/cluster-templates/base-templates/full-workflow.json +20 -7
- package/cluster-templates/base-templates/single-worker.json +8 -1
- package/cluster-templates/base-templates/worker-validator.json +10 -2
- package/docker/zeroshot-cluster/Dockerfile +7 -0
- package/lib/settings.js +25 -7
- package/package.json +3 -1
- package/src/agent/agent-config.js +19 -6
- package/src/agent/agent-context-builder.js +9 -0
- package/src/agent/agent-task-executor.js +149 -65
- package/src/config-validator.js +13 -0
- package/src/isolation-manager.js +11 -7
- package/src/orchestrator.js +78 -1
- package/src/status-footer.js +59 -6
- package/src/template-resolver.js +23 -1
|
@@ -51,6 +51,72 @@ function sanitizeErrorMessage(error) {
|
|
|
51
51
|
return error;
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
+
/**
 * Strip the timestamp prefix from a log line.
 *
 * Log lines may have the format `[epochMs]{json...}` or `[epochMs]text`,
 * where epochMs is exactly 13 digits. A line without such a prefix is
 * returned trimmed but otherwise unchanged.
 *
 * @param {string} line - Raw log line
 * @returns {string} Line content without timestamp prefix, empty string for
 *   non-string, empty, or whitespace-only input
 */
function stripTimestampPrefix(line) {
  if (typeof line !== 'string') return '';

  // trim() removes every leading/trailing whitespace character, including the
  // "\r" left behind by CRLF line endings, so no extra replace is required.
  const trimmed = line.trim();
  if (!trimmed) return '';

  // Only the remainder after the bracketed timestamp is needed.
  const match = /^\[\d{13}\](.*)$/.exec(trimmed);
  return match ? match[1] : trimmed;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Extract error context from task output.
 * Shared by both isolated and non-isolated modes.
 *
 * Resolution order:
 *   1. `isNotFound` set: a fixed "task not found" message.
 *   2. The first `Error: ...` line in `statusOutput`, when provided.
 *   3. The first error-pattern hit within the last 500 characters of `output`
 *      (capped at 200 characters).
 *   4. A "no output" message, or the last 200 characters of `output`.
 *
 * Every candidate message is passed through sanitizeErrorMessage() before
 * being returned.
 *
 * @param {Object} params - Extraction parameters
 * @param {string} params.output - Full task output
 * @param {string} [params.statusOutput] - Status command output (non-isolated only)
 * @param {string} params.taskId - Task ID for error messages
 * @param {boolean} [params.isNotFound=false] - True if task was not found
 * @returns {string|null} Result of sanitizeErrorMessage() for the selected message
 */
function extractErrorContext({ output, statusOutput, taskId, isNotFound = false }) {
  // Task not found - explicit error
  if (isNotFound) {
    return sanitizeErrorMessage(`Task ${taskId} not found (may have crashed or been killed)`);
  }

  // Try status output first (only available in non-isolated mode)
  if (statusOutput) {
    const statusErrorMatch = statusOutput.match(/Error:\s*(.+)/);
    if (statusErrorMatch) {
      return sanitizeErrorMessage(statusErrorMatch[1].trim());
    }
  }

  // Fall back to extracting from output (last 500 chars)
  const lastOutput = (output || '').slice(-500).trim();
  if (!lastOutput) {
    return sanitizeErrorMessage('Task failed with no output (check if task was interrupted or timed out)');
  }

  // Common error patterns, checked in priority order. All are case-insensitive,
  // so a single "error:" entry covers both "Error:" and "error:".
  const errorPatterns = [
    /error:\s*(.+)/i,
    /failed:\s*(.+)/i,
    /Exception:\s*(.+)/i,
    /panic:\s*(.+)/i,
  ];

  for (const pattern of errorPatterns) {
    const match = lastOutput.match(pattern);
    if (match) {
      // Trim like the status path does: "." matches "\r", so CRLF output would
      // otherwise leave a stray carriage return at the end of the message.
      return sanitizeErrorMessage(match[1].trim().slice(0, 200));
    }
  }

  // No pattern matched - include last portion of output
  return sanitizeErrorMessage(`Task failed. Last output: ${lastOutput.slice(-200)}`);
}
|
|
119
|
+
|
|
54
120
|
// Track if we've already ensured the AskUserQuestion hook is installed
|
|
55
121
|
let askUserQuestionHookInstalled = false;
|
|
56
122
|
|
|
@@ -68,10 +134,11 @@ function extractTokenUsage(output) {
|
|
|
68
134
|
|
|
69
135
|
// Find the result line containing usage data
|
|
70
136
|
for (const line of lines) {
|
|
71
|
-
|
|
137
|
+
const content = stripTimestampPrefix(line);
|
|
138
|
+
if (!content) continue;
|
|
72
139
|
|
|
73
140
|
try {
|
|
74
|
-
const event = JSON.parse(
|
|
141
|
+
const event = JSON.parse(content);
|
|
75
142
|
if (event.type === 'result') {
|
|
76
143
|
const usage = event.usage || {};
|
|
77
144
|
return {
|
|
@@ -527,14 +594,45 @@ function followClaudeTaskLogs(agent, taskId) {
|
|
|
527
594
|
// Track exec failures - if status command keeps failing, something is wrong
|
|
528
595
|
if (error) {
|
|
529
596
|
consecutiveExecFailures++;
|
|
530
|
-
if (consecutiveExecFailures
|
|
597
|
+
if (consecutiveExecFailures >= MAX_CONSECUTIVE_FAILURES) {
|
|
531
598
|
console.error(
|
|
532
|
-
`[Agent ${agent.id}] ⚠️ Status polling failed ${MAX_CONSECUTIVE_FAILURES} times consecutively
|
|
599
|
+
`[Agent ${agent.id}] ⚠️ Status polling failed ${MAX_CONSECUTIVE_FAILURES} times consecutively! STOPPING.`
|
|
533
600
|
);
|
|
534
601
|
console.error(` Command: ${ctPath} status ${taskId}`);
|
|
535
602
|
console.error(` Error: ${error.message}`);
|
|
536
603
|
console.error(` Stderr: ${stderr || 'none'}`);
|
|
537
604
|
console.error(` This may indicate zeroshot is not in PATH or task storage is corrupted.`);
|
|
605
|
+
|
|
606
|
+
// Stop polling and resolve with failure
|
|
607
|
+
if (!resolved) {
|
|
608
|
+
resolved = true;
|
|
609
|
+
clearInterval(pollInterval);
|
|
610
|
+
clearInterval(statusCheckInterval);
|
|
611
|
+
agent.currentTask = null;
|
|
612
|
+
|
|
613
|
+
// Publish error for orchestrator/resume
|
|
614
|
+
agent._publish({
|
|
615
|
+
topic: 'AGENT_ERROR',
|
|
616
|
+
receiver: 'broadcast',
|
|
617
|
+
content: {
|
|
618
|
+
text: `Task ${taskId} polling failed after ${MAX_CONSECUTIVE_FAILURES} consecutive failures`,
|
|
619
|
+
data: {
|
|
620
|
+
taskId,
|
|
621
|
+
error: 'polling_timeout',
|
|
622
|
+
attempts: consecutiveExecFailures,
|
|
623
|
+
role: agent.role,
|
|
624
|
+
iteration: agent.iteration,
|
|
625
|
+
},
|
|
626
|
+
},
|
|
627
|
+
});
|
|
628
|
+
|
|
629
|
+
resolve({
|
|
630
|
+
success: false,
|
|
631
|
+
output,
|
|
632
|
+
error: `Status polling failed ${MAX_CONSECUTIVE_FAILURES} times - task may not exist`,
|
|
633
|
+
});
|
|
634
|
+
}
|
|
635
|
+
return;
|
|
538
636
|
}
|
|
539
637
|
return; // Keep polling - might be transient
|
|
540
638
|
}
|
|
@@ -566,47 +664,15 @@ function followClaudeTaskLogs(agent, taskId) {
|
|
|
566
664
|
clearInterval(statusCheckInterval);
|
|
567
665
|
agent.currentTask = null;
|
|
568
666
|
|
|
569
|
-
// Extract
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
const statusErrorMatch = stdout.match(/Error:\s*(.+)/);
|
|
574
|
-
if (statusErrorMatch) {
|
|
575
|
-
errorContext = statusErrorMatch[1].trim();
|
|
576
|
-
} else {
|
|
577
|
-
// Fall back to last 500 chars of output (likely contains the failure reason)
|
|
578
|
-
const lastOutput = output.slice(-500).trim();
|
|
579
|
-
if (lastOutput) {
|
|
580
|
-
// Look for common error patterns in output
|
|
581
|
-
const errorPatterns = [
|
|
582
|
-
/Error:\s*(.+)/i,
|
|
583
|
-
/error:\s*(.+)/i,
|
|
584
|
-
/failed:\s*(.+)/i,
|
|
585
|
-
/Exception:\s*(.+)/i,
|
|
586
|
-
/panic:\s*(.+)/i,
|
|
587
|
-
];
|
|
588
|
-
for (const pattern of errorPatterns) {
|
|
589
|
-
const match = lastOutput.match(pattern);
|
|
590
|
-
if (match) {
|
|
591
|
-
errorContext = match[1].slice(0, 200);
|
|
592
|
-
break;
|
|
593
|
-
}
|
|
594
|
-
}
|
|
595
|
-
// If no pattern matched, include last portion of output
|
|
596
|
-
if (!errorContext) {
|
|
597
|
-
errorContext = `Task failed. Last output: ${lastOutput.slice(-200)}`;
|
|
598
|
-
}
|
|
599
|
-
} else {
|
|
600
|
-
errorContext =
|
|
601
|
-
'Task failed with no output (check if task was interrupted or timed out)';
|
|
602
|
-
}
|
|
603
|
-
}
|
|
604
|
-
}
|
|
667
|
+
// Extract error context using shared helper
|
|
668
|
+
const errorContext = !success
|
|
669
|
+
? extractErrorContext({ output, statusOutput: stdout, taskId })
|
|
670
|
+
: null;
|
|
605
671
|
|
|
606
672
|
resolve({
|
|
607
673
|
success,
|
|
608
674
|
output,
|
|
609
|
-
error:
|
|
675
|
+
error: errorContext,
|
|
610
676
|
tokenUsage: extractTokenUsage(output),
|
|
611
677
|
});
|
|
612
678
|
}, 500);
|
|
@@ -912,12 +978,14 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
|
|
|
912
978
|
`zeroshot status ${taskId} 2>/dev/null || echo "not_found"`,
|
|
913
979
|
]);
|
|
914
980
|
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
)
|
|
981
|
+
// Use same regex patterns as non-isolated mode (lines 649-650)
|
|
982
|
+
// CRITICAL: Don't use substring matching - it matches "error" in "is_error":false
|
|
983
|
+
const statusOutput = statusResult.stdout;
|
|
984
|
+
const isSuccess = /Status:\s+completed/i.test(statusOutput);
|
|
985
|
+
const isError = /Status:\s+failed/i.test(statusOutput);
|
|
986
|
+
const isNotFound = statusOutput.includes('not_found');
|
|
987
|
+
|
|
988
|
+
if (isSuccess || isError || isNotFound) {
|
|
921
989
|
// Task finished - read final output and resolve
|
|
922
990
|
const finalReadResult = await manager.execInContainer(clusterId, [
|
|
923
991
|
'sh',
|
|
@@ -940,13 +1008,23 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
|
|
|
940
1008
|
cleanup();
|
|
941
1009
|
taskExited = true;
|
|
942
1010
|
|
|
943
|
-
//
|
|
1011
|
+
// Determine success status
|
|
1012
|
+
const success = isSuccess && !isError;
|
|
1013
|
+
|
|
1014
|
+
// Extract error context using shared helper
|
|
1015
|
+
const errorContext = !success
|
|
1016
|
+
? extractErrorContext({ output: fullOutput, taskId, isNotFound })
|
|
1017
|
+
: null;
|
|
1018
|
+
|
|
1019
|
+
// Parse result from output
|
|
944
1020
|
const parsedResult = agent._parseResultOutput(fullOutput);
|
|
945
1021
|
|
|
946
1022
|
resolve({
|
|
1023
|
+
success,
|
|
947
1024
|
output: fullOutput,
|
|
948
1025
|
taskId,
|
|
949
1026
|
result: parsedResult,
|
|
1027
|
+
error: errorContext,
|
|
950
1028
|
tokenUsage: extractTokenUsage(fullOutput),
|
|
951
1029
|
});
|
|
952
1030
|
}
|
|
@@ -956,18 +1034,19 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
|
|
|
956
1034
|
}
|
|
957
1035
|
}, 500);
|
|
958
1036
|
|
|
959
|
-
// Safety timeout (
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
1037
|
+
// Safety timeout (0 = no timeout, task runs until completion)
|
|
1038
|
+
if (agent.timeout > 0) {
|
|
1039
|
+
setTimeout(() => {
|
|
1040
|
+
if (!taskExited) {
|
|
1041
|
+
cleanup();
|
|
1042
|
+
reject(
|
|
1043
|
+
new Error(
|
|
1044
|
+
`Task ${taskId} timeout after ${agent.timeout}ms (isolated mode)`
|
|
1045
|
+
)
|
|
1046
|
+
);
|
|
1047
|
+
}
|
|
1048
|
+
}, agent.timeout);
|
|
1049
|
+
}
|
|
971
1050
|
})
|
|
972
1051
|
.catch((err) => {
|
|
973
1052
|
cleanup();
|
|
@@ -994,11 +1073,14 @@ function parseResultOutput(agent, output) {
|
|
|
994
1073
|
let trimmedOutput = output.trim();
|
|
995
1074
|
|
|
996
1075
|
// IMPORTANT: Output is NDJSON (one JSON object per line) from streaming log
|
|
1076
|
+
// Lines may have timestamp prefix: [epochMs]{json...}
|
|
997
1077
|
// Find the line with "type":"result" which contains the actual result
|
|
998
1078
|
const lines = trimmedOutput.split('\n');
|
|
999
1079
|
const resultLine = lines.find((line) => {
|
|
1000
1080
|
try {
|
|
1001
|
-
const
|
|
1081
|
+
const content = stripTimestampPrefix(line);
|
|
1082
|
+
if (!content.startsWith('{')) return false;
|
|
1083
|
+
const obj = JSON.parse(content);
|
|
1002
1084
|
return obj.type === 'result';
|
|
1003
1085
|
} catch {
|
|
1004
1086
|
return false;
|
|
@@ -1006,13 +1088,15 @@ function parseResultOutput(agent, output) {
|
|
|
1006
1088
|
});
|
|
1007
1089
|
|
|
1008
1090
|
// Use the result line if found, otherwise use last non-empty line
|
|
1091
|
+
// CRITICAL: Strip timestamp prefix before assigning to trimmedOutput
|
|
1009
1092
|
if (resultLine) {
|
|
1010
|
-
trimmedOutput = resultLine
|
|
1093
|
+
trimmedOutput = stripTimestampPrefix(resultLine);
|
|
1011
1094
|
} else if (lines.length > 1) {
|
|
1012
|
-
// Fallback: use last non-empty line
|
|
1095
|
+
// Fallback: use last non-empty line (also strip timestamp)
|
|
1013
1096
|
for (let i = lines.length - 1; i >= 0; i--) {
|
|
1014
|
-
|
|
1015
|
-
|
|
1097
|
+
const content = stripTimestampPrefix(lines[i]);
|
|
1098
|
+
if (content) {
|
|
1099
|
+
trimmedOutput = content;
|
|
1016
1100
|
break;
|
|
1017
1101
|
}
|
|
1018
1102
|
}
|
package/src/config-validator.js
CHANGED
|
@@ -415,6 +415,19 @@ function validateAgents(config) {
|
|
|
415
415
|
}
|
|
416
416
|
}
|
|
417
417
|
|
|
418
|
+
// Check for git operations in validator prompts (unreliable in agents)
|
|
419
|
+
if (agent.role === 'validator') {
|
|
420
|
+
const prompt = typeof agent.prompt === 'string' ? agent.prompt : agent.prompt?.system;
|
|
421
|
+
const gitPatterns = ['git diff', 'git status', 'git log', 'git show'];
|
|
422
|
+
for (const pattern of gitPatterns) {
|
|
423
|
+
if (prompt?.includes(pattern)) {
|
|
424
|
+
errors.push(
|
|
425
|
+
`Validator '${agent.id}' uses '${pattern}' - git state is unreliable in agents`
|
|
426
|
+
);
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
|
|
418
431
|
// JSON output without schema
|
|
419
432
|
if (agent.outputFormat === 'json' && !agent.jsonSchema) {
|
|
420
433
|
warnings.push(
|
package/src/isolation-manager.js
CHANGED
|
@@ -191,7 +191,7 @@ class IsolationManager {
|
|
|
191
191
|
try {
|
|
192
192
|
installResult = await this.execInContainer(
|
|
193
193
|
clusterId,
|
|
194
|
-
['sh', '-c', 'npm install --no-audit --no-fund
|
|
194
|
+
['sh', '-c', 'npm_config_engine_strict=false npm install --no-audit --no-fund'],
|
|
195
195
|
{}
|
|
196
196
|
);
|
|
197
197
|
|
|
@@ -201,16 +201,18 @@ class IsolationManager {
|
|
|
201
201
|
}
|
|
202
202
|
|
|
203
203
|
// Failed - retry if not last attempt
|
|
204
|
+
// Use stderr if available, otherwise stdout (npm writes some errors to stdout)
|
|
205
|
+
const errorOutput = (installResult.stderr || installResult.stdout || '').slice(0, 500);
|
|
204
206
|
if (attempt < maxRetries) {
|
|
205
207
|
const delay = baseDelay * Math.pow(2, attempt - 1);
|
|
206
208
|
console.warn(
|
|
207
209
|
`[IsolationManager] ⚠️ npm install failed (attempt ${attempt}/${maxRetries}), retrying in ${delay}ms...`
|
|
208
210
|
);
|
|
209
|
-
console.warn(`[IsolationManager] Error: ${
|
|
211
|
+
console.warn(`[IsolationManager] Error: ${errorOutput}`);
|
|
210
212
|
await new Promise((_resolve) => setTimeout(_resolve, delay));
|
|
211
213
|
} else {
|
|
212
214
|
console.warn(
|
|
213
|
-
`[IsolationManager] ⚠️ npm install failed after ${maxRetries} attempts (non-fatal): ${
|
|
215
|
+
`[IsolationManager] ⚠️ npm install failed after ${maxRetries} attempts (non-fatal): ${errorOutput}`
|
|
214
216
|
);
|
|
215
217
|
}
|
|
216
218
|
} catch (execErr) {
|
|
@@ -342,8 +344,9 @@ class IsolationManager {
|
|
|
342
344
|
* @param {number} [timeout=10] - Timeout in seconds before SIGKILL
|
|
343
345
|
* @returns {Promise<void>}
|
|
344
346
|
*/
|
|
345
|
-
stopContainer(clusterId, timeout = 10) {
|
|
346
|
-
|
|
347
|
+
stopContainer(clusterId, timeout = 10, explicitContainerId = null) {
|
|
348
|
+
// Use explicit containerId (from restored state) or in-memory Map
|
|
349
|
+
const containerId = explicitContainerId || this.containers.get(clusterId);
|
|
347
350
|
if (!containerId) {
|
|
348
351
|
return; // Already stopped or never started
|
|
349
352
|
}
|
|
@@ -369,8 +372,9 @@ class IsolationManager {
|
|
|
369
372
|
* @param {boolean} [force=false] - Force remove running container
|
|
370
373
|
* @returns {Promise<void>}
|
|
371
374
|
*/
|
|
372
|
-
removeContainer(clusterId, force = false) {
|
|
373
|
-
|
|
375
|
+
removeContainer(clusterId, force = false, explicitContainerId = null) {
|
|
376
|
+
// Use explicit containerId (from restored state) or in-memory Map
|
|
377
|
+
const containerId = explicitContainerId || this.containers.get(clusterId);
|
|
374
378
|
if (!containerId) {
|
|
375
379
|
return;
|
|
376
380
|
}
|
package/src/orchestrator.js
CHANGED
|
@@ -126,9 +126,55 @@ class Orchestrator {
|
|
|
126
126
|
const clusterIds = Object.keys(data);
|
|
127
127
|
this._log(`[Orchestrator] Found ${clusterIds.length} clusters in file:`, clusterIds);
|
|
128
128
|
|
|
129
|
+
// Track clusters to remove (missing .db files or 0 messages)
|
|
130
|
+
const clustersToRemove = [];
|
|
131
|
+
// Track clusters with 0 messages (corrupted from SIGINT race condition)
|
|
132
|
+
const corruptedClusters = [];
|
|
133
|
+
|
|
129
134
|
for (const [clusterId, clusterData] of Object.entries(data)) {
|
|
135
|
+
// Skip clusters whose .db file doesn't exist (orphaned registry entries)
|
|
136
|
+
const dbPath = path.join(this.storageDir, `${clusterId}.db`);
|
|
137
|
+
if (!fs.existsSync(dbPath)) {
|
|
138
|
+
console.warn(`[Orchestrator] Cluster ${clusterId} has no database file, removing from registry`);
|
|
139
|
+
clustersToRemove.push(clusterId);
|
|
140
|
+
continue;
|
|
141
|
+
}
|
|
142
|
+
|
|
130
143
|
this._log(`[Orchestrator] Loading cluster: ${clusterId}`);
|
|
131
|
-
this._loadSingleCluster(clusterId, clusterData);
|
|
144
|
+
const cluster = this._loadSingleCluster(clusterId, clusterData);
|
|
145
|
+
|
|
146
|
+
// VALIDATION: Detect 0-message clusters (corrupted from SIGINT during initialization)
|
|
147
|
+
// These clusters were created before the initCompletePromise fix was applied
|
|
148
|
+
if (cluster && cluster.messageBus) {
|
|
149
|
+
const messageCount = cluster.messageBus.count({ cluster_id: clusterId });
|
|
150
|
+
if (messageCount === 0) {
|
|
151
|
+
console.warn(`[Orchestrator] ⚠️ Cluster ${clusterId} has 0 messages (corrupted)`);
|
|
152
|
+
console.warn(`[Orchestrator] This likely occurred from SIGINT during initialization.`);
|
|
153
|
+
console.warn(`[Orchestrator] Marking as 'corrupted' - use 'zeroshot kill ${clusterId}' to remove.`);
|
|
154
|
+
corruptedClusters.push(clusterId);
|
|
155
|
+
// Mark cluster as corrupted for visibility in status/list commands
|
|
156
|
+
cluster.state = 'corrupted';
|
|
157
|
+
cluster.corruptedReason = 'SIGINT during initialization (0 messages in ledger)';
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
// Clean up orphaned entries from clusters.json
|
|
163
|
+
if (clustersToRemove.length > 0) {
|
|
164
|
+
for (const clusterId of clustersToRemove) {
|
|
165
|
+
delete data[clusterId];
|
|
166
|
+
}
|
|
167
|
+
fs.writeFileSync(clustersFile, JSON.stringify(data, null, 2));
|
|
168
|
+
this._log(`[Orchestrator] Removed ${clustersToRemove.length} orphaned cluster(s) from registry`);
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
// Log summary of corrupted clusters
|
|
172
|
+
if (corruptedClusters.length > 0) {
|
|
173
|
+
console.warn(`\n[Orchestrator] ⚠️ Found ${corruptedClusters.length} corrupted cluster(s):`);
|
|
174
|
+
for (const clusterId of corruptedClusters) {
|
|
175
|
+
console.warn(` - ${clusterId}`);
|
|
176
|
+
}
|
|
177
|
+
console.warn(`[Orchestrator] Run 'zeroshot clear' to remove all corrupted clusters.\n`);
|
|
132
178
|
}
|
|
133
179
|
|
|
134
180
|
this._log(`[Orchestrator] Total clusters loaded: ${this.clusters.size}`);
|
|
@@ -494,6 +540,13 @@ class Orchestrator {
|
|
|
494
540
|
}
|
|
495
541
|
|
|
496
542
|
// Build cluster object
|
|
543
|
+
// CRITICAL: initComplete promise ensures ISSUE_OPENED is published before stop() completes
|
|
544
|
+
// This prevents 0-message clusters from SIGINT during async initialization
|
|
545
|
+
let resolveInitComplete;
|
|
546
|
+
const initCompletePromise = new Promise((resolve) => {
|
|
547
|
+
resolveInitComplete = resolve;
|
|
548
|
+
});
|
|
549
|
+
|
|
497
550
|
const cluster = {
|
|
498
551
|
id: clusterId,
|
|
499
552
|
config,
|
|
@@ -504,6 +557,9 @@ class Orchestrator {
|
|
|
504
557
|
createdAt: Date.now(),
|
|
505
558
|
// Track PID for zombie detection (this process owns the cluster)
|
|
506
559
|
pid: process.pid,
|
|
560
|
+
// Initialization completion tracking (for safe SIGINT handling)
|
|
561
|
+
initCompletePromise,
|
|
562
|
+
_resolveInitComplete: resolveInitComplete,
|
|
507
563
|
// Isolation state (only if enabled)
|
|
508
564
|
// CRITICAL: Store workDir for resume capability - without this, resume() can't recreate container
|
|
509
565
|
isolation: options.isolation
|
|
@@ -652,6 +708,12 @@ class Orchestrator {
|
|
|
652
708
|
},
|
|
653
709
|
});
|
|
654
710
|
|
|
711
|
+
// CRITICAL: Mark initialization complete AFTER ISSUE_OPENED is published
|
|
712
|
+
// This ensures stop() waits for at least 1 message before stopping
|
|
713
|
+
if (cluster._resolveInitComplete) {
|
|
714
|
+
cluster._resolveInitComplete();
|
|
715
|
+
}
|
|
716
|
+
|
|
655
717
|
this._log(`Cluster ${clusterId} started with ${cluster.agents.length} agents`);
|
|
656
718
|
|
|
657
719
|
// Watch for CLUSTER_COMPLETE message to auto-stop
|
|
@@ -818,6 +880,10 @@ class Orchestrator {
|
|
|
818
880
|
};
|
|
819
881
|
} catch (error) {
|
|
820
882
|
cluster.state = 'failed';
|
|
883
|
+
// CRITICAL: Resolve the promise on failure too, so stop() doesn't hang
|
|
884
|
+
if (cluster._resolveInitComplete) {
|
|
885
|
+
cluster._resolveInitComplete();
|
|
886
|
+
}
|
|
821
887
|
console.error(`Cluster ${clusterId} failed to start:`, error);
|
|
822
888
|
throw error;
|
|
823
889
|
}
|
|
@@ -833,6 +899,17 @@ class Orchestrator {
|
|
|
833
899
|
throw new Error(`Cluster ${clusterId} not found`);
|
|
834
900
|
}
|
|
835
901
|
|
|
902
|
+
// CRITICAL: Wait for initialization to complete before stopping
|
|
903
|
+
// This ensures ISSUE_OPENED is published, preventing 0-message clusters
|
|
904
|
+
// Timeout after 30s to prevent infinite hang if init truly fails
|
|
905
|
+
if (cluster.initCompletePromise && cluster.state === 'initializing') {
|
|
906
|
+
this._log(`[Orchestrator] Waiting for initialization to complete before stopping...`);
|
|
907
|
+
await Promise.race([
|
|
908
|
+
cluster.initCompletePromise,
|
|
909
|
+
new Promise((resolve) => setTimeout(resolve, 30000)),
|
|
910
|
+
]);
|
|
911
|
+
}
|
|
912
|
+
|
|
836
913
|
cluster.state = 'stopping';
|
|
837
914
|
|
|
838
915
|
// Stop all agents (including subclusters which handle their own children)
|
package/src/status-footer.js
CHANGED
|
@@ -96,6 +96,11 @@ class StatusFooter {
|
|
|
96
96
|
this.minRows = 8; // Minimum rows for footer display (graceful degradation)
|
|
97
97
|
this.hidden = false; // True when terminal too small for footer
|
|
98
98
|
|
|
99
|
+
// Output queue - serializes all stdout to prevent cursor corruption
|
|
100
|
+
// When scroll region is active, console.log() can corrupt cursor position
|
|
101
|
+
// All output must go through print() to coordinate with render cycles
|
|
102
|
+
this.printQueue = [];
|
|
103
|
+
|
|
99
104
|
// Debounced resize handler (100ms) - prevents rapid-fire redraws
|
|
100
105
|
this._debouncedResize = debounce(() => this._handleResize(), 100);
|
|
101
106
|
}
|
|
@@ -108,6 +113,38 @@ class StatusFooter {
|
|
|
108
113
|
return process.stdout.isTTY === true;
|
|
109
114
|
}
|
|
110
115
|
|
|
116
|
+
/**
|
|
117
|
+
* Print text to stdout, coordinating with the render cycle.
|
|
118
|
+
* When a render is in progress, queues output to prevent cursor corruption.
|
|
119
|
+
* When no render is active, writes immediately.
|
|
120
|
+
*
|
|
121
|
+
* MUST be used instead of console.log() when status footer is active.
|
|
122
|
+
* @param {string} text - Text to print (newline will be added)
|
|
123
|
+
*/
|
|
124
|
+
print(text) {
|
|
125
|
+
if (this.isRendering) {
|
|
126
|
+
// Queue for later - render() will flush after restoring cursor
|
|
127
|
+
this.printQueue.push(text);
|
|
128
|
+
} else {
|
|
129
|
+
// Write immediately - no render in progress
|
|
130
|
+
process.stdout.write(text + '\n');
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/**
|
|
135
|
+
* Flush queued output to stdout.
|
|
136
|
+
* Called after render() restores cursor to ensure proper positioning.
|
|
137
|
+
* @private
|
|
138
|
+
*/
|
|
139
|
+
_flushPrintQueue() {
|
|
140
|
+
if (this.printQueue.length === 0) return;
|
|
141
|
+
|
|
142
|
+
// Write all queued output
|
|
143
|
+
const output = this.printQueue.map(text => text + '\n').join('');
|
|
144
|
+
this.printQueue = [];
|
|
145
|
+
process.stdout.write(output);
|
|
146
|
+
}
|
|
147
|
+
|
|
111
148
|
/**
|
|
112
149
|
* Get terminal dimensions
|
|
113
150
|
* @returns {{ rows: number, cols: number }}
|
|
@@ -495,6 +532,19 @@ class StatusFooter {
|
|
|
495
532
|
} finally {
|
|
496
533
|
this.isRendering = false;
|
|
497
534
|
|
|
535
|
+
// CRITICAL: Position cursor at bottom of scroll region before flushing
|
|
536
|
+
// Without this, output goes below footer if cursor was restored outside scroll region
|
|
537
|
+
// (RESTORE_CURSOR at line 527 may restore to row outside the scrollable area)
|
|
538
|
+
if (this.scrollRegionSet) {
|
|
539
|
+
const { rows } = this.getTerminalSize();
|
|
540
|
+
const scrollEnd = rows - this.footerHeight;
|
|
541
|
+
process.stdout.write(this._moveToStr(scrollEnd, 1));
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
// Flush any output that was queued during render
|
|
545
|
+
// Must happen BEFORE pending resize to preserve output order
|
|
546
|
+
this._flushPrintQueue();
|
|
547
|
+
|
|
498
548
|
// Process pending resize if one was queued during render
|
|
499
549
|
if (this.pendingResize) {
|
|
500
550
|
this.pendingResize = false;
|
|
@@ -728,13 +778,15 @@ class StatusFooter {
|
|
|
728
778
|
// Prevents interleaving with agent output during cleanup
|
|
729
779
|
let buffer = '';
|
|
730
780
|
|
|
731
|
-
//
|
|
781
|
+
// CRITICAL: Clear footer area BEFORE resetting scroll region
|
|
782
|
+
// While scroll region is active, footer area contains only status bar content
|
|
783
|
+
// After reset, those lines may contain scrolled output (which we DON'T want to clear)
|
|
784
|
+
buffer += this._clearFooterAreaStr();
|
|
785
|
+
|
|
786
|
+
// Now reset scroll region (full terminal is scrollable again)
|
|
732
787
|
buffer += this._resetScrollRegionStr();
|
|
733
788
|
this.scrollRegionSet = false;
|
|
734
789
|
|
|
735
|
-
// Clear all footer lines
|
|
736
|
-
buffer += this._clearFooterAreaStr();
|
|
737
|
-
|
|
738
790
|
// Move cursor to safe position and show cursor
|
|
739
791
|
const { rows } = this.getTerminalSize();
|
|
740
792
|
const startRow = rows - this.footerHeight + 1;
|
|
@@ -753,9 +805,10 @@ class StatusFooter {
|
|
|
753
805
|
if (!this.isTTY()) return;
|
|
754
806
|
|
|
755
807
|
// Single atomic write for hide operation
|
|
756
|
-
|
|
808
|
+
// CRITICAL: Clear footer BEFORE resetting scroll region (same reason as stop())
|
|
809
|
+
let buffer = this._clearFooterAreaStr();
|
|
810
|
+
buffer += this._resetScrollRegionStr();
|
|
757
811
|
this.scrollRegionSet = false;
|
|
758
|
-
buffer += this._clearFooterAreaStr();
|
|
759
812
|
process.stdout.write(buffer);
|
|
760
813
|
}
|
|
761
814
|
|
package/src/template-resolver.js
CHANGED
|
@@ -43,8 +43,11 @@ class TemplateResolver {
|
|
|
43
43
|
// Validate required params
|
|
44
44
|
this._validateParams(template, params);
|
|
45
45
|
|
|
46
|
+
// Apply defaults for missing params (e.g., timeout: 0)
|
|
47
|
+
const paramsWithDefaults = this._applyDefaults(template, params);
|
|
48
|
+
|
|
46
49
|
// Deep clone and resolve
|
|
47
|
-
const resolved = this._resolveObject(JSON.parse(JSON.stringify(template)),
|
|
50
|
+
const resolved = this._resolveObject(JSON.parse(JSON.stringify(template)), paramsWithDefaults);
|
|
48
51
|
|
|
49
52
|
// Filter out conditional agents that don't meet their condition
|
|
50
53
|
if (resolved.agents) {
|
|
@@ -86,6 +89,25 @@ class TemplateResolver {
|
|
|
86
89
|
}
|
|
87
90
|
}
|
|
88
91
|
|
|
92
|
+
/**
|
|
93
|
+
* Apply template defaults for any missing params
|
|
94
|
+
* @private
|
|
95
|
+
* @param {any} template
|
|
96
|
+
* @param {any} params
|
|
97
|
+
* @returns {any} params with defaults applied
|
|
98
|
+
*/
|
|
99
|
+
_applyDefaults(template, params) {
|
|
100
|
+
if (!template.params) return params;
|
|
101
|
+
|
|
102
|
+
const result = { ...params };
|
|
103
|
+
for (const [name, schema] of Object.entries(template.params)) {
|
|
104
|
+
if (result[name] === undefined && schema.default !== undefined) {
|
|
105
|
+
result[name] = schema.default;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
return result;
|
|
109
|
+
}
|
|
110
|
+
|
|
89
111
|
/**
|
|
90
112
|
* Recursively resolve placeholders in an object
|
|
91
113
|
* @private
|