@covibes/zeroshot 1.0.1 → 1.1.3
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- package/CHANGELOG.md +46 -0
- package/README.md +2 -0
- package/cli/index.js +151 -208
- package/cli/message-formatter-utils.js +75 -0
- package/cli/message-formatters-normal.js +214 -0
- package/cli/message-formatters-watch.js +181 -0
- package/cluster-templates/base-templates/full-workflow.json +10 -5
- package/docker/zeroshot-cluster/Dockerfile +6 -0
- package/package.json +5 -2
- package/src/agent/agent-task-executor.js +237 -112
- package/src/isolation-manager.js +94 -51
- package/src/orchestrator.js +45 -10
- package/src/preflight.js +383 -0
- package/src/process-metrics.js +546 -0
- package/src/status-footer.js +543 -0
- package/task-lib/attachable-watcher.js +202 -0
- package/task-lib/commands/clean.js +50 -0
- package/task-lib/commands/get-log-path.js +23 -0
- package/task-lib/commands/kill.js +32 -0
- package/task-lib/commands/list.js +105 -0
- package/task-lib/commands/logs.js +411 -0
- package/task-lib/commands/resume.js +41 -0
- package/task-lib/commands/run.js +48 -0
- package/task-lib/commands/schedule.js +105 -0
- package/task-lib/commands/scheduler-cmd.js +96 -0
- package/task-lib/commands/schedules.js +98 -0
- package/task-lib/commands/status.js +44 -0
- package/task-lib/commands/unschedule.js +16 -0
- package/task-lib/completion.js +9 -0
- package/task-lib/config.js +10 -0
- package/task-lib/name-generator.js +230 -0
- package/task-lib/package.json +3 -0
- package/task-lib/runner.js +123 -0
- package/task-lib/scheduler.js +252 -0
- package/task-lib/store.js +217 -0
- package/task-lib/tui/formatters.js +166 -0
- package/task-lib/tui/index.js +197 -0
- package/task-lib/tui/layout.js +111 -0
- package/task-lib/tui/renderer.js +119 -0
- package/task-lib/tui.js +384 -0
- package/task-lib/watcher.js +162 -0
- package/cluster-templates/conductor-junior-bootstrap.json +0 -69

package/src/agent/agent-task-executor.js
CHANGED
@@ -630,37 +630,29 @@ function getClaudeTasksPath() {
  * @param {String} context - Context to pass to Claude
  * @returns {Promise<Object>} Result object { success, output, error }
  */
-function spawnClaudeTaskIsolated(agent, context) {
+async function spawnClaudeTaskIsolated(agent, context) {
   const { manager, clusterId } = agent.isolation;
 
-  agent._log(`📦 Agent ${agent.id}: Running task in isolated container...`);
+  agent._log(`📦 Agent ${agent.id}: Running task in isolated container using zeroshot task run...`);
 
-  // Build
-  //
-  // CRITICAL: Default to strict schema validation (same as _spawnClaudeTask)
+  // Build zeroshot task run command (same infrastructure as non-isolation mode)
+  // CRITICAL: Default to strict schema validation to prevent cluster crashes from parse failures
   const desiredOutputFormat = agent.config.outputFormat || 'json';
   const strictSchema = agent.config.strictSchema !== false; // DEFAULT TO TRUE
   const runOutputFormat =
     agent.config.jsonSchema && desiredOutputFormat === 'json' && !strictSchema
       ? 'stream-json'
       : desiredOutputFormat;
-  // NOTE: --dangerously-skip-permissions is REQUIRED for non-interactive (--print) mode
-  // Without it, Claude can't write files, run commands, etc. in the isolated container
-  const command = [
-    'claude',
-    '--print',
-    '--dangerously-skip-permissions',
-    '--output-format',
-    runOutputFormat,
-  ];
 
-
-
-
-
+  const command = ['zeroshot', 'task', 'run', '--output-format', runOutputFormat];
+
+  // Add verification mode flag if configured
+  if (agent.config.verificationMode) {
+    command.push('-v');
   }
 
-  // Add JSON schema if specified in agent config
+  // Add JSON schema if specified in agent config
+  // If we are running stream-json for live logs (strictSchema=false), do NOT pass schema to CLI
   if (agent.config.jsonSchema) {
     if (runOutputFormat === 'json') {
       // strictSchema=true OR no schema conflict: pass schema to CLI for native enforcement
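The hunk above swaps the direct `claude --print` invocation for a `zeroshot task run` command. A minimal sketch of the argv this builds for a hypothetical agent config (values invented for illustration; the flag strict mode uses to pass the schema is elided in this diff, so it appears only as a comment):

```js
// Hypothetical agent config; these values are illustrative, not from the package.
const config = { outputFormat: 'json', strictSchema: true, verificationMode: true };

const runOutputFormat = config.outputFormat || 'json';
const command = ['zeroshot', 'task', 'run', '--output-format', runOutputFormat];
if (config.verificationMode) command.push('-v'); // verification mode flag, per the diff

// With strictSchema=true and a jsonSchema configured, the schema would also be
// pushed onto `command` for native CLI enforcement (exact flag not shown in this diff).

console.log(command.join(' ')); // => zeroshot task run --output-format json -v
```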
@@ -673,13 +665,7 @@ function spawnClaudeTaskIsolated(agent, context) {
     }
   }
 
-  // Add
-  const selectedModel = agent._selectModel();
-  if (selectedModel) {
-    command.push('--model', selectedModel);
-  }
-
-  // Add explicit output instructions when we run stream-json for a jsonSchema agent.
+  // Add explicit output instructions when we run stream-json for a jsonSchema agent
   let finalContext = context;
   if (
     agent.config.jsonSchema &&
@@ -693,14 +679,11 @@ function spawnClaudeTaskIsolated(agent, context) {
     )}\n```\n`;
   }
 
-  // Add the context as the prompt
   command.push(finalContext);
 
-
-
-
-
-  // Spawn process inside container
+  // STEP 1: Spawn task and extract task ID (same as non-isolated mode)
+  const taskId = await new Promise((resolve, reject) => {
+    const selectedModel = agent._selectModel();
     const proc = manager.spawnInContainer(clusterId, command, {
       env: {
         ANTHROPIC_MODEL: selectedModel,
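With the `--model` flag removed above, the selected model now reaches the child through its environment instead of argv. A stand-in sketch of that plumbing with plain `child_process` (spawnInContainer is the package's own wrapper, so this is an illustrative assumption, not its actual signature):

```js
const { spawn } = require('node:child_process');

// Stand-in for manager.spawnInContainer(): the point is only that the
// selected model travels via ANTHROPIC_MODEL in the child env, not argv.
function spawnWithModel(command, selectedModel) {
  return spawn(command[0], command.slice(1), {
    env: { ...process.env, ANTHROPIC_MODEL: selectedModel },
    stdio: ['ignore', 'pipe', 'pipe'],
  });
}
```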
@@ -709,104 +692,246 @@ function spawnClaudeTaskIsolated(agent, context) {
       },
     });
 
-    //
-
-
-      output += chunk;
-
-      // Process each line
-      const lines = chunk.split('\n');
-      for (const line of lines) {
-        if (!line.trim() || !line.trim().startsWith('{')) continue;
+    // Track PID for resource monitoring
+    agent.processPid = proc.pid;
+    agent._publishLifecycle('PROCESS_SPAWNED', { pid: proc.pid });
 
-
-
-          JSON.parse(line);
-        } catch {
-          continue; // Not valid JSON
-        }
+    let stdout = '';
+    let stderr = '';
 
-
-
-          topic: 'AGENT_OUTPUT',
-          receiver: 'broadcast',
-          content: {
-            text: line,
-            data: {
-              type: 'stdout',
-              line,
-              agent: agent.id,
-              role: agent.role,
-              iteration: agent.iteration,
-              isolated: true,
-            },
-          },
-        });
-      }
+    proc.stdout.on('data', (data) => {
+      stdout += data.toString();
     });
 
     proc.stderr.on('data', (data) => {
-
-      console.error(`[${agent.id}] stderr: ${text}`);
+      stderr += data.toString();
     });
 
     proc.on('close', (code, signal) => {
-
-      resolved = true;
-
-      agent.currentTask = null;
-
-      // Handle process killed by signal (e.g., SIGTERM, SIGKILL, SIGSTOP)
+      // Handle process killed by signal
       if (signal) {
-
-          success: false,
-          output,
-          error: `Process killed by signal ${signal}`,
-        });
+        reject(new Error(`Process killed by signal ${signal}${stderr ? `: ${stderr}` : ''}`));
         return;
       }
 
-
-
-
-
-
-
+      if (code === 0) {
+        // Parse task ID from output: "✓ Task spawned: xxx-yyy-nn"
+        const match = stdout.match(/Task spawned: ((?:task-)?[a-z]+-[a-z]+-[a-z0-9]+)/);
+        if (match) {
+          const spawnedTaskId = match[1];
+          agent.currentTaskId = spawnedTaskId; // Track for resume capability
+          agent._publishLifecycle('TASK_ID_ASSIGNED', {
+            pid: agent.processPid,
+            taskId: spawnedTaskId,
+          });
 
-
-
-
+          // Start liveness monitoring
+          if (agent.enableLivenessCheck) {
+            agent.lastOutputTime = Date.now(); // Initialize to spawn time
+            agent._startLivenessCheck();
+          }
 
-
-
+          resolve(spawnedTaskId);
+        } else {
+          reject(new Error(`Could not parse task ID from output: ${stdout}`));
+        }
+      } else {
+        reject(new Error(`zeroshot task run failed with code ${code}: ${stderr}`));
+      }
     });
 
-
-
-
-
-
-
-
+    proc.on('error', (error) => {
+      reject(error);
+    });
+  });
+
+  agent._log(`📋 Agent ${agent.id}: Following zeroshot logs for ${taskId} in container...`);
+
+  // STEP 2: Follow the task's log file inside container (NOT the spawn stdout!)
+  return followClaudeTaskLogsIsolated(agent, taskId);
+}
+
+/**
+ * Follow task logs inside Docker container (isolated mode)
+ * Reads task log file inside container and streams JSON lines to message bus
+ * @param {Object} agent - Agent instance with isolation context
+ * @param {String} taskId - Task ID to follow
+ * @returns {Promise<Object>} Result object
+ * @private
+ */
+function followClaudeTaskLogsIsolated(agent, taskId) {
+  const { isolation } = agent;
+  if (!isolation?.manager) {
+    throw new Error('followClaudeTaskLogsIsolated: isolation manager not found');
+  }
+
+  const manager = isolation.manager;
+  const clusterId = isolation.clusterId;
+
+  return new Promise((resolve, reject) => {
+    let taskExited = false;
+    let lastSize = 0;
+    let fullOutput = '';
+    let pollInterval = null;
+
+    // Cleanup function
+    const cleanup = () => {
+      if (pollInterval) {
+        clearInterval(pollInterval);
+        pollInterval = null;
+      }
     };
 
-    //
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    // Get log file path from zeroshot CLI inside container
+    manager
+      .execInContainer(clusterId, ['sh', '-c', `zeroshot get-log-path ${taskId}`])
+      .then(({ stdout, stderr, code }) => {
+        if (code !== 0) {
+          cleanup();
+          return reject(
+            new Error(
+              `Failed to get log path for ${taskId} inside container: ${stderr || stdout}`
+            )
+          );
+        }
+
+        const logFilePath = stdout.trim();
+        if (!logFilePath) {
+          cleanup();
+          return reject(new Error(`Empty log path returned for ${taskId}`));
+        }
+
+        agent._log(`[${agent.id}] Following isolated task logs: ${logFilePath}`);
+
+        // Broadcast line helper (same as non-isolated mode)
+        const broadcastLine = (line) => {
+          const timestampMatch = line.match(/^\[(\d{4}-\d{2}-\d{2}T[^\]]+)\]\s*(.*)$/);
+          const timestamp = timestampMatch
+            ? new Date(timestampMatch[1]).getTime()
+            : Date.now();
+          const content = timestampMatch ? timestampMatch[2] : line;
+
+          agent.messageBus.publish({
+            cluster_id: agent.cluster.id,
+            topic: 'AGENT_OUTPUT',
+            sender: agent.id,
+            content: {
+              data: {
+                line: content,
+                taskId,
+                iteration: agent.iteration,
+              },
+            },
+            timestamp,
+          });
+
+          // Update last output time for liveness tracking
+          agent.lastOutputTime = Date.now();
+        };
+
+        // Poll log file inside container (check every 500ms)
+        pollInterval = setInterval(async () => {
+          try {
+            // Get file size inside container
+            const sizeResult = await manager.execInContainer(clusterId, [
+              'sh',
+              '-c',
+              `stat -c %s "${logFilePath}" 2>/dev/null || echo 0`,
+            ]);
+
+            const currentSize = parseInt(sizeResult.stdout.trim()) || 0;
+
+            // Read new content if file grew
+            if (currentSize > lastSize) {
+              const bytesToRead = currentSize - lastSize;
+              const readResult = await manager.execInContainer(clusterId, [
+                'sh',
+                '-c',
+                `tail -c ${bytesToRead} "${logFilePath}"`,
+              ]);
+
+              if (readResult.code === 0 && readResult.stdout) {
+                fullOutput += readResult.stdout;
+
+                // Split by newlines and broadcast each complete line
+                const lines = readResult.stdout.split('\n');
+                for (let i = 0; i < lines.length - 1; i++) {
+                  if (lines[i].trim()) {
+                    broadcastLine(lines[i]);
+                  }
+                }
+              }
+
+              lastSize = currentSize;
+            }
+
+            // Check if task exited (query zeroshot status inside container)
+            const statusResult = await manager.execInContainer(clusterId, [
+              'sh',
+              '-c',
+              `zeroshot status ${taskId} 2>/dev/null || echo "not_found"`,
+            ]);
+
+            const statusOutput = statusResult.stdout.toLowerCase();
+            if (
+              statusOutput.includes('success') ||
+              statusOutput.includes('error') ||
+              statusOutput.includes('not_found')
+            ) {
+              // Task finished - read final output and resolve
+              const finalReadResult = await manager.execInContainer(clusterId, [
+                'sh',
+                '-c',
+                `cat "${logFilePath}"`,
+              ]);
+
+              if (finalReadResult.code === 0) {
+                fullOutput = finalReadResult.stdout;
+
+                // Broadcast any final lines we haven't seen
+                const finalLines = fullOutput.split('\n');
+                for (const line of finalLines) {
+                  if (line.trim()) {
+                    broadcastLine(line);
+                  }
+                }
+              }
+
+              cleanup();
+              taskExited = true;
+
+              // Parse result from output (same logic as non-isolated mode)
+              const parsedResult = agent._parseResultOutput(fullOutput);
+
+              resolve({
+                output: fullOutput,
+                taskId,
+                result: parsedResult,
+              });
+            }
+          } catch (pollErr) {
+            // Log error but continue polling (file might not exist yet)
+            agent._log(`[${agent.id}] Poll error (will retry): ${pollErr.message}`);
+          }
+        }, 500);
+
+        // Safety timeout (same as non-isolated mode)
+        const timeoutMs = agent.timeout || 300000; // 5 minutes default
+        setTimeout(() => {
+          if (!taskExited) {
+            cleanup();
+            reject(
+              new Error(
+                `Task ${taskId} timeout after ${timeoutMs}ms (isolated mode)`
+              )
+            );
+          }
+        }, timeoutMs);
+      })
+      .catch((err) => {
+        cleanup();
+        reject(err);
+      });
   });
 }
 
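The new follow loop is an incremental tail driven through docker exec. The same stat-then-tail pattern against a local file, as a self-contained sketch (GNU `stat`, so Linux; the partial-line carry mirrors the "complete lines only" broadcast above):

```js
const { execFileSync } = require('node:child_process');

// Minimal local sketch of the polling pattern used above: check the file
// size, and when it grows, read only the newly appended bytes.
function followFile(logFilePath, onLine, intervalMs = 500) {
  let lastSize = 0;
  let partial = ''; // carries an incomplete trailing line between polls
  return setInterval(() => {
    const sizeOut = execFileSync('sh', ['-c', `stat -c %s "${logFilePath}" 2>/dev/null || echo 0`]);
    const currentSize = parseInt(sizeOut.toString().trim(), 10) || 0;
    if (currentSize > lastSize) {
      const chunk = execFileSync('sh', [
        '-c',
        `tail -c ${currentSize - lastSize} "${logFilePath}"`,
      ]).toString();
      const lines = (partial + chunk).split('\n');
      partial = lines.pop(); // last element is incomplete (or empty)
      lines.filter((l) => l.trim()).forEach(onLine);
      lastSize = currentSize;
    }
  }, intervalMs);
}

// Usage: const handle = followFile('/tmp/task.log', console.log);
// later: clearInterval(handle);
```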
package/src/isolation-manager.js
CHANGED

@@ -49,12 +49,14 @@ class IsolationManager {
    * @param {object} config - Container config
    * @param {string} config.workDir - Working directory to mount
    * @param {string} [config.image] - Docker image (default: zeroshot-cluster-base)
+   * @param {boolean} [config.reuseExistingWorkspace=false] - If true, reuse existing isolated workspace (for resume)
    * @returns {Promise<string>} Container ID
    */
   createContainer(clusterId, config) {
     const image = config.image || this.image;
     let workDir = config.workDir || process.cwd();
     const containerName = `zeroshot-cluster-${clusterId}`;
+    const reuseExisting = config.reuseExistingWorkspace || false;
 
     // Check if container already exists
     if (this.containers.has(clusterId)) {
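A sketch of the call shape this new flag enables on resume (ids and variables invented for illustration):

```js
// Hypothetical resume flow: reuses the workspace a previous run preserved
// (see cleanup({ preserveWorkspace: true }) further down) instead of
// copying projectDir into a fresh isolated directory.
async function resumeCluster(manager, clusterId, projectDir) {
  await manager.createContainer(clusterId, {
    workDir: projectDir,
    reuseExistingWorkspace: true,
  });
}
```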
@@ -69,15 +71,30 @@ class IsolationManager {
 
     // For isolation mode: copy files to temp dir with fresh git repo (100% isolated)
     // No worktrees - cleaner, no host path dependencies
+    // EXCEPTION: On resume (reuseExisting=true), skip copy and use existing workspace
     if (this._isGitRepo(workDir)) {
-      const
-
-
-
-
-
-
-
+      const isolatedPath = path.join(os.tmpdir(), 'zeroshot-isolated', clusterId);
+
+      if (reuseExisting && fs.existsSync(isolatedPath)) {
+        // Resume mode: reuse existing isolated workspace (contains agent's work)
+        console.log(`[IsolationManager] Reusing existing isolated workspace at ${isolatedPath}`);
+        this.isolatedDirs = this.isolatedDirs || new Map();
+        this.isolatedDirs.set(clusterId, {
+          path: isolatedPath,
+          originalDir: workDir,
+        });
+        workDir = isolatedPath;
+      } else {
+        // Fresh start: create new isolated copy
+        const isolatedDir = this._createIsolatedCopy(clusterId, workDir);
+        this.isolatedDirs = this.isolatedDirs || new Map();
+        this.isolatedDirs.set(clusterId, {
+          path: isolatedDir,
+          originalDir: workDir,
+        });
+        workDir = isolatedDir;
+        console.log(`[IsolationManager] Created isolated copy at ${workDir}`);
+      }
     }
 
     // Create fresh Claude config dir for this cluster (avoids permission issues from host)
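The resume branch works because the isolated workspace lives at a deterministic path derived from the cluster id, as the hunk above shows. Illustration (id invented):

```js
const os = require('node:os');
const path = require('node:path');

// The same cluster id always maps to the same directory, which is what lets
// a resumed run find the workspace a previous run preserved.
const isolatedPath = path.join(os.tmpdir(), 'zeroshot-isolated', 'cluster-abc123');
console.log(isolatedPath); // e.g. /tmp/zeroshot-isolated/cluster-abc123
```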
@@ -104,40 +121,41 @@ class IsolationManager {
       // Mount fresh Claude config to node user's home (read-write - Claude CLI writes settings, todos, etc.)
       '-v',
       `${clusterConfigDir}:/home/node/.claude`,
-
-
-
-
-
-
-      //
-      '
-
-      //
-      '
-
-
-
-
-
-
-
-
+    ];
+
+    // Add optional volume mounts (skip if path doesn't exist or isn't mountable)
+    // Each mount is [hostPath, containerPath, options?]
+    const optionalMounts = [
+      [this._getGhConfigDir(), '/home/node/.config/gh', null], // gh credentials (read-write)
+      [this._getGitConfigPath(), '/home/node/.gitconfig', 'ro'], // git config (read-only)
+      [this._getAwsConfigDir(), '/home/node/.aws', 'ro'], // AWS credentials (read-only)
+      [this._getKubeConfigDir(), '/home/node/.kube', 'ro'], // Kubernetes config (read-only)
+      [this._getSshDir(), '/home/node/.ssh', 'ro'], // SSH keys (read-only)
+      [this._getTerraformPluginDir(), '/home/node/.terraform.d', null], // Terraform cache (read-write)
+    ];
+
+    for (const [hostPath, containerPath, options] of optionalMounts) {
+      if (hostPath && fs.existsSync(hostPath)) {
+        const mountSpec = options ? `${hostPath}:${containerPath}:${options}` : `${hostPath}:${containerPath}`;
+        args.push('-v', mountSpec);
+      }
+    }
+
+    // Environment variables and final args
+    args.push(
       '-e',
       `AWS_REGION=${process.env.AWS_REGION || 'eu-north-1'}`,
       '-e',
       `AWS_PROFILE=${process.env.AWS_PROFILE || 'default'}`,
       '-e',
       'AWS_PAGER=',
-      // Set working directory
      '-w',
      '/workspace',
-      // Keep container running
      image,
      'tail',
      '-f',
-      '/dev/null'
-
+      '/dev/null'
+    );
 
     return new Promise((resolve, reject) => {
       const proc = spawn('docker', args, { stdio: ['pipe', 'pipe', 'pipe'] });
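A standalone sketch of how these [hostPath, containerPath, options?] tuples expand into docker `-v` arguments (paths invented):

```js
const fs = require('node:fs');

// Example tuples in the same shape as optionalMounts above; the paths are
// illustrative, not the package's actual helper return values.
const optionalMounts = [
  ['/home/me/.gitconfig', '/home/node/.gitconfig', 'ro'],
  ['/home/me/.config/gh', '/home/node/.config/gh', null],
];

const args = [];
for (const [hostPath, containerPath, options] of optionalMounts) {
  if (hostPath && fs.existsSync(hostPath)) {
    args.push('-v', options ? `${hostPath}:${containerPath}:${options}` : `${hostPath}:${containerPath}`);
  }
}
// e.g. ['-v', '/home/me/.gitconfig:/home/node/.gitconfig:ro', ...]
```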
@@ -381,32 +399,42 @@ class IsolationManager {
   }
 
   /**
-   * Stop and remove a container, and clean up isolated dir/config
+   * Stop and remove a container, and optionally clean up isolated dir/config
    * @param {string} clusterId - Cluster ID
+   * @param {object} [options] - Cleanup options
+   * @param {boolean} [options.preserveWorkspace=false] - If true, keep the isolated workspace (for resume capability)
    * @returns {Promise<void>}
    */
-  async cleanup(clusterId) {
+  async cleanup(clusterId, options = {}) {
+    const preserveWorkspace = options.preserveWorkspace || false;
+
     await this.stopContainer(clusterId);
     await this.removeContainer(clusterId);
 
-    // Clean up isolated directory if one was created
+    // Clean up isolated directory if one was created (unless preserveWorkspace is set)
     if (this.isolatedDirs?.has(clusterId)) {
       const isolatedInfo = this.isolatedDirs.get(clusterId);
-      console.log(`[IsolationManager] Cleaning up isolated dir at ${isolatedInfo.path}`);
 
-
-
+      if (preserveWorkspace) {
+        console.log(`[IsolationManager] Preserving isolated workspace at ${isolatedInfo.path} for resume`);
+        // Don't delete - but DON'T remove from Map either, resume() needs it
+      } else {
+        console.log(`[IsolationManager] Cleaning up isolated dir at ${isolatedInfo.path}`);
 
-
-
-
-
-
+        // Preserve Terraform state before deleting isolated directory
+        this._preserveTerraformState(clusterId, isolatedInfo.path);
+
+        // Remove the isolated directory
+        try {
+          fs.rmSync(isolatedInfo.path, { recursive: true, force: true });
+        } catch {
+          // Ignore
+        }
+        this.isolatedDirs.delete(clusterId);
      }
-      this.isolatedDirs.delete(clusterId);
    }
 
-    // Clean up cluster config dir
+    // Clean up cluster config dir (always - it's recreated on resume)
     this._cleanupClusterConfigDir(clusterId);
   }
 
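Paired with the createContainer change earlier, the intended pause/resume cycle looks roughly like this (sketch; ids and variables invented):

```js
// Hypothetical pause/resume cycle built on the two changes above.
async function pauseAndResume(manager, clusterId, projectDir) {
  // Pause: tear down the container but keep the agent's working tree on disk.
  await manager.cleanup(clusterId, { preserveWorkspace: true });

  // Resume later: the preserved workspace is picked up instead of re-copied.
  await manager.createContainer(clusterId, {
    workDir: projectDir,
    reuseExistingWorkspace: true,
  });
}
```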
@@ -752,10 +780,22 @@ class IsolationManager {
 
   /**
    * Get git config file path (for commit identity)
+   * Returns null if .gitconfig doesn't exist or is a directory (e.g., on GitHub Actions)
    * @private
    */
   _getGitConfigPath() {
-
+    const gitConfigPath = path.join(os.homedir(), '.gitconfig');
+    try {
+      const stat = fs.statSync(gitConfigPath);
+      if (stat.isFile()) {
+        return gitConfigPath;
+      }
+      // .gitconfig exists but is a directory (GitHub Actions runner has this issue)
+      return null;
+    } catch {
+      // .gitconfig doesn't exist
+      return null;
+    }
   }
 
   /**
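The isFile() guard matters because handing docker a directory where a single-file mount is expected fails the container start; returning null lets the optional-mount loop above skip the entry. The same guard as a small reusable sketch:

```js
const fs = require('node:fs');
const os = require('node:os');
const path = require('node:path');

// Only return the path when it is a regular file; null means "skip this mount".
function fileOrNull(p) {
  try {
    return fs.statSync(p).isFile() ? p : null; // directory (or other) -> null
  } catch {
    return null; // doesn't exist
  }
}

console.log(fileOrNull(path.join(os.homedir(), '.gitconfig')));
```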
@@ -840,10 +880,12 @@ class IsolationManager {
    * @returns {Promise<void>}
    */
   static async buildImage(image = DEFAULT_IMAGE, maxRetries = 3) {
-
+    // Repository root is one level up from src/
+    const repoRoot = path.join(__dirname, '..');
+    const dockerfilePath = path.join(repoRoot, 'docker', 'zeroshot-cluster', 'Dockerfile');
 
-    if (!fs.existsSync(
-      throw new Error(`Dockerfile not found at ${dockerfilePath}
+    if (!fs.existsSync(dockerfilePath)) {
+      throw new Error(`Dockerfile not found at ${dockerfilePath}`);
     }
 
     console.log(`[IsolationManager] Building Docker image '${image}'...`);
@@ -852,9 +894,10 @@ class IsolationManager {
 
     for (let attempt = 1; attempt <= maxRetries; attempt++) {
       try {
-        //
-
-
+        // CRITICAL: Run from repo root so build context includes package.json and src/
+        // Use -f flag to specify Dockerfile location
+        execSync(`docker build -f docker/zeroshot-cluster/Dockerfile -t ${image} .`, {
+          cwd: repoRoot,
           encoding: 'utf8',
           stdio: 'inherit',
         });