@covibes/zeroshot 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/CHANGELOG.md +167 -0
  2. package/LICENSE +21 -0
  3. package/README.md +364 -0
  4. package/cli/index.js +3990 -0
  5. package/cluster-templates/base-templates/debug-workflow.json +181 -0
  6. package/cluster-templates/base-templates/full-workflow.json +455 -0
  7. package/cluster-templates/base-templates/single-worker.json +48 -0
  8. package/cluster-templates/base-templates/worker-validator.json +131 -0
  9. package/cluster-templates/conductor-bootstrap.json +122 -0
  10. package/cluster-templates/conductor-junior-bootstrap.json +69 -0
  11. package/docker/zeroshot-cluster/Dockerfile +132 -0
  12. package/lib/completion.js +174 -0
  13. package/lib/id-detector.js +53 -0
  14. package/lib/settings.js +97 -0
  15. package/lib/stream-json-parser.js +236 -0
  16. package/package.json +121 -0
  17. package/src/agent/agent-config.js +121 -0
  18. package/src/agent/agent-context-builder.js +241 -0
  19. package/src/agent/agent-hook-executor.js +329 -0
  20. package/src/agent/agent-lifecycle.js +555 -0
  21. package/src/agent/agent-stuck-detector.js +256 -0
  22. package/src/agent/agent-task-executor.js +1034 -0
  23. package/src/agent/agent-trigger-evaluator.js +67 -0
  24. package/src/agent-wrapper.js +459 -0
  25. package/src/agents/git-pusher-agent.json +20 -0
  26. package/src/attach/attach-client.js +438 -0
  27. package/src/attach/attach-server.js +543 -0
  28. package/src/attach/index.js +35 -0
  29. package/src/attach/protocol.js +220 -0
  30. package/src/attach/ring-buffer.js +121 -0
  31. package/src/attach/socket-discovery.js +242 -0
  32. package/src/claude-task-runner.js +468 -0
  33. package/src/config-router.js +80 -0
  34. package/src/config-validator.js +598 -0
  35. package/src/github.js +103 -0
  36. package/src/isolation-manager.js +1042 -0
  37. package/src/ledger.js +429 -0
  38. package/src/logic-engine.js +223 -0
  39. package/src/message-bus-bridge.js +139 -0
  40. package/src/message-bus.js +202 -0
  41. package/src/name-generator.js +232 -0
  42. package/src/orchestrator.js +1938 -0
  43. package/src/schemas/sub-cluster.js +156 -0
  44. package/src/sub-cluster-wrapper.js +545 -0
  45. package/src/task-runner.js +28 -0
  46. package/src/template-resolver.js +347 -0
  47. package/src/tui/CHANGES.txt +133 -0
  48. package/src/tui/LAYOUT.md +261 -0
  49. package/src/tui/README.txt +192 -0
  50. package/src/tui/TWO-LEVEL-NAVIGATION.md +186 -0
  51. package/src/tui/data-poller.js +325 -0
  52. package/src/tui/demo.js +208 -0
  53. package/src/tui/formatters.js +123 -0
  54. package/src/tui/index.js +193 -0
  55. package/src/tui/keybindings.js +383 -0
  56. package/src/tui/layout.js +317 -0
  57. package/src/tui/renderer.js +194 -0
@@ -0,0 +1,1042 @@
1
+ /**
2
+ * IsolationManager - Docker container lifecycle for isolated cluster execution
3
+ *
4
+ * Handles:
5
+ * - Container creation with workspace mounts
6
+ * - Credential injection for Claude CLI
7
+ * - Command execution inside containers
8
+ * - Container cleanup on stop/kill
9
+ */
10
+
11
const { spawn, execSync, execFileSync } = require('child_process');
const path = require('path');
const os = require('os');
const fs = require('fs');
15
+
16
+ const DEFAULT_IMAGE = 'zeroshot-cluster-base';
17
+
18
/**
 * Manages the Docker container, the isolated workspace copy and the per-cluster
 * Claude config directory that together form one isolated cluster environment.
 *
 * All state is kept in per-instance maps keyed by cluster ID.
 */
class IsolationManager {
  /**
   * @param {object} [options]
   * @param {string} [options.image] - Image used when createContainer() is not given one
   */
  constructor(options = {}) {
    this.image = options.image || DEFAULT_IMAGE;
    this.containers = new Map(); // clusterId -> containerId
    this.isolatedDirs = new Map(); // clusterId -> { path, originalDir }
    this.clusterConfigDirs = new Map(); // clusterId -> configDirPath
    // clusterId -> { path, branch, repoRoot }. Read by getWorktreeInfo(); no method
    // in this class currently populates it, but it must exist so lookups do not throw.
    this.worktrees = new Map();
  }

  /**
   * Get GitHub token from gh CLI config (hosts.yml)
   * Works with older gh CLI versions that don't have `gh auth token` command
   * @returns {string|null} First oauth_token found in the file, or null
   * @private
   */
  _getGhToken() {
    try {
      const hostsPath = path.join(this._getGhConfigDir(), 'hosts.yml');
      if (!fs.existsSync(hostsPath)) return null;

      const content = fs.readFileSync(hostsPath, 'utf8');
      // Match `oauth_token: <token>`; YAML allows the value to be quoted, so the
      // optional surrounding quotes are excluded from the captured token.
      const match = content.match(/oauth_token:\s*["']?([^\s"']+)/);
      return match ? match[1] : null;
    } catch {
      // Best effort: an unreadable config simply means "no token".
      return null;
    }
  }

  /**
   * Create and start a container for a cluster
   *
   * If the working directory is a git repository it is first copied into a
   * temporary directory with a fresh git repo, and that copy is mounted instead.
   *
   * @param {string} clusterId - Cluster ID
   * @param {object} [config] - Container config
   * @param {string} [config.workDir] - Working directory to mount (default: process.cwd())
   * @param {string} [config.image] - Docker image (default: image given to the constructor)
   * @returns {Promise<string>} Container ID (first 12 characters of the ID printed by docker)
   * @throws {Error} Synchronously, if preparing the isolated copy or config dir fails
   */
  createContainer(clusterId, config = {}) {
    const image = config.image || this.image;
    let workDir = config.workDir || process.cwd();
    const containerName = `zeroshot-cluster-${clusterId}`;

    // Reuse the tracked container if it is still running
    const existingId = this.containers.get(clusterId);
    if (existingId && this._isContainerRunning(existingId)) {
      return Promise.resolve(existingId);
    }

    // Clean up any existing container with same name
    this._removeContainerByName(containerName);

    // For isolation mode: copy files to temp dir with fresh git repo (100% isolated)
    // No worktrees - cleaner, no host path dependencies
    if (this._isGitRepo(workDir)) {
      const isolatedDir = this._createIsolatedCopy(clusterId, workDir);
      this.isolatedDirs.set(clusterId, { path: isolatedDir, originalDir: workDir });
      workDir = isolatedDir;
      console.log(`[IsolationManager] Created isolated copy at ${workDir}`);
    }

    // Create fresh Claude config dir for this cluster (avoids permission issues from host)
    const clusterConfigDir = this._createClusterConfigDir(clusterId);
    console.log(`[IsolationManager] Created cluster config dir at ${clusterConfigDir}`);

    const args = this._buildRunArgs({ containerName, image, workDir, clusterConfigDir });

    return new Promise((resolve, reject) => {
      const proc = spawn('docker', args, { stdio: ['pipe', 'pipe', 'pipe'] });

      let stdout = '';
      let stderr = '';
      proc.stdout.on('data', (chunk) => {
        stdout += chunk;
      });
      proc.stderr.on('data', (chunk) => {
        stderr += chunk;
      });

      proc.on('close', async (code) => {
        if (code !== 0) {
          reject(new Error(`Failed to create container: ${stderr}`));
          return;
        }

        const containerId = stdout.trim().substring(0, 12);
        this.containers.set(clusterId, containerId);

        // Never throws: dependency installation is best effort.
        await this._installDependencies(clusterId, workDir);
        resolve(containerId);
      });

      proc.on('error', (err) => {
        reject(new Error(`Docker spawn error: ${err.message}`));
      });
    });
  }

  /**
   * Build the `docker run` argument list for a cluster container
   * NOTE: Container runs as 'node' user (uid 1000) for --dangerously-skip-permissions
   * @private
   * @param {object} params
   * @param {string} params.containerName - Value for --name
   * @param {string} params.image - Image to run
   * @param {string} params.workDir - Host directory mounted at /workspace
   * @param {string} params.clusterConfigDir - Host directory mounted at /home/node/.claude
   * @returns {string[]}
   */
  _buildRunArgs({ containerName, image, workDir, clusterConfigDir }) {
    return [
      'run',
      '-d', // detached
      '--name',
      containerName,
      // Mount workspace
      '-v',
      `${workDir}:/workspace`,
      // Mount Docker socket for Docker-in-Docker (e2e tests need docker compose)
      '-v',
      '/var/run/docker.sock:/var/run/docker.sock',
      // Add node user to host's docker group (fixes permission denied)
      // CRITICAL: Without this, agent can't run docker commands inside container
      '--group-add',
      this._getDockerGid(),
      // Mount fresh Claude config to node user's home (read-write - Claude CLI writes settings, todos, etc.)
      '-v',
      `${clusterConfigDir}:/home/node/.claude`,
      // Host credentials/config below are mounted only when present on the host:
      // `docker run -v` creates a missing host path as a directory, which would
      // e.g. turn a missing ~/.gitconfig file into a directory on the host.
      // gh credentials (read-write - gh auth setup-git needs to write)
      ...this._optionalBindMount(this._getGhConfigDir(), '/home/node/.config/gh'),
      // git config (read-only - for git identity)
      ...this._optionalBindMount(this._getGitConfigPath(), '/home/node/.gitconfig', 'ro'),
      // AWS credentials (read-only)
      ...this._optionalBindMount(this._getAwsConfigDir(), '/home/node/.aws', 'ro'),
      // Kubernetes config (read-only)
      ...this._optionalBindMount(this._getKubeConfigDir(), '/home/node/.kube', 'ro'),
      // SSH keys (read-only)
      ...this._optionalBindMount(this._getSshDir(), '/home/node/.ssh', 'ro'),
      // Mount Terraform plugin cache (read-write for caching)
      '-v',
      `${this._getTerraformPluginDir()}:/home/node/.terraform.d`,
      // Environment variables for infrastructure tasks
      '-e',
      `AWS_REGION=${process.env.AWS_REGION || 'eu-north-1'}`,
      '-e',
      `AWS_PROFILE=${process.env.AWS_PROFILE || 'default'}`,
      '-e',
      'AWS_PAGER=',
      // Set working directory
      '-w',
      '/workspace',
      // Keep container running
      image,
      'tail',
      '-f',
      '/dev/null',
    ];
  }

  /**
   * Build a `-v host:container[:mode]` argument pair, or nothing if the host path is missing
   * @private
   * @param {string} hostPath - Path on the host
   * @param {string} containerPath - Mount point inside the container
   * @param {string} [mode] - Optional mount mode suffix, e.g. 'ro'
   * @returns {string[]} Either `['-v', spec]` or `[]`
   */
  _optionalBindMount(hostPath, containerPath, mode) {
    if (!fs.existsSync(hostPath)) return [];
    const suffix = mode ? `:${mode}` : '';
    return ['-v', `${hostPath}:${containerPath}${suffix}`];
  }

  /**
   * Run `npm install` inside the container when the workspace has a package.json
   * Retries with exponential backoff (network issues are common). Failures are
   * logged and swallowed: this method never rejects.
   * @private
   * @param {string} clusterId - Cluster ID
   * @param {string} workDir - Host path of the mounted workspace
   * @returns {Promise<void>}
   */
  async _installDependencies(clusterId, workDir) {
    try {
      console.log(`[IsolationManager] Checking for package.json in ${workDir}...`);
      if (!fs.existsSync(path.join(workDir, 'package.json'))) return;

      console.log(`[IsolationManager] Installing npm dependencies in container...`);

      const maxRetries = 3;
      const baseDelay = 2000; // 2 seconds
      const sleep = (ms) => new Promise((done) => setTimeout(done, ms));

      for (let attempt = 1; attempt <= maxRetries; attempt++) {
        const isLastAttempt = attempt === maxRetries;
        const delay = baseDelay * 2 ** (attempt - 1);

        try {
          const result = await this.execInContainer(
            clusterId,
            ['sh', '-c', 'npm install --no-audit --no-fund 2>&1'],
            {}
          );

          if (result.code === 0) {
            console.log(`[IsolationManager] ✓ Dependencies installed`);
            return;
          }

          // The command redirects stderr into stdout (2>&1), so the npm error text
          // arrives on stdout. Show the tail, where npm prints its error summary.
          const output = (result.stderr || result.stdout || '').trim().slice(-200);

          if (isLastAttempt) {
            console.warn(
              `[IsolationManager] ⚠️ npm install failed after ${maxRetries} attempts (non-fatal): ${output}`
            );
          } else {
            console.warn(
              `[IsolationManager] ⚠️ npm install failed (attempt ${attempt}/${maxRetries}), retrying in ${delay}ms...`
            );
            console.warn(`[IsolationManager] Error: ${output}`);
            await sleep(delay);
          }
        } catch (execErr) {
          if (isLastAttempt) throw execErr; // Handled by the outer catch below
          console.warn(
            `[IsolationManager] ⚠️ npm install execution error (attempt ${attempt}/${maxRetries}), retrying in ${delay}ms...`
          );
          console.warn(`[IsolationManager] Error: ${execErr.message}`);
          await sleep(delay);
        }
      }
    } catch (err) {
      console.warn(
        `[IsolationManager] ⚠️ Failed to install dependencies (non-fatal): ${err.message}`
      );
    }
  }

  /**
   * Turn an env object into repeated `-e KEY=value` docker arguments
   * @private
   * @param {object} [env] - Environment variables
   * @returns {string[]}
   */
  _envArgs(env) {
    if (!env) return [];
    return Object.entries(env).flatMap(([key, value]) => ['-e', `${key}=${value}`]);
  }

  /**
   * Execute a command inside the container
   * @param {string} clusterId - Cluster ID
   * @param {string[]} command - Command and arguments
   * @param {object} [options] - Exec options
   * @param {boolean} [options.interactive] - Use -it flags and inherit stdio (stdout/stderr are then empty strings)
   * @param {object} [options.env] - Environment variables
   * @returns {Promise<{stdout: string, stderr: string, code: number}>}
   * @throws {Error} Synchronously, if no container is tracked for the cluster
   */
  execInContainer(clusterId, command, options = {}) {
    const containerId = this.containers.get(clusterId);
    if (!containerId) {
      throw new Error(`No container found for cluster ${clusterId}`);
    }

    const args = [
      'exec',
      ...(options.interactive ? ['-it'] : []),
      ...this._envArgs(options.env),
      containerId,
      ...command,
    ];

    return new Promise((resolve, reject) => {
      const proc = spawn('docker', args, {
        stdio: options.interactive ? 'inherit' : ['pipe', 'pipe', 'pipe'],
      });

      let stdout = '';
      let stderr = '';

      // With inherited stdio there are no streams to read from
      if (!options.interactive) {
        proc.stdout.on('data', (chunk) => {
          stdout += chunk;
        });
        proc.stderr.on('data', (chunk) => {
          stderr += chunk;
        });
      }

      proc.on('close', (code) => {
        resolve({ stdout, stderr, code });
      });

      proc.on('error', (err) => {
        reject(new Error(`Docker exec error: ${err.message}`));
      });
    });
  }

  /**
   * Spawn a PTY-like process inside the container
   * Returns a child process that can be used like a PTY
   * @param {string} clusterId - Cluster ID
   * @param {string[]} command - Command and arguments
   * @param {object} [options] - Spawn options
   * @param {object} [options.env] - Environment variables
   * @param {object} [options.spawnOptions] - Extra options merged into the spawn() call
   * @returns {ChildProcess}
   * @throws {Error} If no container is tracked for the cluster
   */
  spawnInContainer(clusterId, command, options = {}) {
    const containerId = this.containers.get(clusterId);
    if (!containerId) {
      throw new Error(`No container found for cluster ${clusterId}`);
    }

    // IMPORTANT: Must use -i flag for interactive stdin/stdout communication with commands like 'cat'
    // If omitted, docker exec will not properly connect stdin, causing piped input to be ignored
    const args = ['exec', '-i', ...this._envArgs(options.env), containerId, ...command];

    return spawn('docker', args, {
      stdio: ['pipe', 'pipe', 'pipe'],
      ...options.spawnOptions,
    });
  }

  /**
   * Stop a container
   * Errors are ignored; the promise always resolves.
   * @param {string} clusterId - Cluster ID
   * @param {number} [timeout=10] - Timeout in seconds before SIGKILL
   * @returns {Promise<void>}
   */
  stopContainer(clusterId, timeout = 10) {
    const containerId = this.containers.get(clusterId);
    if (!containerId) {
      return Promise.resolve(); // Already stopped or never started
    }

    return new Promise((resolve) => {
      const proc = spawn('docker', ['stop', '-t', String(timeout), containerId], {
        stdio: ['pipe', 'pipe', 'pipe'],
      });

      proc.on('close', () => resolve());
      proc.on('error', () => resolve()); // Ignore errors on stop
    });
  }

  /**
   * Remove a container
   * The cluster is dropped from the tracking map whether or not `docker rm` succeeds.
   * @param {string} clusterId - Cluster ID
   * @param {boolean} [force=false] - Force remove running container
   * @returns {Promise<void>}
   */
  removeContainer(clusterId, force = false) {
    const containerId = this.containers.get(clusterId);
    if (!containerId) {
      return Promise.resolve();
    }

    const args = ['rm', ...(force ? ['-f'] : []), containerId];

    return new Promise((resolve) => {
      const forget = () => {
        this.containers.delete(clusterId);
        resolve();
      };

      const proc = spawn('docker', args, { stdio: ['pipe', 'pipe', 'pipe'] });
      proc.on('close', forget);
      proc.on('error', forget);
    });
  }

  /**
   * Stop and remove a container, and clean up isolated dir/config
   * @param {string} clusterId - Cluster ID
   * @returns {Promise<void>}
   */
  async cleanup(clusterId) {
    await this.stopContainer(clusterId);
    await this.removeContainer(clusterId);

    // Clean up isolated directory if one was created
    const isolatedInfo = this.isolatedDirs.get(clusterId);
    if (isolatedInfo) {
      console.log(`[IsolationManager] Cleaning up isolated dir at ${isolatedInfo.path}`);

      // Preserve Terraform state before deleting isolated directory
      this._preserveTerraformState(clusterId, isolatedInfo.path);

      try {
        fs.rmSync(isolatedInfo.path, { recursive: true, force: true });
      } catch {
        // Ignore - removal is best effort
      }
      this.isolatedDirs.delete(clusterId);
    }

    // Clean up cluster config dir
    this._cleanupClusterConfigDir(clusterId);
  }

  /**
   * Create an isolated copy of a directory with fresh git repo
   * The copy gets an initial commit and is left on a new `zeroshot/<clusterId>` branch.
   * @private
   * @param {string} clusterId - Cluster ID
   * @param {string} sourceDir - Source directory to copy
   * @returns {string} Path to isolated directory
   */
  _createIsolatedCopy(clusterId, sourceDir) {
    const isolatedPath = path.join(os.tmpdir(), 'zeroshot-isolated', clusterId);
    const quiet = { cwd: isolatedPath, stdio: 'pipe' };

    // Start from an empty directory (force: true makes a missing dir a no-op)
    fs.rmSync(isolatedPath, { recursive: true, force: true });
    fs.mkdirSync(isolatedPath, { recursive: true });

    // Copy files (excluding .git and common build artifacts)
    this._copyDirExcluding(sourceDir, isolatedPath, [
      '.git',
      'node_modules',
      '.next',
      'dist',
      'build',
      '__pycache__',
      '.pytest_cache',
      '.mypy_cache',
      '.ruff_cache',
      '.venv',
      'venv',
      '.tox',
      '.eggs',
      '*.egg-info',
      'coverage',
      '.coverage',
      '.nyc_output',
      '.DS_Store',
      'Thumbs.db',
    ]);

    // Get remote URL from original repo (for PR creation)
    let remoteUrl = null;
    try {
      remoteUrl = execSync('git remote get-url origin', {
        cwd: sourceDir,
        encoding: 'utf8',
        stdio: 'pipe',
      }).trim();
    } catch {
      // No remote configured in source
    }

    // Initialize fresh git repo
    execSync('git init', quiet);

    // Add remote if source had one (needed for git push / PR creation)
    // Inject gh token into URL for authentication inside container
    if (remoteUrl) {
      const githubPrefix = 'https://github.com/';
      const token = this._getGhToken();
      // Convert https://github.com/org/repo.git to https://x-access-token:TOKEN@github.com/org/repo.git
      const authRemoteUrl =
        token && remoteUrl.startsWith(githubPrefix)
          ? `https://x-access-token:${token}@github.com/${remoteUrl.slice(githubPrefix.length)}`
          : remoteUrl;

      // Argument array (no shell): the URL comes from git config and is not trusted
      execFileSync('git', ['remote', 'add', 'origin', authRemoteUrl], quiet);
    }

    execSync('git add -A', quiet);

    try {
      execSync('git commit -m "Initial commit (isolated copy)"', quiet);
    } catch {
      // May fail if nothing to commit (empty dir)
    }

    // Create feature branch for work
    execFileSync('git', ['checkout', '-b', `zeroshot/${clusterId}`], quiet);

    return isolatedPath;
  }

  /**
   * Copy directory excluding certain paths
   * Supports exact matches and glob patterns (*.ext) against entry names.
   * Symlinks are followed and their targets copied; a symlink that resolves to a
   * directory currently being copied (a cycle) is skipped.
   * @private
   * @param {string} src - Source directory
   * @param {string} dest - Existing destination directory
   * @param {string[]} exclude - Entry names or `*.suffix` patterns to skip
   * @param {Set<string>} [_ancestors] - Internal: real paths of directories on the current recursion stack
   */
  _copyDirExcluding(src, dest, exclude, _ancestors = new Set()) {
    const isExcluded = (name) =>
      exclude.some((pattern) =>
        pattern.startsWith('*.') ? name.endsWith(pattern.slice(1)) : name === pattern
      );

    let realSrc;
    try {
      realSrc = fs.realpathSync(src);
    } catch {
      realSrc = path.resolve(src);
    }
    _ancestors.add(realSrc);

    try {
      for (const entry of fs.readdirSync(src, { withFileTypes: true })) {
        if (isExcluded(entry.name)) continue;

        const srcPath = path.join(src, entry.name);
        const destPath = path.join(dest, entry.name);

        try {
          let isDir = entry.isDirectory();

          // Handle symlinks: resolve to actual target and copy appropriately
          // This avoids EISDIR errors when symlink points to directory
          if (entry.isSymbolicLink()) {
            isDir = fs.statSync(srcPath).isDirectory();
            // A link back to a directory we are inside of would recurse until the
            // OS gives up with ELOOP and abort the whole copy - skip it instead.
            if (isDir && _ancestors.has(fs.realpathSync(srcPath))) continue;
          }

          if (isDir) {
            fs.mkdirSync(destPath, { recursive: true });
            this._copyDirExcluding(srcPath, destPath, exclude, _ancestors);
          } else {
            fs.copyFileSync(srcPath, destPath);
          }
        } catch (err) {
          // Skip files we can't copy (permission denied, broken symlinks, etc.)
          // These are usually cache/temp files that aren't needed
          if (err.code === 'EACCES' || err.code === 'EPERM' || err.code === 'ENOENT') {
            continue;
          }
          throw err; // Re-throw other errors
        }
      }
    } finally {
      _ancestors.delete(realSrc);
    }
  }

  /**
   * Get container ID for a cluster
   * @param {string} clusterId - Cluster ID
   * @returns {string|undefined}
   */
  getContainerId(clusterId) {
    return this.containers.get(clusterId);
  }

  /**
   * Check if a cluster has an active container
   * @param {string} clusterId - Cluster ID
   * @returns {boolean} True only if a container is tracked and docker reports it running
   */
  hasContainer(clusterId) {
    const containerId = this.containers.get(clusterId);
    return containerId ? this._isContainerRunning(containerId) : false;
  }

  /**
   * Get Claude config directory (CLAUDE_CONFIG_DIR, falling back to ~/.claude)
   * @private
   * @returns {string}
   */
  _getClaudeConfigDir() {
    return process.env.CLAUDE_CONFIG_DIR || path.join(os.homedir(), '.claude');
  }

  /**
   * Create a fresh Claude config directory for a cluster (avoids permission issues from host)
   * Copies only essential files: .credentials.json and the AskUserQuestion hook script,
   * then writes a settings.json that registers the hook.
   * @private
   * @param {string} clusterId - Cluster ID
   * @returns {string} Path to cluster-specific config directory
   */
  _createClusterConfigDir(clusterId) {
    const sourceDir = this._getClaudeConfigDir();
    const configDir = path.join(os.tmpdir(), 'zeroshot-cluster-configs', clusterId);
    const hooksDir = path.join(configDir, 'hooks');
    // CRITICAL: Claude CLI writes session files to projects/ subdirectory
    const projectsDir = path.join(configDir, 'projects');

    // Start from an empty directory (force: true makes a missing dir a no-op)
    fs.rmSync(configDir, { recursive: true, force: true });
    for (const dir of [configDir, hooksDir, projectsDir]) {
      fs.mkdirSync(dir, { recursive: true });
    }

    // Copy only credentials file (essential for auth)
    const credentialsFile = path.join(sourceDir, '.credentials.json');
    if (fs.existsSync(credentialsFile)) {
      fs.copyFileSync(credentialsFile, path.join(configDir, '.credentials.json'));
    }

    // Copy hook script to block AskUserQuestion (CRITICAL for autonomous execution)
    const hookScriptSrc = path.join(__dirname, '..', 'hooks', 'block-ask-user-question.py');
    const hookScriptDst = path.join(hooksDir, 'block-ask-user-question.py');
    if (fs.existsSync(hookScriptSrc)) {
      fs.copyFileSync(hookScriptSrc, hookScriptDst);
      fs.chmodSync(hookScriptDst, 0o755);
    }

    // Create settings.json with PreToolUse hook to block AskUserQuestion
    // This PREVENTS agents from asking questions in non-interactive mode
    const settings = {
      hooks: {
        PreToolUse: [
          {
            matcher: 'AskUserQuestion',
            hooks: [
              {
                type: 'command',
                // Path as seen inside the container (configDir is mounted at /home/node/.claude)
                command: '/home/node/.claude/hooks/block-ask-user-question.py',
              },
            ],
          },
        ],
      },
    };
    fs.writeFileSync(path.join(configDir, 'settings.json'), JSON.stringify(settings, null, 2));

    // Track for cleanup
    this.clusterConfigDirs.set(clusterId, configDir);

    return configDir;
  }

  /**
   * Clean up cluster config directory
   * @private
   * @param {string} clusterId - Cluster ID
   */
  _cleanupClusterConfigDir(clusterId) {
    const configDir = this.clusterConfigDirs.get(clusterId);
    if (!configDir) return;

    try {
      fs.rmSync(configDir, { recursive: true, force: true });
    } catch {
      // Ignore - removal is best effort
    }
    this.clusterConfigDirs.delete(clusterId);
  }

  /**
   * Preserve Terraform state files before cleanup
   * Checks the root directory first, then the terraform/ subdirectory, and copies
   * state from the first one that contains any into ~/.zeroshot/terraform-state/<clusterId>.
   * @private
   * @param {string} clusterId - Cluster ID
   * @param {string} isolatedPath - Path to isolated directory
   */
  _preserveTerraformState(clusterId, isolatedPath) {
    const stateFiles = ['terraform.tfstate', 'terraform.tfstate.backup', 'tfplan'];
    const checkDirs = [isolatedPath, path.join(isolatedPath, 'terraform')];

    let foundState = false;

    for (const checkDir of checkDirs) {
      if (!fs.existsSync(checkDir)) continue;

      const present = stateFiles.filter((file) => fs.existsSync(path.join(checkDir, file)));
      if (present.length === 0) continue;

      const stateDir = path.join(os.homedir(), '.zeroshot', 'terraform-state', clusterId);
      fs.mkdirSync(stateDir, { recursive: true });

      for (const file of present) {
        try {
          fs.copyFileSync(path.join(checkDir, file), path.join(stateDir, file));
          console.log(`[IsolationManager] Preserved Terraform state: ${file} → ${stateDir}`);
          foundState = true;
        } catch (err) {
          console.warn(`[IsolationManager] Failed to preserve ${file}: ${err.message}`);
        }
      }
      break; // Only backup from first dir with state files
    }

    if (!foundState) {
      console.log(`[IsolationManager] No Terraform state found to preserve`);
    }
  }

  /**
   * Get AWS config directory (AWS_CONFIG_DIR, falling back to ~/.aws)
   * @private
   * @returns {string}
   */
  _getAwsConfigDir() {
    return process.env.AWS_CONFIG_DIR || path.join(os.homedir(), '.aws');
  }

  /**
   * Get Kubernetes config directory (KUBECONFIG_DIR, falling back to ~/.kube)
   * @private
   * @returns {string}
   */
  _getKubeConfigDir() {
    return process.env.KUBECONFIG_DIR || path.join(os.homedir(), '.kube');
  }

  /**
   * Get SSH directory
   * @private
   * @returns {string}
   */
  _getSshDir() {
    return path.join(os.homedir(), '.ssh');
  }

  /**
   * Get Terraform plugin cache directory, creating it if it does not exist
   * @private
   * @returns {string}
   */
  _getTerraformPluginDir() {
    const dir = path.join(os.homedir(), '.terraform.d');
    fs.mkdirSync(dir, { recursive: true }); // No-op when the directory already exists
    return dir;
  }

  /**
   * Get gh CLI config directory (for PR creation)
   * @private
   * @returns {string}
   */
  _getGhConfigDir() {
    return path.join(os.homedir(), '.config', 'gh');
  }

  /**
   * Get git config file path (for commit identity)
   * @private
   * @returns {string}
   */
  _getGitConfigPath() {
    return path.join(os.homedir(), '.gitconfig');
  }

  /**
   * Get host's docker group GID (for Docker socket access inside container)
   * @private
   * @returns {string} Docker group GID, or '999' when it cannot be determined
   */
  _getDockerGid() {
    try {
      // Get docker group info: "docker:x:999:user1,user2"
      const result = execSync('getent group docker', { encoding: 'utf8', stdio: 'pipe' });
      const gid = (result.split(':')[2] || '').trim();
      if (/^\d+$/.test(gid)) return gid;
    } catch {
      // getent missing or no docker group - fall through to the default
    }

    // Fallback: common docker GID is 999
    console.warn('[IsolationManager] Could not detect docker GID, using default 999');
    return '999';
  }

  /**
   * Check if a container is running
   * @private
   * @param {string} containerId - Container ID or name
   * @returns {boolean} False if docker reports it stopped, or if inspect fails
   */
  _isContainerRunning(containerId) {
    try {
      const result = execFileSync('docker', ['inspect', '-f', '{{.State.Running}}', containerId], {
        encoding: 'utf8',
        stdio: ['ignore', 'pipe', 'ignore'],
      });
      return result.trim() === 'true';
    } catch {
      return false;
    }
  }

  /**
   * Remove container by name (cleanup before create)
   * @private
   * @param {string} name - Container name
   */
  _removeContainerByName(name) {
    try {
      execFileSync('docker', ['rm', '-f', name], {
        encoding: 'utf8',
        stdio: ['ignore', 'pipe', 'ignore'],
      });
    } catch {
      // Ignore - container doesn't exist
    }
  }

  /**
   * Check if Docker is available
   * @returns {boolean}
   */
  static isDockerAvailable() {
    try {
      execSync('docker --version', { encoding: 'utf8', stdio: 'pipe' });
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Check if the base image exists
   * @param {string} [image] - Image name to check
   * @returns {boolean}
   */
  static imageExists(image = DEFAULT_IMAGE) {
    try {
      execFileSync('docker', ['image', 'inspect', image], {
        encoding: 'utf8',
        stdio: 'pipe',
      });
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Build the Docker image with retry logic
   * Retries use exponential backoff starting at 3 seconds.
   * @param {string} [image] - Image name to build
   * @param {number} [maxRetries=3] - Maximum retry attempts
   * @returns {Promise<void>}
   * @throws {Error} If the Dockerfile is missing or every attempt fails
   */
  static async buildImage(image = DEFAULT_IMAGE, maxRetries = 3) {
    const dockerfilePath = path.join(__dirname, '..', 'docker', 'zeroshot-cluster');

    if (!fs.existsSync(path.join(dockerfilePath, 'Dockerfile'))) {
      throw new Error(`Dockerfile not found at ${dockerfilePath}/Dockerfile`);
    }

    console.log(`[IsolationManager] Building Docker image '${image}'...`);

    const baseDelay = 3000; // 3 seconds

    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        // stdio: 'inherit' streams build output in real-time
        execFileSync('docker', ['build', '-t', image, '.'], {
          cwd: dockerfilePath,
          encoding: 'utf8',
          stdio: 'inherit',
        });

        console.log(`[IsolationManager] ✓ Image '${image}' built successfully`);
        return;
      } catch (err) {
        if (attempt >= maxRetries) {
          throw new Error(
            `Failed to build Docker image '${image}' after ${maxRetries} attempts: ${err.message}`
          );
        }

        const delay = baseDelay * 2 ** (attempt - 1);
        console.warn(
          `[IsolationManager] ⚠️ Docker build failed (attempt ${attempt}/${maxRetries}), retrying in ${delay}ms...`
        );
        console.warn(`[IsolationManager] Error: ${err.message}`);
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }
  }

  /**
   * Ensure Docker image exists, building it if necessary
   * @param {string} [image] - Image name to ensure
   * @param {boolean} [autoBuild=true] - Auto-build if missing
   * @returns {Promise<void>}
   * @throws {Error} If the image is missing and autoBuild is false, or the build fails
   */
  static async ensureImage(image = DEFAULT_IMAGE, autoBuild = true) {
    // Reference the class by name so this still works if the method is detached from it
    if (IsolationManager.imageExists(image)) {
      return;
    }

    if (!autoBuild) {
      throw new Error(
        `Docker image '${image}' not found. Build it with:\n` +
          `  docker build -t ${image} zeroshot/cluster/docker/zeroshot-cluster/`
      );
    }

    console.log(`[IsolationManager] Image '${image}' not found, building automatically...`);
    await IsolationManager.buildImage(image);
  }

  /**
   * Check if directory is a git repository
   * @private
   * @param {string} dir - Directory to check
   * @returns {boolean}
   */
  _isGitRepo(dir) {
    try {
      execSync('git rev-parse --git-dir', {
        cwd: dir,
        encoding: 'utf8',
        stdio: 'pipe',
      });
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Get the git repository root for a directory
   * @private
   * @param {string} dir - Directory inside the repository
   * @returns {string|null} Repository root, or null if git fails
   */
  _getGitRoot(dir) {
    try {
      return execSync('git rev-parse --show-toplevel', {
        cwd: dir,
        encoding: 'utf8',
        stdio: 'pipe',
      }).trim();
    } catch {
      return null;
    }
  }

  /**
   * Create a git worktree for isolated work
   * Any existing worktree at the target path and any existing branch of the same
   * name are removed first.
   * @private
   * @param {string} clusterId - Cluster ID (used as branch name)
   * @param {string} workDir - Original working directory
   * @returns {{ path: string, branch: string, repoRoot: string }}
   * @throws {Error} If workDir is not inside a git repository or the worktree cannot be created
   */
  _createWorktree(clusterId, workDir) {
    const repoRoot = this._getGitRoot(workDir);
    if (!repoRoot) {
      throw new Error(`Cannot find git root for ${workDir}`);
    }

    // Create branch name from cluster ID (e.g., cluster-cosmic-meteor-87 -> zeroshot/cosmic-meteor-87)
    const branchName = `zeroshot/${clusterId.replace(/^cluster-/, '')}`;

    // Worktree path in tmp
    const worktreePath = path.join(os.tmpdir(), 'zeroshot-worktrees', clusterId);
    const gitOptions = { cwd: repoRoot, encoding: 'utf8', stdio: 'pipe' };

    // Ensure parent directory exists
    fs.mkdirSync(path.dirname(worktreePath), { recursive: true });

    // Remove existing worktree if it exists (cleanup from previous run)
    try {
      execFileSync('git', ['worktree', 'remove', '--force', worktreePath], gitOptions);
    } catch {
      // Ignore - worktree doesn't exist
    }

    // Delete the branch if it exists (from previous run)
    try {
      execFileSync('git', ['branch', '-D', branchName], gitOptions);
    } catch {
      // Ignore - branch doesn't exist
    }

    // Create worktree with new branch based on HEAD
    execFileSync('git', ['worktree', 'add', '-b', branchName, worktreePath, 'HEAD'], gitOptions);

    return {
      path: worktreePath,
      branch: branchName,
      repoRoot,
    };
  }

  /**
   * Remove a git worktree
   * The branch is intentionally left in place - the user decides whether to keep or delete it.
   * @private
   * @param {{ path: string, branch: string, repoRoot: string }} worktreeInfo
   */
  _removeWorktree(worktreeInfo) {
    try {
      execFileSync('git', ['worktree', 'remove', '--force', worktreeInfo.path], {
        cwd: worktreeInfo.repoRoot,
        encoding: 'utf8',
        stdio: 'pipe',
      });
    } catch {
      // Fallback: manually remove directory if worktree command fails
      try {
        fs.rmSync(worktreeInfo.path, { recursive: true, force: true });
      } catch {
        // Ignore - removal is best effort
      }
    }
  }

  /**
   * Get worktree info for a cluster
   * @param {string} clusterId - Cluster ID
   * @returns {{ path: string, branch: string, repoRoot: string }|undefined}
   */
  getWorktreeInfo(clusterId) {
    return this.worktrees.get(clusterId);
  }
}
1041
+
1042
+ module.exports = IsolationManager;