npm - @covibes/zeroshot - Versions diffs - 2.0.0 → 3.0.0 - Mend

@covibes/zeroshot 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/CHANGELOG.md +82 -0
package/README.md +19 -8
package/cli/index.js +146 -111
package/cli/lib/first-run.js +11 -11
package/cli/lib/update-checker.js +2 -1
package/cluster-templates/base-templates/debug-workflow.json +75 -6
package/cluster-templates/conductor-bootstrap.json +3 -3
package/docker/zeroshot-cluster/Dockerfile +8 -1
package/docker/zeroshot-cluster/pre-baked-deps.json +28 -0
package/lib/settings.js +46 -4
package/package.json +1 -1
package/src/agent/agent-config.js +38 -3
package/src/agent/agent-task-executor.js +229 -85
package/src/agent-wrapper.js +49 -13
package/src/config-validator.js +198 -0
package/src/copy-worker.js +43 -0
package/src/isolation-manager.js +328 -108
package/src/orchestrator.js +93 -11
package/src/preflight.js +28 -2
package/src/process-metrics.js +16 -4
package/src/status-footer.js +151 -42

package/src/agent/agent-task-executor.js CHANGED Viewed

@@ -120,6 +120,9 @@ function extractErrorContext({ output, statusOutput, taskId, isNotFound = false
 // Track if we've already ensured the AskUserQuestion hook is installed
 let askUserQuestionHookInstalled = false;
+// Track if we've already ensured the dangerous git hook is installed
+let dangerousGitHookInstalled = false;
 /**
  * Extract token usage from NDJSON output.
  * Looks for the 'result' event line which contains usage data.
@@ -236,6 +239,85 @@ function ensureAskUserQuestionHook() {
   askUserQuestionHookInstalled = true;
 }
+/**
+ * Ensure the dangerous git blocking hook is installed in user's Claude config.
+ * This blocks dangerous git commands like stash, checkout --, reset --hard, etc.
+ * Writes settings.json and the hook script under CLAUDE_CONFIG_DIR (default: ~/.claude).
+ *
+ * Only used in worktree mode - Docker isolation mode has its own git-safe.sh wrapper.
+ * Safe to call multiple times - a module-level flag skips the work after the first success.
+ */
+function ensureDangerousGitHook() {
+  if (dangerousGitHookInstalled) {
+    return; // Already installed this session
+  }
+  const userClaudeDir = process.env.CLAUDE_CONFIG_DIR || path.join(os.homedir(), '.claude');
+  const hooksDir = path.join(userClaudeDir, 'hooks');
+  const settingsPath = path.join(userClaudeDir, 'settings.json');
+  const hookScriptName = 'block-dangerous-git.py';
+  const hookScriptDst = path.join(hooksDir, hookScriptName);
+  // Ensure hooks directory exists
+  if (!fs.existsSync(hooksDir)) {
+    fs.mkdirSync(hooksDir, { recursive: true });
+  }
+  // Copy the bundled hook script when the package ships one (skipped if the source is missing)
+  const hookScriptSrc = path.join(__dirname, '..', '..', 'hooks', hookScriptName);
+  if (fs.existsSync(hookScriptSrc)) {
+    // Always overwrite so the installed copy matches the bundled one, then mark it executable
+    fs.copyFileSync(hookScriptSrc, hookScriptDst);
+    fs.chmodSync(hookScriptDst, 0o755);
+  }
+  // Read existing settings; an unparseable file is replaced by {} (its old contents are lost on write)
+  let settings = {};
+  if (fs.existsSync(settingsPath)) {
+    try {
+      settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8'));
+    } catch (e) {
+      console.warn(`[AgentTaskExecutor] Could not parse settings.json, creating new: ${e.message}`);
+      settings = {};
+    }
+  }
+  // Ensure hooks structure exists
+  if (!settings.hooks) {
+    settings.hooks = {};
+  }
+  if (!settings.hooks.PreToolUse) {
+    settings.hooks.PreToolUse = [];
+  }
+  // Check if a Bash entry already references the hook script (matched by file name in its command)
+  const hasHook = settings.hooks.PreToolUse.some(
+    (entry) =>
+      entry.matcher === 'Bash' &&
+      entry.hooks &&
+      entry.hooks.some((h) => h.command && h.command.includes(hookScriptName))
+  );
+  if (!hasHook) {
+    // Add the hook for the Bash tool by absolute path (NOTE(review): added even if the copy above was skipped)
+    settings.hooks.PreToolUse.push({
+      matcher: 'Bash',
+      hooks: [
+        {
+          type: 'command',
+          command: hookScriptDst,
+        },
+      ],
+    });
+    // Write updated settings
+    fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
+    console.log(`[AgentTaskExecutor] Installed dangerous git blocking hook in ${settingsPath}`);
+  }
+  dangerousGitHookInstalled = true;
+}
 /**
  * Spawn claude-zeroshots process and stream output via message bus
  * @param {Object} agent - Agent instance
@@ -322,16 +404,30 @@ async function spawnClaudeTask(agent, context) {
   // DO NOT override CLAUDE_CONFIG_DIR - it breaks authentication on Claude CLI 2.x
   ensureAskUserQuestionHook();
+  // WORKTREE MODE: Install git safety hook (blocks dangerous git commands)
+  if (agent.worktree?.enabled) {
+    ensureDangerousGitHook();
+  }
+  // Build environment for spawn
+  const spawnEnv = {
+    ...process.env,
+    ANTHROPIC_MODEL: agent._selectModel(),
+    // Activate AskUserQuestion blocking hook (see hooks/block-ask-user-question.py)
+    ZEROSHOT_BLOCK_ASK_USER: '1',
+  };
+  // WORKTREE MODE: Activate git safety hook via environment variable
+  // The hook only activates when ZEROSHOT_WORKTREE=1 is set
+  if (agent.worktree?.enabled) {
+    spawnEnv.ZEROSHOT_WORKTREE = '1';
+  }
   const taskId = await new Promise((resolve, reject) => {
     const proc = spawn(ctPath, args, {
       cwd,
       stdio: ['ignore', 'pipe', 'pipe'],
-      env: {
-        ...process.env,
-        ANTHROPIC_MODEL: agent._selectModel(),
-        // Activate AskUserQuestion blocking hook (see hooks/block-ask-user-question.py)
-        ZEROSHOT_BLOCK_ASK_USER: '1',
-      },
+      env: spawnEnv,
     });
     // Track PID for resource monitoring
     agent.processPid = proc.pid;
@@ -865,6 +961,21 @@ async function spawnClaudeTaskIsolated(agent, context) {
  * @returns {Promise<Object>} Result object
  * @private
  */
+/**
+ * Follow Claude task logs in isolated container using persistent tail -f stream
+ * Issue #23: Persistent log streaming instead of polling (10-20% latency reduction)
+ *
+ * OLD APPROACH (removed):
+ * - Polled every 500ms with 2-3 docker exec calls per poll
+ * - Each docker exec = ~100-200ms overhead
+ * - Total: 300-400ms latency per poll cycle
+ *
+ * NEW APPROACH:
+ * - Single persistent `tail -f` stream via spawnInContainer()
+ * - Lines arrive in real-time as they're written
+ * - Status checks reduced to every 2 seconds (not every poll)
+ * - Result: 10-20% overall latency reduction
+ */
 function followClaudeTaskLogsIsolated(agent, taskId) {
   const { isolation } = agent;
   if (!isolation?.manager) {
@@ -876,16 +987,67 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
   return new Promise((resolve, reject) => {
     let taskExited = false;
-    let lastSize = 0;
     let fullOutput = '';
-    let pollInterval = null;
+    let tailProcess = null;
+    let statusCheckInterval = null;
+    let lineBuffer = '';
-    // Cleanup function
+    // Cleanup function - kill tail process and clear intervals
     const cleanup = () => {
-      if (pollInterval) {
-        clearInterval(pollInterval);
-        pollInterval = null;
+      if (tailProcess) {
+        try {
+          tailProcess.kill('SIGTERM');
+        } catch {
+          // Ignore - process may already be dead
+        }
+        tailProcess = null;
       }
+      if (statusCheckInterval) {
+        clearInterval(statusCheckInterval);
+        statusCheckInterval = null;
+      }
+    };
+    // Broadcast line helper (same as non-isolated mode)
+    const broadcastLine = (line) => {
+      const timestampMatch = line.match(/^\[(\d{4}-\d{2}-\d{2}T[^\]]+)\]\s*(.*)$/);
+      const timestamp = timestampMatch
+        ? new Date(timestampMatch[1]).getTime()
+        : Date.now();
+      const content = timestampMatch ? timestampMatch[2] : line;
+      agent.messageBus.publish({
+        cluster_id: agent.cluster.id,
+        topic: 'AGENT_OUTPUT',
+        sender: agent.id,
+        content: {
+          data: {
+            line: content,
+            taskId,
+            iteration: agent.iteration,
+          },
+        },
+        timestamp,
+      });
+      // Update last output time for liveness tracking
+      agent.lastOutputTime = Date.now();
+    };
+    // Process new content by splitting into complete lines
+    const processNewContent = (content) => {
+      lineBuffer += content;
+      const lines = lineBuffer.split('\n');
+      // Process all complete lines (all except last, which might be incomplete)
+      for (let i = 0; i < lines.length - 1; i++) {
+        if (lines[i].trim()) {
+          broadcastLine(lines[i]);
+        }
+      }
+      // Keep last line in buffer (might be incomplete)
+      lineBuffer = lines[lines.length - 1];
     };
     // Get log file path from zeroshot CLI inside container
@@ -907,98 +1069,81 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
           return reject(new Error(`Empty log path returned for ${taskId}`));
         }
-        agent._log(`[${agent.id}] Following isolated task logs: ${logFilePath}`);
-        // Broadcast line helper (same as non-isolated mode)
-        const broadcastLine = (line) => {
-          const timestampMatch = line.match(/^\[(\d{4}-\d{2}-\d{2}T[^\]]+)\]\s*(.*)$/);
-          const timestamp = timestampMatch
-            ? new Date(timestampMatch[1]).getTime()
-            : Date.now();
-          const content = timestampMatch ? timestampMatch[2] : line;
-          agent.messageBus.publish({
-            cluster_id: agent.cluster.id,
-            topic: 'AGENT_OUTPUT',
-            sender: agent.id,
-            content: {
-              data: {
-                line: content,
-                taskId,
-                iteration: agent.iteration,
-              },
-            },
-            timestamp,
-          });
-          // Update last output time for liveness tracking
-          agent.lastOutputTime = Date.now();
-        };
-        // Poll log file inside container (check every 500ms)
-        pollInterval = setInterval(async () => {
-          try {
-            // Get file size inside container
-            const sizeResult = await manager.execInContainer(clusterId, [
-              'sh',
-              '-c',
-              `stat -c %s "${logFilePath}" 2>/dev/null || echo 0`,
-            ]);
-            const currentSize = parseInt(sizeResult.stdout.trim()) || 0;
+        agent._log(`[${agent.id}] Following isolated task logs (streaming): ${logFilePath}`);
+        // Start persistent tail -f stream
+        // Uses spawnInContainer() which creates a single docker exec process
+        // that streams output in real-time (no polling overhead)
+        tailProcess = manager.spawnInContainer(clusterId, [
+          'sh',
+          '-c',
+          // Wait for file to exist, then tail -f from beginning
+          // The -F flag handles file recreation (rotation)
+          `while [ ! -f "${logFilePath}" ]; do sleep 0.1; done; tail -F -n +1 "${logFilePath}"`,
+        ]);
+        // Stream stdout directly - lines arrive as they're written
+        tailProcess.stdout.on('data', (data) => {
+          const chunk = data.toString();
+          fullOutput += chunk;
+          processNewContent(chunk);
+        });
-            // Read new content if file grew
-            if (currentSize > lastSize) {
-              const bytesToRead = currentSize - lastSize;
-              const readResult = await manager.execInContainer(clusterId, [
-                'sh',
-                '-c',
-                `tail -c ${bytesToRead} "${logFilePath}"`,
-              ]);
+        // Log stderr but don't fail (tail might emit warnings)
+        tailProcess.stderr.on('data', (data) => {
+          const msg = data.toString().trim();
+          if (msg && !msg.includes('file truncated')) {
+            agent._log(`[${agent.id}] tail stderr: ${msg}`);
+          }
+        });
-              if (readResult.code === 0 && readResult.stdout) {
-                fullOutput += readResult.stdout;
+        // Handle tail process exit (shouldn't happen unless killed)
+        tailProcess.on('close', (exitCode) => {
+          if (!taskExited) {
+            agent._log(`[${agent.id}] tail process exited with code ${exitCode}`);
+          }
+        });
-                // Split by newlines and broadcast each complete line
-                const lines = readResult.stdout.split('\n');
-                for (let i = 0; i < lines.length - 1; i++) {
-                  if (lines[i].trim()) {
-                    broadcastLine(lines[i]);
-                  }
-                }
-              }
+        tailProcess.on('error', (err) => {
+          agent._log(`[${agent.id}] tail process error: ${err.message}`);
+        });
-              lastSize = currentSize;
-            }
+        // Check task status periodically (every 2 seconds - much less frequent than polling)
+        // This is the only remaining docker exec - but now at 2s intervals instead of 500ms
+        statusCheckInterval = setInterval(async () => {
+          if (taskExited) return;
-            // Check if task exited (query zeroshot status inside container)
+          try {
             const statusResult = await manager.execInContainer(clusterId, [
               'sh',
               '-c',
               `zeroshot status ${taskId} 2>/dev/null || echo "not_found"`,
             ]);
-            // Use same regex patterns as non-isolated mode (lines 649-650)
-            // CRITICAL: Don't use substring matching - it matches "error" in "is_error":false
             const statusOutput = statusResult.stdout;
             const isSuccess = /Status:\s+completed/i.test(statusOutput);
             const isError = /Status:\s+failed/i.test(statusOutput);
             const isNotFound = statusOutput.includes('not_found');
             if (isSuccess || isError || isNotFound) {
-              // Task finished - read final output and resolve
+              taskExited = true;
+              // Give tail a moment to flush remaining output
+              await new Promise((r) => setTimeout(r, 200));
+              // Read final output to ensure we have everything
               const finalReadResult = await manager.execInContainer(clusterId, [
                 'sh',
                 '-c',
-                `cat "${logFilePath}"`,
+                `cat "${logFilePath}" 2>/dev/null || echo ""`,
               ]);
-              if (finalReadResult.code === 0) {
+              if (finalReadResult.code === 0 && finalReadResult.stdout) {
                 fullOutput = finalReadResult.stdout;
-                // Broadcast any final lines we haven't seen
-                const finalLines = fullOutput.split('\n');
-                for (const line of finalLines) {
+                // Process any remaining content
+                const remainingLines = fullOutput.split('\n');
+                for (const line of remainingLines) {
                   if (line.trim()) {
                     broadcastLine(line);
                   }
@@ -1006,7 +1151,6 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
               }
               cleanup();
-              taskExited = true;
               // Determine success status
               const success = isSuccess && !isError;
@@ -1028,11 +1172,11 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
                 tokenUsage: extractTokenUsage(fullOutput),
               });
             }
-          } catch (pollErr) {
-            // Log error but continue polling (file might not exist yet)
-            agent._log(`[${agent.id}] Poll error (will retry): ${pollErr.message}`);
+          } catch (statusErr) {
+            // Log error but continue checking (transient failures are common)
+            agent._log(`[${agent.id}] Status check error (will retry): ${statusErr.message}`);
           }
-        }, 500);
+        }, 2000); // Check every 2 seconds (was 500ms in polling mode)
         // Safety timeout (0 = no timeout, task runs until completion)
         if (agent.timeout > 0) {

package/src/agent-wrapper.js CHANGED Viewed

@@ -12,6 +12,7 @@
 const LogicEngine = require('./logic-engine');
 const { validateAgentConfig } = require('./agent/agent-config');
+const { loadSettings, validateModelAgainstMax } = require('../lib/settings');
 const { buildContext } = require('./agent/agent-context-builder');
 const { findMatchingTrigger, evaluateTrigger } = require('./agent/agent-trigger-evaluator');
 const { executeHook } = require('./agent/agent-hook-executor');
@@ -79,12 +80,30 @@ class AgentWrapper {
     // MOCK SUPPORT - Inject mock spawn function for testing
     // When set, _spawnClaudeTask uses this instead of real ct CLI
-    this.mockSpawnFn = options.mockSpawnFn || null;
+    // Priority: options.mockSpawnFn (legacy) > options.taskRunner (new DI pattern)
+    if (options.mockSpawnFn) {
+      this.mockSpawnFn = options.mockSpawnFn;
+    } else if (options.taskRunner) {
+      // TaskRunner DI - create mockSpawnFn wrapper
+      const taskRunner = options.taskRunner;
+      this.mockSpawnFn = (args, { context }) => {
+        return taskRunner.run(context, {
+          agentId: this.id,
+          model: this._selectModel(),
+        });
+      };
+    } else {
+      this.mockSpawnFn = null;
+    }
     this.testMode = options.testMode || false;
     this.quiet = options.quiet || false;
     // ISOLATION SUPPORT - Run tasks inside Docker container
     this.isolation = options.isolation || null;
+    // WORKTREE SUPPORT - Run tasks in git worktree (lightweight isolation without Docker)
+    this.worktree = options.worktree || null;
   }
   /**
@@ -134,27 +153,44 @@ class AgentWrapper {
   /**
    * Select model based on current iteration and agent config
+   * Enforces maxModel ceiling from settings
    * @returns {string} Model name ('sonnet', 'opus', 'haiku')
    * @private
    */
   _selectModel() {
-    // Backward compatibility: static model
+    const settings = loadSettings();
+    const maxModel = settings.maxModel || 'sonnet';
+    let requestedModel = null;
+    // Get requested model from config
     if (this.modelConfig.type === 'static') {
-      return this.modelConfig.model;
-    }
+      requestedModel = this.modelConfig.model;
+    } else if (this.modelConfig.type === 'rules') {
+      // Dynamic rules: evaluate based on iteration
+      for (const rule of this.modelConfig.rules) {
+        if (this._matchesIterationRange(rule.iterations)) {
+          requestedModel = rule.model;
+          break;
+        }
+      }
-    // Dynamic rules: evaluate based on iteration
-    for (const rule of this.modelConfig.rules) {
-      if (this._matchesIterationRange(rule.iterations)) {
-        return rule.model;
+      // No match for rules: fail fast (config error)
+      if (!requestedModel) {
+        throw new Error(
+          `Agent ${this.id}: No model rule matched iteration ${this.iteration}. ` +
+            `Add a catch-all rule like { "iterations": "all", "model": "sonnet" }`
+        );
       }
     }
-    // No match: fail fast
-    throw new Error(
-      `Agent ${this.id}: No model rule matched iteration ${this.iteration}. ` +
-        `Add a catch-all rule like { "iterations": "all", "model": "sonnet" }`
-    );
+    // If no model specified (neither static nor rules), use maxModel as default
+    if (!requestedModel) {
+      return maxModel;
+    }
+    // Enforce ceiling - will throw if requestedModel > maxModel
+    return validateModelAgainstMax(requestedModel, maxModel);
   }
   /**