@covibes/zeroshot 2.1.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -120,6 +120,9 @@ function extractErrorContext({ output, statusOutput, taskId, isNotFound = false
120
120
  // Track if we've already ensured the AskUserQuestion hook is installed
121
121
  let askUserQuestionHookInstalled = false;
122
122
 
123
+ // Track if we've already ensured the dangerous git hook is installed
124
+ let dangerousGitHookInstalled = false;
125
+
123
126
  /**
124
127
  * Extract token usage from NDJSON output.
125
128
  * Looks for the 'result' event line which contains usage data.
@@ -236,6 +239,85 @@ function ensureAskUserQuestionHook() {
236
239
  askUserQuestionHookInstalled = true;
237
240
  }
238
241
 
242
/**
 * Ensure the dangerous git blocking hook is installed in user's Claude config.
 *
 * Copies hooks/block-dangerous-git.py (shipped two directories above this module)
 * into <claude dir>/hooks/ and registers it as a `PreToolUse` hook for the `Bash`
 * matcher in <claude dir>/settings.json. The Claude directory is
 * $CLAUDE_CONFIG_DIR when set, otherwise ~/.claude.
 *
 * According to the original notes, the script blocks dangerous git commands such
 * as stash, checkout --, reset --hard (the script itself is not part of this file),
 * and this is only used in worktree mode - Docker isolation mode has its own
 * git-safe.sh wrapper.
 *
 * Safe to call multiple times - only modifies config once per process, and never
 * adds a second settings entry that already references the hook script.
 *
 * Defensive behavior:
 * - If the bundled script is missing AND no copy was installed earlier, nothing is
 *   registered (a settings entry pointing at a non-existent command would be broken).
 * - If settings.json exists but cannot be read as a JSON object, a timestamped
 *   backup copy is kept before a fresh settings file is written.
 * - Malformed `hooks` / `PreToolUse` values and malformed entries are tolerated
 *   instead of throwing.
 *
 * @returns {void}
 * @throws {Error} Filesystem errors from creating the hooks directory, copying the
 *   script, or writing settings.json propagate to the caller.
 */
function ensureDangerousGitHook() {
  if (dangerousGitHookInstalled) {
    return; // Already installed this session
  }

  const userClaudeDir = process.env.CLAUDE_CONFIG_DIR || path.join(os.homedir(), '.claude');
  const hooksDir = path.join(userClaudeDir, 'hooks');
  const settingsPath = path.join(userClaudeDir, 'settings.json');
  const hookScriptName = 'block-dangerous-git.py';
  const hookScriptDst = path.join(hooksDir, hookScriptName);

  // Ensure hooks directory exists (recursive mkdir is a no-op when it already does)
  fs.mkdirSync(hooksDir, { recursive: true });

  // Always copy when the bundled script is available, to ensure latest version
  const hookScriptSrc = path.join(__dirname, '..', '..', 'hooks', hookScriptName);
  if (fs.existsSync(hookScriptSrc)) {
    fs.copyFileSync(hookScriptSrc, hookScriptDst);
    fs.chmodSync(hookScriptDst, 0o755);
  } else if (!fs.existsSync(hookScriptDst)) {
    // Neither a bundled script nor a previously installed copy: registering the
    // hook would leave settings.json pointing at a command that does not exist.
    console.warn(
      `[AgentTaskExecutor] Hook script not found at ${hookScriptSrc}, skipping dangerous git hook install`
    );
    return;
  }

  // Read existing settings or create new
  let settings = {};
  if (fs.existsSync(settingsPath)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(settingsPath, 'utf8'));
      if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        throw new Error('top-level value is not a JSON object');
      }
      settings = parsed;
    } catch (e) {
      console.warn(`[AgentTaskExecutor] Could not parse settings.json, creating new: ${e.message}`);
      // The file is about to be replaced - keep a copy so the user's config is recoverable
      try {
        const backupPath = `${settingsPath}.invalid-${Date.now()}.bak`;
        fs.copyFileSync(settingsPath, backupPath);
        console.warn(`[AgentTaskExecutor] Previous settings.json preserved at ${backupPath}`);
      } catch (backupErr) {
        console.warn(`[AgentTaskExecutor] Could not back up settings.json: ${backupErr.message}`);
      }
      settings = {};
    }
  }

  // Ensure hooks structure exists (and has the expected container types)
  const hooksIsObject =
    settings.hooks !== null && typeof settings.hooks === 'object' && !Array.isArray(settings.hooks);
  if (!hooksIsObject) {
    settings.hooks = {};
  }
  if (!Array.isArray(settings.hooks.PreToolUse)) {
    settings.hooks.PreToolUse = [];
  }

  // Check if dangerous git hook already exists (tolerating malformed entries)
  const hasHook = settings.hooks.PreToolUse.some(
    (entry) =>
      entry?.matcher === 'Bash' &&
      Array.isArray(entry.hooks) &&
      entry.hooks.some((h) => typeof h?.command === 'string' && h.command.includes(hookScriptName))
  );

  if (!hasHook) {
    // Add the hook - matches Bash tool to check for dangerous git commands
    settings.hooks.PreToolUse.push({
      matcher: 'Bash',
      hooks: [
        {
          type: 'command',
          command: hookScriptDst,
        },
      ],
    });

    // Write updated settings
    fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2));
    console.log(`[AgentTaskExecutor] Installed dangerous git blocking hook in ${settingsPath}`);
  }

  dangerousGitHookInstalled = true;
}
320
+
239
321
  /**
240
322
  * Spawn claude-zeroshots process and stream output via message bus
241
323
  * @param {Object} agent - Agent instance
@@ -322,16 +404,30 @@ async function spawnClaudeTask(agent, context) {
322
404
  // DO NOT override CLAUDE_CONFIG_DIR - it breaks authentication on Claude CLI 2.x
323
405
  ensureAskUserQuestionHook();
324
406
 
407
+ // WORKTREE MODE: Install git safety hook (blocks dangerous git commands)
408
+ if (agent.worktree?.enabled) {
409
+ ensureDangerousGitHook();
410
+ }
411
+
412
+ // Build environment for spawn
413
+ const spawnEnv = {
414
+ ...process.env,
415
+ ANTHROPIC_MODEL: agent._selectModel(),
416
+ // Activate AskUserQuestion blocking hook (see hooks/block-ask-user-question.py)
417
+ ZEROSHOT_BLOCK_ASK_USER: '1',
418
+ };
419
+
420
+ // WORKTREE MODE: Activate git safety hook via environment variable
421
+ // The hook only activates when ZEROSHOT_WORKTREE=1 is set
422
+ if (agent.worktree?.enabled) {
423
+ spawnEnv.ZEROSHOT_WORKTREE = '1';
424
+ }
425
+
325
426
  const taskId = await new Promise((resolve, reject) => {
326
427
  const proc = spawn(ctPath, args, {
327
428
  cwd,
328
429
  stdio: ['ignore', 'pipe', 'pipe'],
329
- env: {
330
- ...process.env,
331
- ANTHROPIC_MODEL: agent._selectModel(),
332
- // Activate AskUserQuestion blocking hook (see hooks/block-ask-user-question.py)
333
- ZEROSHOT_BLOCK_ASK_USER: '1',
334
- },
430
+ env: spawnEnv,
335
431
  });
336
432
  // Track PID for resource monitoring
337
433
  agent.processPid = proc.pid;
@@ -865,6 +961,21 @@ async function spawnClaudeTaskIsolated(agent, context) {
865
961
  * @returns {Promise<Object>} Result object
866
962
  * @private
867
963
  */
964
+ /**
965
+ * Follow Claude task logs in isolated container using persistent tail -f stream
966
+ * Issue #23: Persistent log streaming instead of polling (10-20% latency reduction)
967
+ *
968
+ * OLD APPROACH (removed):
969
+ * - Polled every 500ms with 2-3 docker exec calls per poll
970
+ * - Each docker exec = ~100-200ms overhead
971
+ * - Total: 300-400ms latency per poll cycle
972
+ *
973
+ * NEW APPROACH:
974
+ * - Single persistent `tail -f` stream via spawnInContainer()
975
+ * - Lines arrive in real-time as they're written
976
+ * - Status checks reduced to every 2 seconds (not every poll)
977
+ * - Result: 10-20% overall latency reduction
978
+ */
868
979
  function followClaudeTaskLogsIsolated(agent, taskId) {
869
980
  const { isolation } = agent;
870
981
  if (!isolation?.manager) {
@@ -876,16 +987,67 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
876
987
 
877
988
  return new Promise((resolve, reject) => {
878
989
  let taskExited = false;
879
- let lastSize = 0;
880
990
  let fullOutput = '';
881
- let pollInterval = null;
991
+ let tailProcess = null;
992
+ let statusCheckInterval = null;
993
+ let lineBuffer = '';
882
994
 
883
- // Cleanup function
995
+ // Cleanup function - kill tail process and clear intervals
884
996
  const cleanup = () => {
885
- if (pollInterval) {
886
- clearInterval(pollInterval);
887
- pollInterval = null;
997
+ if (tailProcess) {
998
+ try {
999
+ tailProcess.kill('SIGTERM');
1000
+ } catch {
1001
+ // Ignore - process may already be dead
1002
+ }
1003
+ tailProcess = null;
888
1004
  }
1005
+ if (statusCheckInterval) {
1006
+ clearInterval(statusCheckInterval);
1007
+ statusCheckInterval = null;
1008
+ }
1009
+ };
1010
+
1011
+ // Broadcast line helper (same as non-isolated mode)
1012
+ const broadcastLine = (line) => {
1013
+ const timestampMatch = line.match(/^\[(\d{4}-\d{2}-\d{2}T[^\]]+)\]\s*(.*)$/);
1014
+ const timestamp = timestampMatch
1015
+ ? new Date(timestampMatch[1]).getTime()
1016
+ : Date.now();
1017
+ const content = timestampMatch ? timestampMatch[2] : line;
1018
+
1019
+ agent.messageBus.publish({
1020
+ cluster_id: agent.cluster.id,
1021
+ topic: 'AGENT_OUTPUT',
1022
+ sender: agent.id,
1023
+ content: {
1024
+ data: {
1025
+ line: content,
1026
+ taskId,
1027
+ iteration: agent.iteration,
1028
+ },
1029
+ },
1030
+ timestamp,
1031
+ });
1032
+
1033
+ // Update last output time for liveness tracking
1034
+ agent.lastOutputTime = Date.now();
1035
+ };
1036
+
1037
+ // Process new content by splitting into complete lines
1038
+ const processNewContent = (content) => {
1039
+ lineBuffer += content;
1040
+ const lines = lineBuffer.split('\n');
1041
+
1042
+ // Process all complete lines (all except last, which might be incomplete)
1043
+ for (let i = 0; i < lines.length - 1; i++) {
1044
+ if (lines[i].trim()) {
1045
+ broadcastLine(lines[i]);
1046
+ }
1047
+ }
1048
+
1049
+ // Keep last line in buffer (might be incomplete)
1050
+ lineBuffer = lines[lines.length - 1];
889
1051
  };
890
1052
 
891
1053
  // Get log file path from zeroshot CLI inside container
@@ -907,98 +1069,81 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
907
1069
  return reject(new Error(`Empty log path returned for ${taskId}`));
908
1070
  }
909
1071
 
910
- agent._log(`[${agent.id}] Following isolated task logs: ${logFilePath}`);
911
-
912
- // Broadcast line helper (same as non-isolated mode)
913
- const broadcastLine = (line) => {
914
- const timestampMatch = line.match(/^\[(\d{4}-\d{2}-\d{2}T[^\]]+)\]\s*(.*)$/);
915
- const timestamp = timestampMatch
916
- ? new Date(timestampMatch[1]).getTime()
917
- : Date.now();
918
- const content = timestampMatch ? timestampMatch[2] : line;
919
-
920
- agent.messageBus.publish({
921
- cluster_id: agent.cluster.id,
922
- topic: 'AGENT_OUTPUT',
923
- sender: agent.id,
924
- content: {
925
- data: {
926
- line: content,
927
- taskId,
928
- iteration: agent.iteration,
929
- },
930
- },
931
- timestamp,
932
- });
933
-
934
- // Update last output time for liveness tracking
935
- agent.lastOutputTime = Date.now();
936
- };
937
-
938
- // Poll log file inside container (check every 500ms)
939
- pollInterval = setInterval(async () => {
940
- try {
941
- // Get file size inside container
942
- const sizeResult = await manager.execInContainer(clusterId, [
943
- 'sh',
944
- '-c',
945
- `stat -c %s "${logFilePath}" 2>/dev/null || echo 0`,
946
- ]);
947
-
948
- const currentSize = parseInt(sizeResult.stdout.trim()) || 0;
1072
+ agent._log(`[${agent.id}] Following isolated task logs (streaming): ${logFilePath}`);
1073
+
1074
+ // Start persistent tail -f stream
1075
+ // Uses spawnInContainer() which creates a single docker exec process
1076
+ // that streams output in real-time (no polling overhead)
1077
+ tailProcess = manager.spawnInContainer(clusterId, [
1078
+ 'sh',
1079
+ '-c',
1080
+ // Wait for file to exist, then tail -f from beginning
1081
+ // The -F flag handles file recreation (rotation)
1082
+ `while [ ! -f "${logFilePath}" ]; do sleep 0.1; done; tail -F -n +1 "${logFilePath}"`,
1083
+ ]);
1084
+
1085
+ // Stream stdout directly - lines arrive as they're written
1086
+ tailProcess.stdout.on('data', (data) => {
1087
+ const chunk = data.toString();
1088
+ fullOutput += chunk;
1089
+ processNewContent(chunk);
1090
+ });
949
1091
 
950
- // Read new content if file grew
951
- if (currentSize > lastSize) {
952
- const bytesToRead = currentSize - lastSize;
953
- const readResult = await manager.execInContainer(clusterId, [
954
- 'sh',
955
- '-c',
956
- `tail -c ${bytesToRead} "${logFilePath}"`,
957
- ]);
1092
+ // Log stderr but don't fail (tail might emit warnings)
1093
+ tailProcess.stderr.on('data', (data) => {
1094
+ const msg = data.toString().trim();
1095
+ if (msg && !msg.includes('file truncated')) {
1096
+ agent._log(`[${agent.id}] tail stderr: ${msg}`);
1097
+ }
1098
+ });
958
1099
 
959
- if (readResult.code === 0 && readResult.stdout) {
960
- fullOutput += readResult.stdout;
1100
+ // Handle tail process exit (shouldn't happen unless killed)
1101
+ tailProcess.on('close', (exitCode) => {
1102
+ if (!taskExited) {
1103
+ agent._log(`[${agent.id}] tail process exited with code ${exitCode}`);
1104
+ }
1105
+ });
961
1106
 
962
- // Split by newlines and broadcast each complete line
963
- const lines = readResult.stdout.split('\n');
964
- for (let i = 0; i < lines.length - 1; i++) {
965
- if (lines[i].trim()) {
966
- broadcastLine(lines[i]);
967
- }
968
- }
969
- }
1107
+ tailProcess.on('error', (err) => {
1108
+ agent._log(`[${agent.id}] tail process error: ${err.message}`);
1109
+ });
970
1110
 
971
- lastSize = currentSize;
972
- }
1111
+ // Check task status periodically (every 2 seconds - much less frequent than polling)
1112
+ // This is the only remaining docker exec - but now at 2s intervals instead of 500ms
1113
+ statusCheckInterval = setInterval(async () => {
1114
+ if (taskExited) return;
973
1115
 
974
- // Check if task exited (query zeroshot status inside container)
1116
+ try {
975
1117
  const statusResult = await manager.execInContainer(clusterId, [
976
1118
  'sh',
977
1119
  '-c',
978
1120
  `zeroshot status ${taskId} 2>/dev/null || echo "not_found"`,
979
1121
  ]);
980
1122
 
981
- // Use same regex patterns as non-isolated mode (lines 649-650)
982
- // CRITICAL: Don't use substring matching - it matches "error" in "is_error":false
983
1123
  const statusOutput = statusResult.stdout;
984
1124
  const isSuccess = /Status:\s+completed/i.test(statusOutput);
985
1125
  const isError = /Status:\s+failed/i.test(statusOutput);
986
1126
  const isNotFound = statusOutput.includes('not_found');
987
1127
 
988
1128
  if (isSuccess || isError || isNotFound) {
989
- // Task finished - read final output and resolve
1129
+ taskExited = true;
1130
+
1131
+ // Give tail a moment to flush remaining output
1132
+ await new Promise((r) => setTimeout(r, 200));
1133
+
1134
+ // Read final output to ensure we have everything
990
1135
  const finalReadResult = await manager.execInContainer(clusterId, [
991
1136
  'sh',
992
1137
  '-c',
993
- `cat "${logFilePath}"`,
1138
+ `cat "${logFilePath}" 2>/dev/null || echo ""`,
994
1139
  ]);
995
1140
 
996
- if (finalReadResult.code === 0) {
1141
+ if (finalReadResult.code === 0 && finalReadResult.stdout) {
997
1142
  fullOutput = finalReadResult.stdout;
998
1143
 
999
- // Broadcast any final lines we haven't seen
1000
- const finalLines = fullOutput.split('\n');
1001
- for (const line of finalLines) {
1144
+ // Process any remaining content
1145
+ const remainingLines = fullOutput.split('\n');
1146
+ for (const line of remainingLines) {
1002
1147
  if (line.trim()) {
1003
1148
  broadcastLine(line);
1004
1149
  }
@@ -1006,7 +1151,6 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
1006
1151
  }
1007
1152
 
1008
1153
  cleanup();
1009
- taskExited = true;
1010
1154
 
1011
1155
  // Determine success status
1012
1156
  const success = isSuccess && !isError;
@@ -1028,11 +1172,11 @@ function followClaudeTaskLogsIsolated(agent, taskId) {
1028
1172
  tokenUsage: extractTokenUsage(fullOutput),
1029
1173
  });
1030
1174
  }
1031
- } catch (pollErr) {
1032
- // Log error but continue polling (file might not exist yet)
1033
- agent._log(`[${agent.id}] Poll error (will retry): ${pollErr.message}`);
1175
+ } catch (statusErr) {
1176
+ // Log error but continue checking (transient failures are common)
1177
+ agent._log(`[${agent.id}] Status check error (will retry): ${statusErr.message}`);
1034
1178
  }
1035
- }, 500);
1179
+ }, 2000); // Check every 2 seconds (was 500ms in polling mode)
1036
1180
 
1037
1181
  // Safety timeout (0 = no timeout, task runs until completion)
1038
1182
  if (agent.timeout > 0) {
@@ -12,6 +12,7 @@
12
12
 
13
13
  const LogicEngine = require('./logic-engine');
14
14
  const { validateAgentConfig } = require('./agent/agent-config');
15
+ const { loadSettings, validateModelAgainstMax } = require('../lib/settings');
15
16
  const { buildContext } = require('./agent/agent-context-builder');
16
17
  const { findMatchingTrigger, evaluateTrigger } = require('./agent/agent-trigger-evaluator');
17
18
  const { executeHook } = require('./agent/agent-hook-executor');
@@ -79,12 +80,30 @@ class AgentWrapper {
79
80
 
80
81
  // MOCK SUPPORT - Inject mock spawn function for testing
81
82
  // When set, _spawnClaudeTask uses this instead of real ct CLI
82
- this.mockSpawnFn = options.mockSpawnFn || null;
83
+ // Priority: options.mockSpawnFn (legacy) > options.taskRunner (new DI pattern)
84
+ if (options.mockSpawnFn) {
85
+ this.mockSpawnFn = options.mockSpawnFn;
86
+ } else if (options.taskRunner) {
87
+ // TaskRunner DI - create mockSpawnFn wrapper
88
+ const taskRunner = options.taskRunner;
89
+ this.mockSpawnFn = (args, { context }) => {
90
+ return taskRunner.run(context, {
91
+ agentId: this.id,
92
+ model: this._selectModel(),
93
+ });
94
+ };
95
+ } else {
96
+ this.mockSpawnFn = null;
97
+ }
98
+
83
99
  this.testMode = options.testMode || false;
84
100
  this.quiet = options.quiet || false;
85
101
 
86
102
  // ISOLATION SUPPORT - Run tasks inside Docker container
87
103
  this.isolation = options.isolation || null;
104
+
105
+ // WORKTREE SUPPORT - Run tasks in git worktree (lightweight isolation without Docker)
106
+ this.worktree = options.worktree || null;
88
107
  }
89
108
 
90
109
  /**
@@ -134,27 +153,44 @@ class AgentWrapper {
134
153
 
135
154
  /**
136
155
  * Select model based on current iteration and agent config
156
+ * Enforces maxModel ceiling from settings
137
157
  * @returns {string} Model name ('sonnet', 'opus', 'haiku')
138
158
  * @private
139
159
  */
140
160
  _selectModel() {
141
- // Backward compatibility: static model
161
+ const settings = loadSettings();
162
+ const maxModel = settings.maxModel || 'sonnet';
163
+
164
+ let requestedModel = null;
165
+
166
+ // Get requested model from config
142
167
  if (this.modelConfig.type === 'static') {
143
- return this.modelConfig.model;
144
- }
168
+ requestedModel = this.modelConfig.model;
169
+ } else if (this.modelConfig.type === 'rules') {
170
+ // Dynamic rules: evaluate based on iteration
171
+ for (const rule of this.modelConfig.rules) {
172
+ if (this._matchesIterationRange(rule.iterations)) {
173
+ requestedModel = rule.model;
174
+ break;
175
+ }
176
+ }
145
177
 
146
- // Dynamic rules: evaluate based on iteration
147
- for (const rule of this.modelConfig.rules) {
148
- if (this._matchesIterationRange(rule.iterations)) {
149
- return rule.model;
178
+ // No match for rules: fail fast (config error)
179
+ if (!requestedModel) {
180
+ throw new Error(
181
+ `Agent ${this.id}: No model rule matched iteration ${this.iteration}. ` +
182
+ `Add a catch-all rule like { "iterations": "all", "model": "sonnet" }`
183
+ );
150
184
  }
151
185
  }
152
186
 
153
- // No match: fail fast
154
- throw new Error(
155
- `Agent ${this.id}: No model rule matched iteration ${this.iteration}. ` +
156
- `Add a catch-all rule like { "iterations": "all", "model": "sonnet" }`
157
- );
187
+ // If no model specified (neither static nor rules), use maxModel as default
188
+ if (!requestedModel) {
189
+ return maxModel;
190
+ }
191
+
192
+ // Enforce ceiling - will throw if requestedModel > maxModel
193
+ return validateModelAgainstMax(requestedModel, maxModel);
158
194
  }
159
195
 
160
196
  /**