@link-assistant/hive-mind 1.35.10 → 1.35.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.35.11
4
+
5
+ ### Patch Changes
6
+
7
+ - 6edb401: fix: add stream startup timeout to detect stuck Claude CLI (Issue #1472/#1475)
8
+
9
+ Both affected sessions ran for ~4.5 hours with zero stdout/stderr output from the Claude CLI despite a successful API response. This release adds a configurable startup timeout (default: 2 minutes, env: HIVE_MIND_STREAM_STARTUP_MS) that force-kills the Claude CLI process if no output is received within that window, preventing indefinite hangs and enabling the retry logic.
10
+
3
11
  ## 1.35.10
4
12
 
5
13
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.35.10",
3
+ "version": "1.35.11",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -6,7 +6,6 @@ if (typeof globalThis.use === 'undefined') {
6
6
  const { $ } = await use('command-stream');
7
7
  const fs = (await use('fs')).promises;
8
8
  const path = (await use('path')).default;
9
- // Import log from general lib
10
9
  import { log } from './lib.mjs';
11
10
  import { reportError } from './sentry.lib.mjs';
12
11
  import { timeouts, retryLimits, claudeCode, getClaudeEnv, getThinkingLevelToTokens, getTokensToThinkingLevel, supportsThinkingBudget, DEFAULT_MAX_THINKING_BUDGET, getMaxOutputTokensForModel } from './config.lib.mjs';
@@ -18,7 +17,6 @@ import { buildClaudeResumeCommand } from './claude.command-builder.lib.mjs';
18
17
  import { handleClaudeRuntimeSwitch } from './claude.runtime-switch.lib.mjs'; // see issue #1141
19
18
  import { CLAUDE_MODELS as availableModels } from './models/index.mjs'; // Issue #1221
20
19
  export { availableModels }; // Re-export for backward compatibility
21
- // Helper to display resume command at end of session
22
20
  const showResumeCommand = async (sessionId, tempDir, claudePath, model, log) => {
23
21
  if (!sessionId || !tempDir) return;
24
22
  const cmd = buildClaudeResumeCommand({ tempDir, sessionId, claudePath, model });
@@ -51,7 +49,6 @@ export const mapModelToId = model => {
51
49
  export const validateClaudeConnection = async (model = 'haiku') => {
52
50
  // Map model alias to full ID
53
51
  const mappedModel = mapModelToId(model);
54
- // Retry configuration for API overload errors
55
52
  const maxRetries = 3;
56
53
  const baseDelay = timeouts.retryBaseDelay;
57
54
  let retryCount = 0;
@@ -62,13 +59,11 @@ export const validateClaudeConnection = async (model = 'haiku') => {
62
59
  } else {
63
60
  await log(`🔄 Retry attempt ${retryCount}/${maxRetries} for Claude CLI validation...`);
64
61
  }
65
- // First try a quick validation approach
66
62
  try {
67
63
  const versionResult = await $`timeout ${Math.floor(timeouts.claudeCli / 6000)} claude --version`;
68
64
  if (versionResult.code === 0) {
69
65
  const version = versionResult.stdout?.toString().trim();
70
- // Store the version for thinking settings translation (issue #1146)
71
- detectedClaudeVersion = version;
66
+ detectedClaudeVersion = version; // issue #1146
72
67
  if (retryCount === 0) {
73
68
  await log(`📦 Claude CLI version: ${version}`);
74
69
  }
@@ -84,7 +79,6 @@ export const validateClaudeConnection = async (model = 'haiku') => {
84
79
  // Primary validation: use printf piping with specified model
85
80
  result = await $`printf hi | claude --model ${mappedModel} -p`;
86
81
  } catch (pipeError) {
87
- // If piping fails, fallback to the timeout approach as last resort
88
82
  await log(`⚠️ Pipe validation failed (${pipeError.code}), trying timeout approach...`);
89
83
  try {
90
84
  result = await $`timeout ${Math.floor(timeouts.claudeCli / 1000)} claude --model ${mappedModel} -p hi`;
@@ -99,17 +93,13 @@ export const validateClaudeConnection = async (model = 'haiku') => {
99
93
  });
100
94
  return false;
101
95
  }
102
- // Re-throw if it's not a timeout error
103
96
  throw timeoutError;
104
97
  }
105
98
  }
106
- // Check for common error patterns
107
99
  const stdout = result.stdout?.toString() || '';
108
100
  const stderr = result.stderr?.toString() || '';
109
- // Check for JSON errors in stdout or stderr
110
101
  const checkForJsonError = text => {
111
102
  try {
112
- // Look for JSON error patterns
113
103
  if (text.includes('"error"') && text.includes('"type"')) {
114
104
  const jsonMatch = text.match(/\{.*"error".*\}/);
115
105
  if (jsonMatch) {
@@ -118,7 +108,6 @@ export const validateClaudeConnection = async (model = 'haiku') => {
118
108
  }
119
109
  }
120
110
  } catch (e) {
121
- // Not valid JSON, continue with other checks
122
111
  if (global.verboseMode) {
123
112
  reportError(e, {
124
113
  context: 'claude_json_error_parse',
@@ -149,10 +138,8 @@ export const validateClaudeConnection = async (model = 'haiku') => {
149
138
  return false;
150
139
  }
151
140
  }
152
- // Use exitCode if code is undefined (Bun shell behavior)
153
- const exitCode = result.code ?? result.exitCode ?? 0;
141
+ const exitCode = result.code ?? result.exitCode ?? 0; // Bun shell compat
154
142
  if (exitCode !== 0) {
155
- // Command failed
156
143
  if (jsonError) {
157
144
  await log(`❌ Claude CLI authentication failed: ${jsonError.type} - ${jsonError.message}`, {
158
145
  level: 'error',
@@ -166,7 +153,6 @@ export const validateClaudeConnection = async (model = 'haiku') => {
166
153
  }
167
154
  return false;
168
155
  }
169
- // Check for error patterns in successful response
170
156
  if (jsonError) {
171
157
  if ((jsonError.type === 'api_error' || jsonError.type === 'overloaded_error') && jsonError.message === 'Overloaded') {
172
158
  if (retryCount < maxRetries) {
@@ -188,7 +174,6 @@ export const validateClaudeConnection = async (model = 'haiku') => {
188
174
  }
189
175
  return false;
190
176
  }
191
- // Success - Claude responded (LLM responses are probabilistic, so any response is good)
192
177
  await log('✅ Claude CLI connection validated successfully');
193
178
  return true;
194
179
  } catch (error) {
@@ -839,16 +824,16 @@ export const executeClaudeCommand = async params => {
839
824
  let lastMessage = '';
840
825
  let isOverloadError = false;
841
826
  let is503Error = false;
842
- let isInternalServerError = false; // Issue #1331: Track 500 Internal server error
843
- let isRequestTimeout = false; // Issue #1353: Track "Request timed out" from Claude CLI
844
- let apiMarkedNotRetryable = false; // Issue #1437: Track when API explicitly signals x-should-retry: false
845
- let resultNumTurns = 0; // Issue #1437: Track num_turns from result event to detect stuck retries
827
+ let isInternalServerError = false;
828
+ let isRequestTimeout = false;
829
+ let apiMarkedNotRetryable = false;
830
+ let resultNumTurns = 0;
846
831
  let stderrErrors = [];
847
- let resultSuccessReceived = false; // Issue #1354: Track if result success event was received
848
- let anthropicTotalCostUSD = null; // Capture Anthropic's official total_cost_usd from result
849
- let errorDuringExecution = false; // Issue #1088: Track if error_during_execution subtype occurred
850
- let resultSummary = null; // Issue #1263: Capture AI result summary for --attach-solution-summary
851
- let resultModelUsage = null; // Issue #1454
832
+ let resultSuccessReceived = false;
833
+ let anthropicTotalCostUSD = null;
834
+ let errorDuringExecution = false;
835
+ let resultSummary = null;
836
+ let resultModelUsage = null;
852
837
  // Create interactive mode handler if enabled
853
838
  let interactiveHandler = null;
854
839
  if (argv.interactiveMode && owner && repo && prNumber) {
@@ -872,40 +857,25 @@ export const executeClaudeCommand = async params => {
872
857
  await log(`${fullCommand}`);
873
858
  await log('');
874
859
  if (argv.verbose) {
875
- await log('📋 User prompt:', { verbose: true });
876
- await log('---BEGIN USER PROMPT---', { verbose: true });
877
- await log(prompt, { verbose: true });
878
- await log('---END USER PROMPT---', { verbose: true });
879
- await log('📋 System prompt:', { verbose: true });
880
- await log('---BEGIN SYSTEM PROMPT---', { verbose: true });
881
- await log(systemPrompt, { verbose: true });
882
- await log('---END SYSTEM PROMPT---', { verbose: true });
860
+ await log(`📋 User prompt:\n---BEGIN USER PROMPT---\n${prompt}\n---END USER PROMPT---`, { verbose: true });
861
+ await log(`📋 System prompt:\n---BEGIN SYSTEM PROMPT---\n${systemPrompt}\n---END SYSTEM PROMPT---`, { verbose: true });
883
862
  }
884
863
  try {
885
- // Resolve thinking settings (see issue #1146)
886
864
  const { thinkingBudget: resolvedThinkingBudget, thinkLevel, isNewVersion, maxBudget } = await resolveThinkingSettings(argv, log);
887
- // Set CLAUDE_CODE_MAX_OUTPUT_TOKENS (#1076), MAX_THINKING_TOKENS (#1146), MCP timeout (#1066),
888
- // CLAUDE_CODE_EFFORT_LEVEL (#1238), model/thinkLevel/maxBudget for effort conversion (#1221, #1238)
889
865
  const claudeEnv = getClaudeEnv({ thinkingBudget: resolvedThinkingBudget, model: mappedModel, thinkLevel, maxBudget });
890
- // Issue #1337: Enable ANTHROPIC_LOG=debug in --verbose mode for detailed API request diagnostics.
866
+ if (argv.verbose) claudeEnv.ANTHROPIC_LOG = 'debug';
867
+ const modelMaxOutputTokens = getMaxOutputTokensForModel(mappedModel);
891
868
  if (argv.verbose) {
892
- claudeEnv.ANTHROPIC_LOG = 'debug';
869
+ await log(`📊 CLAUDE_CODE_MAX_OUTPUT_TOKENS: ${modelMaxOutputTokens}, MCP_TIMEOUT: ${claudeCode.mcpTimeout}ms, MCP_TOOL_TIMEOUT: ${claudeCode.mcpToolTimeout}ms, ANTHROPIC_LOG: debug`, { verbose: true });
870
+ if (resolvedThinkingBudget !== undefined) await log(`📊 MAX_THINKING_TOKENS: ${resolvedThinkingBudget}`, { verbose: true });
871
+ if (claudeEnv.CLAUDE_CODE_EFFORT_LEVEL) await log(`📊 CLAUDE_CODE_EFFORT_LEVEL: ${claudeEnv.CLAUDE_CODE_EFFORT_LEVEL}`, { verbose: true });
872
+ if (!isNewVersion && thinkLevel) await log(`📊 Thinking level (via keywords): ${thinkLevel}`, { verbose: true });
893
873
  }
894
- const modelMaxOutputTokens = getMaxOutputTokensForModel(mappedModel);
895
- if (argv.verbose) await log(`📊 CLAUDE_CODE_MAX_OUTPUT_TOKENS: ${modelMaxOutputTokens}`, { verbose: true });
896
- if (argv.verbose) await log(`📊 MCP_TIMEOUT: ${claudeCode.mcpTimeout}ms (server startup)`, { verbose: true });
897
- if (argv.verbose) await log(`📊 MCP_TOOL_TIMEOUT: ${claudeCode.mcpToolTimeout}ms (tool execution)`, { verbose: true });
898
- if (argv.verbose) await log(`📊 ANTHROPIC_LOG: debug (verbose mode)`, { verbose: true });
899
- if (resolvedThinkingBudget !== undefined) await log(`📊 MAX_THINKING_TOKENS: ${resolvedThinkingBudget}`, { verbose: true });
900
- if (claudeEnv.CLAUDE_CODE_EFFORT_LEVEL) await log(`📊 CLAUDE_CODE_EFFORT_LEVEL: ${claudeEnv.CLAUDE_CODE_EFFORT_LEVEL}`, { verbose: true });
901
- if (!isNewVersion && thinkLevel) await log(`📊 Thinking level (via keywords): ${thinkLevel}`, { verbose: true });
902
874
  if (argv.resume) {
903
- // When resuming, pass prompt directly with -p flag. Escape double quotes for shell.
904
875
  const simpleEscapedPrompt = prompt.replace(/"/g, '\\"');
905
876
  const simpleEscapedSystem = systemPrompt.replace(/"/g, '\\"');
906
877
  execCommand = $({ cwd: tempDir, mirror: false, env: claudeEnv })`${claudePath} --resume ${argv.resume} --output-format stream-json --verbose --dangerously-skip-permissions --model ${mappedModel} -p "${simpleEscapedPrompt}" --append-system-prompt "${simpleEscapedSystem}"`;
907
878
  } else {
908
- // When not resuming, pass prompt via stdin. Escape double quotes for shell.
909
879
  const simpleEscapedSystem = systemPrompt.replace(/"/g, '\\"');
910
880
  execCommand = $({ cwd: tempDir, stdin: prompt, mirror: false, env: claudeEnv })`${claudePath} --output-format stream-json --verbose --dangerously-skip-permissions --model ${mappedModel} --append-system-prompt "${simpleEscapedSystem}"`;
911
881
  }
@@ -917,50 +887,78 @@ export const executeClaudeCommand = async params => {
917
887
  await log(formatAligned('🍴', 'Fork:', forkedRepo, 2));
918
888
  }
919
889
  await log(`\n${formatAligned('▶️', 'Streaming output:', '')}\n`);
920
- // Use command-stream's async iteration for real-time streaming
921
890
  let exitCode = 0;
922
- // Issue #1183: Line buffer for NDJSON stream parsing - accumulate incomplete lines across chunks
923
- // Long JSON messages (e.g., result with total_cost_usd) may be split across multiple stdout chunks
924
891
  let stdoutLineBuffer = '';
925
- // Issue #1280: Track result event and timeout for hung processes.
926
- // command-stream's stream() waits for BOTH process exit AND stdout pipe close; if stdout stays open
927
- // the stream hangs. Workaround: force-kill after result event. See command-stream/issues/155
928
892
  let resultEventReceived = false;
929
893
  let resultTimeoutId = null;
930
894
  let forceExitTriggered = false;
931
895
  const streamCloseTimeoutMs = timeouts.resultStreamCloseMs;
896
+ let firstChunkReceived = false;
897
+ let startupTimeoutId = null;
898
+ let isStartupTimeout = false;
899
+ let lastEventTime = null;
900
+ let activityTimeoutId = null;
901
+ let isActivityTimeout = false;
932
902
  const forceExitOnTimeout = async () => {
933
903
  if (forceExitTriggered) return;
934
904
  forceExitTriggered = true;
935
- const elapsed = `${streamCloseTimeoutMs / 1000}s`;
936
- await log(`⚠️ Stream didn't close ${elapsed} after result event, forcing exit (Issue #1280)`, { verbose: true });
937
- await log(` command-stream stream() is likely stuck waiting for pipe close`, { verbose: true });
905
+ await log(`⚠️ Stream timeout — forcing exit (Issue #1280)`, { verbose: true });
938
906
  try {
939
907
  if (execCommand.kill) {
940
- await log(` Sending SIGTERM to process...`, { verbose: true });
941
908
  execCommand.kill('SIGTERM');
942
- // Issue #1346: Capture timer handle so it can be cleared after use to avoid
943
- // leaking an active event loop reference when the process exits before 2s
944
- const sigkillTimerId = setTimeout(() => {
909
+ // Issue #1346: Follow up with SIGKILL after 2s if still alive
910
+ const t = setTimeout(() => {
945
911
  try {
946
- if (!execCommand.result?.code) {
947
- log(` Process still alive after 2s, sending SIGKILL`, { verbose: true });
948
- execCommand.kill('SIGKILL');
949
- }
912
+ if (!execCommand.result?.code) execCommand.kill('SIGKILL');
950
913
  } catch {
951
- /* process may have exited */
914
+ /* exited */
952
915
  }
953
916
  }, 2000);
954
- sigkillTimerId.unref();
917
+ t.unref();
955
918
  }
956
919
  } catch (e) {
957
920
  await log(` Warning: Could not kill process: ${e.message}`, { verbose: true });
958
921
  }
959
922
  };
923
+ // Issue #1472/#1475: Startup timeout — force-kill if no output within streamStartupMs
924
+ if (timeouts.streamStartupMs > 0) {
925
+ startupTimeoutId = setTimeout(async () => {
926
+ if (!firstChunkReceived && !forceExitTriggered) {
927
+ isStartupTimeout = true; // Issue #1472/#1475: Flag for retry logic
928
+ await log(`\n⚠️ No output from Claude CLI after ${timeouts.streamStartupMs / 1000}s — force-killing (Issue #1472/#1475)`, { level: 'warning' });
929
+ await forceExitOnTimeout();
930
+ }
931
+ }, timeouts.streamStartupMs);
932
+ startupTimeoutId.unref();
933
+ }
934
+ // Issue #1472: Helper to reset activity timeout on each stdout chunk
935
+ const resetActivityTimeout = () => {
936
+ if (timeouts.streamActivityMs > 0 && !resultEventReceived) {
937
+ if (activityTimeoutId) clearTimeout(activityTimeoutId);
938
+ activityTimeoutId = setTimeout(async () => {
939
+ if (!forceExitTriggered && !resultEventReceived) {
940
+ isActivityTimeout = true;
941
+ const idleSeconds = lastEventTime ? Math.round((Date.now() - lastEventTime) / 1000) : 'unknown';
942
+ await log(`\n⚠️ No stream output for ${timeouts.streamActivityMs / 1000}s after previous activity (idle: ${idleSeconds}s) — force-killing (Issue #1472)`, { level: 'warning' });
943
+ await forceExitOnTimeout();
944
+ }
945
+ }, timeouts.streamActivityMs);
946
+ activityTimeoutId.unref();
947
+ }
948
+ };
960
949
  for await (const chunk of execCommand.stream()) {
961
950
  if (forceExitTriggered) break;
951
+ if (!firstChunkReceived) {
952
+ // Issue #1472/#1475: Clear startup timeout on first output
953
+ firstChunkReceived = true;
954
+ if (startupTimeoutId) {
955
+ clearTimeout(startupTimeoutId);
956
+ startupTimeoutId = null;
957
+ }
958
+ }
962
959
  if (chunk.type === 'stdout') {
963
960
  const output = chunk.data.toString();
961
+ resetActivityTimeout(); // Issue #1472: Reset activity timeout on each stdout chunk
964
962
  // Append to buffer and split; keep last element (may be incomplete) for next chunk
965
963
  stdoutLineBuffer += output;
966
964
  const lines = stdoutLineBuffer.split('\n');
@@ -970,8 +968,12 @@ export const executeClaudeCommand = async params => {
970
968
  if (!line.trim()) continue;
971
969
  try {
972
970
  const data = sanitizeObjectStrings(JSON.parse(line));
973
- // Process event in interactive mode
974
971
  if (interactiveHandler) {
972
+ if (!interactiveHandler._firstEventLogged) {
973
+ interactiveHandler._firstEventLogged = true;
974
+ await log(`🔌 Interactive mode: First event received (type: ${data.type || 'unknown'}) — stream is active`, { verbose: true });
975
+ }
976
+ lastEventTime = Date.now();
975
977
  try {
976
978
  await interactiveHandler.processEvent(data);
977
979
  } catch (interactiveError) {
@@ -979,7 +981,6 @@ export const executeClaudeCommand = async params => {
979
981
  }
980
982
  }
981
983
  await log(JSON.stringify(data, null, 2));
982
- // Capture session ID and rename log file
983
984
  if (!sessionId && data.session_id) {
984
985
  sessionId = data.session_id;
985
986
  await log(`📌 Session ID: ${sessionId}`);
@@ -995,24 +996,15 @@ export const executeClaudeCommand = async params => {
995
996
  await log(`⚠️ Could not rename log file: ${renameError.message}`, { verbose: true });
996
997
  }
997
998
  }
998
- if (data.type === 'message') {
999
- messageCount++;
1000
- } else if (data.type === 'tool_use') {
1001
- toolUseCount++;
1002
- }
1003
- // Handle session result type from Claude CLI (emitted when session completes)
999
+ if (data.type === 'message') messageCount++;
1000
+ else if (data.type === 'tool_use') toolUseCount++;
1004
1001
  if (data.type === 'result') {
1005
- // Issue #1280: Start 30s timeout for stream close after result event
1006
1002
  if (!resultEventReceived) {
1007
1003
  resultEventReceived = true;
1008
1004
  await log(`📌 Result event received, starting ${streamCloseTimeoutMs / 1000}s stream close timeout (Issue #1280)`, { verbose: true });
1009
1005
  resultTimeoutId = setTimeout(forceExitOnTimeout, streamCloseTimeoutMs);
1010
1006
  }
1011
- // Issue #1354: Track when result event confirms success (prevents false positive detection)
1012
- if (data.subtype === 'success') {
1013
- resultSuccessReceived = true;
1014
- }
1015
- // Issue #1104: Only extract cost from subtype 'success' results
1007
+ if (data.subtype === 'success') resultSuccessReceived = true;
1016
1008
  if (data.subtype === 'success' && data.total_cost_usd !== undefined && data.total_cost_usd !== null) {
1017
1009
  anthropicTotalCostUSD = data.total_cost_usd;
1018
1010
  await log(`💰 Anthropic official cost captured from success result: $${anthropicTotalCostUSD.toFixed(6)}`, { verbose: true });
@@ -1024,7 +1016,6 @@ export const executeClaudeCommand = async params => {
1024
1016
  resultSummary = data.result;
1025
1017
  await log('📝 Captured result summary from Claude output', { verbose: true });
1026
1018
  }
1027
- // Issue #1437: Capture num_turns to detect stuck retries (degrading turn count signals non-recovery)
1028
1019
  if (data.num_turns !== undefined) {
1029
1020
  resultNumTurns = data.num_turns;
1030
1021
  await log(`📊 Session num_turns: ${resultNumTurns}`, { verbose: true });
@@ -1033,7 +1024,6 @@ export const executeClaudeCommand = async params => {
1033
1024
  if (data.is_error === true) {
1034
1025
  lastMessage = data.result || JSON.stringify(data);
1035
1026
  const subtype = data.subtype || 'unknown';
1036
- // Issue #1088: "error_during_execution" = warning (work may exist), others = failure
1037
1027
  if (subtype === 'error_during_execution') {
1038
1028
  errorDuringExecution = true;
1039
1029
  await log(`⚠️ Error during execution (subtype: ${subtype}) - work may be completed`, { verbose: true });
@@ -1055,16 +1045,11 @@ export const executeClaudeCommand = async params => {
1055
1045
  }
1056
1046
  }
1057
1047
  }
1058
- // Store last message for error detection
1059
- if (data.type === 'text' && data.text) {
1060
- lastMessage = data.text;
1061
- } else if (data.type === 'error') {
1048
+ if (data.type === 'text' && data.text) lastMessage = data.text;
1049
+ else if (data.type === 'error') {
1062
1050
  lastMessage = data.error || JSON.stringify(data);
1063
- if (lastMessage.includes('Internal server error')) {
1064
- isInternalServerError = true;
1065
- }
1051
+ if (lastMessage.includes('Internal server error')) isInternalServerError = true;
1066
1052
  }
1067
- // Check for API overload error and 503 errors
1068
1053
  if (data.type === 'assistant' && data.message && data.message.content) {
1069
1054
  const content = Array.isArray(data.message.content) ? data.message.content : [data.message.content];
1070
1055
  for (const item of content) {
@@ -1121,23 +1106,15 @@ export const executeClaudeCommand = async params => {
1121
1106
  }
1122
1107
  if (chunk.type === 'stderr') {
1123
1108
  const errorOutput = chunk.data.toString();
1124
- // Log stderr immediately
1125
1109
  if (errorOutput) {
1126
1110
  await log(errorOutput, { stream: 'stderr' });
1127
- // Issue #1437: Detect x-should-retry: false in ANTHROPIC_LOG=debug output signals
1128
- // a non-transient error; fail fast instead of blindly retrying.
1129
- if (errorOutput.includes('not retryable') || errorOutput.includes("'x-should-retry': 'false'") || errorOutput.includes('"x-should-retry": "false"')) {
1130
- if (!apiMarkedNotRetryable) {
1131
- apiMarkedNotRetryable = true;
1132
- await log('⚠️ API signaled error is not retryable (x-should-retry: false)', { verbose: true });
1133
- }
1111
+ // Issue #1437: Detect x-should-retry: false non-transient error, fail fast
1112
+ if (!apiMarkedNotRetryable && (errorOutput.includes('not retryable') || errorOutput.includes("'x-should-retry': 'false'") || errorOutput.includes('"x-should-retry": "false"'))) {
1113
+ apiMarkedNotRetryable = true;
1114
+ await log('⚠️ API signaled error is not retryable (x-should-retry: false)', { verbose: true });
1134
1115
  }
1135
- // Issue #1354: Split multi-line chunks — a chunk may contain multiple JSON messages;
1136
- // passing the whole chunk to isStderrError() causes JSON.parse() to fail.
1137
1116
  for (const line of errorOutput.split('\n')) {
1138
- if (isStderrError(line)) {
1139
- stderrErrors.push(line.trim());
1140
- }
1117
+ if (isStderrError(line)) stderrErrors.push(line.trim());
1141
1118
  }
1142
1119
  }
1143
1120
  } else if (chunk.type === 'exit') {
@@ -1150,6 +1127,7 @@ export const executeClaudeCommand = async params => {
1150
1127
  }
1151
1128
 
1152
1129
  // Issue #1183: Process remaining buffer content - extract cost from result type if present
1130
+ // Issue #1472: Also forward remaining buffer events to interactive handler
1153
1131
  if (stdoutLineBuffer.trim()) {
1154
1132
  try {
1155
1133
  const data = sanitizeObjectStrings(JSON.parse(stdoutLineBuffer));
@@ -1157,20 +1135,30 @@ export const executeClaudeCommand = async params => {
1157
1135
  if (data.type === 'result' && data.subtype === 'success' && data.total_cost_usd != null) {
1158
1136
  anthropicTotalCostUSD = data.total_cost_usd;
1159
1137
  }
1138
+ // Issue #1472: Forward remaining buffer event to interactive handler (was previously missed)
1139
+ if (interactiveHandler) {
1140
+ try {
1141
+ await interactiveHandler.processEvent(data);
1142
+ } catch (interactiveError) {
1143
+ await log(`⚠️ Interactive mode error (remaining buffer): ${interactiveError.message}`, { verbose: true });
1144
+ }
1145
+ }
1160
1146
  } catch {
1161
1147
  if (!stdoutLineBuffer.includes('node:internal')) await log(stdoutLineBuffer, { stream: 'raw' });
1162
1148
  }
1163
1149
  }
1164
- // Issue #1280: Clear the stream close timeout since we exited the loop
1150
+ if (startupTimeoutId) {
1151
+ clearTimeout(startupTimeoutId);
1152
+ startupTimeoutId = null;
1153
+ }
1154
+ if (activityTimeoutId) {
1155
+ clearTimeout(activityTimeoutId);
1156
+ activityTimeoutId = null;
1157
+ }
1165
1158
  if (resultTimeoutId) {
1166
- clearTimeout(resultTimeoutId);
1167
- if (forceExitTriggered) {
1168
- await log('⚠️ Stream exited via force-kill timeout (Issue #1280)', { verbose: true });
1169
- } else {
1170
- await log('✅ Stream closed normally after result event', { verbose: true });
1171
- }
1159
+ clearTimeout(resultTimeoutId); // Issue #1280
1160
+ await log(forceExitTriggered ? '⚠️ Stream exited via force-kill timeout' : '✅ Stream closed normally after result event', { verbose: true });
1172
1161
  }
1173
- // Issue #1165: Check actual exit code from command result (stream() may not emit 'exit' chunks)
1174
1162
  if (execCommand.result && typeof execCommand.result.code === 'number') {
1175
1163
  const resultExitCode = execCommand.result.code;
1176
1164
  if (exitCode === 0 && resultExitCode !== 0) {
@@ -1183,25 +1171,34 @@ export const executeClaudeCommand = async params => {
1183
1171
  await log(`\n❌ Command not found (exit code 127) - "${claudePath}" is not installed or not in PATH\n Please ensure Claude CLI is installed: npm install -g @anthropic-ai/claude-code`, { level: 'error' });
1184
1172
  }
1185
1173
  }
1186
- // Flush any remaining queued comments from interactive mode
1174
+ // Issue #1472: Flush remaining queued comments, log diagnostic summary, warn on zero events
1187
1175
  if (interactiveHandler) {
1176
+ if (!interactiveHandler._firstEventLogged) {
1177
+ await log('⚠️ Interactive mode: No events received from Claude CLI — zero comments posted (Issue #1472)', { level: 'warning' });
1178
+ }
1188
1179
  try {
1189
1180
  await interactiveHandler.flush();
1190
1181
  } catch (flushError) {
1191
1182
  await log(`⚠️ Interactive mode flush error: ${flushError.message}`, { verbose: true });
1192
1183
  }
1184
+ const handlerState = interactiveHandler.getState();
1185
+ const durationMin = ((Date.now() - handlerState.startTime) / 60000).toFixed(1);
1186
+ const { eventsProcessed: ep, commentsAttempted: ca, commentsPosted: cp, commentsFailed: cf, editsAttempted: ea, editsSucceeded: es, editsFailed: ef, commentQueue: cq } = handlerState;
1187
+ await log(`🔌 Interactive mode summary: ${ep} events processed, ${ca} comments attempted, ${cp} posted, ${cf} failed, ${ea} edits attempted, ${es} succeeded, ${ef} failed, ${cq.length} still queued, duration ${durationMin}m`);
1188
+ if (handlerState.eventsProcessed > 0 && handlerState.commentsPosted === 0) {
1189
+ await log(`⚠️ Interactive mode: Events were received (${handlerState.eventsProcessed}) but zero comments were posted — check GitHub API connectivity and PR access (${handlerState.commentsFailed} failures)`, { level: 'warning' });
1190
+ }
1193
1191
  }
1194
1192
 
1195
- // Issues #1331, #1353: Unified handler for transient API errors (Overloaded, 503, Internal Server Error,
1196
- // Request timed out). All use exponential backoff with session preservation via --resume.
1197
- const isTransientError = isOverloadError || isInternalServerError || is503Error || isRequestTimeout || (lastMessage.includes('API Error: 500') && (lastMessage.includes('Overloaded') || lastMessage.includes('Internal server error'))) || (lastMessage.includes('API Error: 529') && (lastMessage.includes('overloaded_error') || lastMessage.includes('Overloaded'))) || (lastMessage.includes('api_error') && lastMessage.includes('Overloaded')) || (lastMessage.includes('overloaded_error') && lastMessage.includes('Overloaded')) || lastMessage.includes('API Error: 503') || (lastMessage.includes('503') && (lastMessage.includes('upstream connect error') || lastMessage.includes('remote connection failure'))) || lastMessage === 'Request timed out' || lastMessage.includes('Request timed out');
1193
+ // Issues #1331, #1353, #1472/#1475: Unified transient error retry (exponential backoff, session preservation)
1194
+ const isTransientError = isStartupTimeout || isActivityTimeout || isOverloadError || isInternalServerError || is503Error || isRequestTimeout || (lastMessage.includes('API Error: 500') && (lastMessage.includes('Overloaded') || lastMessage.includes('Internal server error'))) || (lastMessage.includes('API Error: 529') && (lastMessage.includes('overloaded_error') || lastMessage.includes('Overloaded'))) || (lastMessage.includes('api_error') && lastMessage.includes('Overloaded')) || (lastMessage.includes('overloaded_error') && lastMessage.includes('Overloaded')) || lastMessage.includes('API Error: 503') || (lastMessage.includes('503') && (lastMessage.includes('upstream connect error') || lastMessage.includes('remote connection failure'))) || lastMessage === 'Request timed out' || lastMessage.includes('Request timed out');
1198
1195
  if ((commandFailed || isTransientError) && isTransientError) {
1199
- // Issue #1353: Timeouts use longer backoff (5min–1hr) vs general transient (2min–30min)
1200
- const maxRetries = isRequestTimeout ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
1201
- const initialDelay = isRequestTimeout ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs;
1202
- const maxDelay = isRequestTimeout ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs;
1196
+ // Issue #1472/#1475: Startup/activity timeout 30s–2min backoff; #1353: Request timeout → 5min–1hr; general 2min–30min
1197
+ const isTimeoutRetry = isStartupTimeout || isActivityTimeout;
1198
+ const maxRetries = isTimeoutRetry ? retryLimits.maxTransientErrorRetries : isRequestTimeout ? retryLimits.maxRequestTimeoutRetries : retryLimits.maxTransientErrorRetries;
1199
+ const initialDelay = isTimeoutRetry ? 30000 : isRequestTimeout ? retryLimits.initialRequestTimeoutDelayMs : retryLimits.initialTransientErrorDelayMs;
1200
+ const maxDelay = isTimeoutRetry ? 120000 : isRequestTimeout ? retryLimits.maxRequestTimeoutDelayMs : retryLimits.maxTransientErrorDelayMs;
1203
1201
  // Issue #1437: Fail fast when API signals x-should-retry: false AND session made no progress
1204
- // (num_turns <= 1). Allow maxNotRetryableAttempts before giving up (signal can be wrong sometimes).
1205
1202
  const isStuckRetry = apiMarkedNotRetryable && retryCount >= retryLimits.maxNotRetryableAttempts && resultNumTurns <= 1;
1206
1203
  if (isStuckRetry) {
1207
1204
  await log(`\n\n❌ API explicitly marked error as not retryable (x-should-retry: false) and session made no progress (num_turns=${resultNumTurns}) after ${retryCount} attempt(s)`, { level: 'error' });
@@ -1222,11 +1219,14 @@ export const executeClaudeCommand = async params => {
1222
1219
  }
1223
1220
  if (retryCount < maxRetries) {
1224
1221
  const delay = Math.min(initialDelay * Math.pow(retryLimits.retryBackoffMultiplier, retryCount), maxDelay);
1225
- const errorLabel = isRequestTimeout ? 'Request timeout' : isOverloadError || (lastMessage.includes('API Error: 500') && lastMessage.includes('Overloaded')) || (lastMessage.includes('API Error: 529') && lastMessage.includes('Overloaded')) ? `API overload (${lastMessage.includes('529') ? '529' : '500'})` : isInternalServerError || lastMessage.includes('Internal server error') ? 'Internal server error (500)' : '503 network error';
1222
+ const errorLabel = isStartupTimeout ? 'Stream startup timeout (Issue #1472/#1475)' : isActivityTimeout ? 'Stream activity timeout (Issue #1472)' : isRequestTimeout ? 'Request timeout' : isOverloadError || (lastMessage.includes('API Error: 500') && lastMessage.includes('Overloaded')) || (lastMessage.includes('API Error: 529') && lastMessage.includes('Overloaded')) ? `API overload (${lastMessage.includes('529') ? '529' : '500'})` : isInternalServerError || lastMessage.includes('Internal server error') ? 'Internal server error (500)' : '503 network error';
1226
1223
  const notRetryableHint = apiMarkedNotRetryable ? ' (API says not retryable — will stop early if no progress)' : '';
1227
- await log(`\n⚠️ ${errorLabel} detected. Retry ${retryCount + 1}/${maxRetries} in ${Math.round(delay / 60000)} min (session preserved)${notRetryableHint}...`, { level: 'warning' });
1228
- await log(` Error: ${lastMessage.substring(0, 200)}`, { verbose: true });
1229
- if (sessionId && !argv.resume) argv.resume = sessionId; // preserve session for resume
1224
+ const delayLabel = delay >= 60000 ? `${Math.round(delay / 60000)} min` : `${Math.round(delay / 1000)}s`;
1225
+ const retryMode = isStartupTimeout ? ' (fresh start)' : ' (session preserved)';
1226
+ await log(`\n⚠️ ${errorLabel} detected. Retry ${retryCount + 1}/${maxRetries} in ${delayLabel}${retryMode}${notRetryableHint}...`, { level: 'warning' });
1227
+ await log(` Error: ${isStartupTimeout ? `No output from Claude CLI within ${timeouts.streamStartupMs / 1000}s` : isActivityTimeout ? `No output for ${timeouts.streamActivityMs / 1000}s after previous activity` : lastMessage.substring(0, 200)}`, { verbose: true });
1228
+ // Activity timeout preserves session (work was started), startup timeout does not (no session created)
1229
+ if (!isStartupTimeout && sessionId && !argv.resume) argv.resume = sessionId;
1230
1230
  await waitWithCountdown(delay, log);
1231
1231
  await log('\n🔄 Retrying now...');
1232
1232
  retryCount++;
@@ -54,6 +54,17 @@ export const timeouts = {
54
54
  // Issue #1280: Timeout (ms) to wait for stream close after result event before force-killing
55
55
  // command-stream's stream() waits for process exit + pipe close; if stdout stays open, it hangs
56
56
  resultStreamCloseMs: parseIntWithDefault('HIVE_MIND_RESULT_STREAM_CLOSE_MS', 30000),
57
+ // Issue #1472/#1475: Timeout (ms) to wait for first stream output from Claude CLI after startup.
58
+ // If no stdout/stderr output is received within this period, the process is considered stuck
59
+ // and will be force-killed. Both affected sessions showed ~4.5h with zero output from Claude CLI.
60
+ // Default: 120000ms (2 minutes) — Claude CLI normally emits system.init within 1-3 seconds.
61
+ streamStartupMs: parseIntWithDefault('HIVE_MIND_STREAM_STARTUP_MS', 120000),
62
+ // Issue #1472: Activity timeout (ms) — if no new stream output is received for this duration
63
+ // after at least one event was received, the process is considered hung mid-session.
64
+ // This catches the case where Claude CLI starts producing output but then stops (e.g., the
65
+ // original Issue #1472 where CLI was stuck for 4.5h with all output arriving only at CTRL+C).
66
+ // Default: 300000ms (5 minutes). Set to 0 to disable. Configurable via environment variable.
67
+ streamActivityMs: parseIntWithDefault('HIVE_MIND_STREAM_ACTIVITY_MS', 300000),
57
68
  };
58
69
 
59
70
  // Auto-continue configurations
@@ -270,6 +270,14 @@ export const createInteractiveHandler = options => {
270
270
  // Track active agent tasks for progress update deduplication
271
271
  // Map of task_id -> { commentId, toolUseId, description, commentIdPromise, resolveCommentId }
272
272
  pendingTasks: new Map(),
273
+ // Issue #1472: Diagnostic counters for tracking comment posting success/failure
274
+ eventsProcessed: 0,
275
+ commentsAttempted: 0,
276
+ commentsPosted: 0,
277
+ commentsFailed: 0,
278
+ editsAttempted: 0,
279
+ editsSucceeded: 0,
280
+ editsFailed: 0,
273
281
  };
274
282
 
275
283
  /**
@@ -299,6 +307,7 @@ export const createInteractiveHandler = options => {
299
307
  return null;
300
308
  }
301
309
 
310
+ state.commentsAttempted++;
302
311
  try {
303
312
  // Post comment via gh api with stdin to avoid shell quoting issues
304
313
  // with complex markdown bodies containing backticks, quotes, etc.
@@ -310,6 +319,7 @@ export const createInteractiveHandler = options => {
310
319
  maxBuffer: 10 * 1024 * 1024, // 10MB
311
320
  });
312
321
  state.lastCommentTime = Date.now();
322
+ state.commentsPosted++;
313
323
 
314
324
  // Extract comment ID from the API response JSON
315
325
  let commentId = null;
@@ -327,9 +337,9 @@ export const createInteractiveHandler = options => {
327
337
  }
328
338
  return commentId;
329
339
  } catch (error) {
330
- if (verbose) {
331
- await log(`⚠️ Interactive mode: Failed to post comment: ${error.message} (body: ${body.length} chars)`, { verbose: true });
332
- }
340
+ state.commentsFailed++;
341
+ // Issue #1472: Always log comment failures (not just in verbose mode) — silent failures cause zero-comment bugs
342
+ await log(`⚠️ Interactive mode: Failed to post comment: ${error.message} (body: ${body.length} chars)`);
333
343
  return null;
334
344
  }
335
345
  };
@@ -349,6 +359,7 @@ export const createInteractiveHandler = options => {
349
359
  return false;
350
360
  }
351
361
 
362
+ state.editsAttempted++;
352
363
  try {
353
364
  // Edit comment via gh api with stdin to avoid shell quoting issues
354
365
  // with complex markdown bodies containing backticks, quotes, etc.
@@ -359,14 +370,14 @@ export const createInteractiveHandler = options => {
359
370
  input: jsonPayload,
360
371
  maxBuffer: 10 * 1024 * 1024, // 10MB
361
372
  });
373
+ state.editsSucceeded++;
362
374
  if (verbose) {
363
375
  await log(`✅ Interactive mode: Comment ${commentId} updated (body: ${body.length} chars, payload: ${jsonPayload.length} chars)`, { verbose: true });
364
376
  }
365
377
  return true;
366
378
  } catch (error) {
367
- if (verbose) {
368
- await log(`⚠️ Interactive mode: Failed to edit comment ${commentId}: ${error.message} (body: ${body.length} chars)`, { verbose: true });
369
- }
379
+ state.editsFailed++;
380
+ await log(`⚠️ Interactive mode: Failed to edit comment ${commentId}: ${error.message} (body: ${body.length} chars)`);
370
381
  return false;
371
382
  }
372
383
  };
@@ -1135,6 +1146,7 @@ ${createRawJsonSection(data)}`;
1135
1146
  if (!data || typeof data !== 'object') {
1136
1147
  return;
1137
1148
  }
1149
+ state.eventsProcessed++;
1138
1150
 
1139
1151
  // Handle events without type as unrecognized
1140
1152
  if (!data.type) {