@link-assistant/hive-mind 1.73.3 → 1.73.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/package.json +1 -1
- package/src/claude.budget-stats.lib.mjs +77 -1
- package/src/claude.lib.mjs +31 -78
- package/src/claude.thinking-block-recovery.lib.mjs +79 -0
- package/src/config.lib.mjs +54 -13
- package/src/critical-error-commit.lib.mjs +70 -0
- package/src/models/index.mjs +13 -7
- package/src/solve.config.lib.mjs +3 -3
- package/src/solve.mjs +21 -2
- package/src/tool-retry.lib.mjs +15 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,42 @@
|
|
|
1
1
|
# @link-assistant/hive-mind
|
|
2
2
|
|
|
3
|
+
## 1.73.5
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 7cb9b7e: fix(claude): recover from corrupted extended-thinking blocks instead of looping (#1834)
|
|
8
|
+
|
|
9
|
+
A long Claude (Opus) agentic run with extended thinking + tool use can leave a
|
|
10
|
+
thinking block in the session transcript corrupted (text emptied while the
|
|
11
|
+
original signature is kept). The Anthropic API then rejects every following turn
|
|
12
|
+
with ``400 ... `thinking` or `redacted_thinking` blocks in the latest assistant
|
|
13
|
+
message cannot be modified``, permanently poisoning the on-disk session — so any
|
|
14
|
+
`--resume` retry fails forever. This is an upstream Claude Code bug
|
|
15
|
+
(anthropics/claude-code#63147).
|
|
16
|
+
|
|
17
|
+
Hive Mind now detects this terminal error (`classifyRetryableError` →
|
|
18
|
+
`requiresFreshSession`) and recovers with a two-phase escalation: it **tries to
|
|
19
|
+
resume the existing session first** (capped by
|
|
20
|
+
`HIVE_MIND_MAX_THINKING_BLOCK_RESUMES`, default 1) and only when resume is not
|
|
21
|
+
possible does it **discard the un-resumable session and restart fresh** (capped
|
|
22
|
+
by `HIVE_MIND_MAX_THINKING_BLOCK_RESTARTS`, default 2) — rather than retrying the
|
|
23
|
+
dead session or failing outright.
|
|
24
|
+
|
|
25
|
+
Additionally, on **all** critical errors Hive Mind now auto-commits (and
|
|
26
|
+
best-effort pushes) any uncommitted changes by default before recovery resets
|
|
27
|
+
the session, so partial work is preserved in the PR branch history. This is
|
|
28
|
+
on by default and can be toggled with `HIVE_MIND_AUTO_COMMIT_ON_CRITICAL_ERROR`.
|
|
29
|
+
|
|
30
|
+
Verbose logging records the `request_id` and `messages.N.content.N` path for
|
|
31
|
+
diagnostics. A deep case study with the full reproduction log is added under
|
|
32
|
+
`docs/case-studies/issue-1834`.
|
|
33
|
+
|
|
34
|
+
## 1.73.4
|
|
35
|
+
|
|
36
|
+
### Patch Changes
|
|
37
|
+
|
|
38
|
+
- bfdc3fe: Add support for Claude Opus 4.8 (issue #1832). The bare `opus` alias for the `claude` tool now resolves to `claude-opus-4-8`, and the explicit `opus-4-8` / `claude-opus-4-8` aliases (plus their `[1m]` variants for the 1M-token context window) are accepted everywhere existing Opus aliases are. All earlier aliases keep working unchanged — `opus-4-7` / `claude-opus-4-7`, `opus-4-6` / `claude-opus-4-6`, `opus-4-5`, `sonnet`, `haiku`, and `opusplan` continue to map to the same model IDs as before. The `--fallback-model` default chain for the `claude` tool extends to `opus`/`opus-4-8` → `opus-4-7` → `opus-4-6`; the `--think xhigh`/`max` levels remain supported (4.8 shares Opus 4.7's effort surface and adaptive-only thinking, so Claude Code never emits `MAX_THINKING_TOKENS` for it); `--show-thinking-content` still opts into thinking output on 4.8 the same way it does on 4.7. Adds the deep case study under `docs/case-studies/issue-1832/` (covering the requirements, solution plan, and verification matrix) and `tests/test-opus-48-model-support.mjs` (175 assertions) alongside the existing 4.7 regression test. The English `docs/CONFIGURATION.md` row text is left unchanged in this PR to keep all four language siblings in sync; the case study is the authoritative user-facing documentation for the 4.8 behavior.
|
|
39
|
+
|
|
3
40
|
## 1.73.3
|
|
4
41
|
|
|
5
42
|
### Patch Changes
|
package/package.json
CHANGED
|
package/src/claude.budget-stats.lib.mjs
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
// Token budget statistics display module
|
|
3
3
|
// Extracted from claude.lib.mjs to maintain file line limits
|
|
4
4
|
|
|
5
|
-
import { formatNumber } from './claude.lib.mjs';
|
|
5
|
+
import { formatNumber, calculateSessionTokens } from './claude.lib.mjs';
|
|
6
|
+
import { reportError } from './sentry.lib.mjs';
|
|
6
7
|
import Decimal from 'decimal.js-light';
|
|
7
8
|
import { getCacheReadTokenCount, getCacheWriteTokenCount, getCumulativeContextInputTokens, getDisplayContextInputTokens, getExplicitContextFillInputTokens, getInputTokenCount, getOutputTokenCount, getRestoredContextInputTokens } from './context-fill.lib.mjs';
|
|
8
9
|
|
|
@@ -306,6 +307,81 @@ export const displayBudgetStats = async (usage, tokenUsage, log) => {
|
|
|
306
307
|
await dumpBudgetTrace(usage, tokenUsage, log);
|
|
307
308
|
};
|
|
308
309
|
|
|
310
|
+
/**
|
|
311
|
+
* Calculate and display the total token-usage summary for a finished Claude session.
|
|
312
|
+
* Extracted from claude.lib.mjs to keep that file under the 1500-line limit (Issue #1834).
|
|
313
|
+
* Reads the session JSONL, logs the per-model breakdown, cost comparison and (optionally)
|
|
314
|
+
* budget stats. Failures are reported but never thrown — token reporting is best-effort.
|
|
315
|
+
* @param {Object} params
|
|
316
|
+
* @param {string} params.sessionId - Claude session id (skips when falsy)
|
|
317
|
+
* @param {string} params.tempDir - Working directory containing the session JSONL (skips when falsy)
|
|
318
|
+
* @param {Object|null} params.resultModelUsage - Authoritative per-model usage from the result JSON event
|
|
319
|
+
* @param {number} params.anthropicTotalCostUSD - Anthropic's official total cost (for the comparison line)
|
|
320
|
+
* @param {Object} params.argv - Parsed CLI args (reads argv.tokensBudgetStats)
|
|
321
|
+
* @param {Function} params.log - Logger
|
|
322
|
+
*/
|
|
323
|
+
export const displaySessionTokenUsage = async ({ sessionId, tempDir, resultModelUsage, anthropicTotalCostUSD, argv, log }) => {
|
|
324
|
+
if (!sessionId || !tempDir) return;
|
|
325
|
+
try {
|
|
326
|
+
const tokenUsage = await calculateSessionTokens(sessionId, tempDir, resultModelUsage);
|
|
327
|
+
if (!tokenUsage) return;
|
|
328
|
+
// Issue #1501: Log deduplication stats in verbose mode
|
|
329
|
+
if (tokenUsage.duplicateEntriesSkipped > 0) {
|
|
330
|
+
await log(`\n⚠️ JSONL deduplication: skipped ${tokenUsage.duplicateEntriesSkipped} duplicate entries (upstream: anthropics/claude-code#6805)`, { verbose: true });
|
|
331
|
+
}
|
|
332
|
+
if (tokenUsage.peakContextUsage > 0) {
|
|
333
|
+
await log(`📊 Peak restored-context input: ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
|
|
334
|
+
}
|
|
335
|
+
await log('\n💰 Token Usage Summary:');
|
|
336
|
+
// Display per-model breakdown
|
|
337
|
+
if (tokenUsage.modelUsage) {
|
|
338
|
+
const modelIds = Object.keys(tokenUsage.modelUsage);
|
|
339
|
+
const modelsFromResult = modelIds.filter(id => tokenUsage.modelUsage[id]._sourceResultJson);
|
|
340
|
+
if (modelsFromResult.length > 0) {
|
|
341
|
+
await log(`📊 Token data supplemented from result JSON for: ${modelsFromResult.join(', ')}`, { verbose: true });
|
|
342
|
+
}
|
|
343
|
+
for (const modelId of modelIds) {
|
|
344
|
+
const usage = tokenUsage.modelUsage[modelId];
|
|
345
|
+
const sourceNote = usage._sourceResultJson ? ' (from result JSON)' : '';
|
|
346
|
+
await log(`\n 📊 ${usage.modelName || modelId}:${sourceNote}`);
|
|
347
|
+
await displayModelUsage(usage, log);
|
|
348
|
+
// Display budget stats if flag is enabled
|
|
349
|
+
if (argv.tokensBudgetStats && usage.modelInfo?.limit) {
|
|
350
|
+
await displayBudgetStats(usage, tokenUsage, log);
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
// Show totals if multiple models were used
|
|
354
|
+
if (modelIds.length > 1) {
|
|
355
|
+
await log('\n 📈 Total across all models:');
|
|
356
|
+
}
|
|
357
|
+
// Show cost comparison (for both single and multiple models)
|
|
358
|
+
await displayCostComparison(tokenUsage.totalCostUSD, anthropicTotalCostUSD, log);
|
|
359
|
+
// Show total tokens for single model only
|
|
360
|
+
if (modelIds.length === 1) {
|
|
361
|
+
await log(` Total tokens: ${formatNumber(tokenUsage.totalTokens)}`);
|
|
362
|
+
}
|
|
363
|
+
} else {
|
|
364
|
+
// Fallback to old format if modelUsage is not available
|
|
365
|
+
await log(` Input tokens: ${formatNumber(tokenUsage.inputTokens)}`);
|
|
366
|
+
if (tokenUsage.cacheCreationTokens > 0) {
|
|
367
|
+
await log(` Cache creation tokens: ${formatNumber(tokenUsage.cacheCreationTokens)}`);
|
|
368
|
+
}
|
|
369
|
+
if (tokenUsage.cacheReadTokens > 0) {
|
|
370
|
+
await log(` Cache read tokens: ${formatNumber(tokenUsage.cacheReadTokens)}`);
|
|
371
|
+
}
|
|
372
|
+
await log(` Output tokens: ${formatNumber(tokenUsage.outputTokens)}`);
|
|
373
|
+
await log(` Total tokens: ${formatNumber(tokenUsage.totalTokens)}`);
|
|
374
|
+
}
|
|
375
|
+
} catch (tokenError) {
|
|
376
|
+
reportError(tokenError, {
|
|
377
|
+
context: 'calculate_session_tokens',
|
|
378
|
+
sessionId,
|
|
379
|
+
operation: 'read_session_jsonl',
|
|
380
|
+
});
|
|
381
|
+
await log(` ⚠️ Could not calculate token usage: ${tokenError.message}`, { verbose: true });
|
|
382
|
+
}
|
|
383
|
+
};
|
|
384
|
+
|
|
309
385
|
/**
|
|
310
386
|
* Merge resultModelUsage from Claude Code result JSON into JSONL-based modelUsage map.
|
|
311
387
|
* Issue #1508: The JSONL file may miss sub-agent model entries (e.g., Haiku used internally),
|
package/src/claude.lib.mjs
CHANGED
|
@@ -15,7 +15,7 @@ import { setupBidirectionalHandler, finalizeBidirectionalHandler, validateBidire
|
|
|
15
15
|
import { initProgressMonitoring } from './solve.progress-monitoring.lib.mjs';
|
|
16
16
|
import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
|
|
17
17
|
import Decimal from 'decimal.js-light';
|
|
18
|
-
import {
|
|
18
|
+
import { createEmptySubSessionUsage, accumulateModelUsage, mergeResultModelUsage, createSubAgentCallEntry, accumulateSubAgentUsage, getRawRequestInputTokens, displaySessionTokenUsage } from './claude.budget-stats.lib.mjs';
|
|
19
19
|
import { buildClaudeResumeCommand, buildClaudeAutonomousResumeCommand } from './claude.command-builder.lib.mjs';
|
|
20
20
|
import { buildSolveResumeCommand } from './solve.resume-command.lib.mjs'; // Issue #942
|
|
21
21
|
import { SESSION_FORCE_KILLED_MARKER, postTrackedComment } from './tool-comments.lib.mjs'; // Issue #1625
|
|
@@ -25,9 +25,10 @@ import { buildMcpConfigWithoutPlaywright } from './playwright-mcp.lib.mjs';
|
|
|
25
25
|
import { resolveClaudeSessionToolFlags } from './useless-tools.lib.mjs';
|
|
26
26
|
import { ensureClaudeQuietConfig } from './claude-quiet-config.lib.mjs';
|
|
27
27
|
import { fetchModelInfo } from './model-info.lib.mjs';
|
|
28
|
-
import { classifyRetryableError, maybeSwitchToFallbackModel } from './tool-retry.lib.mjs';
|
|
28
|
+
import { classifyRetryableError, maybeSwitchToFallbackModel, waitWithCountdown } from './tool-retry.lib.mjs';
|
|
29
29
|
import { resolveSubSessionSize } from './sub-session-size.lib.mjs'; // Issue #1706
|
|
30
30
|
import { withAgentsMdAsClaudeMd } from './agents-md-claude-support.lib.mjs';
|
|
31
|
+
import { createThinkingBlockRecovery } from './claude.thinking-block-recovery.lib.mjs'; // Issue #1834 (PR #1835 feedback)
|
|
31
32
|
export { availableModels, fetchModelInfo }; // Re-export for backward compatibility
|
|
32
33
|
const showResumeCommand = async (sessionId, tempDir, claudePath, model, log, argv = null) => {
|
|
33
34
|
if (!sessionId || !tempDir) return;
|
|
@@ -607,20 +608,12 @@ export const executeClaudeCommand = async params => {
|
|
|
607
608
|
// Issue #1331: Unified retry configuration for all transient API errors
|
|
608
609
|
// (Overloaded, 503 Network Error, Internal Server Error) - same params, all with session preservation
|
|
609
610
|
let retryCount = 0;
|
|
610
|
-
//
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
let remaining = delayMs;
|
|
617
|
-
const timer = setInterval(async () => {
|
|
618
|
-
remaining -= 60000;
|
|
619
|
-
if (remaining > 0) await log(`⏳ ${Math.round(remaining / 60000)} min remaining...`);
|
|
620
|
-
}, 60000);
|
|
621
|
-
await new Promise(resolve => setTimeout(resolve, delayMs));
|
|
622
|
-
clearInterval(timer);
|
|
623
|
-
};
|
|
611
|
+
// Issue #1834 (PR #1835 feedback): corrupted-thinking-block recovery — resume the session first,
|
|
612
|
+
// then escalate to a fresh restart, auto-committing uncommitted work before each attempt. Created
|
|
613
|
+
// once so its resume/restart caps persist across recursive retry calls.
|
|
614
|
+
const tryThinkingBlockRecovery = createThinkingBlockRecovery({ argv, tempDir, branchName, $, log });
|
|
615
|
+
// Helper `waitWithCountdown` (per-minute countdown for delays >1 minute, Issue #1331) is shared
|
|
616
|
+
// from tool-retry.lib.mjs so claude/codex/gemini/qwen/opencode all use one implementation.
|
|
624
617
|
// Function to execute with retry logic
|
|
625
618
|
const executeWithRetry = async () => {
|
|
626
619
|
// Execute claude command from the cloned repository directory
|
|
@@ -981,6 +974,12 @@ export const executeClaudeCommand = async params => {
|
|
|
981
974
|
isRequestTimeout = true;
|
|
982
975
|
await log('⏱️ Detected request timeout from Claude CLI (will retry with --resume)', { verbose: true });
|
|
983
976
|
}
|
|
977
|
+
// Issue #1834: Detect corrupted extended-thinking-block 400 (un-resumable session).
|
|
978
|
+
// Capture diagnostics (request id, content path) to aid debugging and upstream reports.
|
|
979
|
+
if ((lastMessage.includes('thinking') || lastMessage.includes('redacted_thinking')) && lastMessage.includes('cannot be modified')) {
|
|
980
|
+
const contentPath = (lastMessage.match(/messages\.\d+\.content\.\d+/) || [])[0] || 'unknown';
|
|
981
|
+
await log(`🧠 Detected corrupted thinking-block error (un-resumable session). request_id=${data.request_id || 'unknown'}, at=${contentPath}. Will discard the session and restart fresh (Issue #1834, upstream anthropics/claude-code#63147).`, { verbose: true });
|
|
982
|
+
}
|
|
984
983
|
}
|
|
985
984
|
}
|
|
986
985
|
if (data.type === 'text' && data.text) lastMessage = data.text;
|
|
@@ -1160,6 +1159,13 @@ export const executeClaudeCommand = async params => {
|
|
|
1160
1159
|
// Issue #817: Stop bidirectional mode monitoring and collect queued feedback
|
|
1161
1160
|
queuedFeedback = await finalizeBidirectionalHandler(bidirectionalHandler, log);
|
|
1162
1161
|
const retryableLastError = classifyRetryableError(lastMessage);
|
|
1162
|
+
// Issue #1834: Corrupted extended-thinking blocks → try to resume the session first, then fall
|
|
1163
|
+
// back to a fresh restart (PR #1835 feedback). When both caps are reached, tryThinkingBlockRecovery
|
|
1164
|
+
// logs the failure and returns false; we fall through to the normal commandFailed return below
|
|
1165
|
+
// (the 400 is not a transient pattern, so it is not retried).
|
|
1166
|
+
if (commandFailed && retryableLastError.requiresFreshSession && (await tryThinkingBlockRecovery({ classified: retryableLastError, source: 'result', sessionId }))) {
|
|
1167
|
+
return await executeWithRetry();
|
|
1168
|
+
}
|
|
1163
1169
|
// Issues #1331, #1353, #1472/#1475: Unified transient error retry (exponential backoff, session preservation)
|
|
1164
1170
|
const isTransientError = isStartupTimeout || isActivityTimeout || isOverloadError || isInternalServerError || is503Error || isRequestTimeout || retryableLastError.isRetryable || (lastMessage.includes('API Error: 500') && (lastMessage.includes('Overloaded') || lastMessage.includes('Internal server error'))) || (lastMessage.includes('API Error: 529') && (lastMessage.includes('overloaded_error') || lastMessage.includes('Overloaded'))) || (lastMessage.includes('api_error') && lastMessage.includes('Overloaded')) || (lastMessage.includes('overloaded_error') && lastMessage.includes('Overloaded')) || lastMessage.includes('API Error: 503') || (lastMessage.includes('503') && (lastMessage.includes('upstream connect error') || lastMessage.includes('remote connection failure'))) || lastMessage === 'Request timed out' || lastMessage.includes('Request timed out');
|
|
1165
1171
|
if ((commandFailed || isTransientError) && isTransientError) {
|
|
@@ -1300,68 +1306,9 @@ export const executeClaudeCommand = async params => {
|
|
|
1300
1306
|
await log('\n\n✅ Claude command completed');
|
|
1301
1307
|
}
|
|
1302
1308
|
await log(`📊 Total messages: ${messageCount}, Tool uses: ${toolUseCount}`);
|
|
1303
|
-
// Calculate and display total token usage from session JSONL file
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
const tokenUsage = await calculateSessionTokens(sessionId, tempDir, resultModelUsage);
|
|
1307
|
-
if (tokenUsage) {
|
|
1308
|
-
// Issue #1501: Log deduplication stats in verbose mode
|
|
1309
|
-
if (tokenUsage.duplicateEntriesSkipped > 0) {
|
|
1310
|
-
await log(`\n⚠️ JSONL deduplication: skipped ${tokenUsage.duplicateEntriesSkipped} duplicate entries (upstream: anthropics/claude-code#6805)`, { verbose: true });
|
|
1311
|
-
}
|
|
1312
|
-
if (tokenUsage.peakContextUsage > 0) {
|
|
1313
|
-
await log(`📊 Peak restored-context input: ${formatNumber(tokenUsage.peakContextUsage)} tokens`, { verbose: true });
|
|
1314
|
-
}
|
|
1315
|
-
await log('\n💰 Token Usage Summary:');
|
|
1316
|
-
// Display per-model breakdown
|
|
1317
|
-
if (tokenUsage.modelUsage) {
|
|
1318
|
-
const modelIds = Object.keys(tokenUsage.modelUsage);
|
|
1319
|
-
const modelsFromResult = modelIds.filter(id => tokenUsage.modelUsage[id]._sourceResultJson);
|
|
1320
|
-
if (modelsFromResult.length > 0) {
|
|
1321
|
-
await log(`📊 Token data supplemented from result JSON for: ${modelsFromResult.join(', ')}`, { verbose: true });
|
|
1322
|
-
}
|
|
1323
|
-
for (const modelId of modelIds) {
|
|
1324
|
-
const usage = tokenUsage.modelUsage[modelId];
|
|
1325
|
-
const sourceNote = usage._sourceResultJson ? ' (from result JSON)' : '';
|
|
1326
|
-
await log(`\n 📊 ${usage.modelName || modelId}:${sourceNote}`);
|
|
1327
|
-
await displayModelUsage(usage, log);
|
|
1328
|
-
// Display budget stats if flag is enabled
|
|
1329
|
-
if (argv.tokensBudgetStats && usage.modelInfo?.limit) {
|
|
1330
|
-
await displayBudgetStats(usage, tokenUsage, log);
|
|
1331
|
-
}
|
|
1332
|
-
}
|
|
1333
|
-
// Show totals if multiple models were used
|
|
1334
|
-
if (modelIds.length > 1) {
|
|
1335
|
-
await log('\n 📈 Total across all models:');
|
|
1336
|
-
}
|
|
1337
|
-
// Show cost comparison (for both single and multiple models)
|
|
1338
|
-
await displayCostComparison(tokenUsage.totalCostUSD, anthropicTotalCostUSD, log);
|
|
1339
|
-
// Show total tokens for single model only
|
|
1340
|
-
if (modelIds.length === 1) {
|
|
1341
|
-
await log(` Total tokens: ${formatNumber(tokenUsage.totalTokens)}`);
|
|
1342
|
-
}
|
|
1343
|
-
} else {
|
|
1344
|
-
// Fallback to old format if modelUsage is not available
|
|
1345
|
-
await log(` Input tokens: ${formatNumber(tokenUsage.inputTokens)}`);
|
|
1346
|
-
if (tokenUsage.cacheCreationTokens > 0) {
|
|
1347
|
-
await log(` Cache creation tokens: ${formatNumber(tokenUsage.cacheCreationTokens)}`);
|
|
1348
|
-
}
|
|
1349
|
-
if (tokenUsage.cacheReadTokens > 0) {
|
|
1350
|
-
await log(` Cache read tokens: ${formatNumber(tokenUsage.cacheReadTokens)}`);
|
|
1351
|
-
}
|
|
1352
|
-
await log(` Output tokens: ${formatNumber(tokenUsage.outputTokens)}`);
|
|
1353
|
-
await log(` Total tokens: ${formatNumber(tokenUsage.totalTokens)}`);
|
|
1354
|
-
}
|
|
1355
|
-
}
|
|
1356
|
-
} catch (tokenError) {
|
|
1357
|
-
reportError(tokenError, {
|
|
1358
|
-
context: 'calculate_session_tokens',
|
|
1359
|
-
sessionId,
|
|
1360
|
-
operation: 'read_session_jsonl',
|
|
1361
|
-
});
|
|
1362
|
-
await log(` ⚠️ Could not calculate token usage: ${tokenError.message}`, { verbose: true });
|
|
1363
|
-
}
|
|
1364
|
-
}
|
|
1309
|
+
// Calculate and display total token usage from session JSONL file.
|
|
1310
|
+
// Extracted to claude.budget-stats.lib.mjs to keep this file under the line limit (Issue #1834).
|
|
1311
|
+
await displaySessionTokenUsage({ sessionId, tempDir, resultModelUsage, anthropicTotalCostUSD, argv, log });
|
|
1365
1312
|
await showResumeCommand(sessionId, tempDir, claudePath, argv.model, log, argv);
|
|
1366
1313
|
return {
|
|
1367
1314
|
success: true,
|
|
@@ -1388,6 +1335,12 @@ export const executeClaudeCommand = async params => {
|
|
|
1388
1335
|
});
|
|
1389
1336
|
const errorStr = error.message || error.toString();
|
|
1390
1337
|
const retryableException = classifyRetryableError(errorStr);
|
|
1338
|
+
// Issue #1834: Corrupted extended-thinking blocks surfaced as a thrown exception. Same recovery
|
|
1339
|
+
// as the streamed-result path: resume the session first, then fall back to a fresh restart.
|
|
1340
|
+
if (retryableException.requiresFreshSession && (await tryThinkingBlockRecovery({ classified: retryableException, source: 'exception', sessionId }))) {
|
|
1341
|
+
retryCount++;
|
|
1342
|
+
return await executeWithRetry();
|
|
1343
|
+
}
|
|
1391
1344
|
// Issue #1331: Unified handler for all transient API errors in exception block
|
|
1392
1345
|
// Issue #1353: Also handle "Request timed out" in exception block
|
|
1393
1346
|
// (Overloaded, 503, Internal Server Error, Request timed out) - all with session preservation
|
|
package/src/claude.thinking-block-recovery.lib.mjs
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// Issue #1834: recovery for corrupted extended-thinking blocks.
|
|
4
|
+
//
|
|
5
|
+
// When extended thinking is combined with tool use, Claude Code can persist a thinking block to the
|
|
6
|
+
// on-disk session transcript with the `thinking` text emptied to "" while keeping the original
|
|
7
|
+
// `signature`. On resume/continue the API validates the signature against the now-empty text and
|
|
8
|
+
// rejects the turn with a 400:
|
|
9
|
+
// API Error: 400 ... `thinking` or `redacted_thinking` blocks in the latest assistant message
|
|
10
|
+
// cannot be modified. These blocks must remain as they were in the original response.
|
|
11
|
+
// Upstream: https://github.com/anthropics/claude-code/issues/63147
|
|
12
|
+
//
|
|
13
|
+
// PR #1835 feedback: "in case of this specific error we should try resume first, and if not possible
|
|
14
|
+
// try to restart." Recovery is therefore a two-phase escalation:
|
|
15
|
+
// Phase 1 — resume the existing session (context-preserving; occasionally the transcript is intact
|
|
16
|
+
// enough to continue).
|
|
17
|
+
// Phase 2 — resume unavailable or already failed → discard the session and start fresh (`/clear`).
|
|
18
|
+
// On every attempt we first auto-commit any uncommitted work (Issue #1834 / PR #1835 feedback:
|
|
19
|
+
// "on all critical errors we auto commit uncommitted changes by default") so nothing is lost when
|
|
20
|
+
// the session context resets.
|
|
21
|
+
|
|
22
|
+
import { retryLimits, criticalErrorRecovery } from './config.lib.mjs';
|
|
23
|
+
import { waitWithCountdown } from './tool-retry.lib.mjs';
|
|
24
|
+
import { commitUncommittedChangesOnCriticalError } from './critical-error-commit.lib.mjs';
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Create a stateful corrupted-thinking-block recovery handler. The returned function persists its
|
|
28
|
+
* resume/restart counters across calls (so the caps survive recursive retries) and mutates
|
|
29
|
+
* `argv.resume` to drive the next session: setting it to the session id resumes, clearing it forces
|
|
30
|
+
* a fresh session.
|
|
31
|
+
*
|
|
32
|
+
* @param {object} ctx
|
|
33
|
+
* @param {object} ctx.argv - parsed CLI args (argv.resume is mutated to choose resume vs fresh).
|
|
34
|
+
* @param {string} ctx.tempDir - working tree for auto-committing uncommitted work.
|
|
35
|
+
* @param {string} [ctx.branchName] - branch to push preserved work to.
|
|
36
|
+
* @param {Function} ctx.$ - command-stream executor.
|
|
37
|
+
* @param {Function} ctx.log - async logger.
|
|
38
|
+
* @param {number} [ctx.waitMs=5000] - settle delay before re-running (overridable for tests).
|
|
39
|
+
* @returns {(opts: {classified: object, source: string, sessionId: string|null}) => Promise<boolean>}
|
|
40
|
+
* Resolves true when a recovery attempt was initiated (caller should re-run); false when
|
|
41
|
+
* both caps are exhausted (caller should fail).
|
|
42
|
+
*/
|
|
43
|
+
export const createThinkingBlockRecovery = ({ argv, tempDir, branchName, $, log, waitMs = 5000 }) => {
|
|
44
|
+
let resumeCount = 0;
|
|
45
|
+
let restartCount = 0;
|
|
46
|
+
return async ({ classified, source, sessionId }) => {
|
|
47
|
+
const preserveWork = async () => {
|
|
48
|
+
if (criticalErrorRecovery.autoCommitUncommittedChanges) {
|
|
49
|
+
await commitUncommittedChangesOnCriticalError({ tempDir, branchName, $, log, reason: `${classified.label} (${source})` });
|
|
50
|
+
}
|
|
51
|
+
};
|
|
52
|
+
// Phase 1 — resume the existing session first (cheaper, keeps accumulated context).
|
|
53
|
+
if (sessionId && resumeCount < retryLimits.maxThinkingBlockResumes) {
|
|
54
|
+
resumeCount++;
|
|
55
|
+
await preserveWork();
|
|
56
|
+
await log(`\n⚠️ ${classified.label} (${source}). Resume attempt ${resumeCount}/${retryLimits.maxThinkingBlockResumes} — trying to resume the existing session first before discarding it (Issue #1834)...`, { level: 'warning' });
|
|
57
|
+
argv.resume = sessionId;
|
|
58
|
+
await waitWithCountdown(waitMs, log);
|
|
59
|
+
await log('\n🔄 Resuming the session now...');
|
|
60
|
+
return true;
|
|
61
|
+
}
|
|
62
|
+
// Phase 2 — resume not possible / already failed → discard the session and start fresh.
|
|
63
|
+
if (restartCount < retryLimits.maxThinkingBlockRestarts) {
|
|
64
|
+
restartCount++;
|
|
65
|
+
await preserveWork();
|
|
66
|
+
await log(`\n⚠️ ${classified.label} (${source}). Resume not possible — restart ${restartCount}/${retryLimits.maxThinkingBlockRestarts} with a fresh session (Issue #1834)...`, { level: 'warning' });
|
|
67
|
+
await log(` Discarding session ${argv.resume || sessionId || '(none)'} and starting fresh — the corrupted thinking blocks can never be replayed (upstream anthropics/claude-code#63147).`, { verbose: true });
|
|
68
|
+
// Force a fresh session — do NOT resume the corrupted one, otherwise the 400 repeats forever.
|
|
69
|
+
argv.resume = undefined;
|
|
70
|
+
await waitWithCountdown(waitMs, log);
|
|
71
|
+
await log('\n🔄 Restarting with a fresh session now...');
|
|
72
|
+
return true;
|
|
73
|
+
}
|
|
74
|
+
await log(`\n\n❌ Corrupted thinking blocks persisted after ${resumeCount} resume + ${restartCount} fresh-session attempt(s) (Issue #1834).\n This is an upstream Claude Code bug (anthropics/claude-code#63147). Failing to avoid an endless recovery loop.`, { level: 'error' });
|
|
75
|
+
return false;
|
|
76
|
+
};
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
export default { createThinkingBlockRecovery };
|
package/src/config.lib.mjs
CHANGED
|
@@ -137,6 +137,27 @@ export const retryLimits = {
|
|
|
137
137
|
// Default: 5 — retry generously even when API signals not retryable, since the signal can be wrong
|
|
138
138
|
// for transient backend glitches (e.g. overloaded errors observed as non-retryable 500s).
|
|
139
139
|
maxNotRetryableAttempts: parseIntWithDefault('HIVE_MIND_MAX_NOT_RETRYABLE_ATTEMPTS', 5),
|
|
140
|
+
// Corrupted extended-thinking-block recovery (Issue #1834)
|
|
141
|
+
// When Claude Code returns a 400 "`thinking` or `redacted_thinking` blocks ... cannot be modified",
|
|
142
|
+
// the on-disk session is permanently un-resumable (upstream anthropics/claude-code#63147: the
|
|
143
|
+
// transcript stores thinking text as "" but keeps the original signature, so every resumed turn
|
|
144
|
+
// fails signature validation). The only recovery is to discard the session and start a fresh one
|
|
145
|
+
// (equivalent to `/clear`). Cap fresh restarts to avoid expensive re-run loops.
|
|
146
|
+
maxThinkingBlockRestarts: parseIntWithDefault('HIVE_MIND_MAX_THINKING_BLOCK_RESTARTS', 2),
|
|
147
|
+
// PR #1835 feedback: "in case of this specific error we should try resume first, and if not
|
|
148
|
+
// possible try to restart." Before discarding the session we first attempt to resume it this many
|
|
149
|
+
// times (context-preserving). Only after these resume attempts also fail do we fall back to a
|
|
150
|
+
// fresh restart. Default: 1 — one cheap resume attempt, then escalate to a fresh session.
|
|
151
|
+
maxThinkingBlockResumes: parseIntWithDefault('HIVE_MIND_MAX_THINKING_BLOCK_RESUMES', 1),
|
|
152
|
+
};
|
|
153
|
+
|
|
154
|
+
// Critical-error recovery behaviour (Issue #1834, PR #1835 feedback)
|
|
155
|
+
// "On all critical errors we auto commit uncommitted changes by default." When a critical error
|
|
156
|
+
// forces the tool to discard/restart a session, any uncommitted work on disk would be lost when the
|
|
157
|
+
// session context resets. Auto-committing (and pushing) preserves it in the PR branch. On by default;
|
|
158
|
+
// set HIVE_MIND_AUTO_COMMIT_ON_CRITICAL_ERROR=false to disable.
|
|
159
|
+
export const criticalErrorRecovery = {
|
|
160
|
+
autoCommitUncommittedChanges: getenv('HIVE_MIND_AUTO_COMMIT_ON_CRITICAL_ERROR', 'true').toLowerCase() === 'true',
|
|
140
161
|
};
|
|
141
162
|
|
|
142
163
|
// Claude Code CLI configurations
|
|
@@ -177,7 +198,7 @@ export const DEFAULT_MAX_THINKING_BUDGET = 31999;
|
|
|
177
198
|
export const DEFAULT_MAX_THINKING_BUDGET_OPUS_46 = parseIntWithDefault('HIVE_MIND_MAX_THINKING_BUDGET_OPUS_46', 31999);
|
|
178
199
|
|
|
179
200
|
/**
|
|
180
|
-
* Check if a model is Opus 4.6 or later (Issue #1221, updated in Issue #1238)
|
|
201
|
+
* Check if a model is Opus 4.6 or later (Issue #1221, updated in Issue #1238, Issue #1832)
|
|
181
202
|
* @param {string} model - The model name or ID
|
|
182
203
|
* @returns {boolean} True if the model is Opus 4.6 or later
|
|
183
204
|
*/
|
|
@@ -185,22 +206,22 @@ export const isOpus46OrLater = model => {
|
|
|
185
206
|
if (!model) return false;
|
|
186
207
|
const normalizedModel = model.toLowerCase();
|
|
187
208
|
// Check for explicit opus-4-6 or later versions, or opusplan (Issue #1223)
|
|
188
|
-
// Note: The 'opus' alias now maps to Opus 4.
|
|
209
|
+
// Note: The 'opus' alias now maps to Opus 4.8 (Issue #1832), so we also check for the alias directly
|
|
189
210
|
// opusplan uses Opus for planning, so it should get Opus-level settings
|
|
190
|
-
return normalizedModel === 'opus' || normalizedModel === 'opusplan' || normalizedModel.includes('opus-4-6') || normalizedModel.includes('opus-4-7') || normalizedModel.includes('opus-5');
|
|
211
|
+
return normalizedModel === 'opus' || normalizedModel === 'opusplan' || normalizedModel.includes('opus-4-6') || normalizedModel.includes('opus-4-7') || normalizedModel.includes('opus-4-8') || normalizedModel.includes('opus-5');
|
|
191
212
|
};
|
|
192
213
|
|
|
193
214
|
const isOpus47 = model => {
|
|
194
215
|
if (!model) return false;
|
|
195
216
|
const normalizedModel = model.toLowerCase();
|
|
196
|
-
// 'opus' alias now maps to Opus 4.
|
|
217
|
+
// 'opus' alias now maps to Opus 4.8 (Issue #1832), which inherits 4.7 behaviour
|
|
197
218
|
// opusplan uses Opus for planning, so it gets Opus-level settings
|
|
198
|
-
return normalizedModel === 'opus' || normalizedModel === 'opusplan' || normalizedModel.includes('opus-4-7');
|
|
219
|
+
return normalizedModel === 'opus' || normalizedModel === 'opusplan' || normalizedModel.includes('opus-4-7') || normalizedModel.includes('opus-4-8');
|
|
199
220
|
};
|
|
200
221
|
|
|
201
222
|
/**
|
|
202
|
-
* Check if a model is Opus 4.7 or later (Issue #1620)
|
|
203
|
-
* These models use Opus 4.7+ adaptive thinking behavior.
|
|
223
|
+
* Check if a model is Opus 4.7 or later (Issue #1620, Issue #1832)
|
|
224
|
+
* These models use Opus 4.7+ adaptive thinking behavior (also applies to Opus 4.8).
|
|
204
225
|
* @param {string} model - The model name or ID
|
|
205
226
|
* @returns {boolean} True if the model is Opus 4.7 or later
|
|
206
227
|
*/
|
|
@@ -210,6 +231,22 @@ export const isOpus47OrLater = model => {
|
|
|
210
231
|
return isOpus47(model) || normalizedModel.includes('opus-5');
|
|
211
232
|
};
|
|
212
233
|
|
|
234
|
+
/**
|
|
235
|
+
* Check if a model is Opus 4.8 or later (Issue #1832)
|
|
236
|
+
* Opus 4.8 inherits all Opus 4.7 API constraints (adaptive thinking only, no sampling
|
|
237
|
+
* params) and adds new features such as mid-conversation system messages, refusal stop
|
|
238
|
+
* details, and fast mode. These are not exposed through Claude Code today, but this
|
|
239
|
+
* helper enables finer-grained control for future wiring.
|
|
240
|
+
* @param {string} model - The model name or ID
|
|
241
|
+
* @returns {boolean} True if the model is Opus 4.8 or later
|
|
242
|
+
*/
|
|
243
|
+
export const isOpus48OrLater = model => {
|
|
244
|
+
if (!model) return false;
|
|
245
|
+
const normalizedModel = model.toLowerCase();
|
|
246
|
+
// 'opus' alias now maps to Opus 4.8 (Issue #1832)
|
|
247
|
+
return normalizedModel === 'opus' || normalizedModel === 'opusplan' || normalizedModel.includes('opus-4-8') || normalizedModel.includes('opus-5');
|
|
248
|
+
};
|
|
249
|
+
|
|
213
250
|
const isOpus45 = model => {
|
|
214
251
|
if (!model) return false;
|
|
215
252
|
const m = model.toLowerCase();
|
|
@@ -247,7 +284,7 @@ export const supportsEffortLevel = model => {
|
|
|
247
284
|
|
|
248
285
|
/**
|
|
249
286
|
* Check if a model supports the xhigh effort level.
|
|
250
|
-
* Official docs list xhigh
|
|
287
|
+
* Official docs list xhigh for Claude Opus 4.7 and Opus 4.8 (Issue #1832).
|
|
251
288
|
* @param {string} model - The model name or ID
|
|
252
289
|
* @returns {boolean} True if the model supports xhigh effort
|
|
253
290
|
*/
|
|
@@ -336,8 +373,10 @@ export const tokensToThinkingLevel = getTokensToThinkingLevel(DEFAULT_MAX_THINKI
|
|
|
336
373
|
export const OPUS_46_EFFORT_LEVELS = ['low', 'medium', 'high', 'max'];
|
|
337
374
|
|
|
338
375
|
/**
|
|
339
|
-
* Valid effort levels for Opus 4.7 (Issue #1620)
|
|
340
|
-
*
|
|
376
|
+
* Valid effort levels for Opus 4.7 and Opus 4.8 (Issue #1620, Issue #1832)
|
|
377
|
+
* Both models support the additional 'xhigh' level.
|
|
378
|
+
* Opus 4.8 keeps the same effort level set; the default effort level is 'high'
|
|
379
|
+
* (enforced by Claude Code itself, not by this module).
|
|
341
380
|
* See: https://platform.claude.com/docs/en/build-with-claude/effort
|
|
342
381
|
* @type {string[]}
|
|
343
382
|
*/
|
|
@@ -438,12 +477,13 @@ export const getClaudeEnv = (options = {}) => {
|
|
|
438
477
|
MCP_TOOL_TIMEOUT: String(claudeCode.mcpToolTimeout),
|
|
439
478
|
});
|
|
440
479
|
|
|
441
|
-
// Opus 4.7+ always uses adaptive thinking — MAX_THINKING_TOKENS has no effect (Issue #1620)
|
|
480
|
+
// Opus 4.7+ always uses adaptive thinking — MAX_THINKING_TOKENS has no effect (Issue #1620, Issue #1832)
|
|
481
|
+
// Opus 4.8 inherits this constraint: adaptive thinking is the only thinking mode.
|
|
442
482
|
// For Opus 4.6 and earlier, MAX_THINKING_TOKENS controls extended thinking (Claude Code >= 2.1.12)
|
|
443
483
|
// Default is 0 (thinking disabled) per Issue #1238.
|
|
444
484
|
const opus47 = options.model && isOpus47OrLater(options.model);
|
|
445
485
|
if (opus47) {
|
|
446
|
-
// Remove any inherited MAX_THINKING_TOKENS from process.env — Opus 4.7 ignores it
|
|
486
|
+
// Remove any inherited MAX_THINKING_TOKENS from process.env — Opus 4.7+ ignores it
|
|
447
487
|
delete env.MAX_THINKING_TOKENS;
|
|
448
488
|
} else {
|
|
449
489
|
env.MAX_THINKING_TOKENS = String(options.thinkingBudget ?? 0);
|
|
@@ -467,8 +507,9 @@ export const getClaudeEnv = (options = {}) => {
|
|
|
467
507
|
}
|
|
468
508
|
}
|
|
469
509
|
|
|
470
|
-
// Opus 4.7 omits thinking content by default; opt in with --show-thinking-content (Issue #1620)
|
|
510
|
+
// Opus 4.7+ omits thinking content by default; opt in with --show-thinking-content (Issue #1620, Issue #1832)
|
|
471
511
|
// Sets CLAUDE_CODE_SHOW_THINKING=1 which Claude Code uses to request display: "summarized"
|
|
512
|
+
// Applies to Opus 4.8 as well, which inherits Opus 4.7 thinking display behaviour.
|
|
472
513
|
if (options.showThinkingContent) {
|
|
473
514
|
env.CLAUDE_CODE_SHOW_THINKING = '1';
|
|
474
515
|
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// Issue #1834 (PR #1835 feedback): "On all critical errors we auto commit uncommitted changes by
|
|
4
|
+
// default." When the tool hits a critical error and has to discard/restart a session (e.g. the
|
|
5
|
+
// corrupted extended-thinking-block 400, anthropics/claude-code#63147), any work the agent already
|
|
6
|
+
// made on disk would otherwise be silently lost when the session context is reset. This helper
|
|
7
|
+
// commits — and best-effort pushes — those uncommitted changes so the partial work is preserved in
|
|
8
|
+
// the PR branch history before recovery proceeds.
|
|
9
|
+
//
|
|
10
|
+
// It is intentionally dependency-light (receives `$` and `log`) and NEVER throws: a failure to
|
|
11
|
+
// commit must not mask the original critical error or break the recovery flow.
|
|
12
|
+
|
|
13
|
+
import { reportError } from './sentry.lib.mjs';
|
|
14
|
+
|
|
15
|
+
/**
 * Commit (and optionally push) any uncommitted changes in a working tree before critical-error
 * recovery resets the session.
 *
 * Contract: this helper NEVER rejects. Every failure (git errors, a throwing `$`, a missing or
 * failing logger, a failing error reporter) is turned into a `{ committed, pushed }` result so
 * that it cannot mask the original critical error or break the recovery flow.
 *
 * @param {object} params
 * @param {string} params.tempDir - Working tree (git clone) to inspect.
 * @param {string} [params.branchName] - Branch to push to (push skipped when absent).
 * @param {Function} params.$ - command-stream tagged-template executor.
 * @param {Function} params.log - async logger (optional in practice; logging is best-effort).
 * @param {string} [params.reason] - Short human-readable reason, recorded in the commit message.
 * @param {boolean} [params.push=true] - Whether to push after committing.
 * @returns {Promise<{committed: boolean, pushed: boolean}>}
 */
export const commitUncommittedChangesOnCriticalError = async ({ tempDir, branchName, $, log, reason = 'critical error', push = true }) => {
  if (!tempDir || typeof $ !== 'function') {
    return { committed: false, pushed: false };
  }

  // Logging is best-effort: a missing or failing logger must not abort the commit, and must not
  // turn the `catch` handler below into a second failure.
  const safeLog = async (message, options) => {
    if (typeof log !== 'function') return;
    try {
      await (options === undefined ? log(message) : log(message, options));
    } catch {
      // Intentionally ignored — preserving work matters more than reporting on it.
    }
  };
  const textOf = value => value?.toString().trim() || '';

  try {
    const statusResult = await $({ cwd: tempDir })`git status --porcelain 2>&1`;
    // stderr is merged into stdout above, so a failing `git status` (e.g. not a git repository)
    // would otherwise be mistaken for a list of changed files. Check the exit code first.
    if ((statusResult.code ?? 0) !== 0) {
      await safeLog(`⚠️ Could not inspect working tree before recovery: ${textOf(statusResult.stdout) || textOf(statusResult.stderr)}`, { level: 'warning' });
      return { committed: false, pushed: false };
    }
    // Only trim the end: the leading columns of a porcelain line are the XY status codes, and a
    // leading space is significant (" M file" = modified but not staged).
    const statusOutput = statusResult.stdout?.toString().trimEnd() || '';
    if (!statusOutput.trim()) {
      await safeLog('   ℹ️  No uncommitted changes to preserve before recovery.', { verbose: true });
      return { committed: false, pushed: false };
    }

    await safeLog(`💾 Critical error (${reason}) — auto-committing uncommitted changes to preserve work before recovery...`);
    for (const line of statusOutput.split('\n')) await safeLog(`   ${line}`, { verbose: true });

    const addResult = await $({ cwd: tempDir })`git add -A`;
    if (addResult.code !== 0) {
      await safeLog(`⚠️ Could not stage changes before recovery: ${addResult.stderr?.toString().trim()}`, { level: 'warning' });
      return { committed: false, pushed: false };
    }

    const commitMessage = `🛟 Auto-commit before critical-error recovery (${reason})`;
    const commitResult = await $({ cwd: tempDir })`git commit -m ${commitMessage}`;
    if (commitResult.code !== 0) {
      await safeLog(`⚠️ Could not commit changes before recovery: ${commitResult.stderr?.toString().trim() || commitResult.stdout?.toString().trim()}`, { level: 'warning' });
      return { committed: false, pushed: false };
    }
    await safeLog('✅ Uncommitted changes committed before recovery.');

    // Push is optional and best-effort: the local commit alone already preserves the work.
    if (!push || !branchName) {
      return { committed: true, pushed: false };
    }
    const pushResult = await $({ cwd: tempDir })`git push origin ${branchName} 2>&1`;
    if (pushResult.code === 0) {
      await safeLog('✅ Preserved work pushed to remote.');
      return { committed: true, pushed: true };
    }
    await safeLog(`⚠️ Committed locally but could not push preserved work: ${pushResult.stderr?.toString().trim() || pushResult.stdout?.toString().trim()}`, { level: 'warning' });
    return { committed: true, pushed: false };
  } catch (error) {
    // Error reporting itself must not break the never-throws contract.
    try {
      reportError(error, { context: 'commit_uncommitted_on_critical_error', tempDir, operation: 'auto_commit_recovery' });
    } catch {
      // Intentionally ignored — see contract above.
    }
    // `error` may be a non-Error throwable (string, null), so do not assume `.message` exists.
    await safeLog(`⚠️ Error while auto-committing before recovery (continuing anyway): ${error?.message ?? String(error)}`, { level: 'warning' });
    return { committed: false, pushed: false };
  }
};
|
|
69
|
+
|
|
70
|
+
export default { commitUncommittedChangesOnCriticalError };
|
package/src/models/index.mjs
CHANGED
|
@@ -28,23 +28,25 @@ const execFileAsync = promisify(execFile);
|
|
|
28
28
|
// ─── MODEL DATA ──────────────────────────────────────────────────────────────
|
|
29
29
|
|
|
30
30
|
// Claude models (Anthropic API)
|
|
31
|
-
// Updated for Opus 4.5/4.6/4.7 and Sonnet 4.6 support (Issue #1221, Issue #1238, Issue #1329, Issue #1433, Issue #1620)
|
|
31
|
+
// Updated for Opus 4.5/4.6/4.7/4.8 and Sonnet 4.6 support (Issue #1221, Issue #1238, Issue #1329, Issue #1433, Issue #1620, Issue #1832)
|
|
32
32
|
export const claudeModels = {
|
|
33
33
|
sonnet: 'claude-sonnet-4-6', // Sonnet 4.6 (default, Issue #1329)
|
|
34
|
-
opus: 'claude-opus-4-
|
|
34
|
+
opus: 'claude-opus-4-8', // Opus 4.8 (Issue #1832)
|
|
35
35
|
haiku: 'claude-haiku-4-5-20251001', // Haiku 4.5
|
|
36
36
|
'haiku-3-5': 'claude-3-5-haiku-20241022', // Haiku 3.5
|
|
37
37
|
'haiku-3': 'claude-3-haiku-20240307', // Haiku 3
|
|
38
38
|
opusplan: 'opusplan', // Special mode: Opus for planning, Sonnet for execution (Issue #1223)
|
|
39
39
|
// Shorter version aliases (Issue #1221, Issue #1329 - PR comment feedback)
|
|
40
40
|
'sonnet-4-6': 'claude-sonnet-4-6', // Sonnet 4.6 short alias (Issue #1329)
|
|
41
|
-
'opus-4-
|
|
41
|
+
'opus-4-8': 'claude-opus-4-8', // Opus 4.8 short alias (Issue #1832)
|
|
42
|
+
'opus-4-7': 'claude-opus-4-7', // Opus 4.7 short alias (backward compatibility)
|
|
42
43
|
'opus-4-6': 'claude-opus-4-6', // Opus 4.6 short alias (backward compatibility)
|
|
43
44
|
'opus-4-5': 'claude-opus-4-5-20251101', // Opus 4.5 short alias
|
|
44
45
|
'sonnet-4-5': 'claude-sonnet-4-5-20250929', // Sonnet 4.5 short alias (backward compatibility)
|
|
45
46
|
'haiku-4-5': 'claude-haiku-4-5-20251001', // Haiku 4.5 short alias
|
|
46
|
-
// Version aliases for backward compatibility (Issue #1221, Issue #1329, Issue #1620)
|
|
47
|
-
'claude-opus-4-
|
|
47
|
+
// Version aliases for backward compatibility (Issue #1221, Issue #1329, Issue #1620, Issue #1832)
|
|
48
|
+
'claude-opus-4-8': 'claude-opus-4-8', // Opus 4.8 (Issue #1832)
|
|
49
|
+
'claude-opus-4-7': 'claude-opus-4-7', // Opus 4.7 (backward compatibility)
|
|
48
50
|
'claude-sonnet-4-6': 'claude-sonnet-4-6', // Sonnet 4.6 (Issue #1329)
|
|
49
51
|
'claude-opus-4-6': 'claude-opus-4-6', // Opus 4.6 (backward compatibility)
|
|
50
52
|
'claude-opus-4-5': 'claude-opus-4-5-20251101', // Opus 4.5
|
|
@@ -175,9 +177,10 @@ export const defaultModels = {
|
|
|
175
177
|
gemini: 'flash',
|
|
176
178
|
};
|
|
177
179
|
|
|
178
|
-
// Models that support 1M token context window via [1m] suffix (Issue #1221, Issue #1238, Issue #1329)
|
|
180
|
+
// Models that support 1M token context window via [1m] suffix (Issue #1221, Issue #1238, Issue #1329, Issue #1832)
|
|
179
181
|
// See: https://code.claude.com/docs/en/model-config
|
|
180
182
|
export const MODELS_SUPPORTING_1M_CONTEXT = [
|
|
183
|
+
'claude-opus-4-8', // Opus 4.8 (Issue #1832)
|
|
181
184
|
'claude-opus-4-7', // Opus 4.7 (Issue #1620)
|
|
182
185
|
'claude-opus-4-6',
|
|
183
186
|
'claude-opus-4-5-20251101',
|
|
@@ -186,7 +189,8 @@ export const MODELS_SUPPORTING_1M_CONTEXT = [
|
|
|
186
189
|
'claude-sonnet-4-5',
|
|
187
190
|
'sonnet', // Now maps to Sonnet 4.6 (Issue #1329)
|
|
188
191
|
'sonnet-4-6', // Short alias (Issue #1329)
|
|
189
|
-
'opus', // Now maps to Opus 4.
|
|
192
|
+
'opus', // Now maps to Opus 4.8 (Issue #1832)
|
|
193
|
+
'opus-4-8', // Short alias (Issue #1832)
|
|
190
194
|
'opus-4-7', // Short alias (Issue #1620)
|
|
191
195
|
'opus-4-6', // Short alias (Issue #1221 - PR comment feedback)
|
|
192
196
|
'opus-4-5', // Short alias (Issue #1238)
|
|
@@ -216,6 +220,7 @@ export const freeToBaseModelMap = {
|
|
|
216
220
|
|
|
217
221
|
export const CLAUDE_MODELS = {
|
|
218
222
|
...claudeModels,
|
|
223
|
+
'claude-opus-4-8': 'claude-opus-4-8', // Opus 4.8 full ID (Issue #1832)
|
|
219
224
|
'claude-opus-4-7': 'claude-opus-4-7', // Opus 4.7 full ID (Issue #1620)
|
|
220
225
|
'claude-sonnet-4-5-20250929': 'claude-sonnet-4-5-20250929',
|
|
221
226
|
'claude-opus-4-5-20251101': 'claude-opus-4-5-20251101',
|
|
@@ -989,6 +994,7 @@ export const resolveModelId = (requestedModel, tool) => {
|
|
|
989
994
|
|
|
990
995
|
export const defaultFallbackModels = {
|
|
991
996
|
claude: {
|
|
997
|
+
'claude-opus-4-8': 'opus-4-7',
|
|
992
998
|
'claude-opus-4-7': 'opus-4-6',
|
|
993
999
|
},
|
|
994
1000
|
codex: {
|
package/src/solve.config.lib.mjs
CHANGED
|
@@ -285,7 +285,7 @@ export const SOLVE_OPTION_DEFINITIONS = {
|
|
|
285
285
|
},
|
|
286
286
|
think: {
|
|
287
287
|
type: 'string',
|
|
288
|
-
description: 'Thinking level hint. For Claude, translated to --thinking-budget for Claude Code >= 2.1.12 (off=0, low=~8000, medium=~16000, high=~24000, xhigh/max=31999) and to CLAUDE_CODE_EFFORT_LEVEL when supported. Opus 4.7
|
|
288
|
+
description: 'Thinking level hint. For Claude, translated to --thinking-budget for Claude Code >= 2.1.12 (off=0, low=~8000, medium=~16000, high=~24000, xhigh/max=31999) and to CLAUDE_CODE_EFFORT_LEVEL when supported. Opus 4.8/4.7 support xhigh and max; Opus 4.6/Sonnet 4.6/Mythos support max; Opus 4.5 uses high for xhigh/max. For Codex, mapped to reasoning effort (off=none, low=low, medium=medium, high=high, xhigh/max=xhigh).',
|
|
289
289
|
choices: ['off', 'low', 'medium', 'high', 'xhigh', 'max'],
|
|
290
290
|
default: undefined,
|
|
291
291
|
},
|
|
@@ -316,12 +316,12 @@ export const SOLVE_OPTION_DEFINITIONS = {
|
|
|
316
316
|
},
|
|
317
317
|
'fallback-model': {
|
|
318
318
|
type: 'string',
|
|
319
|
-
description: 'Fallback model to switch to on model capacity/overload errors. When supported, retries resume the same session with this model. Defaults: claude opus/opus-4-7 -> opus-4-6; codex gpt-5.5 -> gpt-5.4; all others unset.',
|
|
319
|
+
description: 'Fallback model to switch to on model capacity/overload errors. When supported, retries resume the same session with this model. Defaults: claude opus/opus-4-8 -> opus-4-7; claude opus-4-7 -> opus-4-6; codex gpt-5.5 -> gpt-5.4; all others unset.',
|
|
320
320
|
default: undefined,
|
|
321
321
|
},
|
|
322
322
|
'show-thinking-content': {
|
|
323
323
|
type: 'boolean',
|
|
324
|
-
description: 'Show thinking content in Claude responses. Opus 4.7 omits thinking content by default; this option opts in to receive summarized thinking blocks. Disabled by default. Only affects --tool claude.',
|
|
324
|
+
description: 'Show thinking content in Claude responses. Opus 4.7+ omits thinking content by default (applies to Opus 4.8 as well); this option opts in to receive summarized thinking blocks. Disabled by default. Only affects --tool claude.',
|
|
325
325
|
default: false,
|
|
326
326
|
},
|
|
327
327
|
'prompt-plan-sub-agent': {
|
package/src/solve.mjs
CHANGED
|
@@ -1136,6 +1136,20 @@ try {
|
|
|
1136
1136
|
}
|
|
1137
1137
|
}
|
|
1138
1138
|
|
|
1139
|
+
// Issue #1834 (PR #1835 feedback): "on all critical errors we auto commit uncommitted changes
|
|
1140
|
+
// by default." A failed session is a critical error and exits here before the normal
|
|
1141
|
+
// auto-commit chokepoint below, so preserve (commit + push) any work the agent left on disk
|
|
1142
|
+
// first. On by default; disable via HIVE_MIND_AUTO_COMMIT_ON_CRITICAL_ERROR=false. Never throws.
|
|
1143
|
+
try {
|
|
1144
|
+
const { criticalErrorRecovery } = await import('./config.lib.mjs');
|
|
1145
|
+
if (criticalErrorRecovery.autoCommitUncommittedChanges) {
|
|
1146
|
+
const { commitUncommittedChangesOnCriticalError } = await import('./critical-error-commit.lib.mjs');
|
|
1147
|
+
await commitUncommittedChangesOnCriticalError({ tempDir, branchName, $, log, reason: `${argv.tool || 'claude'} execution failed` });
|
|
1148
|
+
}
|
|
1149
|
+
} catch (preserveError) {
|
|
1150
|
+
await log(` ⚠️ Could not auto-commit before failure exit: ${preserveError.message}`, { verbose: true });
|
|
1151
|
+
}
|
|
1152
|
+
|
|
1139
1153
|
await safeExit(1, `${argv.tool.toUpperCase()} execution failed`);
|
|
1140
1154
|
}
|
|
1141
1155
|
|
|
@@ -1159,8 +1173,13 @@ try {
|
|
|
1159
1173
|
await log('ℹ️ Playwright MCP auto-cleanup disabled via --no-playwright-mcp-auto-cleanup', { verbose: true });
|
|
1160
1174
|
}
|
|
1161
1175
|
|
|
1162
|
-
// When limit is reached, force auto-commit of any uncommitted changes to preserve work
|
|
1163
|
-
|
|
1176
|
+
// When limit is reached, force auto-commit of any uncommitted changes to preserve work.
|
|
1177
|
+
// Issue #1834 (PR #1835 feedback): "on all critical errors we auto commit uncommitted changes by
|
|
1178
|
+
// default." A failed/errored session is a critical error, so auto-commit (and push) to preserve any
|
|
1179
|
+
// work the agent left on disk. On by default; disable via HIVE_MIND_AUTO_COMMIT_ON_CRITICAL_ERROR=false.
|
|
1180
|
+
const { criticalErrorRecovery } = await import('./config.lib.mjs');
|
|
1181
|
+
const criticalError = success === false || errorDuringExecution === true;
|
|
1182
|
+
const shouldAutoCommit = argv['auto-commit-uncommitted-changes'] || limitReached || (criticalError && criticalErrorRecovery.autoCommitUncommittedChanges);
|
|
1164
1183
|
const autoRestartEnabled = argv['autoRestartOnUncommittedChanges'] !== false;
|
|
1165
1184
|
const shouldRestart = await checkForUncommittedChanges(tempDir, owner, repo, branchName, $, log, shouldAutoCommit, autoRestartEnabled);
|
|
1166
1185
|
|
package/src/tool-retry.lib.mjs
CHANGED
|
@@ -43,6 +43,21 @@ export const classifyRetryableError = value => {
|
|
|
43
43
|
return { message, isRetryable: true, isCapacity: false, label: 'Stream disconnected before completion' };
|
|
44
44
|
}
|
|
45
45
|
|
|
46
|
+
// Issue #1834: Corrupted extended-thinking blocks. When extended thinking is combined with tool
|
|
47
|
+
// use, Claude Code can persist a thinking block to the session transcript with the `thinking`
|
|
48
|
+
// text emptied to "" while retaining the original `signature`. On resume/continue the block is
|
|
49
|
+
// replayed as `{ type: 'thinking', thinking: '', signature: <original> }`; the API validates the
|
|
50
|
+
// signature against the (now empty) text and rejects every subsequent turn with:
|
|
51
|
+
// 400 ... `thinking` or `redacted_thinking` blocks in the latest assistant message cannot be
|
|
52
|
+
// modified. These blocks must remain as they were in the original response.
|
|
53
|
+
// The session is therefore permanently un-resumable — retrying with --resume always fails. The
|
|
54
|
+
// only recovery is to discard the session and start fresh (equivalent to `/clear`), so this is
|
|
55
|
+
// flagged with `requiresFreshSession` rather than the plain `isRetryable` retry-with-resume path.
|
|
56
|
+
// Upstream: https://github.com/anthropics/claude-code/issues/63147
|
|
57
|
+
if ((lower.includes('thinking') || lower.includes('redacted_thinking')) && lower.includes('cannot be modified')) {
|
|
58
|
+
return { message, isRetryable: false, isCapacity: false, requiresFreshSession: true, label: 'Corrupted thinking blocks (un-resumable session)' };
|
|
59
|
+
}
|
|
60
|
+
|
|
46
61
|
if (lower.includes('api error: 503') || (lower.includes('503') && (lower.includes('upstream connect error') || lower.includes('remote connection failure')))) {
|
|
47
62
|
return { message, isRetryable: true, isCapacity: false, label: '503 network error' };
|
|
48
63
|
}
|