@link-assistant/hive-mind 1.73.5 → 1.73.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,34 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.73.6
4
+
5
+ ### Patch Changes
6
+
7
+ - defa8c4: fix(claude): repair corrupted thinking-block transcripts so resume preserves context (#1834)
8
+
9
+ Follow-up to the Issue #1834 recovery ("can we do even better?"). The previous
10
+ recovery (PR #1835) was reactive: a plain resume of a transcript poisoned by a
11
+ corrupted extended-thinking block (`{ "type": "thinking", "thinking": "" }` with a
12
+ kept signature) just repeats the `400 ... thinking blocks ... cannot be modified`
13
+ error, so recovery almost always fell through to a **fresh restart that discards
14
+ dozens of turns** of accumulated context (50 turns / $3.84 in the second
15
+ reproduction log).
16
+
17
+ Recovery Phase 1 now **proactively repairs the on-disk session transcript** before
18
+ resuming: `repairCorruptedThinkingBlocks` (new
19
+ `src/claude.session-transcript-repair.lib.mjs`) strips the empty-text
20
+ `thinking`/`redacted_thinking` blocks from the session JSONL — a workaround proven
21
+ upstream (the Anthropic API permits _omitting_ earlier thinking, just not
22
+ _modifying_ it). When repair succeeds the resume keeps all accumulated context;
23
+ when it can't help, recovery still falls back to a fresh restart, so there is no
24
+ regression.
25
+
26
+ The repair is conservative: it never throws, only removes empty-text blocks (valid
27
+ signed thinking is untouched), never empties an assistant message, and writes a
28
+ one-time `<session>.jsonl.pre-repair-backup` before rewriting. The case study under
29
+ `docs/case-studies/issue-1834` is updated with a second reproduction log and the
30
+ new repair-then-resume design.
31
+
3
32
  ## 1.73.5
4
33
 
5
34
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.73.5",
3
+ "version": "1.73.6",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -0,0 +1,150 @@
1
+ #!/usr/bin/env node
2
+
3
+ // Issue #1834 (PR #1836): repair a Claude Code session transcript that was poisoned by a
4
+ // corrupted extended-thinking block, so the session can be RESUMED (context preserved) instead
5
+ // of being discarded entirely.
6
+ //
7
+ // Root cause (upstream anthropics/claude-code#63147, #46843, #24662, #41992): when extended
8
+ // thinking is combined with tool use, Claude Code can persist a thinking block to the on-disk
9
+ // session JSONL with its `thinking` text emptied to "" while keeping the original `signature`:
10
+ //
11
+ // { "type": "thinking", "thinking": "", "signature": "Eyc…" }
12
+ //
13
+ // On resume/continue the API replays that block and validates the signature against the now-empty
14
+ // text, rejecting every following turn with a 400:
15
+ // `thinking` or `redacted_thinking` blocks in the latest assistant message cannot be modified.
16
+ //
17
+ // The proven community workaround (anthropics/claude-code#46843, miteshashar/claude-code-thinking-
18
+ // blocks-fix) is to STRIP the corrupted (empty-text) thinking blocks from the transcript — the API
19
+ // permits omitting earlier-turn thinking, so once the offending blocks are gone the session resumes
20
+ // cleanly with all of its text/tool-use history intact. This is strictly better than throwing the
21
+ // whole session away: when the repair succeeds we keep the accumulated context (worth many dollars
22
+ // and dozens of turns); when it can't help we still fall back to a fresh restart.
23
+
24
+ import { promises as fs } from 'fs';
25
+ import os from 'os';
26
+ import path from 'path';
27
+
28
/**
 * Build the absolute path of the JSONL transcript that Claude Code keeps for a session.
 *
 * The layout produced here is `<home>/.claude/projects/<project-folder>/<sessionId>.jsonl`, where
 * `<project-folder>` is the working directory with every `/` turned into `-`.
 *
 * NOTE(review): only `/` is rewritten. If Claude Code also rewrites other characters (dots,
 * underscores, Windows separators) when naming the project folder, this lookup will miss the
 * file — confirm against the sibling path logic in claude.lib.mjs.
 *
 * @param {string} tempDir - working directory the Claude session ran in (coerced with `String`).
 * @param {string} sessionId - Claude Code session id; used verbatim as the file stem.
 * @param {string} [homeDir] - home directory override (tests); defaults to `os.homedir()`.
 * @returns {string} path to the session JSONL file under `homeDir`.
 */
export const resolveSessionTranscriptPath = (tempDir, sessionId, homeDir = os.homedir()) => {
  // Splitting on '/' and re-joining with '-' rewrites every slash, including a leading one.
  const projectFolder = String(tempDir).split('/').join('-');
  const transcriptName = `${sessionId}.jsonl`;
  return path.join(homeDir, '.claude', 'projects', projectFolder, transcriptName);
};
42
+
43
/**
 * Decide whether a transcript content block is a corrupted extended-thinking block.
 *
 * A block counts as corrupted when it is an object of type `thinking` whose `thinking` field is
 * falsy ('' / undefined / null), or of type `redacted_thinking` whose `data` field is falsy. Any
 * `signature` on the block is ignored. Non-objects and every other block type are not corrupted.
 *
 * @param {*} block - a single entry of a message `content` array.
 * @returns {boolean} true only for empty `thinking` / `redacted_thinking` blocks.
 */
const isCorruptedThinkingBlock = block => {
  // typeof null is 'object', so null needs its own check.
  if (block === null || typeof block !== 'object') {
    return false;
  }
  switch (block.type) {
    case 'thinking':
      return !block.thinking;
    case 'redacted_thinking':
      return !block.data;
    default:
      return false;
  }
};
54
+
55
/**
 * Strip corrupted (empty-text) thinking blocks from a Claude Code session transcript so the session
 * can be resumed. Conservative and side-effect-safe:
 *   - never throws (returns a result object describing what happened), including when the
 *     supplied logger itself fails;
 *   - only removes blocks whose thinking text is empty (legitimate signed thinking is untouched);
 *   - never empties a message (if removing the blocks would leave a message with no content, that
 *     line is left exactly as-is);
 *   - blank lines and lines that are not valid JSON are preserved verbatim;
 *   - writes a one-time backup (`<file>.pre-repair-backup`) before modifying the transcript; an
 *     existing backup is never overwritten, and a failed backup does not block the repair.
 *
 * @param {object} opts
 * @param {string} opts.tempDir - working directory the session ran in.
 * @param {string} opts.sessionId - Claude Code session id.
 * @param {string} [opts.homeDir] - override home dir (tests).
 * @param {Function} [opts.log] - async logger; called once, after a successful rewrite.
 * @returns {Promise<{ repaired: boolean, removedBlocks: number, scannedLines: number, sessionFile: string|null, reason?: string }>}
 *   `reason` is present only when nothing was rewritten (or the rewrite itself failed).
 */
export const repairCorruptedThinkingBlocks = async ({ tempDir, sessionId, homeDir, log = async () => {} } = {}) => {
  const result = { repaired: false, removedBlocks: 0, scannedLines: 0, sessionFile: null };
  if (!tempDir || !sessionId) {
    return { ...result, reason: 'missing tempDir or sessionId' };
  }
  const sessionFile = resolveSessionTranscriptPath(tempDir, sessionId, homeDir);
  result.sessionFile = sessionFile;

  let fileContent;
  try {
    fileContent = await fs.readFile(sessionFile, 'utf8');
  } catch {
    // No transcript on disk (e.g. fresh run never persisted, or path mismatch) — nothing to repair.
    return { ...result, reason: 'session transcript not found' };
  }

  const backupFile = `${sessionFile}.pre-repair-backup`;
  let backupAvailable = true;
  let removedBlocks = 0;
  let scannedLines = 0;

  try {
    const out = [];
    for (const line of fileContent.split('\n')) {
      if (!line.trim()) {
        out.push(line);
        continue;
      }
      // Counted before parsing, so unparseable lines are included in the total.
      scannedLines++;
      let entry;
      try {
        entry = JSON.parse(line);
      } catch {
        out.push(line); // preserve anything we can't parse verbatim
        continue;
      }
      const content = entry?.message?.content;
      if (Array.isArray(content)) {
        const cleaned = content.filter(block => !isCorruptedThinkingBlock(block));
        const corrupted = content.length - cleaned.length;
        // Never leave a message with an empty content array (invalid for the API): a message made
        // up solely of corrupted blocks is kept untouched.
        if (corrupted > 0 && cleaned.length > 0) {
          entry.message.content = cleaned;
          removedBlocks += corrupted;
          out.push(JSON.stringify(entry));
          continue;
        }
      }
      out.push(line);
    }

    result.scannedLines = scannedLines;
    if (removedBlocks === 0) {
      return { ...result, reason: 'no corrupted thinking blocks found' };
    }

    // Back up the original transcript exactly once before rewriting it.
    try {
      await fs.access(backupFile);
    } catch {
      try {
        await fs.copyFile(sessionFile, backupFile);
      } catch {
        // Best effort — a missing backup must not block the repair, but remember it so the log
        // below does not point at a file that was never created.
        backupAvailable = false;
      }
    }

    await fs.writeFile(sessionFile, out.join('\n'), 'utf8');
    result.repaired = true;
    result.removedBlocks = removedBlocks;
  } catch (error) {
    // Defensive: any unexpected failure degrades gracefully to "no repair" so the caller can fall
    // back to a fresh restart.
    return { ...result, reason: `repair failed: ${error?.message || error}` };
  }

  // Logging happens outside the repair try-block: the transcript is already rewritten at this
  // point, so a failing logger must neither throw nor relabel the successful repair as a failure.
  try {
    const backupNote = backupAvailable ? backupFile : 'unavailable (could not be created)';
    await log(`🩹 Repaired session transcript: stripped ${removedBlocks} corrupted thinking block(s) from ${scannedLines} message line(s) (Issue #1834). Backup: ${backupNote}`, { verbose: true });
  } catch {
    // Intentionally ignored — see above.
  }
  return result;
};
149
+
150
+ export default { repairCorruptedThinkingBlocks, resolveSessionTranscriptPath };
@@ -12,9 +12,12 @@
12
12
  //
13
13
  // PR #1835 feedback: "in case of this specific error we should try resume first, and if not possible
14
14
  // try to restart." Recovery is therefore a two-phase escalation:
15
- // Phase 1 — resume the existing session (context-preserving; occasionally the transcript is intact
16
- // enough to continue).
17
- // Phase 2 — resume unavailable or already failed → discard the session and start fresh (`/clear`).
15
+ // Phase 1 — REPAIR the on-disk transcript (strip the corrupted empty-text thinking blocks) and
16
+ // resume the existing session (context-preserving). Plain resume of a poisoned
17
+ // transcript is futile — the 400 just repeats — so we first remove the offending blocks,
18
+ // which the API permits omitting. When repair succeeds the resume keeps all accumulated
19
+ // text/tool-use history (Issue #1834 "can we do even better?").
20
+ // Phase 2 — repair/resume unavailable or already failed → discard the session and start fresh.
18
21
  // On every attempt we first auto-commit any uncommitted work (Issue #1834 / PR #1835 feedback:
19
22
  // "on all critical errors we auto commit uncommitted changes by default") so nothing is lost when
20
23
  // the session context resets.
@@ -22,6 +25,7 @@
22
25
  import { retryLimits, criticalErrorRecovery } from './config.lib.mjs';
23
26
  import { waitWithCountdown } from './tool-retry.lib.mjs';
24
27
  import { commitUncommittedChangesOnCriticalError } from './critical-error-commit.lib.mjs';
28
+ import { repairCorruptedThinkingBlocks } from './claude.session-transcript-repair.lib.mjs';
25
29
 
26
30
  /**
27
31
  * Create a stateful corrupted-thinking-block recovery handler. The returned function persists its
@@ -36,11 +40,13 @@ import { commitUncommittedChangesOnCriticalError } from './critical-error-commit
36
40
  * @param {Function} ctx.$ - command-stream executor.
37
41
  * @param {Function} ctx.log - async logger.
38
42
  * @param {number} [ctx.waitMs=5000] - settle delay before re-running (overridable for tests).
43
+ * @param {Function} [ctx.repair=repairCorruptedThinkingBlocks] - transcript repair (injectable for tests).
44
+ * @param {string} [ctx.homeDir] - override home dir for transcript lookup (tests).
39
45
  * @returns {(opts: {classified: object, source: string, sessionId: string|null}) => Promise<boolean>}
40
46
  * Resolves true when a recovery attempt was initiated (caller should re-run); false when
41
47
  * both caps are exhausted (caller should fail).
42
48
  */
43
- export const createThinkingBlockRecovery = ({ argv, tempDir, branchName, $, log, waitMs = 5000 }) => {
49
+ export const createThinkingBlockRecovery = ({ argv, tempDir, branchName, $, log, waitMs = 5000, repair = repairCorruptedThinkingBlocks, homeDir }) => {
44
50
  let resumeCount = 0;
45
51
  let restartCount = 0;
46
52
  return async ({ classified, source, sessionId }) => {
@@ -49,11 +55,22 @@ export const createThinkingBlockRecovery = ({ argv, tempDir, branchName, $, log,
49
55
  await commitUncommittedChangesOnCriticalError({ tempDir, branchName, $, log, reason: `${classified.label} (${source})` });
50
56
  }
51
57
  };
52
- // Phase 1 — resume the existing session first (cheaper, keeps accumulated context).
58
+ // Phase 1 — repair the on-disk transcript, then resume (keeps accumulated context).
53
59
  if (sessionId && resumeCount < retryLimits.maxThinkingBlockResumes) {
54
60
  resumeCount++;
55
61
  await preserveWork();
56
- await log(`\n⚠️ ${classified.label} (${source}). Resume attempt ${resumeCount}/${retryLimits.maxThinkingBlockResumes} — trying to resume the existing session first before discarding it (Issue #1834)...`, { level: 'warning' });
62
+ await log(`\n⚠️ ${classified.label} (${source}). Resume attempt ${resumeCount}/${retryLimits.maxThinkingBlockResumes} — repairing the corrupted transcript then resuming the existing session before discarding it (Issue #1834)...`, { level: 'warning' });
63
+ // Strip the corrupted (empty-text) thinking blocks so resume isn't doomed to repeat the 400.
64
+ try {
65
+ const repairResult = await repair({ tempDir, sessionId, homeDir, log });
66
+ if (repairResult?.repaired) {
67
+ await log(` 🩹 Stripped ${repairResult.removedBlocks} corrupted thinking block(s) from the transcript — resume will preserve context (Issue #1834).`, { verbose: true });
68
+ } else {
69
+ await log(` ℹ️ Transcript repair made no change (${repairResult?.reason || 'unknown'}) — resuming as-is (Issue #1834).`, { verbose: true });
70
+ }
71
+ } catch {
72
+ // Repair must never block recovery — fall through to a plain resume attempt.
73
+ }
57
74
  argv.resume = sessionId;
58
75
  await waitWithCountdown(waitMs, log);
59
76
  await log('\n🔄 Resuming the session now...');