@link-assistant/hive-mind 1.32.0 → 1.32.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.32.1
4
+
5
+ ### Patch Changes
6
+
7
+ - 2f710dd: fix: sanitize orphaned UTF-16 surrogates across all CLI output parsing paths (Issue #1324)
8
+
9
+ Extract `sanitizeUnicode()` and `sanitizeObjectStrings()` into a shared `unicode-sanitization.lib.mjs` module and apply sanitization in all CLI output parsing paths — `claude.lib.mjs`, `agent.lib.mjs`, `codex.lib.mjs`, `opencode.lib.mjs`, and `interactive-mode.lib.mjs`. This ensures orphaned UTF-16 surrogates (from Claude CLI's `<persisted-output>` truncation) are replaced with U+FFFD before any JSON re-serialization, logging, or API calls. Add 62 unit tests covering surrogate edge cases, real-world Claude NDJSON events, and JSON round-trip safety.
10
+
3
11
  ## 1.32.0
4
12
 
5
13
  ### Minor Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.32.0",
3
+ "version": "1.32.1",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
package/src/agent.lib.mjs CHANGED
@@ -17,6 +17,7 @@ import { log } from './lib.mjs';
17
17
  import { reportError } from './sentry.lib.mjs';
18
18
  import { timeouts } from './config.lib.mjs';
19
19
  import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
20
+ import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
20
21
 
21
22
  // Import pricing functions from claude.lib.mjs
22
23
  // We reuse fetchModelInfo and checkModelVisionCapability to get data from models.dev API
@@ -47,7 +48,7 @@ export const parseAgentTokenUsage = output => {
47
48
  if (!trimmedLine || !trimmedLine.startsWith('{')) continue;
48
49
 
49
50
  try {
50
- const parsed = JSON.parse(trimmedLine);
51
+ const parsed = sanitizeObjectStrings(JSON.parse(trimmedLine));
51
52
 
52
53
  // Look for step_finish events which contain token usage
53
54
  if (parsed.type === 'step_finish' && parsed.part?.tokens) {
@@ -615,7 +616,7 @@ export const executeAgentCommand = async params => {
615
616
  for (const line of lines) {
616
617
  if (!line.trim()) continue;
617
618
  try {
618
- const data = JSON.parse(line);
619
+ const data = sanitizeObjectStrings(JSON.parse(line));
619
620
  // Output formatted JSON
620
621
  await log(JSON.stringify(data, null, 2));
621
622
  // Capture session ID from the first message
@@ -689,7 +690,7 @@ export const executeAgentCommand = async params => {
689
690
  for (const stderrLine of stderrLines) {
690
691
  if (!stderrLine.trim()) continue;
691
692
  try {
692
- const stderrData = JSON.parse(stderrLine);
693
+ const stderrData = sanitizeObjectStrings(JSON.parse(stderrLine));
693
694
  // Output formatted JSON (same formatting as stdout)
694
695
  await log(JSON.stringify(stderrData, null, 2));
695
696
  // Capture session ID from stderr too (agent sends it via stderr)
@@ -767,7 +768,7 @@ export const executeAgentCommand = async params => {
767
768
  if (!line.trim()) continue;
768
769
 
769
770
  try {
770
- const msg = JSON.parse(line);
771
+ const msg = sanitizeObjectStrings(JSON.parse(line));
771
772
 
772
773
  // Check for explicit error message types from agent
773
774
  if (msg.type === 'error' || msg.type === 'step_error') {
package/src/claude.lib.mjs CHANGED
@@ -12,6 +12,7 @@ import { reportError } from './sentry.lib.mjs';
12
12
  import { timeouts, retryLimits, claudeCode, getClaudeEnv, getThinkingLevelToTokens, getTokensToThinkingLevel, supportsThinkingBudget, DEFAULT_MAX_THINKING_BUDGET, getMaxOutputTokensForModel } from './config.lib.mjs';
13
13
  import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
14
14
  import { createInteractiveHandler } from './interactive-mode.lib.mjs';
15
+ import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
15
16
  import { displayBudgetStats } from './claude.budget-stats.lib.mjs';
16
17
  import { buildClaudeResumeCommand } from './claude.command-builder.lib.mjs';
17
18
  import { handleClaudeRuntimeSwitch } from './claude.runtime-switch.lib.mjs'; // see issue #1141
@@ -974,7 +975,7 @@ export const executeClaudeCommand = async params => {
974
975
  for (const line of lines) {
975
976
  if (!line.trim()) continue;
976
977
  try {
977
- const data = JSON.parse(line);
978
+ const data = sanitizeObjectStrings(JSON.parse(line));
978
979
  // Process event in interactive mode
979
980
  if (interactiveHandler) {
980
981
  try {
@@ -1153,7 +1154,7 @@ export const executeClaudeCommand = async params => {
1153
1154
  // Issue #1183: Process remaining buffer content - extract cost from result type if present
1154
1155
  if (stdoutLineBuffer.trim()) {
1155
1156
  try {
1156
- const data = JSON.parse(stdoutLineBuffer);
1157
+ const data = sanitizeObjectStrings(JSON.parse(stdoutLineBuffer));
1157
1158
  await log(JSON.stringify(data, null, 2));
1158
1159
  if (data.type === 'result' && data.subtype === 'success' && data.total_cost_usd != null) {
1159
1160
  anthropicTotalCostUSD = data.total_cost_usd;
package/src/codex.lib.mjs CHANGED
@@ -17,6 +17,7 @@ import { log } from './lib.mjs';
17
17
  import { reportError } from './sentry.lib.mjs';
18
18
  import { timeouts } from './config.lib.mjs';
19
19
  import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
20
+ import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
20
21
 
21
22
  // Model mapping to translate aliases to full model IDs for Codex
22
23
  export const mapModelToId = model => {
@@ -303,7 +304,7 @@ export const executeCodexCommand = async params => {
303
304
  const lines = output.split('\n');
304
305
  for (const line of lines) {
305
306
  if (!line.trim()) continue;
306
- const data = JSON.parse(line);
307
+ const data = sanitizeObjectStrings(JSON.parse(line));
307
308
  // Check for both thread_id (codex) and session_id (legacy)
308
309
  if ((data.thread_id || data.session_id) && !sessionId) {
309
310
  sessionId = data.thread_id || data.session_id;
package/src/interactive-mode.lib.mjs CHANGED
@@ -42,16 +42,26 @@ const CONFIG = {
42
42
  MAX_JSON_DEPTH: 10,
43
43
  };
44
44
 
45
+ // Import sanitizeUnicode from the shared module so that the same logic is used
46
+ // everywhere: in the interactive-mode PR-comment path and in the regular
47
+ // Claude output parsing path (claude.lib.mjs).
48
+ // See: https://github.com/link-assistant/hive-mind/issues/1324
49
+ import { sanitizeUnicode } from './unicode-sanitization.lib.mjs';
50
+
45
51
  /**
46
52
  * Truncate content in the middle, keeping start and end
47
53
  * This helps show context while reducing size for large outputs
48
54
  *
55
+ * The result is always passed through sanitizeUnicode() so that a truncation
56
+ * point that falls inside a UTF-16 surrogate pair never produces invalid JSON.
57
+ * See: https://github.com/link-assistant/hive-mind/issues/1324
58
+ *
49
59
  * @param {string} content - Content to potentially truncate
50
60
  * @param {Object} options - Truncation options
51
61
  * @param {number} [options.maxLines=50] - Maximum lines before truncation
52
62
  * @param {number} [options.keepStart=20] - Lines to keep at start
53
63
  * @param {number} [options.keepEnd=20] - Lines to keep at end
54
- * @returns {string} Truncated content with ellipsis indicator
64
+ * @returns {string} Truncated, Unicode-sanitized content with ellipsis indicator
55
65
  */
56
66
  const truncateMiddle = (content, options = {}) => {
57
67
  const { maxLines = CONFIG.MAX_LINES_BEFORE_TRUNCATION, keepStart = CONFIG.LINES_TO_KEEP_START, keepEnd = CONFIG.LINES_TO_KEEP_END } = options;
@@ -62,22 +72,27 @@ const truncateMiddle = (content, options = {}) => {
62
72
 
63
73
  const lines = content.split('\n');
64
74
  if (lines.length <= maxLines) {
65
- return content;
75
+ return sanitizeUnicode(content);
66
76
  }
67
77
 
68
78
  const startLines = lines.slice(0, keepStart);
69
79
  const endLines = lines.slice(-keepEnd);
70
80
  const removedCount = lines.length - keepStart - keepEnd;
71
81
 
72
- return [...startLines, '', `... [${removedCount} lines truncated] ...`, '', ...endLines].join('\n');
82
+ return sanitizeUnicode([...startLines, '', `... [${removedCount} lines truncated] ...`, '', ...endLines].join('\n'));
73
83
  };
74
84
 
75
85
  /**
76
- * Safely stringify JSON with depth limit and circular reference handling
86
+ * Safely stringify JSON with depth limit and circular reference handling.
87
+ * String values are passed through sanitizeUnicode() so that orphaned UTF-16
88
+ * surrogates (which can appear after persisted-output truncation) never reach
89
+ * JSON.stringify() and cause a 400 API error.
90
+ *
91
+ * @see https://github.com/link-assistant/hive-mind/issues/1324
77
92
  *
78
93
  * @param {any} obj - Object to stringify
79
94
  * @param {number} [indent=2] - Indentation spaces
80
- * @returns {string} Formatted JSON string
95
+ * @returns {string} Formatted JSON string with sanitized Unicode
81
96
  */
82
97
  const safeJsonStringify = (obj, indent = 2) => {
83
98
  const seen = new WeakSet();
@@ -90,6 +105,9 @@ const safeJsonStringify = (obj, indent = 2) => {
90
105
  }
91
106
  seen.add(value);
92
107
  }
108
+ if (typeof value === 'string') {
109
+ return sanitizeUnicode(value);
110
+ }
93
111
  return value;
94
112
  },
95
113
  indent
@@ -954,6 +972,7 @@ export const validateInteractiveModeConfig = async (argv, log) => {
954
972
 
955
973
  // Export utilities for testing
956
974
  export const utils = {
975
+ sanitizeUnicode,
957
976
  truncateMiddle,
958
977
  safeJsonStringify,
959
978
  createCollapsible,
package/src/opencode.lib.mjs CHANGED
@@ -17,6 +17,7 @@ import { log } from './lib.mjs';
17
17
  import { reportError } from './sentry.lib.mjs';
18
18
  import { timeouts } from './config.lib.mjs';
19
19
  import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
20
+ import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
20
21
 
21
22
  // Model mapping to translate aliases to full model IDs for OpenCode
22
23
  export const mapModelToId = model => {
@@ -322,7 +323,7 @@ export const executeOpenCodeCommand = async params => {
322
323
  const lines = output.split('\n');
323
324
  for (const line of lines) {
324
325
  if (!line.trim()) continue;
325
- const data = JSON.parse(line);
326
+ const data = sanitizeObjectStrings(JSON.parse(line));
326
327
  // Track text content for result summary
327
328
  // OpenCode outputs text via 'text', 'assistant', 'message', or 'result' type events
328
329
  if (data.type === 'text' && data.text) {
@@ -364,7 +365,7 @@ export const executeOpenCodeCommand = async params => {
364
365
  const lines = errorOutput.split('\n');
365
366
  for (const line of lines) {
366
367
  if (!line.trim()) continue;
367
- const data = JSON.parse(line);
368
+ const data = sanitizeObjectStrings(JSON.parse(line));
368
369
  if (data.type === 'text' && data.text) {
369
370
  lastTextContent = data.text;
370
371
  } else if (data.type === 'assistant' && data.message?.content) {
package/src/unicode-sanitization.lib.mjs ADDED
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Unicode Sanitization Utility
3
+ *
4
+ * Provides functions to sanitize orphaned UTF-16 surrogates from strings.
5
+ * When Claude Code's <persisted-output> truncation splits a surrogate pair,
6
+ * the orphaned high surrogate (e.g. \uD83E without \uDD16) causes
7
+ * JSON.stringify() to produce invalid JSON that the Anthropic API rejects:
8
+ *
9
+ * API Error: 400 {"type":"error","error":{"type":"invalid_request_error",
10
+ * "message":"The request body is not valid JSON: no low surrogate in string..."}}
11
+ *
12
+ * This module is used by every CLI output parsing path (claude.lib.mjs,
13
+ * agent.lib.mjs, codex.lib.mjs, opencode.lib.mjs) and by the interactive
14
+ * mode PR comment path (interactive-mode.lib.mjs) to ensure all text is
15
+ * valid before JSON serialization or external API calls.
16
+ *
17
+ * @see https://github.com/link-assistant/hive-mind/issues/1324
18
+ * @see https://www.rfc-editor.org/rfc/rfc8259#section-7
19
+ * @module unicode-sanitization
20
+ */
21
+
22
+ /**
23
+ * Replace every orphaned UTF-16 surrogate with the Unicode replacement
24
+ * character U+FFFD. A "well-formed" string never contains:
25
+ * - A high surrogate (U+D800–U+DBFF) not immediately followed by a low surrogate (U+DC00–U+DFFF)
26
+ * - A low surrogate (U+DC00–U+DFFF) not immediately preceded by a high surrogate
27
+ *
28
+ * @param {string} text - Input string that may contain orphaned surrogates
29
+ * @returns {string} String with every orphaned surrogate replaced by U+FFFD
30
+ */
31
+ export const sanitizeUnicode = text => {
32
+ if (!text || typeof text !== 'string') {
33
+ return text || '';
34
+ }
35
+ // Regex explanation:
36
+ // [\uD800-\uDBFF](?![\uDC00-\uDFFF]) — high surrogate not followed by low surrogate
37
+ // |
38
+ // (?<![\uD800-\uDBFF])[\uDC00-\uDFFF] — low surrogate not preceded by high surrogate
39
+ return text.replace(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g, '\uFFFD');
40
+ };
41
+
42
+ /**
43
+ * Recursively sanitize all string values in an object/array.
44
+ * This is useful for sanitizing parsed JSON objects from Claude CLI output
45
+ * before they are re-serialized or processed.
46
+ *
47
+ * @param {any} value - Value to sanitize (strings are sanitized, objects/arrays are traversed)
48
+ * @returns {any} The value with all string leaves sanitized
49
+ */
50
+ export const sanitizeObjectStrings = value => {
51
+ if (typeof value === 'string') {
52
+ return sanitizeUnicode(value);
53
+ }
54
+ if (Array.isArray(value)) {
55
+ return value.map(sanitizeObjectStrings);
56
+ }
57
+ if (typeof value === 'object' && value !== null) {
58
+ const result = {};
59
+ for (const [key, val] of Object.entries(value)) {
60
+ result[key] = sanitizeObjectStrings(val);
61
+ }
62
+ return result;
63
+ }
64
+ return value;
65
+ };
66
+
67
+ export default { sanitizeUnicode, sanitizeObjectStrings };