npm - @link-assistant/hive-mind - Versions diffs - 1.32.0 → 1.32.1 - Mend

@link-assistant/hive-mind 1.32.0 → 1.32.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/CHANGELOG.md +8 -0
package/package.json +1 -1
package/src/agent.lib.mjs +5 -4
package/src/claude.lib.mjs +3 -2
package/src/codex.lib.mjs +2 -1
package/src/interactive-mode.lib.mjs +24 -5
package/src/opencode.lib.mjs +3 -2
package/src/unicode-sanitization.lib.mjs +67 -0

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,13 @@
 # @link-assistant/hive-mind
+## 1.32.1
+### Patch Changes
+- 2f710dd: fix: sanitize orphaned UTF-16 surrogates across all CLI output parsing paths (Issue #1324)
+  Extract `sanitizeUnicode()` and `sanitizeObjectStrings()` into a shared `unicode-sanitization.lib.mjs` module and apply sanitization in all CLI output parsing paths — `claude.lib.mjs`, `agent.lib.mjs`, `codex.lib.mjs`, `opencode.lib.mjs`, and `interactive-mode.lib.mjs`. This ensures orphaned UTF-16 surrogates (from Claude CLI's `<persisted-output>` truncation) are replaced with U+FFFD before any JSON re-serialization, logging, or API calls. Add 62 unit tests covering surrogate edge cases, real-world Claude NDJSON events, and JSON round-trip safety.
 ## 1.32.0
 ### Minor Changes

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@link-assistant/hive-mind",
-  "version": "1.32.0",
+  "version": "1.32.1",
   "description": "AI-powered issue solver and hive mind for collaborative problem solving",
   "main": "src/hive.mjs",
   "type": "module",

package/src/agent.lib.mjs CHANGED Viewed

@@ -17,6 +17,7 @@ import { log } from './lib.mjs';
 import { reportError } from './sentry.lib.mjs';
 import { timeouts } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
+import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 // Import pricing functions from claude.lib.mjs
 // We reuse fetchModelInfo and checkModelVisionCapability to get data from models.dev API
@@ -47,7 +48,7 @@ export const parseAgentTokenUsage = output => {
     if (!trimmedLine || !trimmedLine.startsWith('{')) continue;
     try {
-      const parsed = JSON.parse(trimmedLine);
+      const parsed = sanitizeObjectStrings(JSON.parse(trimmedLine));
       // Look for step_finish events which contain token usage
       if (parsed.type === 'step_finish' && parsed.part?.tokens) {
@@ -615,7 +616,7 @@ export const executeAgentCommand = async params => {
           for (const line of lines) {
             if (!line.trim()) continue;
             try {
-              const data = JSON.parse(line);
+              const data = sanitizeObjectStrings(JSON.parse(line));
               // Output formatted JSON
               await log(JSON.stringify(data, null, 2));
               // Capture session ID from the first message
@@ -689,7 +690,7 @@ export const executeAgentCommand = async params => {
             for (const stderrLine of stderrLines) {
               if (!stderrLine.trim()) continue;
               try {
-                const stderrData = JSON.parse(stderrLine);
+                const stderrData = sanitizeObjectStrings(JSON.parse(stderrLine));
                 // Output formatted JSON (same formatting as stdout)
                 await log(JSON.stringify(stderrData, null, 2));
                 // Capture session ID from stderr too (agent sends it via stderr)
@@ -767,7 +768,7 @@ export const executeAgentCommand = async params => {
           if (!line.trim()) continue;
           try {
-            const msg = JSON.parse(line);
+            const msg = sanitizeObjectStrings(JSON.parse(line));
             // Check for explicit error message types from agent
             if (msg.type === 'error' || msg.type === 'step_error') {

package/src/claude.lib.mjs CHANGED Viewed

@@ -12,6 +12,7 @@ import { reportError } from './sentry.lib.mjs';
 import { timeouts, retryLimits, claudeCode, getClaudeEnv, getThinkingLevelToTokens, getTokensToThinkingLevel, supportsThinkingBudget, DEFAULT_MAX_THINKING_BUDGET, getMaxOutputTokensForModel } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
 import { createInteractiveHandler } from './interactive-mode.lib.mjs';
+import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 import { displayBudgetStats } from './claude.budget-stats.lib.mjs';
 import { buildClaudeResumeCommand } from './claude.command-builder.lib.mjs';
 import { handleClaudeRuntimeSwitch } from './claude.runtime-switch.lib.mjs'; // see issue #1141
@@ -974,7 +975,7 @@ export const executeClaudeCommand = async params => {
           for (const line of lines) {
             if (!line.trim()) continue;
             try {
-              const data = JSON.parse(line);
+              const data = sanitizeObjectStrings(JSON.parse(line));
               // Process event in interactive mode
               if (interactiveHandler) {
                 try {
@@ -1153,7 +1154,7 @@ export const executeClaudeCommand = async params => {
       // Issue #1183: Process remaining buffer content - extract cost from result type if present
       if (stdoutLineBuffer.trim()) {
         try {
-          const data = JSON.parse(stdoutLineBuffer);
+          const data = sanitizeObjectStrings(JSON.parse(stdoutLineBuffer));
           await log(JSON.stringify(data, null, 2));
           if (data.type === 'result' && data.subtype === 'success' && data.total_cost_usd != null) {
             anthropicTotalCostUSD = data.total_cost_usd;

package/src/codex.lib.mjs CHANGED Viewed

@@ -17,6 +17,7 @@ import { log } from './lib.mjs';
 import { reportError } from './sentry.lib.mjs';
 import { timeouts } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
+import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 // Model mapping to translate aliases to full model IDs for Codex
 export const mapModelToId = model => {
@@ -303,7 +304,7 @@ export const executeCodexCommand = async params => {
             const lines = output.split('\n');
             for (const line of lines) {
               if (!line.trim()) continue;
-              const data = JSON.parse(line);
+              const data = sanitizeObjectStrings(JSON.parse(line));
               // Check for both thread_id (codex) and session_id (legacy)
               if ((data.thread_id || data.session_id) && !sessionId) {
                 sessionId = data.thread_id || data.session_id;

package/src/interactive-mode.lib.mjs CHANGED Viewed

@@ -42,16 +42,26 @@ const CONFIG = {
   MAX_JSON_DEPTH: 10,
 };
+// Import sanitizeUnicode from the shared module so that the same logic is used
+// everywhere: in the interactive-mode PR-comment path and in the regular
+// Claude output parsing path (claude.lib.mjs).
+// See: https://github.com/link-assistant/hive-mind/issues/1324
+import { sanitizeUnicode } from './unicode-sanitization.lib.mjs';
 /**
  * Truncate content in the middle, keeping start and end
  * This helps show context while reducing size for large outputs
  *
+ * The result is always passed through sanitizeUnicode() so that orphaned
+ * surrogates already present in the content (e.g. left behind by an upstream cut) never produce invalid JSON.
+ * See: https://github.com/link-assistant/hive-mind/issues/1324
+ *
  * @param {string} content - Content to potentially truncate
  * @param {Object} options - Truncation options
  * @param {number} [options.maxLines=50] - Maximum lines before truncation
  * @param {number} [options.keepStart=20] - Lines to keep at start
  * @param {number} [options.keepEnd=20] - Lines to keep at end
- * @returns {string} Truncated content with ellipsis indicator
+ * @returns {string} Truncated, Unicode-sanitized content with ellipsis indicator
  */
 const truncateMiddle = (content, options = {}) => {
   const { maxLines = CONFIG.MAX_LINES_BEFORE_TRUNCATION, keepStart = CONFIG.LINES_TO_KEEP_START, keepEnd = CONFIG.LINES_TO_KEEP_END } = options;
@@ -62,22 +72,27 @@ const truncateMiddle = (content, options = {}) => {
   const lines = content.split('\n');
   if (lines.length <= maxLines) {
-    return content;
+    return sanitizeUnicode(content);
   }
   const startLines = lines.slice(0, keepStart);
   const endLines = lines.slice(-keepEnd);
   const removedCount = lines.length - keepStart - keepEnd;
-  return [...startLines, '', `... [${removedCount} lines truncated] ...`, '', ...endLines].join('\n');
+  return sanitizeUnicode([...startLines, '', `... [${removedCount} lines truncated] ...`, '', ...endLines].join('\n'));
 };
 /**
- * Safely stringify JSON with depth limit and circular reference handling
+ * Safely stringify JSON with depth limit and circular reference handling.
+ * String values are passed through sanitizeUnicode() so that orphaned UTF-16
+ * surrogates (which can appear after persisted-output truncation) never reach
+ * JSON.stringify() and cause a 400 API error.
+ *
+ * @see https://github.com/link-assistant/hive-mind/issues/1324
  *
  * @param {any} obj - Object to stringify
  * @param {number} [indent=2] - Indentation spaces
- * @returns {string} Formatted JSON string
+ * @returns {string} Formatted JSON string with sanitized Unicode
  */
 const safeJsonStringify = (obj, indent = 2) => {
   const seen = new WeakSet();
@@ -90,6 +105,9 @@ const safeJsonStringify = (obj, indent = 2) => {
         }
         seen.add(value);
       }
+      if (typeof value === 'string') {
+        return sanitizeUnicode(value);
+      }
       return value;
     },
     indent
@@ -954,6 +972,7 @@ export const validateInteractiveModeConfig = async (argv, log) => {
 // Export utilities for testing
 export const utils = {
+  sanitizeUnicode,
   truncateMiddle,
   safeJsonStringify,
   createCollapsible,

package/src/opencode.lib.mjs CHANGED Viewed

@@ -17,6 +17,7 @@ import { log } from './lib.mjs';
 import { reportError } from './sentry.lib.mjs';
 import { timeouts } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
+import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 // Model mapping to translate aliases to full model IDs for OpenCode
 export const mapModelToId = model => {
@@ -322,7 +323,7 @@ export const executeOpenCodeCommand = async params => {
             const lines = output.split('\n');
             for (const line of lines) {
               if (!line.trim()) continue;
-              const data = JSON.parse(line);
+              const data = sanitizeObjectStrings(JSON.parse(line));
               // Track text content for result summary
               // OpenCode outputs text via 'text', 'assistant', 'message', or 'result' type events
               if (data.type === 'text' && data.text) {
@@ -364,7 +365,7 @@ export const executeOpenCodeCommand = async params => {
               const lines = errorOutput.split('\n');
               for (const line of lines) {
                 if (!line.trim()) continue;
-                const data = JSON.parse(line);
+                const data = sanitizeObjectStrings(JSON.parse(line));
                 if (data.type === 'text' && data.text) {
                   lastTextContent = data.text;
                 } else if (data.type === 'assistant' && data.message?.content) {

package/src/unicode-sanitization.lib.mjs ADDED Viewed

@@ -0,0 +1,67 @@
+/**
+ * Unicode Sanitization Utility
+ *
+ * Provides functions to sanitize orphaned UTF-16 surrogates from strings.
+ * When Claude Code's <persisted-output> truncation splits a surrogate pair,
+ * the orphaned high surrogate (e.g. \uD83E without \uDD16) causes
+ * JSON.stringify() to produce invalid JSON that the Anthropic API rejects:
+ *
+ *   API Error: 400 {"type":"error","error":{"type":"invalid_request_error",
+ *   "message":"The request body is not valid JSON: no low surrogate in string..."}}
+ *
+ * This module is used by every CLI output parsing path (claude.lib.mjs,
+ * agent.lib.mjs, codex.lib.mjs, opencode.lib.mjs) and by the interactive
+ * mode PR comment path (interactive-mode.lib.mjs) to ensure all text is
+ * valid before JSON serialization or external API calls.
+ *
+ * @see https://github.com/link-assistant/hive-mind/issues/1324
+ * @see https://www.rfc-editor.org/rfc/rfc8259#section-7
+ * @module unicode-sanitization
+ */
+/**
+ * Swap each unpaired UTF-16 surrogate code unit for U+FFFD (the replacement
+ * character), leaving correctly paired surrogates untouched. Unpaired means:
+ *   - a high surrogate (U+D800–U+DBFF) with no low surrogate right after it
+ *   - a low surrogate (U+DC00–U+DFFF) with no high surrogate right before it
+ * Falsy input yields ''; any other non-string input is returned unchanged.
+ *
+ * @param {string} text - Input string that may contain orphaned surrogates
+ * @returns {string} String with every orphaned surrogate replaced by U+FFFD
+ */
+export const sanitizeUnicode = text => {
+  const isNonEmptyString = typeof text === 'string' && text !== '';
+  if (!isNonEmptyString) {
+    return text || '';
+  }
+  // First alternative: lone high surrogate; second alternative: lone low surrogate.
+  const orphanedSurrogate = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;
+  return text.replace(orphanedSurrogate, '\uFFFD');
+};
+/**
+ * Recursively sanitize every string value inside a parsed JSON structure.
+ * Arrays and plain objects are copied, never mutated. Keys are kept as-is;
+ * a "__proto__" key stays an own property instead of replacing the prototype.
+ *
+ * @param {any} value - Value to sanitize (strings are sanitized, objects/arrays are traversed)
+ * @returns {any} The value with all string leaves sanitized (containers are fresh copies)
+ */
+export const sanitizeObjectStrings = value => {
+  if (typeof value === 'string') {
+    return sanitizeUnicode(value);
+  }
+  if (Array.isArray(value)) {
+    // Wrap the callback so map's extra (index, array) arguments are not forwarded.
+    return value.map(item => sanitizeObjectStrings(item));
+  }
+  if (typeof value === 'object' && value !== null) {
+    // Object.fromEntries defines own data properties, so untrusted keys such
+    // as "__proto__" (which JSON.parse creates as own keys) cannot swap the
+    // prototype of the copy the way `result[key] = ...` assignment would.
+    return Object.fromEntries(Object.entries(value).map(([key, val]) => [key, sanitizeObjectStrings(val)]));
+  }
+  return value;
+};
+export default { sanitizeUnicode, sanitizeObjectStrings }; // default export bundles the same two functions as the named exports