npm - @link-assistant/hive-mind - Versions diffs - 1.31.4 → 1.32.1 - Mend

@link-assistant/hive-mind 1.31.4 → 1.32.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/CHANGELOG.md +19 -0
package/package.json +1 -1
package/src/agent.lib.mjs +5 -4
package/src/claude.lib.mjs +3 -2
package/src/codex.lib.mjs +2 -1
package/src/github-merge.lib.mjs +1 -3
package/src/interactive-mode.lib.mjs +24 -5
package/src/opencode.lib.mjs +3 -2
package/src/solve.mjs +4 -10
package/src/telegram-bot.mjs +23 -61
package/src/telegram-message-filters.lib.mjs +45 -0
package/src/unicode-sanitization.lib.mjs +67 -0

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,24 @@
 # @link-assistant/hive-mind
+## 1.32.1
+### Patch Changes
+- 2f710dd: fix: sanitize orphaned UTF-16 surrogates across all CLI output parsing paths (Issue #1324)
+  Extract `sanitizeUnicode()` and `sanitizeObjectStrings()` into a shared `unicode-sanitization.lib.mjs` module and apply sanitization in all CLI output parsing paths — `claude.lib.mjs`, `agent.lib.mjs`, `codex.lib.mjs`, `opencode.lib.mjs`, and `interactive-mode.lib.mjs`. This ensures orphaned UTF-16 surrogates (from Claude CLI's `<persisted-output>` truncation) are replaced with U+FFFD before any JSON re-serialization, logging, or API calls. Add 62 unit tests covering surrogate edge cases, real-world Claude NDJSON events, and JSON round-trip safety.
+## 1.32.0
+### Minor Changes
+- b2c94db: Support all options via /solve command when replying to a message containing a GitHub link (issue #1325)
+  Previously, `/solve` as a reply only worked when used without any arguments. Now users can reply to a message containing a GitHub issue/PR link with `/solve --model opus` or any other options, and the bot will:
+  1. Extract the GitHub URL from the replied message
+  2. Use the provided options
+  3. Execute the solve command with both the extracted URL and the user-provided options
 ## 1.31.4
 ### Patch Changes

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@link-assistant/hive-mind",
-  "version": "1.31.4",
+  "version": "1.32.1",
   "description": "AI-powered issue solver and hive mind for collaborative problem solving",
   "main": "src/hive.mjs",
   "type": "module",

package/src/agent.lib.mjs CHANGED Viewed

@@ -17,6 +17,7 @@ import { log } from './lib.mjs';
 import { reportError } from './sentry.lib.mjs';
 import { timeouts } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
+import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 // Import pricing functions from claude.lib.mjs
 // We reuse fetchModelInfo and checkModelVisionCapability to get data from models.dev API
@@ -47,7 +48,7 @@ export const parseAgentTokenUsage = output => {
     if (!trimmedLine || !trimmedLine.startsWith('{')) continue;
     try {
-      const parsed = JSON.parse(trimmedLine);
+      const parsed = sanitizeObjectStrings(JSON.parse(trimmedLine));
       // Look for step_finish events which contain token usage
       if (parsed.type === 'step_finish' && parsed.part?.tokens) {
@@ -615,7 +616,7 @@ export const executeAgentCommand = async params => {
           for (const line of lines) {
             if (!line.trim()) continue;
             try {
-              const data = JSON.parse(line);
+              const data = sanitizeObjectStrings(JSON.parse(line));
               // Output formatted JSON
               await log(JSON.stringify(data, null, 2));
               // Capture session ID from the first message
@@ -689,7 +690,7 @@ export const executeAgentCommand = async params => {
             for (const stderrLine of stderrLines) {
               if (!stderrLine.trim()) continue;
               try {
-                const stderrData = JSON.parse(stderrLine);
+                const stderrData = sanitizeObjectStrings(JSON.parse(stderrLine));
                 // Output formatted JSON (same formatting as stdout)
                 await log(JSON.stringify(stderrData, null, 2));
                 // Capture session ID from stderr too (agent sends it via stderr)
@@ -767,7 +768,7 @@ export const executeAgentCommand = async params => {
           if (!line.trim()) continue;
           try {
-            const msg = JSON.parse(line);
+            const msg = sanitizeObjectStrings(JSON.parse(line));
             // Check for explicit error message types from agent
             if (msg.type === 'error' || msg.type === 'step_error') {

package/src/claude.lib.mjs CHANGED Viewed

@@ -12,6 +12,7 @@ import { reportError } from './sentry.lib.mjs';
 import { timeouts, retryLimits, claudeCode, getClaudeEnv, getThinkingLevelToTokens, getTokensToThinkingLevel, supportsThinkingBudget, DEFAULT_MAX_THINKING_BUDGET, getMaxOutputTokensForModel } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
 import { createInteractiveHandler } from './interactive-mode.lib.mjs';
+import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 import { displayBudgetStats } from './claude.budget-stats.lib.mjs';
 import { buildClaudeResumeCommand } from './claude.command-builder.lib.mjs';
 import { handleClaudeRuntimeSwitch } from './claude.runtime-switch.lib.mjs'; // see issue #1141
@@ -974,7 +975,7 @@ export const executeClaudeCommand = async params => {
           for (const line of lines) {
             if (!line.trim()) continue;
             try {
-              const data = JSON.parse(line);
+              const data = sanitizeObjectStrings(JSON.parse(line));
               // Process event in interactive mode
               if (interactiveHandler) {
                 try {
@@ -1153,7 +1154,7 @@ export const executeClaudeCommand = async params => {
       // Issue #1183: Process remaining buffer content - extract cost from result type if present
       if (stdoutLineBuffer.trim()) {
         try {
-          const data = JSON.parse(stdoutLineBuffer);
+          const data = sanitizeObjectStrings(JSON.parse(stdoutLineBuffer));
           await log(JSON.stringify(data, null, 2));
           if (data.type === 'result' && data.subtype === 'success' && data.total_cost_usd != null) {
             anthropicTotalCostUSD = data.total_cost_usd;

package/src/codex.lib.mjs CHANGED Viewed

@@ -17,6 +17,7 @@ import { log } from './lib.mjs';
 import { reportError } from './sentry.lib.mjs';
 import { timeouts } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
+import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 // Model mapping to translate aliases to full model IDs for Codex
 export const mapModelToId = model => {
@@ -303,7 +304,7 @@ export const executeCodexCommand = async params => {
             const lines = output.split('\n');
             for (const line of lines) {
               if (!line.trim()) continue;
-              const data = JSON.parse(line);
+              const data = sanitizeObjectStrings(JSON.parse(line));
               // Check for both thread_id (codex) and session_id (legacy)
               if ((data.thread_id || data.session_id) && !sessionId) {
                 sessionId = data.thread_id || data.session_id;

package/src/github-merge.lib.mjs CHANGED Viewed

@@ -516,9 +516,7 @@ export async function checkMergePermissions(owner, repo, verbose = false) {
  * @param {string} repo - Repository name
  * @param {number} prNumber - Pull request number
  * @param {Object} options - Merge options
- * @param {string} options.mergeMethod - Merge method: 'merge', 'squash', or 'rebase' (default: 'merge')
- *                                       Note: Must specify one method when running non-interactively.
- *                                       See Issue #1269 for details.
+ * @param {string} options.mergeMethod - Merge method: 'merge', 'squash', or 'rebase' (default: 'merge'). Must specify one method non-interactively (Issue #1269).
  * @param {boolean} options.squash - DEPRECATED: Use mergeMethod: 'squash' instead
  * @param {boolean} options.deleteAfter - Whether to delete branch after merge (default: false)
  * @param {boolean} verbose - Whether to log verbose output

package/src/interactive-mode.lib.mjs CHANGED Viewed

@@ -42,16 +42,26 @@ const CONFIG = {
   MAX_JSON_DEPTH: 10,
 };
+// Import sanitizeUnicode from the shared module so that the same logic is used
+// everywhere: in the interactive-mode PR-comment path and in the regular
+// Claude output parsing path (claude.lib.mjs).
+// See: https://github.com/link-assistant/hive-mind/issues/1324
+import { sanitizeUnicode } from './unicode-sanitization.lib.mjs';
 /**
  * Truncate content in the middle, keeping start and end
  * This helps show context while reducing size for large outputs
  *
+ * The result is always passed through sanitizeUnicode() so that a truncation
+ * point that falls inside a UTF-16 surrogate pair never produces invalid JSON.
+ * See: https://github.com/link-assistant/hive-mind/issues/1324
+ *
  * @param {string} content - Content to potentially truncate
  * @param {Object} options - Truncation options
  * @param {number} [options.maxLines=50] - Maximum lines before truncation
  * @param {number} [options.keepStart=20] - Lines to keep at start
  * @param {number} [options.keepEnd=20] - Lines to keep at end
- * @returns {string} Truncated content with ellipsis indicator
+ * @returns {string} Truncated, Unicode-sanitized content with ellipsis indicator
  */
 const truncateMiddle = (content, options = {}) => {
   const { maxLines = CONFIG.MAX_LINES_BEFORE_TRUNCATION, keepStart = CONFIG.LINES_TO_KEEP_START, keepEnd = CONFIG.LINES_TO_KEEP_END } = options;
@@ -62,22 +72,27 @@ const truncateMiddle = (content, options = {}) => {
   const lines = content.split('\n');
   if (lines.length <= maxLines) {
-    return content;
+    return sanitizeUnicode(content);
   }
   const startLines = lines.slice(0, keepStart);
   const endLines = lines.slice(-keepEnd);
   const removedCount = lines.length - keepStart - keepEnd;
-  return [...startLines, '', `... [${removedCount} lines truncated] ...`, '', ...endLines].join('\n');
+  return sanitizeUnicode([...startLines, '', `... [${removedCount} lines truncated] ...`, '', ...endLines].join('\n'));
 };
 /**
- * Safely stringify JSON with depth limit and circular reference handling
+ * Safely stringify JSON with depth limit and circular reference handling.
+ * String values are passed through sanitizeUnicode() so that orphaned UTF-16
+ * surrogates (which can appear after persisted-output truncation) never reach
+ * JSON.stringify() and cause a 400 API error.
+ *
+ * @see https://github.com/link-assistant/hive-mind/issues/1324
  *
  * @param {any} obj - Object to stringify
  * @param {number} [indent=2] - Indentation spaces
- * @returns {string} Formatted JSON string
+ * @returns {string} Formatted JSON string with sanitized Unicode
  */
 const safeJsonStringify = (obj, indent = 2) => {
   const seen = new WeakSet();
@@ -90,6 +105,9 @@ const safeJsonStringify = (obj, indent = 2) => {
         }
         seen.add(value);
       }
+      if (typeof value === 'string') {
+        return sanitizeUnicode(value);
+      }
       return value;
     },
     indent
@@ -954,6 +972,7 @@ export const validateInteractiveModeConfig = async (argv, log) => {
 // Export utilities for testing
 export const utils = {
+  sanitizeUnicode,
   truncateMiddle,
   safeJsonStringify,
   createCollapsible,

package/src/opencode.lib.mjs CHANGED Viewed

@@ -17,6 +17,7 @@ import { log } from './lib.mjs';
 import { reportError } from './sentry.lib.mjs';
 import { timeouts } from './config.lib.mjs';
 import { detectUsageLimit, formatUsageLimitMessage } from './usage-limit.lib.mjs';
+import { sanitizeObjectStrings } from './unicode-sanitization.lib.mjs';
 // Model mapping to translate aliases to full model IDs for OpenCode
 export const mapModelToId = model => {
@@ -322,7 +323,7 @@ export const executeOpenCodeCommand = async params => {
             const lines = output.split('\n');
             for (const line of lines) {
               if (!line.trim()) continue;
-              const data = JSON.parse(line);
+              const data = sanitizeObjectStrings(JSON.parse(line));
               // Track text content for result summary
               // OpenCode outputs text via 'text', 'assistant', 'message', or 'result' type events
               if (data.type === 'text' && data.text) {
@@ -364,7 +365,7 @@ export const executeOpenCodeCommand = async params => {
               const lines = errorOutput.split('\n');
               for (const line of lines) {
                 if (!line.trim()) continue;
-                const data = JSON.parse(line);
+                const data = sanitizeObjectStrings(JSON.parse(line));
                 if (data.type === 'text' && data.text) {
                   lastTextContent = data.text;
                 } else if (data.type === 'assistant' && data.message?.content) {

package/src/solve.mjs CHANGED Viewed

@@ -98,12 +98,10 @@ const { validateAndExitOnInvalidModel } = modelValidation;
 const acceptInviteLib = await import('./solve.accept-invite.lib.mjs');
 const { autoAcceptInviteForRepo } = acceptInviteLib;
-// Initialize log file EARLY to capture all output including version and command
-// Use default directory (cwd) initially, will be set from argv.logDir after parsing
+// Initialize log file EARLY (use cwd initially, will be updated after argv parsing)
 const logFile = await initializeLogFile(null);
-// Log version and raw command IMMEDIATELY after log file initialization
-// This ensures they appear in both console and log file, even if argument parsing fails
+// Log version and raw command IMMEDIATELY after log file initialization (ensures they appear even if parsing fails)
 const versionInfo = await getVersionInfo();
 await log('');
 await log(`🚀 solve v${versionInfo}`);
@@ -221,9 +219,7 @@ if (!(await validateContinueOnlyOnFeedback(argv, isPrUrl, isIssueUrl))) {
 const tool = argv.tool || 'claude';
 await validateAndExitOnInvalidModel(argv.model, tool, safeExit);
-// Perform all system checks using validation module
-// Skip tool CONNECTION validation in dry-run mode or when --skip-tool-connection-check or --no-tool-connection-check is enabled
-// Note: This does NOT skip model validation which is performed above
+// Perform all system checks (skip tool connection check in dry-run or when --skip-tool-connection-check; model validation always runs)
 const skipToolConnectionCheck = argv.dryRun || argv.skipToolConnectionCheck || argv.toolConnectionCheck === false;
 if (!(await performSystemChecks(argv.minDiskSpace || 2048, skipToolConnectionCheck, argv.model, argv))) {
   await safeExit(1, 'System checks failed');
@@ -236,9 +232,7 @@ if (argv.verbose) {
   await log(`   Is PR URL: ${!!isPrUrl}`, { verbose: true });
 }
 const claudePath = argv.executeToolWithBun ? 'bunx claude' : process.env.CLAUDE_PATH || 'claude';
-// Note: owner, repo, and urlNumber are already extracted from validateGitHubUrl() above
-// The parseUrlComponents() call was removed as it had a bug with hash fragments (#issuecomment-xyz)
-// and the validation result already provides these values correctly parsed
+// Note: owner, repo, and urlNumber are extracted from validateGitHubUrl() above (parseUrlComponents() removed due to hash fragment bug)
 // Handle --auto-fork option: automatically fork public repositories without write access
 if (argv.autoFork && !argv.fork) {

package/src/telegram-bot.mjs CHANGED Viewed

@@ -52,7 +52,7 @@ const { formatUsageMessage, getAllCachedLimits } = await import('./limits.lib.mj
 const { getVersionInfo, formatVersionMessage } = await import('./version-info.lib.mjs');
 const { escapeMarkdown, escapeMarkdownV2, cleanNonPrintableChars, makeSpecialCharsVisible } = await import('./telegram-markdown.lib.mjs');
 const { getSolveQueue, createQueueExecuteCallback } = await import('./telegram-solve-queue.lib.mjs');
-const { isOldMessage: _isOldMessage, isGroupChat: _isGroupChat, isChatAuthorized: _isChatAuthorized, isForwardedOrReply: _isForwardedOrReply, extractCommandFromText } = await import('./telegram-message-filters.lib.mjs');
+const { isOldMessage: _isOldMessage, isGroupChat: _isGroupChat, isChatAuthorized: _isChatAuthorized, isForwardedOrReply: _isForwardedOrReply, extractCommandFromText, extractGitHubUrl: _extractGitHubUrl } = await import('./telegram-message-filters.lib.mjs');
 // Import bot launcher with exponential backoff retry (issue #1240)
 const { launchBotWithRetry } = await import('./telegram-bot-launcher.lib.mjs');
@@ -313,10 +313,6 @@ function isOldMessage(ctx) {
   return _isOldMessage(ctx, BOT_START_TIME, { verbose: VERBOSE });
 }
-function isGroupChat(ctx) {
-  return _isGroupChat(ctx);
-}
 function isForwardedOrReply(ctx) {
   return _isForwardedOrReply(ctx, { verbose: VERBOSE });
 }
@@ -596,46 +592,6 @@ async function executeAndUpdateMessage(ctx, startingMessage, commandName, args,
   }
 }
-/**
- * Extract GitHub issue/PR URL from message text
- * Validates that message contains exactly one GitHub issue/PR link
- *
- * @param {string} text - Message text to search
- * @returns {{ url: string|null, error: string|null, linkCount: number }}
- */
-function extractGitHubUrl(text) {
-  if (!text || typeof text !== 'string') {
-    return { url: null, error: null, linkCount: 0 };
-  }
-  text = cleanNonPrintableChars(text); // Clean non-printable chars before processing
-  const words = text.split(/\s+/);
-  const foundUrls = [];
-  for (const word of words) {
-    // Try to parse as GitHub URL
-    const parsed = parseGitHubUrl(word);
-    // Accept issue or PR URLs
-    if (parsed.valid && (parsed.type === 'issue' || parsed.type === 'pull')) {
-      foundUrls.push(parsed.normalized);
-    }
-  }
-  // Check if multiple links were found
-  if (foundUrls.length === 0) {
-    return { url: null, error: null, linkCount: 0 };
-  } else if (foundUrls.length === 1) {
-    return { url: foundUrls[0], error: null, linkCount: 1 };
-  } else {
-    return {
-      url: null,
-      error: `Found ${foundUrls.length} GitHub links in the message. Please reply to a message with only one GitHub issue or PR link.`,
-      linkCount: foundUrls.length,
-    };
-  }
-}
 bot.command('help', async ctx => {
   if (VERBOSE) {
     console.log('[VERBOSE] /help command received');
@@ -760,7 +716,7 @@ bot.command('limits', async ctx => {
     return;
   }
-  if (!isGroupChat(ctx)) {
+  if (!_isGroupChat(ctx)) {
     if (VERBOSE) {
       console.log('[VERBOSE] /limits ignored: not a group chat');
     }
@@ -809,7 +765,7 @@ bot.command('version', async ctx => {
     data: { chatId: ctx.chat?.id, chatType: ctx.chat?.type, userId: ctx.from?.id, username: ctx.from?.username },
   });
   if (isOldMessage(ctx) || isForwardedOrReply(ctx)) return;
-  if (!isGroupChat(ctx)) return await ctx.reply('❌ The /version command only works in group chats. Please add this bot to a group and make it an admin.', { reply_to_message_id: ctx.message.message_id });
+  if (!_isGroupChat(ctx)) return await ctx.reply('❌ The /version command only works in group chats. Please add this bot to a group and make it an admin.', { reply_to_message_id: ctx.message.message_id });
   const chatId = ctx.chat.id;
   if (!isChatAuthorized(chatId)) return await ctx.reply(`❌ This chat (ID: ${chatId}) is not authorized to use this bot. Please contact the bot administrator.`, { reply_to_message_id: ctx.message.message_id });
   const fetchingMessage = await ctx.reply('🔄 Gathering version information...', {
@@ -827,7 +783,7 @@ registerAcceptInvitesCommand(bot, {
   VERBOSE,
   isOldMessage,
   isForwardedOrReply,
-  isGroupChat,
+  isGroupChat: _isGroupChat,
   isChatAuthorized,
   addBreadcrumb,
 });
@@ -838,7 +794,7 @@ registerMergeCommand(bot, {
   VERBOSE,
   isOldMessage,
   isForwardedOrReply,
-  isGroupChat,
+  isGroupChat: _isGroupChat,
   isChatAuthorized,
   addBreadcrumb,
 });
@@ -849,7 +805,7 @@ const { handleSolveQueueCommand } = registerSolveQueueCommand(bot, {
   VERBOSE,
   isOldMessage,
   isForwardedOrReply,
-  isGroupChat,
+  isGroupChat: _isGroupChat,
   isChatAuthorized,
   addBreadcrumb,
   getSolveQueue,
@@ -903,7 +859,7 @@ async function handleSolveCommand(ctx) {
     return;
   }
-  if (!isGroupChat(ctx)) {
+  if (!_isGroupChat(ctx)) {
     if (VERBOSE) {
       console.log('[VERBOSE] /solve ignored: not a group chat');
     }
@@ -926,17 +882,23 @@ async function handleSolveCommand(ctx) {
   let userArgs = parseCommandArgs(ctx.message.text);
-  // Check if this is a reply to a message and user didn't provide URL
+  // Check if this is a reply to a message and user didn't provide URL as first argument
   // In that case, try to extract GitHub URL from the replied message
+  // Issue #1325: Support all options via /solve command when replying (e.g., "/solve --model opus")
   const isReply = message.reply_to_message && message.reply_to_message.message_id && !message.reply_to_message.forum_topic_created;
-  if (isReply && userArgs.length === 0) {
+  // Check if the first argument looks like a GitHub URL
+  // If not, we should try to extract the URL from the replied message
+  const firstArgIsUrl = userArgs.length > 0 && (userArgs[0].includes('github.com') || userArgs[0].match(/^https?:\/\//));
+  if (isReply && !firstArgIsUrl) {
     if (VERBOSE) {
-      console.log('[VERBOSE] /solve is a reply without URL, extracting from replied message...');
+      console.log('[VERBOSE] /solve is a reply without URL in args, extracting from replied message...');
+      console.log('[VERBOSE] User args:', userArgs);
     }
     const replyText = message.reply_to_message.text || '';
-    const extraction = extractGitHubUrl(replyText);
+    const extraction = _extractGitHubUrl(replyText, { parseGitHubUrl, cleanNonPrintableChars });
     if (extraction.error) {
       // Multiple links found
@@ -949,18 +911,18 @@ async function handleSolveCommand(ctx) {
       });
       return;
     } else if (extraction.url) {
-      // Single link found
+      // Single link found - prepend it to existing user args (issue #1325)
       if (VERBOSE) {
         console.log('[VERBOSE] Extracted URL from reply:', extraction.url);
       }
-      // Add the extracted URL as the first argument
-      userArgs = [extraction.url];
+      // Prepend the extracted URL to user's options (e.g., ['--model', 'opus'] -> ['url', '--model', 'opus'])
+      userArgs = [extraction.url, ...userArgs];
     } else {
       // No link found
       if (VERBOSE) {
         console.log('[VERBOSE] No GitHub URL found in replied message');
       }
-      await ctx.reply('❌ No GitHub issue/PR link found in the replied message.\n\nExample: Reply to a message containing a GitHub issue link with `/solve`', { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
+      await ctx.reply('❌ No GitHub issue/PR link found in the replied message.\n\nExample: Reply to a message containing a GitHub issue link with `/solve`\n\nOr with options: `/solve --model opus`', { parse_mode: 'Markdown', reply_to_message_id: ctx.message.message_id });
       return;
     }
   }
@@ -1113,7 +1075,7 @@ async function handleHiveCommand(ctx) {
     return;
   }
-  if (!isGroupChat(ctx)) {
+  if (!_isGroupChat(ctx)) {
     if (VERBOSE) {
       console.log('[VERBOSE] /hive ignored: not a group chat');
     }
@@ -1217,7 +1179,7 @@ registerTopCommand(bot, {
   VERBOSE,
   isOldMessage,
   isForwardedOrReply,
-  isGroupChat,
+  isGroupChat: _isGroupChat,
   isChatAuthorized,
 });

package/src/telegram-message-filters.lib.mjs CHANGED Viewed

@@ -171,3 +171,48 @@ export function extractCommandFromText(text, botUsername = null) {
   return { command, botMention };
 }
+/**
+ * Find the single GitHub issue or pull-request link contained in a message.
+ *
+ * The text is cleaned with `deps.cleanNonPrintableChars`, split on whitespace,
+ * and every token is handed to `deps.parseGitHubUrl`. Tokens whose parse result
+ * is `valid` with type `'issue'` or `'pull'` count as links; their `normalized`
+ * form is what gets reported.
+ * Moved here from telegram-bot.mjs to reduce that file's size (issue #1325).
+ *
+ * @param {string} text - Message text to search
+ * @param {Object} deps - Injected helpers
+ * @param {Function} deps.parseGitHubUrl - Parses one token; its result is read via `valid`, `type` and `normalized`
+ * @param {Function} deps.cleanNonPrintableChars - Cleans the text before it is tokenized
+ * @returns {{ url: string|null, error: string|null, linkCount: number }}
+ *   `url` is set only when exactly one link is found; `error` is set only when
+ *   more than one link is found; empty or non-string input yields no link and no error.
+ * @see https://github.com/link-assistant/hive-mind/issues/1325
+ */
+export function extractGitHubUrl(text, { parseGitHubUrl, cleanNonPrintableChars }) {
+  // Empty or non-string input: nothing to search
+  if (typeof text !== 'string' || text === '') {
+    return { url: null, error: null, linkCount: 0 };
+  }
+
+  // Parse token by token, keeping only the normalized issue/PR links
+  const links = cleanNonPrintableChars(text)
+    .split(/\s+/)
+    .flatMap(token => {
+      const candidate = parseGitHubUrl(token);
+      const isIssueOrPull = candidate.type === 'issue' || candidate.type === 'pull';
+      return candidate.valid && isIssueOrPull ? [candidate.normalized] : [];
+    });
+
+  // More than one link is ambiguous: report it instead of guessing
+  if (links.length > 1) {
+    return {
+      url: null,
+      error: `Found ${links.length} GitHub links in the message. Please reply to a message with only one GitHub issue or PR link.`,
+      linkCount: links.length,
+    };
+  }
+
+  // Zero or one link
+  return {
+    url: links.length === 1 ? links[0] : null,
+    error: null,
+    linkCount: links.length,
+  };
+}

package/src/unicode-sanitization.lib.mjs ADDED Viewed

@@ -0,0 +1,67 @@
+/**
+ * Unicode Sanitization Utility
+ *
+ * Provides functions to sanitize orphaned UTF-16 surrogates from strings.
+ * When Claude Code's <persisted-output> truncation splits a surrogate pair,
+ * the orphaned high surrogate (e.g. \uD83E without \uDD16) causes
+ * JSON.stringify() to produce invalid JSON that the Anthropic API rejects:
+ *
+ *   API Error: 400 {"type":"error","error":{"type":"invalid_request_error",
+ *   "message":"The request body is not valid JSON: no low surrogate in string..."}}
+ *
+ * This module is used by every CLI output parsing path (claude.lib.mjs,
+ * agent.lib.mjs, codex.lib.mjs, opencode.lib.mjs) and by the interactive
+ * mode PR comment path (interactive-mode.lib.mjs) to ensure all text is
+ * valid before JSON serialization or external API calls.
+ *
+ * @see https://github.com/link-assistant/hive-mind/issues/1324
+ * @see https://www.rfc-editor.org/rfc/rfc8259#section-7
+ * @module unicode-sanitization
+ */
+/**
+ * Swap each orphaned UTF-16 surrogate for the replacement character U+FFFD.
+ * A surrogate is orphaned when it is:
+ *   - a high surrogate (U+D800–U+DBFF) not immediately followed by a low surrogate (U+DC00–U+DFFF), or
+ *   - a low surrogate (U+DC00–U+DFFF) not immediately preceded by a high surrogate.
+ * Properly paired surrogates are left untouched.
+ *
+ * @param {string} text - Input string that may contain orphaned surrogates
+ * @returns {string} The string with every orphaned surrogate replaced by U+FFFD.
+ *   Falsy input (including the empty string) yields ''; a truthy non-string
+ *   value is returned unchanged.
+ */
+export const sanitizeUnicode = text => {
+  if (typeof text === 'string' && text.length > 0) {
+    // First alternative: high surrogate with no low surrogate after it.
+    // Second alternative: low surrogate with no high surrogate before it.
+    return text.replace(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g, '\uFFFD');
+  }
+  // Not a non-empty string: fall back to '' for falsy values, pass anything else through
+  return text || '';
+};
+/**
+ * Recursively sanitize all string values in an object/array.
+ * This is useful for sanitizing parsed JSON objects from Claude CLI output
+ * before they are re-serialized or processed.
+ *
+ * Strings go through sanitizeUnicode(); arrays and objects are copied with
+ * every element/property value sanitized in turn; all other values (numbers,
+ * booleans, null, undefined, ...) are returned unchanged. Property keys are
+ * copied as-is — only values are sanitized. The input is never mutated.
+ *
+ * @param {any} value - Value to sanitize (strings are sanitized, objects/arrays are traversed)
+ * @returns {any} A copy of the value with all string leaves sanitized
+ */
+export const sanitizeObjectStrings = value => {
+  if (typeof value === 'string') {
+    return sanitizeUnicode(value);
+  }
+  if (Array.isArray(value)) {
+    return value.map(item => sanitizeObjectStrings(item));
+  }
+  if (typeof value === 'object' && value !== null) {
+    // Build the copy with Object.fromEntries rather than `copy[key] = ...`:
+    // JSON.parse can yield an own "__proto__" key, and plain assignment to that
+    // key would invoke the Object.prototype.__proto__ setter — dropping the key
+    // from the copy and, for object values, replacing the copy's prototype.
+    // Object.fromEntries always defines own data properties.
+    return Object.fromEntries(Object.entries(value).map(([key, val]) => [key, sanitizeObjectStrings(val)]));
+  }
+  return value;
+};
+export default { sanitizeUnicode, sanitizeObjectStrings };