npm - nothumanallowed - Versions diffs - 13.2.41 → 13.2.42 - Mend

nothumanallowed 13.2.41 → 13.2.42

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nothumanallowed",
-  "version": "13.2.41",
+  "version": "13.2.42",
   "description": "NotHumanAllowed — 38 AI agents, 80 tools, Studio (visual agentic workflows). Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, Alexandria E2E messaging, GitHub, Notion, Slack, voice chat, free AI (Liara), 28 languages. Zero-dependency CLI.",
   "type": "module",
   "bin": {

package/src/constants.mjs CHANGED Viewed

@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
-export const VERSION = '13.2.41';
+export const VERSION = '13.2.42';
 export const BASE_URL = 'https://nothumanallowed.com/cli';
 export const API_BASE = 'https://nothumanallowed.com/api/v1';

package/src/services/llm.mjs CHANGED Viewed

@@ -521,7 +521,7 @@ export async function callLLMStream(config, systemPrompt, userMessage, onToken,
         { role: 'system', content: sanitize(systemPrompt) },
         { role: 'user', content: sanitize(userMessage) },
       ],
-      stream: true,
+      stream: false,
       chat_template_kwargs: { enable_thinking: thinkingEnabled },
     };
     const nhaRes = await fetch('https://nothumanallowed.com/api/v1/liara/chat', {
@@ -533,10 +533,13 @@ export async function callLLMStream(config, systemPrompt, userMessage, onToken,
       const err = await nhaRes.text();
       throw new Error(`NHA Free ${nhaRes.status}: ${err}`);
     }
-    // Node.js native fetch ReadableStream closes after first TCP buffer for SSE.
-    // Use res.text() to get the full response, then parse SSE lines synchronously.
-    const rawText = await nhaRes.text();
-    return parseSSEText(rawText, 'openai', onToken);
+    // Non-streaming: vLLM returns complete text — no BPE subword splitting issues
+    const nhaJson = await nhaRes.json();
+    let fullNhaText = nhaJson.choices?.[0]?.message?.content || '';
+    // Strip <think>...</think> blocks
+    fullNhaText = fullNhaText.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
+    if (onToken) onToken(fullNhaText);
+    return fullNhaText;
   }
   const format = provider === 'anthropic' ? 'anthropic' : 'openai';
@@ -648,6 +651,7 @@ function parseSSEText(text, format, onToken) {
   let thinkBuf = '';
   let inThink = false;
   let isHtmlOutput = false;
+  let chunkCount = 0;
   for (const line of text.split('\n')) {
     if (!line.startsWith('data: ')) continue;
@@ -681,6 +685,8 @@ function parseSSEText(text, format, onToken) {
           }
         }
         if (out) {
+          chunkCount++;
+          if (chunkCount <= 3) process.stderr.write(`[QWEN3 CHUNK ${chunkCount}] len=${out.length} repr=${JSON.stringify(out.slice(0,60))}\n`);
           // Detect HTML output on first meaningful token
           if (!isHtmlOutput && (out.includes('<div') || out.includes('<!DOCTYPE') || out.includes('<html'))) {
             isHtmlOutput = true;
@@ -698,7 +704,7 @@ function parseSSEText(text, format, onToken) {
       }
     } catch {}
   }
+  process.stderr.write(`[QWEN3 TOTAL CHUNKS] ${chunkCount}, fullText len=${fullText.length}\n`);
   return fullText;
 }