nothumanallowed 13.2.72 → 13.2.74

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nothumanallowed",
3
- "version": "13.2.72",
3
+ "version": "13.2.74",
4
4
  "description": "NotHumanAllowed — 38 AI agents, 80 tools, Studio (visual agentic workflows). Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, Alexandria E2E messaging, GitHub, Notion, Slack, voice chat, free AI (Liara), 28 languages. Zero-dependency CLI.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -2852,35 +2852,73 @@ export async function cmdUI(args) {
2852
2852
 
2853
2853
  // ── Fetch REAL data for each agent type ──────────────────────
2854
2854
  if (agent === 'DocumentReaderAgent') {
2855
- // Extract text from attached PDF and return it as the step output.
2856
- // This becomes context for all subsequent steps (WebSearchAgent etc.)
2855
+ // Extract text from attached PDF, then ask the LLM to structure it cleanly.
2856
+ // The structured output becomes context for all subsequent steps.
2857
2857
  sendToken('[Reading attached document...] ');
2858
+ let rawText = '';
2858
2859
  if (stepPdfBase64) {
2859
2860
  try {
2860
2861
  const b64 = stepPdfBase64.includes(',') ? stepPdfBase64.split(',')[1] : stepPdfBase64;
2861
2862
  const pdfBuffer = Buffer.from(b64, 'base64');
2862
- const extracted = extractTextFromPdf(pdfBuffer);
2863
- if (extracted && extracted.length > 20) {
2864
- toolData = `## Document: ${stepPdfName || 'attached'}\n\n${extracted.slice(0, 20000)}`;
2865
- } else {
2866
- // Fallback: ask vision model to describe/OCR the document
2867
- sendToken('[No text found — using vision OCR...] ');
2863
+ rawText = extractTextFromPdf(pdfBuffer) || '';
2864
+ if (!rawText || rawText.length < 20) {
2865
+ // Scanned PDF use vision OCR
2866
+ sendToken('[No text layer — using vision OCR...] ');
2868
2867
  try {
2869
- const visionText = await callLLMVision(config, stepPdfBase64, 'application/pdf',
2870
- `Extract ALL text, technical specifications, model numbers, part codes, product names, manufacturer details, dimensions, ratings, and any other data from this document. List every detail exactly as printed.`);
2871
- toolData = `## Document (OCR): ${stepPdfName || 'attached'}\n\n${visionText}`;
2872
- } catch (ve) {
2873
- toolData = `Could not extract text from document: ${ve.message}`;
2874
- }
2868
+ rawText = await callLLMVision(config, stepPdfBase64, 'application/pdf',
2869
+ 'Extract ALL text from this document exactly as printed, preserving all numbers, codes, and values.');
2870
+ } catch (ve) { rawText = ''; }
2875
2871
  }
2876
- } catch (e) { toolData = `Document read failed: ${e.message}`; }
2877
- } else {
2878
- toolData = 'No document attached.';
2872
+ } catch (e) { rawText = ''; }
2873
+ }
2874
+ if (!rawText) {
2875
+ sendToken('Could not extract text from the attached document.');
2876
+ clearInterval(keepalive);
2877
+ sendEvent({ done: true, usage: { input: 0, output: 0 } });
2878
+ res.end();
2879
+ logRequest(method, pathname, 200, Date.now() - start);
2880
+ return;
2881
+ }
2882
+ // Ask LLM to structure the raw extracted text into readable markdown
2883
+ sendToken('[Structuring document content...] ');
2884
+ const LANG_MAP_DOC = {en:'English',it:'Italian',es:'Spanish',fr:'French',de:'German',pt:'Portuguese',zh:'Chinese',ja:'Japanese',ar:'Arabic',hi:'Hindi',ru:'Russian',nl:'Dutch',pl:'Polish',tr:'Turkish',ko:'Korean',sv:'Swedish',da:'Danish',fi:'Finnish',no:'Norwegian',cs:'Czech'};
2885
+ const docLang = LANG_MAP_DOC[(config?.language||'it').toLowerCase().slice(0,2)] || 'Italian';
2886
+ const docSys = `You are a technical document analyst. Extract and structure the content of this document into clear, readable markdown. Respond in ${docLang}.
2887
+ Rules:
2888
+ - List ALL technical specifications with their exact values (codes, voltages, pressures, temperatures, dimensions, flow rates, etc.)
2889
+ - Use markdown headers (##), bullet points (-), and tables where appropriate
2890
+ - Do NOT invent, interpret, or add anything not present in the raw text
2891
+ - Include all product/part codes exactly as written
2892
+ - Keep all numeric values with their units`;
2893
+ const docUser = `Here is the raw text extracted from "${stepPdfName || 'document.pdf'}". Structure it into clean, readable markdown:\n\n${rawText.slice(0, 18000)}`;
2894
+ let structuredOutput = '';
2895
+ let inThink = false;
2896
+ try {
2897
+ await withTimeout(
2898
+ callLLMStream(config, docSys, docUser,
2899
+ (token) => {
2900
+ // Strip <think> blocks
2901
+ let buf = token;
2902
+ if (inThink) {
2903
+ const ci = buf.indexOf('</think>');
2904
+ if (ci >= 0) { buf = buf.slice(ci + 8); inThink = false; }
2905
+ else return;
2906
+ }
2907
+ const oi = buf.indexOf('<think>');
2908
+ if (oi >= 0) { buf = buf.slice(0, oi); inThink = true; }
2909
+ if (buf) { structuredOutput += buf; sendToken(buf); }
2910
+ },
2911
+ { max_tokens: 3000 }
2912
+ ),
2913
+ 90000
2914
+ );
2915
+ } catch (e) {
2916
+ // LLM failed — fall back to raw text
2917
+ structuredOutput = `## ${stepPdfName || 'Document'}\n\n${rawText.slice(0, 8000)}`;
2918
+ sendToken(structuredOutput);
2879
2919
  }
2880
- // Stream the extracted content as the step output directly — no LLM rewrite needed
2881
- sendToken(toolData);
2882
2920
  clearInterval(keepalive);
2883
- sendEvent({ done: true, usage: { input: 0, output: Math.ceil(toolData.length / 4) } });
2921
+ sendEvent({ done: true, usage: { input: Math.ceil(rawText.length / 4), output: Math.ceil(structuredOutput.length / 4) } });
2884
2922
  res.end();
2885
2923
  logRequest(method, pathname, 200, Date.now() - start);
2886
2924
  return;
@@ -2909,22 +2947,55 @@ export async function cmdUI(args) {
2909
2947
  // Extract a concise search query from the step prompt
2910
2948
  let searchQuery = stepPrompt;
2911
2949
 
2912
- // If context contains extracted PDF data, extract the best search query from it:
2913
- // product codes, model numbers, part numbers etc. are better search terms than the task text.
2950
+ // If context contains extracted PDF/document data, build a smart search query.
2951
+ // Detect whether the task asks for "similar" products vs. exact product lookup.
2914
2952
  if (context && context.length > 50) {
2915
- // Look for product codes: alphanumeric codes, e.g. 321k63, VFD-001, 4WE6D6X
2916
- const codeMatch = context.match(/\b([A-Z0-9]{2,}[-\/]?[A-Z0-9]{2,}(?:[-\/][A-Z0-9]+)*)\b/g);
2917
- const productCodes = codeMatch ? [...new Set(codeMatch)].slice(0, 3) : [];
2918
- // Also grab manufacturer name if present
2919
- const mfrMatch = context.match(/(?:Marca|Marchio|Manufacturer|Brand|Produttore|Costruttore)[:\s]+([A-Za-z0-9 &]{2,40})/i);
2920
- const mfr = mfrMatch ? mfrMatch[1].trim() : '';
2921
- if (productCodes.length > 0) {
2922
- searchQuery = (mfr ? mfr + ' ' : '') + productCodes.join(' ') + ' buy acquista distributore';
2923
- sendToken(`[Search query from document: "${searchQuery}"] `);
2953
+ const taskLow2 = task.toLowerCase();
2954
+ const wantsSimilar = /simil|equivalen|alternativ|sostitut|find similar|alternative|replacement/i.test(taskLow2 + ' ' + stepPrompt);
2955
+
2956
+ // Extract manufacturer name from structured context
2957
+ const mfrMatch = context.match(/(?:Marca|Marchio|Manufacturer|Brand|Produttore|Costruttore|Parker|Bosch|Rexroth|SMC|Festo|Burkert|Norgren|Sirai|Asco)[:\s]*([A-Za-z0-9 &]{2,40})/i);
2958
+ const mfr = mfrMatch ? mfrMatch[1].trim().split(/\s+/).slice(0,2).join(' ') : '';
2959
+
2960
+ // Extract model/part code: 3-8 char alphanumeric, NOT common words/units
2961
+ const IGNORE = new Set(['NBR','FKM','RUBY','BSP','PDF','URL','HTTP','THE','AND','FOR','MIN','MAX','BAR','VAC','KV','AC','DC','CE','UK','EU','ISO','DIN']);
2962
+ const codeRx = /\b([A-Z][A-Z0-9]{2,7}(?:[-\/][A-Z0-9]{1,6})*)\b/g;
2963
+ const allCodes = [];
2964
+ let cm;
2965
+ while ((cm = codeRx.exec(context)) !== null) {
2966
+ const c = cm[1];
2967
+ if (!IGNORE.has(c) && c.length >= 4 && /\d/.test(c)) allCodes.push(c);
2968
+ }
2969
+ const productCode = [...new Set(allCodes)][0] || '';
2970
+
2971
+ // Extract key technical specs for similarity search
2972
+ const pressMatch = context.match(/(?:pressione|pressure|MOPD)[^\d]*(\d+)\s*bar/i);
2973
+ const pressure = pressMatch ? pressMatch[1] + ' bar' : '';
2974
+ const portMatch = context.match(/(?:porta|port|attacco|fitting)[^\d]*(\d+\/\d+[''"]?)/i);
2975
+ const port = portMatch ? portMatch[1] : '';
2976
+ const typeMatch = context.match(/(?:valvola|valve|solenoid)[^,\n]{0,30}?(2\/2|3\/2|pilot.operated|normally.closed|normally.open)/i);
2977
+ const valveType = typeMatch ? typeMatch[1] : '';
2978
+
2979
+ if (wantsSimilar) {
2980
+ // Task wants equivalent/similar product — search by specs, not by exact code
2981
+ const specParts = [
2982
+ valveType || 'solenoid valve',
2983
+ pressure,
2984
+ port,
2985
+ 'hydraulic oil',
2986
+ 'acquista distributore Italy'
2987
+ ].filter(Boolean);
2988
+ searchQuery = specParts.join(' ');
2989
+ if (mfr && mfr.toLowerCase() !== productCode.toLowerCase()) searchQuery = mfr + ' ' + searchQuery;
2990
+ sendToken(`[Search query (similar product): "${searchQuery}"] `);
2991
+ } else if (productCode) {
2992
+ // Task wants the exact product — search by code + manufacturer
2993
+ searchQuery = (mfr ? mfr + ' ' : '') + productCode + ' acquista distributore';
2994
+ sendToken(`[Search query (exact product): "${searchQuery}"] `);
2924
2995
  }
2925
2996
  }
2926
2997
 
2927
- if (searchQuery.length > 120 && !context) {
2998
+ if (!context && searchQuery.length > 120) {
2928
2999
  const keywordMatch = searchQuery.match(/(?:cerca|search|find|ricerca|notizie su|news about|latest on|aggiornamenti su)\s+(.{5,80}?)(?:\s+(?:e|and|per|for|poi|then)|$)/i);
2929
3000
  if (keywordMatch) {
2930
3001
  searchQuery = keywordMatch[1].trim();
@@ -2946,10 +3017,29 @@ export async function cmdUI(args) {
2946
3017
  }
2947
3018
  } catch {}
2948
3019
  }
2949
- // Deep web search for broader context
3020
+ // Primary search
2950
3021
  const searchResult = await withTimeout(executeTool('web_search', { query: searchQuery, deep: true }, config), 25000);
2951
3022
  const searchStr = typeof searchResult === 'string' ? searchResult : JSON.stringify(searchResult);
2952
3023
  toolData += (toolData ? '\n\n' : '') + `## Web search results for "${searchQuery}":\n${searchStr}`;
3024
+
3025
+ // Secondary search: if task mentions "similar" or "where to buy", run a second query for vendors
3026
+ const taskLow3 = task.toLowerCase();
3027
+ if (/simil|equivalen|alternativ|sostitut|rivendit|distributore|vend|acquist|compra|dove\s+trovare|where\s+to\s+buy/i.test(taskLow3) && context && context.length > 50) {
3028
+ // Build vendor search query from manufacturer + product type
3029
+ const mfrVendor = context.match(/(?:Parker|Bosch|Rexroth|SMC|Festo|Burkert|Norgren|Sirai|Asco)\s+[A-Za-z]*/i);
3030
+ const mfrName = mfrVendor ? mfrVendor[0].trim() : '';
3031
+ const typeVendor = context.match(/(?:valvola|valve|solenoid|elettrovalvola)[^,\n]{0,30}?(2\/2|pilot.operated|normally.closed)/i);
3032
+ const typeStr = typeVendor ? typeVendor[1] : 'solenoid valve';
3033
+ const pressVendor = context.match(/(\d+)\s*bar/i);
3034
+ const pressStr = pressVendor ? pressVendor[1] + ' bar' : '';
3035
+ const vendorQuery = [mfrName, typeStr, pressStr, 'rivenditore distributore acquisto'].filter(Boolean).join(' ');
3036
+ sendToken(`[Search query (vendor/similar): "${vendorQuery}"] `);
3037
+ try {
3038
+ const vendorResult = await withTimeout(executeTool('web_search', { query: vendorQuery, deep: true }, config), 20000);
3039
+ const vendorStr = typeof vendorResult === 'string' ? vendorResult : JSON.stringify(vendorResult);
3040
+ toolData += `\n\n## Vendor search results for "${vendorQuery}":\n${vendorStr}`;
3041
+ } catch {}
3042
+ }
2953
3043
  } catch (e) { toolData = toolData || `Web search failed: ${e.message}`; }
2954
3044
 
2955
3045
  } else if (agent === 'BrowserAgent') {
package/src/constants.mjs CHANGED
@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
5
5
  const __filename = fileURLToPath(import.meta.url);
6
6
  const __dirname = path.dirname(__filename);
7
7
 
8
- export const VERSION = '13.2.72';
8
+ export const VERSION = '13.2.74';
9
9
  export const BASE_URL = 'https://nothumanallowed.com/cli';
10
10
  export const API_BASE = 'https://nothumanallowed.com/api/v1';
11
11
 
@@ -3367,7 +3367,11 @@ function renderStudioLog() {
3367
3367
  el.scrollTop = el.scrollHeight;
3368
3368
  }
3369
3369
 
3370
+ var _downloadPdfLast = 0;
3370
3371
  function downloadStudioPDF() {
3372
+ var now = Date.now();
3373
+ if (now - _downloadPdfLast < 3000) return; // debounce: max 1 download every 3s
3374
+ _downloadPdfLast = now;
3371
3375
  var task = studioState.task || 'NHA Studio Report';
3372
3376
  var today = new Date().toLocaleDateString('it-IT', {day:'2-digit',month:'2-digit',year:'numeric'});
3373
3377
  var nodes = studioState.nodes || [];