npm - notoken-core - Versions diffs - 1.6.0 → 2.0.0 - Mend

notoken-core 1.6.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99) hide show

package/config/chat-responses.json +767 -0
package/config/concept-clusters.json +31 -0
package/config/entities.json +93 -0
package/config/image-prompts.json +20 -0
package/config/intent-vectors.json +1 -0
package/config/intents.json +4946 -83
package/config/ollama-models.json +193 -0
package/config/rules.json +32 -1
package/dist/automation/discordPatchright.d.ts +35 -0
package/dist/automation/discordPatchright.js +424 -0
package/dist/automation/discordSetup.d.ts +31 -0
package/dist/automation/discordSetup.js +338 -0
package/dist/conversation/coreference.js +44 -4
package/dist/conversation/pendingActions.d.ts +55 -0
package/dist/conversation/pendingActions.js +127 -0
package/dist/conversation/store.d.ts +72 -0
package/dist/conversation/store.js +140 -1
package/dist/conversation/topicTracker.d.ts +36 -0
package/dist/conversation/topicTracker.js +141 -0
package/dist/execution/ssh.d.ts +42 -1
package/dist/execution/ssh.js +532 -3
package/dist/handlers/executor.js +3981 -16
package/dist/index.d.ts +25 -3
package/dist/index.js +36 -2
package/dist/nlp/batchParser.d.ts +30 -0
package/dist/nlp/batchParser.js +77 -0
package/dist/nlp/conceptExpansion.d.ts +54 -0
package/dist/nlp/conceptExpansion.js +136 -0
package/dist/nlp/conceptRouter.d.ts +49 -0
package/dist/nlp/conceptRouter.js +302 -0
package/dist/nlp/confidenceCalibrator.d.ts +62 -0
package/dist/nlp/confidenceCalibrator.js +116 -0
package/dist/nlp/correctionLearner.d.ts +45 -0
package/dist/nlp/correctionLearner.js +207 -0
package/dist/nlp/entitySpellCorrect.d.ts +35 -0
package/dist/nlp/entitySpellCorrect.js +141 -0
package/dist/nlp/knowledgeGraph.d.ts +70 -0
package/dist/nlp/knowledgeGraph.js +380 -0
package/dist/nlp/llmFallback.js +28 -1
package/dist/nlp/multiClassifier.js +91 -6
package/dist/nlp/multiIntent.d.ts +43 -0
package/dist/nlp/multiIntent.js +154 -0
package/dist/nlp/parseIntent.d.ts +6 -1
package/dist/nlp/parseIntent.js +180 -5
package/dist/nlp/ruleParser.js +315 -0
package/dist/nlp/semanticSimilarity.d.ts +30 -0
package/dist/nlp/semanticSimilarity.js +174 -0
package/dist/nlp/vocabularyBuilder.d.ts +43 -0
package/dist/nlp/vocabularyBuilder.js +224 -0
package/dist/nlp/wikidata.d.ts +49 -0
package/dist/nlp/wikidata.js +228 -0
package/dist/policy/confirm.d.ts +10 -0
package/dist/policy/confirm.js +39 -0
package/dist/policy/safety.js +6 -4
package/dist/utils/aliases.d.ts +5 -0
package/dist/utils/aliases.js +39 -0
package/dist/utils/analysis.js +71 -15
package/dist/utils/browser.d.ts +64 -0
package/dist/utils/browser.js +364 -0
package/dist/utils/commandHistory.d.ts +20 -0
package/dist/utils/commandHistory.js +108 -0
package/dist/utils/completer.d.ts +17 -0
package/dist/utils/completer.js +79 -0
package/dist/utils/config.js +32 -2
package/dist/utils/dbQuery.d.ts +25 -0
package/dist/utils/dbQuery.js +248 -0
package/dist/utils/discordDiag.d.ts +35 -0
package/dist/utils/discordDiag.js +826 -0
package/dist/utils/diskCleanup.d.ts +36 -0
package/dist/utils/diskCleanup.js +775 -0
package/dist/utils/entityResolver.d.ts +107 -0
package/dist/utils/entityResolver.js +468 -0
package/dist/utils/imageGen.d.ts +92 -0
package/dist/utils/imageGen.js +2031 -0
package/dist/utils/installTracker.d.ts +57 -0
package/dist/utils/installTracker.js +160 -0
package/dist/utils/multiExec.d.ts +21 -0
package/dist/utils/multiExec.js +141 -0
package/dist/utils/openclawDiag.d.ts +29 -0
package/dist/utils/openclawDiag.js +1035 -0
package/dist/utils/output.js +4 -0
package/dist/utils/platform.js +2 -1
package/dist/utils/progressReporter.d.ts +50 -0
package/dist/utils/progressReporter.js +58 -0
package/dist/utils/projectDetect.d.ts +44 -0
package/dist/utils/projectDetect.js +319 -0
package/dist/utils/projectScanner.d.ts +44 -0
package/dist/utils/projectScanner.js +312 -0
package/dist/utils/shellCompat.d.ts +78 -0
package/dist/utils/shellCompat.js +186 -0
package/dist/utils/smartArchive.d.ts +16 -0
package/dist/utils/smartArchive.js +172 -0
package/dist/utils/smartRetry.d.ts +26 -0
package/dist/utils/smartRetry.js +114 -0
package/dist/utils/updater.d.ts +1 -0
package/dist/utils/updater.js +1 -1
package/dist/utils/version.d.ts +20 -0
package/dist/utils/version.js +212 -0
package/package.json +6 -3

package/dist/nlp/ruleParser.js CHANGED Viewed

@@ -4,6 +4,239 @@ export function parseByRules(rawText) {
     const rules = loadRules();
     const intents = loadIntents();
     const text = rawText.trim().toLowerCase();
+    // Pre-check: casual conversation / greetings / social
+    const casualPatterns = [
+        { pattern: /^(hey|hi|hello|howdy|yo|sup|what'?s up|good (morning|afternoon|evening|night)|greetings)\s*[!?.]*$/i, intent: "chat.greeting" },
+        { pattern: /^how (are you|you doing|is it going|do you feel|are things)/i, intent: "chat.howru" },
+        { pattern: /^(how'?s it going|what'?s good|you good|you ok)\s*[!?.]*$/i, intent: "chat.howru" },
+        { pattern: /^(thanks|thank you|thx|cheers|appreciate it|good job|nice work|well done|great job|awesome|perfect|excellent)\s*[!?.]*$/i, intent: "chat.thanks" },
+        { pattern: /^(bye|goodbye|see you|later|gotta go|peace|cya|goodnight|good night|take care)\s*[!?.]*$/i, intent: "chat.bye" },
+        { pattern: /^(who are you|what are you|tell me about yourself|what is notoken)/i, intent: "chat.about" },
+        { pattern: /^(tell me a joke|say something funny|make me laugh|joke)\s*[!?.]*$/i, intent: "chat.joke" },
+        { pattern: /^(i'?m (bored|tired|frustrated|confused|stuck|lost))/i, intent: "chat.empathy" },
+        { pattern: /^(this (sucks|is broken|doesn'?t work|is frustrating))/i, intent: "chat.empathy" },
+        { pattern: /^(what do you think|your opinion|do you like|which is better)/i, intent: "chat.opinion" },
+        // Compliments
+        { pattern: /^(you('re| are) (awesome|great|amazing|the best|cool|smart|helpful|incredible))/i, intent: "chat.compliment" },
+        { pattern: /^(nice|love it|love you|love this|you rock|brilliant)/i, intent: "chat.compliment" },
+        // Insults (playful)
+        { pattern: /^(you('re| are) (stupid|dumb|useless|terrible|bad|wrong|slow|broken))/i, intent: "chat.insult" },
+        { pattern: /^(you suck|this sucks|worst|hate this)/i, intent: "chat.insult" },
+        // What can you do / capabilities
+        { pattern: /^(what (else )?can you do|show me what you can do|what are your (skills|capabilities|features))/i, intent: "chat.capabilities" },
+        // Bored / entertain me
+        { pattern: /^(i('m| am) bored|entertain me|do something (cool|fun|interesting)|surprise me|show me something)/i, intent: "chat.bored" },
+        // Existential
+        { pattern: /^(are you (alive|real|sentient|conscious|human|ai|a robot|a bot))/i, intent: "chat.existential" },
+        { pattern: /^(do you (dream|sleep|feel|think|have feelings|have emotions))/i, intent: "chat.existential" },
+        // Motivational
+        { pattern: /^(motivate me|inspire me|give me a (quote|pep talk)|i need motivation)/i, intent: "chat.motivate" },
+        // Facts / trivia
+        { pattern: /^(tell me a fact|random fact|fun fact|did you know|trivia)/i, intent: "chat.fact" },
+        // Easter eggs
+        { pattern: /^(42|meaning of life|do a barrel roll|make me a sandwich|sudo make me a sandwich)/i, intent: "chat.easter" },
+        { pattern: /^(what is the matrix|open the pod bay doors|i am your father|may the force)/i, intent: "chat.easter" },
+        // Apology
+        { pattern: /^(sorry|my bad|i('m| am) sorry|apologies|oops|my mistake)/i, intent: "chat.sorry" },
+        // Agreement / affirmation (not pending action)
+        { pattern: /^(cool|nice|ok cool|awesome|sweet|neat|dope|sick|rad|lit)\s*[!.]*$/i, intent: "chat.acknowledge" },
+        // How old are you / version
+        { pattern: /^(how old are you|when were you (made|born|created)|your (age|birthday|version))/i, intent: "chat.age" },
+        // Favorite things
+        { pattern: /^(what('s| is) your favorite|do you have a favorite)/i, intent: "chat.favorite" },
+        // Riddles
+        { pattern: /^(tell me a riddle|riddle|give me a riddle|riddle me|got a riddle|brain teaser)\s*[!?.]*$/i, intent: "chat.riddle" },
+        // Task management (natural language)
+        { pattern: /^(what'?s running in (the )?background|any(thing)? running in (the )?background|running tasks|background tasks|active tasks|show (my )?tasks|what tasks)\s*[!?.]*$/i, intent: "notoken.jobs" },
+        { pattern: /^(cancel|stop|kill|abort)\s+(it|that|everything|all( tasks)?|the (task|job|scan|download))\s*$/i, intent: "notoken.cancel" },
+        { pattern: /^(cancel|stop|kill) (task|job) #?\d+$/i, intent: "notoken.cancel" },
+    ];
+    for (const { pattern, intent } of casualPatterns) {
+        if (pattern.test(text))
+            return { intent, confidence: 0.95, rawText, fields: {} };
+    }
+    // Pre-check: negation detection — "don't restart nginx", "do not check disk", "never mind"
+    // Note: "stop <service>" is a legitimate stop command, so we only match "stop" when
+    // followed by a verb (e.g. "stop checking") or on its own, not "stop <noun>"
+    if (/^(don'?t|do not|no don'?t)\s+/i.test(text)
+        || /^(cancel|never mind|abort|nevermind)$/i.test(text)
+        || /^never\s+(do|run|execute|mind)/i.test(text)
+        || /^stop\s+(doing|checking|running|monitoring|that|it)(\s|$)/i.test(text)) {
+        return { intent: "notoken.cancel", confidence: 0.95, rawText, fields: {} };
+    }
+    // Pre-check: status queries → notoken.status (not knowledge.lookup or service.status)
+    if (/^(what is |what's |show |check |give me )?(the )?(system |computer |machine |notoken )?status( of)?( this| the| my)?( machine| computer| system| server)?[?.!]?$/.test(text)
+        || /^(how is |how's )?(this |the |my )?(system|machine|computer|server) doing/.test(text)
+        || /^system status$/.test(text)) {
+        const statusDef = intents.find(i => i.name === "notoken.status");
+        if (statusDef)
+            return { intent: "notoken.status", confidence: 0.95, rawText, fields: {} };
+    }
+    // Pre-check: server/system queries — "what is load", "what is cpu usage", "what is memory", "how much ram"
+    if (/^(what is |what's |show |check |how much |how's )?(the )?(load|cpu|cpu usage|uptime|server load)( right now| currently| on this)?\??$/.test(text)
+        || /^(what is |show )?(the )?(load|cpu) (average|right now|currently)/.test(text)) {
+        return { intent: "server.uptime", confidence: 0.9, rawText, fields: {} };
+    }
+    // "what is using heavy cpu" / "what is eating cpu" / "any heavy load processes"
+    if (/\b(what|which|any)\b.*(using|eating|taking|hogging|consuming)\b.*(cpu|processing|resources|memory|ram|load)\b/i.test(text)
+        || /\b(heavy|high)\s+(load|cpu|processing|processes)\b/i.test(text)) {
+        return { intent: "process.list", confidence: 0.9, rawText, fields: {} };
+    }
+    if (/^(what is |what's |show |check |how much )?(the )?(memory|ram|memory usage|ram usage)( right now| left| free| used| currently)?\??$/.test(text)) {
+        return { intent: "server.check_memory", confidence: 0.9, rawText, fields: {} };
+    }
+    if (/^(what is |what's |show |check |how much )?(the |my )?(disk|disk space|storage|space|drives)( left| free| used| right now| currently)?\??$/.test(text)) {
+        return { intent: "server.check_disk", confidence: 0.9, rawText, fields: {} };
+    }
+    // Pre-check: common conversational queries that get misrouted
+    // Weather
+    if (/\b(weather|forecast|temperature|rain|snow|sunny|cloudy)\b/i.test(text)
+        && !/\b(log|error|server|disk)\b/i.test(text)) {
+        const locMatch = text.match(/(?:weather|forecast|temperature)\s+(?:in|at|for|of)\s+(.+?)(?:\?|$)/i)
+            ?? text.match(/(?:in|at|for)\s+(.+?)(?:\s+weather|\s+forecast|\?|$)/i);
+        return { intent: "weather.current", confidence: 0.95, rawText, fields: locMatch ? { location: locMatch[1].trim() } : {} };
+    }
+    // News
+    if (/^(what is |what's |show me )?(the )?(latest |today's |current )?(news|headlines|top stories)/i.test(text)
+        || /^(any |what's? )?news( today)?\??$/i.test(text)) {
+        return { intent: "news.headlines", confidence: 0.9, rawText, fields: {} };
+    }
+    // Database size
+    if (/\b(how big|size of|how much space)\b.*\b(database|db|mysql|postgres|mongo)\b/i.test(text)
+        || /\b(database|db)\s+(size|storage|disk|space)\b/i.test(text)) {
+        return { intent: "db.size", confidence: 0.9, rawText, fields: {} };
+    }
+    // Time/date
+    if (/^(what is |what's )?(the )?(time|date|day|today)( right now| today)?\??$/.test(text)) {
+        return { intent: "system.datetime", confidence: 0.9, rawText, fields: {} };
+    }
+    // Help / capabilities
+    // Only match bare help — not "ask openclaw what can you do"
+    if (/^(help|help me|what can you do|what do you do|show me help|commands)\??$/.test(text) && !text.includes("openclaw") && !text.includes("claw")) {
+        return { intent: "notoken.help", confidence: 0.95, rawText, fields: {} };
+    }
+    // History / undo
+    if (/^(show me |what is )?(my )?history$/.test(text) || /^what did i (do|run|ask) (last|before|previously)/.test(text)) {
+        return { intent: "notoken.history", confidence: 0.9, rawText, fields: {} };
+    }
+    if (/^undo( that| last| it)?$/.test(text)) {
+        return { intent: "notoken.undo", confidence: 0.9, rawText, fields: {} };
+    }
+    // Who am I / logged in users
+    if (/^who am i\??$/.test(text) || /^(what is |what's )?my (user|username|login)\??$/.test(text)) {
+        return { intent: "user.whoami", confidence: 0.9, rawText, fields: {} };
+    }
+    if (/^who (else )?(is |are )?(logged in|online|connected)\??$/.test(text)) {
+        return { intent: "user.who", confidence: 0.9, rawText, fields: {} };
+    }
+    // Running services
+    if (/^(show me |list |what are )?(the )?(running |active )?services$/.test(text)) {
+        return { intent: "service.list", confidence: 0.9, rawText, fields: {} };
+    }
+    // Network: ip address, bandwidth, speed, slow
+    if (/^(what is |what's |show )?(my )?(ip|ip address|public ip)\??$/.test(text)) {
+        return { intent: "network.ip", confidence: 0.9, rawText, fields: {} };
+    }
+    if (/\b(bandwidth|network speed|connection speed|speed test|speedtest)\b/i.test(text) || /^(is the )?network slow\??$/.test(text)) {
+        return { intent: "network.speedtest", confidence: 0.9, rawText, fields: {} };
+    }
+    // Block/unblock IP → firewall
+    if (/^(block|unblock|ban|unban)\s+(this\s+)?ip/i.test(text) || /^(block|unblock|ban|unban)\s+\d+\.\d+/i.test(text)) {
+        return { intent: "firewall.block_ip", confidence: 0.9, rawText, fields: {} };
+    }
+    // Docker queries with "show me"
+    if (/^(show me |list )?(docker )?(images|containers)$/.test(text) || /^what (containers|images) are (running|there)\??$/.test(text)) {
+        const isImages = /images/.test(text);
+        return { intent: isImages ? "docker.images" : "docker.list", confidence: 0.9, rawText, fields: {} };
+    }
+    // Large files
+    if (/^find (large|big|huge) files$/.test(text) || /\b(large|big|huge) files\b/.test(text)) {
+        return { intent: "disk.scan", confidence: 0.9, rawText, fields: {} };
+    }
+    // Error logs
+    if (/^(show me |check |any )?(the )?(error|recent) logs$/.test(text) || /^any errors in (the )?logs\??$/.test(text)) {
+        return { intent: "logs.errors", confidence: 0.9, rawText, fields: {} };
+    }
+    // Clear screen
+    if (/^clear( the)?( screen| terminal)?$/.test(text)) {
+        return { intent: "shell.clear", confidence: 0.95, rawText, fields: {} };
+    }
+    // Disk IO
+    if (/^(show me |check )?(disk|io|disk io|iops)( stats| usage)?\??$/.test(text)) {
+        return { intent: "server.check_disk", confidence: 0.9, rawText, fields: {} };
+    }
+    // Website up check
+    if (/^(check if |is )?(the |my )?(website|site|server|page) (is )?(up|down|running|alive|responding)\??$/.test(text)) {
+        return { intent: "network.curl", confidence: 0.9, rawText, fields: {} };
+    }
+    // Pre-check: attack/security/ddos queries → security.scan
+    if (/\b(attack|ddos|brute.?force|intrusion|hacked|breach|compromised|unauthorized|virus|malware|rootkit)\b/i.test(text)
+        || /\b(are we|am i|is .* being)\s+(under\s+)?attack/i.test(text)
+        || /\b(suspicious|failed)\s+(activity|login|connection|traffic|access)/i.test(text)
+        || /\bwho is (attacking|hacking|connecting|hitting)/i.test(text)
+        || /\bcheck (for )?(attacks|security|intrusion|viruses|malware)/i.test(text)
+        || /\b(any )?(viruses|malware|rootkits?) (on|in|running)/i.test(text)) {
+        return { intent: "security.scan", confidence: 0.95, rawText, fields: {} };
+    }
+    // Pre-check: "can you generate an image" → ai.generate_image (not ai.image_status)
+    if (/^(can you|could you|are you able to|do you)\s+(generate|create|make|draw)\s+(an?\s+)?(image|picture|photo|art)/i.test(text)) {
+        return { intent: "ai.generate_image", confidence: 0.9, rawText, fields: {} };
+    }
+    // Pre-check: "cd /path" → shell cd (change directory)
+    const cdMatch = text.match(/^cd\s+(\/\S+|~\S*|\.\S*)$/);
+    if (cdMatch) {
+        return { intent: "shell.cd", confidence: 0.95, rawText, fields: { path: cdMatch[1] } };
+    }
+    // Pre-check: "what is in my documents/folder/drive" → dir.list
+    const whatIsInMatch = text.match(/^(?:what is |what's |show me what(?:'s| is) )in (?:my |the |this )?(.*?)(?:\?|$)/);
+    if (whatIsInMatch) {
+        const target = whatIsInMatch[1].trim();
+        // Resolve common folder names
+        const folderMap = {
+            "documents": process.platform === "win32" ? "%USERPROFILE%\\Documents" : "~/Documents",
+            "documents folder": process.platform === "win32" ? "%USERPROFILE%\\Documents" : "~/Documents",
+            "downloads": process.platform === "win32" ? "%USERPROFILE%\\Downloads" : "~/Downloads",
+            "downloads folder": process.platform === "win32" ? "%USERPROFILE%\\Downloads" : "~/Downloads",
+            "desktop": process.platform === "win32" ? "%USERPROFILE%\\Desktop" : "~/Desktop",
+            "home": "~",
+            "home folder": "~",
+            "home directory": "~",
+            "root": "/",
+            "root folder": "/",
+            "root c drive": "/mnt/c/",
+            "c drive": "/mnt/c/",
+            "d drive": "/mnt/d/",
+            "e drive": "/mnt/e/",
+        };
+        const path = folderMap[target] ?? target;
+        if (target.includes("drive")) {
+            return { intent: "disk.scan", confidence: 0.9, rawText, fields: { path } };
+        }
+        return { intent: "dir.list", confidence: 0.9, rawText, fields: { path } };
+    }
+    // Pre-check: "what projects are on this drive" → project.scan
+    if (/\bwhat projects\b.*\b(on|in)\b.*\b(this|the|my|c|d)\b/.test(text)) {
+        return { intent: "project.scan", confidence: 0.9, rawText, fields: { path: "." } };
+    }
+    // Pre-check: "what's on this drive" / "show me whats on this drive" → disk.scan
+    if (/\b(what.?s|show me what.?s|what is) on (this|the|my|c|d) drive\b/.test(text)
+        || /\bshow me (this|the|my) drive\b/.test(text)) {
+        return { intent: "disk.scan", confidence: 0.9, rawText, fields: {} };
+    }
+    // Pre-check: "what files" / "what are files in this folder" → dir.list or project.detect
+    if (/^(what are |what's in |show me |list |show )(the )?(files|contents)( in| of)?( this| the| my| current)?( folder| directory| dir| project)?[?.!]?$/.test(text)
+        || /^(show me |list )(project |all )?files$/.test(text)) {
+        const isDirList = text.includes("folder") || text.includes("directory") || text.includes("dir");
+        const intentName = isDirList ? "dir.list" : "project.detect";
+        return { intent: intentName, confidence: 0.9, rawText, fields: { path: "." } };
+    }
+    // Pre-check: "how is openclaw doing" / "how is discord doing" → *.status
+    const howIsMatch = text.match(/^how(?:'s| is| are) (openclaw|claw|discord|ollama|notoken) (?:doing|going|running|working)/);
+    if (howIsMatch) {
+        const target = howIsMatch[1] === "claw" ? "openclaw" : howIsMatch[1];
+        const intentName = target === "notoken" ? "notoken.status" : `${target}.status`;
+        return { intent: intentName, confidence: 0.9, rawText, fields: {} };
+    }
     // Match intent by synonyms defined in intents.json
     const matched = matchIntent(text, intents);
     if (!matched)
@@ -90,6 +323,10 @@ function extractStringFields(rawText, lowerText, matchedPhrase, fieldNames, alre
         }
     }
     remaining = remaining.replace(/\s+/g, " ").trim();
+    // Strip filler words that aren't meaningful field values
+    remaining = remaining.replace(/^(can you |could you |would you |please |hey |yo |just )+/i, "").trim();
+    remaining = remaining.replace(/\b(please|for me|for errors|for issues)\b/gi, "").trim();
+    remaining = remaining.replace(/\s+/g, " ").trim();
     // Check for quoted strings first
     const quoted = rawText.match(/["']([^"']+)["']/g);
     if (quoted) {
@@ -182,6 +419,7 @@ function isStopWord(word) {
 }
 function matchIntent(text, intents) {
     let best = null;
+    // Pass 1: exact substring match (fast path)
     for (const def of intents) {
         for (const phrase of def.synonyms) {
             if (text.includes(phrase)) {
@@ -191,8 +429,85 @@ function matchIntent(text, intents) {
             }
         }
     }
+    if (best)
+        return { def: best.def, matchedPhrase: best.matchedPhrase };
+    // Pass 2: fuzzy/spell-corrected match — correct typos in user input
+    // then retry matching. Only for single/double-word synonyms to avoid
+    // false positives on long phrases.
+    const corrected = spellCorrectText(text, intents);
+    if (corrected !== text) {
+        for (const def of intents) {
+            for (const phrase of def.synonyms) {
+                if (corrected.includes(phrase)) {
+                    if (!best || phrase.length > best.length) {
+                        best = { def, matchedPhrase: phrase, length: phrase.length };
+                    }
+                }
+            }
+        }
+    }
     return best ? { def: best.def, matchedPhrase: best.matchedPhrase } : null;
 }
+/**
+ * Replace words the intent vocabulary does not know with the nearest known word.
+ *
+ * The vocabulary is every whitespace-separated synonym word of 3+ characters.
+ * Input words shorter than 3 characters, or already in the vocabulary, are kept.
+ * Other words are swapped for the closest vocabulary word by Levenshtein
+ * distance: at most 1 edit for words of 4 characters or fewer, at most 2 otherwise.
+ * On equal distance the vocabulary word encountered first wins.
+ *
+ * Returns the corrected words joined by single spaces, or the original `text`
+ * unchanged when no word was replaced.
+ */
+function spellCorrectText(text, intents) {
+    // Collect the known words in synonym order (Set iteration order drives tie-breaking).
+    const knownWords = new Set();
+    for (const def of intents) {
+        for (const phrase of def.synonyms) {
+            phrase.split(/\s+/)
+                .filter(part => part.length >= 3)
+                .forEach(part => knownWords.add(part));
+        }
+    }
+    let anyReplaced = false;
+    const fixWord = (token) => {
+        if (token.length < 3 || knownWords.has(token))
+            return token;
+        const budget = token.length > 4 ? 2 : 1;
+        let nearest = token;
+        let nearestDist = Infinity;
+        for (const candidate of knownWords) {
+            // The length gap is a lower bound on the edit distance, so skip hopeless candidates.
+            if (Math.abs(candidate.length - token.length) > budget)
+                continue;
+            const d = editDistance(token, candidate);
+            if (d <= budget && d < nearestDist) {
+                nearest = candidate;
+                nearestDist = d;
+            }
+        }
+        if (nearest !== token)
+            anyReplaced = true;
+        return nearest;
+    };
+    const fixedTokens = text.split(/\s+/).map(token => fixWord(token));
+    if (!anyReplaced)
+        return text;
+    return fixedTokens.join(" ");
+}
+/**
+ * Levenshtein edit distance between strings `a` and `b` (insert, delete,
+ * substitute each cost 1), computed with a single rolling row.
+ * Characters are compared by string index, i.e. per UTF-16 code unit.
+ */
+function editDistance(a, b) {
+    const rows = a.length;
+    const cols = b.length;
+    // If either string is empty the distance is the other one's length.
+    if (rows === 0 || cols === 0)
+        return rows + cols;
+    const row = [];
+    for (let j = 0; j <= cols; j++)
+        row.push(j);
+    for (let i = 1; i <= rows; i++) {
+        // `diagonal` holds the previous row's value at column j - 1.
+        let diagonal = row[0];
+        row[0] = i;
+        for (let j = 1; j <= cols; j++) {
+            const above = row[j];
+            if (a[i - 1] === b[j - 1]) {
+                row[j] = diagonal;
+            }
+            else {
+                row[j] = 1 + Math.min(diagonal, above, row[j - 1]);
+            }
+            diagonal = above;
+        }
+    }
+    return row[cols];
+}
 function extractEnvironment(text, aliases) {
     for (const [canonical, aliasList] of Object.entries(aliases)) {
         for (const alias of aliasList) {

package/dist/nlp/semanticSimilarity.d.ts ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * Semantic Similarity — lightweight sentence-level matching.
+ *
+ * Uses character n-gram overlap + word-level Jaccard + IDF weighting
+ * to compute similarity between user input and intent descriptions/synonyms.
+ * No external API needed — runs entirely local.
+ *
+ * This catches paraphrases that exact synonym matching misses:
+ *   "what's hogging my CPU" ≈ "show me what processes are eating resources"
+ *   "is my site live" ≈ "check if website is up"
+ */
+/** One scored candidate produced by findSimilarIntents. */
+export interface SimilarityMatch {
+    /** Name of the matched intent. */
+    intent: string;
+    /** Combined similarity score; higher means more similar. */
+    score: number;
+    /** The synonym or description text of the intent that produced the score. */
+    matchedPhrase: string;
+}
+/**
+ * Find the most similar intents to the input text.
+ * Combines character n-gram, word overlap, and IDF-weighted scoring.
+ *
+ * @param rawText User input; it is lower-cased before scoring.
+ * @param topN Maximum number of matches returned (the implementation defaults to 5).
+ * @returns Matches sorted by descending score; intents whose best score is
+ *          0.15 or lower are omitted.
+ */
+export declare function findSimilarIntents(rawText: string, topN?: number): SimilarityMatch[];
+/**
+ * Score how similar two phrases are (0-1).
+ *
+ * The score is a weighted blend: 0.3 character n-gram similarity,
+ * 0.3 word-set overlap, 0.4 IDF-weighted word overlap.
+ *
+ * @param text1 Phrase whose content words are looked up in the other phrase.
+ * @param text2 Phrase compared against.
+ */
+export declare function phraseSimilarity(text1: string, text2: string): number;
+/**
+ * Expand a query with similar words found across all intent synonyms.
+ * Returns words that co-occur with the input words in intent synonyms.
+ *
+ * @param rawText User input whose content words seed the expansion.
+ * @returns At most 10 words, most frequent first; a word needs a co-occurrence
+ *          count of at least 2 to be included.
+ */
+export declare function expandWithCooccurrences(rawText: string): string[];

package/dist/nlp/semanticSimilarity.js ADDED Viewed

@@ -0,0 +1,174 @@
+/**
+ * Semantic Similarity — lightweight sentence-level matching.
+ *
+ * Uses character n-gram overlap + word-level Jaccard + IDF weighting
+ * to compute similarity between user input and intent descriptions/synonyms.
+ * No external API needed — runs entirely local.
+ *
+ * This catches paraphrases that exact synonym matching misses:
+ *   "what's hogging my CPU" ≈ "show me what processes are eating resources"
+ *   "is my site live" ≈ "check if website is up"
+ */
+import { loadIntents } from "../utils/config.js";
+// ─── N-gram extraction ─────────────────────────────────────────────────────
+/**
+ * Return the set of distinct character n-grams of `text`.
+ * The text is lower-cased and every character other than a-z, 0-9 and space
+ * is removed before the n-grams of length `n` are taken.
+ */
+function charNgrams(text, n) {
+    const normalized = text.toLowerCase().replace(/[^a-z0-9 ]/g, "");
+    const result = new Set();
+    let start = 0;
+    while (start <= normalized.length - n) {
+        result.add(normalized.substring(start, start + n));
+        start += 1;
+    }
+    return result;
+}
+/**
+ * Return the set of distinct words in `text` that are longer than one character.
+ * The text is lower-cased and stripped of everything except a-z, 0-9 and space first.
+ */
+function wordSet(text) {
+    const tokens = text.toLowerCase().replace(/[^a-z0-9 ]/g, "").split(/\s+/);
+    const unique = new Set();
+    for (const token of tokens) {
+        if (token.length > 1)
+            unique.add(token);
+    }
+    return unique;
+}
+// ─── Similarity metrics ─────────────────────────────────────────────────────
+/**
+ * Jaccard similarity of two sets: |a ∩ b| / |a ∪ b|.
+ * Returns 0 when both sets are empty.
+ */
+function jaccardSimilarity(a, b) {
+    if (a.size === 0 && b.size === 0) {
+        return 0;
+    }
+    const shared = [...a].filter(item => b.has(item)).length;
+    const unionSize = a.size + b.size - shared;
+    return shared / unionSize;
+}
+/**
+ * Character-level similarity of two texts: Jaccard overlap of their bigram
+ * sets (weight 0.4) plus Jaccard overlap of their trigram sets (weight 0.6).
+ */
+function ngramSimilarity(text1, text2) {
+    const bigramScore = jaccardSimilarity(charNgrams(text1, 2), charNgrams(text2, 2));
+    const trigramScore = jaccardSimilarity(charNgrams(text1, 3), charNgrams(text2, 3));
+    return (bigramScore * 0.4 + trigramScore * 0.6);
+}
+/** Word-level similarity: Jaccard overlap of the two texts' word sets. */
+function wordOverlap(text1, text2) {
+    const leftWords = wordSet(text1);
+    const rightWords = wordSet(text2);
+    return jaccardSimilarity(leftWords, rightWords);
+}
+// ─── Stopword filtering ─────────────────────────────────────────────────────
+/** Function words that carry no intent signal and are dropped by contentWords. */
+const STOPWORDS = new Set([
+    "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
+    "have", "has", "had", "do", "does", "did", "will", "would", "could",
+    "should", "may", "might", "can", "shall", "to", "of", "in", "for",
+    "on", "with", "at", "by", "from", "it", "its", "this", "that",
+    "i", "me", "my", "we", "us", "our", "you", "your", "he", "she",
+    "they", "them", "what", "which", "who", "when", "where", "how",
+    "not", "no", "nor", "or", "and", "but", "if", "then", "so",
+    "just", "also", "very", "too", "some", "any", "all", "more",
+    "please",
+]);
+/**
+ * Split `text` into lower-cased content words.
+ *
+ * Punctuation attached to the start or end of a token is trimmed first, so
+ * "cpu?" yields "cpu" and "what?" is recognised as a stopword; punctuation
+ * inside a token ("what's", "brute-force") is kept. Tokens of 2 characters or
+ * fewer and stopwords are dropped. Order and duplicates are preserved.
+ *
+ * @param text Any phrase (user input, synonym, description).
+ * @returns The remaining words, in input order.
+ */
+function contentWords(text) {
+    return text
+        .toLowerCase()
+        .split(/\s+/)
+        // Trim non-letter/non-digit characters from both ends (Unicode-aware).
+        .map(w => w.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
+        .filter(w => w.length > 2 && !STOPWORDS.has(w));
+}
+// ─── IDF weighting ──────────────────────────────────────────────────────────
+// Words that appear in many intents are less discriminative.
+/** Lazily built word → IDF weight table; null until buildIDF first runs. */
+let idfCache = null;
+/**
+ * Build (once) and return the inverse-document-frequency table.
+ *
+ * Each intent is one "document" made of the content words of its synonyms
+ * and its description. A word's weight is log(totalDocs / (1 + docCount)),
+ * clamped at 0: without the clamp a word present in every intent gets a
+ * negative weight, which would let weighted-overlap ratios leave the 0-1 range.
+ *
+ * @returns Map from content word to its non-negative IDF weight. The same
+ *          Map instance is returned on every later call.
+ */
+function buildIDF() {
+    if (idfCache)
+        return idfCache;
+    const intents = loadIntents();
+    const docCount = new Map();
+    const totalDocs = intents.length;
+    for (const intent of intents) {
+        // Count each word at most once per intent.
+        const wordsInDoc = new Set();
+        for (const syn of intent.synonyms) {
+            for (const w of contentWords(syn))
+                wordsInDoc.add(w);
+        }
+        for (const w of contentWords(intent.description))
+            wordsInDoc.add(w);
+        for (const w of wordsInDoc)
+            docCount.set(w, (docCount.get(w) ?? 0) + 1);
+    }
+    const table = new Map();
+    for (const [word, count] of docCount) {
+        // Ubiquitous words are non-discriminative: weight 0, never negative.
+        table.set(word, Math.max(0, Math.log(totalDocs / (1 + count))));
+    }
+    idfCache = table;
+    return idfCache;
+}
+/**
+ * IDF-weighted share of `text1`'s content words that also occur in `text2`.
+ * Words missing from the IDF table weigh 2.0. Returns 0 when the total
+ * weight of `text1`'s content words is not positive.
+ */
+function weightedOverlap(text1, text2) {
+    const idf = buildIDF();
+    const sourceWords = contentWords(text1);
+    const targetWords = new Set(contentWords(text2));
+    let matchedWeight = 0;
+    let totalWeight = 0;
+    sourceWords.forEach(w => {
+        // Words unseen in any intent are treated as rare, hence heavily weighted.
+        const weight = idf.get(w) ?? 2.0;
+        totalWeight += weight;
+        if (targetWords.has(w)) {
+            matchedWeight += weight;
+        }
+    });
+    if (totalWeight > 0) {
+        return matchedWeight / totalWeight;
+    }
+    return 0;
+}
+/**
+ * Rank intents by similarity to the (lower-cased) input text.
+ *
+ * Each intent is scored by its best synonym, or by its description at 80% of
+ * the description's similarity if that is higher. Intents scoring 0.15 or
+ * less are dropped; the rest are returned by descending score, at most `topN`.
+ */
+export function findSimilarIntents(rawText, topN = 5) {
+    const intents = loadIntents();
+    const query = rawText.toLowerCase();
+    const matches = [];
+    for (const intent of intents) {
+        let topScore = 0;
+        let topPhrase = "";
+        // Best-scoring synonym (same 0.3 / 0.3 / 0.4 blend as phraseSimilarity).
+        for (const syn of intent.synonyms) {
+            const synScore = phraseSimilarity(query, syn);
+            if (synScore > topScore) {
+                topScore = synScore;
+                topPhrase = syn;
+            }
+        }
+        // The description counts too, at a slight penalty.
+        const descScore = phraseSimilarity(query, intent.description) * 0.8;
+        if (descScore > topScore) {
+            topScore = descScore;
+            topPhrase = intent.description;
+        }
+        if (topScore > 0.15) {
+            matches.push({ intent: intent.name, score: topScore, matchedPhrase: topPhrase });
+        }
+    }
+    matches.sort((x, y) => y.score - x.score);
+    return matches.slice(0, topN);
+}
+/**
+ * Similarity of two phrases as a weighted blend: 0.3 character n-gram
+ * similarity + 0.3 word-set overlap + 0.4 IDF-weighted overlap.
+ */
+export function phraseSimilarity(text1, text2) {
+    const parts = {
+        ngram: ngramSimilarity(text1, text2),
+        word: wordOverlap(text1, text2),
+        weighted: weightedOverlap(text1, text2),
+    };
+    return parts.ngram * 0.3 + parts.word * 0.3 + parts.weighted * 0.4;
+}
+/**
+ * Suggest extra query words: content words that appear, in intent synonyms,
+ * alongside at least one content word of the input.
+ *
+ * A candidate is counted once per occurrence in such a synonym; candidates
+ * with a count of at least 2 are returned, most frequent first, at most 10.
+ */
+export function expandWithCooccurrences(rawText) {
+    const intents = loadIntents();
+    const queryWords = new Set(contentWords(rawText));
+    const tally = new Map();
+    for (const intent of intents) {
+        for (const syn of intent.synonyms) {
+            const words = contentWords(syn);
+            // Only synonyms sharing a word with the query contribute.
+            if (!words.some(w => queryWords.has(w)))
+                continue;
+            for (const w of words) {
+                if (queryWords.has(w))
+                    continue;
+                tally.set(w, (tally.get(w) ?? 0) + 1);
+            }
+        }
+    }
+    const frequent = [...tally.entries()].filter(([, count]) => count >= 2);
+    frequent.sort((x, y) => y[1] - x[1]);
+    return frequent.slice(0, 10).map(([word]) => word);
+}

package/dist/nlp/vocabularyBuilder.d.ts ADDED Viewed

@@ -0,0 +1,43 @@
+/**
+ * Vocabulary Builder — learns vocabulary from Wikidata lookups.
+ *
+ * After every successful Wikidata entity lookup, this module:
+ *   1. Extracts instanceOf labels and maps them to intent domains
+ *   2. Collects aliases as synonyms for future matching
+ *   3. Adds related concepts to the concept router map
+ *   4. Persists learned vocabulary to ~/.notoken/learned-vocabulary.json
+ *
+ * On startup, loads learned vocabulary and merges it into the
+ * concept router's CONCEPT_DOMAINS so future queries benefit.
+ */
+import type { WikiEntity } from "./wikidata.js";
+/**
+ * Vocabulary learned from Wikidata lookups.
+ * NOTE(review): presumably the shape persisted to
+ * ~/.notoken/learned-vocabulary.json — confirm against the implementation.
+ */
+export interface LearnedVocabulary {
+    /** Maps an entity label (lowercase) to its known aliases/synonyms. */
+    concepts: Record<string, string[]>;
+    /** Maps an instanceOf label (lowercase) to intent domain strings. */
+    domainMappings: Record<string, string[]>;
+    /** ISO timestamp of last update. */
+    learnedAt: string;
+}
+/**
+ * Enrich vocabulary from a Wikidata entity.
+ *
+ * Called after every successful Wikidata lookup. Extracts:
+ *   - instanceOf labels → domain mappings
+ *   - aliases → concept synonyms
+ *   - related concepts → concept router entries
+ *
+ * @param entity The entity produced by the lookup (type WikiEntity from ./wikidata.js).
+ */
+export declare function enrichVocabularyFromWiki(entity: WikiEntity): void;
+/**
+ * Load learned vocabulary from disk and merge into the concept router.
+ *
+ * Should be called on startup so that previously learned vocabulary
+ * is available for intent routing from the first query.
+ *
+ * Takes no arguments and returns nothing; its effect is the merge itself.
+ */
+export declare function loadLearnedVocabulary(): void;
+/**
+ * Get the current enriched concepts map (merged vocabulary).
+ *
+ * Returns a combined view of hardcoded concepts and learned vocabulary.
+ *
+ * @returns The merged vocabulary as a LearnedVocabulary object.
+ */
+export declare function getEnrichedConcepts(): LearnedVocabulary;