notoken-core 1.6.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/config/chat-responses.json +767 -0
  2. package/config/concept-clusters.json +31 -0
  3. package/config/entities.json +93 -0
  4. package/config/image-prompts.json +20 -0
  5. package/config/intent-vectors.json +1 -0
  6. package/config/intents.json +4946 -83
  7. package/config/ollama-models.json +193 -0
  8. package/config/rules.json +32 -1
  9. package/dist/automation/discordPatchright.d.ts +35 -0
  10. package/dist/automation/discordPatchright.js +424 -0
  11. package/dist/automation/discordSetup.d.ts +31 -0
  12. package/dist/automation/discordSetup.js +338 -0
  13. package/dist/conversation/coreference.js +44 -4
  14. package/dist/conversation/pendingActions.d.ts +55 -0
  15. package/dist/conversation/pendingActions.js +127 -0
  16. package/dist/conversation/store.d.ts +72 -0
  17. package/dist/conversation/store.js +140 -1
  18. package/dist/conversation/topicTracker.d.ts +36 -0
  19. package/dist/conversation/topicTracker.js +141 -0
  20. package/dist/execution/ssh.d.ts +42 -1
  21. package/dist/execution/ssh.js +532 -3
  22. package/dist/handlers/executor.js +3981 -16
  23. package/dist/index.d.ts +25 -3
  24. package/dist/index.js +36 -2
  25. package/dist/nlp/batchParser.d.ts +30 -0
  26. package/dist/nlp/batchParser.js +77 -0
  27. package/dist/nlp/conceptExpansion.d.ts +54 -0
  28. package/dist/nlp/conceptExpansion.js +136 -0
  29. package/dist/nlp/conceptRouter.d.ts +49 -0
  30. package/dist/nlp/conceptRouter.js +302 -0
  31. package/dist/nlp/confidenceCalibrator.d.ts +62 -0
  32. package/dist/nlp/confidenceCalibrator.js +116 -0
  33. package/dist/nlp/correctionLearner.d.ts +45 -0
  34. package/dist/nlp/correctionLearner.js +207 -0
  35. package/dist/nlp/entitySpellCorrect.d.ts +35 -0
  36. package/dist/nlp/entitySpellCorrect.js +141 -0
  37. package/dist/nlp/knowledgeGraph.d.ts +70 -0
  38. package/dist/nlp/knowledgeGraph.js +380 -0
  39. package/dist/nlp/llmFallback.js +28 -1
  40. package/dist/nlp/multiClassifier.js +91 -6
  41. package/dist/nlp/multiIntent.d.ts +43 -0
  42. package/dist/nlp/multiIntent.js +154 -0
  43. package/dist/nlp/parseIntent.d.ts +6 -1
  44. package/dist/nlp/parseIntent.js +180 -5
  45. package/dist/nlp/ruleParser.js +315 -0
  46. package/dist/nlp/semanticSimilarity.d.ts +30 -0
  47. package/dist/nlp/semanticSimilarity.js +174 -0
  48. package/dist/nlp/vocabularyBuilder.d.ts +43 -0
  49. package/dist/nlp/vocabularyBuilder.js +224 -0
  50. package/dist/nlp/wikidata.d.ts +49 -0
  51. package/dist/nlp/wikidata.js +228 -0
  52. package/dist/policy/confirm.d.ts +10 -0
  53. package/dist/policy/confirm.js +39 -0
  54. package/dist/policy/safety.js +6 -4
  55. package/dist/utils/aliases.d.ts +5 -0
  56. package/dist/utils/aliases.js +39 -0
  57. package/dist/utils/analysis.js +71 -15
  58. package/dist/utils/browser.d.ts +64 -0
  59. package/dist/utils/browser.js +364 -0
  60. package/dist/utils/commandHistory.d.ts +20 -0
  61. package/dist/utils/commandHistory.js +108 -0
  62. package/dist/utils/completer.d.ts +17 -0
  63. package/dist/utils/completer.js +79 -0
  64. package/dist/utils/config.js +32 -2
  65. package/dist/utils/dbQuery.d.ts +25 -0
  66. package/dist/utils/dbQuery.js +248 -0
  67. package/dist/utils/discordDiag.d.ts +35 -0
  68. package/dist/utils/discordDiag.js +826 -0
  69. package/dist/utils/diskCleanup.d.ts +36 -0
  70. package/dist/utils/diskCleanup.js +775 -0
  71. package/dist/utils/entityResolver.d.ts +107 -0
  72. package/dist/utils/entityResolver.js +468 -0
  73. package/dist/utils/imageGen.d.ts +92 -0
  74. package/dist/utils/imageGen.js +2031 -0
  75. package/dist/utils/installTracker.d.ts +57 -0
  76. package/dist/utils/installTracker.js +160 -0
  77. package/dist/utils/multiExec.d.ts +21 -0
  78. package/dist/utils/multiExec.js +141 -0
  79. package/dist/utils/openclawDiag.d.ts +29 -0
  80. package/dist/utils/openclawDiag.js +1035 -0
  81. package/dist/utils/output.js +4 -0
  82. package/dist/utils/platform.js +2 -1
  83. package/dist/utils/progressReporter.d.ts +50 -0
  84. package/dist/utils/progressReporter.js +58 -0
  85. package/dist/utils/projectDetect.d.ts +44 -0
  86. package/dist/utils/projectDetect.js +319 -0
  87. package/dist/utils/projectScanner.d.ts +44 -0
  88. package/dist/utils/projectScanner.js +312 -0
  89. package/dist/utils/shellCompat.d.ts +78 -0
  90. package/dist/utils/shellCompat.js +186 -0
  91. package/dist/utils/smartArchive.d.ts +16 -0
  92. package/dist/utils/smartArchive.js +172 -0
  93. package/dist/utils/smartRetry.d.ts +26 -0
  94. package/dist/utils/smartRetry.js +114 -0
  95. package/dist/utils/updater.d.ts +1 -0
  96. package/dist/utils/updater.js +1 -1
  97. package/dist/utils/version.d.ts +20 -0
  98. package/dist/utils/version.js +212 -0
  99. package/package.json +6 -3
@@ -4,6 +4,239 @@ export function parseByRules(rawText) {
4
4
  const rules = loadRules();
5
5
  const intents = loadIntents();
6
6
  const text = rawText.trim().toLowerCase();
7
+ // Pre-check: casual conversation / greetings / social
8
+ const casualPatterns = [
9
+ { pattern: /^(hey|hi|hello|howdy|yo|sup|what'?s up|good (morning|afternoon|evening|night)|greetings)\s*[!?.]*$/i, intent: "chat.greeting" },
10
+ { pattern: /^how (are you|you doing|is it going|do you feel|are things)/i, intent: "chat.howru" },
11
+ { pattern: /^(how'?s it going|what'?s good|you good|you ok)\s*[!?.]*$/i, intent: "chat.howru" },
12
+ { pattern: /^(thanks|thank you|thx|cheers|appreciate it|good job|nice work|well done|great job|awesome|perfect|excellent)\s*[!?.]*$/i, intent: "chat.thanks" },
13
+ { pattern: /^(bye|goodbye|see you|later|gotta go|peace|cya|goodnight|good night|take care)\s*[!?.]*$/i, intent: "chat.bye" },
14
+ { pattern: /^(who are you|what are you|tell me about yourself|what is notoken)/i, intent: "chat.about" },
15
+ { pattern: /^(tell me a joke|say something funny|make me laugh|joke)\s*[!?.]*$/i, intent: "chat.joke" },
16
+ { pattern: /^(i'?m (bored|tired|frustrated|confused|stuck|lost))/i, intent: "chat.empathy" },
17
+ { pattern: /^(this (sucks|is broken|doesn'?t work|is frustrating))/i, intent: "chat.empathy" },
18
+ { pattern: /^(what do you think|your opinion|do you like|which is better)/i, intent: "chat.opinion" },
19
+ // Compliments
20
+ { pattern: /^(you('re| are) (awesome|great|amazing|the best|cool|smart|helpful|incredible))/i, intent: "chat.compliment" },
21
+ { pattern: /^(nice|love it|love you|love this|you rock|brilliant)/i, intent: "chat.compliment" },
22
+ // Insults (playful)
23
+ { pattern: /^(you('re| are) (stupid|dumb|useless|terrible|bad|wrong|slow|broken))/i, intent: "chat.insult" },
24
+ { pattern: /^(you suck|this sucks|worst|hate this)/i, intent: "chat.insult" },
25
+ // What can you do / capabilities
26
+ { pattern: /^(what (else )?can you do|show me what you can do|what are your (skills|capabilities|features))/i, intent: "chat.capabilities" },
27
+ // Bored / entertain me
28
+ { pattern: /^(i('m| am) bored|entertain me|do something (cool|fun|interesting)|surprise me|show me something)/i, intent: "chat.bored" },
29
+ // Existential
30
+ { pattern: /^(are you (alive|real|sentient|conscious|human|ai|a robot|a bot))/i, intent: "chat.existential" },
31
+ { pattern: /^(do you (dream|sleep|feel|think|have feelings|have emotions))/i, intent: "chat.existential" },
32
+ // Motivational
33
+ { pattern: /^(motivate me|inspire me|give me a (quote|pep talk)|i need motivation)/i, intent: "chat.motivate" },
34
+ // Facts / trivia
35
+ { pattern: /^(tell me a fact|random fact|fun fact|did you know|trivia)/i, intent: "chat.fact" },
36
+ // Easter eggs
37
+ { pattern: /^(42|meaning of life|do a barrel roll|make me a sandwich|sudo make me a sandwich)/i, intent: "chat.easter" },
38
+ { pattern: /^(what is the matrix|open the pod bay doors|i am your father|may the force)/i, intent: "chat.easter" },
39
+ // Apology
40
+ { pattern: /^(sorry|my bad|i('m| am) sorry|apologies|oops|my mistake)/i, intent: "chat.sorry" },
41
+ // Agreement / affirmation (not pending action)
42
+ { pattern: /^(cool|nice|ok cool|awesome|sweet|neat|dope|sick|rad|lit)\s*[!.]*$/i, intent: "chat.acknowledge" },
43
+ // How old are you / version
44
+ { pattern: /^(how old are you|when were you (made|born|created)|your (age|birthday|version))/i, intent: "chat.age" },
45
+ // Favorite things
46
+ { pattern: /^(what('s| is) your favorite|do you have a favorite)/i, intent: "chat.favorite" },
47
+ // Riddles
48
+ { pattern: /^(tell me a riddle|riddle|give me a riddle|riddle me|got a riddle|brain teaser)\s*[!?.]*$/i, intent: "chat.riddle" },
49
+ // Task management (natural language)
50
+ { pattern: /^(what'?s running in (the )?background|any(thing)? running in (the )?background|running tasks|background tasks|active tasks|show (my )?tasks|what tasks)\s*[!?.]*$/i, intent: "notoken.jobs" },
51
+ { pattern: /^(cancel|stop|kill|abort)\s+(it|that|everything|all( tasks)?|the (task|job|scan|download))\s*$/i, intent: "notoken.cancel" },
52
+ { pattern: /^(cancel|stop|kill) (task|job) #?\d+$/i, intent: "notoken.cancel" },
53
+ ];
54
+ for (const { pattern, intent } of casualPatterns) {
55
+ if (pattern.test(text))
56
+ return { intent, confidence: 0.95, rawText, fields: {} };
57
+ }
58
+ // Pre-check: negation detection — "don't restart nginx", "do not check disk", "never mind"
59
+ // Note: "stop <service>" is a legitimate stop command, so we only match "stop" when
60
+ // followed by a verb (e.g. "stop checking") or on its own, not "stop <noun>"
61
+ if (/^(don'?t|do not|no don'?t)\s+/i.test(text)
62
+ || /^(cancel|never mind|abort|nevermind)$/i.test(text)
63
+ || /^never\s+(do|run|execute|mind)/i.test(text)
64
+ || /^stop\s+(doing|checking|running|monitoring|that|it)(\s|$)/i.test(text)) {
65
+ return { intent: "notoken.cancel", confidence: 0.95, rawText, fields: {} };
66
+ }
67
+ // Pre-check: status queries → notoken.status (not knowledge.lookup or service.status)
68
+ if (/^(what is |what's |show |check |give me )?(the )?(system |computer |machine |notoken )?status( of)?( this| the| my)?( machine| computer| system| server)?[?.!]?$/.test(text)
69
+ || /^(how is |how's )?(this |the |my )?(system|machine|computer|server) doing/.test(text)
70
+ || /^system status$/.test(text)) {
71
+ const statusDef = intents.find(i => i.name === "notoken.status");
72
+ if (statusDef)
73
+ return { intent: "notoken.status", confidence: 0.95, rawText, fields: {} };
74
+ }
75
+ // Pre-check: server/system queries — "what is load", "what is cpu usage", "what is memory", "how much ram"
76
+ if (/^(what is |what's |show |check |how much |how's )?(the )?(load|cpu|cpu usage|uptime|server load)( right now| currently| on this)?\??$/.test(text)
77
+ || /^(what is |show )?(the )?(load|cpu) (average|right now|currently)/.test(text)) {
78
+ return { intent: "server.uptime", confidence: 0.9, rawText, fields: {} };
79
+ }
80
+ // "what is using heavy cpu" / "what is eating cpu" / "any heavy load processes"
81
+ if (/\b(what|which|any)\b.*(using|eating|taking|hogging|consuming)\b.*(cpu|processing|resources|memory|ram|load)\b/i.test(text)
82
+ || /\b(heavy|high)\s+(load|cpu|processing|processes)\b/i.test(text)) {
83
+ return { intent: "process.list", confidence: 0.9, rawText, fields: {} };
84
+ }
85
+ if (/^(what is |what's |show |check |how much )?(the )?(memory|ram|memory usage|ram usage)( right now| left| free| used| currently)?\??$/.test(text)) {
86
+ return { intent: "server.check_memory", confidence: 0.9, rawText, fields: {} };
87
+ }
88
+ if (/^(what is |what's |show |check |how much )?(the |my )?(disk|disk space|storage|space|drives)( left| free| used| right now| currently)?\??$/.test(text)) {
89
+ return { intent: "server.check_disk", confidence: 0.9, rawText, fields: {} };
90
+ }
91
+ // Pre-check: common conversational queries that get misrouted
92
+ // Weather
93
+ if (/\b(weather|forecast|temperature|rain|snow|sunny|cloudy)\b/i.test(text)
94
+ && !/\b(log|error|server|disk)\b/i.test(text)) {
95
+ const locMatch = text.match(/(?:weather|forecast|temperature)\s+(?:in|at|for|of)\s+(.+?)(?:\?|$)/i)
96
+ ?? text.match(/(?:in|at|for)\s+(.+?)(?:\s+weather|\s+forecast|\?|$)/i);
97
+ return { intent: "weather.current", confidence: 0.95, rawText, fields: locMatch ? { location: locMatch[1].trim() } : {} };
98
+ }
99
+ // News
100
+ if (/^(what is |what's |show me )?(the )?(latest |today's |current )?(news|headlines|top stories)/i.test(text)
101
+ || /^(any |what's? )?news( today)?\??$/i.test(text)) {
102
+ return { intent: "news.headlines", confidence: 0.9, rawText, fields: {} };
103
+ }
104
+ // Database size
105
+ if (/\b(how big|size of|how much space)\b.*\b(database|db|mysql|postgres|mongo)\b/i.test(text)
106
+ || /\b(database|db)\s+(size|storage|disk|space)\b/i.test(text)) {
107
+ return { intent: "db.size", confidence: 0.9, rawText, fields: {} };
108
+ }
109
+ // Time/date
110
+ if (/^(what is |what's )?(the )?(time|date|day|today)( right now| today)?\??$/.test(text)) {
111
+ return { intent: "system.datetime", confidence: 0.9, rawText, fields: {} };
112
+ }
113
+ // Help / capabilities
114
+ // Only match bare help — not "ask openclaw what can you do"
115
+ if (/^(help|help me|what can you do|what do you do|show me help|commands)\??$/.test(text) && !text.includes("openclaw") && !text.includes("claw")) {
116
+ return { intent: "notoken.help", confidence: 0.95, rawText, fields: {} };
117
+ }
118
+ // History / undo
119
+ if (/^(show me |what is )?(my )?history$/.test(text) || /^what did i (do|run|ask) (last|before|previously)/.test(text)) {
120
+ return { intent: "notoken.history", confidence: 0.9, rawText, fields: {} };
121
+ }
122
+ if (/^undo( that| last| it)?$/.test(text)) {
123
+ return { intent: "notoken.undo", confidence: 0.9, rawText, fields: {} };
124
+ }
125
+ // Who am I / logged in users
126
+ if (/^who am i\??$/.test(text) || /^(what is |what's )?my (user|username|login)\??$/.test(text)) {
127
+ return { intent: "user.whoami", confidence: 0.9, rawText, fields: {} };
128
+ }
129
+ if (/^who (else )?(is |are )?(logged in|online|connected)\??$/.test(text)) {
130
+ return { intent: "user.who", confidence: 0.9, rawText, fields: {} };
131
+ }
132
+ // Running services
133
+ if (/^(show me |list |what are )?(the )?(running |active )?services$/.test(text)) {
134
+ return { intent: "service.list", confidence: 0.9, rawText, fields: {} };
135
+ }
136
+ // Network: ip address, bandwidth, speed, slow
137
+ if (/^(what is |what's |show )?(my )?(ip|ip address|public ip)\??$/.test(text)) {
138
+ return { intent: "network.ip", confidence: 0.9, rawText, fields: {} };
139
+ }
140
+ if (/\b(bandwidth|network speed|connection speed|speed test|speedtest)\b/i.test(text) || /^(is the )?network slow\??$/.test(text)) {
141
+ return { intent: "network.speedtest", confidence: 0.9, rawText, fields: {} };
142
+ }
143
+ // Block/unblock IP → firewall
144
+ if (/^(block|unblock|ban|unban)\s+(this\s+)?ip/i.test(text) || /^(block|unblock|ban|unban)\s+\d+\.\d+/i.test(text)) {
145
+ return { intent: "firewall.block_ip", confidence: 0.9, rawText, fields: {} };
146
+ }
147
+ // Docker queries with "show me"
148
+ if (/^(show me |list )?(docker )?(images|containers)$/.test(text) || /^what (containers|images) are (running|there)\??$/.test(text)) {
149
+ const isImages = /images/.test(text);
150
+ return { intent: isImages ? "docker.images" : "docker.list", confidence: 0.9, rawText, fields: {} };
151
+ }
152
+ // Large files
153
+ if (/^find (large|big|huge) files$/.test(text) || /\b(large|big|huge) files\b/.test(text)) {
154
+ return { intent: "disk.scan", confidence: 0.9, rawText, fields: {} };
155
+ }
156
+ // Error logs
157
+ if (/^(show me |check |any )?(the )?(error|recent) logs$/.test(text) || /^any errors in (the )?logs\??$/.test(text)) {
158
+ return { intent: "logs.errors", confidence: 0.9, rawText, fields: {} };
159
+ }
160
+ // Clear screen
161
+ if (/^clear( the)?( screen| terminal)?$/.test(text)) {
162
+ return { intent: "shell.clear", confidence: 0.95, rawText, fields: {} };
163
+ }
164
+ // Disk IO
165
+ if (/^(show me |check )?(disk|io|disk io|iops)( stats| usage)?\??$/.test(text)) {
166
+ return { intent: "server.check_disk", confidence: 0.9, rawText, fields: {} };
167
+ }
168
+ // Website up check
169
+ if (/^(check if |is )?(the |my )?(website|site|server|page) (is )?(up|down|running|alive|responding)\??$/.test(text)) {
170
+ return { intent: "network.curl", confidence: 0.9, rawText, fields: {} };
171
+ }
172
+ // Pre-check: attack/security/ddos queries → security.scan
173
+ if (/\b(attack|ddos|brute.?force|intrusion|hacked|breach|compromised|unauthorized|virus|malware|rootkit)\b/i.test(text)
174
+ || /\b(are we|am i|is .* being)\s+(under\s+)?attack/i.test(text)
175
+ || /\b(suspicious|failed)\s+(activity|login|connection|traffic|access)/i.test(text)
176
+ || /\bwho is (attacking|hacking|connecting|hitting)/i.test(text)
177
+ || /\bcheck (for )?(attacks|security|intrusion|viruses|malware)/i.test(text)
178
+ || /\b(any )?(viruses|malware|rootkits?) (on|in|running)/i.test(text)) {
179
+ return { intent: "security.scan", confidence: 0.95, rawText, fields: {} };
180
+ }
181
+ // Pre-check: "can you generate an image" → ai.generate_image (not ai.image_status)
182
+ if (/^(can you|could you|are you able to|do you)\s+(generate|create|make|draw)\s+(an?\s+)?(image|picture|photo|art)/i.test(text)) {
183
+ return { intent: "ai.generate_image", confidence: 0.9, rawText, fields: {} };
184
+ }
185
+ // Pre-check: "cd /path" → shell cd (change directory)
186
+ const cdMatch = text.match(/^cd\s+(\/\S+|~\S*|\.\S*)$/);
187
+ if (cdMatch) {
188
+ return { intent: "shell.cd", confidence: 0.95, rawText, fields: { path: cdMatch[1] } };
189
+ }
190
+ // Pre-check: "what is in my documents/folder/drive" → dir.list
191
+ const whatIsInMatch = text.match(/^(?:what is |what's |show me what(?:'s| is) )in (?:my |the |this )?(.*?)(?:\?|$)/);
192
+ if (whatIsInMatch) {
193
+ const target = whatIsInMatch[1].trim();
194
+ // Resolve common folder names
195
+ const folderMap = {
196
+ "documents": process.platform === "win32" ? "%USERPROFILE%\\Documents" : "~/Documents",
197
+ "documents folder": process.platform === "win32" ? "%USERPROFILE%\\Documents" : "~/Documents",
198
+ "downloads": process.platform === "win32" ? "%USERPROFILE%\\Downloads" : "~/Downloads",
199
+ "downloads folder": process.platform === "win32" ? "%USERPROFILE%\\Downloads" : "~/Downloads",
200
+ "desktop": process.platform === "win32" ? "%USERPROFILE%\\Desktop" : "~/Desktop",
201
+ "home": "~",
202
+ "home folder": "~",
203
+ "home directory": "~",
204
+ "root": "/",
205
+ "root folder": "/",
206
+ "root c drive": "/mnt/c/",
207
+ "c drive": "/mnt/c/",
208
+ "d drive": "/mnt/d/",
209
+ "e drive": "/mnt/e/",
210
+ };
211
+ const path = folderMap[target] ?? target;
212
+ if (target.includes("drive")) {
213
+ return { intent: "disk.scan", confidence: 0.9, rawText, fields: { path } };
214
+ }
215
+ return { intent: "dir.list", confidence: 0.9, rawText, fields: { path } };
216
+ }
217
+ // Pre-check: "what projects are on this drive" → project.scan
218
+ if (/\bwhat projects\b.*\b(on|in)\b.*\b(this|the|my|c|d)\b/.test(text)) {
219
+ return { intent: "project.scan", confidence: 0.9, rawText, fields: { path: "." } };
220
+ }
221
+ // Pre-check: "what's on this drive" / "show me whats on this drive" → disk.scan
222
+ if (/\b(what.?s|show me what.?s|what is) on (this|the|my|c|d) drive\b/.test(text)
223
+ || /\bshow me (this|the|my) drive\b/.test(text)) {
224
+ return { intent: "disk.scan", confidence: 0.9, rawText, fields: {} };
225
+ }
226
+ // Pre-check: "what files" / "what are files in this folder" → dir.list or project.detect
227
+ if (/^(what are |what's in |show me |list |show )(the )?(files|contents)( in| of)?( this| the| my| current)?( folder| directory| dir| project)?[?.!]?$/.test(text)
228
+ || /^(show me |list )(project |all )?files$/.test(text)) {
229
+ const isDirList = text.includes("folder") || text.includes("directory") || text.includes("dir");
230
+ const intentName = isDirList ? "dir.list" : "project.detect";
231
+ return { intent: intentName, confidence: 0.9, rawText, fields: { path: "." } };
232
+ }
233
+ // Pre-check: "how is openclaw doing" / "how is discord doing" → *.status
234
+ const howIsMatch = text.match(/^how(?:'s| is| are) (openclaw|claw|discord|ollama|notoken) (?:doing|going|running|working)/);
235
+ if (howIsMatch) {
236
+ const target = howIsMatch[1] === "claw" ? "openclaw" : howIsMatch[1];
237
+ const intentName = target === "notoken" ? "notoken.status" : `${target}.status`;
238
+ return { intent: intentName, confidence: 0.9, rawText, fields: {} };
239
+ }
7
240
  // Match intent by synonyms defined in intents.json
8
241
  const matched = matchIntent(text, intents);
9
242
  if (!matched)
@@ -90,6 +323,10 @@ function extractStringFields(rawText, lowerText, matchedPhrase, fieldNames, alre
90
323
  }
91
324
  }
92
325
  remaining = remaining.replace(/\s+/g, " ").trim();
326
+ // Strip filler words that aren't meaningful field values
327
+ remaining = remaining.replace(/^(can you |could you |would you |please |hey |yo |just )+/i, "").trim();
328
+ remaining = remaining.replace(/\b(please|for me|for errors|for issues)\b/gi, "").trim();
329
+ remaining = remaining.replace(/\s+/g, " ").trim();
93
330
  // Check for quoted strings first
94
331
  const quoted = rawText.match(/["']([^"']+)["']/g);
95
332
  if (quoted) {
@@ -182,6 +419,7 @@ function isStopWord(word) {
182
419
  }
183
420
  function matchIntent(text, intents) {
184
421
  let best = null;
422
+ // Pass 1: exact substring match (fast path)
185
423
  for (const def of intents) {
186
424
  for (const phrase of def.synonyms) {
187
425
  if (text.includes(phrase)) {
@@ -191,8 +429,85 @@ function matchIntent(text, intents) {
191
429
  }
192
430
  }
193
431
  }
432
+ if (best)
433
+ return { def: best.def, matchedPhrase: best.matchedPhrase };
434
+ // Pass 2: fuzzy/spell-corrected match — correct typos in user input
435
+ // then retry matching. Only for single/double-word synonyms to avoid
436
+ // false positives on long phrases.
437
+ const corrected = spellCorrectText(text, intents);
438
+ if (corrected !== text) {
439
+ for (const def of intents) {
440
+ for (const phrase of def.synonyms) {
441
+ if (corrected.includes(phrase)) {
442
+ if (!best || phrase.length > best.length) {
443
+ best = { def, matchedPhrase: phrase, length: phrase.length };
444
+ }
445
+ }
446
+ }
447
+ }
448
+ }
194
449
  return best ? { def: best.def, matchedPhrase: best.matchedPhrase } : null;
195
450
  }
451
/**
 * Spell-correct `text` word by word against the vocabulary of intent synonyms.
 *
 * The vocabulary is every whitespace-separated synonym word of three or more
 * characters. An input word is left alone when it is shorter than three
 * characters or already in the vocabulary. Otherwise it is replaced by the
 * closest vocabulary word within the edit budget: 1 edit for words of up to
 * four characters, 2 edits for longer words. On a tie the vocabulary word
 * encountered first wins.
 *
 * Returns the original `text` untouched when no word was replaced; otherwise
 * the corrected words joined by single spaces.
 */
function spellCorrectText(text, intents) {
    // Collect the vocabulary in synonym order so tie-breaking stays stable.
    const knownWords = new Set(intents
        .flatMap(def => def.synonyms.flatMap(phrase => phrase.split(/\s+/)))
        .filter(token => token.length >= 3));
    const output = [];
    let anyReplaced = false;
    for (const token of text.split(/\s+/)) {
        // Too short to correct safely, or already a known word.
        if (token.length < 3 || knownWords.has(token)) {
            output.push(token);
            continue;
        }
        const budget = token.length > 4 ? 2 : 1;
        let winner = token;
        let winnerDist = Infinity;
        knownWords.forEach(candidate => {
            // The edit distance is at least the length difference, so skip early.
            if (Math.abs(candidate.length - token.length) > budget) {
                return;
            }
            const distance = editDistance(token, candidate);
            if (distance <= budget && distance < winnerDist) {
                winnerDist = distance;
                winner = candidate;
            }
        });
        if (winner !== token) {
            anyReplaced = true;
        }
        output.push(winner);
    }
    return anyReplaced ? output.join(" ") : text;
}
493
/**
 * Levenshtein edit distance between `a` and `b` (insert, delete and substitute
 * each cost 1), computed with a single rolling row.
 */
function editDistance(a, b) {
    const rows = a.length;
    const cols = b.length;
    // If either string is empty the distance is the length of the other one.
    if (rows === 0 || cols === 0) {
        return rows + cols;
    }
    // row[j] starts as the cost of building b[0..j) from the empty string.
    const row = [];
    for (let j = 0; j <= cols; j++) {
        row.push(j);
    }
    for (let i = 1; i <= rows; i++) {
        // `diagonal` carries the previous row's value at column j - 1.
        let diagonal = row[0];
        row[0] = i;
        for (let j = 1; j <= cols; j++) {
            const above = row[j];
            if (a[i - 1] === b[j - 1]) {
                row[j] = diagonal;
            }
            else {
                row[j] = 1 + Math.min(diagonal, above, row[j - 1]);
            }
            diagonal = above;
        }
    }
    return row[cols];
}
196
511
  function extractEnvironment(text, aliases) {
197
512
  for (const [canonical, aliasList] of Object.entries(aliases)) {
198
513
  for (const alias of aliasList) {
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Semantic Similarity — lightweight sentence-level matching.
3
+ *
4
+ * Uses character n-gram overlap + word-level Jaccard + IDF weighting
5
+ * to compute similarity between user input and intent descriptions/synonyms.
6
+ * No external API needed — runs entirely local.
7
+ *
8
+ * This catches paraphrases that exact synonym matching misses:
9
+ * "what's hogging my CPU" ≈ "show me what processes are eating resources"
10
+ * "is my site live" ≈ "check if website is up"
11
+ */
12
+ export interface SimilarityMatch { // One ranked candidate produced by findSimilarIntents.
13
+ intent: string; // Name of the intent that matched.
14
+ score: number; // Best similarity score found for that intent (higher is more similar).
15
+ matchedPhrase: string; // The synonym or description text that produced `score`.
16
+ }
17
+ /**
18
+ * Find the most similar intents to the input text.
19
+ * Combines character n-gram, word overlap, and IDF-weighted scoring.
+ *
+ * @param rawText - User input; the implementation lowercases it before scoring.
+ * @param topN - Maximum number of matches to return (the implementation defaults to 5).
+ * @returns Matches sorted by descending score. The implementation drops any
+ * intent whose best score is 0.15 or lower.
20
+ */
21
+ export declare function findSimilarIntents(rawText: string, topN?: number): SimilarityMatch[];
22
+ /**
23
+ * Score how similar two phrases are (nominally 0-1).
+ *
+ * The implementation blends character n-gram similarity (weight 0.3),
+ * word-set overlap (weight 0.3) and IDF-weighted overlap (weight 0.4).
+ * The IDF-weighted part is measured relative to the words of `text1`.
24
+ */
25
+ export declare function phraseSimilarity(text1: string, text2: string): number;
26
+ /**
27
+ * Expand a query with similar words found across all intent synonyms.
28
+ * Returns words that co-occur with the input words in intent synonyms.
+ *
+ * @returns At most 10 words, most frequently co-occurring first. The
+ * implementation only includes a word once it has been counted at least twice.
29
+ */
30
+ export declare function expandWithCooccurrences(rawText: string): string[];
@@ -0,0 +1,174 @@
1
+ /**
2
+ * Semantic Similarity — lightweight sentence-level matching.
3
+ *
4
+ * Uses character n-gram overlap + word-level Jaccard + IDF weighting
5
+ * to compute similarity between user input and intent descriptions/synonyms.
6
+ * No external API needed — runs entirely local.
7
+ *
8
+ * This catches paraphrases that exact synonym matching misses:
9
+ * "what's hogging my CPU" ≈ "show me what processes are eating resources"
10
+ * "is my site live" ≈ "check if website is up"
11
+ */
12
+ import { loadIntents } from "../utils/config.js";
13
+ // ─── N-gram extraction ─────────────────────────────────────────────────────
14
/**
 * Return the set of distinct character n-grams of `text`.
 * The text is lowercased and every character other than a-z, 0-9 and the
 * space is removed before slicing, so n-grams may contain spaces.
 */
function charNgrams(text, n) {
    const normalized = text.toLowerCase().replace(/[^a-z0-9 ]/g, "");
    const out = new Set();
    let start = 0;
    while (start + n <= normalized.length) {
        out.add(normalized.slice(start, start + n));
        start += 1;
    }
    return out;
}
22
/**
 * Return the set of distinct words in `text` that are longer than one character.
 * The text is lowercased and stripped of everything except a-z, 0-9 and spaces
 * before it is split on whitespace.
 */
function wordSet(text) {
    const tokens = text.toLowerCase().replace(/[^a-z0-9 ]/g, "").split(/\s+/);
    const kept = new Set();
    for (const token of tokens) {
        if (token.length > 1) {
            kept.add(token);
        }
    }
    return kept;
}
25
+ // ─── Similarity metrics ─────────────────────────────────────────────────────
26
/**
 * Jaccard similarity of two sets: |a ∩ b| / |a ∪ b|.
 * Two empty sets are defined here to have similarity 0.
 */
function jaccardSimilarity(a, b) {
    if (a.size === 0 && b.size === 0) {
        return 0;
    }
    const shared = Array.from(a).filter(item => b.has(item)).length;
    const unionSize = a.size + b.size - shared;
    return shared / unionSize;
}
35
/**
 * Character-level similarity of two texts: Jaccard overlap of their bigram
 * sets (weight 0.4) plus Jaccard overlap of their trigram sets (weight 0.6).
 */
function ngramSimilarity(text1, text2) {
    const bigramScore = jaccardSimilarity(charNgrams(text1, 2), charNgrams(text2, 2));
    const trigramScore = jaccardSimilarity(charNgrams(text1, 3), charNgrams(text2, 3));
    return (bigramScore * 0.4 + trigramScore * 0.6);
}
43
/**
 * Word-level similarity: Jaccard overlap of the two texts' word sets.
 */
function wordOverlap(text1, text2) {
    const leftWords = wordSet(text1);
    const rightWords = wordSet(text2);
    return jaccardSimilarity(leftWords, rightWords);
}
46
+ // ─── Stopword filtering ─────────────────────────────────────────────────────
47
/** Common function words that carry no intent-discriminating meaning. */
const STOPWORDS = new Set([
    "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "can", "shall", "to", "of", "in", "for",
    "on", "with", "at", "by", "from", "it", "its", "this", "that",
    "i", "me", "my", "we", "us", "our", "you", "your", "he", "she",
    "they", "them", "what", "which", "who", "when", "where", "how",
    "not", "no", "nor", "or", "and", "but", "if", "then", "so",
    "just", "also", "very", "too", "some", "any", "all", "more",
    "please",
]);
/**
 * Split `text` into lowercase content words.
 *
 * Each whitespace-separated token has leading and trailing punctuation
 * trimmed, so "cpu?" and "logs," compare equal to "cpu" and "logs". Interior
 * punctuation ("docker-compose", "what's") is kept. Tokens of two characters
 * or fewer and stop words are dropped.
 *
 * @param text Phrase to tokenize.
 * @returns Content words in their original order (duplicates preserved).
 */
function contentWords(text) {
    return text
        .toLowerCase()
        .split(/\s+/)
        // Trim before filtering: untrimmed, "please!" slips past the stop-word
        // list and "up?" passes the length check.
        .map(token => token.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
        .filter(token => token.length > 2 && !STOPWORDS.has(token));
}
61
+ // ─── IDF weighting ──────────────────────────────────────────────────────────
62
+ // Words that appear in many intents are less discriminative.
63
/** Memoized IDF table; populated by the first buildIDF() call. */
let idfCache = null;
/**
 * Build (once) and return the inverse-document-frequency weight of every
 * content word, treating each intent (its synonyms plus its description) as
 * one document.
 *
 * The weight is log(totalDocs / (1 + documentCount)), clamped at 0. Without
 * the clamp, a word present in every intent gets a negative weight, which lets
 * a weighted-overlap ratio leave the 0..1 range.
 *
 * @returns Map from content word to its non-negative IDF weight.
 */
function buildIDF() {
    if (idfCache)
        return idfCache;
    const intents = loadIntents();
    const docCount = new Map();
    const totalDocs = intents.length;
    for (const intent of intents) {
        // Count each word at most once per intent (document frequency, not term frequency).
        const wordsInDoc = new Set();
        for (const syn of intent.synonyms) {
            for (const w of contentWords(syn))
                wordsInDoc.add(w);
        }
        for (const w of contentWords(intent.description))
            wordsInDoc.add(w);
        for (const w of wordsInDoc)
            docCount.set(w, (docCount.get(w) ?? 0) + 1);
    }
    idfCache = new Map();
    for (const [word, count] of docCount) {
        // Ubiquitous words are simply non-discriminative: weight 0, never negative.
        idfCache.set(word, Math.max(0, Math.log(totalDocs / (1 + count))));
    }
    return idfCache;
}
87
/**
 * IDF-weighted share of `text1`'s content words that also occur in `text2`.
 * Words missing from the IDF table get a weight of 2.0, treating them as
 * rare and therefore discriminative. Returns 0 when the total weight of
 * `text1`'s words is not positive.
 */
function weightedOverlap(text1, text2) {
    const weights = buildIDF();
    const candidates = new Set(contentWords(text2));
    let matchedWeight = 0;
    let totalWeight = 0;
    for (const token of contentWords(text1)) {
        const tokenWeight = weights.get(token) ?? 2.0;
        totalWeight += tokenWeight;
        if (candidates.has(token)) {
            matchedWeight += tokenWeight;
        }
    }
    if (totalWeight > 0) {
        return matchedWeight / totalWeight;
    }
    return 0;
}
101
/**
 * Rank intents by similarity to the input text.
 *
 * Every intent is scored against each of its synonyms and against its
 * description (the description score is scaled by 0.8). The best of those is
 * the intent's score. Intents scoring 0.15 or lower are discarded. The rest
 * are returned best-first, limited to `topN` entries.
 */
export function findSimilarIntents(rawText, topN = 5) {
    const intents = loadIntents();
    const query = rawText.toLowerCase();
    const ranked = [];
    for (const intent of intents) {
        let topScore = 0;
        let topPhrase = "";
        // Keep a candidate only when it strictly beats the current best.
        const consider = (score, phrase) => {
            if (score > topScore) {
                topScore = score;
                topPhrase = phrase;
            }
        };
        for (const syn of intent.synonyms) {
            consider(phraseSimilarity(query, syn), syn);
        }
        // Slight penalty for matching the description rather than a synonym.
        consider(phraseSimilarity(query, intent.description) * 0.8, intent.description);
        if (topScore > 0.15) {
            ranked.push({ intent: intent.name, score: topScore, matchedPhrase: topPhrase });
        }
    }
    ranked.sort((left, right) => right.score - left.score);
    return ranked.slice(0, topN);
}
138
/**
 * Blend three similarity signals between two phrases into one score:
 * character n-grams (weight 0.3), word-set overlap (weight 0.3) and
 * IDF-weighted overlap (weight 0.4).
 */
export function phraseSimilarity(text1, text2) {
    const charScore = ngramSimilarity(text1, text2);
    const tokenScore = wordOverlap(text1, text2);
    const idfScore = weightedOverlap(text1, text2);
    return charScore * 0.3 + tokenScore * 0.3 + idfScore * 0.4;
}
147
/**
 * Suggest extra query words that appear alongside the input's content words
 * in intent synonyms.
 *
 * For every synonym sharing at least one content word with the input, each of
 * its other content words is tallied. Words tallied at least twice are
 * returned, most frequent first, limited to 10.
 */
export function expandWithCooccurrences(rawText) {
    const intents = loadIntents();
    const queryWords = new Set(contentWords(rawText));
    const tally = new Map();
    for (const { synonyms } of intents) {
        for (const phrase of synonyms) {
            const tokens = contentWords(phrase);
            // Only synonyms that share a word with the query contribute.
            if (!tokens.some(token => queryWords.has(token))) {
                continue;
            }
            for (const token of tokens) {
                if (queryWords.has(token)) {
                    continue;
                }
                tally.set(token, (tally.get(token) ?? 0) + 1);
            }
        }
    }
    return Array.from(tally)
        .filter(pair => pair[1] >= 2)
        .sort((left, right) => right[1] - left[1])
        .slice(0, 10)
        .map(pair => pair[0]);
}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Vocabulary Builder — learns vocabulary from Wikidata lookups.
3
+ *
4
+ * After every successful Wikidata entity lookup, this module:
5
+ * 1. Extracts instanceOf labels and maps them to intent domains
6
+ * 2. Collects aliases as synonyms for future matching
7
+ * 3. Adds related concepts to the concept router map
8
+ * 4. Persists learned vocabulary to ~/.notoken/learned-vocabulary.json
9
+ *
10
+ * On startup, loads learned vocabulary and merges it into the
11
+ * concept router's CONCEPT_DOMAINS so future queries benefit.
12
+ */
13
+ import type { WikiEntity } from "./wikidata.js";
14
+ export interface LearnedVocabulary { // Shape returned by getEnrichedConcepts(), declared later in this file.
15
+ /** Maps an entity label (lowercase) to its known aliases/synonyms. */
16
+ concepts: Record<string, string[]>;
17
+ /** Maps an instanceOf label (lowercase) to intent domain strings. */
18
+ domainMappings: Record<string, string[]>;
19
+ /** ISO timestamp of last update. */
20
+ learnedAt: string;
21
+ }
22
+ /**
23
+ * Enrich vocabulary from a Wikidata entity.
24
+ *
25
+ * Called after every successful Wikidata lookup. Extracts:
26
+ * - instanceOf labels → domain mappings
27
+ * - aliases → concept synonyms
28
+ * - related concepts → concept router entries
29
+ */
30
+ export declare function enrichVocabularyFromWiki(entity: WikiEntity): void;
31
+ /**
32
+ * Load learned vocabulary from disk and merge into the concept router.
33
+ *
34
+ * Should be called on startup so that previously learned vocabulary
35
+ * is available for intent routing from the first query.
36
+ */
37
+ export declare function loadLearnedVocabulary(): void;
38
+ /**
39
+ * Get the current enriched concepts map (merged vocabulary).
40
+ *
41
+ * Returns a combined view of hardcoded concepts and learned vocabulary.
42
+ */
43
+ export declare function getEnrichedConcepts(): LearnedVocabulary;