notoken-core 1.6.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. package/config/ascii-art.json +12 -0
  2. package/config/chat-responses.json +1019 -0
  3. package/config/cheat-sheets.json +94 -0
  4. package/config/concept-clusters.json +31 -0
  5. package/config/daily-tips.json +105 -0
  6. package/config/entities.json +93 -0
  7. package/config/history-today.json +9762 -0
  8. package/config/image-prompts.json +20 -0
  9. package/config/intent-vectors.json +1 -0
  10. package/config/intents.json +5354 -85
  11. package/config/ollama-models.json +193 -0
  12. package/config/rules.json +32 -1
  13. package/config/startup-quotes.json +45 -0
  14. package/dist/automation/discordPatchright.d.ts +35 -0
  15. package/dist/automation/discordPatchright.js +424 -0
  16. package/dist/automation/discordSetup.d.ts +31 -0
  17. package/dist/automation/discordSetup.js +338 -0
  18. package/dist/automation/smAutomation.d.ts +82 -0
  19. package/dist/automation/smAutomation.js +448 -0
  20. package/dist/conversation/coreference.js +44 -4
  21. package/dist/conversation/pendingActions.d.ts +55 -0
  22. package/dist/conversation/pendingActions.js +127 -0
  23. package/dist/conversation/store.d.ts +72 -0
  24. package/dist/conversation/store.js +140 -1
  25. package/dist/conversation/topicTracker.d.ts +36 -0
  26. package/dist/conversation/topicTracker.js +141 -0
  27. package/dist/execution/ssh.d.ts +42 -1
  28. package/dist/execution/ssh.js +538 -3
  29. package/dist/handlers/executor.d.ts +2 -0
  30. package/dist/handlers/executor.js +4234 -31
  31. package/dist/index.d.ts +35 -4
  32. package/dist/index.js +51 -3
  33. package/dist/nlp/batchParser.d.ts +30 -0
  34. package/dist/nlp/batchParser.js +77 -0
  35. package/dist/nlp/conceptExpansion.d.ts +54 -0
  36. package/dist/nlp/conceptExpansion.js +136 -0
  37. package/dist/nlp/conceptRouter.d.ts +49 -0
  38. package/dist/nlp/conceptRouter.js +302 -0
  39. package/dist/nlp/confidenceCalibrator.d.ts +62 -0
  40. package/dist/nlp/confidenceCalibrator.js +116 -0
  41. package/dist/nlp/correctionLearner.d.ts +45 -0
  42. package/dist/nlp/correctionLearner.js +207 -0
  43. package/dist/nlp/entitySpellCorrect.d.ts +35 -0
  44. package/dist/nlp/entitySpellCorrect.js +141 -0
  45. package/dist/nlp/knowledgeGraph.d.ts +70 -0
  46. package/dist/nlp/knowledgeGraph.js +380 -0
  47. package/dist/nlp/llmFallback.js +28 -1
  48. package/dist/nlp/multiClassifier.js +91 -6
  49. package/dist/nlp/multiIntent.d.ts +43 -0
  50. package/dist/nlp/multiIntent.js +154 -0
  51. package/dist/nlp/parseIntent.d.ts +6 -1
  52. package/dist/nlp/parseIntent.js +180 -5
  53. package/dist/nlp/ruleParser.js +317 -0
  54. package/dist/nlp/semanticSimilarity.d.ts +30 -0
  55. package/dist/nlp/semanticSimilarity.js +174 -0
  56. package/dist/nlp/vocabularyBuilder.d.ts +43 -0
  57. package/dist/nlp/vocabularyBuilder.js +224 -0
  58. package/dist/nlp/wikidata.d.ts +49 -0
  59. package/dist/nlp/wikidata.js +228 -0
  60. package/dist/policy/confirm.d.ts +10 -0
  61. package/dist/policy/confirm.js +39 -0
  62. package/dist/policy/safety.js +6 -4
  63. package/dist/types/intent.d.ts +8 -0
  64. package/dist/types/intent.js +1 -0
  65. package/dist/utils/achievements.d.ts +38 -0
  66. package/dist/utils/achievements.js +126 -0
  67. package/dist/utils/aliases.d.ts +5 -0
  68. package/dist/utils/aliases.js +39 -0
  69. package/dist/utils/analysis.js +71 -15
  70. package/dist/utils/bookmarks.d.ts +13 -0
  71. package/dist/utils/bookmarks.js +51 -0
  72. package/dist/utils/browser.d.ts +64 -0
  73. package/dist/utils/browser.js +364 -0
  74. package/dist/utils/commandHistory.d.ts +20 -0
  75. package/dist/utils/commandHistory.js +108 -0
  76. package/dist/utils/completer.d.ts +17 -0
  77. package/dist/utils/completer.js +79 -0
  78. package/dist/utils/config.js +32 -2
  79. package/dist/utils/dbQuery.d.ts +25 -0
  80. package/dist/utils/dbQuery.js +248 -0
  81. package/dist/utils/devTools.d.ts +35 -0
  82. package/dist/utils/devTools.js +95 -0
  83. package/dist/utils/discordDiag.d.ts +35 -0
  84. package/dist/utils/discordDiag.js +826 -0
  85. package/dist/utils/diskCleanup.d.ts +36 -0
  86. package/dist/utils/diskCleanup.js +775 -0
  87. package/dist/utils/entityResolver.d.ts +107 -0
  88. package/dist/utils/entityResolver.js +468 -0
  89. package/dist/utils/imageGen.d.ts +92 -0
  90. package/dist/utils/imageGen.js +2031 -0
  91. package/dist/utils/installTracker.d.ts +57 -0
  92. package/dist/utils/installTracker.js +160 -0
  93. package/dist/utils/multiExec.d.ts +21 -0
  94. package/dist/utils/multiExec.js +141 -0
  95. package/dist/utils/openclawDiag.d.ts +29 -0
  96. package/dist/utils/openclawDiag.js +1035 -0
  97. package/dist/utils/output.js +4 -0
  98. package/dist/utils/platform.js +2 -1
  99. package/dist/utils/progressReporter.d.ts +50 -0
  100. package/dist/utils/progressReporter.js +58 -0
  101. package/dist/utils/projectDetect.d.ts +44 -0
  102. package/dist/utils/projectDetect.js +319 -0
  103. package/dist/utils/projectScanner.d.ts +44 -0
  104. package/dist/utils/projectScanner.js +312 -0
  105. package/dist/utils/shellCompat.d.ts +78 -0
  106. package/dist/utils/shellCompat.js +186 -0
  107. package/dist/utils/smartArchive.d.ts +16 -0
  108. package/dist/utils/smartArchive.js +172 -0
  109. package/dist/utils/smartRetry.d.ts +26 -0
  110. package/dist/utils/smartRetry.js +114 -0
  111. package/dist/utils/snippets.d.ts +13 -0
  112. package/dist/utils/snippets.js +53 -0
  113. package/dist/utils/stabilityMatrixManager.d.ts +80 -0
  114. package/dist/utils/stabilityMatrixManager.js +268 -0
  115. package/dist/utils/teachMode.d.ts +41 -0
  116. package/dist/utils/teachMode.js +100 -0
  117. package/dist/utils/timer.d.ts +22 -0
  118. package/dist/utils/timer.js +52 -0
  119. package/dist/utils/updater.d.ts +1 -0
  120. package/dist/utils/updater.js +1 -1
  121. package/dist/utils/version.d.ts +20 -0
  122. package/dist/utils/version.js +212 -0
  123. package/package.json +6 -3
@@ -4,6 +4,241 @@ export function parseByRules(rawText) {
4
4
  const rules = loadRules();
5
5
  const intents = loadIntents();
6
6
  const text = rawText.trim().toLowerCase();
7
+ // Pre-check: casual conversation / greetings / social
8
+ const casualPatterns = [
9
+ { pattern: /^(hey|hi|hello|howdy|yo|sup|what'?s up|good (morning|afternoon|evening|night)|greetings)\s*[!?.]*$/i, intent: "chat.greeting" },
10
+ { pattern: /^how (are you|you doing|is it going|do you feel|are things)/i, intent: "chat.howru" },
11
+ { pattern: /^(how'?s it going|what'?s good|you good|you ok)\s*[!?.]*$/i, intent: "chat.howru" },
12
+ { pattern: /^(thanks|thank you|thx|cheers|appreciate it|good job|nice work|well done|great job|awesome|perfect|excellent)\s*[!?.]*$/i, intent: "chat.thanks" },
13
+ { pattern: /^(bye|goodbye|see you|later|gotta go|peace|cya|goodnight|good night|take care)\s*[!?.]*$/i, intent: "chat.bye" },
14
+ { pattern: /^(who are you|what are you|tell me about yourself|what is notoken)/i, intent: "chat.about" },
15
+ { pattern: /^(tell me a joke|say something funny|make me laugh|joke)\s*[!?.]*$/i, intent: "chat.joke" },
16
+ { pattern: /^(i'?m (bored|tired|frustrated|confused|stuck|lost))/i, intent: "chat.empathy" },
17
+ { pattern: /^(this (sucks|is broken|doesn'?t work|is frustrating))/i, intent: "chat.empathy" },
18
+ { pattern: /^(what do you think|your opinion|do you like|which is better)/i, intent: "chat.opinion" },
19
+ // Compliments
20
+ { pattern: /^(you('re| are) (awesome|great|amazing|the best|cool|smart|helpful|incredible))/i, intent: "chat.compliment" },
21
+ { pattern: /^(nice|love it|love you|love this|you rock|brilliant)/i, intent: "chat.compliment" },
22
+ // Insults (playful)
23
+ { pattern: /^(you('re| are) (stupid|dumb|useless|terrible|bad|wrong|slow|broken))/i, intent: "chat.insult" },
24
+ { pattern: /^(you suck|this sucks|worst|hate this)/i, intent: "chat.insult" },
25
+ // What can you do / capabilities
26
+ { pattern: /^(what (else )?can you do|show me what you can do|what are your (skills|capabilities|features))/i, intent: "chat.capabilities" },
27
+ // Bored / entertain me
28
+ { pattern: /^(i('m| am) bored|entertain me|do something (cool|fun|interesting)|surprise me|show me something)/i, intent: "chat.bored" },
29
+ // Existential
30
+ { pattern: /^(are you (alive|real|sentient|conscious|human|ai|a robot|a bot))/i, intent: "chat.existential" },
31
+ { pattern: /^(do you (dream|sleep|feel|think|have feelings|have emotions))/i, intent: "chat.existential" },
32
+ // Motivational
33
+ { pattern: /^(motivate me|inspire me|give me a (quote|pep talk)|i need motivation)/i, intent: "chat.motivate" },
34
+ // Facts / trivia
35
+ { pattern: /^(tell me a fact|random fact|fun fact|did you know|trivia)/i, intent: "chat.fact" },
36
+ // Easter eggs
37
+ { pattern: /^(42|meaning of life|do a barrel roll|make me a sandwich|sudo make me a sandwich)/i, intent: "chat.easter" },
38
+ { pattern: /^(what is the matrix|open the pod bay doors|i am your father|may the force)/i, intent: "chat.easter" },
39
+ // Apology
40
+ { pattern: /^(sorry|my bad|i('m| am) sorry|apologies|oops|my mistake)/i, intent: "chat.sorry" },
41
+ // Agreement / affirmation (not pending action)
42
+ { pattern: /^(cool|nice|ok cool|awesome|sweet|neat|dope|sick|rad|lit)\s*[!.]*$/i, intent: "chat.acknowledge" },
43
+ // How old are you / version
44
+ { pattern: /^(how old are you|when were you (made|born|created)|your (age|birthday|version))/i, intent: "chat.age" },
45
+ // Favorite things
46
+ { pattern: /^(what('s| is) your favorite|do you have a favorite)/i, intent: "chat.favorite" },
47
+ // Riddles
48
+ { pattern: /^(tell me a riddle|riddle|give me a riddle|riddle me|got a riddle|brain teaser)\s*[!?.]*$/i, intent: "chat.riddle" },
49
+ // Today in history
50
+ { pattern: /^(what happened today|today in history|on this day|this day in history|historical fact|history fact)\s*[!?.]*$/i, intent: "chat.history_today" },
51
+ // Task management (natural language)
52
+ { pattern: /^(what'?s running in (the )?background|any(thing)? running in (the )?background|running tasks|background tasks|active tasks|show (my )?tasks|what tasks)\s*[!?.]*$/i, intent: "notoken.jobs" },
53
+ { pattern: /^(cancel|stop|kill|abort)\s+(it|that|everything|all( tasks)?|the (task|job|scan|download))\s*$/i, intent: "notoken.cancel" },
54
+ { pattern: /^(cancel|stop|kill) (task|job) #?\d+$/i, intent: "notoken.cancel" },
55
+ ];
56
+ for (const { pattern, intent } of casualPatterns) {
57
+ if (pattern.test(text))
58
+ return { intent, confidence: 0.95, rawText, fields: {} };
59
+ }
60
+ // Pre-check: negation detection — "don't restart nginx", "do not check disk", "never mind"
61
+ // Note: "stop <service>" is a legitimate stop command, so we only match "stop" when
62
+ // followed by a verb (e.g. "stop checking") or on its own, not "stop <noun>"
63
+ if (/^(don'?t|do not|no don'?t)\s+/i.test(text)
64
+ || /^(cancel|never mind|abort|nevermind)$/i.test(text)
65
+ || /^never\s+(do|run|execute|mind)/i.test(text)
66
+ || /^stop\s+(doing|checking|running|monitoring|that|it)(\s|$)/i.test(text)) {
67
+ return { intent: "notoken.cancel", confidence: 0.95, rawText, fields: {} };
68
+ }
69
+ // Pre-check: status queries → notoken.status (not knowledge.lookup or service.status)
70
+ if (/^(what is |what's |show |check |give me )?(the )?(system |computer |machine |notoken )?status( of)?( this| the| my)?( machine| computer| system| server)?[?.!]?$/.test(text)
71
+ || /^(how is |how's )?(this |the |my )?(system|machine|computer|server) doing/.test(text)
72
+ || /^system status$/.test(text)) {
73
+ const statusDef = intents.find(i => i.name === "notoken.status");
74
+ if (statusDef)
75
+ return { intent: "notoken.status", confidence: 0.95, rawText, fields: {} };
76
+ }
77
+ // Pre-check: server/system queries — "what is load", "what is cpu usage", "what is memory", "how much ram"
78
+ if (/^(what is |what's |show |check |how much |how's )?(the )?(load|cpu|cpu usage|uptime|server load)( right now| currently| on this)?\??$/.test(text)
79
+ || /^(what is |show )?(the )?(load|cpu) (average|right now|currently)/.test(text)) {
80
+ return { intent: "server.uptime", confidence: 0.9, rawText, fields: {} };
81
+ }
82
+ // "what is using heavy cpu" / "what is eating cpu" / "any heavy load processes"
83
+ if (/\b(what|which|any)\b.*(using|eating|taking|hogging|consuming)\b.*(cpu|processing|resources|memory|ram|load)\b/i.test(text)
84
+ || /\b(heavy|high)\s+(load|cpu|processing|processes)\b/i.test(text)) {
85
+ return { intent: "process.list", confidence: 0.9, rawText, fields: {} };
86
+ }
87
+ if (/^(what is |what's |show |check |how much )?(the )?(memory|ram|memory usage|ram usage)( right now| left| free| used| currently)?\??$/.test(text)) {
88
+ return { intent: "server.check_memory", confidence: 0.9, rawText, fields: {} };
89
+ }
90
+ if (/^(what is |what's |show |check |how much )?(the |my )?(disk|disk space|storage|space|drives)( left| free| used| right now| currently)?\??$/.test(text)) {
91
+ return { intent: "server.check_disk", confidence: 0.9, rawText, fields: {} };
92
+ }
93
+ // Pre-check: common conversational queries that get misrouted
94
+ // Weather
95
+ if (/\b(weather|forecast|temperature|rain|snow|sunny|cloudy)\b/i.test(text)
96
+ && !/\b(log|error|server|disk)\b/i.test(text)) {
97
+ const locMatch = text.match(/(?:weather|forecast|temperature)\s+(?:in|at|for|of)\s+(.+?)(?:\?|$)/i)
98
+ ?? text.match(/(?:in|at|for)\s+(.+?)(?:\s+weather|\s+forecast|\?|$)/i);
99
+ return { intent: "weather.current", confidence: 0.95, rawText, fields: locMatch ? { location: locMatch[1].trim() } : {} };
100
+ }
101
+ // News
102
+ if (/^(what is |what's |show me )?(the )?(latest |today's |current )?(news|headlines|top stories)/i.test(text)
103
+ || /^(any |what's? )?news( today)?\??$/i.test(text)) {
104
+ return { intent: "news.headlines", confidence: 0.9, rawText, fields: {} };
105
+ }
106
+ // Database size
107
+ if (/\b(how big|size of|how much space)\b.*\b(database|db|mysql|postgres|mongo)\b/i.test(text)
108
+ || /\b(database|db)\s+(size|storage|disk|space)\b/i.test(text)) {
109
+ return { intent: "db.size", confidence: 0.9, rawText, fields: {} };
110
+ }
111
+ // Time/date
112
+ if (/^(what is |what's )?(the )?(time|date|day|today)( right now| today)?\??$/.test(text)) {
113
+ return { intent: "system.datetime", confidence: 0.9, rawText, fields: {} };
114
+ }
115
+ // Help / capabilities
116
+ // Only match bare help — not "ask openclaw what can you do"
117
+ if (/^(help|help me|what can you do|what do you do|show me help|commands)\??$/.test(text) && !text.includes("openclaw") && !text.includes("claw")) {
118
+ return { intent: "notoken.help", confidence: 0.95, rawText, fields: {} };
119
+ }
120
+ // History / undo
121
+ if (/^(show me |what is )?(my )?history$/.test(text) || /^what did i (do|run|ask) (last|before|previously)/.test(text)) {
122
+ return { intent: "notoken.history", confidence: 0.9, rawText, fields: {} };
123
+ }
124
+ if (/^undo( that| last| it)?$/.test(text)) {
125
+ return { intent: "notoken.undo", confidence: 0.9, rawText, fields: {} };
126
+ }
127
+ // Who am I / logged in users
128
+ if (/^who am i\??$/.test(text) || /^(what is |what's )?my (user|username|login)\??$/.test(text)) {
129
+ return { intent: "user.whoami", confidence: 0.9, rawText, fields: {} };
130
+ }
131
+ if (/^who (else )?(is |are )?(logged in|online|connected)\??$/.test(text)) {
132
+ return { intent: "user.who", confidence: 0.9, rawText, fields: {} };
133
+ }
134
+ // Running services
135
+ if (/^(show me |list |what are )?(the )?(running |active )?services$/.test(text)) {
136
+ return { intent: "service.list", confidence: 0.9, rawText, fields: {} };
137
+ }
138
+ // Network: ip address, bandwidth, speed, slow
139
+ if (/^(what is |what's |show )?(my )?(ip|ip address|public ip)\??$/.test(text)) {
140
+ return { intent: "network.ip", confidence: 0.9, rawText, fields: {} };
141
+ }
142
+ if (/\b(bandwidth|network speed|connection speed|speed test|speedtest)\b/i.test(text) || /^(is the )?network slow\??$/.test(text)) {
143
+ return { intent: "network.speedtest", confidence: 0.9, rawText, fields: {} };
144
+ }
145
+ // Block/unblock IP → firewall
146
+ if (/^(block|unblock|ban|unban)\s+(this\s+)?ip/i.test(text) || /^(block|unblock|ban|unban)\s+\d+\.\d+/i.test(text)) {
147
+ return { intent: "firewall.block_ip", confidence: 0.9, rawText, fields: {} };
148
+ }
149
+ // Docker queries with "show me"
150
+ if (/^(show me |list )?(docker )?(images|containers)$/.test(text) || /^what (containers|images) are (running|there)\??$/.test(text)) {
151
+ const isImages = /images/.test(text);
152
+ return { intent: isImages ? "docker.images" : "docker.list", confidence: 0.9, rawText, fields: {} };
153
+ }
154
+ // Large files
155
+ if (/^find (large|big|huge) files$/.test(text) || /\b(large|big|huge) files\b/.test(text)) {
156
+ return { intent: "disk.scan", confidence: 0.9, rawText, fields: {} };
157
+ }
158
+ // Error logs
159
+ if (/^(show me |check |any )?(the )?(error|recent) logs$/.test(text) || /^any errors in (the )?logs\??$/.test(text)) {
160
+ return { intent: "logs.errors", confidence: 0.9, rawText, fields: {} };
161
+ }
162
+ // Clear screen
163
+ if (/^clear( the)?( screen| terminal)?$/.test(text)) {
164
+ return { intent: "shell.clear", confidence: 0.95, rawText, fields: {} };
165
+ }
166
+ // Disk IO
167
+ if (/^(show me |check )?(disk|io|disk io|iops)( stats| usage)?\??$/.test(text)) {
168
+ return { intent: "server.check_disk", confidence: 0.9, rawText, fields: {} };
169
+ }
170
+ // Website up check
171
+ if (/^(check if |is )?(the |my )?(website|site|server|page) (is )?(up|down|running|alive|responding)\??$/.test(text)) {
172
+ return { intent: "network.curl", confidence: 0.9, rawText, fields: {} };
173
+ }
174
+ // Pre-check: attack/security/ddos queries → security.scan
175
+ if (/\b(attack|ddos|brute.?force|intrusion|hacked|breach|compromised|unauthorized|virus|malware|rootkit)\b/i.test(text)
176
+ || /\b(are we|am i|is .* being)\s+(under\s+)?attack/i.test(text)
177
+ || /\b(suspicious|failed)\s+(activity|login|connection|traffic|access)/i.test(text)
178
+ || /\bwho is (attacking|hacking|connecting|hitting)/i.test(text)
179
+ || /\bcheck (for )?(attacks|security|intrusion|viruses|malware)/i.test(text)
180
+ || /\b(any )?(viruses|malware|rootkits?) (on|in|running)/i.test(text)) {
181
+ return { intent: "security.scan", confidence: 0.95, rawText, fields: {} };
182
+ }
183
+ // Pre-check: "can you generate an image" → ai.generate_image (not ai.image_status)
184
+ if (/^(can you|could you|are you able to|do you)\s+(generate|create|make|draw)\s+(an?\s+)?(image|picture|photo|art)/i.test(text)) {
185
+ return { intent: "ai.generate_image", confidence: 0.9, rawText, fields: {} };
186
+ }
187
+ // Pre-check: "cd /path" → shell cd (change directory)
188
+ const cdMatch = text.match(/^cd\s+(\/\S+|~\S*|\.\S*)$/);
189
+ if (cdMatch) {
190
+ return { intent: "shell.cd", confidence: 0.95, rawText, fields: { path: cdMatch[1] } };
191
+ }
192
+ // Pre-check: "what is in my documents/folder/drive" → dir.list
193
+ const whatIsInMatch = text.match(/^(?:what is |what's |show me what(?:'s| is) )in (?:my |the |this )?(.*?)(?:\?|$)/);
194
+ if (whatIsInMatch) {
195
+ const target = whatIsInMatch[1].trim();
196
+ // Resolve common folder names
197
+ const folderMap = {
198
+ "documents": process.platform === "win32" ? "%USERPROFILE%\\Documents" : "~/Documents",
199
+ "documents folder": process.platform === "win32" ? "%USERPROFILE%\\Documents" : "~/Documents",
200
+ "downloads": process.platform === "win32" ? "%USERPROFILE%\\Downloads" : "~/Downloads",
201
+ "downloads folder": process.platform === "win32" ? "%USERPROFILE%\\Downloads" : "~/Downloads",
202
+ "desktop": process.platform === "win32" ? "%USERPROFILE%\\Desktop" : "~/Desktop",
203
+ "home": "~",
204
+ "home folder": "~",
205
+ "home directory": "~",
206
+ "root": "/",
207
+ "root folder": "/",
208
+ "root c drive": "/mnt/c/",
209
+ "c drive": "/mnt/c/",
210
+ "d drive": "/mnt/d/",
211
+ "e drive": "/mnt/e/",
212
+ };
213
+ const path = folderMap[target] ?? target;
214
+ if (target.includes("drive")) {
215
+ return { intent: "disk.scan", confidence: 0.9, rawText, fields: { path } };
216
+ }
217
+ return { intent: "dir.list", confidence: 0.9, rawText, fields: { path } };
218
+ }
219
+ // Pre-check: "what projects are on this drive" → project.scan
220
+ if (/\bwhat projects\b.*\b(on|in)\b.*\b(this|the|my|c|d)\b/.test(text)) {
221
+ return { intent: "project.scan", confidence: 0.9, rawText, fields: { path: "." } };
222
+ }
223
+ // Pre-check: "what's on this drive" / "show me whats on this drive" → disk.scan
224
+ if (/\b(what.?s|show me what.?s|what is) on (this|the|my|c|d) drive\b/.test(text)
225
+ || /\bshow me (this|the|my) drive\b/.test(text)) {
226
+ return { intent: "disk.scan", confidence: 0.9, rawText, fields: {} };
227
+ }
228
+ // Pre-check: "what files" / "what are files in this folder" → dir.list or project.detect
229
+ if (/^(what are |what's in |show me |list |show )(the )?(files|contents)( in| of)?( this| the| my| current)?( folder| directory| dir| project)?[?.!]?$/.test(text)
230
+ || /^(show me |list )(project |all )?files$/.test(text)) {
231
+ const isDirList = text.includes("folder") || text.includes("directory") || text.includes("dir");
232
+ const intentName = isDirList ? "dir.list" : "project.detect";
233
+ return { intent: intentName, confidence: 0.9, rawText, fields: { path: "." } };
234
+ }
235
+ // Pre-check: "how is openclaw doing" / "how is discord doing" → *.status
236
+ const howIsMatch = text.match(/^how(?:'s| is| are) (openclaw|claw|discord|ollama|notoken) (?:doing|going|running|working)/);
237
+ if (howIsMatch) {
238
+ const target = howIsMatch[1] === "claw" ? "openclaw" : howIsMatch[1];
239
+ const intentName = target === "notoken" ? "notoken.status" : `${target}.status`;
240
+ return { intent: intentName, confidence: 0.9, rawText, fields: {} };
241
+ }
7
242
  // Match intent by synonyms defined in intents.json
8
243
  const matched = matchIntent(text, intents);
9
244
  if (!matched)
@@ -90,6 +325,10 @@ function extractStringFields(rawText, lowerText, matchedPhrase, fieldNames, alre
90
325
  }
91
326
  }
92
327
  remaining = remaining.replace(/\s+/g, " ").trim();
328
+ // Strip filler words that aren't meaningful field values
329
+ remaining = remaining.replace(/^(can you |could you |would you |please |hey |yo |just )+/i, "").trim();
330
+ remaining = remaining.replace(/\b(please|for me|for errors|for issues)\b/gi, "").trim();
331
+ remaining = remaining.replace(/\s+/g, " ").trim();
93
332
  // Check for quoted strings first
94
333
  const quoted = rawText.match(/["']([^"']+)["']/g);
95
334
  if (quoted) {
@@ -182,6 +421,7 @@ function isStopWord(word) {
182
421
  }
183
422
  function matchIntent(text, intents) {
184
423
  let best = null;
424
+ // Pass 1: exact substring match (fast path)
185
425
  for (const def of intents) {
186
426
  for (const phrase of def.synonyms) {
187
427
  if (text.includes(phrase)) {
@@ -191,8 +431,85 @@ function matchIntent(text, intents) {
191
431
  }
192
432
  }
193
433
  }
434
+ if (best)
435
+ return { def: best.def, matchedPhrase: best.matchedPhrase };
436
+ // Pass 2: fuzzy/spell-corrected match — correct typos in user input
437
+ // then retry matching. Only for single/double-word synonyms to avoid
438
+ // false positives on long phrases.
439
+ const corrected = spellCorrectText(text, intents);
440
+ if (corrected !== text) {
441
+ for (const def of intents) {
442
+ for (const phrase of def.synonyms) {
443
+ if (corrected.includes(phrase)) {
444
+ if (!best || phrase.length > best.length) {
445
+ best = { def, matchedPhrase: phrase, length: phrase.length };
446
+ }
447
+ }
448
+ }
449
+ }
450
+ }
194
451
  return best ? { def: best.def, matchedPhrase: best.matchedPhrase } : null;
195
452
  }
453
/**
 * Spell-correct text by replacing unknown words with the closest known synonym word.
 *
 * The vocabulary is every whitespace-separated word (3+ characters) found in the
 * synonyms of `intents`. Input words shorter than 3 characters, or already in the
 * vocabulary, are left alone. Other words are replaced by the first vocabulary
 * word (in vocabulary insertion order) with the smallest Levenshtein distance,
 * provided that distance is at most 1 for words of up to 4 characters and at
 * most 2 for longer words.
 *
 * @param text     Input text to correct.
 * @param intents  Intent definitions; each must expose a `synonyms` string array.
 * @returns The corrected words joined by single spaces, or `text` itself
 *          (unchanged) when no word was replaced.
 */
function spellCorrectText(text, intents) {
    // Build vocabulary from all synonyms (insertion order = synonym order).
    const vocab = new Set(intents.flatMap(def => def.synonyms.flatMap(phrase => phrase.split(/\s+/).filter(word => word.length >= 3))));
    let changed = false;
    const correctedWords = text.split(/\s+/).map(word => {
        if (word.length < 3 || vocab.has(word))
            return word; // too short to judge, or already a known word
        const maxDist = word.length <= 4 ? 1 : 2;
        let bestWord = word;
        // Largest distance that would still improve on the best match so far.
        let limit = maxDist;
        for (const candidate of vocab) {
            // Quick length check — edit distance can't be less than length difference
            if (Math.abs(candidate.length - word.length) > limit)
                continue;
            const dist = editDistance(word, candidate, limit);
            if (dist > limit)
                continue;
            bestWord = candidate;
            // The word is not in the vocabulary, so no candidate can beat distance 1.
            if (dist <= 1)
                break;
            // Later candidates must be strictly closer to replace this one.
            limit = dist - 1;
        }
        if (bestWord !== word)
            changed = true;
        return bestWord;
    });
    return changed ? correctedWords.join(" ") : text;
}
/**
 * Levenshtein edit distance between two strings (single-row dynamic programming).
 *
 * @param a        First string.
 * @param b        Second string.
 * @param maxDist  Optional upper bound. When every cell of a DP row already
 *                 exceeds it, the final distance must exceed it too, so the
 *                 function stops early and returns `maxDist + 1`. Defaults to
 *                 `Infinity`, which always yields the exact distance.
 * @returns The exact distance when it is <= `maxDist`; otherwise some value
 *          greater than `maxDist`.
 */
function editDistance(a, b, maxDist = Infinity) {
    const m = a.length, n = b.length;
    if (m === 0)
        return n;
    if (n === 0)
        return m;
    // dp[j] holds the distance between the processed prefix of `a` and b[0..j).
    const dp = Array.from({ length: n + 1 }, (_, i) => i);
    for (let i = 1; i <= m; i++) {
        let prev = dp[0]; // diagonal cell of the previous row
        dp[0] = i;
        let rowMin = i;
        for (let j = 1; j <= n; j++) {
            const tmp = dp[j];
            dp[j] = a[i - 1] === b[j - 1] ? prev : 1 + Math.min(prev, dp[j], dp[j - 1]);
            prev = tmp;
            if (dp[j] < rowMin)
                rowMin = dp[j];
        }
        // Row minima never decrease, so the result cannot drop back under the bound.
        if (rowMin > maxDist)
            return maxDist + 1;
    }
    return dp[n];
}
196
513
  function extractEnvironment(text, aliases) {
197
514
  for (const [canonical, aliasList] of Object.entries(aliases)) {
198
515
  for (const alias of aliasList) {
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Semantic Similarity — lightweight sentence-level matching.
3
+ *
4
+ * Uses character n-gram overlap + word-level Jaccard + IDF weighting
5
+ * to compute similarity between user input and intent descriptions/synonyms.
6
+ * No external API needed — runs entirely local.
7
+ *
8
+ * This catches paraphrases that exact synonym matching misses:
9
+ * "what's hogging my CPU" ≈ "show me what processes are eating resources"
10
+ * "is my site live" ≈ "check if website is up"
11
+ */
12
+ export interface SimilarityMatch {
13
+ intent: string;
14
+ score: number;
15
+ matchedPhrase: string;
16
+ }
17
+ /**
18
+ * Find the most similar intents to the input text.
19
+ * Combines character n-gram, word overlap, and IDF-weighted scoring.
20
+ */
21
+ export declare function findSimilarIntents(rawText: string, topN?: number): SimilarityMatch[];
22
+ /**
23
+ * Score how similar two phrases are (0-1).
24
+ */
25
+ export declare function phraseSimilarity(text1: string, text2: string): number;
26
+ /**
27
+ * Expand a query with similar words found across all intent synonyms.
28
+ * Returns words that co-occur with the input words in intent synonyms.
29
+ */
30
+ export declare function expandWithCooccurrences(rawText: string): string[];
@@ -0,0 +1,174 @@
1
+ /**
2
+ * Semantic Similarity — lightweight sentence-level matching.
3
+ *
4
+ * Uses character n-gram overlap + word-level Jaccard + IDF weighting
5
+ * to compute similarity between user input and intent descriptions/synonyms.
6
+ * No external API needed — runs entirely local.
7
+ *
8
+ * This catches paraphrases that exact synonym matching misses:
9
+ * "what's hogging my CPU" ≈ "show me what processes are eating resources"
10
+ * "is my site live" ≈ "check if website is up"
11
+ */
12
+ import { loadIntents } from "../utils/config.js";
13
+ // ─── N-gram extraction ─────────────────────────────────────────────────────
14
/**
 * Collect the distinct character n-grams of `text`.
 * The text is lower-cased and everything except a-z, 0-9 and the space
 * character is removed before the sliding window is applied.
 */
function charNgrams(text, n) {
    const normalized = text.toLowerCase().replace(/[^a-z0-9 ]/g, "");
    const lastStart = normalized.length - n;
    const result = new Set();
    let start = 0;
    while (start <= lastStart) {
        result.add(normalized.substring(start, start + n));
        start += 1;
    }
    return result;
}
22
/**
 * Distinct words of `text`: lower-cased, stripped of everything except
 * a-z, 0-9 and spaces, split on whitespace, keeping tokens of 2+ characters.
 */
function wordSet(text) {
    const tokens = text.toLowerCase().replace(/[^a-z0-9 ]/g, "").split(/\s+/);
    const unique = new Set();
    for (const token of tokens) {
        if (token.length > 1)
            unique.add(token);
    }
    return unique;
}
25
+ // ─── Similarity metrics ─────────────────────────────────────────────────────
26
/**
 * Jaccard index of two sets: |a ∩ b| / |a ∪ b|.
 * Two empty sets score 0 (instead of dividing by zero).
 */
function jaccardSimilarity(a, b) {
    if (a.size === 0 && b.size === 0)
        return 0;
    const shared = [...a].filter(item => b.has(item)).length;
    const unionSize = a.size + b.size - shared;
    return shared / unionSize;
}
35
/**
 * Character-level similarity: Jaccard overlap of bigrams (weight 0.4)
 * blended with Jaccard overlap of trigrams (weight 0.6).
 */
function ngramSimilarity(text1, text2) {
    const bigramScore = jaccardSimilarity(charNgrams(text1, 2), charNgrams(text2, 2));
    const trigramScore = jaccardSimilarity(charNgrams(text1, 3), charNgrams(text2, 3));
    return (bigramScore * 0.4 + trigramScore * 0.6);
}
43
/** Word-level similarity: Jaccard overlap of the two texts' word sets. */
function wordOverlap(text1, text2) {
    const left = wordSet(text1);
    const right = wordSet(text2);
    return jaccardSimilarity(left, right);
}
46
+ // ─── Stopword filtering ─────────────────────────────────────────────────────
47
/** Function words ignored when extracting content words. */
const STOPWORDS = new Set([
    "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "can", "shall", "to", "of", "in", "for",
    "on", "with", "at", "by", "from", "it", "its", "this", "that",
    "i", "me", "my", "we", "us", "our", "you", "your", "he", "she",
    "they", "them", "what", "which", "who", "when", "where", "how",
    "not", "no", "nor", "or", "and", "but", "if", "then", "so",
    "just", "also", "very", "too", "some", "any", "all", "more",
    "please",
]);
/**
 * Extract the content words of `text`, in order and with duplicates kept.
 *
 * The text is lower-cased, common English contraction suffixes are dropped
 * ("what's" → "what", "don't" → "do"), and any remaining punctuation is
 * treated as a word separator, so "cpu?" and "cpu" yield the same token.
 * Tokens of 2 characters or fewer and stopwords are then discarded.
 *
 * @param text  Phrase to tokenise.
 * @returns Array of content words (possibly empty).
 */
function contentWords(text) {
    return text
        .toLowerCase()
        // Drop contraction suffixes so the stem can hit the stopword filter.
        .replace(/(?:n['’]t|['’](?:s|re|ve|ll|d|m))(?![\p{L}\p{N}])/gu, "")
        // Everything that is not a letter, digit or whitespace separates words.
        .replace(/[^\p{L}\p{N}\s]+/gu, " ")
        .split(/\s+/)
        .filter(w => w.length > 2 && !STOPWORDS.has(w));
}
61
+ // ─── IDF weighting ──────────────────────────────────────────────────────────
62
+ // Words that appear in many intents are less discriminative.
63
/** Lazily built word → IDF weight table; `null` until the first `buildIDF()` call. */
let idfCache = null;
/**
 * Build (once) and return the inverse-document-frequency table.
 *
 * Each intent counts as one document made of the content words of its
 * synonyms and description. A word's weight is `log(totalDocs / (1 + count))`,
 * where `count` is the number of intents containing it, clamped at 0: without
 * the clamp a word present in every intent gets a negative weight, which
 * lets `weightedOverlap` produce ratios outside [0, 1].
 *
 * @returns Map from content word to its non-negative IDF weight. The same
 *          Map instance is returned on every subsequent call.
 */
function buildIDF() {
    if (idfCache)
        return idfCache;
    const intents = loadIntents();
    const totalDocs = intents.length;
    // word → number of intents whose synonyms/description contain it
    const docCount = new Map();
    for (const intent of intents) {
        const wordsInDoc = new Set();
        for (const syn of intent.synonyms) {
            for (const w of contentWords(syn))
                wordsInDoc.add(w);
        }
        // NOTE(review): description is assumed to be a string; tolerate its absence.
        for (const w of contentWords(intent.description ?? ""))
            wordsInDoc.add(w);
        for (const w of wordsInDoc)
            docCount.set(w, (docCount.get(w) ?? 0) + 1);
    }
    const table = new Map();
    for (const [word, count] of docCount) {
        table.set(word, Math.max(0, Math.log(totalDocs / (1 + count))));
    }
    idfCache = table;
    return table;
}
87
/**
 * IDF-weighted share of `text1`'s content words that also occur in `text2`.
 * Words missing from the IDF table weigh 2.0. Returns 0 when the total
 * weight of `text1`'s content words is not positive.
 */
function weightedOverlap(text1, text2) {
    const idf = buildIDF();
    const queryWords = contentWords(text1);
    const candidateWords = new Set(contentWords(text2));
    let matched = 0;
    let total = 0;
    queryWords.forEach(w => {
        // Unknown words get a high weight (rare = discriminative).
        const weight = idf.get(w) ?? 2.0;
        total += weight;
        if (candidateWords.has(w))
            matched += weight;
    });
    if (total > 0)
        return matched / total;
    return 0;
}
101
/**
 * Rank intents by similarity to the input text.
 *
 * Every synonym is scored with a 0.3 / 0.3 / 0.4 blend of character n-gram,
 * word-overlap and IDF-weighted similarity; the description is scored the
 * same way and then scaled by 0.8. An intent's score is the best of these,
 * and it is reported only when that score exceeds 0.15.
 *
 * @returns Up to `topN` matches, highest score first.
 */
export function findSimilarIntents(rawText, topN = 5) {
    const intents = loadIntents();
    const text = rawText.toLowerCase();
    // Shared blend used for synonyms and descriptions alike.
    const blend = (candidate) => ngramSimilarity(text, candidate) * 0.3 + wordOverlap(text, candidate) * 0.3 + weightedOverlap(text, candidate) * 0.4;
    const results = [];
    for (const intent of intents) {
        let bestScore = 0;
        let bestPhrase = "";
        for (const syn of intent.synonyms) {
            const synScore = blend(syn);
            if (synScore > bestScore) {
                bestScore = synScore;
                bestPhrase = syn;
            }
        }
        // Slight penalty for a description match.
        const descScore = blend(intent.description) * 0.8;
        if (descScore > bestScore) {
            bestScore = descScore;
            bestPhrase = intent.description;
        }
        if (bestScore > 0.15) {
            results.push({ intent: intent.name, score: bestScore, matchedPhrase: bestPhrase });
        }
    }
    results.sort((x, y) => y.score - x.score);
    return results.slice(0, topN);
}
138
/**
 * Similarity of two phrases: character n-gram similarity (0.3) plus word
 * overlap (0.3) plus IDF-weighted overlap of `text1`'s words (0.4).
 */
export function phraseSimilarity(text1, text2) {
    return ngramSimilarity(text1, text2) * 0.3
        + wordOverlap(text1, text2) * 0.3
        + weightedOverlap(text1, text2) * 0.4;
}
147
/**
 * Suggest expansion words for a query.
 *
 * For every intent synonym that shares at least one content word with the
 * input, each of its other content words is tallied. Words tallied at least
 * twice are returned, most frequent first, capped at 10.
 */
export function expandWithCooccurrences(rawText) {
    const intents = loadIntents();
    const queryWords = new Set(contentWords(rawText));
    const tally = new Map();
    for (const intent of intents) {
        for (const syn of intent.synonyms) {
            const tokens = contentWords(syn);
            if (!tokens.some(t => queryWords.has(t)))
                continue; // synonym unrelated to the query
            for (const t of tokens) {
                if (queryWords.has(t))
                    continue; // already part of the query
                tally.set(t, (tally.get(t) ?? 0) + 1);
            }
        }
    }
    const ranked = [];
    for (const pair of tally) {
        if (pair[1] >= 2)
            ranked.push(pair);
    }
    ranked.sort((a, b) => b[1] - a[1]);
    return ranked.slice(0, 10).map(pair => pair[0]);
}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Vocabulary Builder — learns vocabulary from Wikidata lookups.
3
+ *
4
+ * After every successful Wikidata entity lookup, this module:
5
+ * 1. Extracts instanceOf labels and maps them to intent domains
6
+ * 2. Collects aliases as synonyms for future matching
7
+ * 3. Adds related concepts to the concept router map
8
+ * 4. Persists learned vocabulary to ~/.notoken/learned-vocabulary.json
9
+ *
10
+ * On startup, loads learned vocabulary and merges it into the
11
+ * concept router's CONCEPT_DOMAINS so future queries benefit.
12
+ */
13
+ import type { WikiEntity } from "./wikidata.js";
14
+ export interface LearnedVocabulary {
15
+ /** Maps an entity label (lowercase) to its known aliases/synonyms. */
16
+ concepts: Record<string, string[]>;
17
+ /** Maps an instanceOf label (lowercase) to intent domain strings. */
18
+ domainMappings: Record<string, string[]>;
19
+ /** ISO timestamp of last update. */
20
+ learnedAt: string;
21
+ }
22
+ /**
23
+ * Enrich vocabulary from a Wikidata entity.
24
+ *
25
+ * Called after every successful Wikidata lookup. Extracts:
26
+ * - instanceOf labels → domain mappings
27
+ * - aliases → concept synonyms
28
+ * - related concepts → concept router entries
29
+ */
30
+ export declare function enrichVocabularyFromWiki(entity: WikiEntity): void;
31
+ /**
32
+ * Load learned vocabulary from disk and merge into the concept router.
33
+ *
34
+ * Should be called on startup so that previously learned vocabulary
35
+ * is available for intent routing from the first query.
36
+ */
37
+ export declare function loadLearnedVocabulary(): void;
38
+ /**
39
+ * Get the current enriched concepts map (merged vocabulary).
40
+ *
41
+ * Returns a combined view of hardcoded concepts and learned vocabulary.
42
+ */
43
+ export declare function getEnrichedConcepts(): LearnedVocabulary;