notoken-core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/file-hints.json +255 -0
- package/config/hosts.json +14 -0
- package/config/intents.json +3920 -0
- package/config/playbooks.json +112 -0
- package/config/rules.json +100 -0
- package/dist/agents/agentSpawner.d.ts +56 -0
- package/dist/agents/agentSpawner.js +180 -0
- package/dist/agents/planner.d.ts +40 -0
- package/dist/agents/planner.js +175 -0
- package/dist/agents/playbookRunner.d.ts +45 -0
- package/dist/agents/playbookRunner.js +120 -0
- package/dist/agents/taskRunner.d.ts +61 -0
- package/dist/agents/taskRunner.js +142 -0
- package/dist/context/history.d.ts +36 -0
- package/dist/context/history.js +115 -0
- package/dist/conversation/coreference.d.ts +27 -0
- package/dist/conversation/coreference.js +147 -0
- package/dist/conversation/secrets.d.ts +43 -0
- package/dist/conversation/secrets.js +129 -0
- package/dist/conversation/store.d.ts +94 -0
- package/dist/conversation/store.js +184 -0
- package/dist/execution/git.d.ts +11 -0
- package/dist/execution/git.js +146 -0
- package/dist/execution/ssh.d.ts +2 -0
- package/dist/execution/ssh.js +17 -0
- package/dist/handlers/executor.d.ts +8 -0
- package/dist/handlers/executor.js +216 -0
- package/dist/healing/claudeHealer.d.ts +17 -0
- package/dist/healing/claudeHealer.js +300 -0
- package/dist/healing/patchPromoter.d.ts +25 -0
- package/dist/healing/patchPromoter.js +118 -0
- package/dist/healing/ruleBuilder.d.ts +5 -0
- package/dist/healing/ruleBuilder.js +111 -0
- package/dist/healing/ruleRepairer.d.ts +8 -0
- package/dist/healing/ruleRepairer.js +29 -0
- package/dist/healing/ruleValidator.d.ts +22 -0
- package/dist/healing/ruleValidator.js +145 -0
- package/dist/healing/runHealer.d.ts +11 -0
- package/dist/healing/runHealer.js +74 -0
- package/dist/index.d.ts +51 -0
- package/dist/index.js +62 -0
- package/dist/intents/catalog.d.ts +4 -0
- package/dist/intents/catalog.js +7 -0
- package/dist/nlp/disambiguate.d.ts +2 -0
- package/dist/nlp/disambiguate.js +46 -0
- package/dist/nlp/fuzzyResolver.d.ts +14 -0
- package/dist/nlp/fuzzyResolver.js +108 -0
- package/dist/nlp/llmFallback.d.ts +63 -0
- package/dist/nlp/llmFallback.js +338 -0
- package/dist/nlp/llmParser.d.ts +8 -0
- package/dist/nlp/llmParser.js +118 -0
- package/dist/nlp/multiClassifier.d.ts +39 -0
- package/dist/nlp/multiClassifier.js +181 -0
- package/dist/nlp/parseIntent.d.ts +2 -0
- package/dist/nlp/parseIntent.js +34 -0
- package/dist/nlp/ruleParser.d.ts +2 -0
- package/dist/nlp/ruleParser.js +234 -0
- package/dist/nlp/semantic.d.ts +104 -0
- package/dist/nlp/semantic.js +419 -0
- package/dist/nlp/uncertainty.d.ts +42 -0
- package/dist/nlp/uncertainty.js +103 -0
- package/dist/parsers/apacheParser.d.ts +50 -0
- package/dist/parsers/apacheParser.js +152 -0
- package/dist/parsers/bindParser.d.ts +40 -0
- package/dist/parsers/bindParser.js +189 -0
- package/dist/parsers/envFile.d.ts +39 -0
- package/dist/parsers/envFile.js +128 -0
- package/dist/parsers/fileFinder.d.ts +30 -0
- package/dist/parsers/fileFinder.js +226 -0
- package/dist/parsers/index.d.ts +27 -0
- package/dist/parsers/index.js +193 -0
- package/dist/parsers/jsonParser.d.ts +16 -0
- package/dist/parsers/jsonParser.js +57 -0
- package/dist/parsers/nginxParser.d.ts +47 -0
- package/dist/parsers/nginxParser.js +161 -0
- package/dist/parsers/passwd.d.ts +25 -0
- package/dist/parsers/passwd.js +41 -0
- package/dist/parsers/shadow.d.ts +23 -0
- package/dist/parsers/shadow.js +50 -0
- package/dist/parsers/yamlParser.d.ts +13 -0
- package/dist/parsers/yamlParser.js +54 -0
- package/dist/policy/confirm.d.ts +2 -0
- package/dist/policy/confirm.js +29 -0
- package/dist/policy/safety.d.ts +4 -0
- package/dist/policy/safety.js +32 -0
- package/dist/types/intent.d.ts +205 -0
- package/dist/types/intent.js +32 -0
- package/dist/types/rules.d.ts +237 -0
- package/dist/types/rules.js +50 -0
- package/dist/utils/analysis.d.ts +25 -0
- package/dist/utils/analysis.js +307 -0
- package/dist/utils/autoBackup.d.ts +43 -0
- package/dist/utils/autoBackup.js +144 -0
- package/dist/utils/config.d.ts +11 -0
- package/dist/utils/config.js +32 -0
- package/dist/utils/dirAnalysis.d.ts +23 -0
- package/dist/utils/dirAnalysis.js +192 -0
- package/dist/utils/explain.d.ts +8 -0
- package/dist/utils/explain.js +145 -0
- package/dist/utils/logger.d.ts +5 -0
- package/dist/utils/logger.js +29 -0
- package/dist/utils/output.d.ts +2 -0
- package/dist/utils/output.js +26 -0
- package/dist/utils/paths.d.ts +26 -0
- package/dist/utils/paths.js +47 -0
- package/dist/utils/permissions.d.ts +64 -0
- package/dist/utils/permissions.js +298 -0
- package/dist/utils/platform.d.ts +53 -0
- package/dist/utils/platform.js +253 -0
- package/dist/utils/smartFile.d.ts +29 -0
- package/dist/utils/smartFile.js +188 -0
- package/dist/utils/spinner.d.ts +53 -0
- package/dist/utils/spinner.js +140 -0
- package/dist/utils/verbose.d.ts +27 -0
- package/dist/utils/verbose.js +131 -0
- package/dist/utils/wslPaths.d.ts +31 -0
- package/dist/utils/wslPaths.js +145 -0
- package/package.json +39 -0
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic NLP layer — powered by compromise.
|
|
3
|
+
*
|
|
4
|
+
* Uses compromise for:
|
|
5
|
+
* - Tokenization
|
|
6
|
+
* - POS tagging (Verb, Noun, Adjective, Preposition, etc.)
|
|
7
|
+
* - Normalization and root form extraction
|
|
8
|
+
* - Sentence/clause structure
|
|
9
|
+
*
|
|
10
|
+
* Layers on top:
|
|
11
|
+
* - Domain entity recognition (services, environments, paths)
|
|
12
|
+
* - Adjacent keyboard typo correction
|
|
13
|
+
* - Dependency parsing (SVO + prepositional)
|
|
14
|
+
* - Concept graph builder
|
|
15
|
+
* - Knowledge graph for entity relationships
|
|
16
|
+
*/
|
|
17
|
+
import nlp from "compromise";
|
|
18
|
+
// ─── Adjacent Keyboard Map ──────────────────────────────────────────────────
// QWERTY neighbour table: for each lowercase letter, the keys physically
// adjacent to it. Substituting an adjacent key is charged half price in
// keyboardDistance(), so fat-finger typos score closer than random edits.
const KEYBOARD_ADJACENCY = {
    q: ["w", "a", "s"],
    w: ["q", "e", "a", "s", "d"],
    e: ["w", "r", "s", "d", "f"],
    r: ["e", "t", "d", "f", "g"],
    t: ["r", "y", "f", "g", "h"],
    y: ["t", "u", "g", "h", "j"],
    u: ["y", "i", "h", "j", "k"],
    i: ["u", "o", "j", "k", "l"],
    o: ["i", "p", "k", "l"],
    p: ["o", "l"],
    a: ["q", "w", "s", "z", "x"],
    s: ["q", "w", "e", "a", "d", "z", "x", "c"],
    d: ["w", "e", "r", "s", "f", "x", "c", "v"],
    f: ["e", "r", "t", "d", "g", "c", "v", "b"],
    g: ["r", "t", "y", "f", "h", "v", "b", "n"],
    h: ["t", "y", "u", "g", "j", "b", "n", "m"],
    j: ["y", "u", "i", "h", "k", "n", "m"],
    k: ["u", "i", "o", "j", "l", "m"],
    l: ["i", "o", "p", "k"],
    z: ["a", "s", "x"],
    x: ["a", "s", "d", "z", "c"],
    c: ["s", "d", "f", "x", "v"],
    v: ["d", "f", "g", "c", "b"],
    b: ["f", "g", "h", "v", "n"],
    n: ["g", "h", "j", "b", "m"],
    m: ["h", "j", "k", "n"],
};
/**
 * Typo-aware edit distance between two words (case-insensitive).
 *
 * Standard Levenshtein dynamic programming, except that substituting one
 * key for a keyboard-adjacent key costs 0.5 instead of 1. Insertions and
 * deletions always cost 1.
 *
 * @param a First word.
 * @param b Second word.
 * @returns Non-negative distance; 0 means identical (ignoring case).
 */
export function keyboardDistance(a, b) {
    const s = a.toLowerCase();
    const t = b.toLowerCase();
    // Rolling single-row DP: prior[j] holds the distance between
    // s[0..i-1] and t[0..j-1] from the previous outer iteration.
    let prior = Array.from({ length: t.length + 1 }, (_, j) => j);
    for (let i = 1; i <= s.length; i++) {
        const current = [i];
        for (let j = 1; j <= t.length; j++) {
            if (s[i - 1] === t[j - 1]) {
                current[j] = prior[j - 1];
            }
            else {
                const neighbours = KEYBOARD_ADJACENCY[s[i - 1]];
                const substitution = neighbours?.includes(t[j - 1]) ? 0.5 : 1;
                current[j] = Math.min(prior[j] + 1, current[j - 1] + 1, prior[j - 1] + substitution);
            }
        }
        prior = current;
    }
    return prior[t.length];
}
|
|
71
|
+
/**
 * Find the candidate closest to `word` under keyboardDistance().
 *
 * @param word        Word to match.
 * @param candidates  Candidate strings to compare against.
 * @param maxDistance Candidates farther than this are ignored (default 2).
 * @returns `{ match, distance }` for the best candidate, or null if none
 *          is within range. Ties keep the earliest candidate.
 */
export function fuzzyMatch(word, candidates, maxDistance = 2) {
    let winner = null;
    for (const option of candidates) {
        const distance = keyboardDistance(word, option);
        if (distance > maxDistance)
            continue;
        if (winner === null || distance < winner.distance) {
            winner = { match: option, distance };
        }
    }
    return winner;
}
|
|
81
|
+
// Words that compromise misclassifies for CLI usage
// (forced to the PREP tag regardless of compromise's POS output — see the
// FORCE_PREPOSITION branch in tokenize()).
const FORCE_PREPOSITION = new Set(["to", "from", "into", "onto"]);
// Domain verbs that compromise may tag as nouns
// — ops/git vocabulary that tokenize() always treats as VERB in this domain.
const DOMAIN_VERBS = new Set([
    "restart", "bounce", "reload", "recycle", "kick", "deploy", "release", "ship",
    "push", "promote", "rollback", "revert", "undo", "tail", "watch", "show",
    "check", "search", "grep", "find", "locate", "list", "kill", "stop",
    "terminate", "copy", "move", "delete", "remove", "create", "add", "modify",
    "ping", "connect", "login", "ssh", "stash", "commit", "stage", "fetch",
    "merge", "rebase", "reset", "checkout", "pull", "clone", "diff", "status",
]);
|
|
92
|
+
/**
 * Use compromise to get rich POS tagging and normalization,
 * then overlay domain-specific entity detection and typo correction.
 *
 * Pre-processes input to protect path-like tokens (e.g. /var/log)
 * from being split by compromise's tokenizer.
 *
 * @param text              Raw user input.
 * @param knownServices     Service names checked for exact and fuzzy matches.
 * @param knownEnvironments Environment names checked for exact and fuzzy matches.
 * @returns Array of token objects: { text, tag, index, normalized?, compromiseTags, root? }.
 */
export function tokenize(text, knownServices, knownEnvironments) {
    // Pre-process: extract path-like tokens before compromise gets them.
    // Placeholder keys are stored lowercased because the lookup below uses
    // term.normal, which compromise lowercases.
    const pathMap = new Map();
    let processed = text;
    const pathRegex = /\/[a-zA-Z0-9_.\/\-]+/g;
    let pathIdx = 0;
    for (const match of text.matchAll(pathRegex)) {
        const placeholder = `PATHPLACEHOLDER${pathIdx}`;
        pathMap.set(placeholder.toLowerCase(), match[0]);
        // NOTE(review): String.replace swaps only the first occurrence; a path
        // repeated in one input relies on each repetition matching in turn.
        processed = processed.replace(match[0], placeholder);
        pathIdx++;
    }
    // Also protect dotfiles like nginx.conf, app.log
    const dotfileRegex = /\b([a-zA-Z0-9_-]+\.[a-zA-Z0-9]+)\b/g;
    let dotIdx = 0;
    for (const match of processed.matchAll(dotfileRegex)) {
        // Don't protect common words with periods (e.g., "Mr.", "Dr.")
        if (match[1].includes("/"))
            continue;
        const placeholder = `DOTFILE${dotIdx}`;
        pathMap.set(placeholder.toLowerCase(), match[1]);
        processed = processed.replace(match[1], placeholder);
        dotIdx++;
    }
    const doc = nlp(processed);
    const json = doc.json();
    if (!json.length || !json[0].terms)
        return [];
    // NOTE(review): only the first sentence's terms are consumed here; for
    // multi-sentence input, later sentences are silently dropped — confirm
    // this is intended for single-command CLI input.
    const terms = json[0].terms;
    const tokens = [];
    // Also get verb root forms from compromise: map each verb's normal form
    // to its lowercased text after toInfinitive() — presumably the
    // infinitive; confirm against compromise's mutation semantics.
    const verbs = doc.verbs().toInfinitive().json();
    const verbRoots = new Map();
    for (const sentence of verbs) {
        for (const term of sentence.terms) {
            if (term.tags.includes("Verb")) {
                verbRoots.set(term.normal, term.text.toLowerCase());
            }
        }
    }
    for (let i = 0; i < terms.length; i++) {
        const term = terms[i];
        let word = term.normal || term.text.toLowerCase();
        const compromiseTags = term.tags;
        // Restore path placeholders
        const restoredPath = pathMap.get(word);
        if (restoredPath) {
            word = restoredPath.toLowerCase();
            tokens.push({ text: word, tag: "PATH", index: i, compromiseTags });
            continue;
        }
        // Determine our tag using compromise POS + domain knowledge.
        // Precedence: forced preps > path-ish text > exact env/service >
        // number > domain verb > compromise POS > fuzzy match > UNKNOWN.
        let tag;
        let normalized;
        let root;
        // Force common CLI prepositions that compromise misclassifies
        if (FORCE_PREPOSITION.has(word)) {
            tag = "PREP";
        }
        else if (word.includes("/") || (word.includes(".") && !compromiseTags.includes("Verb"))) {
            // Domain-specific overrides first (paths, services, envs)
            tag = "PATH";
        }
        else if (knownEnvironments.includes(word)) {
            tag = "ENV";
        }
        else if (knownServices.includes(word)) {
            tag = "SERVICE";
        }
        else if (/^\d+$/.test(word)) {
            tag = "NUMBER";
        }
        else if (DOMAIN_VERBS.has(word)) {
            // Domain verbs override compromise — "tail", "grep", etc. are verbs in our context
            tag = "VERB";
            root = word;
        }
        else if (compromiseTags.includes("Verb") || compromiseTags.includes("Infinitive") || compromiseTags.includes("Imperative")) {
            tag = "VERB";
            root = verbRoots.get(word) ?? word;
        }
        else if (compromiseTags.includes("Preposition")) {
            tag = "PREP";
        }
        else if (compromiseTags.includes("Determiner")) {
            tag = "DET";
        }
        else if (compromiseTags.includes("Conjunction")) {
            tag = "CONJ";
        }
        else if (compromiseTags.includes("Adjective") || compromiseTags.includes("Comparable") || compromiseTags.includes("Superlative")) {
            tag = "ADJ";
        }
        else if (compromiseTags.includes("Adverb")) {
            tag = "ADV";
        }
        else if (compromiseTags.includes("Noun") || compromiseTags.includes("Singular") || compromiseTags.includes("Plural")) {
            // It's a noun — check if it fuzzy-matches a service or env
            // (distance 1.5 ≈ one plain edit plus one adjacent-key edit).
            const serviceMatch = fuzzyMatch(word, knownServices, 1.5);
            if (serviceMatch) {
                tag = "SERVICE";
                normalized = serviceMatch.match;
            }
            else {
                const envMatch = fuzzyMatch(word, knownEnvironments, 1.5);
                if (envMatch) {
                    tag = "ENV";
                    normalized = envMatch.match;
                }
                else {
                    tag = "NOUN";
                }
            }
        }
        else {
            // Unknown POS — try fuzzy matching
            const serviceMatch = fuzzyMatch(word, knownServices, 1.5);
            if (serviceMatch) {
                tag = "SERVICE";
                normalized = serviceMatch.match;
            }
            else {
                const envMatch = fuzzyMatch(word, knownEnvironments, 1.5);
                if (envMatch) {
                    tag = "ENV";
                    normalized = envMatch.match;
                }
                else {
                    tag = "UNKNOWN";
                }
            }
        }
        tokens.push({
            text: word,
            tag,
            index: i,
            normalized,
            compromiseTags,
            root,
        });
    }
    return tokens;
}
|
|
242
|
+
/**
 * Build a flat SVO-plus-prepositional dependency list rooted at the first
 * VERB token. Nominal tokens before the verb become subjects; after the
 * verb, recognized prepositions attach their following token with a
 * directional relation, and bare nominals/numbers/adjectives attach
 * directly. Returns [] when no verb is present.
 */
export function parseDependencies(tokens) {
    const deps = [];
    const root = tokens.find((t) => t.tag === "VERB");
    if (!root)
        return deps;
    // Nominal tokens preceding the verb are treated as subjects.
    for (const tok of tokens) {
        if (tok.index < root.index && (tok.tag === "NOUN" || tok.tag === "SERVICE")) {
            deps.push({ head: root, dependent: tok, relation: "subject" });
        }
    }
    // Preposition word → relation it introduces for its following token.
    const PREP_RELATION = {
        to: "destination",
        into: "destination",
        from: "source",
        on: "location",
        in: "location",
        at: "location",
    };
    const rest = tokens.filter((t) => t.index > root.index);
    for (let i = 0; i < rest.length; i++) {
        const tok = rest[i];
        const following = rest[i + 1];
        if (tok.tag === "PREP" && following) {
            const relation = PREP_RELATION[tok.text];
            if (relation) {
                deps.push({ head: root, dependent: following, relation });
                i++; // the preposition's object is consumed
            }
        }
        else if (tok.tag === "SERVICE" || tok.tag === "NOUN" || tok.tag === "PATH") {
            deps.push({ head: root, dependent: tok, relation: "object" });
        }
        else if (tok.tag === "NUMBER") {
            deps.push({ head: root, dependent: tok, relation: "quantity" });
        }
        else if (tok.tag === "ADJ" && following) {
            // Adjective modifies the token that follows it.
            deps.push({ head: following, dependent: tok, relation: "modifier" });
        }
    }
    return deps;
}
|
|
283
|
+
/**
 * Build a small concept graph from tokens and dependencies.
 * Nodes: the first VERB (as "action") plus every SERVICE/NOUN/PATH
 * ("entity"), ENV ("location"), NUMBER ("quantity"), and ADJ ("property")
 * token, de-duplicated by `TAG_label` id. Edges: one per dependency, with
 * the dependency relation mapped to a graph relation name.
 */
export function buildConceptGraph(tokens, deps) {
    const nodes = [];
    const edges = [];
    const registered = new Set();
    // Register a token as a node (idempotent) and return its id. A token
    // with a `normalized` form uses it as the label and keeps the raw
    // text as an alias.
    const register = (token, type) => {
        const label = token.normalized ?? token.text;
        const id = `${token.tag}_${label}`;
        if (!registered.has(id)) {
            registered.add(id);
            nodes.push({
                id,
                label,
                type,
                aliases: token.normalized ? [token.text] : [],
            });
        }
        return id;
    };
    const action = tokens.find((t) => t.tag === "VERB");
    if (action)
        register(action, "action");
    // Token tag → node type for the non-verb tokens kept in the graph.
    const NODE_TYPE = {
        SERVICE: "entity",
        NOUN: "entity",
        PATH: "entity",
        ENV: "location",
        NUMBER: "quantity",
        ADJ: "property",
    };
    for (const token of tokens) {
        const type = NODE_TYPE[token.tag];
        if (type)
            register(token, type);
    }
    // Dependency relation → edge relation name.
    const EDGE_RELATION = {
        object: "acts_on",
        subject: "acts_on",
        location: "located_at",
        destination: "destination",
        source: "source",
        quantity: "quantity_of",
        modifier: "has_property",
    };
    for (const dep of deps) {
        edges.push({
            from: `${dep.head.tag}_${dep.head.normalized ?? dep.head.text}`,
            to: `${dep.dependent.tag}_${dep.dependent.normalized ?? dep.dependent.text}`,
            relation: EDGE_RELATION[dep.relation],
        });
    }
    return { nodes, edges };
}
|
|
344
|
+
// ─── High-level NLP Utilities ────────────────────────────────────────────────
|
|
345
|
+
/**
 * Extract every verb from `text`, lemmatized to its infinitive form,
 * as an array of plain strings.
 */
export function extractVerbs(text) {
    return nlp(text).verbs().toInfinitive().out("array");
}
|
|
353
|
+
/**
 * Extract the noun phrases from `text` as an array of plain strings.
 */
export function extractNouns(text) {
    return nlp(text).nouns().out("array");
}
|
|
360
|
+
/**
 * Normalize text — lowercase, expand contractions, normalize whitespace.
 */
export function normalizeText(text) {
    const doc = nlp(text);
    doc.contractions().expand(); // "don't" → "do not" (mutates doc in place)
    return doc.text("normal");
}
|
|
369
|
+
/**
 * Get sentence structure analysis from compromise: lemmatized verbs,
 * nouns, adjectives, prepositions, question/negation flags, and a
 * coarse tense bucket.
 */
export function analyzeSentence(text) {
    const doc = nlp(text);
    return {
        verbs: doc.verbs().toInfinitive().out("array"),
        nouns: doc.nouns().out("array"),
        adjectives: doc.adjectives().out("array"),
        prepositions: doc.match("#Preposition").out("array"),
        isQuestion: doc.questions().length > 0,
        isNegative: doc.has("#Negative"),
        tense: detectTense(doc),
    };
}
// Coarse tense classification, checked in priority order; anything that is
// not past/future/imperative falls through to "present".
function detectTense(doc) {
    if (doc.has("#PastTense"))
        return "past";
    if (doc.has("#FutureTense"))
        return "future";
    return doc.has("#Imperative") ? "imperative" : "present";
}
|
|
393
|
+
/**
 * End-to-end semantic parse: tokenize, derive dependencies, build the
 * concept graph, and lift the first dependency of each key relation
 * (location, destination, source, quantity) into convenient top-level
 * fields alongside the full intermediate structures.
 */
export function semanticParse(text, knownServices, knownEnvironments) {
    const tokens = tokenize(text, knownServices, knownEnvironments);
    const dependencies = parseDependencies(tokens);
    const graph = buildConceptGraph(tokens, dependencies);
    const sentence = analyzeSentence(text);
    const action = tokens.find((t) => t.tag === "VERB");
    // Every service / noun / path token is surfaced as an entity.
    const entityTags = ["SERVICE", "NOUN", "PATH"];
    const entities = [];
    for (const t of tokens) {
        if (entityTags.includes(t.tag)) {
            entities.push({ text: t.text, type: t.tag, normalized: t.normalized });
        }
    }
    // First dependency carrying a given relation, and its preferred label.
    const byRelation = (rel) => dependencies.find((d) => d.relation === rel);
    const pick = (dep) => dep?.dependent.normalized ?? dep?.dependent.text;
    const quantityDep = byRelation("quantity");
    return {
        tokens,
        dependencies,
        graph,
        action: action?.normalized ?? action?.text,
        actionRoot: action?.root ?? sentence.verbs[0],
        entities,
        location: pick(byRelation("location")),
        destination: pick(byRelation("destination")),
        source: pick(byRelation("source")),
        quantity: quantityDep ? Number(quantityDep.dependent.text) : undefined,
        sentence,
    };
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
// Type-only imports: erased at compile time, no runtime dependency.
import type { Token } from "./semantic.js";
import type { DynamicIntent } from "../types/intent.js";
import type { UncertaintyReport } from "../conversation/store.js";
/**
 * One persisted record of an uncertain parse, as written by
 * `logUncertainty` and mined later for auto-learning.
 */
export interface UncertaintyEntry {
    /** When the entry was recorded (format set by the writer — presumably ISO 8601; confirm). */
    timestamp: string;
    /** The user's original input, verbatim. */
    rawText: string;
    /** Name of the intent the parser settled on. */
    intent: string;
    /** The parser's overall confidence for this parse. */
    overallConfidence: number;
    /** Token texts the tokenizer tagged UNKNOWN. */
    unknownTokens: string[];
    /** Fields whose values did not come directly from the input, with the value used. */
    lowConfidenceFields: Array<{
        field: string;
        value: string;
        confidence: number;
    }>;
    /** Which parts of the sentence had no coverage */
    uncoveredSpans: string[];
}
|
|
18
|
+
/**
 * Analyze a parse result for uncertainty.
 *
 * Returns a report of:
 * - Tokens that weren't classified (UNKNOWN tag)
 * - Fields that were filled with low confidence
 * - Parts of the sentence that weren't used by any field
 *
 * @param rawText The original user input.
 * @param tokens  Tokenized form of `rawText`.
 * @param intent  The intent the parser produced for this input.
 */
export declare function analyzeUncertainty(rawText: string, tokens: Token[], intent: DynamicIntent): UncertaintyReport;
/**
 * Get the uncovered spans — parts of the sentence we didn't understand.
 * Consecutive UNKNOWN tokens are grouped into a single space-joined span.
 */
export declare function getUncoveredSpans(rawText: string, tokens: Token[]): string[];
/**
 * Log uncertainty for later analysis by the auto-learning system.
 * Appends to the on-disk log; the log is bounded, so the oldest entries
 * are dropped once the cap is reached.
 */
export declare function logUncertainty(entry: UncertaintyEntry): void;
/** Read back all logged entries; empty array when no log file exists yet. */
export declare function loadUncertaintyLog(): UncertaintyEntry[];
/**
 * Get a summary of most common uncertain tokens across all logged entries.
 * Sorted by occurrence count, descending.
 */
export declare function getUncertaintySummary(): Array<{
    token: string;
    count: number;
}>;
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
import { LOG_DIR } from "../utils/paths.js";
|
|
4
|
+
// On-disk location of the rolling uncertainty log (a JSON array of entries).
const UNCERTAINTY_LOG = resolve(LOG_DIR, "uncertainty.json");
|
|
5
|
+
/**
 * Analyze a parse result for uncertainty.
 *
 * Returns a report of:
 * - Tokens that weren't classified (UNKNOWN tag)
 * - Fields that were filled with low confidence
 * - The parser's overall confidence
 *
 * NOTE(review): an earlier version also computed an `uncoveredTokens` set
 * here that was never read or returned (dead work on every call); it has
 * been removed. Uncovered-span reporting lives in getUncoveredSpans().
 *
 * @param rawText The user's original input, verbatim.
 * @param tokens  Tokenized form of `rawText`.
 * @param intent  Parsed intent whose `fields` and `confidence` are inspected.
 */
export function analyzeUncertainty(rawText, tokens, intent) {
    // Tokens the tokenizer could not classify at all.
    const unknownTokens = tokens
        .filter((t) => t.tag === "UNKNOWN")
        .map((t) => t.text);
    // Estimate field confidence based on extraction method: a field value
    // that never appears verbatim in the input was most likely filled from
    // a default or inferred, so flag it as low confidence.
    const lowConfidenceFields = [];
    const loweredText = rawText.toLowerCase(); // hoisted out of the loop
    for (const [field, value] of Object.entries(intent.fields)) {
        if (typeof value !== "string" || !value)
            continue;
        if (!loweredText.includes(value.toLowerCase())) {
            lowConfidenceFields.push({ field, value, confidence: 0.4 });
        }
    }
    return {
        unknownTokens,
        lowConfidenceFields,
        overallConfidence: intent.confidence,
    };
}
|
|
49
|
+
/**
 * Get the uncovered spans — parts of the sentence we didn't understand.
 * Consecutive UNKNOWN tokens are grouped into one space-joined span.
 * (`rawText` is currently unused; kept for signature compatibility.)
 */
export function getUncoveredSpans(rawText, tokens) {
    const spans = [];
    let run = [];
    // Close the current run of unknown tokens, if any, as one span.
    const flush = () => {
        if (run.length > 0) {
            spans.push(run.join(" "));
            run = [];
        }
    };
    for (const token of tokens) {
        if (token.tag === "UNKNOWN") {
            run.push(token.text);
        }
        else {
            flush();
        }
    }
    flush();
    return spans;
}
|
|
71
|
+
/**
 * Log uncertainty for later analysis by the auto-learning system.
 * Appends `entry` to the JSON log on disk, creating the log directory if
 * needed and capping the log at the most recent 500 entries.
 */
export function logUncertainty(entry) {
    if (!existsSync(LOG_DIR)) {
        mkdirSync(LOG_DIR, { recursive: true });
    }
    const entries = loadUncertaintyLog();
    entries.push(entry);
    // Keep only the newest MAX_ENTRIES records.
    const MAX_ENTRIES = 500;
    if (entries.length > MAX_ENTRIES) {
        entries.splice(0, entries.length - MAX_ENTRIES);
    }
    writeFileSync(UNCERTAINTY_LOG, JSON.stringify(entries, null, 2));
}
|
|
84
|
+
/**
 * Load the persisted uncertainty log.
 *
 * Returns [] when the log is missing, unreadable, corrupt, or not a JSON
 * array — a damaged log file (e.g. a half-written file from a crashed
 * writeFileSync) should never crash the code that merely reports on it,
 * and logUncertainty() relies on the result supporting `.push`.
 */
export function loadUncertaintyLog() {
    if (!existsSync(UNCERTAINTY_LOG))
        return [];
    try {
        const parsed = JSON.parse(readFileSync(UNCERTAINTY_LOG, "utf-8"));
        return Array.isArray(parsed) ? parsed : [];
    }
    catch {
        // Corrupt or truncated JSON: treat as an empty log rather than throwing.
        return [];
    }
}
|
|
89
|
+
/**
 * Get a summary of most common uncertain tokens across all logged entries,
 * sorted by occurrence count, descending.
 */
export function getUncertaintySummary() {
    const counts = new Map();
    for (const { unknownTokens } of loadUncertaintyLog()) {
        for (const token of unknownTokens) {
            counts.set(token, (counts.get(token) ?? 0) + 1);
        }
    }
    return [...counts.entries()]
        .map(([token, count]) => ({ token, count }))
        .sort((a, b) => b.count - a.count);
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
 * Apache config parser.
 *
 * Parses httpd.conf / apache2.conf and vhost configs.
 * Handles: <VirtualHost>, <Directory>, <Location>, directives.
 */
/** A single `Name value` directive. */
export interface ApacheDirective {
    name: string;
    /** Raw value text following the directive name. */
    value: string;
    /** Line number in the source file — presumably 1-based; confirm against parser. */
    line: number;
}
/** A container block such as `<VirtualHost ...> ... </VirtualHost>`. */
export interface ApacheBlock {
    /** Tag name, e.g. "VirtualHost", "Directory", "Location". */
    tag: string;
    /** Raw argument text inside the opening tag (e.g. "*:80" or a path). */
    args: string;
    /** Directives declared directly inside this block. */
    directives: ApacheDirective[];
    /** Nested blocks, recursively. */
    blocks: ApacheBlock[];
    line: number;
}
/** Parsed view of one Apache config file. */
export interface ApacheConfig {
    /** Top-level directives outside any block. */
    directives: ApacheDirective[];
    /** Top-level blocks. */
    blocks: ApacheBlock[];
    /** The <VirtualHost> entries found in the config, in summarized form. */
    vhosts: ApacheVHost[];
}
/** Flattened summary of one <VirtualHost> block. */
export interface ApacheVHost {
    /** The VirtualHost argument, e.g. "*:80". */
    address: string;
    serverName?: string;
    serverAlias: string[];
    documentRoot?: string;
    errorLog?: string;
    customLog?: string;
    /** Whether SSL is enabled for this vhost. */
    ssl: boolean;
    sslCertificate?: string;
    sslCertificateKey?: string;
    /** <Location> blocks inside the vhost, with their directives. */
    locations: Array<{
        path: string;
        directives: ApacheDirective[];
    }>;
    /** <Directory> blocks inside the vhost, with their directives. */
    directories: Array<{
        path: string;
        directives: ApacheDirective[];
    }>;
}
|
|
43
|
+
/**
 * Parse Apache config content.
 * @param content Raw text of an Apache config file.
 * @returns Structured config with top-level directives, blocks, and vhosts.
 */
export declare function parseApache(content: string): ApacheConfig;
/**
 * Format an Apache config summary for display.
 * @returns Human-readable summary string.
 */
export declare function formatApacheSummary(config: ApacheConfig): string;
|