@nguyentamdat/mempalace 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +127 -0
- package/hooks/README.md +133 -0
- package/hooks/mempal_precompact_hook.sh +35 -0
- package/hooks/mempal_save_hook.sh +80 -0
- package/package.json +36 -0
- package/src/cli.ts +50 -0
- package/src/commands/compress.ts +161 -0
- package/src/commands/init.ts +40 -0
- package/src/commands/mine.ts +51 -0
- package/src/commands/search.ts +23 -0
- package/src/commands/split.ts +20 -0
- package/src/commands/status.ts +12 -0
- package/src/commands/wake-up.ts +20 -0
- package/src/config.ts +111 -0
- package/src/convo-miner.ts +373 -0
- package/src/dialect.ts +921 -0
- package/src/entity-detector.d.ts +25 -0
- package/src/entity-detector.ts +674 -0
- package/src/entity-registry.ts +806 -0
- package/src/general-extractor.ts +487 -0
- package/src/index.ts +5 -0
- package/src/knowledge-graph.ts +461 -0
- package/src/layers.ts +512 -0
- package/src/mcp-server.ts +1034 -0
- package/src/miner.ts +612 -0
- package/src/missing-modules.d.ts +43 -0
- package/src/normalize.ts +374 -0
- package/src/onboarding.ts +485 -0
- package/src/palace-graph.ts +310 -0
- package/src/room-detector-local.ts +415 -0
- package/src/room-detector.d.ts +1 -0
- package/src/room-detector.ts +6 -0
- package/src/searcher.ts +181 -0
- package/src/spellcheck.ts +200 -0
- package/src/split-mega-files.d.ts +8 -0
- package/src/split-mega-files.ts +297 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * Type declarations for the entity detector.
 *
 * These mirror the shapes the implementation actually produces: detected
 * entities are structured records, while confirmed entities are plain name
 * strings.
 */

/** Heuristic scores and the human-readable signals that produced them. */
export type EntityScores = {
  personScore: number;
  projectScore: number;
  personSignals: string[];
  projectSignals: string[];
};

/** A capitalised name found in the scanned text, with its classification. */
export type DetectedEntity = {
  name: string;
  type: "person" | "project" | "uncertain";
  /** Rounded to two decimals by the classifier. */
  confidence: number;
  /** Number of occurrences counted during candidate extraction. */
  frequency: number;
  signals: string[];
};

/** Detection output, bucketed by classification. */
export type EntityDetectionResult = {
  people: DetectedEntity[];
  projects: DetectedEntity[];
  uncertain: DetectedEntity[];
};

/** Names the user accepted (or that were auto-accepted). */
export type ConfirmedEntities = {
  people: string[];
  projects: string[];
};

/** Collects up to `maxFiles` (default 10) candidate file paths under `dir`. */
export declare function scanForDetection(dir: string, maxFiles?: number): string[];

/** Reads at most `maxFiles` (default 10) of `files` and classifies the names found. */
export declare function detectEntities(files: string[], maxFiles?: number): EntityDetectionResult;

/** Prints the detection result and, unless `autoAccept` is true, prompts for edits. */
export declare function confirmEntities(
  detected: EntityDetectionResult,
  autoAccept?: boolean,
): Promise<ConfirmedEntities>;

/** Maps each candidate name to its occurrence count. */
export declare function extractCandidates(text: string): Record<string, number>;

/** Scores `name` as person vs. project against the full text and its lines. */
export declare function scoreEntity(name: string, text: string, lines: string[]): EntityScores;

/** Turns scores into a classified entity record. */
export declare function classifyEntity(
  name: string,
  frequency: number,
  scores: EntityScores,
): DetectedEntity;
|
|
@@ -0,0 +1,674 @@
|
|
|
1
|
+
import { readdirSync, readFileSync, statSync } from "node:fs";
|
|
2
|
+
import { extname, resolve } from "node:path";
|
|
3
|
+
import { cancel, confirm, isCancel, select, text } from "@clack/prompts";
|
|
4
|
+
|
|
5
|
+
/**
 * Regex source templates that suggest a name refers to a person: the name
 * followed by a speech/feeling/action verb, or preceded by a greeting.
 *
 * `{name}` is a placeholder that is replaced with the regex-escaped entity
 * name before compilation (see `_buildPatterns`, which compiles these with
 * the `gi` flags).
 */
export const PERSON_VERB_PATTERNS = [
  String.raw`\b{name}\s+said\b`,
  String.raw`\b{name}\s+asked\b`,
  String.raw`\b{name}\s+told\b`,
  String.raw`\b{name}\s+replied\b`,
  String.raw`\b{name}\s+laughed\b`,
  String.raw`\b{name}\s+smiled\b`,
  String.raw`\b{name}\s+cried\b`,
  String.raw`\b{name}\s+felt\b`,
  String.raw`\b{name}\s+thinks?\b`,
  String.raw`\b{name}\s+wants?\b`,
  String.raw`\b{name}\s+loves?\b`,
  String.raw`\b{name}\s+hates?\b`,
  String.raw`\b{name}\s+knows?\b`,
  String.raw`\b{name}\s+decided\b`,
  String.raw`\b{name}\s+pushed\b`,
  String.raw`\b{name}\s+wrote\b`,
  // Greeting forms; the same three greetings are also counted separately by
  // the `direct` pattern built in `_buildPatterns`.
  String.raw`\bhey\s+{name}\b`,
  String.raw`\bthanks?\s+{name}\b`,
  String.raw`\bhi\s+{name}\b`,
  String.raw`\bdear\s+{name}\b`,
] as const;
|
|
27
|
+
|
|
28
|
+
/**
 * Regex sources for third-person pronouns. `scoreEntity` tests these
 * case-insensitively against a small window of lines around each mention
 * of a name; any hit counts as one "pronoun nearby" signal for that mention.
 */
export const PRONOUN_PATTERNS = [
  String.raw`\bshe\b`,
  String.raw`\bher\b`,
  String.raw`\bhers\b`,
  String.raw`\bhe\b`,
  String.raw`\bhim\b`,
  String.raw`\bhis\b`,
  String.raw`\bthey\b`,
  String.raw`\bthem\b`,
  String.raw`\btheir\b`,
] as const;
|
|
39
|
+
|
|
40
|
+
/**
 * Regex source templates for transcript-style speaker markers. `{name}` is
 * replaced with the escaped entity name; `_buildPatterns` compiles these
 * with the `gim` flags, so `^` anchors at the start of every line.
 */
export const DIALOGUE_PATTERNS = [
  // Quoted line: "> Name:" or "> Name ".
  String.raw`^>\s*{name}[:\s]`,
  // Speaker prefix: "Name: ".
  String.raw`^{name}:\s`,
  // Bracketed speaker tag: "[Name]".
  String.raw`^\[{name}\]`,
  // Opening quote followed by "Name said".
  String.raw`"{name}\s+said`,
] as const;
|
|
46
|
+
|
|
47
|
+
/**
 * Regex source templates that suggest a name refers to a project or piece of
 * software: build/ship/deploy verbs, "the X system"-style phrases, version
 * suffixes, and package/import references. `{name}` is replaced with the
 * escaped entity name; `_buildPatterns` compiles these with the `gi` flags.
 */
export const PROJECT_VERB_PATTERNS = [
  String.raw`\bbuilding\s+{name}\b`,
  String.raw`\bbuilt\s+{name}\b`,
  String.raw`\bship(?:ping|ped)?\s+{name}\b`,
  String.raw`\blaunch(?:ing|ed)?\s+{name}\b`,
  String.raw`\bdeploy(?:ing|ed)?\s+{name}\b`,
  String.raw`\binstall(?:ing|ed)?\s+{name}\b`,
  String.raw`\bthe\s+{name}\s+architecture\b`,
  String.raw`\bthe\s+{name}\s+pipeline\b`,
  String.raw`\bthe\s+{name}\s+system\b`,
  String.raw`\bthe\s+{name}\s+repo\b`,
  String.raw`\b{name}\s+v\d+\b`,
  String.raw`\b{name}\.py\b`,
  String.raw`\b{name}-core\b`,
  String.raw`\b{name}-local\b`,
  String.raw`\bimport\s+{name}\b`,
  String.raw`\bpip\s+install\s+{name}\b`,
] as const;
|
|
65
|
+
|
|
66
|
+
/**
 * Lowercase words that must never be treated as entity candidates even when
 * they appear capitalised (sentence starts, headings, code keywords, UI verbs,
 * generic nouns). `extractCandidates` lowercases each word before the lookup.
 *
 * Some words are listed more than once; the Set collapses the repeats.
 */
export const STOPWORDS: ReadonlySet<string> = new Set([
  "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by",
  "from", "as", "is", "was", "are", "were", "be", "been", "being", "have", "has", "had", "do",
  "does", "did", "will", "would", "could", "should", "may", "might", "must", "shall", "can",
  "this", "that", "these", "those", "it", "its", "they", "them", "their", "we", "our", "you",
  "your", "i", "my", "me", "he", "she", "his", "her", "who", "what", "when", "where", "why",
  "how", "which", "if", "then", "so", "not", "no", "yes", "ok", "okay", "just", "very",
  "really", "also", "already", "still", "even", "only", "here", "there", "now", "then", "too",
  "up", "out", "about", "like", "use", "get", "got", "make", "made", "take", "put", "come",
  "go", "see", "know", "think", "true", "false", "none", "null", "new", "old", "all", "any",
  "some", "true", "false", "return", "print", "def", "class", "import", "from", "step", "usage",
  "run", "check", "find", "add", "get", "set", "list", "args", "dict", "str", "int", "bool",
  "path", "file", "type", "name", "note", "example", "option", "result", "error", "warning",
  "info", "every", "each", "more", "less", "next", "last", "first", "second", "stack", "layer",
  "mode", "test", "stop", "start", "copy", "move", "source", "target", "output", "input", "data",
  "item", "key", "value", "returns", "raises", "yields", "none", "self", "cls", "kwargs", "world",
  "well", "want", "topic", "choose", "social", "cars", "phones", "healthcare", "ex", "machina",
  "deus", "human", "humans", "people", "things", "something", "nothing", "everything", "anything",
  "someone", "everyone", "anyone", "way", "time", "day", "life", "place", "thing", "part", "kind",
  "sort", "case", "point", "idea", "fact", "sense", "question", "answer", "reason", "number",
  "version", "system", "hey", "hi", "hello", "thanks", "thank", "right", "let", "ok", "click",
  "hit", "press", "tap", "drag", "drop", "open", "close", "save", "load", "launch", "install",
  "download", "upload", "scroll", "select", "enter", "submit", "cancel", "confirm", "delete", "copy",
  "paste", "type", "write", "read", "search", "find", "show", "hide", "desktop", "documents",
  "downloads", "users", "home", "library", "applications", "system", "preferences", "settings",
  "terminal", "actor", "vector", "remote", "control", "duration", "fetch", "agents", "tools", "others",
  "guards", "ethics", "regulation", "learning", "thinking", "memory", "language", "intelligence",
  "technology", "society", "culture", "future", "history", "science", "model", "models", "network",
  "networks", "training", "inference",
]);
|
|
96
|
+
|
|
97
|
+
/**
 * Lowercase file extensions treated as prose. `scanForDetection` returns only
 * these files when it finds at least three of them.
 */
export const PROSE_EXTENSIONS: ReadonlySet<string> = new Set([".txt", ".md", ".rst", ".csv"]);
|
|
98
|
+
|
|
99
|
+
/**
 * Lowercase file extensions considered readable text. `scanForDetection` uses
 * files with these extensions (other than the prose ones) as a fallback when
 * fewer than three prose files are found.
 */
export const READABLE_EXTENSIONS: ReadonlySet<string> = new Set([
  ".txt",
  ".md",
  ".py",
  ".js",
  ".ts",
  ".json",
  ".yaml",
  ".yml",
  ".csv",
  ".rst",
  ".toml",
  ".sh",
  ".rb",
  ".go",
  ".rs",
]);
|
|
116
|
+
|
|
117
|
+
/**
 * Directory names that `scanForDetection` never descends into. Matching is by
 * exact entry name at any depth.
 */
export const SKIP_DIRS: ReadonlySet<string> = new Set([
  ".git",
  "node_modules",
  "__pycache__",
  ".venv",
  "venv",
  "env",
  "dist",
  "build",
  ".next",
  "coverage",
  ".mempalace",
]);
|
|
130
|
+
|
|
131
|
+
/** Classification assigned to a detected name. */
type EntityType = "person" | "project" | "uncertain";
/** Top-level action chosen in the confirmation prompt. */
type ConfirmChoice = "accept" | "edit" | "add";
/** Answer when reclassifying an uncertain entity; "skip" discards it. */
type ClassificationChoice = EntityType | "skip";

/** Output of `scoreEntity`: weighted scores plus the signals behind them. */
export type EntityScores = {
  personScore: number;
  projectScore: number;
  // Human-readable evidence strings; `scoreEntity` returns at most three of each.
  personSignals: string[];
  projectSignals: string[];
};

/** A classified candidate name as produced by `classifyEntity`. */
export type DetectedEntity = {
  name: string;
  type: EntityType;
  // Rounded to two decimals by `classifyEntity`.
  confidence: number;
  // Occurrence count from `extractCandidates`.
  frequency: number;
  signals: string[];
};

/** Result of `detectEntities`, bucketed by classification. */
export type EntityDetectionResult = {
  people: DetectedEntity[];
  projects: DetectedEntity[];
  uncertain: DetectedEntity[];
};

/** Final name lists returned by `confirmEntities`. */
export type ConfirmedEntities = {
  people: string[];
  projects: string[];
};

/** Per-name compiled regexes built by `_buildPatterns`. */
type CompiledPatterns = {
  dialogue: RegExp[];
  personVerbs: RegExp[];
  projectVerbs: RegExp[];
  // "hey/thanks/hi <name>" greetings.
  direct: RegExp;
  // Name immediately followed by "-" or "v" and more word characters.
  versioned: RegExp;
  // Name followed by a source/config file extension.
  codeRef: RegExp;
};
|
|
169
|
+
|
|
170
|
+
// A single capitalised ASCII word: one uppercase letter followed by 1-19
// lowercase letters (so 2-20 characters total).
const CANDIDATE_WORD_REGEX = /\b([A-Z][a-z]{1,19})\b/g;
// Two or more capitalised words separated by whitespace, e.g. "Ada Lovelace".
const MULTI_WORD_REGEX = /\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b/g;
// Per-file read cap used by `detectEntities`. NOTE(review): it is applied with
// String.prototype.slice on decoded text, so it limits UTF-16 code units
// rather than bytes.
const MAX_BYTES_PER_FILE = 5_000;
|
|
173
|
+
|
|
174
|
+
/**
 * Escapes every regex metacharacter in `value` so it can be embedded in a
 * pattern and matched literally.
 *
 * @param value Raw text (typically an entity name).
 * @returns `value` with each of `. * + ? ^ $ { } ( ) | [ ] \` backslash-prefixed.
 */
function regexEscape(value: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (character) => `\\${character}`);
}
|
|
177
|
+
|
|
178
|
+
/**
 * Instantiates a pattern template for one entity name and compiles it.
 *
 * @param template Regex source containing zero or more `{name}` placeholders.
 * @param escapedName Entity name, already regex-escaped.
 * @param flags Flags passed to the `RegExp` constructor.
 * @returns The compiled regular expression.
 */
function buildPattern(template: string, escapedName: string, flags: string): RegExp {
  // Substitute literally via split/join. A string passed as the replacement to
  // replace/replaceAll is scanned for `$&`, `$'`, "$`" and `$$` sequences, and
  // an escaped name can contain them: regexEscape turns "$&" into "\$&", whose
  // "$&" would then be expanded to the matched "{name}" text and corrupt the
  // resulting pattern.
  const source = template.split("{name}").join(escapedName);
  return new RegExp(source, flags);
}
|
|
181
|
+
|
|
182
|
+
/**
 * Counts how many entries `String.prototype.match` yields for `regex`.
 *
 * With a global regex this is the number of non-overlapping matches; every
 * call site in this file passes a global regex.
 *
 * @returns The length of the match array, or 0 when there is no match.
 */
function countMatches(regex: RegExp, text: string): number {
  const found = text.match(regex);
  if (found === null) {
    return 0;
  }
  return found.length;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Rounds a confidence value to two decimal places.
 *
 * @param value Any number; typically in [0, 1].
 * @returns `value` rounded to the nearest hundredth.
 */
function roundConfidence(value: number): number {
  const hundredths = Math.round(value * 100);
  return hundredths / 100;
}
|
|
190
|
+
|
|
191
|
+
/**
 * Passes a prompt result through unless the user cancelled the prompt.
 *
 * On cancellation it prints "Operation cancelled." via `cancel` and exits the
 * process with status 1.
 *
 * @param value Raw value returned by a prompt.
 * @returns `value` unchanged when it is not a cancel marker.
 */
function ensureNotCancelled<T>(value: T): T {
  const wasCancelled = isCancel(value);
  if (wasCancelled) {
    cancel("Operation cancelled.");
    process.exit(1);
  }

  return value;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Asks how the detected entities should be handled.
 *
 * @returns "accept", "edit" or "add"; exits the process if the prompt is cancelled.
 */
async function promptChoice(): Promise<ConfirmChoice> {
  const selection = await select<ConfirmChoice>({
    message: "How do you want to handle detected entities?",
    options: [
      { value: "accept", label: "Accept all detected people and projects" },
      { value: "edit", label: "Review uncertain entries and remove mistakes" },
      { value: "add", label: "Add entities manually" },
    ],
  });
  return ensureNotCancelled(selection) as ConfirmChoice;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Asks the user to classify a single uncertain entity.
 *
 * @param name Entity name shown in the prompt message.
 * @returns "person", "project" or "skip"; exits the process if the prompt is cancelled.
 */
async function promptClassification(name: string): Promise<ClassificationChoice> {
  const selection = await select<ClassificationChoice>({
    message: `${name} — how should this be classified?`,
    options: [
      { value: "person", label: "Person" },
      { value: "project", label: "Project" },
      { value: "skip", label: "Skip" },
    ],
  });
  return ensureNotCancelled(selection) as ClassificationChoice;
}
|
|
225
|
+
|
|
226
|
+
/**
 * Asks for a line of free text.
 *
 * @param message Prompt message.
 * @param placeholder Optional hint shown while the input is empty.
 * @returns The entered text, or "" when nothing was entered. Exits the process
 *   if the prompt is cancelled.
 */
async function promptText(message: string, placeholder?: string): Promise<string> {
  const answer: unknown = ensureNotCancelled(await text({ message, placeholder }));
  // NOTE(review): some @clack/prompts releases appear to resolve an empty
  // submission (no defaultValue) to `undefined` rather than "" — confirm against
  // the pinned version. Callers invite "enter to skip" and then call `.trim()`
  // on the result, so normalise anything that is not a string instead of
  // asserting the type.
  return typeof answer === "string" ? answer : "";
}
|
|
234
|
+
|
|
235
|
+
/**
 * Asks a yes/no question.
 *
 * @param message Prompt message.
 * @param initialValue Pre-selected answer (defaults to `false`).
 * @returns The user's answer; exits the process if the prompt is cancelled.
 */
async function promptConfirm(message: string, initialValue = false): Promise<boolean> {
  const reply = await confirm({ message, initialValue });
  return ensureNotCancelled(reply) as boolean;
}
|
|
243
|
+
|
|
244
|
+
/**
 * Parses a comma-separated list of 1-based positions into 0-based indexes.
 *
 * Blank parts, parts that `parseInt` cannot read, and values below 1 are
 * ignored. Parsing is `parseInt`-lenient: "3x" is read as 3.
 *
 * @param input Raw user input such as "1, 3".
 * @returns The set of zero-based indexes to remove.
 */
function parseRemovalInput(input: string): Set<number> {
  const zeroBased = input
    .split(",")
    .map((piece) => piece.trim())
    .filter((piece) => piece.length > 0)
    .map((piece) => Number.parseInt(piece, 10))
    .filter((position) => !Number.isNaN(position) && position > 0)
    .map((position) => position - 1);

  return new Set<number>(zeroBased);
}
|
|
260
|
+
|
|
261
|
+
/**
 * Prints a heading followed by a 1-based numbered list of names, or a
 * "(none)" line when the list is empty.
 *
 * @param label Heading text.
 * @param names Names to list, in display order.
 */
function printNumberedNames(label: string, names: string[]): void {
  console.log(`\n ${label}:`);

  if (names.length > 0) {
    names.forEach((entry, position) => {
      // Right-align the ordinal in a two-character column.
      const ordinal = String(position + 1).padStart(2, " ");
      console.log(` ${ordinal}. ${entry}`);
    });
    return;
  }

  console.log(" (none)");
}
|
|
272
|
+
|
|
273
|
+
/**
 * Finds capitalised words and multi-word capitalised phrases that occur often
 * enough to be worth classifying.
 *
 * Single words are skipped when they are stopwords; phrases are skipped when
 * any of their words is a stopword. Only names seen at least three times are
 * returned.
 *
 * @param text Text to scan.
 * @returns Map from candidate name (original casing) to occurrence count.
 */
export function extractCandidates(text: string): Record<string, number> {
  const tally = new Map<string, number>();
  const bump = (key: string): void => {
    tally.set(key, (tally.get(key) ?? 0) + 1);
  };
  const isStopword = (token: string): boolean => STOPWORDS.has(token.toLowerCase());

  for (const [, word] of text.matchAll(CANDIDATE_WORD_REGEX)) {
    if (word && !isStopword(word) && word.length > 1) {
      bump(word);
    }
  }

  for (const [, phrase] of text.matchAll(MULTI_WORD_REGEX)) {
    if (phrase && phrase.split(/\s+/).every((part) => !isStopword(part))) {
      bump(phrase);
    }
  }

  // Keep first-seen order while dropping anything seen fewer than three times.
  const frequent = [...tally].filter(([, count]) => count >= 3);
  return Object.fromEntries(frequent);
}
|
|
310
|
+
|
|
311
|
+
/**
 * Compiles every scoring regex for one entity name.
 *
 * The name is regex-escaped once and substituted into each template. Dialogue
 * templates are compiled with `gim`; all other patterns use `gi`.
 *
 * @param name Raw entity name.
 * @returns The compiled pattern set used by `scoreEntity`.
 */
function _buildPatterns(name: string): CompiledPatterns {
  const safeName = regexEscape(name);
  const compileAll = (templates: readonly string[], flags: string): RegExp[] =>
    templates.map((template) => buildPattern(template, safeName, flags));

  const directSource = String.raw`\bhey\s+${safeName}\b|\bthanks?\s+${safeName}\b|\bhi\s+${safeName}\b`;
  const versionedSource = String.raw`\b${safeName}[-v]\w+`;
  const codeRefSource = String.raw`\b${safeName}\.(py|js|ts|yaml|yml|json|sh)\b`;

  return {
    dialogue: compileAll(DIALOGUE_PATTERNS, "gim"),
    personVerbs: compileAll(PERSON_VERB_PATTERNS, "gi"),
    projectVerbs: compileAll(PROJECT_VERB_PATTERNS, "gi"),
    direct: new RegExp(directSource, "gi"),
    versioned: new RegExp(versionedSource, "gi"),
    codeRef: new RegExp(codeRefSource, "gi"),
  };
}
|
|
322
|
+
|
|
323
|
+
/**
 * Scores how strongly `name` looks like a person versus a project.
 *
 * Person evidence (weight per hit): dialogue markers (3), person verbs (2),
 * a pronoun within two lines of a mention (2), direct address (4).
 * Project evidence: project verbs (2), versioned/hyphenated forms (3),
 * code-file references (3).
 *
 * Each evidence category contributes at most one signal string, with the hit
 * count summed across that category's patterns. Emitting one string per
 * matching pattern would let a few verb hits fill the three-signal cap and
 * hide the pronoun/addressed categories that `classifyEntity` inspects.
 *
 * @param name Candidate entity name.
 * @param text Full text to match the patterns against.
 * @param lines The same text split into lines, used for the pronoun window.
 * @returns Numeric scores and up to three signals for each side.
 */
export function scoreEntity(name: string, text: string, lines: string[]): EntityScores {
  const patterns = _buildPatterns(name);
  const totalMatches = (regexes: RegExp[]): number =>
    regexes.reduce((sum, regex) => sum + countMatches(regex, text), 0);

  let personScore = 0;
  let projectScore = 0;
  const personSignals: string[] = [];
  const projectSignals: string[] = [];

  const dialogue = totalMatches(patterns.dialogue);
  if (dialogue > 0) {
    personScore += dialogue * 3;
    personSignals.push(`dialogue marker (${dialogue}x)`);
  }

  const actions = totalMatches(patterns.personVerbs);
  if (actions > 0) {
    personScore += actions * 2;
    personSignals.push(`'${name} ...' action (${actions}x)`);
  }

  // One combined, case-insensitive regex compiled once per call; it matches
  // exactly when any individual pronoun pattern would.
  const anyPronoun = new RegExp(PRONOUN_PATTERNS.join("|"), "i");
  const nameLower = name.toLowerCase();
  let pronounHits = 0;
  lines.forEach((line, index) => {
    if (!line.toLowerCase().includes(nameLower)) {
      return;
    }
    // Window: the mentioning line plus two lines on either side.
    const windowText = lines.slice(Math.max(0, index - 2), index + 3).join(" ").toLowerCase();
    if (anyPronoun.test(windowText)) {
      pronounHits += 1;
    }
  });

  if (pronounHits > 0) {
    personScore += pronounHits * 2;
    personSignals.push(`pronoun nearby (${pronounHits}x)`);
  }

  const direct = countMatches(patterns.direct, text);
  if (direct > 0) {
    personScore += direct * 4;
    personSignals.push(`addressed directly (${direct}x)`);
  }

  const projectVerbs = totalMatches(patterns.projectVerbs);
  if (projectVerbs > 0) {
    projectScore += projectVerbs * 2;
    projectSignals.push(`project verb (${projectVerbs}x)`);
  }

  const versioned = countMatches(patterns.versioned, text);
  if (versioned > 0) {
    projectScore += versioned * 3;
    projectSignals.push(`versioned/hyphenated (${versioned}x)`);
  }

  const codeRef = countMatches(patterns.codeRef, text);
  if (codeRef > 0) {
    projectScore += codeRef * 3;
    projectSignals.push(`code file reference (${codeRef}x)`);
  }

  return {
    personScore,
    projectScore,
    personSignals: personSignals.slice(0, 3),
    projectSignals: projectSignals.slice(0, 3),
  };
}
|
|
400
|
+
|
|
401
|
+
/**
 * Converts heuristic scores into a classified entity.
 *
 * Decision order:
 * 1. No evidence at all: "uncertain", confidence `min(0.4, frequency / 50)`.
 * 2. Person share of the evidence >= 0.7: "person" when at least two distinct
 *    person-signal categories are present and the person score is >= 5,
 *    otherwise "uncertain" at 0.4.
 * 3. Person share <= 0.3: "project".
 * 4. Anything in between: "uncertain" at 0.5.
 *
 * @param name Entity name.
 * @param frequency Occurrence count of the name.
 * @param scores Output of `scoreEntity` for this name.
 * @returns The classified entity with confidence rounded to two decimals.
 */
export function classifyEntity(name: string, frequency: number, scores: EntityScores): DetectedEntity {
  const { personScore, projectScore, personSignals, projectSignals } = scores;
  const combined = personScore + projectScore;

  if (combined === 0) {
    return {
      name,
      type: "uncertain",
      confidence: roundConfidence(Math.min(0.4, frequency / 50)),
      frequency,
      signals: [`appears ${frequency}x, no strong type signals`],
    };
  }

  const personRatio = combined > 0 ? personScore / combined : 0;

  // Each signal counts towards the first category keyword it mentions.
  const categoryKeywords = ["dialogue", "action", "pronoun", "addressed"];
  const seenCategories = new Set<string>();
  for (const signal of personSignals) {
    const category = categoryKeywords.find((keyword) => signal.includes(keyword));
    if (category !== undefined) {
      seenCategories.add(category);
    }
  }

  if (personRatio >= 0.7) {
    const wellSupported = seenCategories.size >= 2 && personScore >= 5;
    if (wellSupported) {
      return {
        name,
        type: "person",
        confidence: roundConfidence(Math.min(0.99, 0.5 + personRatio * 0.5)),
        frequency,
        signals: personSignals.length > 0 ? personSignals : [`appears ${frequency}x`],
      };
    }

    // Person-leaning, but on a single kind of evidence or a weak score.
    return {
      name,
      type: "uncertain",
      confidence: 0.4,
      frequency,
      signals: [...personSignals, `appears ${frequency}x — pronoun-only match`],
    };
  }

  if (personRatio <= 0.3) {
    return {
      name,
      type: "project",
      confidence: roundConfidence(Math.min(0.99, 0.5 + (1 - personRatio) * 0.5)),
      frequency,
      signals: projectSignals.length > 0 ? projectSignals : [`appears ${frequency}x`],
    };
  }

  const mixedSignals = [...personSignals, ...projectSignals].slice(0, 3);
  mixedSignals.push("mixed signals — needs review");
  return {
    name,
    type: "uncertain",
    confidence: 0.5,
    frequency,
    signals: mixedSignals,
  };
}
|
|
469
|
+
|
|
470
|
+
/**
 * Reads a sample of files and classifies the capitalised names found in them.
 *
 * Files that cannot be read are skipped and do not count towards `maxFiles`.
 * Only the first `MAX_BYTES_PER_FILE` characters of each file are analysed.
 *
 * @param filePaths Paths to read, in priority order.
 * @param maxFiles Maximum number of files to read successfully (default 10).
 * @returns Up to 15 people and 10 projects (highest confidence first) and up
 *   to 8 uncertain entities (most frequent first).
 */
export function detectEntities(filePaths: string[], maxFiles = 10): EntityDetectionResult {
  const chunks: string[] = [];
  const lineBuffer: string[] = [];
  let readCount = 0;

  for (const filePath of filePaths) {
    if (readCount >= maxFiles) {
      break;
    }

    let content: string;
    try {
      content = readFileSync(filePath, "utf-8").slice(0, MAX_BYTES_PER_FILE);
    } catch {
      // Best effort: unreadable files are simply left out of the sample.
      continue;
    }

    chunks.push(content);
    lineBuffer.push(...content.split(/\r?\n/));
    readCount += 1;
  }

  const corpus = chunks.join("\n");
  const candidates = extractCandidates(corpus);
  if (Object.keys(candidates).length === 0) {
    return { people: [], projects: [], uncertain: [] };
  }

  const people: DetectedEntity[] = [];
  const projects: DetectedEntity[] = [];
  const uncertain: DetectedEntity[] = [];

  // Most frequent candidates are scored first.
  const ranked = Object.entries(candidates).sort(([, countA], [, countB]) => countB - countA);
  for (const [name, frequency] of ranked) {
    const entity = classifyEntity(name, frequency, scoreEntity(name, corpus, lineBuffer));
    switch (entity.type) {
      case "person":
        people.push(entity);
        break;
      case "project":
        projects.push(entity);
        break;
      default:
        uncertain.push(entity);
    }
  }

  const byConfidence = (a: DetectedEntity, b: DetectedEntity): number => b.confidence - a.confidence;
  people.sort(byConfidence);
  projects.sort(byConfidence);
  uncertain.sort((a, b) => b.frequency - a.frequency);

  return {
    people: people.slice(0, 15),
    projects: projects.slice(0, 10),
    uncertain: uncertain.slice(0, 8),
  };
}
|
|
521
|
+
|
|
522
|
+
/**
 * Prints a heading and one row per entity: ordinal, name padded to 20
 * characters, a five-dot confidence bar, and the first two signals.
 *
 * @param entities Entities to list, in display order.
 * @param label Heading text.
 */
function _printEntityList(entities: DetectedEntity[], label: string): void {
  console.log(`\n ${label}:`);
  if (entities.length === 0) {
    console.log(" (none detected)");
    return;
  }

  for (const [index, entity] of entities.entries()) {
    // Clamp to the 0..5 bar width: String.prototype.repeat throws a RangeError
    // for negative counts, which an out-of-range confidence would produce.
    const rawFilled = Math.floor(entity.confidence * 5);
    const filled = Number.isNaN(rawFilled) ? 0 : Math.min(5, Math.max(0, rawFilled));
    const confidenceBar = `${"●".repeat(filled)}${"○".repeat(5 - filled)}`;
    const signals = entity.signals.length > 0 ? entity.signals.slice(0, 2).join(", ") : "";
    console.log(` ${String(index + 1).padStart(2, " ")}. ${entity.name.padEnd(20, " ")} [${confidenceBar}] ${signals}`);
  }
}
|
|
536
|
+
|
|
537
|
+
/**
 * Shows the detection results and lets the user accept, edit, or extend them.
 *
 * Flow:
 * - Always prints the people, projects and (if any) uncertain lists.
 * - With `yes` set, returns the detected people/projects immediately;
 *   uncertain entities are not included.
 * - Otherwise asks for accept / edit / add. "edit" classifies each uncertain
 *   entity and then offers removal by number from each list. After that,
 *   manual entry runs when the choice was "add" or when the user answers yes
 *   to "Add any missing entities?" (that question is asked after "accept" and
 *   "edit" alike).
 *
 * Cancelling any prompt exits the process (see `ensureNotCancelled`).
 *
 * @param detected Result of `detectEntities`.
 * @param yes Skip all prompts and auto-accept (default `false`).
 * @returns The confirmed people and project names.
 */
export async function confirmEntities(detected: EntityDetectionResult, yes = false): Promise<ConfirmedEntities> {
  console.log(`\n${"=".repeat(58)}`);
  console.log(" MemPalace — Entity Detection");
  console.log(`${"=".repeat(58)}`);
  console.log("\n Scanned your files. Here's what we found:\n");

  _printEntityList(detected.people, "PEOPLE");
  _printEntityList(detected.projects, "PROJECTS");
  if (detected.uncertain.length > 0) {
    _printEntityList(detected.uncertain, "UNCERTAIN (need your call)");
  }

  // Start from fresh name arrays so the input result is never mutated.
  let confirmedPeople = detected.people.map((entity) => entity.name);
  let confirmedProjects = detected.projects.map((entity) => entity.name);

  if (yes) {
    console.log(`\n Auto-accepting ${confirmedPeople.length} people, ${confirmedProjects.length} projects.`);
    return { people: confirmedPeople, projects: confirmedProjects };
  }

  console.log(`\n${"─".repeat(58)}`);
  console.log(" Options:");
  console.log(" accept Accept all");
  console.log(" edit Remove wrong entries or reclassify uncertain");
  console.log(" add Add missing people or projects");
  console.log();

  const choice = await promptChoice();

  if (choice === "edit") {
    if (detected.uncertain.length > 0) {
      console.log("\n Uncertain entities — classify each:");
      for (const entity of detected.uncertain) {
        const answer = await promptClassification(entity.name);
        if (answer === "person") {
          confirmedPeople.push(entity.name);
        } else if (answer === "project") {
          confirmedProjects.push(entity.name);
        }
        // "skip" drops the entity.
      }
    }

    // Removal numbers refer to the list exactly as printed just above the prompt.
    printNumberedNames("Current people", confirmedPeople);
    const removePeople = await promptText(
      "Numbers to REMOVE from people (comma-separated, or enter to skip)",
      "1,3",
    );
    if (removePeople.trim()) {
      const indexes = parseRemovalInput(removePeople);
      confirmedPeople = confirmedPeople.filter((_, index) => !indexes.has(index));
    }

    printNumberedNames("Current projects", confirmedProjects);
    const removeProjects = await promptText(
      "Numbers to REMOVE from projects (comma-separated, or enter to skip)",
      "1,3",
    );
    if (removeProjects.trim()) {
      const indexes = parseRemovalInput(removeProjects);
      confirmedProjects = confirmedProjects.filter((_, index) => !indexes.has(index));
    }
  }

  // Short-circuit: choosing "add" goes straight to manual entry without the
  // extra confirmation question.
  if (choice === "add" || (await promptConfirm("Add any missing entities?", false))) {
    while (true) {
      const name = (await promptText("Name (or enter to stop)", "Ada Lovelace")).trim();
      if (!name) {
        break;
      }

      const kind = await ensureNotCancelled(
        await select<"person" | "project">({
          message: `Is '${name}' a person or project?`,
          options: [
            { value: "person", label: "Person" },
            { value: "project", label: "Project" },
          ],
        }),
      );

      if (kind === "person") {
        confirmedPeople.push(name);
      } else {
        confirmedProjects.push(name);
      }
    }
  }

  console.log(`\n${"=".repeat(58)}`);
  console.log(" Confirmed:");
  console.log(` People: ${confirmedPeople.join(", ") || "(none)"}`);
  console.log(` Projects: ${confirmedProjects.join(", ") || "(none)"}`);
  console.log(`${"=".repeat(58)}\n`);

  return {
    people: confirmedPeople,
    projects: confirmedProjects,
  };
}
|
|
636
|
+
|
|
637
|
+
/**
 * Walks a project directory and picks files to sample for entity detection.
 *
 * Directories named in `SKIP_DIRS` are not entered. Prose files are preferred:
 * when at least three are found only those are returned; otherwise prose files
 * are followed by the other readable files.
 *
 * The walk is best-effort. A directory that cannot be listed, or an entry that
 * cannot be stat'ed (for example a dangling symlink), is skipped on its own
 * instead of discarding everything collected so far. A missing or unreadable
 * root therefore yields an empty list.
 *
 * @param projectDir Directory to scan; resolved to an absolute path.
 * @param maxFiles Maximum number of paths to return (default 10).
 * @returns Absolute file paths, at most `maxFiles` of them.
 */
export function scanForDetection(projectDir: string, maxFiles = 10): string[] {
  const projectPath = resolve(projectDir);
  const proseFiles: string[] = [];
  const allFiles: string[] = [];

  // Lists a directory, treating any failure as "no entries".
  const listEntries = (dir: string) => {
    try {
      return readdirSync(dir, { withFileTypes: true });
    } catch {
      return [];
    }
  };

  // True when the path resolves to a regular file; false if it cannot be stat'ed.
  const resolvesToFile = (filePath: string): boolean => {
    try {
      return statSync(filePath).isFile();
    } catch {
      return false;
    }
  };

  const walk = (currentDir: string): void => {
    for (const entry of listEntries(currentDir)) {
      const filePath = resolve(currentDir, entry.name);

      if (entry.isDirectory()) {
        if (!SKIP_DIRS.has(entry.name)) {
          walk(filePath);
        }
        continue;
      }

      // Non-regular entries (e.g. symlinks) are kept only if they resolve to a file.
      if (!entry.isFile() && !resolvesToFile(filePath)) {
        continue;
      }

      const extension = extname(entry.name).toLowerCase();
      if (PROSE_EXTENSIONS.has(extension)) {
        proseFiles.push(filePath);
      } else if (READABLE_EXTENSIONS.has(extension)) {
        allFiles.push(filePath);
      }
    }
  };

  walk(projectPath);

  const files = proseFiles.length >= 3 ? proseFiles : [...proseFiles, ...allFiles];
  return files.slice(0, maxFiles);
}
|