@nguyentamdat/mempalace 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,806 @@
1
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
2
+ import { homedir } from "node:os";
3
+ import { dirname, join } from "node:path";
4
+
5
/**
 * Lower-case words that are ordinary English vocabulary (or calendar terms)
 * but are also used as given names.
 *
 * The registry consults this set, always with a lower-cased key, to decide
 * whether a person's name must be flagged as ambiguous and to skip such words
 * when collecting unknown capitalised candidates from a query.
 */
const COMMON_ENGLISH_WORD_LIST: readonly string[] = [
  // Everyday words that double as names
  "ever", "grace", "will", "bill", "mark",
  "april", "may", "june",
  "joy", "hope", "faith",
  "chance", "chase", "hunter",
  "dash", "flash",
  "star", "sky", "river", "brook", "lane",
  "art", "clay", "gil", "nat", "max", "rex", "ray", "jay",
  "rose", "violet", "lily", "ivy", "ash", "reed", "sage",
  // Days of the week
  "monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday",
  // Months not already listed above
  "january", "february", "march",
  "july", "august", "september", "october", "november", "december",
];

export const COMMON_ENGLISH_WORDS = new Set<string>(COMMON_ENGLISH_WORD_LIST);
59
+
60
/** Verbs that, directly following a name, suggest the name refers to a person. */
const PERSON_VERBS_AFTER_NAME: readonly string[] = [
  "said",
  "told",
  "asked",
  "laughed",
  "smiled",
  "was",
  "is",
  "called",
  "texted",
];

/**
 * Regex source templates whose match in a piece of context counts as evidence
 * that an ambiguous word is being used as a person's name.
 *
 * `{name}` is a placeholder; the disambiguation code replaces it with the
 * regex-escaped, lower-cased word before compiling each template.
 */
const PERSON_CONTEXT_PATTERNS: readonly string[] = [
  // "<name> <verb>"
  ...PERSON_VERBS_AFTER_NAME.map((verb) => String.raw`\b{name}\s+${verb}\b`),
  // "<verb/preposition> <name>"
  String.raw`\bwith\s+{name}\b`,
  String.raw`\bsaw\s+{name}\b`,
  String.raw`\bcalled\s+{name}\b`,
  String.raw`\btook\s+{name}\b`,
  String.raw`\bpicked\s+up\s+{name}\b`,
  String.raw`\bdrop(?:ped)?\s+(?:off\s+)?{name}\b`,
  // Possessive form
  String.raw`\b{name}(?:'s|s')\b`,
  // Direct address
  String.raw`\bhey\s+{name}\b`,
  String.raw`\bthanks?\s+{name}\b`,
  // Name at the very start of the context, followed by a colon or whitespace
  String.raw`^{name}[:\s]`,
  // Relationship introduction
  String.raw`\bmy\s+(?:son|daughter|kid|child|brother|sister|friend|partner|colleague|coworker)\s+{name}\b`,
];
82
+
83
/**
 * Regex source templates whose match in a piece of context counts as evidence
 * that an ambiguous word is being used as an ordinary word rather than as a
 * person's name.
 *
 * `{name}` is a placeholder; the disambiguation code replaces it with the
 * regex-escaped, lower-cased word before compiling each template.
 */
const CONCEPT_CONTEXT_PATTERNS = [
  String.raw`\bhave\s+you\s+{name}\b`,
  String.raw`\bif\s+you\s+{name}\b`,
  String.raw`\b{name}\s+since\b`,
  String.raw`\b{name}\s+again\b`,
  String.raw`\bnot\s+{name}\b`,
  String.raw`\b{name}\s+more\b`,
  String.raw`\bwould\s+{name}\b`,
  String.raw`\bcould\s+{name}\b`,
  String.raw`\bwill\s+{name}\b`,
  // The leading \b keeps the name from matching as the tail of a longer word
  // (e.g. "grace" inside "disgrace of"), consistent with every other template.
  String.raw`\b(?:the\s+)?{name}\s+(?:of|in|at|for|to)\b`,
] as const;
95
+
96
/**
 * Lower-case phrases which, when found in a (lower-cased) Wikipedia extract,
 * lead the lookup to classify the looked-up word as a person's name.
 */
const NAME_INDICATOR_PHRASES: readonly string[] = [
  // Generic wording
  "given name",
  "personal name",
  "first name",
  "forename",
  // Gendered wording
  "masculine name",
  "feminine name",
  "boy's name",
  "girl's name",
  "male name",
  "female name",
  // Origin wording
  "irish name",
  "welsh name",
  "scottish name",
  "gaelic name",
  "hebrew name",
  "arabic name",
  "norse name",
  "old english name",
  // Sentence fragments
  "is a name",
  "as a name",
  "name meaning",
  "name derived from",
  // Legendary figures
  "legendary irish",
  "legendary welsh",
  "legendary scottish",
];
123
+
124
/**
 * Lower-case phrases which, when found in a (lower-cased) Wikipedia extract
 * that did not already look like a name article, lead the lookup to classify
 * the looked-up word as a place.
 */
const PLACE_INDICATOR_PHRASES: readonly string[] = [
  // Settlements
  "city in",
  "town in",
  "village in",
  "municipality",
  // Administrative areas
  "capital of",
  "district of",
  "county",
  "province",
  "region of",
  // Geographic features
  "island of",
  "mountain in",
  "river in",
];
138
+
139
/** Classification labels used by lookup results and cached Wikipedia inferences. */
type RegistryEntityType =
  | "person"
  | "project"
  | "concept"
  | "place"
  | "ambiguous"
  | "unknown";

/** One entry of the registry's `people` map, keyed by the person's name. */
interface PersonInfo {
  /** Where the entry came from; this file writes "onboarding", "wiki" and "learned". */
  source: string;
  /** Context labels attached to the person (e.g. "personal"). */
  contexts: string[];
  /** Other names stored for this entry. */
  aliases: string[];
  /** Free-form relationship description; empty string when none was given. */
  relationship: string;
  /** Confidence score stored with the entry. */
  confidence: number;
  /** Set on alias entries created during seeding: the name the alias stands for. */
  canonical?: string;
  /** Set on learned entries: the candidate frequency reported by the detector. */
  seen_count?: number;
}

/** Cached outcome of a Wikipedia lookup for a single word. */
interface WikiCacheEntry {
  inferred_type: RegistryEntityType;
  confidence: number;
  /** Summary text, or null when no page content was available. */
  wiki_summary: string | null;
  wiki_title?: string | null;
  /** Optional human-readable explanation of how the type was inferred. */
  note?: string;
  /** The word that was researched. */
  word?: string;
  /** Whether the inference has been confirmed. */
  confirmed?: boolean;
  /** The entity type supplied at confirmation time. */
  confirmed_type?: string;
}

/** Shape of the JSON document persisted by the registry. */
interface RegistryData {
  version: number;
  mode: string;
  people: Record<string, PersonInfo>;
  projects: string[];
  /** Lower-cased names that are also common English words. */
  ambiguous_flags: string[];
  wiki_cache: Record<string, WikiCacheEntry>;
}

/** Input record describing one person when seeding the registry. */
interface SeedPersonEntry {
  name: string;
  relationship?: string;
  context?: string;
}

/** Result returned by a registry lookup. */
interface LookupResult {
  type: RegistryEntityType;
  confidence: number;
  source: string;
  name: string;
  needs_disambiguation: boolean;
  context?: string[];
  /** Present when context patterns decided the classification. */
  disambiguated_by?: string;
}

/** The three functions the registry needs from the entity-detector module. */
interface DetectorModule {
  extractCandidates: (text: string) => Record<string, number>;
  scoreEntity: (name: string, text: string, lines: string[]) => Record<string, number>;
  classifyEntity: (name: string, frequency: number, scores: Record<string, number>) => LearnedEntity;
}

/** Classification produced by the detector; extra primitive fields are passed through. */
interface LearnedEntity {
  type: string;
  confidence: number;
  [key: string]: string | number | boolean | null | undefined;
}
198
+
199
/**
 * Type guard for "something with string-keyed properties".
 *
 * Accepts every non-null value whose `typeof` is "object"; note that this
 * includes arrays.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
202
+
203
/**
 * Returns `value` when it is a string (including the empty string),
 * otherwise `fallback`.
 */
function readString(value: unknown, fallback = ""): string {
  if (typeof value !== "string") {
    return fallback;
  }
  return value;
}
206
+
207
/**
 * Returns `value` when it is a finite number, otherwise `fallback`.
 * `NaN` and the infinities are treated as missing.
 */
function readNumber(value: unknown, fallback = 0): number {
  if (typeof value !== "number") {
    return fallback;
  }
  return Number.isFinite(value) ? value : fallback;
}
210
+
211
/**
 * Collects the string elements of `value` into a new array.
 * Non-array input yields an empty array; non-string elements are dropped.
 */
function readStringArray(value: unknown): string[] {
  const strings: string[] = [];
  if (Array.isArray(value)) {
    for (const entry of value) {
      if (typeof entry === "string") {
        strings.push(entry);
      }
    }
  }
  return strings;
}
217
+
218
/**
 * Builds a well-formed `PersonInfo` from untrusted data.
 *
 * Required fields fall back to empty string / empty array / 0 when missing or
 * of the wrong type. `canonical` is copied only when it is a non-empty string
 * and `seen_count` only when it is a finite number.
 */
function toPersonInfo(value: unknown): PersonInfo {
  const raw: Record<string, unknown> = isRecord(value) ? value : {};

  const info: PersonInfo = {
    source: readString(raw.source),
    contexts: readStringArray(raw.contexts),
    aliases: readStringArray(raw.aliases),
    relationship: readString(raw.relationship),
    confidence: readNumber(raw.confidence),
  };

  const canonicalName = raw.canonical;
  if (typeof canonicalName === "string" && canonicalName !== "") {
    info.canonical = canonicalName;
  }

  const seenCount = raw.seen_count;
  if (typeof seenCount === "number" && Number.isFinite(seenCount)) {
    info.seen_count = seenCount;
  }

  return info;
}
239
+
240
/**
 * Builds a well-formed `WikiCacheEntry` from untrusted data.
 *
 * An unrecognised or missing `inferred_type` becomes "unknown", a non-string
 * `wiki_summary` becomes null, and each optional field is copied only when it
 * has the expected primitive type (`wiki_title` may also be null).
 */
function toWikiCacheEntry(value: unknown): WikiCacheEntry {
  const raw: Record<string, unknown> = isRecord(value) ? value : {};

  const declaredType = readString(raw.inferred_type, "unknown");
  const summary = raw.wiki_summary;

  const entry: WikiCacheEntry = {
    inferred_type: isRegistryEntityType(declaredType) ? declaredType : "unknown",
    confidence: readNumber(raw.confidence),
    wiki_summary: typeof summary === "string" ? summary : null,
  };

  const title = raw.wiki_title;
  if (title === null || typeof title === "string") {
    entry.wiki_title = title;
  }

  const note = raw.note;
  if (typeof note === "string") {
    entry.note = note;
  }

  const word = raw.word;
  if (typeof word === "string") {
    entry.word = word;
  }

  const confirmed = raw.confirmed;
  if (typeof confirmed === "boolean") {
    entry.confirmed = confirmed;
  }

  const confirmedType = raw.confirmed_type;
  if (typeof confirmedType === "string") {
    entry.confirmed_type = confirmedType;
  }

  return entry;
}
267
+
268
/**
 * Converts an arbitrary parsed JSON value into a complete `RegistryData`.
 *
 * Every `people` and `wiki_cache` entry is normalised individually; missing
 * or malformed top-level fields fall back to version 1, mode "personal" and
 * empty collections.
 */
function normalizeRegistryData(value: unknown): RegistryData {
  const raw: Record<string, unknown> = isRecord(value) ? value : {};

  const people: Record<string, PersonInfo> = {};
  const rawPeople = raw.people;
  if (isRecord(rawPeople)) {
    for (const name of Object.keys(rawPeople)) {
      people[name] = toPersonInfo(rawPeople[name]);
    }
  }

  const wikiCache: Record<string, WikiCacheEntry> = {};
  const rawWikiCache = raw.wiki_cache;
  if (isRecord(rawWikiCache)) {
    for (const word of Object.keys(rawWikiCache)) {
      wikiCache[word] = toWikiCacheEntry(rawWikiCache[word]);
    }
  }

  const version = readNumber(raw.version, 1);
  const mode = readString(raw.mode, "personal");
  const projects = readStringArray(raw.projects);
  const ambiguousFlags = readStringArray(raw.ambiguous_flags);

  return {
    version,
    mode,
    people,
    projects,
    ambiguous_flags: ambiguousFlags,
    wiki_cache: wikiCache,
  };
}
292
+
293
/**
 * Type guard: true exactly when `value` is one of the six
 * `RegistryEntityType` labels (comparison is case-sensitive).
 */
function isRegistryEntityType(value: string): value is RegistryEntityType {
  switch (value) {
    case "person":
    case "project":
    case "concept":
    case "place":
    case "ambiguous":
    case "unknown":
      return true;
    default:
      return false;
  }
}
296
+
297
/**
 * Escapes the regular-expression metacharacters `. * + ? ^ $ { } ( ) | [ ] \`
 * in `value` by prefixing each with a backslash, so the result matches the
 * original text literally when embedded in a pattern.
 */
function regexEscape(value: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metacharacters, (character) => "\\" + character);
}
300
+
301
/** Returns at most the first 200 UTF-16 code units of `value`. */
function truncateSummary(value: string): string {
  const maxSummaryLength = 200;
  return value.substring(0, maxSummaryLength);
}
304
+
305
/** Copies only the entries of `result` whose value is a finite number into a fresh record. */
function keepFiniteNumberEntries(result: Record<string, unknown>): Record<string, number> {
  const normalized: Record<string, number> = {};
  for (const [key, entry] of Object.entries(result)) {
    if (typeof entry === "number" && Number.isFinite(entry)) {
      normalized[key] = entry;
    }
  }
  return normalized;
}

/**
 * Wraps a dynamically imported entity-detector module in a validated,
 * typed facade.
 *
 * Each required function is looked up under its camelCase name first and its
 * snake_case name second. The returned wrappers validate what the underlying
 * functions return:
 *  - `extractCandidates` / `scoreEntity` keep only finite-number entries;
 *  - `classifyEntity` coerces `type` to a string and `confidence` to a finite
 *    number (defaulting to "" and 0) and passes through any other field whose
 *    value is a string, number, boolean, null or undefined.
 *
 * @param value The imported module namespace (or anything claiming to be one).
 * @returns A `DetectorModule` whose methods return normalised data.
 * @throws Error if `value` is not an object, if any of the three functions is
 *   missing, or (from the wrappers) if a function returns a non-object.
 */
function extractDetectorModule(value: unknown): DetectorModule {
  if (!isRecord(value)) {
    throw new Error("entity-detector module did not load correctly");
  }

  const extractCandidates = value.extractCandidates ?? value.extract_candidates;
  const scoreEntity = value.scoreEntity ?? value.score_entity;
  const classifyEntity = value.classifyEntity ?? value.classify_entity;

  if (
    typeof extractCandidates !== "function" ||
    typeof scoreEntity !== "function" ||
    typeof classifyEntity !== "function"
  ) {
    throw new Error("entity-detector module is missing required exports");
  }

  return {
    extractCandidates: (text: string) => {
      const result: unknown = extractCandidates(text);
      if (!isRecord(result)) {
        throw new Error("entity-detector extractCandidates() returned invalid data");
      }
      return keepFiniteNumberEntries(result);
    },
    scoreEntity: (name: string, text: string, lines: string[]) => {
      const result: unknown = scoreEntity(name, text, lines);
      if (!isRecord(result)) {
        throw new Error("entity-detector scoreEntity() returned invalid data");
      }
      return keepFiniteNumberEntries(result);
    },
    classifyEntity: (name: string, frequency: number, scores: Record<string, number>) => {
      const result: unknown = classifyEntity(name, frequency, scores);
      if (!isRecord(result)) {
        throw new Error("entity-detector classifyEntity() returned invalid data");
      }

      const learnedEntity: LearnedEntity = {
        type: readString(result.type),
        confidence: readNumber(result.confidence),
      };

      for (const [key, entry] of Object.entries(result)) {
        // `type` and `confidence` were already coerced above; do not overwrite them.
        if (key === "type" || key === "confidence") {
          continue;
        }
        if (
          typeof entry === "string" ||
          typeof entry === "number" ||
          typeof entry === "boolean" ||
          entry === null ||
          typeof entry === "undefined"
        ) {
          learnedEntity[key] = entry;
        }
      }

      return learnedEntity;
    },
  };
}
381
+
382
/**
 * Queries the English Wikipedia REST summary endpoint for `word` and infers
 * what kind of entity the word denotes.
 *
 * Decision order:
 *  1. HTTP 404: "person" (0.7), on the assumption that a word without an
 *     article is a proper noun or unusual name.
 *  2. Any other non-OK status: "unknown" (0).
 *  3. Disambiguation page: "person" (0.65) when the page description mentions
 *     "name", otherwise "ambiguous" (0.4).
 *  4. Extract contains a name-indicator phrase: "person" (0.9 when the extract
 *     contains "<word> is a" or "<word> (name", else 0.8).
 *  5. Extract contains a place-indicator phrase: "place" (0.8).
 *  6. Otherwise: "concept" (0.6).
 *
 * The extract is lower-cased before matching, so the stored `wiki_summary`
 * (first 200 characters) is lower-case as well.
 *
 * The request is aborted after five seconds so that a stalled connection
 * cannot block the caller indefinitely. A timeout, like any network or
 * parsing error, yields the "unknown" result; this function never rejects.
 *
 * @param word The word to look up; it is URL-encoded into the request path.
 * @returns The inferred classification.
 */
async function _wikipediaLookup(word: string): Promise<WikiCacheEntry> {
  const requestTimeoutMs = 5000;
  const controller = new AbortController();
  const timeoutHandle = setTimeout(() => controller.abort(), requestTimeoutMs);

  try {
    const url = `https://en.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(word)}`;
    const response = await fetch(url, {
      headers: {
        "User-Agent": "MemPalace/1.0",
      },
      // Covers both the response headers and the body read below.
      signal: controller.signal,
    });

    if (response.status === 404) {
      return {
        inferred_type: "person",
        confidence: 0.7,
        wiki_summary: null,
        wiki_title: null,
        note: "not found in Wikipedia — likely a proper noun or unusual name",
      };
    }

    if (!response.ok) {
      return { inferred_type: "unknown", confidence: 0, wiki_summary: null };
    }

    const payload: unknown = await response.json();
    const data = isRecord(payload) ? payload : {};
    const pageType = readString(data.type);
    const extract = readString(data.extract).toLowerCase();
    const title = readString(data.title, word);
    const summary = truncateSummary(extract);

    if (pageType === "disambiguation") {
      const description = readString(data.description).toLowerCase();
      if (["name", "given name"].some((phrase) => description.includes(phrase))) {
        return {
          inferred_type: "person",
          confidence: 0.65,
          wiki_summary: summary,
          wiki_title: title,
          note: "disambiguation page with name entries",
        };
      }

      return {
        inferred_type: "ambiguous",
        confidence: 0.4,
        wiki_summary: summary,
        wiki_title: title,
      };
    }

    if (NAME_INDICATOR_PHRASES.some((phrase) => extract.includes(phrase))) {
      const lowerWord = word.toLowerCase();
      // An extract that opens by defining the word itself is stronger evidence.
      const definesWord =
        extract.includes(`${lowerWord} is a`) || extract.includes(`${lowerWord} (name`);

      return {
        inferred_type: "person",
        confidence: definesWord ? 0.9 : 0.8,
        wiki_summary: summary,
        wiki_title: title,
      };
    }

    if (PLACE_INDICATOR_PHRASES.some((phrase) => extract.includes(phrase))) {
      return {
        inferred_type: "place",
        confidence: 0.8,
        wiki_summary: summary,
        wiki_title: title,
      };
    }

    return {
      inferred_type: "concept",
      confidence: 0.6,
      wiki_summary: summary,
      wiki_title: title,
    };
  } catch {
    // Network failure, invalid JSON or timeout abort: report "unknown" rather than throwing.
    return { inferred_type: "unknown", confidence: 0, wiki_summary: null };
  } finally {
    clearTimeout(timeoutHandle);
  }
}
465
+
466
/**
 * JSON-file-backed registry of known people, projects and cached Wikipedia
 * lookups, with helpers to classify words found in text.
 *
 * Every mutating method (`seed`, `research`, `confirmResearch`, and
 * `learnFromText` when it learns something) writes the registry back to disk
 * synchronously via `save()`.
 */
export class EntityRegistry {
  /** Registry location used when `load()` is called without a config directory. */
  static readonly DEFAULT_PATH = join(homedir(), ".mempalace", "entity_registry.json");

  private readonly data: RegistryData;
  private readonly filePath: string;

  /**
   * @param data Registry contents; held by reference and mutated in place.
   * @param filePath File that `save()` writes to.
   */
  constructor(data: RegistryData, filePath: string) {
    this.data = data;
    this.filePath = filePath;
  }

  /**
   * Loads the registry from `<configDir>/entity_registry.json`, or from
   * `DEFAULT_PATH` when no directory is given.
   *
   * A missing, unreadable or unparsable file yields an empty registry bound
   * to the same path; nothing is written until the next mutation.
   */
  static load(configDir?: string): EntityRegistry {
    const filePath = configDir ? join(configDir, "entity_registry.json") : EntityRegistry.DEFAULT_PATH;
    if (existsSync(filePath)) {
      try {
        const parsed: unknown = JSON.parse(readFileSync(filePath, "utf-8"));
        return new EntityRegistry(normalizeRegistryData(parsed), filePath);
      } catch {
        // Deliberate best effort: fall through to an empty registry.
      }
    }

    return new EntityRegistry(EntityRegistry.empty(), filePath);
  }

  /** Writes the registry as pretty-printed JSON, creating parent directories as needed. */
  save(): void {
    mkdirSync(dirname(this.filePath), { recursive: true });
    writeFileSync(this.filePath, JSON.stringify(this.data, null, 2));
  }

  /** Fresh registry contents: version 1, mode "personal", no entries. */
  private static empty(): RegistryData {
    return {
      version: 1,
      mode: "personal",
      people: {},
      projects: [],
      ambiguous_flags: [],
      wiki_cache: {},
    };
  }

  /**
   * Own-property test for the plain-object maps in the registry.
   * The `in` operator would also match inherited keys such as "constructor"
   * or "toString", which must not be mistaken for stored entries.
   */
  private static hasOwn(map: object, key: string): boolean {
    return Object.prototype.hasOwnProperty.call(map, key);
  }

  get mode(): string {
    return this.data.mode;
  }

  /** The live `people` map (not a copy). */
  get people(): Record<string, PersonInfo> {
    return this.data.people;
  }

  /** The live project list (not a copy). */
  get projects(): string[] {
    return this.data.projects;
  }

  /** The live list of lower-cased ambiguous names (not a copy). */
  get ambiguousFlags(): string[] {
    return this.data.ambiguous_flags;
  }

  /**
   * Populates the registry from onboarding data and saves it.
   *
   * Sets the mode, replaces the project list, and adds one "onboarding"
   * entry (confidence 1) per person with a non-blank name. Existing people
   * are kept unless overwritten by name. The ambiguous flags are then
   * recomputed from all people keys.
   *
   * @param mode Registry mode to store.
   * @param people People to add; names are trimmed and blank names are skipped.
   * @param projects Project names; the array is copied.
   * @param aliases Map of alias -> canonical name. Every alias of a seeded
   *   person is recorded on that person and also gets its own entry pointing
   *   back via `canonical`. Several aliases may share one canonical name.
   */
  seed(mode: string, people: SeedPersonEntry[], projects: string[], aliases: Record<string, string> = {}): void {
    this.data.mode = mode;
    this.data.projects = [...projects];

    // canonical name -> all aliases that point at it (a Map avoids prototype-key collisions).
    const aliasesByCanonical = new Map<string, string[]>();
    for (const [alias, canonical] of Object.entries(aliases)) {
      const known = aliasesByCanonical.get(canonical);
      if (known) {
        known.push(alias);
      } else {
        aliasesByCanonical.set(canonical, [alias]);
      }
    }

    for (const entry of people) {
      const name = entry.name.trim();
      if (!name) {
        continue;
      }

      const context = entry.context ?? "personal";
      const relationship = entry.relationship ?? "";
      const personAliases = aliasesByCanonical.get(name) ?? [];

      this.data.people[name] = {
        source: "onboarding",
        contexts: [context],
        aliases: [...personAliases],
        relationship,
        confidence: 1,
      };

      for (const alias of personAliases) {
        this.data.people[alias] = {
          source: "onboarding",
          contexts: [context],
          aliases: [name],
          relationship,
          confidence: 1,
          canonical: name,
        };
      }
    }

    // A Set keeps first-seen order while dropping duplicates from keys that differ only by case.
    const ambiguous = new Set<string>();
    for (const name of Object.keys(this.data.people)) {
      const lower = name.toLowerCase();
      if (COMMON_ENGLISH_WORDS.has(lower)) {
        ambiguous.add(lower);
      }
    }
    this.data.ambiguous_flags = [...ambiguous];

    this.save();
  }

  /**
   * Classifies `word` using, in order: known people (by key or alias),
   * known projects, then confirmed Wikipedia cache entries. All comparisons
   * are case-insensitive.
   *
   * For a person whose name is flagged ambiguous, a non-empty `context` is
   * first run through the context patterns; a decisive outcome (person or
   * concept) is returned instead of the plain person result.
   *
   * For a confirmed cache entry the confirmed type is reported when it is a
   * valid entity type; otherwise the inferred type is used.
   *
   * @returns The classification, or type "unknown" with source "none".
   */
  lookup(word: string, context = ""): LookupResult {
    const wordLower = word.toLowerCase();

    for (const [canonical, info] of Object.entries(this.people)) {
      const isMatch =
        canonical.toLowerCase() === wordLower ||
        info.aliases.some((alias) => alias.toLowerCase() === wordLower);
      if (!isMatch) {
        continue;
      }

      if (context && this.ambiguousFlags.includes(wordLower)) {
        const resolved = this._disambiguate(word, context, info);
        if (resolved !== null) {
          return resolved;
        }
      }

      return {
        type: "person",
        confidence: info.confidence,
        source: info.source,
        name: canonical,
        context: info.contexts.length > 0 ? info.contexts : ["personal"],
        needs_disambiguation: false,
      };
    }

    for (const project of this.projects) {
      if (project.toLowerCase() === wordLower) {
        return {
          type: "project",
          confidence: 1,
          source: "onboarding",
          name: project,
          needs_disambiguation: false,
        };
      }
    }

    for (const [cachedWord, cachedResult] of Object.entries(this.data.wiki_cache)) {
      if (cachedWord.toLowerCase() !== wordLower || !cachedResult.confirmed) {
        continue;
      }

      // A type supplied at confirmation time overrides the automatic inference.
      const confirmedType = cachedResult.confirmed_type;
      const type =
        confirmedType !== undefined && isRegistryEntityType(confirmedType)
          ? confirmedType
          : cachedResult.inferred_type;

      return {
        type,
        confidence: cachedResult.confidence,
        source: "wiki",
        name: word,
        needs_disambiguation: false,
      };
    }

    return {
      type: "unknown",
      confidence: 0,
      source: "none",
      name: word,
      needs_disambiguation: false,
    };
  }

  /**
   * Decides from surrounding text whether an ambiguous word is used as a
   * person's name or as an ordinary word.
   *
   * Counts how many person templates and how many concept templates match
   * the lower-cased context. The higher count wins; confidence is
   * 0.7 + 0.1 per matching template, capped at 0.95 (person) or 0.9 (concept).
   *
   * @returns The decided result, or null when the counts are equal.
   */
  private _disambiguate(word: string, context: string, personInfo: PersonInfo): LookupResult | null {
    const contextLower = context.toLowerCase();
    const escapedName = regexEscape(word.toLowerCase());

    // A replacer function is used so that "$" sequences in the escaped name are
    // inserted literally instead of being read as replacement patterns.
    const countMatches = (templates: readonly string[]): number => {
      let matches = 0;
      for (const template of templates) {
        const source = template.replace(/\{name\}/g, () => escapedName);
        if (new RegExp(source).test(contextLower)) {
          matches += 1;
        }
      }
      return matches;
    };

    const personScore = countMatches(PERSON_CONTEXT_PATTERNS);
    const conceptScore = countMatches(CONCEPT_CONTEXT_PATTERNS);

    if (personScore > conceptScore) {
      return {
        type: "person",
        confidence: Math.min(0.95, 0.7 + personScore * 0.1),
        source: personInfo.source,
        name: word,
        context: personInfo.contexts.length > 0 ? personInfo.contexts : ["personal"],
        needs_disambiguation: false,
        disambiguated_by: "context_patterns",
      };
    }

    if (conceptScore > personScore) {
      return {
        type: "concept",
        confidence: Math.min(0.9, 0.7 + conceptScore * 0.1),
        source: "context_disambiguated",
        name: word,
        needs_disambiguation: false,
        disambiguated_by: "context_patterns",
      };
    }

    return null;
  }

  /**
   * Returns the cached Wikipedia inference for `word`, performing and caching
   * a lookup (and saving the registry) when none is stored yet. The cache key
   * is the word exactly as given.
   *
   * @param autoConfirm Stored as the new entry's `confirmed` flag.
   */
  async research(word: string, autoConfirm = false): Promise<WikiCacheEntry> {
    const cache = this.data.wiki_cache;
    if (EntityRegistry.hasOwn(cache, word)) {
      return cache[word];
    }

    const result = await _wikipediaLookup(word);
    result.word = word;
    result.confirmed = autoConfirm;

    cache[word] = result;
    this.save();
    return result;
  }

  /**
   * Records a confirmation for a researched word and saves the registry.
   *
   * Marks the cache entry (if one exists) as confirmed with `entityType`.
   * When the type is "person", also adds or overwrites a "wiki"-sourced
   * people entry (confidence 0.9) and flags the name as ambiguous if it is a
   * common English word.
   */
  confirmResearch(word: string, entityType: string, relationship = "", context = "personal"): void {
    const cache = this.data.wiki_cache;
    if (EntityRegistry.hasOwn(cache, word)) {
      cache[word].confirmed = true;
      cache[word].confirmed_type = entityType;
    }

    if (entityType === "person") {
      this.data.people[word] = {
        source: "wiki",
        contexts: [context],
        aliases: [],
        relationship,
        confidence: 0.9,
      };

      const wordLower = word.toLowerCase();
      if (COMMON_ENGLISH_WORDS.has(wordLower) && !this.data.ambiguous_flags.includes(wordLower)) {
        this.data.ambiguous_flags.push(wordLower);
      }
    }

    this.save();
  }

  /**
   * Runs the entity detector over `text` and registers every new candidate
   * that it classifies as a person with at least `minConfidence`.
   *
   * Candidates whose exact name is already a people key or a project are
   * skipped. The registry is saved only when something was learned.
   *
   * @returns The detector's classification for each newly learned person.
   * @throws Error if the detector module cannot be imported or is malformed.
   */
  async learnFromText(text: string, minConfidence = 0.75): Promise<LearnedEntity[]> {
    // Held in a variable so the specifier is resolved at run time.
    const modulePath = "./entity-detector";
    const detectorModule = extractDetectorModule(await import(modulePath));

    const lines = text.split(/\r?\n/);
    const candidates = detectorModule.extractCandidates(text);
    const newCandidates: LearnedEntity[] = [];

    for (const [name, frequency] of Object.entries(candidates)) {
      if (EntityRegistry.hasOwn(this.people, name) || this.projects.includes(name)) {
        continue;
      }

      const scores = detectorModule.scoreEntity(name, text, lines);
      const entity = detectorModule.classifyEntity(name, frequency, scores);
      if (entity.type !== "person" || entity.confidence < minConfidence) {
        continue;
      }

      this.data.people[name] = {
        source: "learned",
        contexts: [this.mode !== "combo" ? this.mode : "personal"],
        aliases: [],
        relationship: "",
        confidence: entity.confidence,
        seen_count: frequency,
      };

      const nameLower = name.toLowerCase();
      if (COMMON_ENGLISH_WORDS.has(nameLower) && !this.data.ambiguous_flags.includes(nameLower)) {
        this.data.ambiguous_flags.push(nameLower);
      }

      newCandidates.push(entity);
    }

    if (newCandidates.length > 0) {
      this.save();
    }

    return newCandidates;
  }

  /**
   * Returns the people keys whose name or any alias occurs in `query` as a
   * whole word (case-insensitive), each at most once.
   *
   * A name flagged ambiguous counts only when the context patterns decide
   * that the query uses it as a person.
   */
  extractPeopleFromQuery(query: string): string[] {
    const found: string[] = [];

    for (const [canonical, info] of Object.entries(this.people)) {
      for (const name of [canonical, ...info.aliases]) {
        if (!new RegExp(`\\b${regexEscape(name)}\\b`, "i").test(query)) {
          continue;
        }

        if (this.ambiguousFlags.includes(name.toLowerCase())) {
          const result = this._disambiguate(name, query, info);
          if (result === null || result.type !== "person") {
            continue;
          }
        }

        if (!found.includes(canonical)) {
          found.push(canonical);
        }
      }
    }

    return found;
  }

  /**
   * Returns the distinct capitalised words in `query` (one upper-case ASCII
   * letter followed by 2-15 lower-case ASCII letters) that are neither common
   * English words nor known to `lookup()`.
   */
  extractUnknownCandidates(query: string): string[] {
    const candidates = query.match(/\b[A-Z][a-z]{2,15}\b/g) ?? [];
    const unknown: string[] = [];

    for (const word of new Set(candidates)) {
      if (COMMON_ENGLISH_WORDS.has(word.toLowerCase())) {
        continue;
      }

      if (this.lookup(word).type === "unknown") {
        unknown.push(word);
      }
    }

    return unknown;
  }

  /** Multi-line, human-readable overview; lists at most the first eight people. */
  summary(): string {
    const peopleNames = Object.keys(this.people);
    const visiblePeople = peopleNames.slice(0, 8).join(", ");
    const suffix = peopleNames.length > 8 ? "..." : "";

    return [
      `Mode: ${this.mode}`,
      `People: ${peopleNames.length} (${visiblePeople}${suffix})`,
      `Projects: ${this.projects.join(", ") || "(none)"}`,
      `Ambiguous flags: ${this.ambiguousFlags.join(", ") || "(none)"}`,
      `Wiki cache: ${Object.keys(this.data.wiki_cache).length} entries`,
    ].join("\n");
  }
}