@velvetmonkey/vault-core 2.0.30 → 2.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/dist/src/entities.d.ts +50 -0
  2. package/dist/src/entities.d.ts.map +1 -0
  3. package/dist/src/entities.js +499 -0
  4. package/dist/src/entities.js.map +1 -0
  5. package/dist/src/index.d.ts +15 -0
  6. package/dist/src/index.d.ts.map +1 -0
  7. package/dist/src/index.js +23 -0
  8. package/dist/src/index.js.map +1 -0
  9. package/dist/src/logging/index.d.ts +7 -0
  10. package/dist/src/logging/index.d.ts.map +1 -0
  11. package/dist/src/logging/index.js +7 -0
  12. package/dist/src/logging/index.js.map +1 -0
  13. package/dist/src/logging/operationLogger.d.ts +59 -0
  14. package/dist/src/logging/operationLogger.d.ts.map +1 -0
  15. package/dist/src/logging/operationLogger.js +282 -0
  16. package/dist/src/logging/operationLogger.js.map +1 -0
  17. package/dist/src/logging/sessionManager.d.ts +35 -0
  18. package/dist/src/logging/sessionManager.d.ts.map +1 -0
  19. package/dist/src/logging/sessionManager.js +68 -0
  20. package/dist/src/logging/sessionManager.js.map +1 -0
  21. package/dist/src/logging/types.d.ts +123 -0
  22. package/dist/src/logging/types.d.ts.map +1 -0
  23. package/dist/src/logging/types.js +23 -0
  24. package/dist/src/logging/types.js.map +1 -0
  25. package/dist/src/protectedZones.d.ts +36 -0
  26. package/dist/src/protectedZones.d.ts.map +1 -0
  27. package/dist/src/protectedZones.js +114 -0
  28. package/dist/src/protectedZones.js.map +1 -0
  29. package/dist/src/sqlite.d.ts +273 -0
  30. package/dist/src/sqlite.d.ts.map +1 -0
  31. package/dist/src/sqlite.js +959 -0
  32. package/dist/src/sqlite.js.map +1 -0
  33. package/dist/src/types.d.ts +171 -0
  34. package/dist/src/types.d.ts.map +1 -0
  35. package/dist/src/types.js +5 -0
  36. package/dist/src/types.js.map +1 -0
  37. package/dist/src/wikilinks.d.ts +76 -0
  38. package/dist/src/wikilinks.d.ts.map +1 -0
  39. package/dist/src/wikilinks.js +681 -0
  40. package/dist/src/wikilinks.js.map +1 -0
  41. package/package.json +2 -2
@@ -0,0 +1,681 @@
1
+ /**
2
+ * Wikilink application logic
3
+ *
4
+ * Applies [[wikilinks]] to known entities in content while
5
+ * respecting protected zones (code, frontmatter, existing links, etc.)
6
+ *
7
+ * Also supports:
8
+ * - Pattern-based detection for implicit entities (proper nouns, quoted terms)
9
+ * - Alias resolution for existing wikilinks (resolves [[alias]] to [[Entity|alias]])
10
+ */
11
+ import { getProtectedZones, rangeOverlapsProtectedZone } from './protectedZones.js';
12
/**
 * Get all search terms for an entity (name + aliases).
 *
 * @param entity - Either a bare entity name, or an entity object with a
 *   canonical `name` and an optional iterable of `aliases`
 * @returns One `{ term, entityName }` record per searchable term; every
 *   record maps back to the canonical entity name used as the link target
 */
function getSearchTerms(entity) {
    if (typeof entity === 'string') {
        return [{ term: entity, entityName: entity }];
    }
    const { name, aliases } = entity;
    // An entity object may arrive without an alias list; treat a missing
    // (undefined/null) `aliases` as "no aliases" instead of throwing.
    const aliasTerms = [...(aliases ?? [])].map((alias) => ({
        term: alias,
        entityName: name,
    }));
    // The canonical name always comes first, followed by its aliases
    return [{ term: name, entityName: name }, ...aliasTerms];
}
29
/**
 * Lowercase words that must never be turned into wikilinks, even when a
 * known entity or alias has the same spelling.
 */
const EXCLUDE_WORDS = new Set([
    // Days of the week
    'monday',
    'tuesday',
    'wednesday',
    'thursday',
    'friday',
    'saturday',
    'sunday',
    // Months
    'january',
    'february',
    'march',
    'april',
    'may',
    'june',
    'july',
    'august',
    'september',
    'october',
    'november',
    'december',
    // Relative dates and time units
    'today',
    'tomorrow',
    'yesterday',
    'week',
    'month',
    'year',
    // Function words
    'the',
    'and',
    'for',
    'with',
    'from',
    'this',
    'that',
    // Seasonal terms
    'christmas',
    'holiday',
    'break',
]);
40
/**
 * Backslash-escape every regex metacharacter in a string so the result can
 * be embedded in a RegExp source and match the input literally.
 */
function escapeRegex(str) {
    const metaCharacters = /[.*+?^${}()|[\]\\]/g;
    return str.replace(metaCharacters, (character) => `\\${character}`);
}
46
/**
 * Check if an entity should be excluded from wikilinking.
 *
 * The comparison is case-insensitive: the term is lowercased before it is
 * looked up in EXCLUDE_WORDS.
 */
function shouldExcludeEntity(entity) {
    const normalized = entity.toLowerCase();
    return EXCLUDE_WORDS.has(normalized);
}
52
/**
 * Find all whole-word matches of an entity term in content.
 *
 * @param content - Text to search
 * @param entity - Literal term to look for (regex metacharacters are escaped)
 * @param caseInsensitive - When true, matching ignores case
 * @returns Matches in document order as `{ start, end, matched }`, where
 *   `end` is exclusive and `matched` is the text exactly as it appears
 */
function findEntityMatches(content, entity, caseInsensitive) {
    // An empty or blank term has nothing meaningful to link. An empty term
    // would also build a zero-width pattern, and a global zero-width match
    // never advances in an exec() loop.
    if (typeof entity !== 'string' || entity.trim() === '') {
        return [];
    }
    // Lookarounds instead of `\b`: for terms that begin and end with a word
    // character they are equivalent to `\b...\b`, but they also anchor terms
    // such as "C++" or ".NET", where `\b` would demand a word character on
    // the outside of the term.
    const source = `(?<!\\w)${escapeRegex(entity)}(?!\\w)`;
    const regex = new RegExp(source, caseInsensitive ? 'gi' : 'g');
    return Array.from(content.matchAll(regex), (found) => ({
        start: found.index,
        end: found.index + found[0].length,
        matched: found[0],
    }));
}
70
/**
 * Apply wikilinks to entities in content
 *
 * Matches that overlap a protected zone (as reported by getProtectedZones /
 * rangeOverlapsProtectedZone) are never linked. Search terms found in
 * EXCLUDE_WORDS are skipped.
 *
 * @param content - The markdown content to process
 * @param entities - List of entity names or Entity objects to look for
 * @param options - Wikilink options:
 *   - `firstOccurrenceOnly` (default true): link each entity at most once,
 *     at its earliest match that does not overlap another selected link
 *   - `caseInsensitive` (default true): ignore case when matching terms
 * @returns `{ content, linksAdded, linkedEntities }`; `linkedEntities` lists
 *   canonical entity names in the order their links were inserted
 */
export function applyWikilinks(content, entities, options = {}) {
    const { firstOccurrenceOnly = true, caseInsensitive = true } = options;
    if (!entities.length) {
        return { content, linksAdded: 0, linkedEntities: [] };
    }
    // Build search terms from all entities (names + aliases); each term maps
    // back to its canonical entity name. Longest terms first so that longer
    // phrases win over the shorter terms they contain.
    const allSearchTerms = [];
    for (const entity of entities) {
        for (const searchTerm of getSearchTerms(entity)) {
            if (!shouldExcludeEntity(searchTerm.term)) {
                allSearchTerms.push(searchTerm);
            }
        }
    }
    allSearchTerms.sort((a, b) => b.term.length - a.term.length);
    let zones = getProtectedZones(content);
    let result = content;
    let linksAdded = 0;
    const linkedEntities = [];
    // Matches of `term` in the current text that lie outside protected zones
    const findLinkableMatches = (term) => findEntityMatches(result, term, caseInsensitive)
        .filter((match) => !rangeOverlapsProtectedZone(match.start, match.end, zones));
    // Splice the link into the text and update the statistics
    const insertLink = (entityName, match) => {
        const wikilink = formatWikilink(entityName, match.matched);
        result = result.slice(0, match.start) + wikilink + result.slice(match.end);
        linksAdded++;
        if (!linkedEntities.includes(entityName)) {
            linkedEntities.push(entityName);
        }
        return wikilink;
    };
    if (firstOccurrenceOnly) {
        // Canonical casing per entity: the first term record seen for each
        // lowercased entity name (one pass instead of a search per entity)
        const canonicalNames = new Map();
        for (const { entityName } of allSearchTerms) {
            const entityKey = entityName.toLowerCase();
            if (!canonicalNames.has(entityKey)) {
                canonicalNames.set(entityKey, entityName);
            }
        }
        // Collect every linkable match per entity (name + aliases combined)
        const matchesByEntity = new Map();
        for (const { term, entityName } of allSearchTerms) {
            const linkable = findLinkableMatches(term);
            if (linkable.length === 0) {
                continue;
            }
            const entityKey = entityName.toLowerCase();
            const bucket = matchesByEntity.get(entityKey) ?? [];
            for (const match of linkable) {
                bucket.push({ term, match });
            }
            matchesByEntity.set(entityKey, bucket);
        }
        const candidates = [];
        for (const [entityKey, bucket] of matchesByEntity) {
            bucket.sort((a, b) => a.match.start - b.match.start);
            const entityName = canonicalNames.get(entityKey) || entityKey;
            for (const entry of bucket) {
                candidates.push({ entityName, ...entry });
            }
        }
        const selected = selectFirstOccurrences(candidates);
        // Insert from the end of the text backwards so that earlier offsets
        // stay valid. All offsets were computed against the original text and
        // protected zones are not consulted again in this mode, so they do
        // not need to be shifted here.
        selected.sort((a, b) => b.match.start - a.match.start);
        for (const { entityName, match } of selected) {
            insertLink(entityName, match);
        }
    }
    else {
        // All-occurrences mode: link every match of every term. Each inserted
        // link becomes a protected zone so shorter terms processed later
        // cannot link inside it.
        for (const { term, entityName } of allSearchTerms) {
            // Right-to-left so offsets of not-yet-processed matches stay valid
            for (const match of findLinkableMatches(term).reverse()) {
                const wikilink = insertLink(entityName, match);
                const shift = wikilink.length - match.matched.length;
                // Zones after the insertion point move by the length change
                zones = zones.map((zone) => ({
                    ...zone,
                    start: zone.start <= match.start ? zone.start : zone.start + shift,
                    end: zone.end <= match.start ? zone.end : zone.end + shift,
                }));
                zones.push({
                    start: match.start,
                    end: match.start + wikilink.length,
                    type: 'wikilink',
                });
                zones.sort((a, b) => a.start - b.start);
            }
        }
    }
    return {
        content: result,
        linksAdded,
        linkedEntities,
    };
}
/**
 * Render the wikilink for a match: `[[Entity]]` when the matched text equals
 * the entity name ignoring case, otherwise `[[Entity|matched text]]` so the
 * author's wording (for example an alias) remains the display text.
 */
function formatWikilink(entityName, matchedText) {
    return matchedText.toLowerCase() === entityName.toLowerCase()
        ? `[[${entityName}]]`
        : `[[${entityName}|${matchedText}]]`;
}
/**
 * Pick at most one match per entity such that no two picked matches overlap.
 *
 * Candidates are considered by position (earliest first), then by matched
 * length (longest first), then by term length (shortest first). The input
 * array is not modified.
 */
function selectFirstOccurrences(candidates) {
    const ordered = [...candidates].sort((a, b) => {
        if (a.match.start !== b.match.start) {
            return a.match.start - b.match.start;
        }
        if (a.match.matched.length !== b.match.matched.length) {
            return b.match.matched.length - a.match.matched.length;
        }
        return a.term.length - b.term.length;
    });
    const selected = [];
    const linkedKeys = new Set();
    for (const candidate of ordered) {
        const entityKey = candidate.entityName.toLowerCase();
        if (linkedKeys.has(entityKey)) {
            continue;
        }
        const { start, end } = candidate.match;
        const clashes = selected.some(({ match }) => start < match.end && end > match.start);
        if (!clashes) {
            selected.push(candidate);
            linkedKeys.add(entityKey);
        }
    }
    return selected;
}
249
/**
 * Suggest wikilinks without applying them
 * Returns a list of potential links with their positions
 *
 * Supports both entity names and aliases - if content matches an alias,
 * the suggestion will contain the canonical entity name.
 *
 * Suggestions never overlap each other. When several terms match the same
 * stretch of text (e.g. "API" inside "API Management"), only the longer
 * match is suggested, mirroring what applyWikilinks would actually link.
 *
 * @param content - The markdown content to analyze
 * @param entities - List of entity names or Entity objects to look for
 * @param options - `firstOccurrenceOnly` (default true) and
 *   `caseInsensitive` (default true)
 * @returns Suggestions as `{ entity, start, end, context }`. `context` is the
 *   match plus up to 20 characters on each side, prefixed with '...' when it
 *   does not start at the beginning of the content. In first-occurrence mode
 *   suggestions are ordered by position; in all-occurrences mode they are
 *   grouped by search term (longest term first), then by position.
 */
export function suggestWikilinks(content, entities, options = {}) {
    const { firstOccurrenceOnly = true, caseInsensitive = true } = options;
    if (!entities.length) {
        return [];
    }
    // Build search terms from all entities (names + aliases); each term maps
    // back to its canonical entity name
    const allSearchTerms = [];
    for (const entity of entities) {
        for (const searchTerm of getSearchTerms(entity)) {
            if (!shouldExcludeEntity(searchTerm.term)) {
                allSearchTerms.push(searchTerm);
            }
        }
    }
    // Longest terms first so longer matches take priority over the shorter
    // terms they contain
    allSearchTerms.sort((a, b) => b.term.length - a.term.length);
    const zones = getProtectedZones(content);
    const rangesOverlap = (a, b) => a.start < b.end && a.end > b.start;
    // Matches of `term` that lie outside protected zones
    const findSuggestableMatches = (term) => findEntityMatches(content, term, caseInsensitive)
        .filter((match) => !rangeOverlapsProtectedZone(match.start, match.end, zones));
    // Build the suggestion record, including the surrounding context
    const toSuggestion = (entityName, match) => {
        const contextStart = Math.max(0, match.start - 20);
        const contextEnd = Math.min(content.length, match.end + 20);
        const context = content.slice(contextStart, contextEnd);
        return {
            entity: entityName,
            start: match.start,
            end: match.end,
            context: contextStart > 0 ? `...${context}` : context,
        };
    };
    if (firstOccurrenceOnly) {
        // One suggestion per entity: its earliest match that does not overlap
        // a suggestion already chosen for another entity
        const candidates = [];
        for (const { term, entityName } of allSearchTerms) {
            for (const match of findSuggestableMatches(term)) {
                candidates.push({ entityName, term, match });
            }
        }
        // Earliest position first; at the same position prefer the longest
        // matched text, then the shortest term
        candidates.sort((a, b) => {
            if (a.match.start !== b.match.start) {
                return a.match.start - b.match.start;
            }
            if (a.match.matched.length !== b.match.matched.length) {
                return b.match.matched.length - a.match.matched.length;
            }
            return a.term.length - b.term.length;
        });
        const chosen = [];
        const suggestedKeys = new Set();
        for (const candidate of candidates) {
            const entityKey = candidate.entityName.toLowerCase();
            if (suggestedKeys.has(entityKey)) {
                continue;
            }
            if (chosen.some((other) => rangesOverlap(candidate.match, other.match))) {
                continue;
            }
            chosen.push(candidate);
            suggestedKeys.add(entityKey);
        }
        // Candidates were visited in position order, so `chosen` is already
        // sorted by start
        return chosen.map(({ entityName, match }) => toSuggestion(entityName, match));
    }
    // All-occurrences mode: every match of every term, except matches that
    // overlap a suggestion already made for a longer (earlier-processed) term
    const suggestions = [];
    for (const { term, entityName } of allSearchTerms) {
        for (const match of findSuggestableMatches(term)) {
            if (suggestions.some((existing) => rangesOverlap(match, existing))) {
                continue;
            }
            suggestions.push(toSuggestion(entityName, match));
        }
    }
    return suggestions;
}
338
/**
 * Resolve wikilinks that target aliases to their canonical entity names
 *
 * When a user types [[model context protocol]], and "Model Context Protocol"
 * is an alias for entity "MCP", this function transforms it to:
 *   [[MCP|model context protocol]]
 *
 * This preserves the user's original text as display text while resolving
 * to the canonical entity target. Links that already carry display text keep
 * it: [[alias|display]] becomes [[Entity|display]]. Links whose target is
 * already the entity name, or matches no alias, are left untouched.
 *
 * @param content - The markdown content to process
 * @param entities - List of entity names or Entity objects to look for.
 *   Plain-string entities have no aliases and are ignored; Entity objects
 *   without an `aliases` list are treated as having none.
 * @param options - Resolution options: `caseInsensitive` (default true)
 * @returns `{ content, linksAdded, linkedEntities }` where `linksAdded` is
 *   the number of links rewritten and `linkedEntities` the canonical names
 *   they were resolved to
 */
export function resolveAliasWikilinks(content, entities, options = {}) {
    const { caseInsensitive = true } = options;
    if (!entities.length) {
        return { content, linksAdded: 0, linkedEntities: [] };
    }
    const toKey = (text) => (caseInsensitive ? text.toLowerCase() : text);
    // alias key -> { entityName: canonical name, aliasText: original casing }
    const aliasMap = new Map();
    for (const entity of entities) {
        if (typeof entity === 'string') {
            continue;
        }
        // A missing alias list means "no aliases" rather than a crash
        for (const alias of entity.aliases ?? []) {
            aliasMap.set(toKey(alias), { entityName: entity.name, aliasText: alias });
        }
        // Also map the entity name itself so a target that already points at
        // the entity can be recognised. An alias registered under the same
        // key is deliberately not overwritten.
        const nameKey = toKey(entity.name);
        if (!aliasMap.has(nameKey)) {
            aliasMap.set(nameKey, { entityName: entity.name, aliasText: entity.name });
        }
    }
    // [[target]] or [[target|display]]; group 2 includes the leading '|'
    const wikilinkRegex = /\[\[([^\]|]+)(\|[^\]]+)?\]\]/g;
    const links = [...content.matchAll(wikilinkRegex)];
    let result = content;
    let linksResolved = 0;
    const resolvedEntities = [];
    // Rewrite from the last link to the first so that the offsets of links
    // still to be processed remain valid
    for (const link of links.reverse()) {
        const [fullMatch, target, displayPart] = link;
        const targetKey = toKey(target);
        const aliasInfo = aliasMap.get(targetKey);
        if (!aliasInfo) {
            // Target is neither a known alias nor a known entity name
            continue;
        }
        if (targetKey === toKey(aliasInfo.entityName)) {
            // Already pointing at the canonical entity
            continue;
        }
        // Keep existing display text; otherwise show the text the user typed
        const newWikilink = displayPart
            ? `[[${aliasInfo.entityName}${displayPart}]]`
            : `[[${aliasInfo.entityName}|${target}]]`;
        result = result.slice(0, link.index) + newWikilink + result.slice(link.index + fullMatch.length);
        linksResolved++;
        if (!resolvedEntities.includes(aliasInfo.entityName)) {
            resolvedEntities.push(aliasInfo.entityName);
        }
    }
    return {
        content: result,
        linksAdded: linksResolved,
        linkedEntities: resolvedEntities,
    };
}
435
/**
 * Fallback settings for implicit (pattern-based) entity detection. Values
 * here are used for any option the caller leaves undefined.
 */
const DEFAULT_IMPLICIT_CONFIG = {
    // Implicit detection is opt-in
    detectImplicit: false,
    // Detection strategies that are enabled by default
    implicitPatterns: [
        'proper-nouns',
        'quoted-terms',
    ],
    // Regex sources; detected text matching any of them is discarded
    excludePatterns: [
        '^The ',
        '^A ',
        '^An ',
        '^This ',
        '^That ',
        '^These ',
        '^Those ',
    ],
    // Detected text shorter than this many characters is discarded
    minEntityLength: 3,
};
444
/**
 * Lowercase words that are never reported as implicit entities, even when
 * they appear capitalized or quoted in the text.
 */
const IMPLICIT_EXCLUDE_WORDS = new Set([
    // Days of the week (EXCLUDE_WORDS lists these too; repeated here so this
    // set is self-contained)
    'monday', 'tuesday', 'wednesday', 'thursday',
    'friday', 'saturday', 'sunday',
    // Months
    'january', 'february', 'march', 'april', 'may', 'june',
    'july', 'august', 'september', 'october', 'november', 'december',
    // Words that commonly open a sentence or clause
    'this', 'that', 'these', 'those', 'there', 'here',
    'when', 'where', 'what', 'which', 'while', 'since',
    'after', 'before', 'during', 'until', 'because',
    'however', 'therefore', 'although', 'though', 'unless', 'whether',
    // Document-structure words that look like proper nouns but are not
    'note', 'notes', 'example', 'chapter', 'section', 'part',
    'item', 'figure', 'table', 'list', 'step', 'task',
    'todo', 'idea', 'thought', 'question', 'answer', 'summary',
    'overview', 'introduction', 'conclusion',
    // Programming keywords and literals
    'true', 'false', 'null', 'undefined', 'none',
    'class', 'function', 'method',
]);
463
/**
 * Lowercase verbs and adverbs that frequently open a sentence. They show up
 * capitalized only because of their position, so they are checked separately
 * and must not be taken as the first word of a proper-noun entity.
 */
const SENTENCE_STARTER_WORDS = new Set([
    'visit', 'also', 'see', 'please', 'note', 'check',
    'read', 'look', 'find', 'get', 'set', 'add',
    'use', 'try', 'make', 'take', 'give', 'keep',
    'let', 'call', 'run', 'ask', 'tell', 'show',
    'help', 'need', 'want', 'like', 'think', 'know',
    'feel', 'seem', 'hear', 'watch', 'wait', 'work',
    'start', 'stop', 'open', 'close', 'move', 'turn',
    'bring', 'send', 'leave', 'meet', 'join', 'follow',
    'include', 'consider', 'remember', 'forget',
]);
475
/**
 * Detect implicit entities in content using pattern matching
 *
 * This finds potential entities that don't have existing files:
 * - 'proper-nouns': multi-word proper nouns on a single line
 *   (e.g., "Marcus Johnson", "Project Alpha")
 * - 'single-caps': single capitalized words after a lowercase letter and a
 *   whitespace character (e.g., "discussed with Marcus")
 * - 'quoted-terms': double-quoted terms of 3-30 characters on a single line
 *   (e.g., "Turbopump"); the reported range includes the quotes
 *
 * Text is skipped when it is shorter than `minEntityLength`, is listed in
 * IMPLICIT_EXCLUDE_WORDS, matches one of `excludePatterns`
 * (case-insensitive), was already detected (case-insensitive), or overlaps a
 * protected zone. Ranges produced by different patterns may overlap each
 * other; callers that rewrite the content must account for that.
 *
 * @param content - The markdown content to analyze
 * @param config - Configuration for detection patterns; any option left
 *   undefined falls back to DEFAULT_IMPLICIT_CONFIG
 * @returns Array of `{ text, start, end, pattern }` matches sorted by `start`
 */
export function detectImplicitEntities(content, config = {}) {
    const {
        implicitPatterns = DEFAULT_IMPLICIT_CONFIG.implicitPatterns,
        excludePatterns = DEFAULT_IMPLICIT_CONFIG.excludePatterns,
        minEntityLength = DEFAULT_IMPLICIT_CONFIG.minEntityLength,
    } = config;
    const detected = [];
    const seenTexts = new Set();
    // Protected zones (code, links, ...) must not yield implicit entities
    const zones = getProtectedZones(content);
    const excludeRegexes = excludePatterns.map((source) => new RegExp(source, 'i'));
    // True when the text is too short, a common word, matches an exclude
    // pattern, or was already detected
    const shouldExclude = (text) => {
        const normalized = text.toLowerCase();
        return text.length < minEntityLength
            || IMPLICIT_EXCLUDE_WORDS.has(normalized)
            || excludeRegexes.some((regex) => regex.test(text))
            || seenTexts.has(normalized);
    };
    // Record a detection unless it is excluded or inside a protected zone
    const record = (text, start, end, pattern) => {
        if (shouldExclude(text) || rangeOverlapsProtectedZone(start, end, zones)) {
            return;
        }
        detected.push({ text, start, end, pattern });
        seenTexts.add(text.toLowerCase());
    };
    // Pattern 1: Multi-word proper nouns
    // Matches "Marcus Johnson", "Project Alpha", "San Francisco Bay Area"
    if (implicitPatterns.includes('proper-nouns')) {
        // Words are separated by horizontal whitespace only ([^\S\r\n] is
        // "whitespace except line breaks"): a phrase must not continue
        // across a newline, since a link target cannot span lines.
        const properNounRegex = /\b([A-Z][a-z]+(?:[^\S\r\n]+[A-Z][a-z]+)+)\b/g;
        for (const match of content.matchAll(properNounRegex)) {
            let text = match[1];
            let start = match.index;
            const end = start + match[0].length;
            // If the first word is a common sentence starter ("Visit",
            // "Also", "See"), drop it together with ALL whitespace that
            // follows it, so the remaining text and its start offset stay in
            // step even when the gap is wider than one space.
            const leading = /^([A-Z][a-z]+)([^\S\r\n]+)/.exec(text);
            if (leading && SENTENCE_STARTER_WORDS.has(leading[1].toLowerCase())) {
                text = text.slice(leading[0].length);
                start += leading[0].length;
                // Only keep the remainder if it is still a 2+ word phrase
                if (!/[^\S\r\n]/.test(text)) {
                    continue;
                }
            }
            record(text, start, end, 'proper-nouns');
        }
    }
    // Pattern 2: Single capitalized words after lowercase
    // Matches "discussed with Marcus yesterday" -> "Marcus"
    if (implicitPatterns.includes('single-caps')) {
        // The lookbehind keeps the preceding characters out of the match, so
        // match.index is the start of the capitalized word itself
        const singleCapRegex = /(?<=[a-z]\s)([A-Z][a-z]{3,})\b/g;
        for (const match of content.matchAll(singleCapRegex)) {
            record(match[1], match.index, match.index + match[0].length, 'single-caps');
        }
    }
    // Pattern 3: Quoted terms (explicit entity markers)
    // Matches "Turbopump" -> [[Turbopump]]
    if (implicitPatterns.includes('quoted-terms')) {
        // Line breaks are excluded so an unbalanced quote cannot pair with a
        // quote on a later line
        const quotedRegex = /"([^"\r\n]{3,30})"/g;
        for (const match of content.matchAll(quotedRegex)) {
            // The range covers the quotes so a replacement removes them
            record(match[1], match.index, match.index + match[0].length, 'quoted-terms');
        }
    }
    detected.sort((a, b) => a.start - b.start);
    return detected;
}
588
/**
 * Process wikilinks with support for both existing entities and implicit detection
 *
 * This is the main entry point that combines:
 * 1. applyWikilinks() for known entities from the vault index
 * 2. detectImplicitEntities() for pattern-based detection (only when
 *    `options.detectImplicit` is true)
 *
 * Implicit matches are skipped when their text equals (ignoring case) a
 * known entity name or alias, or the current note's own name, and when they
 * overlap an implicit match that starts earlier.
 *
 * @param content - The markdown content to process
 * @param entities - List of known entity names or Entity objects
 * @param options - Extended options including implicit entity config
 *   (`detectImplicit`, `implicitPatterns`, `excludePatterns`,
 *   `minEntityLength`, `notePath`); remaining options are passed to
 *   applyWikilinks()
 * @returns Result with updated content and statistics. When implicit links
 *   were added the result also has `implicitEntities`, and `linksAdded`
 *   includes the implicit links that were actually inserted.
 */
export function processWikilinks(content, entities, options = {}) {
    const { detectImplicit = false, implicitPatterns, excludePatterns, minEntityLength, notePath, ...wikilinkOptions } = options;
    // Step 1: Apply wikilinks for known entities
    const result = applyWikilinks(content, entities, wikilinkOptions);
    if (!detectImplicit) {
        return result;
    }
    // Step 2: Detect implicit entities in the already-processed content
    const implicitMatches = detectImplicitEntities(result.content, {
        detectImplicit: true,
        implicitPatterns,
        excludePatterns,
        minEntityLength,
    });
    if (implicitMatches.length === 0) {
        return result;
    }
    // Step 3: Everything that is already a known link target
    // (case-insensitive): linked entities, all entity names and their
    // aliases. An alias must not be linked as if it were a new entity.
    const alreadyKnown = new Set(result.linkedEntities.map((name) => name.toLowerCase()));
    for (const entity of entities) {
        if (typeof entity === 'string') {
            alreadyKnown.add(entity.toLowerCase());
            continue;
        }
        alreadyKnown.add(entity.name.toLowerCase());
        for (const alias of entity.aliases ?? []) {
            alreadyKnown.add(alias.toLowerCase());
        }
    }
    // Current note name, if provided, to avoid self-links
    const currentNoteName = notePath
        ? notePath.replace(/\.md$/, '').split('/').pop()?.toLowerCase()
        : null;
    // Step 4: Keep matches that are new, not self-links, and do not overlap
    // an earlier kept match. Overlaps happen when two patterns fire on the
    // same text (e.g. a quoted phrase that contains a proper noun); applying
    // both would splice with stale offsets and corrupt the content.
    const orderedMatches = implicitMatches
        .filter((match) => {
            const normalized = match.text.toLowerCase();
            return !alreadyKnown.has(normalized) && normalized !== currentNoteName;
        })
        // Earliest first; at the same start prefer the longer range
        .sort((a, b) => a.start - b.start || b.end - a.end);
    const acceptedMatches = [];
    for (const match of orderedMatches) {
        const previous = acceptedMatches[acceptedMatches.length - 1];
        if (previous && match.start < previous.end) {
            continue;
        }
        acceptedMatches.push(match);
    }
    if (acceptedMatches.length === 0) {
        return result;
    }
    // Step 5: Apply implicit wikilinks from the end backwards so the offsets
    // of earlier matches stay valid. The range of a quoted term includes its
    // quotes, so replacing the range with [[Term]] also removes the quotes.
    let processedContent = result.content;
    const implicitEntities = [];
    for (let i = acceptedMatches.length - 1; i >= 0; i--) {
        const { text, start, end } = acceptedMatches[i];
        processedContent = `${processedContent.slice(0, start)}[[${text}]]${processedContent.slice(end)}`;
        if (!implicitEntities.includes(text)) {
            implicitEntities.push(text);
        }
    }
    return {
        content: processedContent,
        linksAdded: result.linksAdded + acceptedMatches.length,
        linkedEntities: result.linkedEntities,
        implicitEntities,
    };
}
681
+ //# sourceMappingURL=wikilinks.js.map