@velvetmonkey/vault-core 2.0.30 → 2.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/entities.d.ts +50 -0
- package/dist/src/entities.d.ts.map +1 -0
- package/dist/src/entities.js +499 -0
- package/dist/src/entities.js.map +1 -0
- package/dist/src/index.d.ts +15 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +23 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/logging/index.d.ts +7 -0
- package/dist/src/logging/index.d.ts.map +1 -0
- package/dist/src/logging/index.js +7 -0
- package/dist/src/logging/index.js.map +1 -0
- package/dist/src/logging/operationLogger.d.ts +59 -0
- package/dist/src/logging/operationLogger.d.ts.map +1 -0
- package/dist/src/logging/operationLogger.js +282 -0
- package/dist/src/logging/operationLogger.js.map +1 -0
- package/dist/src/logging/sessionManager.d.ts +35 -0
- package/dist/src/logging/sessionManager.d.ts.map +1 -0
- package/dist/src/logging/sessionManager.js +68 -0
- package/dist/src/logging/sessionManager.js.map +1 -0
- package/dist/src/logging/types.d.ts +123 -0
- package/dist/src/logging/types.d.ts.map +1 -0
- package/dist/src/logging/types.js +23 -0
- package/dist/src/logging/types.js.map +1 -0
- package/dist/src/protectedZones.d.ts +36 -0
- package/dist/src/protectedZones.d.ts.map +1 -0
- package/dist/src/protectedZones.js +114 -0
- package/dist/src/protectedZones.js.map +1 -0
- package/dist/src/sqlite.d.ts +273 -0
- package/dist/src/sqlite.d.ts.map +1 -0
- package/dist/src/sqlite.js +959 -0
- package/dist/src/sqlite.js.map +1 -0
- package/dist/src/types.d.ts +171 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js +5 -0
- package/dist/src/types.js.map +1 -0
- package/dist/src/wikilinks.d.ts +76 -0
- package/dist/src/wikilinks.d.ts.map +1 -0
- package/dist/src/wikilinks.js +681 -0
- package/dist/src/wikilinks.js.map +1 -0
- package/package.json +2 -2
|
@@ -0,0 +1,681 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wikilink application logic
|
|
3
|
+
*
|
|
4
|
+
* Applies [[wikilinks]] to known entities in content while
|
|
5
|
+
* respecting protected zones (code, frontmatter, existing links, etc.)
|
|
6
|
+
*
|
|
7
|
+
* Also supports:
|
|
8
|
+
* - Pattern-based detection for implicit entities (proper nouns, quoted terms)
|
|
9
|
+
* - Alias resolution for existing wikilinks (resolves [[alias]] to [[Entity|alias]])
|
|
10
|
+
*/
|
|
11
|
+
import { getProtectedZones, rangeOverlapsProtectedZone } from './protectedZones.js';
|
|
12
|
+
/**
 * Get all search terms for an entity (name + aliases).
 *
 * Plain-string entities yield a single self-referential term; Entity
 * objects yield one term per name/alias, each mapping back to the
 * canonical entity name so links can target the right note.
 *
 * @param entity - Entity name string, or an object with `name` and `aliases`
 * @returns Array of { term, entityName } pairs
 */
function getSearchTerms(entity) {
    if (typeof entity === 'string') {
        return [{ term: entity, entityName: entity }];
    }
    // Name first, then aliases — every term resolves to the canonical name.
    return [entity.name, ...entity.aliases].map((term) => ({
        term,
        entityName: entity.name,
    }));
}
|
|
29
|
+
/**
 * Common words to exclude from wikilink suggestions.
 * Entries are lowercase; lookups lowercase the input (see shouldExcludeEntity).
 */
const EXCLUDE_WORDS = new Set([
    // Days of the week and months
    'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
    'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
    'september', 'october', 'november', 'december',
    // Relative dates and calendar units
    'today', 'tomorrow', 'yesterday', 'week', 'month', 'year',
    // Common stop words
    'the', 'and', 'for', 'with', 'from', 'this', 'that',
    // Seasonal/event words that show up in daily notes
    'christmas', 'holiday', 'break',
]);
|
|
40
|
+
/**
 * Escape special regex characters in a string so it can be embedded
 * verbatim inside a RegExp pattern.
 *
 * @param str - Raw text that may contain regex metacharacters
 * @returns The text with every metacharacter backslash-escaped
 */
function escapeRegex(str) {
    const metacharacters = new Set(['.', '*', '+', '?', '^', '$', '{', '}', '(', ')', '|', '[', ']', '\\']);
    let escaped = '';
    for (const ch of str) {
        escaped += metacharacters.has(ch) ? '\\' + ch : ch;
    }
    return escaped;
}
|
|
46
|
+
/**
 * Check if an entity should be excluded from wikilinking.
 * Comparison is case-insensitive: EXCLUDE_WORDS stores lowercase entries.
 *
 * @param entity - Entity name or search term to test
 * @returns true if the term is a common word that should never be linked
 */
function shouldExcludeEntity(entity) {
    return EXCLUDE_WORDS.has(entity.toLowerCase());
}
|
|
52
|
+
/**
 * Find all matches of an entity in content with word boundaries.
 *
 * @param content - Text to search
 * @param entity - Literal term to find (regex-escaped internally)
 * @param caseInsensitive - When true, match with the 'i' flag
 * @returns Array of { start, end, matched } spans, in document order
 */
function findEntityMatches(content, entity, caseInsensitive) {
    // Guard: an empty term would build the zero-width pattern \b\b, and
    // exec() never advances lastIndex past a zero-length match, so the
    // while-loop below would spin forever. Nothing sensible to match anyway.
    if (!entity) {
        return [];
    }
    const pattern = `\\b${escapeRegex(entity)}\\b`;
    const flags = caseInsensitive ? 'gi' : 'g';
    const regex = new RegExp(pattern, flags);
    const matches = [];
    let match;
    while ((match = regex.exec(content)) !== null) {
        matches.push({
            start: match.index,
            end: match.index + match[0].length,
            matched: match[0],
        });
        // Belt-and-braces: step over any zero-length match so a stateful
        // /g regex can never stall the loop.
        if (match[0].length === 0) {
            regex.lastIndex++;
        }
    }
    return matches;
}
|
|
70
|
+
/**
 * Apply wikilinks to entities in content
 *
 * Replaces bare mentions of known entities (or their aliases) with
 * [[Entity]] / [[Entity|matched text]] links, skipping protected zones
 * (code, frontmatter, existing links, etc. — see getProtectedZones).
 *
 * @param content - The markdown content to process
 * @param entities - List of entity names or Entity objects to look for
 * @param options - Wikilink options ({ firstOccurrenceOnly?, caseInsensitive? })
 * @returns Result with updated content and statistics:
 *          { content, linksAdded, linkedEntities }
 */
export function applyWikilinks(content, entities, options = {}) {
    const { firstOccurrenceOnly = true, caseInsensitive = true, } = options;
    if (!entities.length) {
        return {
            content,
            linksAdded: 0,
            linkedEntities: [],
        };
    }
    // Build search terms from all entities (names + aliases)
    // Each term maps back to its canonical entity name
    const allSearchTerms = [];
    for (const entity of entities) {
        const terms = getSearchTerms(entity);
        for (const t of terms) {
            if (!shouldExcludeEntity(t.term)) {
                allSearchTerms.push(t);
            }
        }
    }
    // Sort by term length (longest first) to avoid partial matches
    allSearchTerms.sort((a, b) => b.term.length - a.term.length);
    // Get protected zones
    let zones = getProtectedZones(content);
    let result = content;
    let linksAdded = 0;
    const linkedEntities = [];
    if (firstOccurrenceOnly) {
        // For firstOccurrenceOnly mode, we need to find the earliest match across
        // all terms (name + aliases) for each entity, then link that one
        // Also need to handle overlapping matches between different entities
        // First, collect ALL valid matches for each entity (name + aliases combined)
        // Keyed by lowercased entity name so name/alias hits pool together.
        const entityAllMatches = new Map();
        for (const { term, entityName } of allSearchTerms) {
            const entityKey = entityName.toLowerCase();
            // Find all matches of the search term
            const matches = findEntityMatches(result, term, caseInsensitive);
            // Filter out matches in protected zones
            const validMatches = matches.filter(match => !rangeOverlapsProtectedZone(match.start, match.end, zones));
            if (validMatches.length === 0) {
                continue;
            }
            // Add to entity's matches
            const existingMatches = entityAllMatches.get(entityKey) || [];
            for (const match of validMatches) {
                existingMatches.push({ term, match });
            }
            entityAllMatches.set(entityKey, existingMatches);
        }
        // Sort each entity's matches by position
        for (const [_entityKey, matches] of entityAllMatches.entries()) {
            matches.sort((a, b) => a.match.start - b.match.start);
        }
        // Build final list: for each entity, pick the earliest non-overlapping match
        // Process entities in order of their earliest match length (longest first for same position)
        let allCandidates = [];
        for (const [entityKey, matches] of entityAllMatches.entries()) {
            // Find the original entityName (with correct casing)
            const entityName = allSearchTerms.find(t => t.entityName.toLowerCase() === entityKey)?.entityName || entityKey;
            for (const m of matches) {
                allCandidates.push({ entityName, ...m });
            }
        }
        // Sort by position, then by match length (descending), then by term length (ascending)
        // The term length tiebreaker ensures "API" wins over "API Management" when both match "api"
        allCandidates.sort((a, b) => {
            // Primary: earliest position first
            if (a.match.start !== b.match.start)
                return a.match.start - b.match.start;
            // Secondary: longest matched text first
            if (a.match.matched.length !== b.match.matched.length)
                return b.match.matched.length - a.match.matched.length;
            // Tertiary: shorter entity term first (more exact match)
            return a.term.length - b.term.length;
        });
        // Select non-overlapping matches, preferring longer ones at same position
        // Each entity gets at most one match
        const selectedMatches = [];
        const selectedEntityNames = new Set();
        for (const candidate of allCandidates) {
            const entityKey = candidate.entityName.toLowerCase();
            // Skip if this entity already has a selected match
            if (selectedEntityNames.has(entityKey)) {
                continue;
            }
            // Check if this overlaps with any already selected match
            // (start inside existing, end inside existing, or fully containing it)
            const overlaps = selectedMatches.some(existing => (candidate.match.start >= existing.match.start && candidate.match.start < existing.match.end) ||
                (candidate.match.end > existing.match.start && candidate.match.end <= existing.match.end) ||
                (candidate.match.start <= existing.match.start && candidate.match.end >= existing.match.end));
            if (!overlaps) {
                selectedMatches.push(candidate);
                selectedEntityNames.add(entityKey);
            }
        }
        // Sort by position from end to start to preserve offsets when inserting
        selectedMatches.sort((a, b) => b.match.start - a.match.start);
        for (const { entityName, term: _term, match } of selectedMatches) {
            // Use display text format when matched text differs from entity name
            const matchedTextLower = match.matched.toLowerCase();
            const entityNameLower = entityName.toLowerCase();
            const wikilink = matchedTextLower === entityNameLower
                ? `[[${entityName}]]`
                : `[[${entityName}|${match.matched}]]`;
            result = result.slice(0, match.start) + wikilink + result.slice(match.end);
            // Update protected zones (shift positions after insertion)
            const shift = wikilink.length - match.matched.length;
            zones = zones.map(zone => ({
                ...zone,
                start: zone.start <= match.start ? zone.start : zone.start + shift,
                end: zone.end <= match.start ? zone.end : zone.end + shift,
            }));
            // Add new wikilink as protected zone
            zones.push({
                start: match.start,
                end: match.start + wikilink.length,
                type: 'wikilink',
            });
            zones.sort((a, b) => a.start - b.start);
            linksAdded++;
            if (!linkedEntities.includes(entityName)) {
                linkedEntities.push(entityName);
            }
        }
    }
    else {
        // For all occurrences mode, process each term
        for (const { term, entityName } of allSearchTerms) {
            // Find all matches of the search term
            const matches = findEntityMatches(result, term, caseInsensitive);
            // Filter out matches in protected zones
            const validMatches = matches.filter(match => !rangeOverlapsProtectedZone(match.start, match.end, zones));
            if (validMatches.length === 0) {
                continue;
            }
            // Process from end to start to preserve positions
            const matchesToProcess = [...validMatches].reverse();
            for (const match of matchesToProcess) {
                // Use display text format when matched text differs from entity name
                const matchedTextLower = match.matched.toLowerCase();
                const entityNameLower = entityName.toLowerCase();
                const wikilink = matchedTextLower === entityNameLower
                    ? `[[${entityName}]]`
                    : `[[${entityName}|${match.matched}]]`;
                result = result.slice(0, match.start) + wikilink + result.slice(match.end);
                // Update protected zones (shift positions after insertion)
                const shift = wikilink.length - match.matched.length;
                zones = zones.map(zone => ({
                    ...zone,
                    start: zone.start <= match.start ? zone.start : zone.start + shift,
                    end: zone.end <= match.start ? zone.end : zone.end + shift,
                }));
                // Add new wikilink as protected zone
                zones.push({
                    start: match.start,
                    end: match.start + wikilink.length,
                    type: 'wikilink',
                });
                zones.sort((a, b) => a.start - b.start);
                linksAdded++;
                if (!linkedEntities.includes(entityName)) {
                    linkedEntities.push(entityName);
                }
            }
        }
    }
    return {
        content: result,
        linksAdded,
        linkedEntities,
    };
}
|
|
249
|
+
/**
 * Suggest wikilinks without applying them
 * Returns a list of potential links with their positions
 *
 * Supports both entity names and aliases - if content matches an alias,
 * the suggestion will contain the canonical entity name.
 *
 * @param content - The markdown content to scan (not modified)
 * @param entities - List of entity names or Entity objects to look for
 * @param options - Options ({ firstOccurrenceOnly?, caseInsensitive? })
 * @returns Array of { entity, start, end, context } suggestions;
 *          context is ~20 chars around the match, '...'-prefixed when truncated
 */
export function suggestWikilinks(content, entities, options = {}) {
    const { firstOccurrenceOnly = true, caseInsensitive = true, } = options;
    const suggestions = [];
    if (!entities.length) {
        return suggestions;
    }
    // Build search terms from all entities (names + aliases)
    // Each term maps back to its canonical entity name
    const allSearchTerms = [];
    for (const entity of entities) {
        const terms = getSearchTerms(entity);
        for (const t of terms) {
            if (!shouldExcludeEntity(t.term)) {
                allSearchTerms.push(t);
            }
        }
    }
    // Sort by term length (longest first) to prioritize longer matches
    allSearchTerms.sort((a, b) => b.term.length - a.term.length);
    // Get protected zones
    const zones = getProtectedZones(content);
    if (firstOccurrenceOnly) {
        // For firstOccurrenceOnly mode, find the earliest match across all terms
        // for each entity, similar to applyWikilinks behavior
        const entityAllMatches = new Map();
        for (const { term, entityName } of allSearchTerms) {
            const entityKey = entityName.toLowerCase();
            const matches = findEntityMatches(content, term, caseInsensitive);
            // Filter out matches in protected zones
            const validMatches = matches.filter(match => !rangeOverlapsProtectedZone(match.start, match.end, zones));
            if (validMatches.length === 0)
                continue;
            // Add to entity's matches
            const existingMatches = entityAllMatches.get(entityKey) || [];
            for (const match of validMatches) {
                existingMatches.push({ match, entityName });
            }
            entityAllMatches.set(entityKey, existingMatches);
        }
        // For each entity, pick the earliest match
        const selectedSuggestions = [];
        for (const [_entityKey, matches] of entityAllMatches.entries()) {
            // Sort by position and pick the earliest
            matches.sort((a, b) => a.match.start - b.match.start);
            const earliest = matches[0];
            const contextStart = Math.max(0, earliest.match.start - 20);
            const contextEnd = Math.min(content.length, earliest.match.end + 20);
            const context = content.slice(contextStart, contextEnd);
            selectedSuggestions.push({
                entity: earliest.entityName,
                start: earliest.match.start,
                end: earliest.match.end,
                context: contextStart > 0 ? '...' + context : context,
            });
        }
        // Sort suggestions by position
        selectedSuggestions.sort((a, b) => a.start - b.start);
        return selectedSuggestions;
    }
    // For all occurrences mode, process each term
    for (const { term, entityName } of allSearchTerms) {
        const matches = findEntityMatches(content, term, caseInsensitive);
        for (const match of matches) {
            // Skip if in protected zone
            if (rangeOverlapsProtectedZone(match.start, match.end, zones)) {
                continue;
            }
            // Extract context (surrounding text)
            const contextStart = Math.max(0, match.start - 20);
            const contextEnd = Math.min(content.length, match.end + 20);
            const context = content.slice(contextStart, contextEnd);
            // Return the canonical entity name, not the matched term
            suggestions.push({
                entity: entityName,
                start: match.start,
                end: match.end,
                context: contextStart > 0 ? '...' + context : context,
            });
        }
    }
    return suggestions;
}
|
|
338
|
+
/**
 * Resolve wikilinks that target aliases to their canonical entity names
 *
 * When a user types [[model context protocol]], and "Model Context Protocol"
 * is an alias for entity "MCP", this function transforms it to:
 * [[MCP|model context protocol]]
 *
 * This preserves the user's original text as display text while resolving
 * to the canonical entity target.
 *
 * Note: plain-string entities carry no aliases and are skipped entirely.
 *
 * @param content - The markdown content to process
 * @param entities - List of entity names or Entity objects to look for
 * @param options - Resolution options ({ caseInsensitive? })
 * @returns Result with updated content and statistics:
 *          { content, linksAdded, linkedEntities }
 */
export function resolveAliasWikilinks(content, entities, options = {}) {
    const { caseInsensitive = true } = options;
    if (!entities.length) {
        return {
            content,
            linksAdded: 0,
            linkedEntities: [],
        };
    }
    // Build alias → entity lookup map
    // Key: alias (lowercase if caseInsensitive)
    // Value: { entityName: canonical name, aliasText: original alias casing }
    const aliasMap = new Map();
    for (const entity of entities) {
        // String entities have no alias list to index
        if (typeof entity === 'string')
            continue;
        for (const alias of entity.aliases) {
            const key = caseInsensitive ? alias.toLowerCase() : alias;
            aliasMap.set(key, { entityName: entity.name, aliasText: alias });
        }
        // Also map the entity name itself so we can detect if target already points to entity
        const nameKey = caseInsensitive ? entity.name.toLowerCase() : entity.name;
        // Don't overwrite if name happens to be an alias of another entity
        if (!aliasMap.has(nameKey)) {
            aliasMap.set(nameKey, { entityName: entity.name, aliasText: entity.name });
        }
    }
    // Find wikilinks: [[target]] or [[target|display]]
    const wikilinkRegex = /\[\[([^\]|]+)(\|[^\]]+)?\]\]/g;
    let result = content;
    let linksResolved = 0;
    const resolvedEntities = [];
    // Collect all matches first, then process from end to preserve positions
    const matches = [];
    let match;
    while ((match = wikilinkRegex.exec(content)) !== null) {
        matches.push({
            fullMatch: match[0],
            target: match[1],
            displayPart: match[2], // includes | if present
            index: match.index,
        });
    }
    // Process from end to start to preserve positions
    for (let i = matches.length - 1; i >= 0; i--) {
        const { fullMatch, target, displayPart, index } = matches[i];
        const targetKey = caseInsensitive ? target.toLowerCase() : target;
        // Check if target matches an alias
        const aliasInfo = aliasMap.get(targetKey);
        if (!aliasInfo) {
            // Target doesn't match any alias or entity name - leave unchanged
            continue;
        }
        // Check if already pointing to the entity name (no resolution needed)
        const entityNameKey = caseInsensitive ? aliasInfo.entityName.toLowerCase() : aliasInfo.entityName;
        if (targetKey === entityNameKey) {
            // Already pointing to entity name, no change needed
            continue;
        }
        // Target matches an alias! Resolve to canonical entity
        let newWikilink;
        if (displayPart) {
            // Has existing display text: [[alias|display]] → [[Entity|display]]
            newWikilink = `[[${aliasInfo.entityName}${displayPart}]]`;
        }
        else {
            // No display text: [[alias]] → [[Entity|alias]]
            // Preserve the user's original casing of the alias
            newWikilink = `[[${aliasInfo.entityName}|${target}]]`;
        }
        result = result.slice(0, index) + newWikilink + result.slice(index + fullMatch.length);
        linksResolved++;
        if (!resolvedEntities.includes(aliasInfo.entityName)) {
            resolvedEntities.push(aliasInfo.entityName);
        }
    }
    return {
        content: result,
        linksAdded: linksResolved,
        linkedEntities: resolvedEntities,
    };
}
|
|
435
|
+
/**
 * Default configuration for implicit entity detection
 * (see detectImplicitEntities for how each field is consumed)
 */
const DEFAULT_IMPLICIT_CONFIG = {
    // Implicit detection is opt-in
    detectImplicit: false,
    // Pattern detectors enabled by default ('single-caps' must be opted into)
    implicitPatterns: ['proper-nouns', 'quoted-terms'],
    // Pattern strings compiled with the 'i' flag; a match disqualifies a candidate
    excludePatterns: ['^The ', '^A ', '^An ', '^This ', '^That ', '^These ', '^Those '],
    // Candidates shorter than this many characters are ignored
    minEntityLength: 3,
};
|
|
444
|
+
/**
 * Common words that should not be detected as implicit entities.
 * Entries are lowercase; candidates are lowercased before lookup.
 */
const IMPLICIT_EXCLUDE_WORDS = new Set([
    // Days and months (already in EXCLUDE_WORDS but duplicated for safety)
    'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
    'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
    'september', 'october', 'november', 'december',
    // Common sentence starters
    'this', 'that', 'these', 'those', 'there', 'here', 'when', 'where', 'what',
    'which', 'while', 'since', 'after', 'before', 'during', 'until', 'because',
    'however', 'therefore', 'although', 'though', 'unless', 'whether',
    // Common proper-looking words that aren't entities
    'note', 'notes', 'example', 'chapter', 'section', 'part', 'item', 'figure',
    'table', 'list', 'step', 'task', 'todo', 'idea', 'thought', 'question',
    'answer', 'summary', 'overview', 'introduction', 'conclusion',
    // Technical terms that look like proper nouns
    'true', 'false', 'null', 'undefined', 'none', 'class', 'function', 'method',
]);
|
|
463
|
+
/**
 * Words that commonly start sentences but should not start a proper noun entity.
 * These are checked separately because they might appear capitalized at sentence start.
 *
 * Entries are lowercase; the detector lowercases a candidate's first word
 * before checking membership. (A duplicate 'look' literal was removed —
 * Set construction dedupes, so membership is unchanged.)
 */
const SENTENCE_STARTER_WORDS = new Set([
    'visit', 'also', 'see', 'please', 'note', 'check', 'read', 'look', 'find',
    'get', 'set', 'add', 'use', 'try', 'make', 'take', 'give', 'keep', 'let',
    'call', 'run', 'ask', 'tell', 'show', 'help', 'need', 'want', 'like',
    'think', 'know', 'feel', 'seem', 'hear', 'watch', 'wait', 'work',
    'start', 'stop', 'open', 'close', 'move', 'turn', 'bring', 'send', 'leave',
    'meet', 'join', 'follow', 'include', 'consider', 'remember', 'forget',
]);
|
|
475
|
+
/**
 * Detect implicit entities in content using pattern matching
 *
 * This finds potential entities that don't have existing files:
 * - Multi-word proper nouns (e.g., "Marcus Johnson", "Project Alpha")
 * - Single capitalized words after lowercase (e.g., "discussed with Marcus")
 * - Quoted terms (e.g., "Turbopump" becomes [[Turbopump]])
 *
 * Results are deduplicated case-insensitively (first hit wins) and
 * returned sorted by start position.
 *
 * @param content - The markdown content to analyze
 * @param config - Configuration for detection patterns
 *                 ({ implicitPatterns?, excludePatterns?, minEntityLength? })
 * @returns Array of detected implicit entity matches
 *          ({ text, start, end, pattern })
 */
export function detectImplicitEntities(content, config = {}) {
    const { implicitPatterns = DEFAULT_IMPLICIT_CONFIG.implicitPatterns, excludePatterns = DEFAULT_IMPLICIT_CONFIG.excludePatterns, minEntityLength = DEFAULT_IMPLICIT_CONFIG.minEntityLength, } = config;
    const detected = [];
    const seenTexts = new Set();
    // Get protected zones to avoid detecting entities in code/links/etc.
    const zones = getProtectedZones(content);
    // Build exclude regex from patterns (compiled once, case-insensitive)
    const excludeRegexes = excludePatterns.map(p => new RegExp(p, 'i'));
    /**
     * Check if detected text should be excluded
     * (too short, common word, matches an exclude pattern, or already seen)
     */
    function shouldExclude(text) {
        // Length check
        if (text.length < minEntityLength)
            return true;
        // Common words
        if (IMPLICIT_EXCLUDE_WORDS.has(text.toLowerCase()))
            return true;
        // Exclude patterns
        for (const regex of excludeRegexes) {
            if (regex.test(text))
                return true;
        }
        // Already seen (dedup)
        const normalized = text.toLowerCase();
        if (seenTexts.has(normalized))
            return true;
        return false;
    }
    /**
     * Check if match is in a protected zone
     */
    function isProtected(start, end) {
        return rangeOverlapsProtectedZone(start, end, zones);
    }
    // Pattern 1: Multi-word proper nouns
    // Matches "Marcus Johnson", "Project Alpha", "San Francisco Bay Area"
    if (implicitPatterns.includes('proper-nouns')) {
        const properNounRegex = /\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b/g;
        let match;
        while ((match = properNounRegex.exec(content)) !== null) {
            let text = match[1];
            let start = match.index;
            let end = start + match[0].length;
            // Check if first word is a common sentence starter (e.g., "Visit", "Also", "See")
            // If so, trim it and use the remaining words as the entity
            const firstSpaceIndex = text.indexOf(' ');
            if (firstSpaceIndex > 0) {
                const firstWord = text.substring(0, firstSpaceIndex).toLowerCase();
                if (SENTENCE_STARTER_WORDS.has(firstWord)) {
                    // Trim the first word and recalculate positions
                    text = text.substring(firstSpaceIndex + 1);
                    start = start + firstSpaceIndex + 1;
                    // Only keep if remaining text has 2+ words (still a proper noun phrase)
                    if (!text.includes(' ')) {
                        continue; // Skip single-word remainder
                    }
                }
            }
            if (!shouldExclude(text) && !isProtected(start, end)) {
                detected.push({ text, start, end, pattern: 'proper-nouns' });
                seenTexts.add(text.toLowerCase());
            }
        }
    }
    // Pattern 2: Single capitalized words after lowercase
    // Matches "discussed with Marcus yesterday" -> "Marcus"
    if (implicitPatterns.includes('single-caps')) {
        // Lookbehind for lowercase letter + space
        const singleCapRegex = /(?<=[a-z]\s)([A-Z][a-z]{3,})\b/g;
        let match;
        while ((match = singleCapRegex.exec(content)) !== null) {
            const text = match[1];
            const start = match.index;
            const end = start + match[0].length;
            if (!shouldExclude(text) && !isProtected(start, end)) {
                detected.push({ text, start, end, pattern: 'single-caps' });
                seenTexts.add(text.toLowerCase());
            }
        }
    }
    // Pattern 3: Quoted terms (explicit entity markers)
    // Matches "Turbopump" -> [[Turbopump]]
    if (implicitPatterns.includes('quoted-terms')) {
        const quotedRegex = /"([^"]{3,30})"/g;
        let match;
        while ((match = quotedRegex.exec(content)) !== null) {
            const text = match[1];
            // Include the quotes in the position for replacement
            const start = match.index;
            const end = start + match[0].length;
            if (!shouldExclude(text) && !isProtected(start, end)) {
                detected.push({ text, start, end, pattern: 'quoted-terms' });
                seenTexts.add(text.toLowerCase());
            }
        }
    }
    // Sort by position
    detected.sort((a, b) => a.start - b.start);
    return detected;
}
|
|
588
|
+
/**
 * Process wikilinks with support for both existing entities and implicit detection
 *
 * This is the main entry point that combines:
 * 1. applyWikilinks() for known entities from the vault index
 * 2. detectImplicitEntities() for pattern-based detection
 *
 * Implicit links are added on top of the applyWikilinks() result; entities
 * linked there (and all known entity names, and the current note itself)
 * are excluded from implicit linking to avoid duplicates and self-links.
 *
 * @param content - The markdown content to process
 * @param entities - List of known entity names or Entity objects
 * @param options - Extended options including implicit entity config
 *                  ({ detectImplicit?, implicitPatterns?, excludePatterns?,
 *                     minEntityLength?, notePath?, ...wikilink options })
 * @returns Result with updated content and statistics; when implicit links
 *          are added, also carries an `implicitEntities` array
 */
export function processWikilinks(content, entities, options = {}) {
    const { detectImplicit = false, implicitPatterns, excludePatterns, minEntityLength, notePath, ...wikilinkOptions } = options;
    // Step 1: Apply wikilinks for known entities
    const result = applyWikilinks(content, entities, wikilinkOptions);
    // If implicit detection is disabled, return the basic result
    if (!detectImplicit) {
        return result;
    }
    // Step 2: Detect implicit entities in the already-processed content
    const implicitMatches = detectImplicitEntities(result.content, {
        detectImplicit: true,
        implicitPatterns,
        excludePatterns,
        minEntityLength,
    });
    if (implicitMatches.length === 0) {
        return result;
    }
    // Step 3: Build set of already-linked entities (case-insensitive)
    const alreadyLinked = new Set(result.linkedEntities.map(e => e.toLowerCase()));
    // Also add all known entity names to avoid duplicate linking
    for (const entity of entities) {
        const name = typeof entity === 'string' ? entity : entity.name;
        alreadyLinked.add(name.toLowerCase());
    }
    // Get current note name if provided (to avoid self-links)
    // e.g. 'dir/My Note.md' -> 'my note'
    const currentNoteName = notePath
        ? notePath.replace(/\.md$/, '').split('/').pop()?.toLowerCase()
        : null;
    // Step 4: Filter implicit matches that don't conflict with existing links
    const newImplicitMatches = implicitMatches.filter(match => {
        const normalized = match.text.toLowerCase();
        // Skip if already linked as known entity
        if (alreadyLinked.has(normalized))
            return false;
        // Skip self-links
        if (currentNoteName && normalized === currentNoteName)
            return false;
        return true;
    });
    if (newImplicitMatches.length === 0) {
        return result;
    }
    // Step 5: Apply implicit wikilinks (process from end to preserve positions)
    let processedContent = result.content;
    const implicitEntities = [];
    // Process from end to start
    for (let i = newImplicitMatches.length - 1; i >= 0; i--) {
        const match = newImplicitMatches[i];
        // For quoted terms, we replace "Term" with [[Term]]
        // For other patterns, we replace Term with [[Term]]
        let wikilink;
        let replaceStart;
        let replaceEnd;
        if (match.pattern === 'quoted-terms') {
            // Replace "Term" with [[Term]] (remove quotes)
            // match.start/end span includes the surrounding quotes
            wikilink = `[[${match.text}]]`;
            replaceStart = match.start;
            replaceEnd = match.end;
        }
        else {
            // Replace Term with [[Term]]
            wikilink = `[[${match.text}]]`;
            replaceStart = match.start;
            replaceEnd = match.end;
        }
        processedContent =
            processedContent.slice(0, replaceStart) +
                wikilink +
                processedContent.slice(replaceEnd);
        if (!implicitEntities.includes(match.text)) {
            implicitEntities.push(match.text);
        }
    }
    return {
        content: processedContent,
        // Counts every applied implicit match, not just unique entity names
        linksAdded: result.linksAdded + newImplicitMatches.length,
        linkedEntities: result.linkedEntities,
        implicitEntities,
    };
}
|
|
681
|
+
//# sourceMappingURL=wikilinks.js.map
|