@velvetmonkey/vault-core 2.0.98 → 2.0.100
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types.d.ts +3 -3
- package/dist/wikilinks.d.ts +2 -2
- package/dist/wikilinks.js +30 -32
- package/package.json +1 -1
package/dist/types.d.ts
CHANGED
|
@@ -131,9 +131,9 @@ export interface ImplicitEntityConfig {
|
|
|
131
131
|
detectImplicit?: boolean;
|
|
132
132
|
/**
|
|
133
133
|
* Which patterns to use for detection
|
|
134
|
-
* @default ['proper-nouns'
|
|
134
|
+
* @default ['proper-nouns']
|
|
135
135
|
*/
|
|
136
|
-
implicitPatterns?: Array<'proper-nouns' | '
|
|
136
|
+
implicitPatterns?: Array<'proper-nouns' | 'single-caps' | 'camel-case' | 'acronyms'>;
|
|
137
137
|
/**
|
|
138
138
|
* Regex patterns to exclude from implicit detection
|
|
139
139
|
* @default ['^The ', '^A ', '^An ', '^This ', '^That ', '^These ', '^Those ']
|
|
@@ -165,7 +165,7 @@ export interface ImplicitEntityMatch {
|
|
|
165
165
|
/** End position in content */
|
|
166
166
|
end: number;
|
|
167
167
|
/** Detection method used */
|
|
168
|
-
pattern: 'proper-nouns' | '
|
|
168
|
+
pattern: 'proper-nouns' | 'single-caps' | 'camel-case' | 'acronyms';
|
|
169
169
|
}
|
|
170
170
|
/**
|
|
171
171
|
* Options for resolving alias-based wikilinks
|
package/dist/wikilinks.d.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* respecting protected zones (code, frontmatter, existing links, etc.)
|
|
6
6
|
*
|
|
7
7
|
* Also supports:
|
|
8
|
-
* - Pattern-based detection for implicit entities (proper nouns,
|
|
8
|
+
* - Pattern-based detection for implicit entities (proper nouns, acronyms, CamelCase)
|
|
9
9
|
* - Alias resolution for existing wikilinks (resolves [[alias]] to [[Entity|alias]])
|
|
10
10
|
*/
|
|
11
11
|
import type { WikilinkOptions, WikilinkResult, Entity, ExtendedWikilinkOptions, ImplicitEntityMatch, ImplicitEntityConfig, ResolveAliasOptions } from './types.js';
|
|
@@ -62,7 +62,7 @@ export declare const IMPLICIT_EXCLUDE_WORDS: Set<string>;
|
|
|
62
62
|
* This finds potential entities that don't have existing files:
|
|
63
63
|
* - Multi-word proper nouns (e.g., "Marcus Johnson", "Project Alpha")
|
|
64
64
|
* - Single capitalized words after lowercase (e.g., "discussed with Marcus")
|
|
65
|
-
* -
|
|
65
|
+
* - CamelCase words (e.g., TypeScript, HuggingFace)
|
|
66
66
|
*
|
|
67
67
|
* @param content - The markdown content to analyze
|
|
68
68
|
* @param config - Configuration for detection patterns
|
package/dist/wikilinks.js
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* respecting protected zones (code, frontmatter, existing links, etc.)
|
|
6
6
|
*
|
|
7
7
|
* Also supports:
|
|
8
|
-
* - Pattern-based detection for implicit entities (proper nouns,
|
|
8
|
+
* - Pattern-based detection for implicit entities (proper nouns, acronyms, CamelCase)
|
|
9
9
|
* - Alias resolution for existing wikilinks (resolves [[alias]] to [[Entity|alias]])
|
|
10
10
|
*/
|
|
11
11
|
import { getProtectedZones, rangeOverlapsProtectedZone } from './protectedZones.js';
|
|
@@ -44,6 +44,12 @@ const EXCLUDE_WORDS = new Set([
|
|
|
44
44
|
// Stop words
|
|
45
45
|
'the', 'and', 'for', 'with', 'from', 'this', 'that',
|
|
46
46
|
'christmas', 'holiday', 'break',
|
|
47
|
+
// Common adjectives that should never be entities
|
|
48
|
+
'safe', 'new', 'old', 'local', 'native', 'first', 'related', 'similar',
|
|
49
|
+
'simple', 'basic', 'early', 'real', 'clear', 'fixed',
|
|
50
|
+
// Common verbs that should never be entities
|
|
51
|
+
'remember', 'include', 'avoid', 'provide', 'create', 'build', 'target',
|
|
52
|
+
'define', 'test', 'enable', 'handle', 'focus', 'track',
|
|
47
53
|
]);
|
|
48
54
|
/**
|
|
49
55
|
* Escape special regex characters in a string
|
|
@@ -55,7 +61,12 @@ function escapeRegex(str) {
|
|
|
55
61
|
* Check if an entity should be excluded from wikilinking
|
|
56
62
|
*/
|
|
57
63
|
function shouldExcludeEntity(entity) {
|
|
58
|
-
|
|
64
|
+
if (EXCLUDE_WORDS.has(entity.toLowerCase()))
|
|
65
|
+
return true;
|
|
66
|
+
// Skip lowercase hyphenated descriptors (e.g., self-improving, local-first, Claude-native)
|
|
67
|
+
if (entity.includes('-') && entity === entity.toLowerCase())
|
|
68
|
+
return true;
|
|
69
|
+
return false;
|
|
59
70
|
}
|
|
60
71
|
/**
|
|
61
72
|
* Find all matches of an entity in content with word boundaries
|
|
@@ -454,7 +465,7 @@ export function resolveAliasWikilinks(content, entities, options = {}) {
|
|
|
454
465
|
*/
|
|
455
466
|
const DEFAULT_IMPLICIT_CONFIG = {
|
|
456
467
|
detectImplicit: false,
|
|
457
|
-
implicitPatterns: ['proper-nouns'
|
|
468
|
+
implicitPatterns: ['proper-nouns'],
|
|
458
469
|
excludePatterns: ['^The ', '^A ', '^An ', '^This ', '^That ', '^These ', '^Those '],
|
|
459
470
|
minEntityLength: 3,
|
|
460
471
|
};
|
|
@@ -498,12 +509,19 @@ const SENTENCE_STARTER_WORDS = new Set([
|
|
|
498
509
|
'think', 'know', 'feel', 'seem', 'look', 'hear', 'watch', 'wait', 'work',
|
|
499
510
|
'start', 'stop', 'open', 'close', 'move', 'turn', 'bring', 'send', 'leave',
|
|
500
511
|
'meet', 'join', 'follow', 'include', 'consider', 'remember', 'forget',
|
|
512
|
+
// Additional common verbs/imperative starters
|
|
513
|
+
'target', 'create', 'build', 'write', 'avoid', 'provide', 'maintain',
|
|
514
|
+
'define', 'ensure', 'place', 'focus', 'track', 'enable', 'apply', 'test',
|
|
515
|
+
'handle', 'load', 'link', 'pass', 'save', 'lead', 'frame', 'point',
|
|
501
516
|
// Pronouns, possessives, determiners — capitalized at sentence start but not proper nouns
|
|
502
517
|
'my', 'your', 'his', 'her', 'its', 'our', 'their',
|
|
503
518
|
'some', 'any', 'every', 'each', 'both', 'few', 'many', 'most',
|
|
504
519
|
// Common adjectives that precede proper nouns at sentence start
|
|
505
520
|
'poor', 'old', 'new', 'big', 'little', 'great', 'good', 'bad',
|
|
506
521
|
'first', 'last', 'next', 'other', 'more', 'just', 'very',
|
|
522
|
+
// Additional adjectives/adverbs that appear capitalized at sentence starts
|
|
523
|
+
'still', 'clear', 'fixed', 'based', 'using', 'real', 'even',
|
|
524
|
+
'safe', 'local', 'native', 'early', 'similar', 'simple', 'basic', 'related',
|
|
507
525
|
]);
|
|
508
526
|
/**
|
|
509
527
|
* Detect implicit entities in content using pattern matching
|
|
@@ -511,7 +529,7 @@ const SENTENCE_STARTER_WORDS = new Set([
|
|
|
511
529
|
* This finds potential entities that don't have existing files:
|
|
512
530
|
* - Multi-word proper nouns (e.g., "Marcus Johnson", "Project Alpha")
|
|
513
531
|
* - Single capitalized words after lowercase (e.g., "discussed with Marcus")
|
|
514
|
-
* -
|
|
532
|
+
* - CamelCase words (e.g., TypeScript, HuggingFace)
|
|
515
533
|
*
|
|
516
534
|
* @param content - The markdown content to analyze
|
|
517
535
|
* @param config - Configuration for detection patterns
|
|
@@ -598,22 +616,6 @@ export function detectImplicitEntities(content, config = {}) {
|
|
|
598
616
|
}
|
|
599
617
|
}
|
|
600
618
|
}
|
|
601
|
-
// Pattern 3: Quoted terms (explicit entity markers)
|
|
602
|
-
// Matches "Turbopump" -> [[Turbopump]]
|
|
603
|
-
if (implicitPatterns.includes('quoted-terms')) {
|
|
604
|
-
const quotedRegex = /"([^"]{3,30})"/g;
|
|
605
|
-
let match;
|
|
606
|
-
while ((match = quotedRegex.exec(content)) !== null) {
|
|
607
|
-
const text = match[1];
|
|
608
|
-
// Include the quotes in the position for replacement
|
|
609
|
-
const start = match.index;
|
|
610
|
-
const end = start + match[0].length;
|
|
611
|
-
if (!shouldExclude(text) && !isProtected(start, end)) {
|
|
612
|
-
detected.push({ text, start, end, pattern: 'quoted-terms' });
|
|
613
|
-
seenTexts.add(text.toLowerCase());
|
|
614
|
-
}
|
|
615
|
-
}
|
|
616
|
-
}
|
|
617
619
|
// Pattern 4: CamelCase words (TypeScript, YouTube, HuggingFace)
|
|
618
620
|
if (implicitPatterns.includes('camel-case')) {
|
|
619
621
|
const camelRegex = /\b([A-Z][a-z]+[A-Z][a-zA-Z]*)\b/g;
|
|
@@ -634,6 +636,10 @@ export function detectImplicitEntities(content, config = {}) {
|
|
|
634
636
|
let match;
|
|
635
637
|
while ((match = acronymRegex.exec(content)) !== null) {
|
|
636
638
|
const text = match[1];
|
|
639
|
+
// Skip long ALL-CAPS words (>5 chars) — likely English words in caps, not acronyms
|
|
640
|
+
// Real acronyms are typically 2-5 chars (API, SQL, LLM, ONNX)
|
|
641
|
+
if (text.length > 5)
|
|
642
|
+
continue;
|
|
637
643
|
const start = match.index;
|
|
638
644
|
const end = start + text.length;
|
|
639
645
|
if (!shouldExclude(text) && !isProtected(start, end)) {
|
|
@@ -739,18 +745,10 @@ export function processWikilinks(content, entities, options = {}) {
|
|
|
739
745
|
let wikilink;
|
|
740
746
|
let replaceStart;
|
|
741
747
|
let replaceEnd;
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
replaceEnd = match.end;
|
|
747
|
-
}
|
|
748
|
-
else {
|
|
749
|
-
// Replace Term with [[Term]]
|
|
750
|
-
wikilink = `[[${match.text}]]`;
|
|
751
|
-
replaceStart = match.start;
|
|
752
|
-
replaceEnd = match.end;
|
|
753
|
-
}
|
|
748
|
+
// Replace Term with [[Term]]
|
|
749
|
+
wikilink = `[[${match.text}]]`;
|
|
750
|
+
replaceStart = match.start;
|
|
751
|
+
replaceEnd = match.end;
|
|
754
752
|
processedContent =
|
|
755
753
|
processedContent.slice(0, replaceStart) +
|
|
756
754
|
wikilink +
|
package/package.json
CHANGED