@velvetmonkey/vault-core 2.0.125 → 2.0.127

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/wikilinks.js +544 -22
  2. package/package.json +1 -1
package/dist/wikilinks.js CHANGED
@@ -27,7 +27,8 @@ function getSearchTerms(entity) {
27
27
  return terms;
28
28
  }
29
29
  /**
30
- * Common words to exclude from wikilink suggestions
30
+ * Common words to exclude from wikilink matching.
31
+ * These words are never wikified even when they match entity names or aliases.
31
32
  */
32
33
  const EXCLUDE_WORDS = new Set([
33
34
  // Day names
@@ -41,15 +42,191 @@ const EXCLUDE_WORDS = new Set([
41
42
  'month end', 'month start', 'year end', 'year start',
42
43
  'quarter end', 'quarter start', 'quarterly review',
43
44
  'weekly review', 'monthly review', 'annual review',
44
- // Stop words
45
- 'the', 'and', 'for', 'with', 'from', 'this', 'that',
46
45
  'christmas', 'holiday', 'break',
47
- // Common adjectives that should never be entities
48
- 'safe', 'new', 'old', 'local', 'native', 'first', 'related', 'similar',
49
- 'simple', 'basic', 'early', 'real', 'clear', 'fixed',
50
- // Common verbs that should never be entities
51
- 'remember', 'include', 'avoid', 'provide', 'create', 'build', 'target',
52
- 'define', 'test', 'enable', 'handle', 'focus', 'track',
46
+ // --- Two-char common words (pronouns, prepositions, conjunctions) ---
47
+ 'me', 'us', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'go',
48
+ 'no', 'so', 'up', 'if', 'or', 'as', 'at', 'by', 'on', 'in', 'to',
49
+ 'of', 'an', 'my', 'oh', 'ok',
50
+ // --- Pronouns (personal, possessive, reflexive, relative, demonstrative) ---
51
+ 'she', 'her', 'him', 'his', 'they', 'them', 'their', 'its', 'our', 'ours',
52
+ 'who', 'whom', 'whose', 'what', 'which', 'mine', 'yours', 'hers', 'theirs',
53
+ 'myself', 'yourself', 'himself', 'herself', 'itself', 'ourselves', 'themselves',
54
+ // --- Stop words & determiners ---
55
+ 'the', 'and', 'for', 'with', 'from', 'this', 'that', 'these', 'those',
56
+ 'some', 'any', 'each', 'both', 'few', 'many', 'most', 'such',
57
+ // --- Prepositions ---
58
+ 'about', 'above', 'across', 'after', 'against', 'along', 'among', 'around',
59
+ 'before', 'behind', 'below', 'beneath', 'beside', 'between', 'beyond',
60
+ 'despite', 'down', 'during', 'except', 'inside', 'into', 'near',
61
+ 'off', 'onto', 'outside', 'over', 'past', 'since', 'through',
62
+ 'toward', 'towards', 'under', 'underneath', 'until', 'upon', 'within', 'without',
63
+ // --- Conjunctions ---
64
+ 'although', 'because', 'however', 'therefore', 'moreover', 'furthermore',
65
+ 'nevertheless', 'otherwise', 'meanwhile', 'whereas', 'whenever', 'wherever',
66
+ 'whether', 'while', 'unless', 'though', 'hence',
67
+ // --- Common adverbs ---
68
+ 'again', 'already', 'always', 'almost', 'also', 'away',
69
+ 'back', 'certainly', 'clearly', 'completely', 'currently',
70
+ 'directly', 'effectively', 'enough', 'especially', 'essentially',
71
+ 'eventually', 'ever', 'exactly', 'fairly', 'finally', 'frequently',
72
+ 'fully', 'generally', 'gradually', 'greatly', 'hardly', 'here',
73
+ 'highly', 'immediately', 'indeed', 'instead',
74
+ 'just', 'largely', 'later', 'likely', 'mainly', 'maybe',
75
+ 'merely', 'mostly', 'naturally', 'nearly', 'necessarily',
76
+ 'never', 'normally', 'now', 'obviously', 'occasionally', 'often',
77
+ 'only', 'originally', 'particularly', 'perhaps',
78
+ 'personally', 'possibly', 'potentially', 'practically', 'precisely',
79
+ 'presumably', 'previously', 'primarily', 'probably', 'properly',
80
+ 'quickly', 'quite', 'rarely', 'rather', 'readily', 'really',
81
+ 'recently', 'relatively', 'roughly', 'significantly', 'simply',
82
+ 'slightly', 'slowly', 'sometimes', 'somewhat', 'soon', 'specifically',
83
+ 'still', 'strongly', 'subsequently', 'successfully', 'suddenly',
84
+ 'surely', 'then', 'there', 'thoroughly', 'together',
85
+ 'too', 'truly', 'typically', 'ultimately', 'unfortunately', 'usually',
86
+ 'very', 'well', 'widely', 'yet',
87
+ // --- Common adjectives ---
88
+ 'able', 'actual', 'additional', 'alternative', 'appropriate', 'available',
89
+ 'basic', 'broad', 'certain', 'clear', 'close', 'common', 'complete',
90
+ 'comprehensive', 'considerable', 'consistent', 'correct', 'critical',
91
+ 'current', 'deep', 'different', 'difficult', 'direct', 'due', 'early',
92
+ 'effective', 'entire', 'essential', 'exact', 'excellent', 'existing',
93
+ 'extensive', 'extra', 'fair', 'familiar', 'final', 'fine', 'first',
94
+ 'fixed', 'flat', 'formal', 'former', 'free', 'fresh', 'full', 'further',
95
+ 'future', 'general', 'given', 'global', 'good', 'great', 'hard', 'heavy',
96
+ 'high', 'huge', 'ideal', 'important', 'independent', 'individual',
97
+ 'initial', 'internal', 'key', 'large', 'last', 'late', 'latest', 'least',
98
+ 'less', 'light', 'limited', 'little', 'local', 'long', 'loose', 'low',
99
+ 'main', 'major', 'massive', 'minor', 'missing', 'modern', 'much',
100
+ 'narrow', 'native', 'natural', 'necessary', 'negative', 'new', 'nice',
101
+ 'normal', 'obvious', 'old', 'only', 'open', 'original', 'overall', 'own',
102
+ 'particular', 'perfect', 'personal', 'plain', 'poor', 'popular',
103
+ 'positive', 'possible', 'potential', 'powerful', 'practical', 'present',
104
+ 'previous', 'primary', 'prime', 'private', 'proper', 'public', 'pure',
105
+ 'quick', 'quiet', 'random', 'rapid', 'rare', 'raw', 'ready', 'real',
106
+ 'reasonable', 'recent', 'regular', 'related', 'relevant', 'remote',
107
+ 'required', 'responsible', 'rich', 'right', 'rough', 'round', 'safe',
108
+ 'secure', 'separate', 'serious', 'sharp', 'short', 'significant',
109
+ 'silent', 'similar', 'simple', 'single', 'slight', 'slow', 'small',
110
+ 'smart', 'smooth', 'soft', 'solid', 'special', 'specific', 'stable',
111
+ 'standard', 'steep', 'straight', 'strict', 'strong', 'sudden',
112
+ 'sufficient', 'suitable', 'sure', 'sweet', 'tall', 'thick', 'thin',
113
+ 'tight', 'tiny', 'total', 'tough', 'true', 'typical', 'unique',
114
+ 'unusual', 'useful', 'usual', 'valid', 'valuable', 'various', 'vast',
115
+ 'warm', 'weak', 'whole', 'wide', 'wild', 'worth', 'wrong',
116
+ // --- Common verbs ---
117
+ 'accept', 'achieve', 'add', 'admit', 'agree', 'allow', 'announce', 'appear',
118
+ 'apply', 'approach', 'argue', 'arrange', 'arrive', 'assume', 'attempt', 'avoid',
119
+ 'begin', 'believe', 'belong', 'break', 'bring', 'build', 'burn', 'buy',
120
+ 'call', 'carry', 'catch', 'cause', 'change', 'charge', 'check', 'choose',
121
+ 'claim', 'clean', 'climb', 'close', 'collect', 'come', 'commit',
122
+ 'compare', 'complain', 'confirm', 'connect', 'consider', 'contain',
123
+ 'continue', 'contribute', 'control', 'convert', 'cook', 'copy', 'correct',
124
+ 'cost', 'count', 'cover', 'create', 'cross', 'cry', 'cut',
125
+ 'deal', 'decide', 'declare', 'define', 'deliver', 'demand', 'deny', 'depend',
126
+ 'describe', 'design', 'destroy', 'determine', 'develop', 'die', 'discover',
127
+ 'discuss', 'divide', 'double', 'doubt', 'draw', 'dress', 'drink', 'drive',
128
+ 'drop', 'earn', 'eat', 'enable', 'encourage', 'enjoy', 'ensure', 'enter',
129
+ 'establish', 'examine', 'exist', 'expand', 'expect', 'experience',
130
+ 'explain', 'express', 'extend', 'face', 'fail', 'fall', 'feed', 'feel',
131
+ 'fight', 'fill', 'find', 'finish', 'fit', 'fix', 'fly', 'focus', 'force',
132
+ 'forget', 'form', 'gain', 'gather', 'generate', 'get', 'give', 'go', 'grab',
133
+ 'grant', 'grow', 'guess', 'handle', 'happen', 'hate', 'head', 'hear',
134
+ 'help', 'hide', 'hit', 'hold', 'hope', 'hurt', 'identify', 'ignore',
135
+ 'imagine', 'improve', 'include', 'increase', 'indicate', 'influence',
136
+ 'inform', 'insist', 'install', 'intend', 'introduce', 'invest', 'invite',
137
+ 'involve', 'issue', 'join', 'judge', 'jump', 'justify', 'keep', 'kick',
138
+ 'kill', 'knock', 'land', 'last', 'laugh', 'launch', 'lay', 'lead', 'learn',
139
+ 'leave', 'lend', 'let', 'lie', 'lift', 'limit', 'link', 'listen', 'live',
140
+ 'look', 'lose', 'love', 'maintain', 'make', 'manage', 'mark', 'match',
141
+ 'matter', 'mean', 'measure', 'meet', 'mention', 'mind', 'miss', 'mix',
142
+ 'monitor', 'move', 'need', 'note', 'notice', 'obtain', 'occur', 'offer',
143
+ 'open', 'operate', 'order', 'organise', 'organize', 'own',
144
+ 'pass', 'pay', 'perform', 'permit', 'pick', 'place', 'plan', 'plant',
145
+ 'play', 'point', 'pour', 'practice', 'prefer', 'prepare', 'present',
146
+ 'press', 'prevent', 'produce', 'promise', 'promote', 'propose', 'protect',
147
+ 'prove', 'provide', 'publish', 'pull', 'push', 'put', 'raise', 'reach',
148
+ 'read', 'realize', 'receive', 'recognize', 'recommend', 'record',
149
+ 'reduce', 'reflect', 'refuse', 'regard', 'reject', 'relate', 'release',
150
+ 'rely', 'remain', 'remember', 'remove', 'repeat', 'replace', 'report',
151
+ 'represent', 'request', 'require', 'respond', 'rest', 'restore', 'result',
152
+ 'retain', 'retire', 'return', 'reveal', 'review', 'ring', 'rise', 'risk',
153
+ 'roll', 'run', 'rush', 'save', 'say', 'search', 'seek', 'seem',
154
+ 'select', 'sell', 'send', 'serve', 'set', 'settle', 'shake', 'shape',
155
+ 'share', 'shift', 'shoot', 'shut', 'sign', 'sing', 'sit', 'skip', 'sleep',
156
+ 'slip', 'smile', 'solve', 'sort', 'sound', 'speak', 'spend', 'split',
157
+ 'spread', 'stand', 'start', 'state', 'stay', 'steal', 'step', 'stick',
158
+ 'stop', 'store', 'strike', 'struggle', 'study', 'submit', 'succeed',
159
+ 'suffer', 'suggest', 'suit', 'supply', 'support', 'suppose', 'survive',
160
+ 'suspect', 'switch', 'take', 'talk', 'target', 'teach', 'tear', 'tell',
161
+ 'tend', 'test', 'thank', 'think', 'throw', 'touch', 'track', 'trade',
162
+ 'train', 'travel', 'treat', 'trust', 'try', 'turn', 'understand', 'use',
163
+ 'visit', 'vote', 'wait', 'wake', 'walk', 'want', 'warn', 'wash', 'watch',
164
+ 'wear', 'weigh', 'win', 'wish', 'wonder', 'work', 'worry', 'wrap', 'write',
165
+ // --- Common nouns (generic, not entity-like) ---
166
+ 'access', 'account', 'act', 'action', 'activity', 'addition', 'address',
167
+ 'age', 'air', 'amount', 'analysis', 'answer', 'area', 'argument', 'arm',
168
+ 'article', 'aspect', 'attention', 'authority', 'balance', 'base', 'basis',
169
+ 'bed', 'benefit', 'bit', 'blood', 'board', 'body', 'book', 'bottom',
170
+ 'box', 'business', 'capacity', 'capital', 'card', 'care', 'case',
171
+ 'centre', 'challenge', 'chance', 'character', 'choice',
172
+ 'circle', 'class', 'club', 'code', 'collection', 'colour',
173
+ 'comment', 'commission', 'community', 'company', 'comparison', 'competition',
174
+ 'concern', 'condition', 'connection', 'content', 'context', 'contract',
175
+ 'contribution', 'corner', 'country', 'couple', 'course', 'credit', 'cup',
176
+ 'damage', 'danger', 'data', 'date', 'death', 'debate', 'decision',
177
+ 'demand', 'department', 'detail', 'development', 'difference', 'direction',
178
+ 'discussion', 'disease', 'display', 'distance', 'document', 'door',
179
+ 'doubt', 'duty', 'earth', 'edge', 'education', 'effect',
180
+ 'effort', 'element', 'end', 'energy', 'engine', 'environment', 'error',
181
+ 'event', 'evidence', 'exchange', 'exercise', 'expression',
182
+ 'extent', 'eye', 'fact', 'failure', 'family', 'feature',
183
+ 'field', 'figure', 'film', 'floor', 'food', 'foot',
184
+ 'force', 'foundation', 'front', 'fund', 'game', 'garden', 'gas',
185
+ 'glass', 'goal', 'gold', 'grade', 'ground', 'growth', 'guide', 'hair',
186
+ 'hall', 'hand', 'heart', 'heat', 'hill', 'history',
187
+ 'hole', 'home', 'horse', 'hotel', 'hour', 'house', 'image', 'impact',
188
+ 'income', 'index', 'industry', 'information',
189
+ 'instance', 'interest', 'investment', 'island', 'item',
190
+ 'job', 'kitchen', 'knee', 'knowledge', 'lack', 'language',
191
+ 'law', 'league', 'length', 'lesson', 'letter', 'level',
192
+ 'library', 'life', 'line', 'list', 'living', 'loss',
193
+ 'machine', 'management', 'manner', 'map', 'market', 'mass', 'master',
194
+ 'material', 'meeting', 'member', 'memory', 'message', 'metal',
195
+ 'method', 'middle', 'minute', 'model', 'moment', 'money',
196
+ 'morning', 'mouth', 'movement', 'music', 'name', 'nature',
197
+ 'network', 'news', 'night', 'node', 'noise', 'north', 'number',
198
+ 'object', 'office', 'officer', 'operation', 'opinion', 'opportunity',
199
+ 'option', 'output', 'owner', 'package', 'pair', 'paper',
200
+ 'parent', 'part', 'party', 'passage', 'path', 'pattern',
201
+ 'performance', 'period', 'person', 'picture', 'player',
202
+ 'pleasure', 'pocket', 'position', 'post', 'pound',
203
+ 'power', 'pressure', 'price', 'principle', 'problem',
204
+ 'procedure', 'process', 'product', 'production', 'programme', 'progress',
205
+ 'proof', 'property', 'proposal', 'protection', 'purpose',
206
+ 'quality', 'quarter', 'question', 'race', 'range', 'rate', 'reason',
207
+ 'reference', 'reform', 'region', 'relation', 'relationship',
208
+ 'request', 'research', 'resource', 'response',
209
+ 'road', 'role', 'roof', 'room', 'route', 'row', 'rule',
210
+ 'safety', 'sale', 'sample', 'scale', 'scene', 'scheme', 'school',
211
+ 'science', 'screen', 'season', 'seat', 'section', 'security', 'sense',
212
+ 'series', 'service', 'session', 'setting', 'sex',
213
+ 'shop', 'shot', 'shoulder', 'show', 'side', 'sight', 'signal',
214
+ 'site', 'situation', 'size', 'skin', 'society',
215
+ 'software', 'solution', 'song', 'source', 'south',
216
+ 'space', 'speech', 'speed', 'spirit', 'sport', 'spring', 'square',
217
+ 'staff', 'stage', 'star', 'statement', 'station',
218
+ 'status', 'stock', 'stone', 'story', 'strategy',
219
+ 'street', 'strength', 'structure', 'student', 'stuff',
220
+ 'style', 'subject', 'success', 'summer', 'supply', 'surface',
221
+ 'surprise', 'survey', 'system', 'task', 'team', 'technique',
222
+ 'technology', 'term', 'text', 'theory', 'thing', 'thought',
223
+ 'threat', 'time', 'title', 'tool', 'top', 'tour', 'town',
224
+ 'training', 'transfer', 'transport',
225
+ 'treatment', 'trial', 'trouble', 'truth', 'type',
226
+ 'union', 'unit', 'user', 'valley', 'value', 'variety', 'version',
227
+ 'view', 'village', 'voice', 'volume', 'wall', 'war', 'waste', 'water',
228
+ 'wave', 'way', 'weather', 'weight', 'west', 'wind', 'window',
229
+ 'winter', 'wood', 'word', 'worker', 'world', 'writing',
53
230
  ]);
54
231
  /**
55
232
  * Escape special regex characters in a string
@@ -61,6 +238,9 @@ function escapeRegex(str) {
61
238
  * Check if an entity should be excluded from wikilinking
62
239
  */
63
240
  function shouldExcludeEntity(entity) {
241
+ // Skip single-char terms (e.g. alias "I" for Ben)
242
+ if (entity.length < 2)
243
+ return true;
64
244
  if (EXCLUDE_WORDS.has(entity.toLowerCase()))
65
245
  return true;
66
246
  // Skip lowercase hyphenated descriptors (e.g., self-improving, local-first, Claude-native)
@@ -473,15 +653,15 @@ const DEFAULT_IMPLICIT_CONFIG = {
473
653
  * Common words that should not be detected as implicit entities
474
654
  */
475
655
  export const IMPLICIT_EXCLUDE_WORDS = new Set([
476
- // Days and months (already in EXCLUDE_WORDS but duplicated for safety)
656
+ // Days and months
477
657
  'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
478
658
  'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
479
659
  'september', 'october', 'november', 'december',
480
- // Common sentence starters
660
+ // Common sentence starters / determiners
481
661
  'this', 'that', 'these', 'those', 'there', 'here', 'when', 'where', 'what',
482
662
  'which', 'while', 'since', 'after', 'before', 'during', 'until', 'because',
483
663
  'however', 'therefore', 'although', 'though', 'unless', 'whether',
484
- // Common proper-looking words that aren't entities
664
+ // Document/structure words
485
665
  'note', 'notes', 'example', 'chapter', 'section', 'part', 'item', 'figure',
486
666
  'table', 'list', 'step', 'task', 'todo', 'idea', 'thought', 'question',
487
667
  'answer', 'summary', 'overview', 'introduction', 'conclusion',
@@ -494,34 +674,376 @@ export const IMPLICIT_EXCLUDE_WORDS = new Set([
494
674
  'the', 'and', 'but', 'for', 'not', 'you', 'all', 'can', 'had', 'her',
495
675
  'was', 'one', 'our', 'out', 'are', 'has', 'his', 'how', 'its', 'may',
496
676
  'new', 'now', 'old', 'see', 'way', 'who', 'did', 'got', 'let', 'say',
497
- // Common abbreviations that aren't entities
677
+ // Common abbreviations
498
678
  'etc', 'aka', 'btw', 'fyi', 'imo', 'tldr', 'asap', 'rsvp',
499
679
  'url', 'html', 'css', 'http', 'https', 'json', 'xml', 'sql', 'ssh', 'tcp', 'udp', 'dns',
680
+ // --- Common adjectives (capitalized at sentence starts) ---
681
+ 'able', 'absolute', 'acceptable', 'accessible', 'accurate', 'actual',
682
+ 'additional', 'adequate', 'advanced', 'aggressive', 'alive', 'alternative',
683
+ 'amazing', 'ancient', 'angry', 'annual', 'apparent', 'applicable',
684
+ 'appropriate', 'approximate', 'arbitrary', 'automatic', 'available',
685
+ 'aware', 'awful', 'awkward',
686
+ 'bad', 'bare', 'beautiful', 'beneficial', 'best', 'better', 'big',
687
+ 'bitter', 'blank', 'blind', 'bold', 'boring', 'brave', 'brief',
688
+ 'bright', 'brilliant', 'broad', 'broken', 'busy',
689
+ 'calm', 'capable', 'careful', 'casual', 'central', 'certain', 'cheap',
690
+ 'clean', 'clear', 'clever', 'close', 'cold', 'comfortable', 'common',
691
+ 'comparable', 'compatible', 'competitive', 'complete', 'complex',
692
+ 'comprehensive', 'concerned', 'concrete', 'confident', 'confused',
693
+ 'conscious', 'conservative', 'considerable', 'consistent', 'constant',
694
+ 'content', 'continuous', 'convenient', 'conventional', 'cool', 'correct',
695
+ 'corresponding', 'costly', 'crazy', 'creative', 'critical', 'crucial',
696
+ 'curious', 'current', 'custom',
697
+ 'dangerous', 'dark', 'dead', 'dear', 'decent', 'deep', 'defensive',
698
+ 'definite', 'deliberate', 'delicate', 'dense', 'dependent', 'desperate',
699
+ 'detailed', 'different', 'difficult', 'digital', 'direct', 'dirty',
700
+ 'distinct', 'double', 'dramatic', 'dry', 'due', 'dull', 'dumb',
701
+ 'eager', 'early', 'eastern', 'easy', 'economic', 'educational',
702
+ 'effective', 'efficient', 'elaborate', 'elderly', 'electric', 'elegant',
703
+ 'emotional', 'empty', 'encouraging', 'endless', 'enormous', 'entire',
704
+ 'equal', 'equivalent', 'essential', 'even', 'eventual', 'every',
705
+ 'everyday', 'evident', 'evil', 'exact', 'excellent', 'exceptional',
706
+ 'excessive', 'exciting', 'exclusive', 'existing', 'exotic', 'expensive',
707
+ 'experienced', 'experimental', 'explicit', 'extended', 'extensive',
708
+ 'external', 'extra', 'extraordinary', 'extreme',
709
+ 'fair', 'faithful', 'familiar', 'famous', 'fancy', 'fantastic', 'far',
710
+ 'fascinating', 'fast', 'fat', 'fatal', 'favorable', 'favourite', 'federal',
711
+ 'fierce', 'final', 'financial', 'fine', 'firm', 'fit', 'fixed', 'flat',
712
+ 'flexible', 'fluid', 'foolish', 'foreign', 'formal', 'former', 'forward',
713
+ 'fragile', 'free', 'frequent', 'fresh', 'friendly', 'front', 'frozen',
714
+ 'full', 'fun', 'functional', 'fundamental', 'funny', 'further', 'future',
715
+ 'general', 'generous', 'gentle', 'genuine', 'giant', 'glad', 'global',
716
+ 'golden', 'good', 'gorgeous', 'gradual', 'grand', 'grateful', 'grave',
717
+ 'great', 'green', 'grey', 'gross', 'growing', 'guilty',
718
+ 'half', 'handsome', 'handy', 'happy', 'hard', 'harmful', 'harsh',
719
+ 'healthy', 'heavy', 'helpful', 'hidden', 'high', 'historic', 'honest',
720
+ 'horrible', 'hostile', 'hot', 'huge', 'humble', 'hungry',
721
+ 'ideal', 'identical', 'immediate', 'immense', 'immune', 'implicit',
722
+ 'important', 'impossible', 'impressive', 'inadequate', 'inappropriate',
723
+ 'incredible', 'independent', 'indirect', 'individual', 'industrial',
724
+ 'inevitable', 'infinite', 'informal', 'inherent', 'initial', 'inner',
725
+ 'innocent', 'innovative', 'instant', 'insufficient', 'intelligent',
726
+ 'intense', 'intensive', 'interactive', 'interesting', 'interim',
727
+ 'intermediate', 'internal', 'international', 'invalid', 'invisible',
728
+ 'irrelevant', 'isolated',
729
+ 'joint', 'junior', 'just',
730
+ 'keen', 'key', 'kind',
731
+ 'large', 'last', 'late', 'lateral', 'latest', 'lazy', 'lean', 'least',
732
+ 'legitimate', 'lengthy', 'less', 'lesser', 'level', 'liberal', 'light',
733
+ 'likely', 'limited', 'linear', 'literal', 'little', 'live', 'lively',
734
+ 'local', 'logical', 'lone', 'lonely', 'long', 'loose', 'loud', 'lovely',
735
+ 'low', 'loyal', 'lucky',
736
+ 'mad', 'magic', 'main', 'major', 'male', 'manual', 'many', 'marginal',
737
+ 'massive', 'mature', 'maximum', 'mean', 'meaningful', 'mechanical',
738
+ 'medical', 'medium', 'mental', 'mere', 'mild', 'military', 'minimal',
739
+ 'minimum', 'minor', 'minute', 'missing', 'mixed', 'mobile', 'moderate',
740
+ 'modern', 'modest', 'moral', 'more', 'most', 'multiple', 'mutual',
741
+ 'naked', 'narrow', 'nasty', 'native', 'natural', 'neat', 'necessary',
742
+ 'negative', 'nervous', 'neutral', 'nice', 'noble', 'nominal', 'normal',
743
+ 'notable', 'novel', 'numerous',
744
+ 'obvious', 'occasional', 'odd', 'offensive', 'official', 'only', 'open',
745
+ 'operational', 'opposite', 'optimal', 'optional', 'ordinary', 'organic',
746
+ 'original', 'other', 'outer', 'overall', 'overnight', 'own',
747
+ 'painful', 'pale', 'parallel', 'partial', 'particular', 'passive', 'past',
748
+ 'patient', 'peaceful', 'peculiar', 'perfect', 'permanent', 'personal',
749
+ 'physical', 'plain', 'pleasant', 'plenty', 'plus', 'polite', 'political',
750
+ 'poor', 'popular', 'portable', 'positive', 'possible', 'potential',
751
+ 'powerful', 'practical', 'precise', 'predictable', 'preliminary',
752
+ 'premium', 'prepared', 'present', 'pretty', 'previous',
753
+ 'primary', 'prime', 'primitive', 'principal', 'prior', 'private',
754
+ 'probable', 'productive', 'professional', 'profitable', 'profound',
755
+ 'progressive', 'prominent', 'promising', 'proper', 'proportional',
756
+ 'proposed', 'prospective', 'protective', 'proud', 'provisional', 'public',
757
+ 'pure',
758
+ 'quick', 'quiet',
759
+ 'radical', 'random', 'rapid', 'rare', 'rational', 'raw', 'ready', 'real',
760
+ 'realistic', 'reasonable', 'recent', 'regional', 'regular', 'related',
761
+ 'relative', 'relevant', 'reliable', 'reluctant', 'remaining', 'remarkable',
762
+ 'remote', 'repeated', 'representative', 'required', 'residential',
763
+ 'respective', 'responsible', 'rich', 'rigid', 'right', 'rising', 'robust',
764
+ 'rough', 'round', 'royal', 'rude', 'rural',
765
+ 'sacred', 'sad', 'safe', 'satisfactory', 'scared', 'scattered', 'secure',
766
+ 'selective', 'senior', 'sensitive', 'separate', 'serious', 'severe',
767
+ 'shallow', 'sharp', 'sheer', 'short', 'shy', 'sick', 'significant',
768
+ 'silent', 'silly', 'similar', 'simple', 'single', 'slight', 'slim',
769
+ 'slow', 'small', 'smart', 'smooth', 'sober', 'social', 'soft', 'solar',
770
+ 'sole', 'solid', 'sophisticated', 'sorry', 'sound', 'southern', 'spare',
771
+ 'spatial', 'special', 'specific', 'spectacular', 'spiritual', 'splendid',
772
+ 'spontaneous', 'stable', 'standard', 'static', 'statistical', 'steady',
773
+ 'steep', 'sticky', 'stiff', 'straight', 'strange', 'strategic', 'strict',
774
+ 'striking', 'strong', 'structural', 'stupid', 'subject', 'substantial',
775
+ 'subtle', 'successful', 'successive', 'such', 'sudden', 'sufficient',
776
+ 'suitable', 'super', 'superb', 'superior', 'supreme', 'sure', 'surgical',
777
+ 'surprised', 'surprising', 'suspicious', 'sweet', 'swift', 'symbolic',
778
+ 'sympathetic',
779
+ 'tall', 'technical', 'temporary', 'tender', 'terrible', 'thick', 'thin',
780
+ 'thorough', 'tight', 'tiny', 'tired', 'top', 'total', 'tough',
781
+ 'traditional', 'tremendous', 'tropical', 'true', 'typical',
782
+ 'ugly', 'ultimate', 'unable', 'uncertain', 'underlying', 'unfair',
783
+ 'unfortunate', 'unhappy', 'uniform', 'unique', 'universal', 'unknown',
784
+ 'unlikely', 'unnecessary', 'unpleasant', 'unprecedented', 'unusual',
785
+ 'upper', 'upset', 'urban', 'urgent', 'useful', 'useless', 'usual',
786
+ 'valid', 'valuable', 'variable', 'various', 'vast', 'verbal', 'vertical',
787
+ 'viable', 'violent', 'virtual', 'visible', 'visual', 'vital', 'vivid',
788
+ 'voluntary', 'vulnerable',
789
+ 'warm', 'weak', 'wealthy', 'weird', 'welcome', 'western', 'wet', 'white',
790
+ 'whole', 'wicked', 'wide', 'widespread', 'wild', 'willing', 'wise',
791
+ 'wonderful', 'wooden', 'working', 'worried', 'worse', 'worst', 'worth',
792
+ 'worthy', 'wrong',
793
+ 'young',
794
+ // --- Common verbs / past participles (capitalized at sentence starts) ---
795
+ 'accepted', 'achieved', 'acquired', 'added', 'adjusted', 'adopted',
796
+ 'affected', 'agreed', 'allowed', 'announced', 'applied', 'appointed',
797
+ 'approved', 'argued', 'arranged', 'arrived', 'asked', 'assessed',
798
+ 'assigned', 'associated', 'assumed', 'attached', 'attempted', 'attended',
799
+ 'based', 'beaten', 'become', 'begun', 'believed', 'belonged', 'blocked',
800
+ 'born', 'bought', 'brought', 'built', 'buried', 'burned',
801
+ 'called', 'captured', 'carried', 'caught', 'caused', 'challenged',
802
+ 'changed', 'charged', 'checked', 'chosen', 'claimed', 'cleaned',
803
+ 'cleared', 'closed', 'collected', 'combined', 'compared', 'compiled',
804
+ 'completed', 'complicated', 'composed', 'concerned', 'concluded',
805
+ 'conducted', 'confirmed', 'connected', 'considered', 'constructed',
806
+ 'contained', 'continued', 'contributed', 'controlled', 'converted',
807
+ 'convinced', 'cooked', 'copied', 'corrected', 'covered', 'created',
808
+ 'crossed', 'crushed', 'customized',
809
+ 'damaged', 'dealt', 'decided', 'declared', 'declined', 'dedicated',
810
+ 'defeated', 'defined', 'delivered', 'demanded', 'demonstrated', 'denied',
811
+ 'deployed', 'derived', 'described', 'designed', 'desired', 'destroyed',
812
+ 'detected', 'determined', 'developed', 'devoted', 'directed', 'disabled',
813
+ 'disappointed', 'discovered', 'discussed', 'dismissed', 'displayed',
814
+ 'distributed', 'divided', 'documented', 'dominated', 'done', 'doubled',
815
+ 'downloaded', 'drafted', 'drawn', 'dressed', 'driven', 'dropped',
816
+ 'earned', 'edited', 'educated', 'elected', 'eliminated', 'embedded',
817
+ 'emerged', 'employed', 'enabled', 'encountered', 'encouraged', 'ended',
818
+ 'engaged', 'enhanced', 'enjoyed', 'entered', 'equipped', 'escaped',
819
+ 'established', 'estimated', 'evaluated', 'examined', 'exceeded',
820
+ 'exchanged', 'excluded', 'executed', 'exercised', 'exhausted', 'expanded',
821
+ 'expected', 'experienced', 'explained', 'exposed', 'expressed', 'extended',
822
+ 'extracted',
823
+ 'faced', 'failed', 'fallen', 'featured', 'fed', 'felt', 'filed',
824
+ 'filled', 'filtered', 'finalised', 'finalized', 'finished', 'fired',
825
+ 'fixed', 'flagged', 'flipped', 'floated', 'followed', 'forced',
826
+ 'forgotten', 'formed', 'formatted', 'found', 'founded', 'freed', 'frozen',
827
+ 'fulfilled', 'funded', 'furnished',
828
+ 'gained', 'gathered', 'generated', 'given', 'gone', 'grabbed', 'granted',
829
+ 'grown', 'guaranteed', 'guided',
830
+ 'handled', 'happened', 'heard', 'heated', 'held', 'helped', 'hidden',
831
+ 'highlighted', 'hired', 'hosted', 'hurt',
832
+ 'identified', 'ignored', 'illustrated', 'imagined', 'implemented',
833
+ 'implied', 'imported', 'imposed', 'improved', 'included', 'incorporated',
834
+ 'increased', 'indicated', 'influenced', 'informed', 'inherited',
835
+ 'initiated', 'injured', 'inserted', 'inspired', 'installed', 'integrated',
836
+ 'intended', 'interested', 'interpreted', 'introduced', 'invaded',
837
+ 'invested', 'investigated', 'invited', 'involved', 'isolated', 'issued',
838
+ 'joined', 'judged', 'jumped', 'justified',
839
+ 'kept', 'kicked', 'killed', 'knocked', 'known',
840
+ 'labelled', 'lacked', 'laid', 'landed', 'lasted', 'launched', 'learned',
841
+ 'learnt', 'left', 'lifted', 'liked', 'lined', 'linked',
842
+ 'listed', 'listened', 'lived', 'loaded', 'located', 'locked', 'logged',
843
+ 'looked', 'lost', 'loved', 'lowered',
844
+ 'made', 'maintained', 'managed', 'manufactured', 'mapped', 'marked',
845
+ 'matched', 'meant', 'measured', 'mentioned', 'merged', 'met', 'migrated',
846
+ 'minded', 'missed', 'mixed', 'modified', 'monitored', 'motivated',
847
+ 'mounted', 'moved', 'multiplied',
848
+ 'named', 'needed', 'negotiated', 'nested', 'nominated', 'normalised',
849
+ 'noted', 'noticed',
850
+ 'observed', 'obtained', 'occupied', 'occurred', 'offered', 'opened',
851
+ 'operated', 'opposed', 'ordered', 'organised', 'organized', 'oriented',
852
+ 'outlined', 'overcome', 'overlooked', 'owned',
853
+ 'packed', 'paid', 'paired', 'parsed', 'passed', 'patched', 'performed',
854
+ 'permitted', 'picked', 'pinned', 'placed', 'planned', 'planted', 'played',
855
+ 'pleased', 'pointed', 'polished', 'positioned', 'posted', 'poured',
856
+ 'powered', 'practised', 'preferred', 'prepared', 'presented', 'preserved',
857
+ 'pressed', 'prevented', 'priced', 'printed', 'prioritised', 'processed',
858
+ 'produced', 'programmed', 'promised', 'promoted', 'prompted', 'proposed',
859
+ 'protected', 'proved', 'proven', 'provided', 'published', 'pulled',
860
+ 'purchased', 'pushed', 'put',
861
+ 'qualified', 'queried', 'questioned', 'quoted',
862
+ 'raised', 'ran', 'ranked', 'rated', 'reached', 'read', 'realised',
863
+ 'realized', 'received', 'recognised', 'recognized', 'recommended',
864
+ 'recorded', 'recovered', 'reduced', 'referred', 'reflected', 'reformed',
865
+ 'refused', 'regarded', 'registered', 'regulated', 'rejected', 'related',
866
+ 'released', 'relied', 'remained', 'remembered', 'reminded', 'removed',
867
+ 'renamed', 'renewed', 'repaired', 'repeated', 'replaced', 'replied',
868
+ 'reported', 'represented', 'requested', 'required', 'rescued', 'reserved',
869
+ 'resigned', 'resolved', 'responded', 'restored', 'restricted', 'resulted',
870
+ 'retained', 'retired', 'retrieved', 'returned', 'revealed', 'reversed',
871
+ 'reviewed', 'revised', 'rewarded', 'rolled', 'rotated', 'rounded', 'ruled',
872
+ 'rushed',
873
+ 'satisfied', 'saved', 'scaled', 'scanned', 'scattered', 'scheduled',
874
+ 'scored', 'searched', 'secured', 'selected', 'sent', 'separated', 'served',
875
+ 'settled', 'shaped', 'shared', 'shifted', 'shipped', 'shocked', 'shown',
876
+ 'shut', 'signed', 'simplified', 'situated', 'skipped', 'slipped', 'sold',
877
+ 'solved', 'sorted', 'sought', 'sourced', 'spent', 'split', 'spoken',
878
+ 'sponsored', 'spotted', 'spread', 'staged', 'started', 'stated',
879
+ 'stayed', 'stolen', 'stopped', 'stored', 'strengthened', 'stretched',
880
+ 'struck', 'structured', 'studied', 'submitted', 'succeeded', 'suffered',
881
+ 'suggested', 'suited', 'summarised', 'supplied', 'supported', 'supposed',
882
+ 'surprised', 'surrounded', 'survived', 'suspected', 'suspended',
883
+ 'sustained', 'switched',
884
+ 'taken', 'talked', 'targeted', 'taught', 'tested', 'thanked', 'thought',
885
+ 'threatened', 'thrown', 'tied', 'titled', 'told', 'topped', 'torn',
886
+ 'touched', 'traced', 'tracked', 'traded', 'trained', 'transferred',
887
+ 'transformed', 'translated', 'transmitted', 'transported', 'trapped',
888
+ 'travelled', 'treated', 'triggered', 'troubled', 'trusted', 'turned',
889
+ 'typed',
890
+ 'understood', 'undertaken', 'unified', 'united', 'unlocked', 'updated',
891
+ 'upgraded', 'uploaded', 'urged', 'used', 'utilised',
892
+ 'validated', 'valued', 'varied', 'verified', 'viewed', 'visited', 'voted',
893
+ 'waited', 'walked', 'wanted', 'warned', 'washed', 'watched', 'welcomed',
894
+ 'withdrawn', 'witnessed', 'won', 'wondered', 'worked', 'worried',
895
+ 'wrapped', 'written',
896
+ // --- Common nouns (non-entity, capitalized at sentence starts) ---
897
+ 'absence', 'access', 'account', 'accuracy', 'achievement', 'acquisition',
898
+ 'act', 'action', 'activity', 'addition', 'address', 'administration',
899
+ 'admission', 'adoption', 'adult', 'advance', 'advantage', 'advice',
900
+ 'affair', 'afternoon', 'age', 'agency', 'agenda', 'agreement', 'aid',
901
+ 'aim', 'air', 'alarm', 'alternative', 'ambition', 'amendment', 'amount',
902
+ 'analysis', 'anger', 'angle', 'announcement', 'anxiety', 'appeal',
903
+ 'appearance', 'application', 'appointment', 'approach', 'approval',
904
+ 'argument', 'arrangement', 'arrival', 'aspect', 'assembly', 'assessment',
905
+ 'asset', 'assignment', 'assistance', 'association', 'assumption',
906
+ 'atmosphere', 'attachment', 'attack', 'attempt', 'attendance', 'attention',
907
+ 'attitude', 'audience', 'authority', 'average', 'awareness',
908
+ 'background', 'balance', 'band', 'barrier', 'base', 'basis', 'battle',
909
+ 'beauty', 'bedroom', 'beginning', 'behaviour', 'belief', 'benefit',
910
+ 'birth', 'blade', 'blame', 'blast', 'block', 'blow', 'boat', 'bond',
911
+ 'bone', 'bonus', 'border', 'boss', 'boundary', 'brain', 'brand', 'breath',
912
+ 'brick', 'broadcast', 'brother', 'browser', 'budget', 'bug', 'bulk',
913
+ 'burden', 'buyer',
914
+ 'cabinet', 'cable', 'calculation', 'campaign', 'candidate', 'capability',
915
+ 'captain', 'career', 'cargo', 'carpet', 'carrier', 'cash', 'cast',
916
+ 'catalogue', 'category', 'cause', 'ceiling', 'celebration', 'chain',
917
+ 'chair', 'chairman', 'champion', 'channel', 'chapter', 'charity', 'chart',
918
+ 'check', 'chest', 'child', 'chip', 'chunk', 'circuit', 'citizen', 'city',
919
+ 'civilian', 'claim', 'clarity', 'clash', 'clause', 'client', 'climate',
920
+ 'clock', 'closure', 'cloth', 'cloud', 'cluster', 'coach', 'coalition',
921
+ 'coast', 'collaboration', 'collapse', 'colleague',
922
+ 'colony', 'column', 'combination', 'comfort', 'command', 'commander',
923
+ 'comment', 'commerce', 'commission', 'commitment', 'committee',
924
+ 'companion', 'complaint', 'complexity', 'component', 'composition',
925
+ 'compromise', 'concentration', 'concept', 'conclusion', 'confidence',
926
+ 'configuration', 'confirmation', 'conflict', 'confusion', 'conjunction',
927
+ 'consequence', 'conservation', 'consideration', 'constraint', 'consultant',
928
+ 'consultation', 'consumer', 'consumption', 'contact', 'container',
929
+ 'contempt', 'continent', 'continuation', 'controversy', 'convention',
930
+ 'conversation', 'conviction', 'cooperation', 'coordination', 'core',
931
+ 'correction', 'correlation', 'correspondent', 'corridor', 'corruption',
932
+ 'counter', 'countryside', 'coverage', 'crash', 'creature',
933
+ 'crew', 'crime', 'crisis', 'criterion', 'criticism', 'crop', 'crowd',
934
+ 'crown', 'currency', 'curriculum', 'curve', 'customer', 'cycle',
935
+ // --- Common adverbs (capitalized at sentence starts) ---
936
+ 'absolutely', 'accordingly', 'accurately', 'actively', 'actually',
937
+ 'additionally', 'admittedly', 'allegedly', 'alternatively', 'altogether',
938
+ 'amazingly', 'apparently', 'arguably', 'automatically',
939
+ 'barely', 'basically', 'briefly', 'broadly',
940
+ 'carefully', 'casually', 'cautiously', 'certainly',
941
+ 'clearly', 'closely', 'collectively', 'commonly',
942
+ 'comparatively', 'completely', 'consequently', 'considerably',
943
+ 'consistently', 'constantly', 'continuously', 'conversely', 'correctly',
944
+ 'critically', 'crucially', 'curiously', 'currently',
945
+ 'definitely', 'deliberately', 'desperately', 'directly', 'distinctly',
946
+ 'dramatically',
947
+ 'easily', 'effectively', 'efficiently', 'elegantly', 'elsewhere',
948
+ 'emotionally', 'enormously', 'entirely', 'equally',
949
+ 'especially', 'essentially', 'eventually', 'evidently', 'exactly',
950
+ 'exclusively', 'explicitly', 'extensively', 'externally', 'extremely',
951
+ 'fairly', 'famously', 'finally', 'firmly', 'firstly', 'formally',
952
+ 'formerly', 'fortunately', 'frankly', 'freely', 'frequently',
953
+ 'fundamentally',
954
+ 'generally', 'gently', 'genuinely', 'gradually', 'greatly',
955
+ 'happily', 'hardly', 'heavily', 'hence', 'highly', 'honestly',
956
+ 'hopefully', 'hugely',
957
+ 'ideally', 'immediately', 'immensely',
958
+ 'importantly', 'impressively', 'incidentally',
959
+ 'increasingly', 'incredibly', 'independently', 'indirectly',
960
+ 'individually', 'inevitably', 'informally', 'inherently', 'initially',
961
+ 'intensely', 'intentionally', 'interestingly', 'internally', 'ironically',
962
+ 'jointly',
963
+ 'kindly',
964
+ 'largely', 'lastly', 'lately', 'legally', 'legitimately', 'literally',
965
+ 'locally', 'logically', 'loosely',
966
+ 'mainly', 'manually', 'marginally', 'meanwhile',
967
+ 'merely', 'mildly', 'minimally', 'moderately', 'morally',
968
+ 'moreover', 'mostly', 'mutually',
969
+ 'namely', 'naturally', 'neatly', 'necessarily', 'negatively',
970
+ 'nevertheless', 'newly', 'nicely', 'nominally',
971
+ 'nonetheless', 'normally', 'notably', 'noticeably',
972
+ 'objectively', 'obviously', 'occasionally', 'oddly',
973
+ 'officially', 'openly', 'optimally', 'ordinarily',
974
+ 'originally', 'otherwise', 'overall', 'overwhelmingly',
975
+ 'partially', 'particularly', 'partly', 'passively',
976
+ 'patiently', 'perfectly', 'periodically', 'permanently', 'personally',
977
+ 'physically', 'plainly', 'politely', 'politically',
978
+ 'poorly', 'positively', 'possibly', 'potentially',
979
+ 'practically', 'precisely', 'predominantly', 'preferably', 'presently',
980
+ 'presumably', 'pretty', 'previously', 'primarily', 'principally',
981
+ 'privately', 'probably', 'professionally', 'profoundly',
982
+ 'progressively', 'prominently', 'promptly', 'properly', 'proportionally',
983
+ 'publicly', 'purely',
984
+ 'quickly', 'quietly', 'quite',
985
+ 'radically', 'randomly', 'rapidly', 'rarely', 'rationally', 'readily',
986
+ 'realistically', 'really', 'reasonably', 'recently', 'regardless',
987
+ 'regularly', 'relatively', 'reliably', 'reluctantly',
988
+ 'remarkably', 'remotely', 'repeatedly', 'reportedly', 'respectively',
989
+ 'responsibly', 'roughly',
990
+ 'sadly', 'safely', 'scarcely', 'secondly', 'secretly', 'seemingly',
991
+ 'selectively', 'separately', 'seriously', 'severely', 'sharply',
992
+ 'shortly', 'significantly', 'silently', 'similarly', 'simply',
993
+ 'simultaneously', 'sincerely', 'slightly', 'slowly', 'smoothly',
994
+ 'socially', 'solely', 'somehow', 'sometimes', 'somewhat', 'soon',
995
+ 'specifically', 'spontaneously', 'steadily', 'steeply',
996
+ 'still', 'strategically', 'strictly', 'strikingly', 'strongly',
997
+ 'structurally', 'subsequently', 'substantially', 'subtly', 'successfully',
998
+ 'suddenly', 'sufficiently', 'supposedly', 'surely', 'surprisingly',
999
+ 'swiftly', 'systematically',
1000
+ 'technically', 'temporarily', 'terribly', 'thankfully',
1001
+ 'thoroughly', 'tightly', 'together', 'traditionally', 'tremendously',
1002
+ 'truly', 'typically',
1003
+ 'ultimately', 'undoubtedly',
1004
+ 'unexpectedly', 'unfortunately', 'uniformly', 'universally',
1005
+ 'unnecessarily', 'unusually', 'urgently',
1006
+ 'usefully', 'usually', 'utterly',
1007
+ 'vastly', 'virtually', 'visually',
1008
+ 'warmly', 'weakly', 'widely', 'wildly', 'willingly', 'wisely',
500
1009
  ]);
501
1010
  /**
502
1011
  * Words that commonly start sentences but should not start a proper noun entity.
503
1012
  * These are checked separately because they might appear capitalized at sentence start.
504
1013
  */
505
1014
  const SENTENCE_STARTER_WORDS = new Set([
506
- 'visit', 'also', 'see', 'please', 'note', 'check', 'read', 'look', 'find',
1015
+ // Imperative verbs
1016
+ 'visit', 'see', 'please', 'note', 'check', 'read', 'look', 'find',
507
1017
  'get', 'set', 'add', 'use', 'try', 'make', 'take', 'give', 'keep', 'let',
508
1018
  'call', 'run', 'ask', 'tell', 'show', 'help', 'need', 'want', 'like',
509
- 'think', 'know', 'feel', 'seem', 'look', 'hear', 'watch', 'wait', 'work',
1019
+ 'think', 'know', 'feel', 'seem', 'hear', 'watch', 'wait', 'work',
510
1020
  'start', 'stop', 'open', 'close', 'move', 'turn', 'bring', 'send', 'leave',
511
1021
  'meet', 'join', 'follow', 'include', 'consider', 'remember', 'forget',
512
- // Additional common verbs/imperative starters
513
1022
  'target', 'create', 'build', 'write', 'avoid', 'provide', 'maintain',
514
1023
  'define', 'ensure', 'place', 'focus', 'track', 'enable', 'apply', 'test',
515
1024
  'handle', 'load', 'link', 'pass', 'save', 'lead', 'frame', 'point',
516
- // Pronouns, possessives, determiners — capitalized at sentence start but not proper nouns
1025
+ // Greetings / interjections
1026
+ 'hello', 'hi', 'hey', 'thanks', 'thank', 'sorry',
1027
+ // Titles
1028
+ 'mr', 'mrs', 'ms', 'dr', 'sir',
1029
+ // Pronouns, possessives, determiners
517
1030
  'my', 'your', 'his', 'her', 'its', 'our', 'their',
518
1031
  'some', 'any', 'every', 'each', 'both', 'few', 'many', 'most',
519
- // Common adjectives that precede proper nouns at sentence start
1032
+ // Common starters (conjunctions, adverbs, auxiliaries)
1033
+ 'so', 'no', 'yes', 'not', 'never', 'always', 'also', 'just', 'only', 'already',
1034
+ 'here', 'there', 'then', 'now', 'when', 'how', 'even', 'still',
1035
+ 'go', 'went', 'gone', 'going',
1036
+ 'had', 'have', 'has', 'having',
1037
+ 'been', 'being', 'was', 'were',
1038
+ 'got', 'getting', 'put', 'putting',
1039
+ 'said', 'told', 'asked', 'called',
1040
+ 'do', 'did', 'does', 'done',
1041
+ // Common adjectives at sentence start
520
1042
  'poor', 'old', 'new', 'big', 'little', 'great', 'good', 'bad',
521
- 'first', 'last', 'next', 'other', 'more', 'just', 'very',
522
- // Additional adjectives/adverbs that appear capitalized at sentence starts
523
- 'still', 'clear', 'fixed', 'based', 'using', 'real', 'even',
1043
+ 'first', 'last', 'next', 'other', 'more', 'very',
1044
+ 'clear', 'fixed', 'based', 'using', 'real',
524
1045
  'safe', 'local', 'native', 'early', 'similar', 'simple', 'basic', 'related',
1046
+ 'skip', 'don', 'won',
525
1047
  ]);
526
1048
  /**
527
1049
  * Detect implicit entities in content using pattern matching
@@ -573,7 +1095,7 @@ export function detectImplicitEntities(content, config = {}) {
573
1095
  // Pattern 1: Multi-word proper nouns
574
1096
  // Matches "Marcus Johnson", "Project Alpha", "San Francisco Bay Area"
575
1097
  if (implicitPatterns.includes('proper-nouns')) {
576
- const properNounRegex = /\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b/g;
1098
+ const properNounRegex = /\b([A-Z][a-z]+(?:[^\S\n]+[A-Z][a-z]+)+)\b/g;
577
1099
  let match;
578
1100
  while ((match = properNounRegex.exec(content)) !== null) {
579
1101
  let text = match[1];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@velvetmonkey/vault-core",
3
- "version": "2.0.125",
3
+ "version": "2.0.127",
4
4
  "description": "Shared vault utilities for Flywheel ecosystem (entity scanning, wikilinks, protected zones)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",