@velvetmonkey/vault-core 2.0.125 → 2.0.126
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/wikilinks.js +544 -22
- package/package.json +1 -1
package/dist/wikilinks.js
CHANGED
|
@@ -27,7 +27,8 @@ function getSearchTerms(entity) {
|
|
|
27
27
|
return terms;
|
|
28
28
|
}
|
|
29
29
|
/**
|
|
30
|
-
* Common words to exclude from wikilink
|
|
30
|
+
* Common words to exclude from wikilink matching.
|
|
31
|
+
* These words are never wikified even when they match entity names or aliases.
|
|
31
32
|
*/
|
|
32
33
|
const EXCLUDE_WORDS = new Set([
|
|
33
34
|
// Day names
|
|
@@ -41,15 +42,191 @@ const EXCLUDE_WORDS = new Set([
|
|
|
41
42
|
'month end', 'month start', 'year end', 'year start',
|
|
42
43
|
'quarter end', 'quarter start', 'quarterly review',
|
|
43
44
|
'weekly review', 'monthly review', 'annual review',
|
|
44
|
-
// Stop words
|
|
45
|
-
'the', 'and', 'for', 'with', 'from', 'this', 'that',
|
|
46
45
|
'christmas', 'holiday', 'break',
|
|
47
|
-
//
|
|
48
|
-
'
|
|
49
|
-
'
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
'
|
|
46
|
+
// --- Two-char common words (pronouns, prepositions, conjunctions) ---
|
|
47
|
+
'me', 'us', 'we', 'he', 'it', 'am', 'is', 'be', 'do', 'go',
|
|
48
|
+
'no', 'so', 'up', 'if', 'or', 'as', 'at', 'by', 'on', 'in', 'to',
|
|
49
|
+
'of', 'an', 'my', 'oh', 'ok',
|
|
50
|
+
// --- Pronouns (personal, possessive, reflexive, relative, demonstrative) ---
|
|
51
|
+
'she', 'her', 'him', 'his', 'they', 'them', 'their', 'its', 'our', 'ours',
|
|
52
|
+
'who', 'whom', 'whose', 'what', 'which', 'mine', 'yours', 'hers', 'theirs',
|
|
53
|
+
'myself', 'yourself', 'himself', 'herself', 'itself', 'ourselves', 'themselves',
|
|
54
|
+
// --- Stop words & determiners ---
|
|
55
|
+
'the', 'and', 'for', 'with', 'from', 'this', 'that', 'these', 'those',
|
|
56
|
+
'some', 'any', 'each', 'both', 'few', 'many', 'most', 'such',
|
|
57
|
+
// --- Prepositions ---
|
|
58
|
+
'about', 'above', 'across', 'after', 'against', 'along', 'among', 'around',
|
|
59
|
+
'before', 'behind', 'below', 'beneath', 'beside', 'between', 'beyond',
|
|
60
|
+
'despite', 'down', 'during', 'except', 'inside', 'into', 'near',
|
|
61
|
+
'off', 'onto', 'outside', 'over', 'past', 'since', 'through',
|
|
62
|
+
'toward', 'towards', 'under', 'underneath', 'until', 'upon', 'within', 'without',
|
|
63
|
+
// --- Conjunctions ---
|
|
64
|
+
'although', 'because', 'however', 'therefore', 'moreover', 'furthermore',
|
|
65
|
+
'nevertheless', 'otherwise', 'meanwhile', 'whereas', 'whenever', 'wherever',
|
|
66
|
+
'whether', 'while', 'unless', 'though', 'hence',
|
|
67
|
+
// --- Common adverbs ---
|
|
68
|
+
'again', 'already', 'always', 'almost', 'also', 'away',
|
|
69
|
+
'back', 'certainly', 'clearly', 'completely', 'currently',
|
|
70
|
+
'directly', 'effectively', 'enough', 'especially', 'essentially',
|
|
71
|
+
'eventually', 'ever', 'exactly', 'fairly', 'finally', 'frequently',
|
|
72
|
+
'fully', 'generally', 'gradually', 'greatly', 'hardly', 'here',
|
|
73
|
+
'highly', 'immediately', 'indeed', 'instead',
|
|
74
|
+
'just', 'largely', 'later', 'likely', 'mainly', 'maybe',
|
|
75
|
+
'merely', 'mostly', 'naturally', 'nearly', 'necessarily',
|
|
76
|
+
'never', 'normally', 'now', 'obviously', 'occasionally', 'often',
|
|
77
|
+
'only', 'originally', 'particularly', 'perhaps',
|
|
78
|
+
'personally', 'possibly', 'potentially', 'practically', 'precisely',
|
|
79
|
+
'presumably', 'previously', 'primarily', 'probably', 'properly',
|
|
80
|
+
'quickly', 'quite', 'rarely', 'rather', 'readily', 'really',
|
|
81
|
+
'recently', 'relatively', 'roughly', 'significantly', 'simply',
|
|
82
|
+
'slightly', 'slowly', 'sometimes', 'somewhat', 'soon', 'specifically',
|
|
83
|
+
'still', 'strongly', 'subsequently', 'successfully', 'suddenly',
|
|
84
|
+
'surely', 'then', 'there', 'thoroughly', 'together',
|
|
85
|
+
'too', 'truly', 'typically', 'ultimately', 'unfortunately', 'usually',
|
|
86
|
+
'very', 'well', 'widely', 'yet',
|
|
87
|
+
// --- Common adjectives ---
|
|
88
|
+
'able', 'actual', 'additional', 'alternative', 'appropriate', 'available',
|
|
89
|
+
'basic', 'broad', 'certain', 'clear', 'close', 'common', 'complete',
|
|
90
|
+
'comprehensive', 'considerable', 'consistent', 'correct', 'critical',
|
|
91
|
+
'current', 'deep', 'different', 'difficult', 'direct', 'due', 'early',
|
|
92
|
+
'effective', 'entire', 'essential', 'exact', 'excellent', 'existing',
|
|
93
|
+
'extensive', 'extra', 'fair', 'familiar', 'final', 'fine', 'first',
|
|
94
|
+
'fixed', 'flat', 'formal', 'former', 'free', 'fresh', 'full', 'further',
|
|
95
|
+
'future', 'general', 'given', 'global', 'good', 'great', 'hard', 'heavy',
|
|
96
|
+
'high', 'huge', 'ideal', 'important', 'independent', 'individual',
|
|
97
|
+
'initial', 'internal', 'key', 'large', 'last', 'late', 'latest', 'least',
|
|
98
|
+
'less', 'light', 'limited', 'little', 'local', 'long', 'loose', 'low',
|
|
99
|
+
'main', 'major', 'massive', 'minor', 'missing', 'modern', 'much',
|
|
100
|
+
'narrow', 'native', 'natural', 'necessary', 'negative', 'new', 'nice',
|
|
101
|
+
'normal', 'obvious', 'old', 'only', 'open', 'original', 'overall', 'own',
|
|
102
|
+
'particular', 'perfect', 'personal', 'plain', 'poor', 'popular',
|
|
103
|
+
'positive', 'possible', 'potential', 'powerful', 'practical', 'present',
|
|
104
|
+
'previous', 'primary', 'prime', 'private', 'proper', 'public', 'pure',
|
|
105
|
+
'quick', 'quiet', 'random', 'rapid', 'rare', 'raw', 'ready', 'real',
|
|
106
|
+
'reasonable', 'recent', 'regular', 'related', 'relevant', 'remote',
|
|
107
|
+
'required', 'responsible', 'rich', 'right', 'rough', 'round', 'safe',
|
|
108
|
+
'secure', 'separate', 'serious', 'sharp', 'short', 'significant',
|
|
109
|
+
'silent', 'similar', 'simple', 'single', 'slight', 'slow', 'small',
|
|
110
|
+
'smart', 'smooth', 'soft', 'solid', 'special', 'specific', 'stable',
|
|
111
|
+
'standard', 'steep', 'straight', 'strict', 'strong', 'sudden',
|
|
112
|
+
'sufficient', 'suitable', 'sure', 'sweet', 'tall', 'thick', 'thin',
|
|
113
|
+
'tight', 'tiny', 'total', 'tough', 'true', 'typical', 'unique',
|
|
114
|
+
'unusual', 'useful', 'usual', 'valid', 'valuable', 'various', 'vast',
|
|
115
|
+
'warm', 'weak', 'whole', 'wide', 'wild', 'worth', 'wrong',
|
|
116
|
+
// --- Common verbs ---
|
|
117
|
+
'accept', 'achieve', 'add', 'admit', 'agree', 'allow', 'announce', 'appear',
|
|
118
|
+
'apply', 'approach', 'argue', 'arrange', 'arrive', 'assume', 'attempt', 'avoid',
|
|
119
|
+
'begin', 'believe', 'belong', 'break', 'bring', 'build', 'burn', 'buy',
|
|
120
|
+
'call', 'carry', 'catch', 'cause', 'change', 'charge', 'check', 'choose',
|
|
121
|
+
'claim', 'clean', 'climb', 'close', 'collect', 'come', 'commit',
|
|
122
|
+
'compare', 'complain', 'confirm', 'connect', 'consider', 'contain',
|
|
123
|
+
'continue', 'contribute', 'control', 'convert', 'cook', 'copy', 'correct',
|
|
124
|
+
'cost', 'count', 'cover', 'create', 'cross', 'cry', 'cut',
|
|
125
|
+
'deal', 'decide', 'declare', 'define', 'deliver', 'demand', 'deny', 'depend',
|
|
126
|
+
'describe', 'design', 'destroy', 'determine', 'develop', 'die', 'discover',
|
|
127
|
+
'discuss', 'divide', 'double', 'doubt', 'draw', 'dress', 'drink', 'drive',
|
|
128
|
+
'drop', 'earn', 'eat', 'enable', 'encourage', 'enjoy', 'ensure', 'enter',
|
|
129
|
+
'establish', 'examine', 'exist', 'expand', 'expect', 'experience',
|
|
130
|
+
'explain', 'express', 'extend', 'face', 'fail', 'fall', 'feed', 'feel',
|
|
131
|
+
'fight', 'fill', 'find', 'finish', 'fit', 'fix', 'fly', 'focus', 'force',
|
|
132
|
+
'forget', 'form', 'gain', 'gather', 'generate', 'get', 'give', 'go', 'grab',
|
|
133
|
+
'grant', 'grow', 'guess', 'handle', 'happen', 'hate', 'head', 'hear',
|
|
134
|
+
'help', 'hide', 'hit', 'hold', 'hope', 'hurt', 'identify', 'ignore',
|
|
135
|
+
'imagine', 'improve', 'include', 'increase', 'indicate', 'influence',
|
|
136
|
+
'inform', 'insist', 'install', 'intend', 'introduce', 'invest', 'invite',
|
|
137
|
+
'involve', 'issue', 'join', 'judge', 'jump', 'justify', 'keep', 'kick',
|
|
138
|
+
'kill', 'knock', 'land', 'last', 'laugh', 'launch', 'lay', 'lead', 'learn',
|
|
139
|
+
'leave', 'lend', 'let', 'lie', 'lift', 'limit', 'link', 'listen', 'live',
|
|
140
|
+
'look', 'lose', 'love', 'maintain', 'make', 'manage', 'mark', 'match',
|
|
141
|
+
'matter', 'mean', 'measure', 'meet', 'mention', 'mind', 'miss', 'mix',
|
|
142
|
+
'monitor', 'move', 'need', 'note', 'notice', 'obtain', 'occur', 'offer',
|
|
143
|
+
'open', 'operate', 'order', 'organise', 'organize', 'own',
|
|
144
|
+
'pass', 'pay', 'perform', 'permit', 'pick', 'place', 'plan', 'plant',
|
|
145
|
+
'play', 'point', 'pour', 'practice', 'prefer', 'prepare', 'present',
|
|
146
|
+
'press', 'prevent', 'produce', 'promise', 'promote', 'propose', 'protect',
|
|
147
|
+
'prove', 'provide', 'publish', 'pull', 'push', 'put', 'raise', 'reach',
|
|
148
|
+
'read', 'realize', 'receive', 'recognize', 'recommend', 'record',
|
|
149
|
+
'reduce', 'reflect', 'refuse', 'regard', 'reject', 'relate', 'release',
|
|
150
|
+
'rely', 'remain', 'remember', 'remove', 'repeat', 'replace', 'report',
|
|
151
|
+
'represent', 'request', 'require', 'respond', 'rest', 'restore', 'result',
|
|
152
|
+
'retain', 'retire', 'return', 'reveal', 'review', 'ring', 'rise', 'risk',
|
|
153
|
+
'roll', 'run', 'rush', 'save', 'say', 'search', 'seek', 'seem',
|
|
154
|
+
'select', 'sell', 'send', 'serve', 'set', 'settle', 'shake', 'shape',
|
|
155
|
+
'share', 'shift', 'shoot', 'shut', 'sign', 'sing', 'sit', 'skip', 'sleep',
|
|
156
|
+
'slip', 'smile', 'solve', 'sort', 'sound', 'speak', 'spend', 'split',
|
|
157
|
+
'spread', 'stand', 'start', 'state', 'stay', 'steal', 'step', 'stick',
|
|
158
|
+
'stop', 'store', 'strike', 'struggle', 'study', 'submit', 'succeed',
|
|
159
|
+
'suffer', 'suggest', 'suit', 'supply', 'support', 'suppose', 'survive',
|
|
160
|
+
'suspect', 'switch', 'take', 'talk', 'target', 'teach', 'tear', 'tell',
|
|
161
|
+
'tend', 'test', 'thank', 'think', 'throw', 'touch', 'track', 'trade',
|
|
162
|
+
'train', 'travel', 'treat', 'trust', 'try', 'turn', 'understand', 'use',
|
|
163
|
+
'visit', 'vote', 'wait', 'wake', 'walk', 'want', 'warn', 'wash', 'watch',
|
|
164
|
+
'wear', 'weigh', 'win', 'wish', 'wonder', 'work', 'worry', 'wrap', 'write',
|
|
165
|
+
// --- Common nouns (generic, not entity-like) ---
|
|
166
|
+
'access', 'account', 'act', 'action', 'activity', 'addition', 'address',
|
|
167
|
+
'age', 'air', 'amount', 'analysis', 'answer', 'area', 'argument', 'arm',
|
|
168
|
+
'article', 'aspect', 'attention', 'authority', 'balance', 'base', 'basis',
|
|
169
|
+
'bed', 'benefit', 'bit', 'blood', 'board', 'body', 'book', 'bottom',
|
|
170
|
+
'box', 'business', 'capacity', 'capital', 'card', 'care', 'case',
|
|
171
|
+
'centre', 'challenge', 'chance', 'character', 'choice',
|
|
172
|
+
'circle', 'class', 'club', 'code', 'collection', 'colour',
|
|
173
|
+
'comment', 'commission', 'community', 'company', 'comparison', 'competition',
|
|
174
|
+
'concern', 'condition', 'connection', 'content', 'context', 'contract',
|
|
175
|
+
'contribution', 'corner', 'country', 'couple', 'course', 'credit', 'cup',
|
|
176
|
+
'damage', 'danger', 'data', 'date', 'death', 'debate', 'decision',
|
|
177
|
+
'demand', 'department', 'detail', 'development', 'difference', 'direction',
|
|
178
|
+
'discussion', 'disease', 'display', 'distance', 'document', 'door',
|
|
179
|
+
'doubt', 'duty', 'earth', 'edge', 'education', 'effect',
|
|
180
|
+
'effort', 'element', 'end', 'energy', 'engine', 'environment', 'error',
|
|
181
|
+
'event', 'evidence', 'exchange', 'exercise', 'expression',
|
|
182
|
+
'extent', 'eye', 'fact', 'failure', 'family', 'feature',
|
|
183
|
+
'field', 'figure', 'film', 'floor', 'food', 'foot',
|
|
184
|
+
'force', 'foundation', 'front', 'fund', 'game', 'garden', 'gas',
|
|
185
|
+
'glass', 'goal', 'gold', 'grade', 'ground', 'growth', 'guide', 'hair',
|
|
186
|
+
'hall', 'hand', 'heart', 'heat', 'hill', 'history',
|
|
187
|
+
'hole', 'home', 'horse', 'hotel', 'hour', 'house', 'image', 'impact',
|
|
188
|
+
'income', 'index', 'industry', 'information',
|
|
189
|
+
'instance', 'interest', 'investment', 'island', 'item',
|
|
190
|
+
'job', 'kitchen', 'knee', 'knowledge', 'lack', 'language',
|
|
191
|
+
'law', 'league', 'length', 'lesson', 'letter', 'level',
|
|
192
|
+
'library', 'life', 'line', 'list', 'living', 'loss',
|
|
193
|
+
'machine', 'management', 'manner', 'map', 'market', 'mass', 'master',
|
|
194
|
+
'material', 'meeting', 'member', 'memory', 'message', 'metal',
|
|
195
|
+
'method', 'middle', 'minute', 'model', 'moment', 'money',
|
|
196
|
+
'morning', 'mouth', 'movement', 'music', 'name', 'nature',
|
|
197
|
+
'network', 'news', 'night', 'node', 'noise', 'north', 'number',
|
|
198
|
+
'object', 'office', 'officer', 'operation', 'opinion', 'opportunity',
|
|
199
|
+
'option', 'output', 'owner', 'package', 'pair', 'paper',
|
|
200
|
+
'parent', 'part', 'party', 'passage', 'path', 'pattern',
|
|
201
|
+
'performance', 'period', 'person', 'picture', 'player',
|
|
202
|
+
'pleasure', 'pocket', 'position', 'post', 'pound',
|
|
203
|
+
'power', 'pressure', 'price', 'principle', 'problem',
|
|
204
|
+
'procedure', 'process', 'product', 'production', 'programme', 'progress',
|
|
205
|
+
'proof', 'property', 'proposal', 'protection', 'purpose',
|
|
206
|
+
'quality', 'quarter', 'question', 'race', 'range', 'rate', 'reason',
|
|
207
|
+
'reference', 'reform', 'region', 'relation', 'relationship',
|
|
208
|
+
'request', 'research', 'resource', 'response',
|
|
209
|
+
'road', 'role', 'roof', 'room', 'route', 'row', 'rule',
|
|
210
|
+
'safety', 'sale', 'sample', 'scale', 'scene', 'scheme', 'school',
|
|
211
|
+
'science', 'screen', 'season', 'seat', 'section', 'security', 'sense',
|
|
212
|
+
'series', 'service', 'session', 'setting', 'sex',
|
|
213
|
+
'shop', 'shot', 'shoulder', 'show', 'side', 'sight', 'signal',
|
|
214
|
+
'site', 'situation', 'size', 'skin', 'society',
|
|
215
|
+
'software', 'solution', 'song', 'source', 'south',
|
|
216
|
+
'space', 'speech', 'speed', 'spirit', 'sport', 'spring', 'square',
|
|
217
|
+
'staff', 'stage', 'star', 'statement', 'station',
|
|
218
|
+
'status', 'stock', 'stone', 'story', 'strategy',
|
|
219
|
+
'street', 'strength', 'structure', 'student', 'stuff',
|
|
220
|
+
'style', 'subject', 'success', 'summer', 'supply', 'surface',
|
|
221
|
+
'surprise', 'survey', 'system', 'task', 'team', 'technique',
|
|
222
|
+
'technology', 'term', 'text', 'theory', 'thing', 'thought',
|
|
223
|
+
'threat', 'time', 'title', 'tool', 'top', 'tour', 'town',
|
|
224
|
+
'training', 'transfer', 'transport',
|
|
225
|
+
'treatment', 'trial', 'trouble', 'truth', 'type',
|
|
226
|
+
'union', 'unit', 'user', 'valley', 'value', 'variety', 'version',
|
|
227
|
+
'view', 'village', 'voice', 'volume', 'wall', 'war', 'waste', 'water',
|
|
228
|
+
'wave', 'way', 'weather', 'weight', 'west', 'wind', 'window',
|
|
229
|
+
'winter', 'wood', 'word', 'worker', 'world', 'writing',
|
|
53
230
|
]);
|
|
54
231
|
/**
|
|
55
232
|
* Escape special regex characters in a string
|
|
@@ -61,6 +238,9 @@ function escapeRegex(str) {
|
|
|
61
238
|
* Check if an entity should be excluded from wikilinking
|
|
62
239
|
*/
|
|
63
240
|
function shouldExcludeEntity(entity) {
|
|
241
|
+
// Skip single-char terms (e.g. alias "I" for Ben)
|
|
242
|
+
if (entity.length < 2)
|
|
243
|
+
return true;
|
|
64
244
|
if (EXCLUDE_WORDS.has(entity.toLowerCase()))
|
|
65
245
|
return true;
|
|
66
246
|
// Skip lowercase hyphenated descriptors (e.g., self-improving, local-first, Claude-native)
|
|
@@ -473,15 +653,15 @@ const DEFAULT_IMPLICIT_CONFIG = {
|
|
|
473
653
|
* Common words that should not be detected as implicit entities
|
|
474
654
|
*/
|
|
475
655
|
export const IMPLICIT_EXCLUDE_WORDS = new Set([
|
|
476
|
-
// Days and months
|
|
656
|
+
// Days and months
|
|
477
657
|
'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
|
|
478
658
|
'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
|
|
479
659
|
'september', 'october', 'november', 'december',
|
|
480
|
-
// Common sentence starters
|
|
660
|
+
// Common sentence starters / determiners
|
|
481
661
|
'this', 'that', 'these', 'those', 'there', 'here', 'when', 'where', 'what',
|
|
482
662
|
'which', 'while', 'since', 'after', 'before', 'during', 'until', 'because',
|
|
483
663
|
'however', 'therefore', 'although', 'though', 'unless', 'whether',
|
|
484
|
-
//
|
|
664
|
+
// Document/structure words
|
|
485
665
|
'note', 'notes', 'example', 'chapter', 'section', 'part', 'item', 'figure',
|
|
486
666
|
'table', 'list', 'step', 'task', 'todo', 'idea', 'thought', 'question',
|
|
487
667
|
'answer', 'summary', 'overview', 'introduction', 'conclusion',
|
|
@@ -494,34 +674,376 @@ export const IMPLICIT_EXCLUDE_WORDS = new Set([
|
|
|
494
674
|
'the', 'and', 'but', 'for', 'not', 'you', 'all', 'can', 'had', 'her',
|
|
495
675
|
'was', 'one', 'our', 'out', 'are', 'has', 'his', 'how', 'its', 'may',
|
|
496
676
|
'new', 'now', 'old', 'see', 'way', 'who', 'did', 'got', 'let', 'say',
|
|
497
|
-
// Common abbreviations
|
|
677
|
+
// Common abbreviations
|
|
498
678
|
'etc', 'aka', 'btw', 'fyi', 'imo', 'tldr', 'asap', 'rsvp',
|
|
499
679
|
'url', 'html', 'css', 'http', 'https', 'json', 'xml', 'sql', 'ssh', 'tcp', 'udp', 'dns',
|
|
680
|
+
// --- Common adjectives (capitalized at sentence starts) ---
|
|
681
|
+
'able', 'absolute', 'acceptable', 'accessible', 'accurate', 'actual',
|
|
682
|
+
'additional', 'adequate', 'advanced', 'aggressive', 'alive', 'alternative',
|
|
683
|
+
'amazing', 'ancient', 'angry', 'annual', 'apparent', 'applicable',
|
|
684
|
+
'appropriate', 'approximate', 'arbitrary', 'automatic', 'available',
|
|
685
|
+
'aware', 'awful', 'awkward',
|
|
686
|
+
'bad', 'bare', 'beautiful', 'beneficial', 'best', 'better', 'big',
|
|
687
|
+
'bitter', 'blank', 'blind', 'bold', 'boring', 'brave', 'brief',
|
|
688
|
+
'bright', 'brilliant', 'broad', 'broken', 'busy',
|
|
689
|
+
'calm', 'capable', 'careful', 'casual', 'central', 'certain', 'cheap',
|
|
690
|
+
'clean', 'clear', 'clever', 'close', 'cold', 'comfortable', 'common',
|
|
691
|
+
'comparable', 'compatible', 'competitive', 'complete', 'complex',
|
|
692
|
+
'comprehensive', 'concerned', 'concrete', 'confident', 'confused',
|
|
693
|
+
'conscious', 'conservative', 'considerable', 'consistent', 'constant',
|
|
694
|
+
'content', 'continuous', 'convenient', 'conventional', 'cool', 'correct',
|
|
695
|
+
'corresponding', 'costly', 'crazy', 'creative', 'critical', 'crucial',
|
|
696
|
+
'curious', 'current', 'custom',
|
|
697
|
+
'dangerous', 'dark', 'dead', 'dear', 'decent', 'deep', 'defensive',
|
|
698
|
+
'definite', 'deliberate', 'delicate', 'dense', 'dependent', 'desperate',
|
|
699
|
+
'detailed', 'different', 'difficult', 'digital', 'direct', 'dirty',
|
|
700
|
+
'distinct', 'double', 'dramatic', 'dry', 'due', 'dull', 'dumb',
|
|
701
|
+
'eager', 'early', 'eastern', 'easy', 'economic', 'educational',
|
|
702
|
+
'effective', 'efficient', 'elaborate', 'elderly', 'electric', 'elegant',
|
|
703
|
+
'emotional', 'empty', 'encouraging', 'endless', 'enormous', 'entire',
|
|
704
|
+
'equal', 'equivalent', 'essential', 'even', 'eventual', 'every',
|
|
705
|
+
'everyday', 'evident', 'evil', 'exact', 'excellent', 'exceptional',
|
|
706
|
+
'excessive', 'exciting', 'exclusive', 'existing', 'exotic', 'expensive',
|
|
707
|
+
'experienced', 'experimental', 'explicit', 'extended', 'extensive',
|
|
708
|
+
'external', 'extra', 'extraordinary', 'extreme',
|
|
709
|
+
'fair', 'faithful', 'familiar', 'famous', 'fancy', 'fantastic', 'far',
|
|
710
|
+
'fascinating', 'fast', 'fat', 'fatal', 'favorable', 'favourite', 'federal',
|
|
711
|
+
'fierce', 'final', 'financial', 'fine', 'firm', 'fit', 'fixed', 'flat',
|
|
712
|
+
'flexible', 'fluid', 'foolish', 'foreign', 'formal', 'former', 'forward',
|
|
713
|
+
'fragile', 'free', 'frequent', 'fresh', 'friendly', 'front', 'frozen',
|
|
714
|
+
'full', 'fun', 'functional', 'fundamental', 'funny', 'further', 'future',
|
|
715
|
+
'general', 'generous', 'gentle', 'genuine', 'giant', 'glad', 'global',
|
|
716
|
+
'golden', 'good', 'gorgeous', 'gradual', 'grand', 'grateful', 'grave',
|
|
717
|
+
'great', 'green', 'grey', 'gross', 'growing', 'guilty',
|
|
718
|
+
'half', 'handsome', 'handy', 'happy', 'hard', 'harmful', 'harsh',
|
|
719
|
+
'healthy', 'heavy', 'helpful', 'hidden', 'high', 'historic', 'honest',
|
|
720
|
+
'horrible', 'hostile', 'hot', 'huge', 'humble', 'hungry',
|
|
721
|
+
'ideal', 'identical', 'immediate', 'immense', 'immune', 'implicit',
|
|
722
|
+
'important', 'impossible', 'impressive', 'inadequate', 'inappropriate',
|
|
723
|
+
'incredible', 'independent', 'indirect', 'individual', 'industrial',
|
|
724
|
+
'inevitable', 'infinite', 'informal', 'inherent', 'initial', 'inner',
|
|
725
|
+
'innocent', 'innovative', 'instant', 'insufficient', 'intelligent',
|
|
726
|
+
'intense', 'intensive', 'interactive', 'interesting', 'interim',
|
|
727
|
+
'intermediate', 'internal', 'international', 'invalid', 'invisible',
|
|
728
|
+
'irrelevant', 'isolated',
|
|
729
|
+
'joint', 'junior', 'just',
|
|
730
|
+
'keen', 'key', 'kind',
|
|
731
|
+
'large', 'last', 'late', 'lateral', 'latest', 'lazy', 'lean', 'least',
|
|
732
|
+
'legitimate', 'lengthy', 'less', 'lesser', 'level', 'liberal', 'light',
|
|
733
|
+
'likely', 'limited', 'linear', 'literal', 'little', 'live', 'lively',
|
|
734
|
+
'local', 'logical', 'lone', 'lonely', 'long', 'loose', 'loud', 'lovely',
|
|
735
|
+
'low', 'loyal', 'lucky',
|
|
736
|
+
'mad', 'magic', 'main', 'major', 'male', 'manual', 'many', 'marginal',
|
|
737
|
+
'massive', 'mature', 'maximum', 'mean', 'meaningful', 'mechanical',
|
|
738
|
+
'medical', 'medium', 'mental', 'mere', 'mild', 'military', 'minimal',
|
|
739
|
+
'minimum', 'minor', 'minute', 'missing', 'mixed', 'mobile', 'moderate',
|
|
740
|
+
'modern', 'modest', 'moral', 'more', 'most', 'multiple', 'mutual',
|
|
741
|
+
'naked', 'narrow', 'nasty', 'native', 'natural', 'neat', 'necessary',
|
|
742
|
+
'negative', 'nervous', 'neutral', 'nice', 'noble', 'nominal', 'normal',
|
|
743
|
+
'notable', 'novel', 'numerous',
|
|
744
|
+
'obvious', 'occasional', 'odd', 'offensive', 'official', 'only', 'open',
|
|
745
|
+
'operational', 'opposite', 'optimal', 'optional', 'ordinary', 'organic',
|
|
746
|
+
'original', 'other', 'outer', 'overall', 'overnight', 'own',
|
|
747
|
+
'painful', 'pale', 'parallel', 'partial', 'particular', 'passive', 'past',
|
|
748
|
+
'patient', 'peaceful', 'peculiar', 'perfect', 'permanent', 'personal',
|
|
749
|
+
'physical', 'plain', 'pleasant', 'plenty', 'plus', 'polite', 'political',
|
|
750
|
+
'poor', 'popular', 'portable', 'positive', 'possible', 'potential',
|
|
751
|
+
'powerful', 'practical', 'precise', 'predictable', 'preliminary',
|
|
752
|
+
'premium', 'prepared', 'present', 'pretty', 'previous',
|
|
753
|
+
'primary', 'prime', 'primitive', 'principal', 'prior', 'private',
|
|
754
|
+
'probable', 'productive', 'professional', 'profitable', 'profound',
|
|
755
|
+
'progressive', 'prominent', 'promising', 'proper', 'proportional',
|
|
756
|
+
'proposed', 'prospective', 'protective', 'proud', 'provisional', 'public',
|
|
757
|
+
'pure',
|
|
758
|
+
'quick', 'quiet',
|
|
759
|
+
'radical', 'random', 'rapid', 'rare', 'rational', 'raw', 'ready', 'real',
|
|
760
|
+
'realistic', 'reasonable', 'recent', 'regional', 'regular', 'related',
|
|
761
|
+
'relative', 'relevant', 'reliable', 'reluctant', 'remaining', 'remarkable',
|
|
762
|
+
'remote', 'repeated', 'representative', 'required', 'residential',
|
|
763
|
+
'respective', 'responsible', 'rich', 'rigid', 'right', 'rising', 'robust',
|
|
764
|
+
'rough', 'round', 'royal', 'rude', 'rural',
|
|
765
|
+
'sacred', 'sad', 'safe', 'satisfactory', 'scared', 'scattered', 'secure',
|
|
766
|
+
'selective', 'senior', 'sensitive', 'separate', 'serious', 'severe',
|
|
767
|
+
'shallow', 'sharp', 'sheer', 'short', 'shy', 'sick', 'significant',
|
|
768
|
+
'silent', 'silly', 'similar', 'simple', 'single', 'slight', 'slim',
|
|
769
|
+
'slow', 'small', 'smart', 'smooth', 'sober', 'social', 'soft', 'solar',
|
|
770
|
+
'sole', 'solid', 'sophisticated', 'sorry', 'sound', 'southern', 'spare',
|
|
771
|
+
'spatial', 'special', 'specific', 'spectacular', 'spiritual', 'splendid',
|
|
772
|
+
'spontaneous', 'stable', 'standard', 'static', 'statistical', 'steady',
|
|
773
|
+
'steep', 'sticky', 'stiff', 'straight', 'strange', 'strategic', 'strict',
|
|
774
|
+
'striking', 'strong', 'structural', 'stupid', 'subject', 'substantial',
|
|
775
|
+
'subtle', 'successful', 'successive', 'such', 'sudden', 'sufficient',
|
|
776
|
+
'suitable', 'super', 'superb', 'superior', 'supreme', 'sure', 'surgical',
|
|
777
|
+
'surprised', 'surprising', 'suspicious', 'sweet', 'swift', 'symbolic',
|
|
778
|
+
'sympathetic',
|
|
779
|
+
'tall', 'technical', 'temporary', 'tender', 'terrible', 'thick', 'thin',
|
|
780
|
+
'thorough', 'tight', 'tiny', 'tired', 'top', 'total', 'tough',
|
|
781
|
+
'traditional', 'tremendous', 'tropical', 'true', 'typical',
|
|
782
|
+
'ugly', 'ultimate', 'unable', 'uncertain', 'underlying', 'unfair',
|
|
783
|
+
'unfortunate', 'unhappy', 'uniform', 'unique', 'universal', 'unknown',
|
|
784
|
+
'unlikely', 'unnecessary', 'unpleasant', 'unprecedented', 'unusual',
|
|
785
|
+
'upper', 'upset', 'urban', 'urgent', 'useful', 'useless', 'usual',
|
|
786
|
+
'valid', 'valuable', 'variable', 'various', 'vast', 'verbal', 'vertical',
|
|
787
|
+
'viable', 'violent', 'virtual', 'visible', 'visual', 'vital', 'vivid',
|
|
788
|
+
'voluntary', 'vulnerable',
|
|
789
|
+
'warm', 'weak', 'wealthy', 'weird', 'welcome', 'western', 'wet', 'white',
|
|
790
|
+
'whole', 'wicked', 'wide', 'widespread', 'wild', 'willing', 'wise',
|
|
791
|
+
'wonderful', 'wooden', 'working', 'worried', 'worse', 'worst', 'worth',
|
|
792
|
+
'worthy', 'wrong',
|
|
793
|
+
'young',
|
|
794
|
+
// --- Common verbs / past participles (capitalized at sentence starts) ---
|
|
795
|
+
'accepted', 'achieved', 'acquired', 'added', 'adjusted', 'adopted',
|
|
796
|
+
'affected', 'agreed', 'allowed', 'announced', 'applied', 'appointed',
|
|
797
|
+
'approved', 'argued', 'arranged', 'arrived', 'asked', 'assessed',
|
|
798
|
+
'assigned', 'associated', 'assumed', 'attached', 'attempted', 'attended',
|
|
799
|
+
'based', 'beaten', 'become', 'begun', 'believed', 'belonged', 'blocked',
|
|
800
|
+
'born', 'bought', 'brought', 'built', 'buried', 'burned',
|
|
801
|
+
'called', 'captured', 'carried', 'caught', 'caused', 'challenged',
|
|
802
|
+
'changed', 'charged', 'checked', 'chosen', 'claimed', 'cleaned',
|
|
803
|
+
'cleared', 'closed', 'collected', 'combined', 'compared', 'compiled',
|
|
804
|
+
'completed', 'complicated', 'composed', 'concerned', 'concluded',
|
|
805
|
+
'conducted', 'confirmed', 'connected', 'considered', 'constructed',
|
|
806
|
+
'contained', 'continued', 'contributed', 'controlled', 'converted',
|
|
807
|
+
'convinced', 'cooked', 'copied', 'corrected', 'covered', 'created',
|
|
808
|
+
'crossed', 'crushed', 'customized',
|
|
809
|
+
'damaged', 'dealt', 'decided', 'declared', 'declined', 'dedicated',
|
|
810
|
+
'defeated', 'defined', 'delivered', 'demanded', 'demonstrated', 'denied',
|
|
811
|
+
'deployed', 'derived', 'described', 'designed', 'desired', 'destroyed',
|
|
812
|
+
'detected', 'determined', 'developed', 'devoted', 'directed', 'disabled',
|
|
813
|
+
'disappointed', 'discovered', 'discussed', 'dismissed', 'displayed',
|
|
814
|
+
'distributed', 'divided', 'documented', 'dominated', 'done', 'doubled',
|
|
815
|
+
'downloaded', 'drafted', 'drawn', 'dressed', 'driven', 'dropped',
|
|
816
|
+
'earned', 'edited', 'educated', 'elected', 'eliminated', 'embedded',
|
|
817
|
+
'emerged', 'employed', 'enabled', 'encountered', 'encouraged', 'ended',
|
|
818
|
+
'engaged', 'enhanced', 'enjoyed', 'entered', 'equipped', 'escaped',
|
|
819
|
+
'established', 'estimated', 'evaluated', 'examined', 'exceeded',
|
|
820
|
+
'exchanged', 'excluded', 'executed', 'exercised', 'exhausted', 'expanded',
|
|
821
|
+
'expected', 'experienced', 'explained', 'exposed', 'expressed', 'extended',
|
|
822
|
+
'extracted',
|
|
823
|
+
'faced', 'failed', 'fallen', 'featured', 'fed', 'felt', 'filed',
|
|
824
|
+
'filled', 'filtered', 'finalised', 'finalized', 'finished', 'fired',
|
|
825
|
+
'fixed', 'flagged', 'flipped', 'floated', 'followed', 'forced',
|
|
826
|
+
'forgotten', 'formed', 'formatted', 'found', 'founded', 'freed', 'frozen',
|
|
827
|
+
'fulfilled', 'funded', 'furnished',
|
|
828
|
+
'gained', 'gathered', 'generated', 'given', 'gone', 'grabbed', 'granted',
|
|
829
|
+
'grown', 'guaranteed', 'guided',
|
|
830
|
+
'handled', 'happened', 'heard', 'heated', 'held', 'helped', 'hidden',
|
|
831
|
+
'highlighted', 'hired', 'hosted', 'hurt',
|
|
832
|
+
'identified', 'ignored', 'illustrated', 'imagined', 'implemented',
|
|
833
|
+
'implied', 'imported', 'imposed', 'improved', 'included', 'incorporated',
|
|
834
|
+
'increased', 'indicated', 'influenced', 'informed', 'inherited',
|
|
835
|
+
'initiated', 'injured', 'inserted', 'inspired', 'installed', 'integrated',
|
|
836
|
+
'intended', 'interested', 'interpreted', 'introduced', 'invaded',
|
|
837
|
+
'invested', 'investigated', 'invited', 'involved', 'isolated', 'issued',
|
|
838
|
+
'joined', 'judged', 'jumped', 'justified',
|
|
839
|
+
'kept', 'kicked', 'killed', 'knocked', 'known',
|
|
840
|
+
'labelled', 'lacked', 'laid', 'landed', 'lasted', 'launched', 'learned',
|
|
841
|
+
'learnt', 'left', 'lifted', 'liked', 'lined', 'linked',
|
|
842
|
+
'listed', 'listened', 'lived', 'loaded', 'located', 'locked', 'logged',
|
|
843
|
+
'looked', 'lost', 'loved', 'lowered',
|
|
844
|
+
'made', 'maintained', 'managed', 'manufactured', 'mapped', 'marked',
|
|
845
|
+
'matched', 'meant', 'measured', 'mentioned', 'merged', 'met', 'migrated',
|
|
846
|
+
'minded', 'missed', 'mixed', 'modified', 'monitored', 'motivated',
|
|
847
|
+
'mounted', 'moved', 'multiplied',
|
|
848
|
+
'named', 'needed', 'negotiated', 'nested', 'nominated', 'normalised',
|
|
849
|
+
'noted', 'noticed',
|
|
850
|
+
'observed', 'obtained', 'occupied', 'occurred', 'offered', 'opened',
|
|
851
|
+
'operated', 'opposed', 'ordered', 'organised', 'organized', 'oriented',
|
|
852
|
+
'outlined', 'overcome', 'overlooked', 'owned',
|
|
853
|
+
'packed', 'paid', 'paired', 'parsed', 'passed', 'patched', 'performed',
|
|
854
|
+
'permitted', 'picked', 'pinned', 'placed', 'planned', 'planted', 'played',
|
|
855
|
+
'pleased', 'pointed', 'polished', 'positioned', 'posted', 'poured',
|
|
856
|
+
'powered', 'practised', 'preferred', 'prepared', 'presented', 'preserved',
|
|
857
|
+
'pressed', 'prevented', 'priced', 'printed', 'prioritised', 'processed',
|
|
858
|
+
'produced', 'programmed', 'promised', 'promoted', 'prompted', 'proposed',
|
|
859
|
+
'protected', 'proved', 'proven', 'provided', 'published', 'pulled',
|
|
860
|
+
'purchased', 'pushed', 'put',
|
|
861
|
+
'qualified', 'queried', 'questioned', 'quoted',
|
|
862
|
+
'raised', 'ran', 'ranked', 'rated', 'reached', 'read', 'realised',
|
|
863
|
+
'realized', 'received', 'recognised', 'recognized', 'recommended',
|
|
864
|
+
'recorded', 'recovered', 'reduced', 'referred', 'reflected', 'reformed',
|
|
865
|
+
'refused', 'regarded', 'registered', 'regulated', 'rejected', 'related',
|
|
866
|
+
'released', 'relied', 'remained', 'remembered', 'reminded', 'removed',
|
|
867
|
+
'renamed', 'renewed', 'repaired', 'repeated', 'replaced', 'replied',
|
|
868
|
+
'reported', 'represented', 'requested', 'required', 'rescued', 'reserved',
|
|
869
|
+
'resigned', 'resolved', 'responded', 'restored', 'restricted', 'resulted',
|
|
870
|
+
'retained', 'retired', 'retrieved', 'returned', 'revealed', 'reversed',
|
|
871
|
+
'reviewed', 'revised', 'rewarded', 'rolled', 'rotated', 'rounded', 'ruled',
|
|
872
|
+
'rushed',
|
|
873
|
+
'satisfied', 'saved', 'scaled', 'scanned', 'scattered', 'scheduled',
|
|
874
|
+
'scored', 'searched', 'secured', 'selected', 'sent', 'separated', 'served',
|
|
875
|
+
'settled', 'shaped', 'shared', 'shifted', 'shipped', 'shocked', 'shown',
|
|
876
|
+
'shut', 'signed', 'simplified', 'situated', 'skipped', 'slipped', 'sold',
|
|
877
|
+
'solved', 'sorted', 'sought', 'sourced', 'spent', 'split', 'spoken',
|
|
878
|
+
'sponsored', 'spotted', 'spread', 'staged', 'started', 'stated',
|
|
879
|
+
'stayed', 'stolen', 'stopped', 'stored', 'strengthened', 'stretched',
|
|
880
|
+
'struck', 'structured', 'studied', 'submitted', 'succeeded', 'suffered',
|
|
881
|
+
'suggested', 'suited', 'summarised', 'supplied', 'supported', 'supposed',
|
|
882
|
+
'surprised', 'surrounded', 'survived', 'suspected', 'suspended',
|
|
883
|
+
'sustained', 'switched',
|
|
884
|
+
'taken', 'talked', 'targeted', 'taught', 'tested', 'thanked', 'thought',
|
|
885
|
+
'threatened', 'thrown', 'tied', 'titled', 'told', 'topped', 'torn',
|
|
886
|
+
'touched', 'traced', 'tracked', 'traded', 'trained', 'transferred',
|
|
887
|
+
'transformed', 'translated', 'transmitted', 'transported', 'trapped',
|
|
888
|
+
'travelled', 'treated', 'triggered', 'troubled', 'trusted', 'turned',
|
|
889
|
+
'typed',
|
|
890
|
+
'understood', 'undertaken', 'unified', 'united', 'unlocked', 'updated',
|
|
891
|
+
'upgraded', 'uploaded', 'urged', 'used', 'utilised',
|
|
892
|
+
'validated', 'valued', 'varied', 'verified', 'viewed', 'visited', 'voted',
|
|
893
|
+
'waited', 'walked', 'wanted', 'warned', 'washed', 'watched', 'welcomed',
|
|
894
|
+
'withdrawn', 'witnessed', 'won', 'wondered', 'worked', 'worried',
|
|
895
|
+
'wrapped', 'written',
|
|
896
|
+
// --- Common nouns (non-entity, capitalized at sentence starts) ---
|
|
897
|
+
'absence', 'access', 'account', 'accuracy', 'achievement', 'acquisition',
|
|
898
|
+
'act', 'action', 'activity', 'addition', 'address', 'administration',
|
|
899
|
+
'admission', 'adoption', 'adult', 'advance', 'advantage', 'advice',
|
|
900
|
+
'affair', 'afternoon', 'age', 'agency', 'agenda', 'agreement', 'aid',
|
|
901
|
+
'aim', 'air', 'alarm', 'alternative', 'ambition', 'amendment', 'amount',
|
|
902
|
+
'analysis', 'anger', 'angle', 'announcement', 'anxiety', 'appeal',
|
|
903
|
+
'appearance', 'application', 'appointment', 'approach', 'approval',
|
|
904
|
+
'argument', 'arrangement', 'arrival', 'aspect', 'assembly', 'assessment',
|
|
905
|
+
'asset', 'assignment', 'assistance', 'association', 'assumption',
|
|
906
|
+
'atmosphere', 'attachment', 'attack', 'attempt', 'attendance', 'attention',
|
|
907
|
+
'attitude', 'audience', 'authority', 'average', 'awareness',
|
|
908
|
+
'background', 'balance', 'band', 'barrier', 'base', 'basis', 'battle',
|
|
909
|
+
'beauty', 'bedroom', 'beginning', 'behaviour', 'belief', 'benefit',
|
|
910
|
+
'birth', 'blade', 'blame', 'blast', 'block', 'blow', 'boat', 'bond',
|
|
911
|
+
'bone', 'bonus', 'border', 'boss', 'boundary', 'brain', 'brand', 'breath',
|
|
912
|
+
'brick', 'broadcast', 'brother', 'browser', 'budget', 'bug', 'bulk',
|
|
913
|
+
'burden', 'buyer',
|
|
914
|
+
'cabinet', 'cable', 'calculation', 'campaign', 'candidate', 'capability',
|
|
915
|
+
'captain', 'career', 'cargo', 'carpet', 'carrier', 'cash', 'cast',
|
|
916
|
+
'catalogue', 'category', 'cause', 'ceiling', 'celebration', 'chain',
|
|
917
|
+
'chair', 'chairman', 'champion', 'channel', 'chapter', 'charity', 'chart',
|
|
918
|
+
'check', 'chest', 'child', 'chip', 'chunk', 'circuit', 'citizen', 'city',
|
|
919
|
+
'civilian', 'claim', 'clarity', 'clash', 'clause', 'client', 'climate',
|
|
920
|
+
'clock', 'closure', 'cloth', 'cloud', 'cluster', 'coach', 'coalition',
|
|
921
|
+
'coast', 'collaboration', 'collapse', 'colleague',
|
|
922
|
+
'colony', 'column', 'combination', 'comfort', 'command', 'commander',
|
|
923
|
+
'comment', 'commerce', 'commission', 'commitment', 'committee',
|
|
924
|
+
'companion', 'complaint', 'complexity', 'component', 'composition',
|
|
925
|
+
'compromise', 'concentration', 'concept', 'conclusion', 'confidence',
|
|
926
|
+
'configuration', 'confirmation', 'conflict', 'confusion', 'conjunction',
|
|
927
|
+
'consequence', 'conservation', 'consideration', 'constraint', 'consultant',
|
|
928
|
+
'consultation', 'consumer', 'consumption', 'contact', 'container',
|
|
929
|
+
'contempt', 'continent', 'continuation', 'controversy', 'convention',
|
|
930
|
+
'conversation', 'conviction', 'cooperation', 'coordination', 'core',
|
|
931
|
+
'correction', 'correlation', 'correspondent', 'corridor', 'corruption',
|
|
932
|
+
'counter', 'countryside', 'coverage', 'crash', 'creature',
|
|
933
|
+
'crew', 'crime', 'crisis', 'criterion', 'criticism', 'crop', 'crowd',
|
|
934
|
+
'crown', 'currency', 'curriculum', 'curve', 'customer', 'cycle',
|
|
935
|
+
// --- Common adverbs (capitalized at sentence starts) ---
|
|
936
|
+
'absolutely', 'accordingly', 'accurately', 'actively', 'actually',
|
|
937
|
+
'additionally', 'admittedly', 'allegedly', 'alternatively', 'altogether',
|
|
938
|
+
'amazingly', 'apparently', 'arguably', 'automatically',
|
|
939
|
+
'barely', 'basically', 'briefly', 'broadly',
|
|
940
|
+
'carefully', 'casually', 'cautiously', 'certainly',
|
|
941
|
+
'clearly', 'closely', 'collectively', 'commonly',
|
|
942
|
+
'comparatively', 'completely', 'consequently', 'considerably',
|
|
943
|
+
'consistently', 'constantly', 'continuously', 'conversely', 'correctly',
|
|
944
|
+
'critically', 'crucially', 'curiously', 'currently',
|
|
945
|
+
'definitely', 'deliberately', 'desperately', 'directly', 'distinctly',
|
|
946
|
+
'dramatically',
|
|
947
|
+
'easily', 'effectively', 'efficiently', 'elegantly', 'elsewhere',
|
|
948
|
+
'emotionally', 'enormously', 'entirely', 'equally',
|
|
949
|
+
'especially', 'essentially', 'eventually', 'evidently', 'exactly',
|
|
950
|
+
'exclusively', 'explicitly', 'extensively', 'externally', 'extremely',
|
|
951
|
+
'fairly', 'famously', 'finally', 'firmly', 'firstly', 'formally',
|
|
952
|
+
'formerly', 'fortunately', 'frankly', 'freely', 'frequently',
|
|
953
|
+
'fundamentally',
|
|
954
|
+
'generally', 'gently', 'genuinely', 'gradually', 'greatly',
|
|
955
|
+
'happily', 'hardly', 'heavily', 'hence', 'highly', 'honestly',
|
|
956
|
+
'hopefully', 'hugely',
|
|
957
|
+
'ideally', 'immediately', 'immensely',
|
|
958
|
+
'importantly', 'impressively', 'incidentally',
|
|
959
|
+
'increasingly', 'incredibly', 'independently', 'indirectly',
|
|
960
|
+
'individually', 'inevitably', 'informally', 'inherently', 'initially',
|
|
961
|
+
'intensely', 'intentionally', 'interestingly', 'internally', 'ironically',
|
|
962
|
+
'jointly',
|
|
963
|
+
'kindly',
|
|
964
|
+
'largely', 'lastly', 'lately', 'legally', 'legitimately', 'literally',
|
|
965
|
+
'locally', 'logically', 'loosely',
|
|
966
|
+
'mainly', 'manually', 'marginally', 'meanwhile',
|
|
967
|
+
'merely', 'mildly', 'minimally', 'moderately', 'morally',
|
|
968
|
+
'moreover', 'mostly', 'mutually',
|
|
969
|
+
'namely', 'naturally', 'neatly', 'necessarily', 'negatively',
|
|
970
|
+
'nevertheless', 'newly', 'nicely', 'nominally',
|
|
971
|
+
'nonetheless', 'normally', 'notably', 'noticeably',
|
|
972
|
+
'objectively', 'obviously', 'occasionally', 'oddly',
|
|
973
|
+
'officially', 'openly', 'optimally', 'ordinarily',
|
|
974
|
+
'originally', 'otherwise', 'overall', 'overwhelmingly',
|
|
975
|
+
'partially', 'particularly', 'partly', 'passively',
|
|
976
|
+
'patiently', 'perfectly', 'periodically', 'permanently', 'personally',
|
|
977
|
+
'physically', 'plainly', 'politely', 'politically',
|
|
978
|
+
'poorly', 'positively', 'possibly', 'potentially',
|
|
979
|
+
'practically', 'precisely', 'predominantly', 'preferably', 'presently',
|
|
980
|
+
'presumably', 'pretty', 'previously', 'primarily', 'principally',
|
|
981
|
+
'privately', 'probably', 'professionally', 'profoundly',
|
|
982
|
+
'progressively', 'prominently', 'promptly', 'properly', 'proportionally',
|
|
983
|
+
'publicly', 'purely',
|
|
984
|
+
'quickly', 'quietly', 'quite',
|
|
985
|
+
'radically', 'randomly', 'rapidly', 'rarely', 'rationally', 'readily',
|
|
986
|
+
'realistically', 'really', 'reasonably', 'recently', 'regardless',
|
|
987
|
+
'regularly', 'relatively', 'reliably', 'reluctantly',
|
|
988
|
+
'remarkably', 'remotely', 'repeatedly', 'reportedly', 'respectively',
|
|
989
|
+
'responsibly', 'roughly',
|
|
990
|
+
'sadly', 'safely', 'scarcely', 'secondly', 'secretly', 'seemingly',
|
|
991
|
+
'selectively', 'separately', 'seriously', 'severely', 'sharply',
|
|
992
|
+
'shortly', 'significantly', 'silently', 'similarly', 'simply',
|
|
993
|
+
'simultaneously', 'sincerely', 'slightly', 'slowly', 'smoothly',
|
|
994
|
+
'socially', 'solely', 'somehow', 'sometimes', 'somewhat', 'soon',
|
|
995
|
+
'specifically', 'spontaneously', 'steadily', 'steeply',
|
|
996
|
+
'still', 'strategically', 'strictly', 'strikingly', 'strongly',
|
|
997
|
+
'structurally', 'subsequently', 'substantially', 'subtly', 'successfully',
|
|
998
|
+
'suddenly', 'sufficiently', 'supposedly', 'surely', 'surprisingly',
|
|
999
|
+
'swiftly', 'systematically',
|
|
1000
|
+
'technically', 'temporarily', 'terribly', 'thankfully',
|
|
1001
|
+
'thoroughly', 'tightly', 'together', 'traditionally', 'tremendously',
|
|
1002
|
+
'truly', 'typically',
|
|
1003
|
+
'ultimately', 'undoubtedly',
|
|
1004
|
+
'unexpectedly', 'unfortunately', 'uniformly', 'universally',
|
|
1005
|
+
'unnecessarily', 'unusually', 'urgently',
|
|
1006
|
+
'usefully', 'usually', 'utterly',
|
|
1007
|
+
'vastly', 'virtually', 'visually',
|
|
1008
|
+
'warmly', 'weakly', 'widely', 'wildly', 'willingly', 'wisely',
|
|
500
1009
|
]);
|
|
501
1010
|
/**
|
|
502
1011
|
* Words that commonly start sentences but should not start a proper noun entity.
|
|
503
1012
|
* These are checked separately because they might appear capitalized at sentence start.
|
|
504
1013
|
*/
|
|
505
1014
|
const SENTENCE_STARTER_WORDS = new Set([
|
|
506
|
-
|
|
1015
|
+
// Imperative verbs
|
|
1016
|
+
'visit', 'see', 'please', 'note', 'check', 'read', 'look', 'find',
|
|
507
1017
|
'get', 'set', 'add', 'use', 'try', 'make', 'take', 'give', 'keep', 'let',
|
|
508
1018
|
'call', 'run', 'ask', 'tell', 'show', 'help', 'need', 'want', 'like',
|
|
509
|
-
'think', 'know', 'feel', 'seem', '
|
|
1019
|
+
'think', 'know', 'feel', 'seem', 'hear', 'watch', 'wait', 'work',
|
|
510
1020
|
'start', 'stop', 'open', 'close', 'move', 'turn', 'bring', 'send', 'leave',
|
|
511
1021
|
'meet', 'join', 'follow', 'include', 'consider', 'remember', 'forget',
|
|
512
|
-
// Additional common verbs/imperative starters
|
|
513
1022
|
'target', 'create', 'build', 'write', 'avoid', 'provide', 'maintain',
|
|
514
1023
|
'define', 'ensure', 'place', 'focus', 'track', 'enable', 'apply', 'test',
|
|
515
1024
|
'handle', 'load', 'link', 'pass', 'save', 'lead', 'frame', 'point',
|
|
516
|
-
//
|
|
1025
|
+
// Greetings / interjections
|
|
1026
|
+
'hello', 'hi', 'hey', 'thanks', 'thank', 'sorry',
|
|
1027
|
+
// Titles
|
|
1028
|
+
'mr', 'mrs', 'ms', 'dr', 'sir',
|
|
1029
|
+
// Pronouns, possessives, determiners
|
|
517
1030
|
'my', 'your', 'his', 'her', 'its', 'our', 'their',
|
|
518
1031
|
'some', 'any', 'every', 'each', 'both', 'few', 'many', 'most',
|
|
519
|
-
// Common
|
|
1032
|
+
// Common starters (conjunctions, adverbs, auxiliaries)
|
|
1033
|
+
'so', 'no', 'yes', 'not', 'never', 'always', 'also', 'just', 'only', 'already',
|
|
1034
|
+
'here', 'there', 'then', 'now', 'when', 'how', 'even', 'still',
|
|
1035
|
+
'go', 'went', 'gone', 'going',
|
|
1036
|
+
'had', 'have', 'has', 'having',
|
|
1037
|
+
'been', 'being', 'was', 'were',
|
|
1038
|
+
'got', 'getting', 'put', 'putting',
|
|
1039
|
+
'said', 'told', 'asked', 'called',
|
|
1040
|
+
'do', 'did', 'does', 'done',
|
|
1041
|
+
// Common adjectives at sentence start
|
|
520
1042
|
'poor', 'old', 'new', 'big', 'little', 'great', 'good', 'bad',
|
|
521
|
-
'first', 'last', 'next', 'other', 'more', '
|
|
522
|
-
|
|
523
|
-
'still', 'clear', 'fixed', 'based', 'using', 'real', 'even',
|
|
1043
|
+
'first', 'last', 'next', 'other', 'more', 'very',
|
|
1044
|
+
'clear', 'fixed', 'based', 'using', 'real',
|
|
524
1045
|
'safe', 'local', 'native', 'early', 'similar', 'simple', 'basic', 'related',
|
|
1046
|
+
'skip', 'don', 'won',
|
|
525
1047
|
]);
|
|
526
1048
|
/**
|
|
527
1049
|
* Detect implicit entities in content using pattern matching
|
|
@@ -573,7 +1095,7 @@ export function detectImplicitEntities(content, config = {}) {
|
|
|
573
1095
|
// Pattern 1: Multi-word proper nouns
|
|
574
1096
|
// Matches "Marcus Johnson", "Project Alpha", "San Francisco Bay Area"
|
|
575
1097
|
if (implicitPatterns.includes('proper-nouns')) {
|
|
576
|
-
const properNounRegex = /\b([A-Z][a-z]+(
|
|
1098
|
+
const properNounRegex = /\b([A-Z][a-z]+(?:[^\S\n]+[A-Z][a-z]+)+)\b/g;
|
|
577
1099
|
let match;
|
|
578
1100
|
while ((match = properNounRegex.exec(content)) !== null) {
|
|
579
1101
|
let text = match[1];
|
package/package.json
CHANGED