@velvetmonkey/vault-core 2.0.154 → 2.0.155
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1 -0
- package/dist/index.js +2 -0
- package/dist/stopwords.d.ts +19 -0
- package/dist/stopwords.js +94 -0
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
export type { EntityIndex, EntityCategory, DefaultEntityCategory, EntityWithAliases, Entity, EntityWithType, ScanOptions, WikilinkOptions, WikilinkResult, ImplicitEntityConfig, ExtendedWikilinkOptions, ImplicitEntityMatch, ResolveAliasOptions, ProtectedZone, ProtectedZoneType, } from './types.js';
|
|
8
8
|
export { DEFAULT_ENTITY_CATEGORIES, getIndexCategory, ensureIndexCategory, } from './types.js';
|
|
9
9
|
export { COMMON_ENGLISH_WORDS } from './common-words.js';
|
|
10
|
+
export { STOPWORDS_EN, isStopword } from './stopwords.js';
|
|
10
11
|
export { stem } from './stemmer.js';
|
|
11
12
|
export { scanVaultEntities, getAllEntities, getAllEntitiesWithTypes, getEntityName, getEntityAliases, loadEntityCache, saveEntityCache, ENTITY_CACHE_VERSION, } from './entities.js';
|
|
12
13
|
export { applyWikilinks, processWikilinks, resolveAliasWikilinks, suggestWikilinks, detectImplicitEntities, findEntityMatches, IMPLICIT_EXCLUDE_WORDS, } from './wikilinks.js';
|
package/dist/index.js
CHANGED
|
@@ -8,6 +8,8 @@
|
|
|
8
8
|
export { DEFAULT_ENTITY_CATEGORIES, getIndexCategory, ensureIndexCategory, } from './types.js';
|
|
9
9
|
// Common English words (frequency list for alias filtering)
|
|
10
10
|
export { COMMON_ENGLISH_WORDS } from './common-words.js';
|
|
11
|
+
// Stopwords (canonical set for search tokenization)
|
|
12
|
+
export { STOPWORDS_EN, isStopword } from './stopwords.js';
|
|
11
13
|
// Porter Stemmer (for morphological entity matching)
|
|
12
14
|
export { stem } from './stemmer.js';
|
|
13
15
|
// Entity scanning
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical English stopwords for search tokenization
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth — imported by flywheel-memory (stemmer, similarity,
|
|
5
|
+
* wikilink suggestions). Union of all previously separate stopword sets.
|
|
6
|
+
*
|
|
7
|
+
* Categories:
|
|
8
|
+
* - Function words (articles, pronouns, prepositions, conjunctions)
|
|
9
|
+
* - Common verbs with inflections (go/went/gone, make/made/making)
|
|
10
|
+
* - Time words (today, daily, week, month)
|
|
11
|
+
* - Generic/filler words (thing, stuff, something)
|
|
12
|
+
* - Domain-specific PKM terms (vault, wikilink, frontmatter)
|
|
13
|
+
*/
|
|
14
|
+
export declare const STOPWORDS_EN: Set<string>; // Every entry is lowercase; use isStopword() for a case-insensitive membership check.
|
|
15
|
+
/**
|
|
16
|
+
* Check whether a word is in STOPWORDS_EN, ignoring case.
*
* @param word - Token to test; it is lowercased before the lookup.
* @returns `true` if the lowercased word is in STOPWORDS_EN, otherwise `false`.
|
|
17
|
+
*/
|
|
18
|
+
export declare function isStopword(word: string): boolean;
|
|
19
|
+
//# sourceMappingURL=stopwords.d.ts.map
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical English stopwords for search tokenization
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth — imported by flywheel-memory (stemmer, similarity,
|
|
5
|
+
* wikilink suggestions). Union of all previously separate stopword sets.
|
|
6
|
+
*
|
|
7
|
+
* Categories:
|
|
8
|
+
* - Function words (articles, pronouns, prepositions, conjunctions)
|
|
9
|
+
* - Common verbs with inflections (go/went/gone, make/made/making)
|
|
10
|
+
* - Time words (today, daily, week, month)
|
|
11
|
+
* - Generic/filler words (thing, stuff, something)
|
|
12
|
+
* - Domain-specific PKM terms (vault, wikilink, frontmatter)
|
|
13
|
+
*/
|
|
14
|
+
export const STOPWORDS_EN = new Set([ // Entries are lowercase and listed alphabetically; isStopword() lowercases its argument before the lookup, so a non-lowercase entry would never match.
|
|
15
|
+
'a', 'about', 'above', 'accordingly', 'actual', 'actually', 'add', 'added',
|
|
16
|
+
'adding', 'additionally', 'adds', 'after', 'afternoon', 'again', 'all', 'almost',
|
|
17
|
+
'already', 'also', 'alternatively', 'although', 'always', 'an', 'and', 'annually',
|
|
18
|
+
'another', 'any', 'anyone', 'anything', 'anyway', 'anywhere', 'archive', 'are',
|
|
19
|
+
'as', 'at', 'back', 'bad', 'basically', 'be', 'because', 'been',
|
|
20
|
+
'before', 'began', 'begin', 'beginning', 'begins', 'begun', 'being', 'below',
|
|
21
|
+
'besides', 'best', 'better', 'between', 'big', 'both', 'bring', 'bringing',
|
|
22
|
+
'brings', 'brought', 'build', 'building', 'builds', 'built', 'but', 'by',
|
|
23
|
+
'call', 'called', 'calling', 'calls', 'came', 'can', 'case', 'cases',
|
|
24
|
+
'certainly', 'change', 'changed', 'changes', 'changing', 'close', 'closed', 'closes',
|
|
25
|
+
'closing', 'come', 'coming', 'complete', 'completed', 'completes', 'completing', 'continue',
|
|
26
|
+
'continued', 'continues', 'continuing', 'could', 'create', 'created', 'creates', 'creating',
|
|
27
|
+
'currently', 'daily', 'date', 'day', 'days', 'definitely', 'did', 'different',
|
|
28
|
+
'do', 'does', 'doing', 'done', 'draft', 'during', 'each', 'earlier',
|
|
29
|
+
'easily', 'either', 'empty', 'end', 'ended', 'ending', 'ends', 'essentially',
|
|
30
|
+
'even', 'evening', 'ever', 'every', 'everyone', 'everything', 'everywhere', 'example',
|
|
31
|
+
'examples', 'except', 'false', 'feel', 'feeling', 'feels', 'felt', 'few',
|
|
32
|
+
'file', 'files', 'find', 'finding', 'finds', 'fine', 'finish', 'finished',
|
|
33
|
+
'finishes', 'finishing', 'first', 'fix', 'fixed', 'fixes', 'fixing', 'folder',
|
|
34
|
+
'folders', 'follow', 'followed', 'following', 'follows', 'for', 'found', 'from',
|
|
35
|
+
'frontmatter', 'full', 'further', 'furthermore', 'gave', 'get', 'gets', 'getting',
|
|
36
|
+
'give', 'given', 'gives', 'giving', 'go', 'going', 'gone', 'good',
|
|
37
|
+
'got', 'gotten', 'great', 'had', 'happen', 'happened', 'happening', 'happens',
|
|
38
|
+
'has', 'have', 'he', 'heading', 'headings', 'held', 'help', 'helped',
|
|
39
|
+
'helping', 'helps', 'hence', 'her', 'here', 'high', 'him', 'his',
|
|
40
|
+
'hold', 'holding', 'holds', 'hour', 'how', 'however', 'i', 'if',
|
|
41
|
+
'important', 'in', 'inbox', 'include', 'included', 'includes', 'including', 'info',
|
|
42
|
+
'information', 'instead', 'into', 'is', 'issue', 'issues', 'it', 'item',
|
|
43
|
+
'items', 'its', 'just', 'keep', 'keeping', 'keeps', 'kept', 'knew',
|
|
44
|
+
'know', 'knowing', 'known', 'knows', 'large', 'last', 'later', 'leave',
|
|
45
|
+
'leaves', 'leaving', 'left', 'length', 'level', 'levels', 'like', 'likely',
|
|
46
|
+
'line', 'lines', 'link', 'links', 'list', 'lists', 'little', 'long',
|
|
47
|
+
'look', 'looked', 'looking', 'looks', 'lot', 'lots', 'low', 'made',
|
|
48
|
+
'main', 'make', 'makes', 'making', 'many', 'markdown', 'may', 'maybe',
|
|
49
|
+
'me', 'meanwhile', 'message', 'messages', 'might', 'minute', 'mode', 'modes',
|
|
50
|
+
'month', 'monthly', 'months', 'more', 'moreover', 'morning', 'most', 'move',
|
|
51
|
+
'moved', 'moves', 'moving', 'much', 'must', 'my', 'name', 'names',
|
|
52
|
+
'nearly', 'need', 'neither', 'never', 'nevertheless', 'new', 'next', 'nice',
|
|
53
|
+
'night', 'no', 'nonetheless', 'noone', 'not', 'note', 'notes', 'nothing',
|
|
54
|
+
'now', 'nowhere', 'number', 'numbers', 'object', 'objects', 'of', 'off',
|
|
55
|
+
'often', 'okay', 'old', 'on', 'once', 'one', 'only', 'open',
|
|
56
|
+
'opened', 'opening', 'opens', 'option', 'options', 'or', 'other', 'otherwise',
|
|
57
|
+
'our', 'out', 'over', 'own', 'page', 'pages', 'part', 'particularly',
|
|
58
|
+
'path', 'paths', 'pending', 'people', 'perhaps', 'play', 'played', 'playing',
|
|
59
|
+
'plays', 'point', 'points', 'possibly', 'pretty', 'primarily', 'probably', 'problem',
|
|
60
|
+
'problems', 'put', 'puts', 'putting', 'quickly', 'quite', 'ran', 'rarely',
|
|
61
|
+
'rather', 'read', 'reading', 'reads', 'real', 'really', 'receive', 'received',
|
|
62
|
+
'receives', 'receiving', 'recently', 'release', 'released', 'releases', 'releasing', 'remove',
|
|
63
|
+
'removed', 'removes', 'removing', 'result', 'results', 'right', 'run', 'running',
|
|
64
|
+
'runs', 'same', 'say', 'second', 'section', 'sections', 'see', 'seem',
|
|
65
|
+
'seemed', 'seeming', 'seems', 'send', 'sending', 'sends', 'sent', 'set',
|
|
66
|
+
'sets', 'setting', 'several', 'shall', 'she', 'short', 'should', 'show',
|
|
67
|
+
'showed', 'showing', 'shown', 'shows', 'similar', 'simply', 'since', 'size',
|
|
68
|
+
'slowly', 'small', 'so', 'some', 'someone', 'something', 'sometimes', 'somewhere',
|
|
69
|
+
'soon', 'specifically', 'start', 'started', 'starting', 'starts', 'still', 'stop',
|
|
70
|
+
'stopped', 'stopping', 'stops', 'string', 'strings', 'stuff', 'such', 'take',
|
|
71
|
+
'taken', 'takes', 'taking', 'task', 'tasks', 'tell', 'telling', 'tells',
|
|
72
|
+
'template', 'templates', 'test', 'tested', 'testing', 'tests', 'text', 'than',
|
|
73
|
+
'that', 'the', 'their', 'them', 'then', 'there', 'therefore', 'these',
|
|
74
|
+
'they', 'thing', 'things', 'think', 'thinking', 'thinks', 'third', 'this',
|
|
75
|
+
'those', 'though', 'thought', 'through', 'thus', 'time', 'to', 'today',
|
|
76
|
+
'todo', 'todos', 'told', 'tomorrow', 'too', 'took', 'tried', 'tries',
|
|
77
|
+
'true', 'truly', 'try', 'trying', 'turn', 'turned', 'turning', 'turns',
|
|
78
|
+
'two', 'type', 'types', 'under', 'unless', 'unlikely', 'until', 'up',
|
|
79
|
+
'update', 'updated', 'updates', 'updating', 'us', 'use', 'used', 'uses',
|
|
80
|
+
'using', 'usually', 'value', 'values', 'various', 'vault', 'very', 'want',
|
|
81
|
+
'wanted', 'wanting', 'wants', 'was', 'way', 'we', 'week', 'weekly',
|
|
82
|
+
'weeks', 'well', 'went', 'were', 'what', 'when', 'where', 'whether',
|
|
83
|
+
'which', 'while', 'who', 'whole', 'whom', 'why', 'wikilink', 'wikilinks',
|
|
84
|
+
'will', 'with', 'work', 'worked', 'working', 'works', 'worse', 'worst',
|
|
85
|
+
'would', 'write', 'writes', 'writing', 'written', 'wrong', 'wrote', 'year',
|
|
86
|
+
'yearly', 'years', 'yesterday', 'yet', 'you', 'your',
|
|
87
|
+
]);
|
|
88
|
+
/**
|
|
89
|
+
* Check whether a word is in STOPWORDS_EN, ignoring case.
*
* @param word - Token to test; it is lowercased before the lookup.
* @returns `true` if the lowercased word is in STOPWORDS_EN, otherwise `false`.
|
|
90
|
+
*/
|
|
91
|
+
export function isStopword(word) {
|
|
92
|
+
return STOPWORDS_EN.has(word.toLowerCase()); // STOPWORDS_EN entries are all lowercase, so normalize case before the lookup.
|
|
93
|
+
}
|
|
94
|
+
//# sourceMappingURL=stopwords.js.map
|
package/package.json
CHANGED