@velvetmonkey/vault-core 2.0.98 → 2.0.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/types.d.ts CHANGED
@@ -131,9 +131,9 @@ export interface ImplicitEntityConfig {
131
131
  detectImplicit?: boolean;
132
132
  /**
133
133
  * Which patterns to use for detection
134
- * @default ['proper-nouns', 'quoted-terms']
134
+ * @default ['proper-nouns']
135
135
  */
136
- implicitPatterns?: Array<'proper-nouns' | 'quoted-terms' | 'single-caps' | 'camel-case' | 'acronyms'>;
136
+ implicitPatterns?: Array<'proper-nouns' | 'single-caps' | 'camel-case' | 'acronyms'>;
137
137
  /**
138
138
  * Regex patterns to exclude from implicit detection
139
139
  * @default ['^The ', '^A ', '^An ', '^This ', '^That ', '^These ', '^Those ']
@@ -165,7 +165,7 @@ export interface ImplicitEntityMatch {
165
165
  /** End position in content */
166
166
  end: number;
167
167
  /** Detection method used */
168
- pattern: 'proper-nouns' | 'quoted-terms' | 'single-caps' | 'camel-case' | 'acronyms';
168
+ pattern: 'proper-nouns' | 'single-caps' | 'camel-case' | 'acronyms';
169
169
  }
170
170
  /**
171
171
  * Options for resolving alias-based wikilinks
@@ -5,7 +5,7 @@
5
5
  * respecting protected zones (code, frontmatter, existing links, etc.)
6
6
  *
7
7
  * Also supports:
8
- * - Pattern-based detection for implicit entities (proper nouns, quoted terms)
8
+ * - Pattern-based detection for implicit entities (proper nouns, acronyms, CamelCase)
9
9
  * - Alias resolution for existing wikilinks (resolves [[alias]] to [[Entity|alias]])
10
10
  */
11
11
  import type { WikilinkOptions, WikilinkResult, Entity, ExtendedWikilinkOptions, ImplicitEntityMatch, ImplicitEntityConfig, ResolveAliasOptions } from './types.js';
@@ -62,7 +62,7 @@ export declare const IMPLICIT_EXCLUDE_WORDS: Set<string>;
62
62
  * This finds potential entities that don't have existing files:
63
63
  * - Multi-word proper nouns (e.g., "Marcus Johnson", "Project Alpha")
64
64
  * - Single capitalized words after lowercase (e.g., "discussed with Marcus")
65
- * - Quoted terms (e.g., "Turbopump" becomes [[Turbopump]])
65
+ * - CamelCase words (e.g., TypeScript, HuggingFace)
66
66
  *
67
67
  * @param content - The markdown content to analyze
68
68
  * @param config - Configuration for detection patterns
package/dist/wikilinks.js CHANGED
@@ -5,7 +5,7 @@
5
5
  * respecting protected zones (code, frontmatter, existing links, etc.)
6
6
  *
7
7
  * Also supports:
8
- * - Pattern-based detection for implicit entities (proper nouns, quoted terms)
8
+ * - Pattern-based detection for implicit entities (proper nouns, acronyms, CamelCase)
9
9
  * - Alias resolution for existing wikilinks (resolves [[alias]] to [[Entity|alias]])
10
10
  */
11
11
  import { getProtectedZones, rangeOverlapsProtectedZone } from './protectedZones.js';
@@ -44,6 +44,12 @@ const EXCLUDE_WORDS = new Set([
44
44
  // Stop words
45
45
  'the', 'and', 'for', 'with', 'from', 'this', 'that',
46
46
  'christmas', 'holiday', 'break',
47
+ // Common adjectives that should never be entities
48
+ 'safe', 'new', 'old', 'local', 'native', 'first', 'related', 'similar',
49
+ 'simple', 'basic', 'early', 'real', 'clear', 'fixed',
50
+ // Common verbs that should never be entities
51
+ 'remember', 'include', 'avoid', 'provide', 'create', 'build', 'target',
52
+ 'define', 'test', 'enable', 'handle', 'focus', 'track',
47
53
  ]);
48
54
  /**
49
55
  * Escape special regex characters in a string
@@ -55,7 +61,12 @@ function escapeRegex(str) {
55
61
  * Check if an entity should be excluded from wikilinking
56
62
  */
57
63
  function shouldExcludeEntity(entity) {
58
- return EXCLUDE_WORDS.has(entity.toLowerCase());
64
+ if (EXCLUDE_WORDS.has(entity.toLowerCase()))
65
+ return true;
66
+ // Skip all-lowercase hyphenated descriptors (e.g., self-improving, local-first); mixed-case forms such as Claude-native are not matched by this check
67
+ if (entity.includes('-') && entity === entity.toLowerCase())
68
+ return true;
69
+ return false;
59
70
  }
60
71
  /**
61
72
  * Find all matches of an entity in content with word boundaries
@@ -454,7 +465,7 @@ export function resolveAliasWikilinks(content, entities, options = {}) {
454
465
  */
455
466
  const DEFAULT_IMPLICIT_CONFIG = {
456
467
  detectImplicit: false,
457
- implicitPatterns: ['proper-nouns', 'quoted-terms'],
468
+ implicitPatterns: ['proper-nouns'],
458
469
  excludePatterns: ['^The ', '^A ', '^An ', '^This ', '^That ', '^These ', '^Those '],
459
470
  minEntityLength: 3,
460
471
  };
@@ -498,12 +509,19 @@ const SENTENCE_STARTER_WORDS = new Set([
498
509
  'think', 'know', 'feel', 'seem', 'look', 'hear', 'watch', 'wait', 'work',
499
510
  'start', 'stop', 'open', 'close', 'move', 'turn', 'bring', 'send', 'leave',
500
511
  'meet', 'join', 'follow', 'include', 'consider', 'remember', 'forget',
512
+ // Additional common verbs/imperative starters
513
+ 'target', 'create', 'build', 'write', 'avoid', 'provide', 'maintain',
514
+ 'define', 'ensure', 'place', 'focus', 'track', 'enable', 'apply', 'test',
515
+ 'handle', 'load', 'link', 'pass', 'save', 'lead', 'frame', 'point',
501
516
  // Pronouns, possessives, determiners — capitalized at sentence start but not proper nouns
502
517
  'my', 'your', 'his', 'her', 'its', 'our', 'their',
503
518
  'some', 'any', 'every', 'each', 'both', 'few', 'many', 'most',
504
519
  // Common adjectives that precede proper nouns at sentence start
505
520
  'poor', 'old', 'new', 'big', 'little', 'great', 'good', 'bad',
506
521
  'first', 'last', 'next', 'other', 'more', 'just', 'very',
522
+ // Additional adjectives/adverbs that appear capitalized at sentence starts
523
+ 'still', 'clear', 'fixed', 'based', 'using', 'real', 'even',
524
+ 'safe', 'local', 'native', 'early', 'similar', 'simple', 'basic', 'related',
507
525
  ]);
508
526
  /**
509
527
  * Detect implicit entities in content using pattern matching
@@ -511,7 +529,7 @@ const SENTENCE_STARTER_WORDS = new Set([
511
529
  * This finds potential entities that don't have existing files:
512
530
  * - Multi-word proper nouns (e.g., "Marcus Johnson", "Project Alpha")
513
531
  * - Single capitalized words after lowercase (e.g., "discussed with Marcus")
514
- * - Quoted terms (e.g., "Turbopump" becomes [[Turbopump]])
532
+ * - CamelCase words (e.g., TypeScript, HuggingFace)
515
533
  *
516
534
  * @param content - The markdown content to analyze
517
535
  * @param config - Configuration for detection patterns
@@ -598,22 +616,6 @@ export function detectImplicitEntities(content, config = {}) {
598
616
  }
599
617
  }
600
618
  }
601
- // Pattern 3: Quoted terms (explicit entity markers)
602
- // Matches "Turbopump" -> [[Turbopump]]
603
- if (implicitPatterns.includes('quoted-terms')) {
604
- const quotedRegex = /"([^"]{3,30})"/g;
605
- let match;
606
- while ((match = quotedRegex.exec(content)) !== null) {
607
- const text = match[1];
608
- // Include the quotes in the position for replacement
609
- const start = match.index;
610
- const end = start + match[0].length;
611
- if (!shouldExclude(text) && !isProtected(start, end)) {
612
- detected.push({ text, start, end, pattern: 'quoted-terms' });
613
- seenTexts.add(text.toLowerCase());
614
- }
615
- }
616
- }
617
619
  // Pattern 4: CamelCase words (TypeScript, YouTube, HuggingFace)
618
620
  if (implicitPatterns.includes('camel-case')) {
619
621
  const camelRegex = /\b([A-Z][a-z]+[A-Z][a-zA-Z]*)\b/g;
@@ -634,6 +636,10 @@ export function detectImplicitEntities(content, config = {}) {
634
636
  let match;
635
637
  while ((match = acronymRegex.exec(content)) !== null) {
636
638
  const text = match[1];
639
+ // Skip long ALL-CAPS words (>5 chars) — likely English words in caps, not acronyms
640
+ // Real acronyms are typically 2-5 chars (API, SQL, LLM, ONNX)
641
+ if (text.length > 5)
642
+ continue;
637
643
  const start = match.index;
638
644
  const end = start + text.length;
639
645
  if (!shouldExclude(text) && !isProtected(start, end)) {
@@ -739,18 +745,10 @@ export function processWikilinks(content, entities, options = {}) {
739
745
  let wikilink;
740
746
  let replaceStart;
741
747
  let replaceEnd;
742
- if (match.pattern === 'quoted-terms') {
743
- // Replace "Term" with [[Term]] (remove quotes)
744
- wikilink = `[[${match.text}]]`;
745
- replaceStart = match.start;
746
- replaceEnd = match.end;
747
- }
748
- else {
749
- // Replace Term with [[Term]]
750
- wikilink = `[[${match.text}]]`;
751
- replaceStart = match.start;
752
- replaceEnd = match.end;
753
- }
748
+ // Replace Term with [[Term]]
749
+ wikilink = `[[${match.text}]]`;
750
+ replaceStart = match.start;
751
+ replaceEnd = match.end;
754
752
  processedContent =
755
753
  processedContent.slice(0, replaceStart) +
756
754
  wikilink +
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@velvetmonkey/vault-core",
3
- "version": "2.0.98",
3
+ "version": "2.0.99",
4
4
  "description": "Shared vault utilities for Flywheel ecosystem (entity scanning, wikilinks, protected zones)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",