@telvok/librarian-mcp 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Get embedding for a text string.
3
+ * Returns a 384-dimensional normalized vector.
4
+ */
5
+ export declare function getEmbedding(text: string): Promise<number[]>;
6
+ /**
7
+ * Check if embeddings are available (model can load).
8
+ */
9
+ export declare function isEmbeddingAvailable(): Promise<boolean>;
10
+ /**
11
+ * Calculate cosine similarity between two vectors.
12
+ * Since vectors are normalized, this is just the dot product.
13
+ */
14
+ export declare function cosineSimilarity(a: number[], b: number[]): number;
15
+ /**
16
+ * Split text into chunks at sentence boundaries.
17
+ * Aims for ~500 chars per chunk to preserve semantic meaning.
18
+ */
19
+ export declare function chunkText(text: string, maxChars?: number): string[];
20
+ export declare const EMBEDDING_MODEL_ID = "Xenova/all-MiniLM-L6-v2";
21
+ export declare const EMBEDDING_DIMENSION = 384;
@@ -0,0 +1,86 @@
1
+ import { pipeline, env } from '@huggingface/transformers';
2
+ import * as path from 'path';
3
+ import { getLibraryPath } from './storage.js';
4
+ // ============================================================================
5
+ // Configuration
6
+ // ============================================================================
7
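+ // Allow downloading the model from the Hugging Face Hub when it is not available locally; the local model directory is set lazily in getEmbedding()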
8
+ env.allowRemoteModels = true;
9
+ const MODEL_ID = 'Xenova/all-MiniLM-L6-v2';
10
+ // ============================================================================
11
+ // Embedding Generation
12
+ // ============================================================================
13
+ let embedder = null;
14
/**
 * Get the embedding for a text string.
 *
 * The feature-extraction pipeline is created lazily on the first call and
 * reused afterwards. The in-flight load promise itself is stored in
 * `embedder`, so concurrent first calls share a single model load instead of
 * each starting their own.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} Mean-pooled, normalized vector (384 values for
 *   the configured model).
 * @throws Rethrows any error from loading the model or running inference. A
 *   failed load is not cached, so the next call retries.
 */
export async function getEmbedding(text) {
    if (!embedder) {
        const modelDir = path.join(getLibraryPath(), 'models');
        // Look for already-present model files in the library's models directory...
        env.localModelPath = modelDir;
        // ...and store downloaded files there too. Without cacheDir the download
        // goes to the transformers.js default cache location instead.
        env.cacheDir = modelDir;
        const loading = pipeline('feature-extraction', MODEL_ID);
        embedder = loading;
        // Forget a failed load so a later call can try again. The rejection
        // still reaches every caller awaiting `loading` below.
        loading.catch(() => {
            if (embedder === loading) {
                embedder = null;
            }
        });
    }
    const extractor = await embedder;
    const result = await extractor(text, { pooling: 'mean', normalize: true });
    return Array.from(result.data);
}
28
/**
 * Report whether embeddings can be produced.
 *
 * Probes by embedding a short fixed string: resolves to `true` when that
 * succeeds and `false` when it rejects for any reason. Never rejects.
 *
 * @returns {Promise<boolean>}
 */
export async function isEmbeddingAvailable() {
    return getEmbedding('test').then(() => true, () => false);
}
40
+ // ============================================================================
41
+ // Similarity Calculation
42
+ // ============================================================================
43
/**
 * Similarity score between two vectors of equal length.
 *
 * Computes the plain dot product, which equals the cosine similarity only
 * when both inputs are already unit-length (as produced by getEmbedding).
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Sum of element-wise products; 0 for two empty vectors.
 * @throws {Error} When the vectors differ in length.
 */
export function cosineSimilarity(a, b) {
    const sameLength = a.length === b.length;
    if (!sameLength) {
        throw new Error('Vectors must have same dimension');
    }
    let dot = 0;
    a.forEach((component, position) => {
        dot = dot + component * b[position];
    });
    return dot;
}
53
+ // ============================================================================
54
+ // Text Chunking
55
+ // ============================================================================
56
/**
 * Break one over-long sentence into pieces of at most `limit` characters.
 * Cuts at the last whitespace that fits; falls back to a hard cut when the
 * sentence has no whitespace within the limit.
 *
 * @param {string} sentence - Sentence longer than `limit`.
 * @param {number} limit - Maximum piece length; an integer >= 1.
 * @returns {string[]} Non-empty, trimmed pieces in original order.
 */
function splitOversizedSentence(sentence, limit) {
    const whitespace = /\s/;
    const pieces = [];
    let rest = sentence.trim();
    while (rest.length > limit) {
        let cut = limit;
        let foundWhitespace = false;
        // Whitespace at index i yields a head of exactly i characters, so
        // index `limit` itself is still a valid cut point.
        for (let i = limit; i > 0; i--) {
            if (whitespace.test(rest[i])) {
                cut = i;
                foundWhitespace = true;
                break;
            }
        }
        if (!foundWhitespace && cut > 1) {
            // Hard cut: do not separate a UTF-16 surrogate pair.
            const lastUnit = rest.charCodeAt(cut - 1);
            if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
                cut -= 1;
            }
        }
        pieces.push(rest.slice(0, cut).trimEnd());
        rest = rest.slice(cut).trimStart();
    }
    if (rest) {
        pieces.push(rest);
    }
    return pieces;
}
/**
 * Split text into chunks at sentence boundaries.
 *
 * Sentences (text ending in `.`, `!` or `?` followed by whitespace) are
 * packed greedily into chunks of at most `maxChars` characters. A single
 * sentence longer than `maxChars` (including text with no sentence
 * punctuation at all) is further split at whitespace so it does not become
 * one unbounded chunk.
 *
 * @param {string} text - Text to split.
 * @param {number} [maxChars=500] - Target maximum chunk length. Values below
 *   1 disable the oversized-sentence split, leaving one sentence per chunk.
 * @returns {string[]} Trimmed chunks in order. When nothing remains after
 *   trimming (e.g. empty input), the original text is returned as the only
 *   element.
 */
export function chunkText(text, maxChars = 500) {
    // Split at sentence boundaries (. ! ? followed by whitespace)
    const sentences = text.split(/(?<=[.!?])\s+/);
    const canHardSplit = maxChars >= 1;
    const pieces = sentences.flatMap(sentence => canHardSplit && sentence.length > maxChars
        ? splitOversizedSentence(sentence, Math.floor(maxChars))
        : [sentence]);
    const chunks = [];
    let current = '';
    for (const piece of pieces) {
        // If adding this piece exceeds the limit and we have content, start a new chunk
        if ((current + ' ' + piece).length > maxChars && current.trim()) {
            chunks.push(current.trim());
            current = piece;
        }
        else {
            current = current ? current + ' ' + piece : piece;
        }
    }
    // Flush the final chunk
    if (current.trim()) {
        chunks.push(current.trim());
    }
    // Nothing survived trimming: hand back the original text unchanged
    return chunks.length > 0 ? chunks : [text];
}
82
+ // ============================================================================
83
+ // Constants
84
+ // ============================================================================
85
+ export const EMBEDDING_MODEL_ID = MODEL_ID;
86
+ export const EMBEDDING_DIMENSION = 384;
@@ -0,0 +1,55 @@
1
+ export interface IndexEntry {
2
+ path: string;
3
+ title: string;
4
+ embedding: number[];
5
+ chunk: number;
6
+ preview: string;
7
+ }
8
+ export interface VectorIndex {
9
+ version: number;
10
+ rebuilt: string;
11
+ modelId: string;
12
+ entries: IndexEntry[];
13
+ }
14
+ export interface SemanticMatch {
15
+ path: string;
16
+ title: string;
17
+ similarity: number;
18
+ preview: string;
19
+ }
20
+ /**
21
+ * Load the vector index from disk.
22
+ * Returns empty index if file doesn't exist or is invalid.
23
+ */
24
+ export declare function loadIndex(): Promise<VectorIndex>;
25
+ /**
26
+ * Save the vector index to disk.
27
+ */
28
+ export declare function saveIndex(index: VectorIndex): Promise<void>;
29
+ /**
30
+ * Add or update an entry in the index.
31
+ * Chunks the content and generates embeddings for each chunk.
32
+ */
33
+ export declare function addToIndex(index: VectorIndex, entryPath: string, title: string, content: string): Promise<void>;
34
+ /**
35
+ * Remove an entry from the index.
36
+ */
37
+ export declare function removeFromIndex(index: VectorIndex, entryPath: string): void;
38
+ /**
39
+ * Search the index for entries semantically similar to the query.
40
+ * Returns paths ranked by similarity, deduped to best chunk per entry.
41
+ */
42
+ export declare function semanticSearch(index: VectorIndex, query: string, limit?: number): Promise<SemanticMatch[]>;
43
+ /**
44
+ * Check if the index might be stale (model changed).
45
+ */
46
+ export declare function isIndexStale(index: VectorIndex): boolean;
47
+ /**
48
+ * Get index statistics.
49
+ */
50
+ export declare function getIndexStats(index: VectorIndex): {
51
+ entryCount: number;
52
+ chunkCount: number;
53
+ modelId: string;
54
+ rebuilt: string;
55
+ };
@@ -0,0 +1,160 @@
1
+ import * as fs from 'fs/promises';
2
+ import * as path from 'path';
3
+ import { getLibraryPath } from './storage.js';
4
+ import { getEmbedding, chunkText, cosineSimilarity, EMBEDDING_MODEL_ID } from './embeddings.js';
5
+ // ============================================================================
6
+ // Constants
7
+ // ============================================================================
8
+ const INDEX_FILENAME = 'index.json';
9
+ const CURRENT_VERSION = 1;
10
+ // ============================================================================
11
+ // Index File Operations
12
+ // ============================================================================
13
/**
 * Resolve the location of the vector index file: INDEX_FILENAME directly
 * inside the directory returned by getLibraryPath().
 *
 * @returns {string}
 */
function getIndexPath() {
    const libraryRoot = getLibraryPath();
    return path.join(libraryRoot, INDEX_FILENAME);
}
19
/**
 * Read the vector index from disk.
 *
 * Any failure to read or parse the file, or a parsed value without a truthy
 * `version` and an `entries` array, yields a fresh empty index instead of an
 * error.
 *
 * @returns {Promise<object>} The parsed index, or an empty one.
 */
export async function loadIndex() {
    const location = getIndexPath();
    try {
        const parsed = JSON.parse(await fs.readFile(location, 'utf-8'));
        // Minimal structural check; a null/primitive payload throws here and
        // is handled by the catch below.
        const looksValid = Boolean(parsed.version) && Array.isArray(parsed.entries);
        return looksValid ? parsed : createEmptyIndex();
    }
    catch {
        // Missing, unreadable or malformed file
        return createEmptyIndex();
    }
}
39
/**
 * Save the vector index to disk.
 *
 * Stamps `index.rebuilt` with the current time and `index.modelId` with the
 * active model (mutating the passed object), then writes the JSON to a
 * uniquely named temp file and renames it over the index file so readers
 * never see a half-written file.
 *
 * The temp name is unique per call so overlapping saves do not write to, or
 * rename away, each other's temp file. If the write or rename fails, the temp
 * file is removed before the error is rethrown.
 *
 * @param {object} index - Index to persist; its metadata fields are updated in place.
 * @returns {Promise<void>}
 * @throws Rethrows any filesystem error from mkdir, writeFile or rename.
 */
export async function saveIndex(index) {
    const indexPath = getIndexPath();
    // Update metadata
    index.rebuilt = new Date().toISOString();
    index.modelId = EMBEDDING_MODEL_ID;
    // Ensure directory exists
    await fs.mkdir(path.dirname(indexPath), { recursive: true });
    const uniqueSuffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
    const tempPath = `${indexPath}.${uniqueSuffix}.tmp`;
    try {
        await fs.writeFile(tempPath, JSON.stringify(index, null, 2), 'utf-8');
        await fs.rename(tempPath, indexPath);
    }
    catch (error) {
        // Best-effort cleanup; the original failure is what the caller needs to see.
        await fs.rm(tempPath, { force: true }).catch(() => { });
        throw error;
    }
}
54
/**
 * Build a fresh index with no entries, tagged with the current format
 * version and embedding model id. `rebuilt` starts as an empty string.
 *
 * @returns {object}
 */
function createEmptyIndex() {
    const blank = {
        version: CURRENT_VERSION,
        rebuilt: '',
        modelId: EMBEDDING_MODEL_ID,
        entries: [],
    };
    return blank;
}
65
+ // ============================================================================
66
+ // Index Operations
67
+ // ============================================================================
68
/**
 * Insert or replace the index rows for one library entry.
 *
 * Existing rows for `entryPath` are dropped first. The content is then split
 * with chunkText() and each chunk is embedded in order; every successful
 * embedding becomes one row carrying the chunk number and a preview of up to
 * 100 characters. A chunk whose embedding fails is logged and skipped, so the
 * call itself does not reject because of embedding errors.
 *
 * @param {object} index - Index to mutate (`index.entries` is replaced, then appended to).
 * @param {string} entryPath - Path identifying the entry.
 * @param {string} title - Entry title stored on every row.
 * @param {string} content - Full text to chunk and embed.
 * @returns {Promise<void>}
 */
export async function addToIndex(index, entryPath, title, content) {
    // Drop rows left over from a previous version of this entry
    index.entries = index.entries.filter(existing => existing.path !== entryPath);
    for (const [position, piece] of chunkText(content).entries()) {
        try {
            const embedding = await getEmbedding(piece);
            const ellipsis = piece.length > 100 ? '...' : '';
            index.entries.push({
                path: entryPath,
                title,
                embedding,
                chunk: position,
                preview: piece.slice(0, 100) + ellipsis,
            });
        }
        catch (error) {
            // Best effort: report the failure and continue with the next chunk
            console.error(`Failed to embed chunk ${position} for ${entryPath}:`, error);
        }
    }
}
96
/**
 * Drop every row belonging to `entryPath` from the index.
 *
 * `index.entries` is replaced with a new array; the previous array object is
 * left untouched.
 *
 * @param {object} index - Index to mutate.
 * @param {string} entryPath - Path whose rows are removed.
 */
export function removeFromIndex(index, entryPath) {
    const kept = [];
    for (const candidate of index.entries) {
        if (candidate.path !== entryPath) {
            kept.push(candidate);
        }
    }
    index.entries = kept;
}
102
+ // ============================================================================
103
+ // Semantic Search
104
+ // ============================================================================
105
/**
 * Search the index for entries semantically similar to the query.
 *
 * Embeds the query, scores every indexed chunk against it, keeps the
 * best-scoring chunk per entry path, and returns the top `limit` entries
 * ordered by descending similarity.
 *
 * Rows that cannot be scored are skipped instead of aborting the whole
 * search: rows whose `embedding` is not an array of the query vector's
 * length (which would make cosineSimilarity throw) and rows whose score is
 * not a finite number (which would corrupt the sort).
 *
 * @param {object} index - Loaded vector index.
 * @param {string} query - Free-text query.
 * @param {number} [limit=5] - Maximum number of results.
 * @returns {Promise<Array<{path: string, title: string, similarity: number, preview: string}>>}
 * @throws Rethrows errors from getEmbedding when the query cannot be embedded.
 */
export async function semanticSearch(index, query, limit = 5) {
    if (index.entries.length === 0) {
        return [];
    }
    // Get query embedding
    const queryEmbedding = await getEmbedding(query);
    // Best-scoring chunk per path
    const bestByPath = new Map();
    for (const entry of index.entries) {
        const usable = Boolean(entry)
            && Array.isArray(entry.embedding)
            && entry.embedding.length === queryEmbedding.length;
        if (!usable) {
            continue;
        }
        const similarity = cosineSimilarity(queryEmbedding, entry.embedding);
        if (!Number.isFinite(similarity)) {
            continue;
        }
        const best = bestByPath.get(entry.path);
        if (!best || similarity > best.similarity) {
            bestByPath.set(entry.path, {
                path: entry.path,
                title: entry.title,
                similarity,
                preview: entry.preview,
            });
        }
    }
    // Sort by similarity descending and apply limit
    return [...bestByPath.values()]
        .sort((a, b) => b.similarity - a.similarity)
        .slice(0, limit);
}
140
+ // ============================================================================
141
+ // Index Health
142
+ // ============================================================================
143
/**
 * Tell whether the index was built with a different embedding model than the
 * one currently configured, by comparing `index.modelId` with
 * EMBEDDING_MODEL_ID.
 *
 * @param {object} index - Loaded vector index.
 * @returns {boolean} `true` when the model ids differ.
 */
export function isIndexStale(index) {
    const builtWithCurrentModel = index.modelId === EMBEDDING_MODEL_ID;
    return !builtWithCurrentModel;
}
149
/**
 * Summarize an index.
 *
 * @param {object} index - Loaded vector index.
 * @returns {{entryCount: number, chunkCount: number, modelId: string, rebuilt: string}}
 *   `entryCount` is the number of distinct paths, `chunkCount` the number of
 *   rows; `modelId` and `rebuilt` are copied from the index.
 */
export function getIndexStats(index) {
    const distinctPaths = new Set();
    for (const { path: entryPath } of index.entries) {
        distinctPaths.add(entryPath);
    }
    const { modelId, rebuilt } = index;
    return {
        entryCount: distinctPaths.size,
        chunkCount: index.entries.length,
        modelId,
        rebuilt,
    };
}
@@ -3,6 +3,7 @@ import * as path from 'path';
3
3
  import matter from 'gray-matter';
4
4
  import { glob } from 'glob';
5
5
  import { getLibraryPath, getLocalPath, getImportedPath } from '../library/storage.js';
6
+ import { loadIndex, semanticSearch, isIndexStale } from '../library/vector-index.js';
6
7
  // ============================================================================
7
8
  // Tool Definition
8
9
  // ============================================================================
@@ -39,6 +40,49 @@ Examples:
39
40
  const localPath = getLocalPath(libraryPath);
40
41
  const importedPath = getImportedPath(libraryPath);
41
42
  let allEntries = [];
43
+ let useSemanticSearch = false;
44
+ let semanticMatches = [];
45
+ // Try semantic search if query is provided
46
+ if (query) {
47
+ try {
48
+ const index = await loadIndex();
49
+ // Only use semantic search if index has entries and isn't stale
50
+ if (index.entries.length > 0 && !isIndexStale(index)) {
51
+ semanticMatches = await semanticSearch(index, query, limit);
52
+ useSemanticSearch = semanticMatches.length > 0;
53
+ }
54
+ }
55
+ catch {
56
+ // Semantic search unavailable, fall back to keyword search
57
+ useSemanticSearch = false;
58
+ }
59
+ }
60
+ if (useSemanticSearch && semanticMatches.length > 0) {
61
+ // Load only the entries that matched semantically
62
+ const matchedPaths = new Set(semanticMatches.map(m => m.path));
63
+ for (const match of semanticMatches) {
64
+ const fullPath = path.join(libraryPath, match.path);
65
+ const entry = await readEntry(fullPath, libraryPath);
66
+ if (entry) {
67
+ allEntries.push(entry);
68
+ }
69
+ }
70
+ // Sort by semantic similarity (order preserved from semanticSearch)
71
+ // Re-order allEntries to match semanticMatches order
72
+ const pathToEntry = new Map(allEntries.map(e => [e.path, e]));
73
+ allEntries = semanticMatches
74
+ .map(m => pathToEntry.get(m.path))
75
+ .filter((e) => e !== undefined);
76
+ const total = allEntries.length;
77
+ const entries = allEntries.slice(0, limit);
78
+ return {
79
+ entries,
80
+ total,
81
+ message: `Found ${total} ${total === 1 ? 'entry' : 'entries'} for "${query}" (semantic search).`,
82
+ libraryPath: localPath,
83
+ };
84
+ }
85
+ // Fall back to keyword search
42
86
  // Read local entries
43
87
  try {
44
88
  const localFiles = await glob(path.join(localPath, '**/*.md'), { nodir: true });
@@ -1,6 +1,7 @@
1
1
  import * as fs from 'fs/promises';
2
2
  import * as path from 'path';
3
3
  import { getLibraryPath, getLocalPath } from '../library/storage.js';
4
+ import { loadIndex, saveIndex, addToIndex } from '../library/vector-index.js';
4
5
  // ============================================================================
5
6
  // Tool Definition
6
7
  // ============================================================================
@@ -140,6 +141,26 @@ Rich:
140
141
  const fileContent = frontmatterLines.join('\n') + '\n\n' + bodyLines.join('\n') + '\n';
141
142
  await fs.writeFile(filePath, fileContent, 'utf-8');
142
143
  const relativePath = path.relative(libraryPath, filePath);
144
+ // Add to vector index for semantic search
145
+ try {
146
+ const index = await loadIndex();
147
+ // Combine all text for embedding
148
+ const fullContent = [
149
+ title,
150
+ intent || '',
151
+ insight,
152
+ reasoning || '',
153
+ example || '',
154
+ context || '',
155
+ ].filter(Boolean).join('\n\n');
156
+ await addToIndex(index, relativePath, title, fullContent);
157
+ await saveIndex(index);
158
+ }
159
+ catch (embeddingError) {
160
+ // Don't fail the record operation if embedding fails
161
+ // Entry is still saved and searchable via keywords
162
+ console.error('Failed to add embedding:', embeddingError);
163
+ }
143
164
  return {
144
165
  success: true,
145
166
  path: relativePath,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@telvok/librarian-mcp",
3
- "version": "1.1.0",
3
+ "version": "1.2.0",
4
4
  "description": "Knowledge capture MCP server - remember what you learn with AI",
5
5
  "type": "module",
6
6
  "main": "dist/server.js",
@@ -26,11 +26,12 @@
26
26
  ],
27
27
  "repository": {
28
28
  "type": "git",
29
- "url": "https://github.com/telvokdev/librarian.git"
29
+ "url": "git+https://github.com/telvokdev/librarian.git"
30
30
  },
31
31
  "author": "Telvok",
32
32
  "license": "MIT",
33
33
  "dependencies": {
34
+ "@huggingface/transformers": "^3.0.0",
34
35
  "@modelcontextprotocol/sdk": "^1.0.0",
35
36
  "glob": "^11.0.0",
36
37
  "gray-matter": "^4.0.3",