@telvok/librarian-mcp 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/library/embeddings.d.ts +21 -0
- package/dist/library/embeddings.js +86 -0
- package/dist/library/vector-index.d.ts +55 -0
- package/dist/library/vector-index.js +160 -0
- package/dist/tools/brief.js +44 -0
- package/dist/tools/record.js +21 -0
- package/package.json +3 -2
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Get embedding for a text string.
|
|
3
|
+
* Returns a 384-dimensional normalized vector.
|
|
4
|
+
*/
|
|
5
|
+
export declare function getEmbedding(text: string): Promise<number[]>;
|
|
6
|
+
/**
|
|
7
|
+
* Check if embeddings are available (model can load).
|
|
8
|
+
*/
|
|
9
|
+
export declare function isEmbeddingAvailable(): Promise<boolean>;
|
|
10
|
+
/**
|
|
11
|
+
* Calculate cosine similarity between two vectors.
|
|
12
|
+
* Since vectors are normalized, this is just the dot product.
|
|
13
|
+
*/
|
|
14
|
+
export declare function cosineSimilarity(a: number[], b: number[]): number;
|
|
15
|
+
/**
|
|
16
|
+
* Split text into chunks at sentence boundaries.
|
|
17
|
+
* Aims for ~500 chars per chunk to preserve semantic meaning.
|
|
18
|
+
*/
|
|
19
|
+
export declare function chunkText(text: string, maxChars?: number): string[];
|
|
20
|
+
export declare const EMBEDDING_MODEL_ID = "Xenova/all-MiniLM-L6-v2";
|
|
21
|
+
export declare const EMBEDDING_DIMENSION = 384;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { pipeline, env } from '@huggingface/transformers';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
import { getLibraryPath } from './storage.js';
|
|
4
|
+
// ============================================================================
|
|
5
|
+
// Configuration
|
|
6
|
+
// ============================================================================
|
|
7
|
+
// Allow fetching the model remotely; the local model directory (<library path>/models) is configured lazily in getEmbedding().
|
|
8
|
+
env.allowRemoteModels = true;
|
|
9
|
+
const MODEL_ID = 'Xenova/all-MiniLM-L6-v2';
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// Embedding Generation
|
|
12
|
+
// ============================================================================
|
|
13
|
+
let embedder = null;
|
|
14
|
+
/**
 * Get embedding for a text string.
 * Returns a 384-dimensional normalized vector.
 *
 * The feature-extraction pipeline is created lazily on the first call and
 * reused afterwards. The in-flight load is cached as a promise, so concurrent
 * first calls all await the same load instead of each starting their own.
 *
 * @param text - Text to embed.
 * @returns The mean-pooled, normalized embedding copied into a plain array.
 * @throws Propagates any error from loading or running the pipeline. A failed
 *   load is not kept cached, so the next call retries.
 */
export async function getEmbedding(text) {
    if (!embedder) {
        // Set local model path on first call
        // NOTE(review): localModelPath is where local models are looked up;
        // confirm whether env.cacheDir should also point here so that
        // downloaded files end up inside the library directory.
        env.localModelPath = path.join(getLibraryPath(), 'models');
        const loading = pipeline('feature-extraction', MODEL_ID);
        embedder = loading;
        // Forget a failed load so a later call can try again. The rejection
        // itself still reaches every caller through the `await` below.
        loading.catch(() => {
            if (embedder === loading) {
                embedder = null;
            }
        });
    }
    const extractor = await embedder;
    const result = await extractor(text, { pooling: 'mean', normalize: true });
    return Array.from(result.data);
}
|
|
28
|
+
/**
 * Report whether embeddings can be produced (i.e. the model can load and run).
 *
 * Probes by embedding a short fixed string; any failure is reported as
 * `false` instead of being thrown.
 *
 * @returns `true` when the probe embedding succeeds, otherwise `false`.
 */
export async function isEmbeddingAvailable() {
    let available = true;
    try {
        await getEmbedding('test');
    }
    catch {
        available = false;
    }
    return available;
}
|
|
40
|
+
// ============================================================================
|
|
41
|
+
// Similarity Calculation
|
|
42
|
+
// ============================================================================
|
|
43
|
+
/**
 * Compute the similarity of two vectors as their dot product.
 *
 * For unit-length (normalized) vectors the dot product equals the cosine
 * similarity; no normalization is performed here.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns Sum of the element-wise products.
 * @throws Error if the two vectors differ in length.
 */
export function cosineSimilarity(a, b) {
    const dimension = a.length;
    if (dimension !== b.length) {
        throw new Error('Vectors must have same dimension');
    }
    let dot = 0;
    for (let i = 0; i < dimension; i++) {
        dot += a[i] * b[i];
    }
    return dot;
}
|
|
53
|
+
// ============================================================================
|
|
54
|
+
// Text Chunking
|
|
55
|
+
// ============================================================================
|
|
56
|
+
/**
 * Split text into chunks at sentence boundaries.
 * Aims for ~500 chars per chunk to preserve semantic meaning.
 *
 * Sentences (split after `.`, `!` or `?` followed by whitespace) are packed
 * greedily into chunks of at most `maxChars` characters. A single sentence
 * that is longer than `maxChars` is broken at whitespace, and an unbroken run
 * longer than the limit is hard-split, so no chunk exceeds the limit.
 *
 * @param text - Text to split.
 * @param maxChars - Maximum chunk length in characters. Values that are not
 *   finite or are below 1 disable the splitting of oversized sentences.
 * @returns Trimmed, non-empty chunks. When `text` has no non-whitespace
 *   content the original `text` is returned as the only element.
 */
export function chunkText(text, maxChars = 500) {
    // Split at sentence boundaries (. ! ? followed by whitespace)
    const sentences = text.split(/(?<=[.!?])\s+/);
    const pieceLimit = Math.floor(maxChars);
    const canSplit = Number.isFinite(pieceLimit) && pieceLimit >= 1;
    // Break up any sentence that could never fit into a chunk on its own.
    const units = canSplit
        ? sentences.flatMap(sentence => sentence.length > pieceLimit
            ? splitOversizedSentence(sentence, pieceLimit)
            : [sentence])
        : sentences;
    const chunks = [];
    let current = '';
    for (const unit of units) {
        // If adding this unit exceeds the limit and we have content, start a new chunk
        if ((current + ' ' + unit).length > maxChars && current.trim()) {
            chunks.push(current.trim());
            current = unit;
        }
        else {
            current = current ? current + ' ' + unit : unit;
        }
    }
    // Don't forget the last chunk
    if (current.trim()) {
        chunks.push(current.trim());
    }
    // Only empty or whitespace-only input produces no chunks; return it as-is
    return chunks.length > 0 ? chunks : [text];
}
/**
 * Break one over-long sentence into pieces of at most `limit` characters,
 * preferring whitespace boundaries and hard-splitting words longer than `limit`.
 */
function splitOversizedSentence(sentence, limit) {
    const pieces = [];
    let current = '';
    for (const word of sentence.split(/\s+/)) {
        let rest = word;
        while (rest.length > limit) {
            if (current) {
                pieces.push(current);
                current = '';
            }
            let cut = limit;
            // Avoid cutting between the two halves of a surrogate pair.
            const last = rest.charCodeAt(cut - 1);
            if (cut > 1 && last >= 0xd800 && last <= 0xdbff) {
                cut -= 1;
            }
            pieces.push(rest.slice(0, cut));
            rest = rest.slice(cut);
        }
        if (!rest) {
            continue;
        }
        if (current && current.length + 1 + rest.length > limit) {
            pieces.push(current);
            current = rest;
        }
        else {
            current = current ? current + ' ' + rest : rest;
        }
    }
    if (current) {
        pieces.push(current);
    }
    return pieces;
}
|
|
82
|
+
// ============================================================================
|
|
83
|
+
// Constants
|
|
84
|
+
// ============================================================================
|
|
85
|
+
export const EMBEDDING_MODEL_ID = MODEL_ID;
|
|
86
|
+
export const EMBEDDING_DIMENSION = 384;
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
export interface IndexEntry {
|
|
2
|
+
path: string;
|
|
3
|
+
title: string;
|
|
4
|
+
embedding: number[];
|
|
5
|
+
chunk: number;
|
|
6
|
+
preview: string;
|
|
7
|
+
}
|
|
8
|
+
export interface VectorIndex {
|
|
9
|
+
version: number;
|
|
10
|
+
rebuilt: string;
|
|
11
|
+
modelId: string;
|
|
12
|
+
entries: IndexEntry[];
|
|
13
|
+
}
|
|
14
|
+
export interface SemanticMatch {
|
|
15
|
+
path: string;
|
|
16
|
+
title: string;
|
|
17
|
+
similarity: number;
|
|
18
|
+
preview: string;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Load the vector index from disk.
|
|
22
|
+
* Returns empty index if file doesn't exist or is invalid.
|
|
23
|
+
*/
|
|
24
|
+
export declare function loadIndex(): Promise<VectorIndex>;
|
|
25
|
+
/**
|
|
26
|
+
* Save the vector index to disk.
|
|
27
|
+
*/
|
|
28
|
+
export declare function saveIndex(index: VectorIndex): Promise<void>;
|
|
29
|
+
/**
|
|
30
|
+
* Add or update an entry in the index.
|
|
31
|
+
* Chunks the content and generates embeddings for each chunk.
|
|
32
|
+
*/
|
|
33
|
+
export declare function addToIndex(index: VectorIndex, entryPath: string, title: string, content: string): Promise<void>;
|
|
34
|
+
/**
|
|
35
|
+
* Remove an entry from the index.
|
|
36
|
+
*/
|
|
37
|
+
export declare function removeFromIndex(index: VectorIndex, entryPath: string): void;
|
|
38
|
+
/**
|
|
39
|
+
* Search the index for entries semantically similar to the query.
|
|
40
|
+
* Returns paths ranked by similarity, deduped to best chunk per entry.
|
|
41
|
+
*/
|
|
42
|
+
export declare function semanticSearch(index: VectorIndex, query: string, limit?: number): Promise<SemanticMatch[]>;
|
|
43
|
+
/**
|
|
44
|
+
* Check if the index might be stale (model changed).
|
|
45
|
+
*/
|
|
46
|
+
export declare function isIndexStale(index: VectorIndex): boolean;
|
|
47
|
+
/**
|
|
48
|
+
* Get index statistics.
|
|
49
|
+
*/
|
|
50
|
+
export declare function getIndexStats(index: VectorIndex): {
|
|
51
|
+
entryCount: number;
|
|
52
|
+
chunkCount: number;
|
|
53
|
+
modelId: string;
|
|
54
|
+
rebuilt: string;
|
|
55
|
+
};
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import * as fs from 'fs/promises';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
import { getLibraryPath } from './storage.js';
|
|
4
|
+
import { getEmbedding, chunkText, cosineSimilarity, EMBEDDING_MODEL_ID } from './embeddings.js';
|
|
5
|
+
// ============================================================================
|
|
6
|
+
// Constants
|
|
7
|
+
// ============================================================================
|
|
8
|
+
const INDEX_FILENAME = 'index.json';
|
|
9
|
+
const CURRENT_VERSION = 1;
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// Index File Operations
|
|
12
|
+
// ============================================================================
|
|
13
|
+
/**
 * Resolve the location of the index file inside the library directory.
 *
 * @returns The library path joined with the index file name.
 */
function getIndexPath() {
    const libraryDir = getLibraryPath();
    return path.join(libraryDir, INDEX_FILENAME);
}
|
|
19
|
+
/**
 * Read the vector index from disk.
 *
 * Any failure to read or parse the file, or a parsed value without a truthy
 * `version` and an `entries` array, yields a fresh empty index instead of an
 * error.
 *
 * @returns The parsed index, or an empty index when none is usable.
 */
export async function loadIndex() {
    const location = getIndexPath();
    try {
        const parsed = JSON.parse(await fs.readFile(location, 'utf-8'));
        // Minimal shape check; individual entries are not validated here.
        const looksValid = Boolean(parsed.version) && Array.isArray(parsed.entries);
        return looksValid ? parsed : createEmptyIndex();
    }
    catch {
        // Missing, unreadable or unparsable file
        return createEmptyIndex();
    }
}
|
|
39
|
+
/**
 * Save the vector index to disk.
 *
 * Stamps `index.rebuilt` with the current time and `index.modelId` with the
 * current embedding model, creates the target directory if needed, then
 * writes the JSON to a temporary file and renames it over the index file.
 *
 * The temporary file name is unique per call so overlapping saves never write
 * to (or rename) each other's temp file, and it is removed if the write or
 * rename fails.
 *
 * @param index - Index to persist; its `rebuilt` and `modelId` fields are mutated.
 * @throws Propagates any file-system error from mkdir, writeFile or rename.
 */
export async function saveIndex(index) {
    const indexPath = getIndexPath();
    // Update metadata
    index.rebuilt = new Date().toISOString();
    index.modelId = EMBEDDING_MODEL_ID;
    // Ensure directory exists
    await fs.mkdir(path.dirname(indexPath), { recursive: true });
    // Write to a uniquely named temp file first, then rename into place
    const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const tempPath = `${indexPath}.${uniqueSuffix}.tmp`;
    try {
        await fs.writeFile(tempPath, JSON.stringify(index, null, 2), 'utf-8');
        await fs.rename(tempPath, indexPath);
    }
    catch (error) {
        // Best-effort cleanup; the original failure is what the caller needs to see.
        await fs.rm(tempPath, { force: true }).catch(() => { });
        throw error;
    }
}
|
|
54
|
+
/**
 * Build a fresh index with no entries.
 *
 * @returns An index at the current version, tagged with the current embedding
 *   model, with an empty `rebuilt` timestamp and no entries.
 */
function createEmptyIndex() {
    const blank = {
        version: CURRENT_VERSION,
        rebuilt: '',
        modelId: EMBEDDING_MODEL_ID,
        entries: [],
    };
    return blank;
}
|
|
65
|
+
// ============================================================================
|
|
66
|
+
// Index Operations
|
|
67
|
+
// ============================================================================
|
|
68
|
+
/**
 * Add or replace the index entries for one library path.
 *
 * Existing entries for `entryPath` are dropped first. The content is then
 * chunked and each chunk is embedded in order; one index entry is appended per
 * successfully embedded chunk. A chunk that fails to embed is logged and
 * skipped, so this function does not reject because of embedding errors.
 *
 * @param index - Index to mutate in memory (not saved here).
 * @param entryPath - Path identifying the entry.
 * @param title - Title stored on every chunk entry.
 * @param content - Text to chunk and embed.
 */
export async function addToIndex(index, entryPath, title, content) {
    // Drop whatever was indexed for this path before
    index.entries = index.entries.filter(existing => existing.path !== entryPath);
    const pieces = chunkText(content);
    for (const [position, piece] of pieces.entries()) {
        try {
            const vector = await getEmbedding(piece);
            // Preview is the first 100 characters, with an ellipsis when truncated
            const preview = piece.length > 100 ? piece.slice(0, 100) + '...' : piece;
            index.entries.push({
                path: entryPath,
                title,
                embedding: vector,
                chunk: position,
                preview,
            });
        }
        catch (error) {
            // Log but don't fail - entry will still be searchable via keywords
            console.error(`Failed to embed chunk ${position} for ${entryPath}:`, error);
        }
    }
}
|
|
96
|
+
/**
 * Drop every chunk entry belonging to `entryPath` from the index.
 *
 * `index.entries` is replaced with a new array; the previous array is left
 * unmodified.
 *
 * @param index - Index to mutate in memory.
 * @param entryPath - Path whose entries should be removed.
 */
export function removeFromIndex(index, entryPath) {
    const kept = [];
    for (const entry of index.entries) {
        if (entry.path !== entryPath) {
            kept.push(entry);
        }
    }
    index.entries = kept;
}
|
|
102
|
+
// ============================================================================
|
|
103
|
+
// Semantic Search
|
|
104
|
+
// ============================================================================
|
|
105
|
+
/**
 * Search the index for entries semantically similar to the query.
 * Returns paths ranked by similarity, deduped to best chunk per entry.
 *
 * Entries whose `embedding` is missing, not an array, or of a different
 * length than the query embedding are skipped, so a single malformed entry
 * (for example from a hand-edited or partially written index file) does not
 * abort the whole search.
 *
 * @param index - Index to search; it is not modified.
 * @param query - Query text to embed and compare against the stored chunks.
 * @param limit - Maximum number of results to return (default 5).
 * @returns Matches with `path`, `title`, `similarity` and `preview`, sorted by
 *   descending similarity; empty when the index has no entries.
 * @throws Propagates any error from embedding the query.
 */
export async function semanticSearch(index, query, limit = 5) {
    if (index.entries.length === 0) {
        return [];
    }
    // Get query embedding
    const queryEmbedding = await getEmbedding(query);
    // Score each chunk and keep only the best-scoring chunk per path
    const bestByPath = new Map();
    for (const entry of index.entries) {
        const usable = entry
            && Array.isArray(entry.embedding)
            && entry.embedding.length === queryEmbedding.length;
        if (!usable) {
            continue;
        }
        const similarity = cosineSimilarity(queryEmbedding, entry.embedding);
        const best = bestByPath.get(entry.path);
        if (!best || similarity > best.similarity) {
            bestByPath.set(entry.path, {
                path: entry.path,
                title: entry.title,
                similarity,
                preview: entry.preview,
            });
        }
    }
    // Sort by similarity descending and apply limit
    return [...bestByPath.values()]
        .sort((a, b) => b.similarity - a.similarity)
        .slice(0, limit);
}
|
|
140
|
+
// ============================================================================
|
|
141
|
+
// Index Health
|
|
142
|
+
// ============================================================================
|
|
143
|
+
/**
 * Tell whether the index was built with a different embedding model than the
 * one currently configured.
 *
 * @param index - Index whose `modelId` is compared.
 * @returns `true` when `index.modelId` differs from the current model id.
 */
export function isIndexStale(index) {
    const builtWith = index.modelId;
    return builtWith !== EMBEDDING_MODEL_ID;
}
|
|
149
|
+
/**
 * Summarize the contents of an index.
 *
 * @param index - Index to inspect.
 * @returns `entryCount` (number of distinct paths), `chunkCount` (number of
 *   stored chunk entries), and the index's `modelId` and `rebuilt` values.
 */
export function getIndexStats(index) {
    const { entries, modelId, rebuilt } = index;
    const distinctPaths = new Set();
    for (const entry of entries) {
        distinctPaths.add(entry.path);
    }
    return {
        entryCount: distinctPaths.size,
        chunkCount: entries.length,
        modelId,
        rebuilt,
    };
}
|
package/dist/tools/brief.js
CHANGED
|
@@ -3,6 +3,7 @@ import * as path from 'path';
|
|
|
3
3
|
import matter from 'gray-matter';
|
|
4
4
|
import { glob } from 'glob';
|
|
5
5
|
import { getLibraryPath, getLocalPath, getImportedPath } from '../library/storage.js';
|
|
6
|
+
import { loadIndex, semanticSearch, isIndexStale } from '../library/vector-index.js';
|
|
6
7
|
// ============================================================================
|
|
7
8
|
// Tool Definition
|
|
8
9
|
// ============================================================================
|
|
@@ -39,6 +40,49 @@ Examples:
|
|
|
39
40
|
const localPath = getLocalPath(libraryPath);
|
|
40
41
|
const importedPath = getImportedPath(libraryPath);
|
|
41
42
|
let allEntries = [];
|
|
43
|
+
let useSemanticSearch = false;
|
|
44
|
+
let semanticMatches = [];
|
|
45
|
+
// Try semantic search if query is provided
|
|
46
|
+
if (query) {
|
|
47
|
+
try {
|
|
48
|
+
const index = await loadIndex();
|
|
49
|
+
// Only use semantic search if index has entries and isn't stale
|
|
50
|
+
if (index.entries.length > 0 && !isIndexStale(index)) {
|
|
51
|
+
semanticMatches = await semanticSearch(index, query, limit);
|
|
52
|
+
useSemanticSearch = semanticMatches.length > 0;
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
catch {
|
|
56
|
+
// Semantic search unavailable, fall back to keyword search
|
|
57
|
+
useSemanticSearch = false;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
if (useSemanticSearch && semanticMatches.length > 0) {
|
|
61
|
+
// Load only the entries that matched semantically
|
|
62
|
+
const matchedPaths = new Set(semanticMatches.map(m => m.path));
|
|
63
|
+
for (const match of semanticMatches) {
|
|
64
|
+
const fullPath = path.join(libraryPath, match.path);
|
|
65
|
+
const entry = await readEntry(fullPath, libraryPath);
|
|
66
|
+
if (entry) {
|
|
67
|
+
allEntries.push(entry);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
// Sort by semantic similarity (order preserved from semanticSearch)
|
|
71
|
+
// Re-order allEntries to match semanticMatches order
|
|
72
|
+
const pathToEntry = new Map(allEntries.map(e => [e.path, e]));
|
|
73
|
+
allEntries = semanticMatches
|
|
74
|
+
.map(m => pathToEntry.get(m.path))
|
|
75
|
+
.filter((e) => e !== undefined);
|
|
76
|
+
const total = allEntries.length;
|
|
77
|
+
const entries = allEntries.slice(0, limit);
|
|
78
|
+
return {
|
|
79
|
+
entries,
|
|
80
|
+
total,
|
|
81
|
+
message: `Found ${total} ${total === 1 ? 'entry' : 'entries'} for "${query}" (semantic search).`,
|
|
82
|
+
libraryPath: localPath,
|
|
83
|
+
};
|
|
84
|
+
}
|
|
85
|
+
// Fall back to keyword search
|
|
42
86
|
// Read local entries
|
|
43
87
|
try {
|
|
44
88
|
const localFiles = await glob(path.join(localPath, '**/*.md'), { nodir: true });
|
package/dist/tools/record.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as fs from 'fs/promises';
|
|
2
2
|
import * as path from 'path';
|
|
3
3
|
import { getLibraryPath, getLocalPath } from '../library/storage.js';
|
|
4
|
+
import { loadIndex, saveIndex, addToIndex } from '../library/vector-index.js';
|
|
4
5
|
// ============================================================================
|
|
5
6
|
// Tool Definition
|
|
6
7
|
// ============================================================================
|
|
@@ -140,6 +141,26 @@ Rich:
|
|
|
140
141
|
const fileContent = frontmatterLines.join('\n') + '\n\n' + bodyLines.join('\n') + '\n';
|
|
141
142
|
await fs.writeFile(filePath, fileContent, 'utf-8');
|
|
142
143
|
const relativePath = path.relative(libraryPath, filePath);
|
|
144
|
+
// Add to vector index for semantic search
|
|
145
|
+
try {
|
|
146
|
+
const index = await loadIndex();
|
|
147
|
+
// Combine all text for embedding
|
|
148
|
+
const fullContent = [
|
|
149
|
+
title,
|
|
150
|
+
intent || '',
|
|
151
|
+
insight,
|
|
152
|
+
reasoning || '',
|
|
153
|
+
example || '',
|
|
154
|
+
context || '',
|
|
155
|
+
].filter(Boolean).join('\n\n');
|
|
156
|
+
await addToIndex(index, relativePath, title, fullContent);
|
|
157
|
+
await saveIndex(index);
|
|
158
|
+
}
|
|
159
|
+
catch (embeddingError) {
|
|
160
|
+
// Don't fail the record operation if embedding fails
|
|
161
|
+
// Entry is still saved and searchable via keywords
|
|
162
|
+
console.error('Failed to add embedding:', embeddingError);
|
|
163
|
+
}
|
|
143
164
|
return {
|
|
144
165
|
success: true,
|
|
145
166
|
path: relativePath,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@telvok/librarian-mcp",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "Knowledge capture MCP server - remember what you learn with AI",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/server.js",
|
|
@@ -26,11 +26,12 @@
|
|
|
26
26
|
],
|
|
27
27
|
"repository": {
|
|
28
28
|
"type": "git",
|
|
29
|
-
"url": "https://github.com/telvokdev/librarian.git"
|
|
29
|
+
"url": "git+https://github.com/telvokdev/librarian.git"
|
|
30
30
|
},
|
|
31
31
|
"author": "Telvok",
|
|
32
32
|
"license": "MIT",
|
|
33
33
|
"dependencies": {
|
|
34
|
+
"@huggingface/transformers": "^3.0.0",
|
|
34
35
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
35
36
|
"glob": "^11.0.0",
|
|
36
37
|
"gray-matter": "^4.0.3",
|