@telvok/librarian-mcp 1.0.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/library/embeddings.d.ts +21 -0
- package/dist/library/embeddings.js +86 -0
- package/dist/library/vector-index.d.ts +55 -0
- package/dist/library/vector-index.js +160 -0
- package/dist/server.js +9 -0
- package/dist/tools/brief.d.ts +2 -0
- package/dist/tools/brief.js +77 -4
- package/dist/tools/index.d.ts +1 -0
- package/dist/tools/index.js +1 -0
- package/dist/tools/mark-hit.d.ts +20 -0
- package/dist/tools/mark-hit.js +71 -0
- package/dist/tools/record.js +23 -0
- package/package.json +3 -2
package/dist/library/embeddings.d.ts ADDED
@@ -0,0 +1,21 @@
+/**
+ * Get embedding for a text string.
+ * Returns a 384-dimensional normalized vector.
+ */
+export declare function getEmbedding(text: string): Promise<number[]>;
+/**
+ * Check if embeddings are available (model can load).
+ */
+export declare function isEmbeddingAvailable(): Promise<boolean>;
+/**
+ * Calculate cosine similarity between two vectors.
+ * Since vectors are normalized, this is just the dot product.
+ */
+export declare function cosineSimilarity(a: number[], b: number[]): number;
+/**
+ * Split text into chunks at sentence boundaries.
+ * Aims for ~500 chars per chunk to preserve semantic meaning.
+ */
+export declare function chunkText(text: string, maxChars?: number): string[];
+export declare const EMBEDDING_MODEL_ID = "Xenova/all-MiniLM-L6-v2";
+export declare const EMBEDDING_DIMENSION = 384;
package/dist/library/embeddings.js ADDED
@@ -0,0 +1,86 @@
+import { pipeline, env } from '@huggingface/transformers';
+import * as path from 'path';
+import { getLibraryPath } from './storage.js';
+// ============================================================================
+// Configuration
+// ============================================================================
+// Cache model in .librarian/models
+env.allowRemoteModels = true;
+const MODEL_ID = 'Xenova/all-MiniLM-L6-v2';
+// ============================================================================
+// Embedding Generation
+// ============================================================================
+let embedder = null;
+/**
+ * Get embedding for a text string.
+ * Returns a 384-dimensional normalized vector.
+ */
+export async function getEmbedding(text) {
+    if (!embedder) {
+        // Set local model path on first call
+        const libraryPath = getLibraryPath();
+        env.localModelPath = path.join(libraryPath, 'models');
+        embedder = await pipeline('feature-extraction', MODEL_ID);
+    }
+    const result = await embedder(text, { pooling: 'mean', normalize: true });
+    return Array.from(result.data);
+}
+/**
+ * Check if embeddings are available (model can load).
+ */
+export async function isEmbeddingAvailable() {
+    try {
+        await getEmbedding('test');
+        return true;
+    }
+    catch {
+        return false;
+    }
+}
+// ============================================================================
+// Similarity Calculation
+// ============================================================================
+/**
+ * Calculate cosine similarity between two vectors.
+ * Since vectors are normalized, this is just the dot product.
+ */
+export function cosineSimilarity(a, b) {
+    if (a.length !== b.length) {
+        throw new Error('Vectors must have same dimension');
+    }
+    return a.reduce((sum, val, i) => sum + val * b[i], 0);
+}
+// ============================================================================
+// Text Chunking
+// ============================================================================
+/**
+ * Split text into chunks at sentence boundaries.
+ * Aims for ~500 chars per chunk to preserve semantic meaning.
+ */
+export function chunkText(text, maxChars = 500) {
+    // Split at sentence boundaries (. ! ? followed by whitespace)
+    const sentences = text.split(/(?<=[.!?])\s+/);
+    const chunks = [];
+    let current = '';
+    for (const sentence of sentences) {
+        // If adding this sentence exceeds limit and we have content, start new chunk
+        if ((current + ' ' + sentence).length > maxChars && current.trim()) {
+            chunks.push(current.trim());
+            current = sentence;
+        }
+        else {
+            current = current ? current + ' ' + sentence : sentence;
+        }
+    }
+    // Don't forget the last chunk
+    if (current.trim()) {
+        chunks.push(current.trim());
+    }
+    // If no chunks created (e.g., no sentence boundaries), return original text
+    return chunks.length > 0 ? chunks : [text];
+}
+// ============================================================================
+// Constants
+// ============================================================================
+export const EMBEDDING_MODEL_ID = MODEL_ID;
+export const EMBEDDING_DIMENSION = 384;
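The embeddings module above is self-contained, so its exports can be exercised on their own. A minimal sketch of the new API (the relative import path and the sample strings are assumptions for illustration; on first use the MiniLM model is downloaded and cached under the library's models directory):

    import { getEmbedding, cosineSimilarity, chunkText, EMBEDDING_DIMENSION } from './dist/library/embeddings.js';

    // Embed two short texts; vectors come back normalized, so cosine similarity is just a dot product.
    const a = await getEmbedding('retry webhooks with idempotency keys');
    const b = await getEmbedding('make webhook handlers idempotent');
    console.log(a.length === EMBEDDING_DIMENSION); // true (384)
    console.log(cosineSimilarity(a, b)); // closer to 1 = more similar

    // Short text fits in one ~500-char chunk; longer entries are split at sentence boundaries.
    console.log(chunkText('First sentence. Second one! A third?').length); // 1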
package/dist/library/vector-index.d.ts ADDED
@@ -0,0 +1,55 @@
+export interface IndexEntry {
+    path: string;
+    title: string;
+    embedding: number[];
+    chunk: number;
+    preview: string;
+}
+export interface VectorIndex {
+    version: number;
+    rebuilt: string;
+    modelId: string;
+    entries: IndexEntry[];
+}
+export interface SemanticMatch {
+    path: string;
+    title: string;
+    similarity: number;
+    preview: string;
+}
+/**
+ * Load the vector index from disk.
+ * Returns empty index if file doesn't exist or is invalid.
+ */
+export declare function loadIndex(): Promise<VectorIndex>;
+/**
+ * Save the vector index to disk.
+ */
+export declare function saveIndex(index: VectorIndex): Promise<void>;
+/**
+ * Add or update an entry in the index.
+ * Chunks the content and generates embeddings for each chunk.
+ */
+export declare function addToIndex(index: VectorIndex, entryPath: string, title: string, content: string): Promise<void>;
+/**
+ * Remove an entry from the index.
+ */
+export declare function removeFromIndex(index: VectorIndex, entryPath: string): void;
+/**
+ * Search the index for entries semantically similar to the query.
+ * Returns paths ranked by similarity, deduped to best chunk per entry.
+ */
+export declare function semanticSearch(index: VectorIndex, query: string, limit?: number): Promise<SemanticMatch[]>;
+/**
+ * Check if the index might be stale (model changed).
+ */
+export declare function isIndexStale(index: VectorIndex): boolean;
+/**
+ * Get index statistics.
+ */
+export declare function getIndexStats(index: VectorIndex): {
+    entryCount: number;
+    chunkCount: number;
+    modelId: string;
+    rebuilt: string;
+};
package/dist/library/vector-index.js ADDED
@@ -0,0 +1,160 @@
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import { getLibraryPath } from './storage.js';
+import { getEmbedding, chunkText, cosineSimilarity, EMBEDDING_MODEL_ID } from './embeddings.js';
+// ============================================================================
+// Constants
+// ============================================================================
+const INDEX_FILENAME = 'index.json';
+const CURRENT_VERSION = 1;
+// ============================================================================
+// Index File Operations
+// ============================================================================
+/**
+ * Get path to the index file.
+ */
+function getIndexPath() {
+    return path.join(getLibraryPath(), INDEX_FILENAME);
+}
+/**
+ * Load the vector index from disk.
+ * Returns empty index if file doesn't exist or is invalid.
+ */
+export async function loadIndex() {
+    const indexPath = getIndexPath();
+    try {
+        const data = await fs.readFile(indexPath, 'utf-8');
+        const index = JSON.parse(data);
+        // Validate structure
+        if (!index.version || !Array.isArray(index.entries)) {
+            return createEmptyIndex();
+        }
+        return index;
+    }
+    catch {
+        // File doesn't exist or is invalid
+        return createEmptyIndex();
+    }
+}
+/**
+ * Save the vector index to disk.
+ */
+export async function saveIndex(index) {
+    const indexPath = getIndexPath();
+    // Update metadata
+    index.rebuilt = new Date().toISOString();
+    index.modelId = EMBEDDING_MODEL_ID;
+    // Ensure directory exists
+    await fs.mkdir(path.dirname(indexPath), { recursive: true });
+    // Write atomically by writing to temp file first
+    const tempPath = indexPath + '.tmp';
+    await fs.writeFile(tempPath, JSON.stringify(index, null, 2), 'utf-8');
+    await fs.rename(tempPath, indexPath);
+}
+/**
+ * Create an empty index.
+ */
+function createEmptyIndex() {
+    return {
+        version: CURRENT_VERSION,
+        rebuilt: '',
+        modelId: EMBEDDING_MODEL_ID,
+        entries: [],
+    };
+}
+// ============================================================================
+// Index Operations
+// ============================================================================
+/**
+ * Add or update an entry in the index.
+ * Chunks the content and generates embeddings for each chunk.
+ */
+export async function addToIndex(index, entryPath, title, content) {
+    // Remove any existing entries for this path
+    index.entries = index.entries.filter(e => e.path !== entryPath);
+    // Chunk the content
+    const chunks = chunkText(content);
+    // Generate embeddings for each chunk
+    for (let i = 0; i < chunks.length; i++) {
+        const chunk = chunks[i];
+        try {
+            const embedding = await getEmbedding(chunk);
+            index.entries.push({
+                path: entryPath,
+                title,
+                embedding,
+                chunk: i,
+                preview: chunk.slice(0, 100) + (chunk.length > 100 ? '...' : ''),
+            });
+        }
+        catch (error) {
+            // Log but don't fail - entry will still be searchable via keywords
+            console.error(`Failed to embed chunk ${i} for ${entryPath}:`, error);
+        }
+    }
+}
+/**
+ * Remove an entry from the index.
+ */
+export function removeFromIndex(index, entryPath) {
+    index.entries = index.entries.filter(e => e.path !== entryPath);
+}
+// ============================================================================
+// Semantic Search
+// ============================================================================
+/**
+ * Search the index for entries semantically similar to the query.
+ * Returns paths ranked by similarity, deduped to best chunk per entry.
+ */
+export async function semanticSearch(index, query, limit = 5) {
+    if (index.entries.length === 0) {
+        return [];
+    }
+    // Get query embedding
+    const queryEmbedding = await getEmbedding(query);
+    // Score all entries
+    const scored = index.entries.map(entry => ({
+        ...entry,
+        similarity: cosineSimilarity(queryEmbedding, entry.embedding),
+    }));
+    // Dedupe by path - keep the chunk with highest similarity
+    const byPath = new Map();
+    for (const entry of scored) {
+        const existing = byPath.get(entry.path);
+        if (!existing || entry.similarity > existing.similarity) {
+            byPath.set(entry.path, entry);
+        }
+    }
+    // Sort by similarity descending and apply limit
+    const results = [...byPath.values()]
+        .sort((a, b) => b.similarity - a.similarity)
+        .slice(0, limit)
+        .map(entry => ({
+        path: entry.path,
+        title: entry.title,
+        similarity: entry.similarity,
+        preview: entry.preview,
+    }));
+    return results;
+}
+// ============================================================================
+// Index Health
+// ============================================================================
+/**
+ * Check if the index might be stale (model changed).
+ */
+export function isIndexStale(index) {
+    return index.modelId !== EMBEDDING_MODEL_ID;
+}
+/**
+ * Get index statistics.
+ */
+export function getIndexStats(index) {
+    const uniquePaths = new Set(index.entries.map(e => e.path));
+    return {
+        entryCount: uniquePaths.size,
+        chunkCount: index.entries.length,
+        modelId: index.modelId,
+        rebuilt: index.rebuilt,
+    };
+}
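Together these functions give record() and brief() a simple add/save/search lifecycle around the library's index.json. A rough sketch of that flow (the import path, entry path, and query are illustrative assumptions):

    import { loadIndex, addToIndex, saveIndex, semanticSearch, isIndexStale } from './dist/library/vector-index.js';

    const index = await loadIndex(); // empty index if the file doesn't exist yet
    if (isIndexStale(index)) {
        // modelId stored in the file no longer matches EMBEDDING_MODEL_ID; entries would need re-embedding
    }
    await addToIndex(index, 'local/example-entry.md', 'Example entry',
        'Stripe webhooks should be handled idempotently. Retries can deliver the same event twice.');
    await saveIndex(index); // atomic write via index.json.tmp, then rename

    const matches = await semanticSearch(index, 'duplicate webhook deliveries', 3);
    // [{ path, title, similarity, preview }, ...] best chunk per entry, highest similarity first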
package/dist/server.js CHANGED
@@ -5,6 +5,7 @@ import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextpro
 import { briefTool } from './tools/brief.js';
 import { recordTool } from './tools/record.js';
 import { adoptTool } from './tools/adopt.js';
+import { markHitTool } from './tools/mark-hit.js';
 const server = new Server({
     name: 'librarian',
     version: '1.0.0',
@@ -32,6 +33,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
                 description: adoptTool.description,
                 inputSchema: adoptTool.inputSchema,
             },
+            {
+                name: markHitTool.name,
+                description: markHitTool.description,
+                inputSchema: markHitTool.inputSchema,
+            },
         ],
     };
 });
@@ -50,6 +56,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
            case 'adopt':
                result = await adoptTool.handler(args);
                break;
+           case 'mark_hit':
+               result = await markHitTool.handler(args);
+               break;
            default:
                throw new Error(`Unknown tool: ${name}`);
        }
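With that wiring in place, the new tool is reachable through the standard MCP tools/call request that the CallToolRequestSchema handler dispatches on. Roughly, the call parameters a client sends look like this (a sketch only; the path value is illustrative):

    // Shape of the call the handler above routes to markHitTool.handler(args)
    const params = {
        name: 'mark_hit',
        arguments: { path: 'local/stripe-webhooks-need-idempotency.md' },
    };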
package/dist/tools/brief.d.ts CHANGED
package/dist/tools/brief.js CHANGED
@@ -3,6 +3,7 @@ import * as path from 'path';
 import matter from 'gray-matter';
 import { glob } from 'glob';
 import { getLibraryPath, getLocalPath, getImportedPath } from '../library/storage.js';
+import { loadIndex, semanticSearch, isIndexStale } from '../library/vector-index.js';
 // ============================================================================
 // Tool Definition
 // ============================================================================
@@ -39,6 +40,49 @@ Examples:
         const localPath = getLocalPath(libraryPath);
         const importedPath = getImportedPath(libraryPath);
         let allEntries = [];
+        let useSemanticSearch = false;
+        let semanticMatches = [];
+        // Try semantic search if query is provided
+        if (query) {
+            try {
+                const index = await loadIndex();
+                // Only use semantic search if index has entries and isn't stale
+                if (index.entries.length > 0 && !isIndexStale(index)) {
+                    semanticMatches = await semanticSearch(index, query, limit);
+                    useSemanticSearch = semanticMatches.length > 0;
+                }
+            }
+            catch {
+                // Semantic search unavailable, fall back to keyword search
+                useSemanticSearch = false;
+            }
+        }
+        if (useSemanticSearch && semanticMatches.length > 0) {
+            // Load only the entries that matched semantically
+            const matchedPaths = new Set(semanticMatches.map(m => m.path));
+            for (const match of semanticMatches) {
+                const fullPath = path.join(libraryPath, match.path);
+                const entry = await readEntry(fullPath, libraryPath);
+                if (entry) {
+                    allEntries.push(entry);
+                }
+            }
+            // Sort by semantic similarity (order preserved from semanticSearch)
+            // Re-order allEntries to match semanticMatches order
+            const pathToEntry = new Map(allEntries.map(e => [e.path, e]));
+            allEntries = semanticMatches
+                .map(m => pathToEntry.get(m.path))
+                .filter((e) => e !== undefined);
+            const total = allEntries.length;
+            const entries = allEntries.slice(0, limit);
+            return {
+                entries,
+                total,
+                message: `Found ${total} ${total === 1 ? 'entry' : 'entries'} for "${query}" (semantic search).`,
+                libraryPath: localPath,
+            };
+        }
+        // Fall back to keyword search
         // Read local entries
         try {
             const localFiles = await glob(path.join(localPath, '**/*.md'), { nodir: true });
@@ -79,10 +123,9 @@ Examples:
             const searchTerm = query.toLowerCase();
             allEntries = allEntries.filter(entry => matchesSearch(entry, searchTerm));
         }
-        // Sort by
-
-
-        });
+        // Sort by blended score: 60% recency + 40% hits
+        // Entries that helped before bubble up, but new entries still surface
+        allEntries = rankEntries(allEntries);
         const total = allEntries.length;
         // Apply limit
         const entries = allEntries.slice(0, limit);
@@ -134,6 +177,8 @@ async function readEntry(filePath, libraryPath) {
             preview,
             path: path.relative(libraryPath, filePath),
             created: data.created || new Date().toISOString(),
+            hits: typeof data.hits === 'number' ? data.hits : 0,
+            last_hit: data.last_hit || null,
         };
     }
     catch {
@@ -159,3 +204,31 @@ function matchesSearch(entry, searchTerm) {
     }
     return false;
 }
+// ============================================================================
+// Smart Ranking
+// ============================================================================
+const RECENCY_WEIGHT = 0.6;
+const HITS_WEIGHT = 0.4;
+const RECENCY_DECAY_DAYS = 30; // Entries older than this get minimal recency score
+function rankEntries(entries) {
+    if (entries.length === 0)
+        return entries;
+    const now = Date.now();
+    // Find max hits for normalization (avoid divide by zero)
+    const maxHits = Math.max(1, ...entries.map(e => e.hits));
+    // Calculate scores
+    const scored = entries.map(entry => {
+        // Recency score: 1.0 for today, decays over RECENCY_DECAY_DAYS
+        const ageMs = now - new Date(entry.created).getTime();
+        const ageDays = ageMs / (1000 * 60 * 60 * 24);
+        const recencyScore = Math.max(0, 1 - (ageDays / RECENCY_DECAY_DAYS));
+        // Hits score: normalized 0-1 against max hits in library
+        const hitsScore = entry.hits / maxHits;
+        // Blended score
+        const score = (RECENCY_WEIGHT * recencyScore) + (HITS_WEIGHT * hitsScore);
+        return { entry, score };
+    });
+    // Sort by score descending
+    scored.sort((a, b) => b.score - a.score);
+    return scored.map(s => s.entry);
+}
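As a worked example of that blend (numbers purely illustrative): a 10-day-old entry with 3 hits, in a library whose most-hit entry has 6, scores 0.6 × (1 − 10/30) + 0.4 × (3/6) ≈ 0.60, while a brand-new entry with no hits scores 0.6 × 1.0 = 0.60, so fresh notes can still surface alongside older, frequently used ones.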
package/dist/tools/index.d.ts CHANGED
package/dist/tools/index.js CHANGED
package/dist/tools/mark-hit.d.ts ADDED
@@ -0,0 +1,20 @@
+export interface MarkHitResult {
+    success: boolean;
+    path: string;
+    hits: number;
+}
+export declare const markHitTool: {
+    name: string;
+    description: string;
+    inputSchema: {
+        type: "object";
+        properties: {
+            path: {
+                type: string;
+                description: string;
+            };
+        };
+        required: string[];
+    };
+    handler(args: unknown): Promise<MarkHitResult>;
+};
package/dist/tools/mark-hit.js ADDED
@@ -0,0 +1,71 @@
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import matter from 'gray-matter';
+import { getLibraryPath } from '../library/storage.js';
+// ============================================================================
+// Tool Definition
+// ============================================================================
+export const markHitTool = {
+    name: 'mark_hit',
+    description: `Mark a library entry as helpful - call this when knowledge from the library helped solve a problem.
+
+When an entry from brief() actually helped you complete a task or make a decision,
+call mark_hit() on it. This helps the library learn which entries are most useful.
+
+Entries with more hits bubble up in future brief() results.
+
+Fire and forget - call it and move on.
+
+Example:
+- mark_hit({ path: "local/stripe-webhooks-need-idempotency.md" })`,
+    inputSchema: {
+        type: 'object',
+        properties: {
+            path: {
+                type: 'string',
+                description: 'Path to the entry that helped (from brief() results)',
+            },
+        },
+        required: ['path'],
+    },
+    async handler(args) {
+        const { path: entryPath } = args;
+        if (!entryPath) {
+            throw new Error('path is required');
+        }
+        const libraryPath = getLibraryPath();
+        // Resolve the full path
+        let fullPath;
+        if (path.isAbsolute(entryPath)) {
+            fullPath = entryPath;
+        }
+        else {
+            fullPath = path.join(libraryPath, entryPath);
+        }
+        // Read existing file
+        let content;
+        try {
+            content = await fs.readFile(fullPath, 'utf-8');
+        }
+        catch {
+            throw new Error(`Entry not found: ${entryPath}`);
+        }
+        // Parse frontmatter
+        const { data, content: body } = matter(content);
+        // Increment hits
+        const currentHits = typeof data.hits === 'number' ? data.hits : 0;
+        const newHits = currentHits + 1;
+        // Update frontmatter
+        data.hits = newHits;
+        data.last_hit = new Date().toISOString();
+        // Rebuild file content
+        const updatedContent = matter.stringify(body, data);
+        // Write back
+        await fs.writeFile(fullPath, updatedContent, 'utf-8');
+        return {
+            success: true,
+            path: entryPath,
+            hits: newHits,
+        };
+    },
+};
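The handler is deliberately fire-and-forget: read the entry, bump the hit counter in its frontmatter, write it back. A minimal sketch of invoking it directly (the import path is an assumption, the entry path is the illustrative one from the tool's own description, and the file must already exist in the library):

    import { markHitTool } from './dist/tools/mark-hit.js';

    const result = await markHitTool.handler({ path: 'local/stripe-webhooks-need-idempotency.md' });
    console.log(result); // { success: true, path: 'local/stripe-webhooks-need-idempotency.md', hits: <previous count + 1> }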
package/dist/tools/record.js CHANGED
@@ -1,6 +1,7 @@
 import * as fs from 'fs/promises';
 import * as path from 'path';
 import { getLibraryPath, getLocalPath } from '../library/storage.js';
+import { loadIndex, saveIndex, addToIndex } from '../library/vector-index.js';
 // ============================================================================
 // Tool Definition
 // ============================================================================
@@ -106,6 +107,8 @@ Rich:
         frontmatterLines.push(`created: "${created}"`);
         frontmatterLines.push(`updated: "${created}"`);
         frontmatterLines.push('source: "local"');
+        frontmatterLines.push('hits: 0');
+        frontmatterLines.push('last_hit: null');
         frontmatterLines.push('---');
         // Build body
         const bodyLines = [];
@@ -138,6 +141,26 @@ Rich:
         const fileContent = frontmatterLines.join('\n') + '\n\n' + bodyLines.join('\n') + '\n';
         await fs.writeFile(filePath, fileContent, 'utf-8');
         const relativePath = path.relative(libraryPath, filePath);
+        // Add to vector index for semantic search
+        try {
+            const index = await loadIndex();
+            // Combine all text for embedding
+            const fullContent = [
+                title,
+                intent || '',
+                insight,
+                reasoning || '',
+                example || '',
+                context || '',
+            ].filter(Boolean).join('\n\n');
+            await addToIndex(index, relativePath, title, fullContent);
+            await saveIndex(index);
+        }
+        catch (embeddingError) {
+            // Don't fail the record operation if embedding fails
+            // Entry is still saved and searchable via keywords
+            console.error('Failed to add embedding:', embeddingError);
+        }
         return {
             success: true,
             path: relativePath,
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@telvok/librarian-mcp",
-  "version": "1.0.3",
+  "version": "1.2.0",
   "description": "Knowledge capture MCP server - remember what you learn with AI",
   "type": "module",
   "main": "dist/server.js",
@@ -26,11 +26,12 @@
   ],
   "repository": {
     "type": "git",
-    "url": "https://github.com/telvokdev/librarian.git"
+    "url": "git+https://github.com/telvokdev/librarian.git"
   },
   "author": "Telvok",
   "license": "MIT",
   "dependencies": {
+    "@huggingface/transformers": "^3.0.0",
     "@modelcontextprotocol/sdk": "^1.0.0",
     "glob": "^11.0.0",
     "gray-matter": "^4.0.3",