npm - @aeriondyseti/vector-memory-mcp - Versions diffs - 2.3.0 → 2.4.4-dev.1 - Mend

@aeriondyseti/vector-memory-mcp 2.3.0 → 2.4.4-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/package.json +6 -6
package/server/core/connection.ts +1 -1
package/server/core/conversation.repository.ts +113 -16
package/server/core/conversation.service.ts +19 -19
package/server/core/conversation.ts +7 -5
package/server/core/embeddings.service.ts +108 -17
package/server/core/memory.repository.ts +49 -14
package/server/core/memory.service.ts +47 -42
package/server/core/memory.ts +40 -1
package/server/core/migration.service.ts +3 -3
package/server/core/migrations.ts +60 -20
package/server/core/parsers/claude-code.parser.ts +3 -3
package/server/core/parsers/types.ts +1 -1
package/server/core/sqlite-utils.ts +67 -2
package/server/index.ts +13 -15
package/server/transports/http/mcp-transport.ts +5 -5
package/server/transports/http/server.ts +19 -6
package/server/transports/mcp/handlers.ts +47 -23
package/server/transports/mcp/server.ts +5 -5
package/scripts/lancedb-extract.ts +0 -181
package/scripts/smoke-test.ts +0 -699
package/scripts/sync-version.ts +0 -35
package/scripts/test-runner.ts +0 -76
package/scripts/warmup.ts +0 -72

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aeriondyseti/vector-memory-mcp",
-  "version": "2.3.0",
+  "version": "2.4.4-dev.1",
   "description": "A zero-configuration RAG memory server for MCP clients",
   "type": "module",
   "main": "server/index.ts",
@@ -9,7 +9,6 @@
   },
   "files": [
     "server",
-    "scripts",
     "README.md",
     "LICENSE"
   ],
@@ -31,6 +30,7 @@
     "test:quick": "bun test",
     "test:coverage": "bun test --preload ./tests/preload.ts --coverage",
     "benchmark": "bun test tests/benchmark.test.ts --preload ./tests/preload.ts",
+    "benchmark:update": "bun run scripts/update-benchmarks.ts",
     "test:preload": "bun run tests/preload.ts",
     "smoke": "bun run scripts/smoke-test.ts",
     "warmup": "bun run scripts/warmup.ts",
@@ -47,18 +47,18 @@
   ],
   "license": "MIT",
   "dependencies": {
-    "@huggingface/transformers": "^3.8.0",
+    "@huggingface/tokenizers": "^0.1.3",
     "@lancedb/lancedb": "^0.26.2",
     "@modelcontextprotocol/sdk": "^1.0.0",
     "arg": "^5.0.2",
-    "hono": "^4.11.3"
+    "hono": "^4.11.3",
+    "onnxruntime-node": "^1.21.0"
   },
   "devDependencies": {
     "@types/bun": "latest",
     "typescript": "^5.0.0"
   },
   "trustedDependencies": [
-    "protobufjs",
-    "sharp"
+    "protobufjs"
   ]
 }

package/server/core/connection.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { Database } from "bun:sqlite";
 import { existsSync, mkdirSync } from "fs";
 import { dirname } from "path";
-import { removeVec0Tables, runMigrations } from "./migrations.js";
+import { removeVec0Tables, runMigrations } from "./migrations";
 /**
  * Open (or create) a SQLite database at the given path

package/server/core/conversation.repository.ts CHANGED Viewed

@@ -2,15 +2,15 @@ import type { Database } from "bun:sqlite";
 import type {
   ConversationHybridRow,
   HistoryFilters,
-} from "./conversation.js";
+} from "./conversation";
 import {
   serializeVector,
   safeParseJsonObject,
   sanitizeFtsQuery,
-  hybridRRF,
+  hybridRRFWithSignals,
   topByRRF,
   knnSearch,
-} from "./sqlite-utils.js";
+} from "./sqlite-utils";
 export class ConversationRepository {
   constructor(private db: Database) {}
@@ -105,13 +105,102 @@ export class ConversationRepository {
     tx();
   }
+  /**
+   * Replace every indexed chunk of a session with a new set of rows.
+   *
+   * All work happens inside one transaction: the session's existing entries
+   * are removed from the vector, FTS and main tables, then each row is written
+   * to all three tables.
+   *
+   * @param sessionId Session whose existing chunks are removed.
+   * @param rows New chunks to store (may be empty, which just clears the session).
+   */
+  async replaceSession(
+    sessionId: string,
+    rows: Array<{
+      id: string;
+      vector: number[];
+      content: string;
+      metadata: string;
+      created_at: number;
+      session_id: string;
+      role: string;
+      message_index_start: number;
+      message_index_end: number;
+      project: string;
+    }>
+  ): Promise<void> {
+    // Old index entries are selected with a sub-query on the main table, so
+    // exactly one parameter is bound no matter how many chunks the session has.
+    // (An expanded `IN (?, ?, ...)` list grows with the chunk count and can
+    // exceed SQLite's bound-parameter limit for very long sessions.)
+    const deleteOldVec = this.db.prepare(
+      `DELETE FROM conversation_history_vec
+        WHERE id IN (SELECT id FROM conversation_history WHERE session_id = ?)`
+    );
+    const deleteOldFts = this.db.prepare(
+      `DELETE FROM conversation_history_fts
+        WHERE id IN (SELECT id FROM conversation_history WHERE session_id = ?)`
+    );
+    const deleteOldMain = this.db.prepare(
+      `DELETE FROM conversation_history WHERE session_id = ?`
+    );
+    const insertMain = this.db.prepare(
+      `INSERT OR REPLACE INTO conversation_history
+        (id, content, metadata, created_at, session_id, role, message_index_start, message_index_end, project)
+       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
+    );
+    const deleteVec = this.db.prepare(
+      `DELETE FROM conversation_history_vec WHERE id = ?`
+    );
+    const insertVec = this.db.prepare(
+      `INSERT INTO conversation_history_vec (id, vector) VALUES (?, ?)`
+    );
+    const deleteFts = this.db.prepare(
+      `DELETE FROM conversation_history_fts WHERE id = ?`
+    );
+    const insertFts = this.db.prepare(
+      `INSERT INTO conversation_history_fts (id, content) VALUES (?, ?)`
+    );
+    const tx = this.db.transaction(() => {
+      // Delete old chunks first. The side tables must go before the main table,
+      // because the sub-queries above read the ids from the main table.
+      deleteOldVec.run(sessionId);
+      deleteOldFts.run(sessionId);
+      deleteOldMain.run(sessionId);
+      // Insert new chunks
+      for (const row of rows) {
+        insertMain.run(
+          row.id,
+          row.content,
+          row.metadata,
+          row.created_at,
+          row.session_id,
+          row.role,
+          row.message_index_start,
+          row.message_index_end,
+          row.project
+        );
+        // INSERT OR REPLACE may have overwritten a row with the same id, so
+        // clear any matching side-table entries before re-inserting them.
+        deleteVec.run(row.id);
+        insertVec.run(row.id, serializeVector(row.vector));
+        deleteFts.run(row.id);
+        insertFts.run(row.id, row.content);
+      }
+    });
+    tx();
+  }
+  /**
+   * Hybrid search combining vector KNN and FTS5, fused with Reciprocal Rank Fusion.
+   *
+   * NOTE: Filters (session, role, project, date) are applied AFTER candidate selection
+   * and RRF scoring, not pushed into the KNN/FTS queries. This is an intentional
+   * performance tradeoff — KNN is brute-force JS-side (no SQL pre-filter possible),
+   * and filtering post-RRF avoids duplicating filter logic across both retrieval paths.
+   * The consequence is that filtered queries may return fewer than `limit` results.
+   */
   async findHybrid(
     embedding: number[],
     query: string,
     limit: number,
     filters?: HistoryFilters
   ): Promise<ConversationHybridRow[]> {
-    const candidateCount = limit * 3;
+    const candidateCount = limit * 5;
     // Vector KNN search (brute-force cosine similarity in JS)
     const vecResults = knnSearch(this.db, "conversation_history_vec", embedding, candidateCount);
@@ -127,8 +216,10 @@ export class ConversationRepository {
       )
       .all(ftsQuery, candidateCount) as Array<{ id: string }>;
-    // Compute RRF scores and get top ids
-    const rrfScores = hybridRRF(vecResults, ftsResults);
+    // Compute RRF scores with search signals for confidence scoring
+    const signalsMap = hybridRRFWithSignals(vecResults, ftsResults);
+    const rrfScores = new Map<string, number>();
+    for (const [id, s] of signalsMap) rrfScores.set(id, s.rrfScore);
     const topIds = topByRRF(rrfScores, limit);
     if (topIds.length === 0) return [];
@@ -185,17 +276,23 @@ export class ConversationRepository {
       project: string;
     }>;
-    // Build a lookup for ordering by RRF score
-    const scoreMap = new Map(topIds.map((id) => [id, rrfScores.get(id)!]));
     return fullRows
-      .map((row) => ({
-        id: row.id,
-        content: row.content,
-        metadata: safeParseJsonObject(row.metadata),
-        createdAt: new Date(row.created_at),
-        rrfScore: scoreMap.get(row.id) ?? 0,
-      }))
+      .map((row) => {
+        const signals = signalsMap.get(row.id)!;
+        return {
+          id: row.id,
+          content: row.content,
+          metadata: safeParseJsonObject(row.metadata),
+          createdAt: new Date(row.created_at),
+          rrfScore: signals.rrfScore,
+          signals: {
+            cosineSimilarity: signals.cosineSimilarity,
+            ftsMatch: signals.ftsMatch,
+            knnRank: signals.knnRank,
+            ftsRank: signals.ftsRank,
+          },
+        };
+      })
       .sort((a, b) => b.rrfScore - a.rrfScore);
   }
 }

package/server/core/conversation.service.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { createHash } from "crypto";
 import { readFile, writeFile, mkdir } from "fs/promises";
 import { dirname, join } from "path";
-import type { ConversationRepository } from "./conversation.repository.js";
+import type { ConversationRepository } from "./conversation.repository";
 import type {
   ConversationChunk,
   ConversationHybridRow,
@@ -10,12 +10,12 @@ import type {
   ParsedMessage,
   SessionFileInfo,
   SessionIndexDetail,
-} from "./conversation.js";
-import type { ConversationHistoryConfig } from "../config/index.js";
-import { resolveSessionLogPath } from "../config/index.js";
-import type { EmbeddingsService } from "./embeddings.service.js";
-import type { SessionLogParser } from "./parsers/types.js";
-import { ClaudeCodeSessionParser } from "./parsers/claude-code.parser.js";
+} from "./conversation";
+import type { ConversationHistoryConfig } from "../config/index";
+import { resolveSessionLogPath } from "../config/index";
+import type { EmbeddingsService } from "./embeddings.service";
+import type { SessionLogParser } from "./parsers/types";
+import { ClaudeCodeSessionParser } from "./parsers/claude-code.parser";
 /**
  * Generate a deterministic chunk ID from session ID and message indices.
@@ -78,12 +78,7 @@ export function chunkMessages(
       messageIndexEnd: lastMsg.messageIndex,
       project: firstMsg.project,
       metadata: {
-        session_id: firstMsg.sessionId,
         timestamp: firstMsg.timestamp.toISOString(),
-        role,
-        message_index_start: firstMsg.messageIndex,
-        message_index_end: lastMsg.messageIndex,
-        project: firstMsg.project,
         git_branch: firstMsg.gitBranch,
         is_subagent: firstMsg.isSubagent,
         agent_id: firstMsg.agentId,
@@ -273,20 +268,24 @@ export class ConversationHistoryService {
       this.config.chunkOverlap
     );
-    // Delete existing chunks for re-indexing
-    await this.repository.deleteBySessionId(file.sessionId);
-    // Embed all chunks
+    // Embed all chunks FIRST (pure computation, no DB side effects)
     const embeddings = await this.embeddings.embedBatch(
       chunks.map((c) => c.content)
     );
-    // Insert all chunks
+    // Build rows
     const rows = chunks.map((chunk, i) => ({
       id: chunk.id,
       vector: embeddings[i],
       content: chunk.content,
-      metadata: JSON.stringify(chunk.metadata),
+      metadata: JSON.stringify({
+        ...chunk.metadata,
+        session_id: chunk.sessionId,
+        role: chunk.role,
+        message_index_start: chunk.messageIndexStart,
+        message_index_end: chunk.messageIndexEnd,
+        project: chunk.project,
+      }),
       created_at: chunk.timestamp.getTime(),
       session_id: chunk.sessionId,
       role: chunk.role,
@@ -295,7 +294,8 @@ export class ConversationHistoryService {
       project: chunk.project,
     }));
-    await this.repository.insertBatch(rows);
+    // Atomically replace old chunks with new ones
+    await this.repository.replaceSession(file.sessionId, rows);
     // Update index state
     const session: IndexedSession = {

package/server/core/conversation.ts CHANGED Viewed

@@ -14,12 +14,7 @@ export interface ParsedMessage {
 /** Metadata stored per conversation chunk in the database */
 export interface ConversationChunkMetadata {
-  session_id: string;
   timestamp: string;
-  role: string;
-  message_index_start: number;
-  message_index_end: number;
-  project: string;
   git_branch?: string;
   is_subagent: boolean;
   agent_id?: string;
@@ -52,6 +47,8 @@ export interface IndexedSession {
   lastMessageAt: Date;
 }
+import type { SearchSignals } from "./memory";
 /** Raw row from conversation_history table with RRF score */
 export interface ConversationHybridRow {
   id: string;
@@ -59,6 +56,7 @@ export interface ConversationHybridRow {
   metadata: Record<string, unknown>;
   createdAt: Date;
   rrfScore: number;
+  signals: SearchSignals;
 }
 /** Unified search result with source provenance */
@@ -70,6 +68,8 @@ export interface SearchResult {
   updatedAt: Date;
   source: "memory" | "conversation_history";
   score: number;
+  /** Absolute relevance confidence (0.0-1.0). Based on cosine similarity + retrieval agreement. */
+  confidence: number;
   // Memory-specific fields
   supersededBy: string | null;
   usefulness?: number;
@@ -114,6 +114,8 @@ export interface HistoryFilters {
 /** Options for the integrated search across both sources */
 export interface SearchOptions {
+  limit?: number;
+  includeDeleted?: boolean;
   includeHistory?: boolean;
   historyOnly?: boolean;
   historyWeight?: number;

package/server/core/embeddings.service.ts CHANGED Viewed

@@ -1,9 +1,17 @@
-import { pipeline, type FeatureExtractionPipeline } from "@huggingface/transformers";
+import * as ort from "onnxruntime-node";
+import { Tokenizer } from "@huggingface/tokenizers";
+import { join, dirname } from "path";
+import { mkdir, rename } from "fs/promises";
+import { existsSync } from "fs";
+const HF_CDN = "https://huggingface.co";
+const MAX_SEQ_LENGTH = 512;
 export class EmbeddingsService {
   private modelName: string;
-  private extractor: FeatureExtractionPipeline | null = null;
-  private initPromise: Promise<FeatureExtractionPipeline> | null = null;
+  private session: ort.InferenceSession | null = null;
+  private tokenizer: Tokenizer | null = null;
+  private initPromise: Promise<void> | null = null;
   private _dimension: number;
   constructor(modelName: string, dimension: number) {
@@ -15,27 +23,79 @@ export class EmbeddingsService {
     return this._dimension;
   }
-  private async getExtractor(): Promise<FeatureExtractionPipeline> {
-    if (this.extractor) {
-      return this.extractor;
-    }
+  /**
+   * True once the service can embed without further initialization, i.e.
+   * both the ONNX session and the tokenizer have been loaded.
+   */
+  get isReady(): boolean {
+    return this.session !== null && this.tokenizer !== null;
+  }
+  /**
+   * Run initialization ahead of time and wait for it to finish.
+   * Public entry point to the private initialize() step that embed() also awaits.
+   */
+  async warmup(): Promise<void> {
+    await this.initialize();
+  }
+  /**
+   * Ensure the ONNX session and tokenizer are loaded, starting the load at
+   * most once. Concurrent callers share the same in-flight promise.
+   *
+   * If loading fails the error is rethrown to every waiting caller and the
+   * cached promise is cleared, so that a later call can retry.
+   */
+  private async initialize(): Promise<void> {
+    // Require both pieces: never take the fast path on a half-initialized
+    // service, otherwise embed() would dereference a null tokenizer.
+    if (this.session && this.tokenizer) return;
     if (!this.initPromise) {
-      this.initPromise = pipeline(
-        "feature-extraction",
-        this.modelName,
-        { dtype: "fp32" } as any
-      ) as Promise<FeatureExtractionPipeline>;
+      this.initPromise = this._init().catch((err: unknown) => {
+        // Forget the failed attempt. Without this, every later call would
+        // re-await the same rejected promise and could never recover.
+        this.session = null;
+        this.tokenizer = null;
+        this.initPromise = null;
+        throw err;
+      });
     }
+    await this.initPromise;
+  }
+  /**
+   * Directory where this model's files are cached:
+   * `<directory of Bun.main>/../.cache/models/<modelName>`.
+   */
+  private get cacheDir(): string {
+    return join(dirname(Bun.main), "..", ".cache", "models", this.modelName);
+  }
+  /**
+   * Return the local cache path of a model file, downloading it first when
+   * it is not cached yet.
+   *
+   * @param fileName Path of the file relative to the model repository root.
+   * @returns Absolute or relative path of the cached file under cacheDir.
+   * @throws Error when the HTTP response status is not OK.
+   */
+  private async downloadIfMissing(fileName: string): Promise<string> {
+    const filePath = join(this.cacheDir, fileName);
+    if (existsSync(filePath)) return filePath;
+    const url = `${HF_CDN}/${this.modelName}/resolve/main/${fileName}`;
+    await mkdir(dirname(filePath), { recursive: true });
+    const response = await fetch(url);
+    if (!response.ok) throw new Error(`Failed to download ${url}: ${response.status}`);
+    const buffer = await response.arrayBuffer();
+    // Write to a sibling temp file and move it into place afterwards. A crash
+    // or failed write can then never leave a truncated file at filePath that
+    // the existsSync check above would later accept as a valid cache entry.
+    const tempPath = `${filePath}.${process.pid}.tmp`;
+    await Bun.write(tempPath, buffer);
+    await rename(tempPath, filePath);
+    return filePath;
+  }
+  /**
+   * Load everything embed() needs: fetch (or reuse cached) model and
+   * tokenizer files, create the ONNX inference session on the CPU provider,
+   * and build the tokenizer.
+   *
+   * The instance fields are only assigned after every step has succeeded, so
+   * a failure part-way through never leaves a session without a tokenizer.
+   */
+  private async _init(): Promise<void> {
+    const modelPath = await this.downloadIfMissing("onnx/model.onnx");
+    const tokenizerJsonPath = await this.downloadIfMissing("tokenizer.json");
+    const tokenizerConfigPath = await this.downloadIfMissing("tokenizer_config.json");
+    const session = await ort.InferenceSession.create(modelPath, {
+      executionProviders: ["cpu"],
+    });
-    this.extractor = await this.initPromise;
-    return this.extractor;
+    const tokenizerJson = await Bun.file(tokenizerJsonPath).json();
+    const tokenizerConfig = await Bun.file(tokenizerConfigPath).json();
+    const tokenizer = new Tokenizer(tokenizerJson, tokenizerConfig);
+    // Publish both fields together, with no await in between, so no other
+    // caller can observe a non-null session with a null tokenizer.
+    this.tokenizer = tokenizer;
+    this.session = session;
   }
+  /**
+   * Embed a single text.
+   *
+   * Steps: make sure the model is initialized, tokenize, truncate to
+   * MAX_SEQ_LENGTH tokens, run the ONNX session, mean-pool the
+   * `last_hidden_state` output over the attention mask, then L2-normalize.
+   *
+   * @param text Text to embed.
+   * @returns The normalized pooled vector (meanPool sizes it by the configured dimension).
+   */
   async embed(text: string): Promise<number[]> {
-    const extractor = await this.getExtractor();
-    const output = await extractor(text, { pooling: "mean", normalize: true });
-    return Array.from(output.data as Float32Array);
+    await this.initialize();
+    const encoded = this.tokenizer!.encode(text);
+    // Truncate to model's max sequence length
+    // NOTE(review): slicing keeps the first seqLen tokens; if the tokenizer
+    // appends a trailing special token, it is dropped on truncation — confirm
+    // this is acceptable for the model.
+    const seqLen = Math.min(encoded.ids.length, MAX_SEQ_LENGTH);
+    const ids = encoded.ids.slice(0, seqLen);
+    const mask = encoded.attention_mask.slice(0, seqLen);
+    // The tensors below are declared "int64", hence the BigInt64Array buffers.
+    const inputIds = BigInt64Array.from(ids.map(BigInt));
+    const attentionMask = BigInt64Array.from(mask.map(BigInt));
+    const tokenTypeIds = new BigInt64Array(seqLen); // zeros for single-sequence input
+    // Batch of one: every tensor has shape [1, seqLen].
+    const feeds: Record<string, ort.Tensor> = {
+      input_ids: new ort.Tensor("int64", inputIds, [1, seqLen]),
+      attention_mask: new ort.Tensor("int64", attentionMask, [1, seqLen]),
+      token_type_ids: new ort.Tensor("int64", tokenTypeIds, [1, seqLen]),
+    };
+    const output = await this.session!.run(feeds);
+    // NOTE(review): assumes the model exposes an output named
+    // "last_hidden_state" holding float32 data — verify for other models.
+    const lastHidden = output["last_hidden_state"];
+    const pooled = this.meanPool(lastHidden.data as Float32Array, mask, seqLen);
+    return this.normalize(pooled);
   }
   async embedBatch(texts: string[]): Promise<number[][]> {
@@ -45,4 +105,35 @@ export class EmbeddingsService {
     }
     return results;
   }
+  /**
+   * Mean-pool token vectors over the attended positions.
+   *
+   * `data` is read as a flat row-major buffer of `seqLen` rows with
+   * `this._dimension` values each; only rows whose mask entry is truthy
+   * contribute to the average.
+   *
+   * @param data Flattened hidden states, at least `seqLen * dimension` long.
+   * @param mask Attention mask, one entry per token.
+   * @param seqLen Number of token rows to read from `data`.
+   * @returns The averaged vector, or an all-zero vector when no token is attended.
+   * @throws Error when `data` is shorter than `seqLen * dimension`.
+   */
+  private meanPool(data: Float32Array, mask: number[], seqLen: number): number[] {
+    const dim = this._dimension;
+    const expectedLen = seqLen * dim;
+    if (data.length < expectedLen) {
+      throw new Error(
+        `ONNX output size ${data.length} < expected ${expectedLen} (seqLen=${seqLen}, dim=${dim}). Model/dimension mismatch?`,
+      );
+    }
+    const pooled = new Array<number>(dim).fill(0);
+    let maskSum = 0;
+    for (let t = 0; t < seqLen; t++) {
+      if (mask[t]) {
+        maskSum += 1;
+        for (let d = 0; d < dim; d++) {
+          pooled[d] += data[t * dim + d];
+        }
+      }
+    }
+    // With no attended token (empty or fully masked input) the division below
+    // would be 0/0 and fill the vector with NaN; return the zero vector instead.
+    if (maskSum === 0) return pooled;
+    for (let d = 0; d < dim; d++) {
+      pooled[d] /= maskSum;
+    }
+    return pooled;
+  }
+  /**
+   * Scale a vector to unit Euclidean length.
+   *
+   * @param vec Input vector (not modified).
+   * @returns A new array; a zero-length vector is returned as an unscaled copy.
+   */
+  private normalize(vec: number[]): number[] {
+    let sumSquares = 0;
+    for (const v of vec) sumSquares += v * v;
+    const norm = Math.sqrt(sumSquares);
+    // A zero vector has no direction; dividing by its norm would yield NaN in
+    // every component, so hand back a copy unchanged.
+    if (norm === 0) return vec.slice();
+    return vec.map((v) => v / norm);
+  }
 }

package/server/core/memory.repository.ts CHANGED Viewed

@@ -4,15 +4,17 @@ import {
   deserializeVector,
   safeParseJsonObject,
   sanitizeFtsQuery,
-  hybridRRF,
+  hybridRRFWithSignals,
   topByRRF,
   knnSearch,
-} from "./sqlite-utils.js";
+  batchedQuery,
+  SQLITE_BATCH_SIZE,
+} from "./sqlite-utils";
 import {
   type Memory,
   type HybridRow,
   DELETED_TOMBSTONE,
-} from "./memory.js";
+} from "./memory";
 export class MemoryRepository {
   constructor(private db: Database) {}
@@ -144,14 +146,16 @@ export class MemoryRepository {
+  /**
+   * Load the memories with the given ids, including their embeddings.
+   *
+   * The lookup is delegated to batchedQuery (from ./sqlite-utils), which
+   * receives the id list and a callback that runs one `IN (...)` query per
+   * batch. Presumably it splits `ids` to respect SQLite's parameter limit and
+   * concatenates the per-batch results — confirm against the helper.
+   *
+   * @param ids Memory ids to fetch; an empty list short-circuits to `[]`.
+   */
   async findByIds(ids: string[]): Promise<Memory[]> {
     if (ids.length === 0) return [];
-    const placeholders = ids.map(() => "?").join(", ");
-    const rows = this.db
-      .prepare(`SELECT * FROM memories WHERE id IN (${placeholders})`)
-      .all(...ids) as Array<Record<string, unknown>>;
+    return batchedQuery(this.db, ids, (batch) => {
+      const placeholders = batch.map(() => "?").join(", ");
+      const rows = this.db
+        .prepare(`SELECT * FROM memories WHERE id IN (${placeholders})`)
+        .all(...batch) as Array<Record<string, unknown>>;
-    return rows.map((row) => {
-      const embedding = this.getEmbedding(row.id as string);
-      return this.rowToMemory(row, embedding);
+      // The embedding is fetched separately via getEmbedding, once per returned row.
+      return rows.map((row) => {
+        const embedding = this.getEmbedding(row.id as string);
+        return this.rowToMemory(row, embedding);
+      });
     });
   }
@@ -165,6 +169,28 @@ export class MemoryRepository {
     return result.changes > 0;
   }
+  /**
+   * Record an access on each of the given memories: access_count goes up by
+   * one and last_accessed is set to `now` (epoch milliseconds).
+   * Ids are processed in slices of SQLITE_BATCH_SIZE so that each UPDATE's
+   * IN list stays within SQLite parameter limits.
+   */
+  bulkUpdateAccess(ids: string[], now: Date): void {
+    if (ids.length === 0) return;
+    const accessedAt = now.getTime();
+    for (let offset = 0; offset < ids.length; offset += SQLITE_BATCH_SIZE) {
+      const chunk = ids.slice(offset, offset + SQLITE_BATCH_SIZE);
+      const placeholders = chunk.map(() => "?").join(", ");
+      const update = this.db.prepare(
+        `UPDATE memories SET access_count = access_count + 1, last_accessed = ? WHERE id IN (${placeholders})`
+      );
+      update.run(accessedAt, ...chunk);
+    }
+  }
   /**
    * Hybrid search combining vector KNN and FTS5, fused with Reciprocal Rank Fusion.
    */
@@ -173,7 +199,7 @@ export class MemoryRepository {
     query: string,
     limit: number,
   ): Promise<HybridRow[]> {
-    const candidateLimit = limit * 3;
+    const candidateLimit = limit * 5;
     // Vector KNN search (brute-force cosine similarity in JS)
     const vectorResults = knnSearch(this.db, "memories_vec", embedding, candidateLimit);
@@ -188,8 +214,10 @@ export class MemoryRepository {
           .all(ftsQuery, candidateLimit) as Array<{ id: string }>)
       : [];
-    // Compute RRF scores and pick top ids
-    const rrfScores = hybridRRF(vectorResults, ftsResults);
+    // Compute RRF scores with search signals for confidence scoring
+    const signalsMap = hybridRRFWithSignals(vectorResults, ftsResults);
+    const rrfScores = new Map<string, number>();
+    for (const [id, s] of signalsMap) rrfScores.set(id, s.rrfScore);
     const topIds = topByRRF(rrfScores, limit);
     if (topIds.length === 0) return [];
@@ -216,9 +244,16 @@ export class MemoryRepository {
       const memEmbedding = this.getEmbedding(id);
       const memory = this.rowToMemory(row, memEmbedding);
+      const signals = signalsMap.get(id)!;
       results.push({
         ...memory,
-        rrfScore: rrfScores.get(id) ?? 0,
+        rrfScore: signals.rrfScore,
+        signals: {
+          cosineSimilarity: signals.cosineSimilarity,
+          ftsMatch: signals.ftsMatch,
+          knnRank: signals.knnRank,
+          ftsRank: signals.ftsRank,
+        },
       });
     }