@disco_trooper/apple-notes-mcp 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,150 @@
+ import { describe, it, expect, vi, beforeEach } from "vitest";
+ import { getEmbeddingCache, resetEmbeddingCache } from "./cache.js";
+
+ describe("EmbeddingCache", () => {
+   beforeEach(() => {
+     resetEmbeddingCache();
+   });
+
+   describe("get/set", () => {
+     it("returns undefined for uncached query", () => {
+       const cache = getEmbeddingCache();
+       expect(cache.get("test query")).toBeUndefined();
+     });
+
+     it("returns cached embedding", () => {
+       const cache = getEmbeddingCache();
+       const embedding = [0.1, 0.2, 0.3];
+
+       cache.set("test query", embedding);
+       expect(cache.get("test query")).toEqual(embedding);
+     });
+
+     it("normalizes queries for better hit rate", () => {
+       const cache = getEmbeddingCache();
+       const embedding = [0.1, 0.2, 0.3];
+
+       cache.set("Test Query", embedding);
+       // Should match with different casing/spacing
+       expect(cache.get("test query")).toEqual(embedding);
+       expect(cache.get(" TEST QUERY ")).toEqual(embedding);
+     });
+   });
+
+   describe("getOrCompute", () => {
+     it("calls compute function on cache miss", async () => {
+       const cache = getEmbeddingCache();
+       const computeFn = vi.fn().mockResolvedValue([0.1, 0.2, 0.3]);
+
+       const result = await cache.getOrCompute("test query", computeFn);
+
+       expect(computeFn).toHaveBeenCalledWith("test query");
+       expect(result).toEqual([0.1, 0.2, 0.3]);
+     });
+
+     it("returns cached value without calling compute", async () => {
+       const cache = getEmbeddingCache();
+       const embedding = [0.1, 0.2, 0.3];
+       cache.set("test query", embedding);
+
+       const computeFn = vi.fn().mockResolvedValue([0.4, 0.5, 0.6]);
+       const result = await cache.getOrCompute("test query", computeFn);
+
+       expect(computeFn).not.toHaveBeenCalled();
+       expect(result).toEqual(embedding);
+     });
+
+     it("caches computed value for subsequent calls", async () => {
+       const cache = getEmbeddingCache();
+       const computeFn = vi.fn().mockResolvedValue([0.1, 0.2, 0.3]);
+
+       await cache.getOrCompute("test query", computeFn);
+       await cache.getOrCompute("test query", computeFn);
+
+       expect(computeFn).toHaveBeenCalledTimes(1);
+     });
+   });
+
+   describe("LRU eviction", () => {
+     it("evicts oldest entry when at capacity", () => {
+       // Create cache with small size for testing
+       resetEmbeddingCache();
+       const cache = getEmbeddingCache();
+       // We can't easily change max size, but we can test stats
+
+       // Fill cache with entries
+       for (let i = 0; i < 5; i++) {
+         cache.set(`query ${i}`, [i]);
+       }
+
+       const stats = cache.getStats();
+       expect(stats.size).toBeGreaterThan(0);
+     });
+   });
+
+   describe("stats", () => {
+     it("tracks hits and misses", () => {
+       const cache = getEmbeddingCache();
+       cache.set("query1", [0.1]);
+
+       cache.get("query1"); // hit
+       cache.get("query2"); // miss
+       cache.get("query1"); // hit
+       cache.get("query3"); // miss
+
+       const stats = cache.getStats();
+       expect(stats.hits).toBe(2);
+       expect(stats.misses).toBe(2);
+       expect(stats.hitRate).toBe(0.5);
+     });
+   });
+
+   describe("clear", () => {
+     it("clears all cached embeddings", () => {
+       const cache = getEmbeddingCache();
+       cache.set("query1", [0.1]);
+       cache.set("query2", [0.2]);
+
+       cache.clear();
+
+       expect(cache.get("query1")).toBeUndefined();
+       expect(cache.get("query2")).toBeUndefined();
+       expect(cache.getStats().size).toBe(0);
+     });
+
+     it("resets stats on clear", () => {
+       const cache = getEmbeddingCache();
+       cache.set("query1", [0.1]);
+       cache.get("query1");
+       cache.get("query2");
+
+       cache.clear();
+
+       const stats = cache.getStats();
+       expect(stats.hits).toBe(0);
+       expect(stats.misses).toBe(0);
+     });
+   });
+
+   describe("model version", () => {
+     it("invalidates cache when model version changes", () => {
+       const cache = getEmbeddingCache();
+       cache.set("query1", [0.1]);
+
+       cache.setModelVersion("new-model-v2");
+
+       expect(cache.get("query1")).toBeUndefined();
+     });
+
+     it("does not invalidate if version unchanged", () => {
+       const cache = getEmbeddingCache();
+       cache.set("query1", [0.1]);
+
+       cache.setModelVersion("default"); // Same as initial
+       cache.setModelVersion("default"); // Same again
+
+       // Cache should still have the value
+       expect(cache.get("query1")).toEqual([0.1]);
+     });
+   });
+ });
@@ -0,0 +1,204 @@
+ /**
+  * LRU Cache for query embeddings.
+  * Dramatically speeds up hybrid search by caching repeated queries.
+  */
+
+ import { createDebugLogger } from "../utils/debug.js";
+
+ const debug = createDebugLogger("EMBED_CACHE");
+
+ /**
+  * Simple LRU Cache implementation for embeddings.
+  */
+ class LRUCache<K, V> {
+   private cache = new Map<K, V>();
+   private readonly maxSize: number;
+
+   constructor(maxSize: number) {
+     this.maxSize = maxSize;
+   }
+
+   get(key: K): V | undefined {
+     const value = this.cache.get(key);
+     if (value !== undefined) {
+       // Move to end (most recently used)
+       this.cache.delete(key);
+       this.cache.set(key, value);
+     }
+     return value;
+   }
+
+   set(key: K, value: V): void {
+     // Delete if exists (to update position)
+     if (this.cache.has(key)) {
+       this.cache.delete(key);
+     }
+     // Evict oldest if at capacity
+     else if (this.cache.size >= this.maxSize) {
+       const firstKey = this.cache.keys().next().value;
+       if (firstKey !== undefined) {
+         this.cache.delete(firstKey);
+       }
+     }
+     this.cache.set(key, value);
+   }
+
+   has(key: K): boolean {
+     return this.cache.has(key);
+   }
+
+   clear(): void {
+     this.cache.clear();
+   }
+
+   get size(): number {
+     return this.cache.size;
+   }
+ }
+
+ /**
+  * Normalize query for better cache hit rate.
+  * - Lowercase
+  * - Trim whitespace
+  * - Collapse multiple spaces
+  */
+ function normalizeQuery(query: string): string {
+   return query.toLowerCase().trim().replace(/\s+/g, " ");
+ }
+
+ /**
+  * Cache statistics for monitoring.
+  */
+ export interface CacheStats {
+   hits: number;
+   misses: number;
+   size: number;
+   hitRate: number;
+ }
+
+ /**
+  * Embedding cache with LRU eviction.
+  */
+ class EmbeddingCache {
+   private cache: LRUCache<string, number[]>;
+   private modelVersion: string;
+   private hits = 0;
+   private misses = 0;
+
+   constructor(maxSize = 1000, modelVersion = "default") {
+     this.cache = new LRUCache(maxSize);
+     this.modelVersion = modelVersion;
+     debug(`Embedding cache initialized (max: ${maxSize})`);
+   }
+
+   /**
+    * Create cache key from query and model version.
+    */
+   private makeKey(query: string): string {
+     const normalized = normalizeQuery(query);
+     return `${this.modelVersion}:${normalized}`;
+   }
+
+   /**
+    * Get cached embedding for query.
+    * Returns undefined if not cached.
+    */
+   get(query: string): number[] | undefined {
+     const key = this.makeKey(query);
+     const cached = this.cache.get(key);
+
+     if (cached) {
+       this.hits++;
+       debug(`Cache HIT for "${query.slice(0, 30)}..." (hits: ${this.hits})`);
+       return cached;
+     }
+
+     this.misses++;
+     return undefined;
+   }
+
+   /**
+    * Store embedding in cache.
+    */
+   set(query: string, embedding: number[]): void {
+     const key = this.makeKey(query);
+     this.cache.set(key, embedding);
+     debug(`Cached embedding for "${query.slice(0, 30)}..." (size: ${this.cache.size})`);
+   }
+
+   /**
+    * Get or compute embedding using provided function.
+    * This is the main API for cached embedding retrieval.
+    */
+   async getOrCompute(
+     query: string,
+     computeFn: (q: string) => Promise<number[]>
+   ): Promise<number[]> {
+     const cached = this.get(query);
+     if (cached) {
+       return cached;
+     }
+
+     const embedding = await computeFn(query);
+     this.set(query, embedding);
+     return embedding;
+   }
+
+   /**
+    * Invalidate cache (e.g., when model changes).
+    */
+   clear(): void {
+     this.cache.clear();
+     this.hits = 0;
+     this.misses = 0;
+     debug("Cache cleared");
+   }
+
+   /**
+    * Update model version and clear cache.
+    */
+   setModelVersion(version: string): void {
+     if (version !== this.modelVersion) {
+       debug(`Model version changed: ${this.modelVersion} -> ${version}`);
+       this.modelVersion = version;
+       this.clear();
+     }
+   }
+
+   /**
+    * Get cache statistics.
+    */
+   getStats(): CacheStats {
+     const total = this.hits + this.misses;
+     return {
+       hits: this.hits,
+       misses: this.misses,
+       size: this.cache.size,
+       hitRate: total > 0 ? this.hits / total : 0,
+     };
+   }
+ }
+
+ // Singleton instance
+ let cacheInstance: EmbeddingCache | null = null;
+
+ /**
+  * Get the embedding cache singleton.
+  */
+ export function getEmbeddingCache(): EmbeddingCache {
+   if (!cacheInstance) {
+     // Max 1000 queries * ~1.5KB per embedding = ~1.5MB
+     cacheInstance = new EmbeddingCache(1000);
+   }
+   return cacheInstance;
+ }
+
+ /**
+  * Reset the cache (useful for testing).
+  */
+ export function resetEmbeddingCache(): void {
+   if (cacheInstance) {
+     cacheInstance.clear();
+   }
+   cacheInstance = null;
+ }
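
For orientation, the cache module above is designed to sit between search code and the embedding provider: callers fetch the singleton via getEmbeddingCache() and route query embedding through getOrCompute. A minimal usage sketch follows; the embedQuery wrapper is hypothetical, while getEmbedding is this package's existing provider entry point:

import { getEmbeddingCache } from "./cache.js";
import { getEmbedding } from "./index.js";

// Hypothetical wrapper: only the cache wiring mirrors the API added here.
async function embedQuery(query: string): Promise<number[]> {
  const cache = getEmbeddingCache();
  // Miss: computeFn runs and the result is stored under a normalized,
  // model-version-prefixed key. Hit: the provider is never called.
  return cache.getOrCompute(query, (q) => getEmbedding(q));
}

One caveat visible in the implementation: two concurrent misses for the same query will each invoke computeFn, since getOrCompute only writes to the cache after the compute resolves.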
@@ -6,8 +6,8 @@
   * - Local HuggingFace (fallback)
   */
 
- import { getOpenRouterEmbedding, getOpenRouterDimensions } from "./openrouter.js";
- import { getLocalEmbedding, getLocalDimensions, getLocalModelName } from "./local.js";
+ import { getOpenRouterEmbedding, getOpenRouterDimensions, getOpenRouterEmbeddingBatch } from "./openrouter.js";
+ import { getLocalEmbedding, getLocalDimensions, getLocalModelName, getLocalEmbeddingBatch } from "./local.js";
  import { createDebugLogger } from "../utils/debug.js";
 
  // Debug logging
@@ -62,6 +62,23 @@ export async function getEmbedding(text: string): Promise<number[]> {
    }
  }
 
+ /**
+  * Generate embeddings for multiple texts in batch.
+  * Uses native batch API for both OpenRouter and local providers.
+  *
+  * @param texts - Array of texts to embed
+  * @returns Promise resolving to array of embedding vectors
+  */
+ export async function getEmbeddingBatch(texts: string[]): Promise<number[][]> {
+   const provider = getProvider();
+
+   if (provider === "openrouter") {
+     return getOpenRouterEmbeddingBatch(texts);
+   } else {
+     return getLocalEmbeddingBatch(texts);
+   }
+ }
+
  /**
   * Get the embedding dimensions for the current provider.
   *
@@ -100,10 +117,12 @@ export function getProviderDescription(): string {
  export {
    getOpenRouterEmbedding,
    getOpenRouterDimensions,
+   getOpenRouterEmbeddingBatch,
  } from "./openrouter.js";
 
  export {
    getLocalEmbedding,
+   getLocalEmbeddingBatch,
    getLocalDimensions,
    getLocalModelName,
    isModelLoaded,
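
The new getEmbeddingBatch dispatches on the same provider selection as getEmbedding, so callers can embed many texts in one call instead of one provider round-trip each. A sketch under that assumption (the sample texts are illustrative):

import { getEmbeddingBatch } from "./index.js";

// Hypothetical indexing step: one batch call instead of n single calls.
const texts = ["grocery list", "meeting notes", "travel plans"];
const vectors = await getEmbeddingBatch(texts);
// vectors[i] is the embedding for texts[i], in input order.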
@@ -25,7 +25,7 @@ const debug = createDebugLogger("LOCAL");
 
  // Lazy-loaded pipeline
  type FeatureExtractionPipeline = (
-   text: string,
+   text: string | string[],
    options?: { pooling?: string; normalize?: boolean }
  ) => Promise<{ tolist: () => number[][] }>;
 
@@ -40,6 +40,27 @@ function getModelName(): string {
    return process.env.EMBEDDING_MODEL || DEFAULT_MODEL;
  }
 
+ /**
+  * Check if the model is an E5 model that requires prefixed input.
+  */
+ function isE5Model(): boolean {
+   return getModelName().toLowerCase().includes("e5");
+ }
+
+ /**
+  * Prepare text for embedding by adding E5 prefix if needed.
+  */
+ function prepareText(text: string): string {
+   return isE5Model() ? `passage: ${text}` : text;
+ }
+
+ /**
+  * Prepare multiple texts for embedding by adding E5 prefix if needed.
+  */
+ function prepareTexts(texts: string[]): string[] {
+   return isE5Model() ? texts.map(t => `passage: ${t}`) : texts;
+ }
+
  /**
   * Lazy-load the HuggingFace transformers pipeline.
   * Only loads once, subsequent calls return the cached instance.
@@ -116,19 +137,11 @@ export async function getLocalEmbedding(text: string): Promise<number[]> {
    const startTime = Date.now();
 
    try {
-     // For e5 models, prepend "passage: " for document embedding
-     // or "query: " for search queries - using passage for general text
-     const modelName = getModelName();
-     const isE5Model = modelName.toLowerCase().includes("e5");
-     const inputText = isE5Model ? `passage: ${text}` : text;
-
-     // Run inference with mean pooling and normalization
-     const output = await pipe(inputText, {
+     const output = await pipe(prepareText(text), {
        pooling: "mean",
        normalize: true,
      });
 
-     // Extract the embedding vector
      const embedding = output.tolist()[0];
 
      const inferenceTime = Date.now() - startTime;
@@ -178,3 +191,41 @@ export function getLocalModelName(): string {
  export function isModelLoaded(): boolean {
    return pipelineInstance !== null;
  }
+
+ /**
+  * Generate embeddings for multiple texts in a single batch call.
+  * More efficient than calling getLocalEmbedding for each text individually.
+  *
+  * @param texts - Array of texts to embed
+  * @returns Promise resolving to array of embedding vectors
+  * @throws Error if model loading or inference fails
+  */
+ export async function getLocalEmbeddingBatch(texts: string[]): Promise<number[][]> {
+   if (!texts || texts.length === 0) {
+     return [];
+   }
+
+   const pipe = await getPipeline();
+
+   debug(`Generating batch embeddings for ${texts.length} texts`);
+   const startTime = Date.now();
+
+   try {
+     const output = await pipe(prepareTexts(texts), {
+       pooling: "mean",
+       normalize: true,
+     });
+
+     const embeddings = output.tolist() as number[][];
+
+     const inferenceTime = Date.now() - startTime;
+     debug(`Batch embeddings generated in ${inferenceTime}ms (${embeddings.length} vectors, ${embeddings[0]?.length ?? 0} dims)`);
+
+     return embeddings;
+   } catch (error) {
+     const message = error instanceof Error ? error.message : String(error);
+     debug(`Batch embedding generation failed: ${message}`);
+
+     throw new Error(`Failed to generate batch embeddings: ${message}`);
+   }
+ }
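
A quick consistency sketch for the local batch path: both paths run the same pipeline with the same pooling and normalize options, and E5-family models get the same "passage: " prefix either way, so element i of a batch call should closely match a single-text call for texts[i] (the sample texts are illustrative; minor numeric drift from padded batch inference is possible):

import { getLocalEmbedding, getLocalEmbeddingBatch } from "./local.js";

// Sanity sketch: batch output lines up element-for-element with
// per-text calls; the pipeline is invoked once instead of n times.
const texts = ["first note", "second note"];
const batched = await getLocalEmbeddingBatch(texts);
const single = await getLocalEmbedding(texts[0]);
// batched[0] and single should be (near-)identical vectors.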