@disco_trooper/apple-notes-mcp 1.2.0 → 1.4.0
This diff shows the changes between publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- package/README.md +136 -24
- package/package.json +13 -9
- package/src/config/claude.test.ts +47 -0
- package/src/config/claude.ts +106 -0
- package/src/config/constants.ts +11 -2
- package/src/config/paths.test.ts +40 -0
- package/src/config/paths.ts +86 -0
- package/src/db/arrow-fix.test.ts +101 -0
- package/src/db/lancedb.test.ts +209 -2
- package/src/db/lancedb.ts +373 -7
- package/src/embeddings/cache.test.ts +150 -0
- package/src/embeddings/cache.ts +204 -0
- package/src/embeddings/index.ts +21 -2
- package/src/embeddings/local.ts +61 -10
- package/src/embeddings/openrouter.ts +233 -11
- package/src/graph/export.test.ts +81 -0
- package/src/graph/export.ts +163 -0
- package/src/graph/extract.test.ts +90 -0
- package/src/graph/extract.ts +52 -0
- package/src/graph/queries.test.ts +156 -0
- package/src/graph/queries.ts +224 -0
- package/src/index.ts +376 -10
- package/src/notes/crud.test.ts +148 -3
- package/src/notes/crud.ts +250 -5
- package/src/notes/read.ts +83 -68
- package/src/search/chunk-indexer.test.ts +353 -0
- package/src/search/chunk-indexer.ts +254 -0
- package/src/search/chunk-search.test.ts +327 -0
- package/src/search/chunk-search.ts +298 -0
- package/src/search/indexer.ts +151 -109
- package/src/search/refresh.test.ts +173 -0
- package/src/search/refresh.ts +151 -0
- package/src/setup.ts +46 -67
- package/src/utils/chunker.test.ts +182 -0
- package/src/utils/chunker.ts +170 -0
- package/src/utils/content-filter.test.ts +225 -0
- package/src/utils/content-filter.ts +275 -0
- package/src/utils/runtime.test.ts +70 -0
- package/src/utils/runtime.ts +40 -0
package/src/embeddings/cache.ts
ADDED
@@ -0,0 +1,204 @@
+/**
+ * LRU Cache for query embeddings.
+ * Dramatically speeds up hybrid search by caching repeated queries.
+ */
+
+import { createDebugLogger } from "../utils/debug.js";
+
+const debug = createDebugLogger("EMBED_CACHE");
+
+/**
+ * Simple LRU Cache implementation for embeddings.
+ */
+class LRUCache<K, V> {
+  private cache = new Map<K, V>();
+  private readonly maxSize: number;
+
+  constructor(maxSize: number) {
+    this.maxSize = maxSize;
+  }
+
+  get(key: K): V | undefined {
+    const value = this.cache.get(key);
+    if (value !== undefined) {
+      // Move to end (most recently used)
+      this.cache.delete(key);
+      this.cache.set(key, value);
+    }
+    return value;
+  }
+
+  set(key: K, value: V): void {
+    // Delete if exists (to update position)
+    if (this.cache.has(key)) {
+      this.cache.delete(key);
+    }
+    // Evict oldest if at capacity
+    else if (this.cache.size >= this.maxSize) {
+      const firstKey = this.cache.keys().next().value;
+      if (firstKey !== undefined) {
+        this.cache.delete(firstKey);
+      }
+    }
+    this.cache.set(key, value);
+  }
+
+  has(key: K): boolean {
+    return this.cache.has(key);
+  }
+
+  clear(): void {
+    this.cache.clear();
+  }
+
+  get size(): number {
+    return this.cache.size;
+  }
+}
+
+/**
+ * Normalize query for better cache hit rate.
+ * - Lowercase
+ * - Trim whitespace
+ * - Collapse multiple spaces
+ */
+function normalizeQuery(query: string): string {
+  return query.toLowerCase().trim().replace(/\s+/g, " ");
+}
+
+/**
+ * Cache statistics for monitoring.
+ */
+export interface CacheStats {
+  hits: number;
+  misses: number;
+  size: number;
+  hitRate: number;
+}
+
+/**
+ * Embedding cache with LRU eviction.
+ */
+class EmbeddingCache {
+  private cache: LRUCache<string, number[]>;
+  private modelVersion: string;
+  private hits = 0;
+  private misses = 0;
+
+  constructor(maxSize = 1000, modelVersion = "default") {
+    this.cache = new LRUCache(maxSize);
+    this.modelVersion = modelVersion;
+    debug(`Embedding cache initialized (max: ${maxSize})`);
+  }
+
+  /**
+   * Create cache key from query and model version.
+   */
+  private makeKey(query: string): string {
+    const normalized = normalizeQuery(query);
+    return `${this.modelVersion}:${normalized}`;
+  }
+
+  /**
+   * Get cached embedding for query.
+   * Returns undefined if not cached.
+   */
+  get(query: string): number[] | undefined {
+    const key = this.makeKey(query);
+    const cached = this.cache.get(key);
+
+    if (cached) {
+      this.hits++;
+      debug(`Cache HIT for "${query.slice(0, 30)}..." (hits: ${this.hits})`);
+      return cached;
+    }
+
+    this.misses++;
+    return undefined;
+  }
+
+  /**
+   * Store embedding in cache.
+   */
+  set(query: string, embedding: number[]): void {
+    const key = this.makeKey(query);
+    this.cache.set(key, embedding);
+    debug(`Cached embedding for "${query.slice(0, 30)}..." (size: ${this.cache.size})`);
+  }
+
+  /**
+   * Get or compute embedding using provided function.
+   * This is the main API for cached embedding retrieval.
+   */
+  async getOrCompute(
+    query: string,
+    computeFn: (q: string) => Promise<number[]>
+  ): Promise<number[]> {
+    const cached = this.get(query);
+    if (cached) {
+      return cached;
+    }
+
+    const embedding = await computeFn(query);
+    this.set(query, embedding);
+    return embedding;
+  }
+
+  /**
+   * Invalidate cache (e.g., when model changes).
+   */
+  clear(): void {
+    this.cache.clear();
+    this.hits = 0;
+    this.misses = 0;
+    debug("Cache cleared");
+  }
+
+  /**
+   * Update model version and clear cache.
+   */
+  setModelVersion(version: string): void {
+    if (version !== this.modelVersion) {
+      debug(`Model version changed: ${this.modelVersion} -> ${version}`);
+      this.modelVersion = version;
+      this.clear();
+    }
+  }
+
+  /**
+   * Get cache statistics.
+   */
+  getStats(): CacheStats {
+    const total = this.hits + this.misses;
+    return {
+      hits: this.hits,
+      misses: this.misses,
+      size: this.cache.size,
+      hitRate: total > 0 ? this.hits / total : 0,
+    };
+  }
+}
+
+// Singleton instance
+let cacheInstance: EmbeddingCache | null = null;
+
+/**
+ * Get the embedding cache singleton.
+ */
+export function getEmbeddingCache(): EmbeddingCache {
+  if (!cacheInstance) {
+    // Max 1000 queries * ~1.5KB per embedding = ~1.5MB
+    cacheInstance = new EmbeddingCache(1000);
+  }
+  return cacheInstance;
+}
+
+/**
+ * Reset the cache (useful for testing).
+ */
+export function resetEmbeddingCache(): void {
+  if (cacheInstance) {
+    cacheInstance.clear();
+  }
+  cacheInstance = null;
+}
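For orientation, a minimal usage sketch of this new module; the compute function below is a placeholder, where the package itself would pass one of the provider calls from this directory:

import { getEmbeddingCache } from "./cache.js";

// Placeholder provider call; any (q: string) => Promise<number[]> works here.
async function computeEmbedding(q: string): Promise<number[]> {
  return [q.length, 0, 1];
}

const cache = getEmbeddingCache();
// Only the first call invokes computeEmbedding; the second is served from the
// LRU because normalizeQuery lowercases, trims, and collapses whitespace.
await cache.getOrCompute("Meeting notes", computeEmbedding);
await cache.getOrCompute("  meeting   notes ", computeEmbedding);

console.log(cache.getStats()); // { hits: 1, misses: 1, size: 1, hitRate: 0.5 }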
package/src/embeddings/index.ts
CHANGED
@@ -6,8 +6,8 @@
  * - Local HuggingFace (fallback)
  */
 
-import { getOpenRouterEmbedding, getOpenRouterDimensions } from "./openrouter.js";
-import { getLocalEmbedding, getLocalDimensions, getLocalModelName } from "./local.js";
+import { getOpenRouterEmbedding, getOpenRouterDimensions, getOpenRouterEmbeddingBatch } from "./openrouter.js";
+import { getLocalEmbedding, getLocalDimensions, getLocalModelName, getLocalEmbeddingBatch } from "./local.js";
 import { createDebugLogger } from "../utils/debug.js";
 
 // Debug logging
@@ -62,6 +62,23 @@ export async function getEmbedding(text: string): Promise<number[]> {
   }
 }
 
+/**
+ * Generate embeddings for multiple texts in batch.
+ * Uses native batch API for both OpenRouter and local providers.
+ *
+ * @param texts - Array of texts to embed
+ * @returns Promise resolving to array of embedding vectors
+ */
+export async function getEmbeddingBatch(texts: string[]): Promise<number[][]> {
+  const provider = getProvider();
+
+  if (provider === "openrouter") {
+    return getOpenRouterEmbeddingBatch(texts);
+  } else {
+    return getLocalEmbeddingBatch(texts);
+  }
+}
+
 /**
  * Get the embedding dimensions for the current provider.
  *
@@ -100,10 +117,12 @@ export function getProviderDescription(): string {
 export {
   getOpenRouterEmbedding,
   getOpenRouterDimensions,
+  getOpenRouterEmbeddingBatch,
 } from "./openrouter.js";
 
 export {
   getLocalEmbedding,
+  getLocalEmbeddingBatch,
   getLocalDimensions,
   getLocalModelName,
   isModelLoaded,
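With this, callers can embed many texts in one call instead of looping over getEmbedding. A short sketch (provider selection via getProvider() is unchanged and depends on the environment):

import { getEmbeddingBatch } from "./index.js";

// Routes to getOpenRouterEmbeddingBatch or getLocalEmbeddingBatch depending
// on the configured provider; either way, one vector comes back per input.
const vectors = await getEmbeddingBatch([
  "grocery list",
  "project kickoff notes",
  "travel itinerary",
]);
console.log(vectors.length); // 3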
package/src/embeddings/local.ts
CHANGED
@@ -25,7 +25,7 @@ const debug = createDebugLogger("LOCAL");
 
 // Lazy-loaded pipeline
 type FeatureExtractionPipeline = (
-  text: string,
+  text: string | string[],
   options?: { pooling?: string; normalize?: boolean }
 ) => Promise<{ tolist: () => number[][] }>;
 
@@ -40,6 +40,27 @@ function getModelName(): string {
   return process.env.EMBEDDING_MODEL || DEFAULT_MODEL;
 }
 
+/**
+ * Check if the model is an E5 model that requires prefixed input.
+ */
+function isE5Model(): boolean {
+  return getModelName().toLowerCase().includes("e5");
+}
+
+/**
+ * Prepare text for embedding by adding E5 prefix if needed.
+ */
+function prepareText(text: string): string {
+  return isE5Model() ? `passage: ${text}` : text;
+}
+
+/**
+ * Prepare multiple texts for embedding by adding E5 prefix if needed.
+ */
+function prepareTexts(texts: string[]): string[] {
+  return isE5Model() ? texts.map(t => `passage: ${t}`) : texts;
+}
+
 /**
  * Lazy-load the HuggingFace transformers pipeline.
  * Only loads once, subsequent calls return the cached instance.
@@ -116,19 +137,11 @@ export async function getLocalEmbedding(text: string): Promise<number[]> {
   const startTime = Date.now();
 
   try {
-
-    // or "query: " for search queries - using passage for general text
-    const modelName = getModelName();
-    const isE5Model = modelName.toLowerCase().includes("e5");
-    const inputText = isE5Model ? `passage: ${text}` : text;
-
-    // Run inference with mean pooling and normalization
-    const output = await pipe(inputText, {
+    const output = await pipe(prepareText(text), {
       pooling: "mean",
       normalize: true,
     });
 
-    // Extract the embedding vector
     const embedding = output.tolist()[0];
 
     const inferenceTime = Date.now() - startTime;
@@ -178,3 +191,41 @@ export function getLocalModelName(): string {
 export function isModelLoaded(): boolean {
   return pipelineInstance !== null;
 }
+
+/**
+ * Generate embeddings for multiple texts in a single batch call.
+ * More efficient than calling getLocalEmbedding for each text individually.
+ *
+ * @param texts - Array of texts to embed
+ * @returns Promise resolving to array of embedding vectors
+ * @throws Error if model loading or inference fails
+ */
+export async function getLocalEmbeddingBatch(texts: string[]): Promise<number[][]> {
+  if (!texts || texts.length === 0) {
+    return [];
+  }
+
+  const pipe = await getPipeline();
+
+  debug(`Generating batch embeddings for ${texts.length} texts`);
+  const startTime = Date.now();
+
+  try {
+    const output = await pipe(prepareTexts(texts), {
+      pooling: "mean",
+      normalize: true,
+    });
+
+    const embeddings = output.tolist() as number[][];
+
+    const inferenceTime = Date.now() - startTime;
+    debug(`Batch embeddings generated in ${inferenceTime}ms (${embeddings.length} vectors, ${embeddings[0]?.length ?? 0} dims)`);
+
+    return embeddings;
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    debug(`Batch embedding generation failed: ${message}`);
+
+    throw new Error(`Failed to generate batch embeddings: ${message}`);
+  }
+}
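From the caller's side the E5 handling is invisible: prepareTexts adds the "passage: " prefix internally whenever the configured model name contains "e5", so callers always pass raw note text. A sketch of the batch path:

import { getLocalEmbeddingBatch } from "./local.js";

// All inputs go through the pipeline in one call; for E5 models each text is
// embedded as "passage: <text>", otherwise it passes through unchanged.
const embeddings = await getLocalEmbeddingBatch(["note one", "note two"]);

console.log(embeddings.length);    // 2: one vector per input
console.log(embeddings[0].length); // model dimensionality (same for both)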
package/src/embeddings/openrouter.ts
CHANGED
@@ -108,6 +108,27 @@ class OpenRouterError extends Error {
   }
 }
 
+/** HTTP status codes that should not be retried */
+const NON_RETRYABLE_STATUS_CODES = [400, 401, 403, 404];
+
+/** Common headers for OpenRouter API requests */
+const API_HEADERS = {
+  "Content-Type": "application/json",
+  "HTTP-Referer": "https://github.com/apple-notes-mcp",
+  "X-Title": "Apple Notes MCP",
+} as const;
+
+/**
+ * Check if an error should trigger a retry or fail immediately.
+ * Returns true if the error is non-retryable.
+ */
+function isNonRetryableError(error: unknown): boolean {
+  if (error instanceof OpenRouterError && error.statusCode) {
+    return NON_RETRYABLE_STATUS_CODES.includes(error.statusCode);
+  }
+  return false;
+}
+
 /**
  * Get embedding vector for text using OpenRouter API
  *
@@ -157,9 +178,7 @@ export async function getOpenRouterEmbedding(text: string): Promise<number[]> {
       method: "POST",
       headers: {
         Authorization: `Bearer ${OPENROUTER_API_KEY}`,
-
-        "HTTP-Referer": "https://github.com/apple-notes-mcp",
-        "X-Title": "Apple Notes MCP",
+        ...API_HEADERS,
       },
       body: JSON.stringify({
         model: EMBEDDING_MODEL,
@@ -224,17 +243,12 @@ export async function getOpenRouterEmbedding(text: string): Promise<number[]> {
         `Request timed out after ${OPENROUTER_TIMEOUT_MS}ms`,
         408
       );
-      // Don't throw - fall through to retry logic below
     } else {
       lastError = error instanceof Error ? error : new Error(String(error));
 
-
-
-
-      if (nonRetryable.includes(error.statusCode)) {
-        debug(`Non-retryable error (${error.statusCode}), failing immediately`);
-        throw error;
-      }
+      if (isNonRetryableError(error)) {
+        debug(`Non-retryable error, failing immediately`);
+        throw error;
       }
     }
 
@@ -283,3 +297,211 @@ export function clearEmbeddingCache(): void {
 export function getEmbeddingCacheSize(): number {
   return embeddingCache.size;
 }
+
+/**
+ * Batch size for embedding requests.
+ * OpenRouter supports up to 2048 inputs per request, but 50-100 is optimal.
+ */
+const BATCH_SIZE = 50;
+
+/**
+ * Number of concurrent batch API calls.
+ * Higher values increase throughput but may hit rate limits.
+ */
+const CONCURRENT_BATCHES = 3;
+
+/**
+ * Split an array into chunks of specified size.
+ */
+function chunk<T>(array: T[], size: number): T[][] {
+  const chunks: T[][] = [];
+  for (let i = 0; i < array.length; i += size) {
+    chunks.push(array.slice(i, i + size));
+  }
+  return chunks;
+}
+
+/**
+ * Process a single batch of texts and return embeddings.
+ * Internal helper for concurrent batch processing.
+ */
+async function processSingleBatch(
+  batchTexts: string[],
+  batchIndices: number[],
+  cacheKeys: string[],
+  results: (number[] | null)[],
+  batchNumber: number,
+  totalBatches: number
+): Promise<void> {
+  debug(`Processing batch ${batchNumber}/${totalBatches} (${batchTexts.length} texts)`);
+
+  let lastError: Error | null = null;
+
+  for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), OPENROUTER_TIMEOUT_MS * 2);
+
+    try {
+      const response = await fetch(API_URL, {
+        method: "POST",
+        headers: {
+          Authorization: `Bearer ${OPENROUTER_API_KEY}`,
+          ...API_HEADERS,
+        },
+        body: JSON.stringify({
+          model: EMBEDDING_MODEL,
+          input: batchTexts,
+          dimensions: EMBEDDING_DIMS,
+        }),
+        signal: controller.signal,
+      });
+
+      if (response.status === 429) {
+        clearTimeout(timeoutId);
+        const waitTime = getBackoffDelay(attempt, RATE_LIMIT_BACKOFF_BASE_MS);
+        debug(`Batch ${batchNumber}: Rate limited (429), waiting ${waitTime}ms`);
+        await sleep(waitTime);
+        continue;
+      }
+
+      if (!response.ok) {
+        const errorBody = await response.text();
+        throw new OpenRouterError(
+          `OpenRouter API error: ${response.status} - ${errorBody}`,
+          response.status,
+          errorBody
+        );
+      }
+
+      const data = await response.json() as {
+        data?: Array<{ embedding?: number[]; index?: number }>;
+      };
+
+      if (!data?.data || data.data.length !== batchTexts.length) {
+        throw new OpenRouterError(
+          `Invalid API response: expected ${batchTexts.length} embeddings, got ${data?.data?.length ?? 0}`,
+          response.status,
+          JSON.stringify(data)
+        );
+      }
+
+      // Store results and cache them
+      for (let j = 0; j < data.data.length; j++) {
+        const embedding = data.data[j].embedding;
+        if (!embedding) {
+          throw new OpenRouterError(
+            `Missing embedding at index ${j}`,
+            response.status,
+            JSON.stringify(data)
+          );
+        }
+
+        results[batchIndices[j]] = embedding;
+        embeddingCache.set(cacheKeys[batchIndices[j]], embedding);
+      }
+
+      return; // Success
+
+    } catch (error) {
+      if (error instanceof Error && error.name === "AbortError") {
+        lastError = new OpenRouterError(
+          `Batch request timed out after ${OPENROUTER_TIMEOUT_MS * 2}ms`,
+          408
+        );
+      } else {
+        lastError = error instanceof Error ? error : new Error(String(error));
+
+        if (isNonRetryableError(error)) {
+          throw error;
+        }
+      }
+
+      if (attempt < MAX_RETRIES - 1) {
+        const waitTime = getBackoffDelay(attempt);
+        debug(`Batch ${batchNumber} error: ${lastError.message}, retrying in ${waitTime}ms`);
+        await sleep(waitTime);
+      }
+    } finally {
+      clearTimeout(timeoutId);
+    }
+  }
+
+  throw new OpenRouterError(
+    `Failed to get batch ${batchNumber} embeddings after ${MAX_RETRIES} attempts: ${lastError?.message}`
+  );
+}
+
+/**
+ * Get embedding vectors for multiple texts using concurrent batch API calls.
+ * Much faster than calling getOpenRouterEmbedding individually.
+ *
+ * @param texts - Array of input texts to embed
+ * @returns Promise resolving to array of embedding vectors
+ * @throws OpenRouterError if API call fails
+ */
+export async function getOpenRouterEmbeddingBatch(texts: string[]): Promise<number[][]> {
+  if (!OPENROUTER_API_KEY) {
+    throw new OpenRouterError(
+      "OPENROUTER_API_KEY environment variable is not set"
+    );
+  }
+
+  if (texts.length === 0) {
+    return [];
+  }
+
+  // Truncate all inputs and check cache
+  const truncatedTexts = texts.map(t => truncateForEmbedding(t));
+  const cacheKeys = truncatedTexts.map(t => getCacheKey(t));
+
+  // Separate cached and uncached
+  const results: (number[] | null)[] = new Array(texts.length).fill(null);
+  const uncachedIndices: number[] = [];
+  const uncachedTexts: string[] = [];
+
+  for (let i = 0; i < truncatedTexts.length; i++) {
+    const cached = embeddingCache.get(cacheKeys[i]);
+    if (cached) {
+      results[i] = cached;
+    } else {
+      uncachedIndices.push(i);
+      uncachedTexts.push(truncatedTexts[i]);
+    }
+  }
+
+  debug(`Batch: ${texts.length} total, ${uncachedIndices.length} uncached`);
+
+  if (uncachedTexts.length === 0) {
+    return results as number[][];
+  }
+
+  // Split into batches
+  const textBatches = chunk(uncachedTexts, BATCH_SIZE);
+  const indexBatches = chunk(uncachedIndices, BATCH_SIZE);
+  const totalBatches = textBatches.length;
+
+  debug(`Processing ${totalBatches} batches with ${CONCURRENT_BATCHES} concurrent requests`);
+
+  // Process batches with concurrency limit
+  const batchGroups = chunk(
+    textBatches.map((texts, i) => ({ texts, indices: indexBatches[i], batchNumber: i + 1 })),
+    CONCURRENT_BATCHES
+  );
+
+  for (const group of batchGroups) {
+    await Promise.all(
+      group.map(batch =>
+        processSingleBatch(
+          batch.texts,
+          batch.indices,
+          cacheKeys,
+          results,
+          batch.batchNumber,
+          totalBatches
+        )
+      )
+    );
+  }
+
+  return results as number[][];
+}
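To make the fan-out concrete: with BATCH_SIZE = 50 and CONCURRENT_BATCHES = 3, 500 uncached texts become 10 API requests issued in waves of at most 3 (four waves: 3 + 3 + 3 + 1). The same chunking arithmetic, runnable standalone:

// Mirrors the chunk() helper above.
function chunkDemo<T>(array: T[], size: number): T[][] {
  const chunks: T[][] = [];
  for (let i = 0; i < array.length; i += size) {
    chunks.push(array.slice(i, i + size));
  }
  return chunks;
}

const texts = Array.from({ length: 500 }, (_, i) => `note ${i}`);
const batches = chunkDemo(texts, 50); // 10 batches of 50 texts
const waves = chunkDemo(batches, 3);  // waves of [3, 3, 3, 1] batches
console.log(batches.length, waves.length); // 10 4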
package/src/graph/export.test.ts
ADDED
@@ -0,0 +1,81 @@
+// src/graph/export.test.ts
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { exportGraph } from "./export.js";
+
+// Create a shared mock store instance
+const mockStore = {
+  getAll: vi.fn(),
+};
+
+vi.mock("../db/lancedb.js", () => ({
+  getVectorStore: vi.fn(() => mockStore),
+}));
+
+describe("exportGraph", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  describe("JSON format", () => {
+    it("exports nodes and edges", async () => {
+      mockStore.getAll.mockResolvedValue([
+        { id: "1", title: "Note A", folder: "Work", tags: ["project"], outlinks: ["Note B"], vector: [1,0] },
+        { id: "2", title: "Note B", folder: "Work", tags: ["project"], outlinks: [], vector: [0,1] },
+      ]);
+
+      const result = await exportGraph({ format: "json" }) as any;
+
+      expect(result).toHaveProperty("nodes");
+      expect(result).toHaveProperty("edges");
+      expect(result.nodes).toHaveLength(2);
+      expect(result.edges.some((e: any) => e.type === "link")).toBe(true);
+      expect(result.edges.some((e: any) => e.type === "tag")).toBe(true);
+    });
+
+    it("filters by folder", async () => {
+      mockStore.getAll.mockResolvedValue([
+        { id: "1", title: "Note A", folder: "Work", tags: [], outlinks: [], vector: [] },
+        { id: "2", title: "Note B", folder: "Personal", tags: [], outlinks: [], vector: [] },
+      ]);
+
+      const result = await exportGraph({ format: "json", folder: "Work" }) as any;
+
+      expect(result.nodes).toHaveLength(1);
+      expect(result.nodes[0].folder).toBe("Work");
+    });
+  });
+
+  describe("GraphML format", () => {
+    it("exports valid GraphML XML", async () => {
+      mockStore.getAll.mockResolvedValue([
+        { id: "1", title: "Note A", folder: "Work", tags: [], outlinks: ["Note B"], vector: [] },
+        { id: "2", title: "Note B", folder: "Work", tags: [], outlinks: [], vector: [] },
+      ]);
+
+      const result = await exportGraph({ format: "graphml" });
+
+      expect(typeof result).toBe("string");
+      expect(result).toContain('<?xml version="1.0"');
+      expect(result).toContain("<graphml");
+      expect(result).toContain("<node");
+      expect(result).toContain("<edge");
+      expect(result).toContain("</graphml>");
+    });
+
+    it("escapes special XML characters in GraphML", async () => {
+      mockStore.getAll.mockResolvedValue([
+        { id: "1", title: 'Note <with> & "special"', folder: "Work", tags: [], outlinks: [], vector: [] },
+      ]);
+      const result = await exportGraph({ format: "graphml" }) as string;
+      expect(result).toContain("&lt;with&gt;");
+      expect(result).toContain("&amp;");
+    });
+  });
+
+  describe("Unknown format", () => {
+    it("throws for unknown format", async () => {
+      mockStore.getAll.mockResolvedValue([]);
+      await expect(exportGraph({ format: "unknown" as any })).rejects.toThrow("Unknown format");
+    });
+  });
+});