npm - @disco_trooper/apple-notes-mcp - Versions diffs - 1.2.0 → 1.3.0 - Mend

@disco_trooper/apple-notes-mcp 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/README.md +104 -24
package/package.json +10 -8
package/src/config/claude.test.ts +47 -0
package/src/config/claude.ts +106 -0
package/src/config/constants.ts +11 -2
package/src/config/paths.test.ts +40 -0
package/src/config/paths.ts +86 -0
package/src/db/arrow-fix.test.ts +101 -0
package/src/db/lancedb.test.ts +209 -2
package/src/db/lancedb.ts +345 -7
package/src/embeddings/cache.test.ts +150 -0
package/src/embeddings/cache.ts +204 -0
package/src/embeddings/index.ts +21 -2
package/src/embeddings/local.ts +61 -10
package/src/embeddings/openrouter.ts +233 -11
package/src/graph/export.test.ts +81 -0
package/src/graph/export.ts +163 -0
package/src/graph/extract.test.ts +90 -0
package/src/graph/extract.ts +52 -0
package/src/graph/queries.test.ts +156 -0
package/src/graph/queries.ts +224 -0
package/src/index.ts +249 -9
package/src/notes/crud.test.ts +26 -2
package/src/notes/crud.ts +43 -5
package/src/notes/read.ts +83 -68
package/src/search/chunk-indexer.test.ts +353 -0
package/src/search/chunk-indexer.ts +207 -0
package/src/search/chunk-search.test.ts +327 -0
package/src/search/chunk-search.ts +298 -0
package/src/search/indexer.ts +151 -109
package/src/setup.ts +46 -67
package/src/utils/chunker.test.ts +182 -0
package/src/utils/chunker.ts +170 -0
package/src/utils/content-filter.test.ts +225 -0
package/src/utils/content-filter.ts +275 -0
package/src/utils/runtime.test.ts +70 -0
package/src/utils/runtime.ts +40 -0

package/src/embeddings/openrouter.ts CHANGED Viewed

@@ -108,6 +108,27 @@ class OpenRouterError extends Error {
   }
 }
+/**
+ * HTTP status codes that should not be retried.
+ * isNonRetryableError checks an OpenRouterError's statusCode against this list.
+ */
+const NON_RETRYABLE_STATUS_CODES = [400, 401, 403, 404];
+/**
+ * Common headers for OpenRouter API requests.
+ * Spread into each request's headers after the per-request Authorization header.
+ */
+const API_HEADERS = {
+  "Content-Type": "application/json",
+  "HTTP-Referer": "https://github.com/apple-notes-mcp",
+  "X-Title": "Apple Notes MCP",
+} as const;
+/**
+ * Decide whether a caught failure should abort the retry loop.
+ *
+ * @param error - Value caught from a failed request attempt
+ * @returns true only for an OpenRouterError whose status code is listed in
+ *   NON_RETRYABLE_STATUS_CODES; false for anything else
+ */
+function isNonRetryableError(error: unknown): boolean {
+  if (!(error instanceof OpenRouterError)) {
+    return false;
+  }
+  const status = error.statusCode;
+  return status ? NON_RETRYABLE_STATUS_CODES.includes(status) : false;
+}
 /**
  * Get embedding vector for text using OpenRouter API
  *
@@ -157,9 +178,7 @@ export async function getOpenRouterEmbedding(text: string): Promise<number[]> {
         method: "POST",
         headers: {
           Authorization: `Bearer ${OPENROUTER_API_KEY}`,
-          "Content-Type": "application/json",
-          "HTTP-Referer": "https://github.com/apple-notes-mcp",
-          "X-Title": "Apple Notes MCP",
+          ...API_HEADERS,
         },
         body: JSON.stringify({
           model: EMBEDDING_MODEL,
@@ -224,17 +243,12 @@ export async function getOpenRouterEmbedding(text: string): Promise<number[]> {
           `Request timed out after ${OPENROUTER_TIMEOUT_MS}ms`,
           408
         );
-        // Don't throw - fall through to retry logic below
       } else {
         lastError = error instanceof Error ? error : new Error(String(error));
-        // Don't retry on non-retryable errors
-        if (error instanceof OpenRouterError && error.statusCode) {
-          const nonRetryable = [400, 401, 403, 404];
-          if (nonRetryable.includes(error.statusCode)) {
-            debug(`Non-retryable error (${error.statusCode}), failing immediately`);
-            throw error;
-          }
+        if (isNonRetryableError(error)) {
+          debug(`Non-retryable error, failing immediately`);
+          throw error;
         }
       }
@@ -283,3 +297,211 @@ export function clearEmbeddingCache(): void {
 export function getEmbeddingCacheSize(): number {
   return embeddingCache.size;
 }
+/**
+ * Batch size for embedding requests.
+ * OpenRouter supports up to 2048 inputs per request, but 50-100 is optimal.
+ * getOpenRouterEmbeddingBatch splits the uncached texts into batches of this size.
+ */
+const BATCH_SIZE = 50;
+/**
+ * Number of concurrent batch API calls.
+ * Higher values increase throughput but may hit rate limits.
+ * Batches are grouped by this count; each group is awaited together before
+ * the next group starts.
+ */
+const CONCURRENT_BATCHES = 3;
+/**
+ * Split an array into consecutive chunks of at most `size` elements.
+ * The input array is not modified; the last chunk may be shorter.
+ *
+ * @param array - Items to split
+ * @param size - Maximum number of items per chunk; must be a positive integer
+ * @returns Chunks in original order (empty array for empty input)
+ * @throws RangeError if size is not a positive integer
+ */
+function chunk<T>(array: T[], size: number): T[][] {
+  // A zero or negative step would never advance the loop below on non-empty input.
+  if (!Number.isInteger(size) || size < 1) {
+    throw new RangeError(`chunk size must be a positive integer, got ${size}`);
+  }
+  const chunks: T[][] = [];
+  for (let start = 0; start < array.length; start += size) {
+    chunks.push(array.slice(start, start + size));
+  }
+  return chunks;
+}
+/**
+ * Fetch embeddings for one batch of texts and store them in `results` and the cache.
+ * Internal helper for concurrent batch processing.
+ *
+ * Up to MAX_RETRIES attempts are made; each request is aborted after
+ * OPENROUTER_TIMEOUT_MS * 2. Rate limiting (429), timeouts and other failures
+ * that isNonRetryableError does not reject back off and try again.
+ *
+ * @param batchTexts - Texts sent as the `input` of this request
+ * @param batchIndices - For each text in batchTexts, its position in `results` / `cacheKeys`
+ * @param cacheKeys - Cache keys for every text of the overall call
+ * @param results - Shared output array, filled in place at this batch's positions
+ * @param batchNumber - Batch number used in log and error messages
+ * @param totalBatches - Total batch count used in log messages
+ * @throws OpenRouterError when every attempt fails; errors classified as
+ *   non-retryable are rethrown immediately
+ */
+async function processSingleBatch(
+  batchTexts: string[],
+  batchIndices: number[],
+  cacheKeys: string[],
+  results: (number[] | null)[],
+  batchNumber: number,
+  totalBatches: number
+): Promise<void> {
+  debug(`Processing batch ${batchNumber}/${totalBatches} (${batchTexts.length} texts)`);
+  let lastError: Error | null = null;
+  for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
+    const isLastAttempt = attempt === MAX_RETRIES - 1;
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), OPENROUTER_TIMEOUT_MS * 2);
+    try {
+      const response = await fetch(API_URL, {
+        method: "POST",
+        headers: {
+          Authorization: `Bearer ${OPENROUTER_API_KEY}`,
+          ...API_HEADERS,
+        },
+        body: JSON.stringify({
+          model: EMBEDDING_MODEL,
+          input: batchTexts,
+          dimensions: EMBEDDING_DIMS,
+        }),
+        signal: controller.signal,
+      });
+      if (response.status === 429) {
+        // Stop the abort timer before backing off so it cannot fire mid-wait.
+        clearTimeout(timeoutId);
+        // Record the cause so the final error message does not end in "undefined".
+        lastError = new OpenRouterError("Rate limited (429)", 429);
+        // Waiting after the final attempt would only delay the failure.
+        if (!isLastAttempt) {
+          const waitTime = getBackoffDelay(attempt, RATE_LIMIT_BACKOFF_BASE_MS);
+          debug(`Batch ${batchNumber}: Rate limited (429), waiting ${waitTime}ms`);
+          await sleep(waitTime);
+        }
+        continue;
+      }
+      if (!response.ok) {
+        const errorBody = await response.text();
+        throw new OpenRouterError(
+          `OpenRouter API error: ${response.status} - ${errorBody}`,
+          response.status,
+          errorBody
+        );
+      }
+      const data = await response.json() as {
+        data?: Array<{ embedding?: number[]; index?: number }>;
+      };
+      if (!data?.data || data.data.length !== batchTexts.length) {
+        throw new OpenRouterError(
+          `Invalid API response: expected ${batchTexts.length} embeddings, got ${data?.data?.length ?? 0}`,
+          response.status,
+          JSON.stringify(data)
+        );
+      }
+      const items = data.data;
+      // Validate every item before writing anything, so a bad response never
+      // leaves partially written results or cache entries behind.
+      const embeddings: number[][] = [];
+      for (let j = 0; j < items.length; j++) {
+        const embedding = items[j].embedding;
+        if (!embedding) {
+          throw new OpenRouterError(
+            `Missing embedding at index ${j}`,
+            response.status,
+            JSON.stringify(data)
+          );
+        }
+        embeddings.push(embedding);
+      }
+      // Place each embedding by its `index` field when the response carries a
+      // complete, unambiguous set of indices; otherwise keep response order.
+      const byIndex: number[][] = new Array(items.length);
+      let indicesUsable = true;
+      for (let j = 0; j < items.length; j++) {
+        const slot = items[j].index;
+        if (
+          slot === undefined ||
+          !Number.isInteger(slot) ||
+          slot < 0 ||
+          slot >= items.length ||
+          slot in byIndex
+        ) {
+          indicesUsable = false;
+          break;
+        }
+        byIndex[slot] = embeddings[j];
+      }
+      const ordered = indicesUsable ? byIndex : embeddings;
+      // Store results and cache them
+      for (let j = 0; j < ordered.length; j++) {
+        const target = batchIndices[j];
+        results[target] = ordered[j];
+        embeddingCache.set(cacheKeys[target], ordered[j]);
+      }
+      return; // Success
+    } catch (error) {
+      if (error instanceof Error && error.name === "AbortError") {
+        lastError = new OpenRouterError(
+          `Batch request timed out after ${OPENROUTER_TIMEOUT_MS * 2}ms`,
+          408
+        );
+      } else {
+        lastError = error instanceof Error ? error : new Error(String(error));
+        if (isNonRetryableError(error)) {
+          throw error;
+        }
+      }
+      if (!isLastAttempt) {
+        const waitTime = getBackoffDelay(attempt);
+        debug(`Batch ${batchNumber} error: ${lastError.message}, retrying in ${waitTime}ms`);
+        await sleep(waitTime);
+      }
+    } finally {
+      clearTimeout(timeoutId);
+    }
+  }
+  throw new OpenRouterError(
+    `Failed to get batch ${batchNumber} embeddings after ${MAX_RETRIES} attempts: ${lastError?.message}`
+  );
+}
+/**
+ * Embed many texts at once, using the cache where possible and concurrent
+ * batch requests for the rest.
+ * Much faster than calling getOpenRouterEmbedding individually.
+ *
+ * @param texts - Array of input texts to embed
+ * @returns Promise resolving to one embedding vector per input, in input order
+ * @throws OpenRouterError if the API key is missing or an API call fails
+ */
+export async function getOpenRouterEmbeddingBatch(texts: string[]): Promise<number[][]> {
+  if (!OPENROUTER_API_KEY) {
+    throw new OpenRouterError(
+      "OPENROUTER_API_KEY environment variable is not set"
+    );
+  }
+  if (texts.length === 0) {
+    return [];
+  }
+  // Cache keys are derived from the truncated form of each input
+  const prepared = texts.map((text) => truncateForEmbedding(text));
+  const cacheKeys = prepared.map((text) => getCacheKey(text));
+  // Fill in cache hits; remember the position and text of every miss
+  const results: (number[] | null)[] = new Array(texts.length).fill(null);
+  const pendingIndices: number[] = [];
+  const pendingTexts: string[] = [];
+  cacheKeys.forEach((key, position) => {
+    const hit = embeddingCache.get(key);
+    if (hit) {
+      results[position] = hit;
+      return;
+    }
+    pendingIndices.push(position);
+    pendingTexts.push(prepared[position]);
+  });
+  debug(`Batch: ${texts.length} total, ${pendingIndices.length} uncached`);
+  if (pendingTexts.length === 0) {
+    return results as number[][];
+  }
+  // Texts and their positions are split identically so batch i of one lines up with batch i of the other
+  const textBatches = chunk(pendingTexts, BATCH_SIZE);
+  const indexBatches = chunk(pendingIndices, BATCH_SIZE);
+  const totalBatches = textBatches.length;
+  debug(`Processing ${totalBatches} batches with ${CONCURRENT_BATCHES} concurrent requests`);
+  const jobs = textBatches.map((batchTexts, i) => ({
+    batchTexts,
+    batchIndices: indexBatches[i],
+    batchNumber: i + 1,
+  }));
+  // Run one group of CONCURRENT_BATCHES jobs at a time
+  for (const wave of chunk(jobs, CONCURRENT_BATCHES)) {
+    const running = wave.map((job) =>
+      processSingleBatch(
+        job.batchTexts,
+        job.batchIndices,
+        cacheKeys,
+        results,
+        job.batchNumber,
+        totalBatches
+      )
+    );
+    await Promise.all(running);
+  }
+  return results as number[][];
+}

package/src/graph/export.test.ts ADDED Viewed

@@ -0,0 +1,81 @@
+// src/graph/export.test.ts
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { exportGraph } from "./export.js";
+/*
+ * Create a shared mock store instance.
+ * The mocked getVectorStore below returns this object, so each test controls
+ * what exportGraph reads by setting getAll's resolved value.
+ */
+const mockStore = {
+  getAll: vi.fn(),
+};
+vi.mock("../db/lancedb.js", () => ({
+  getVectorStore: vi.fn(() => mockStore),
+}));
+describe("exportGraph", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+  describe("JSON format", () => {
+    it("exports nodes and edges", async () => {
+      mockStore.getAll.mockResolvedValue([ // Note A links to Note B; both carry the "project" tag
+        { id: "1", title: "Note A", folder: "Work", tags: ["project"], outlinks: ["Note B"], vector: [1,0] },
+        { id: "2", title: "Note B", folder: "Work", tags: ["project"], outlinks: [], vector: [0,1] },
+      ]);
+      const result = await exportGraph({ format: "json" }) as any;
+      expect(result).toHaveProperty("nodes");
+      expect(result).toHaveProperty("edges");
+      expect(result.nodes).toHaveLength(2);
+      expect(result.edges.some((e: any) => e.type === "link")).toBe(true);
+      expect(result.edges.some((e: any) => e.type === "tag")).toBe(true);
+    });
+    it("filters by folder", async () => {
+      mockStore.getAll.mockResolvedValue([ // one note per folder; only the "Work" one should survive
+        { id: "1", title: "Note A", folder: "Work", tags: [], outlinks: [], vector: [] },
+        { id: "2", title: "Note B", folder: "Personal", tags: [], outlinks: [], vector: [] },
+      ]);
+      const result = await exportGraph({ format: "json", folder: "Work" }) as any;
+      expect(result.nodes).toHaveLength(1);
+      expect(result.nodes[0].folder).toBe("Work");
+    });
+  });
+  describe("GraphML format", () => {
+    it("exports valid GraphML XML", async () => {
+      mockStore.getAll.mockResolvedValue([ // a single link so the output contains an <edge> element
+        { id: "1", title: "Note A", folder: "Work", tags: [], outlinks: ["Note B"], vector: [] },
+        { id: "2", title: "Note B", folder: "Work", tags: [], outlinks: [], vector: [] },
+      ]);
+      const result = await exportGraph({ format: "graphml" });
+      expect(typeof result).toBe("string");
+      expect(result).toContain('<?xml version="1.0"');
+      expect(result).toContain("<graphml");
+      expect(result).toContain("<node");
+      expect(result).toContain("<edge");
+      expect(result).toContain("</graphml>");
+    });
+    it("escapes special XML characters in GraphML", async () => {
+      mockStore.getAll.mockResolvedValue([ // title contains <, >, & and double quotes
+        { id: "1", title: 'Note <with> & "special"', folder: "Work", tags: [], outlinks: [], vector: [] },
+      ]);
+      const result = await exportGraph({ format: "graphml" }) as string;
+      expect(result).toContain("&lt;with&gt;");
+      expect(result).toContain("&amp;");
+    });
+  });
+  describe("Unknown format", () => {
+    it("throws for unknown format", async () => {
+      mockStore.getAll.mockResolvedValue([]);
+      await expect(exportGraph({ format: "unknown" as any })).rejects.toThrow("Unknown format"); // cast bypasses the GraphFormat union
+    });
+  });
+});

package/src/graph/export.ts ADDED Viewed

@@ -0,0 +1,163 @@
+// src/graph/export.ts
+/**
+ * Knowledge graph export to various formats.
+ */
+import { getVectorStore } from "../db/lancedb.js";
+import { createDebugLogger } from "../utils/debug.js";
+import { GRAPH_LINK_WEIGHT, GRAPH_TAG_WEIGHT } from "../config/constants.js";
+const debug = createDebugLogger("EXPORT"); // logger for this module, labelled "EXPORT"
+export type GraphFormat = "json" | "graphml"; // output formats accepted by exportGraph
+export interface GraphNode { // one exported note
+  id: string; // copied from the record's id
+  label: string; // copied from the record's title
+  folder: string;
+  tags: string[]; // empty array when the record has no tags
+}
+export interface GraphEdge { // relation between two nodes, referenced by node id
+  source: string;
+  target: string;
+  type: "link" | "tag" | "similar"; // exportGraph itself emits only "link" and "tag" edges
+  weight: number; // GRAPH_LINK_WEIGHT for "link" edges, GRAPH_TAG_WEIGHT for "tag" edges
+}
+export interface GraphData { // what exportGraph returns for the "json" format
+  nodes: GraphNode[];
+  edges: GraphEdge[];
+}
+export interface ExportOptions {
+  format: GraphFormat;
+  folder?: string; // when set, only notes in this folder are exported (case-insensitive match)
+}
+/**
+ * Export the knowledge graph built from the stored notes.
+ *
+ * Nodes are the stored note records, optionally restricted to one folder
+ * (compared case-insensitively). Edges are:
+ * - "link": from a note to the note whose title matches one of its outlinks
+ *   (case-insensitive; the first note with that title wins)
+ * - "tag": one edge per pair of notes that share at least one tag
+ *
+ * @param options - Output format and optional folder filter
+ * @returns GraphData for "json", a GraphML document string for "graphml"
+ * @throws Error if the format is not recognized
+ */
+export async function exportGraph(options: ExportOptions): Promise<GraphData | string> {
+  const { format, folder } = options;
+  debug(`Exporting graph in ${format} format`);
+  const store = getVectorStore();
+  let records = await store.getAll();
+  // Filter by folder if specified
+  if (folder) {
+    const normalizedFolder = folder.toLowerCase();
+    records = records.filter(r => r.folder.toLowerCase() === normalizedFolder);
+  }
+  // Build graph data
+  const nodes: GraphNode[] = records.map(r => ({
+    id: r.id,
+    label: r.title,
+    folder: r.folder,
+    tags: r.tags ?? [],
+  }));
+  const edges: GraphEdge[] = [];
+  // Index titles once so resolving an outlink is a lookup instead of a scan
+  // over all records. Only the first record per title is kept, which matches
+  // what a front-to-back search would return.
+  const idByTitle = new Map<string, string>();
+  for (const record of records) {
+    const titleKey = record.title.toLowerCase();
+    if (!idByTitle.has(titleKey)) {
+      idByTitle.set(titleKey, record.id);
+    }
+  }
+  // Add link edges
+  for (const record of records) {
+    for (const linkTitle of record.outlinks ?? []) {
+      const targetId = idByTitle.get(linkTitle.toLowerCase());
+      if (targetId !== undefined) {
+        edges.push({
+          source: record.id,
+          target: targetId,
+          type: "link",
+          weight: GRAPH_LINK_WEIGHT,
+        });
+      }
+    }
+  }
+  // Add tag edges (notes sharing same tag).
+  // A Set per tag keeps insertion order but drops repeated ids, so a note that
+  // lists the same tag twice cannot produce an edge to itself.
+  const tagGroups = new Map<string, Set<string>>();
+  for (const record of records) {
+    for (const tag of record.tags ?? []) {
+      let group = tagGroups.get(tag);
+      if (!group) {
+        group = new Set<string>();
+        tagGroups.set(tag, group);
+      }
+      group.add(record.id);
+    }
+  }
+  const seenTagEdges = new Set<string>();
+  for (const group of tagGroups.values()) {
+    const noteIds = [...group];
+    if (noteIds.length < 2) continue;
+    for (let i = 0; i < noteIds.length; i++) {
+      for (let j = i + 1; j < noteIds.length; j++) {
+        // JSON encoding keeps the pair unambiguous whatever characters the ids
+        // contain; a plain separator could make two different pairs look equal.
+        const edgeKey = JSON.stringify([noteIds[i], noteIds[j]].sort());
+        if (seenTagEdges.has(edgeKey)) continue;
+        seenTagEdges.add(edgeKey);
+        edges.push({
+          source: noteIds[i],
+          target: noteIds[j],
+          type: "tag",
+          weight: GRAPH_TAG_WEIGHT,
+        });
+      }
+    }
+  }
+  const graphData: GraphData = { nodes, edges };
+  if (format === "json") {
+    return graphData;
+  }
+  if (format === "graphml") {
+    return toGraphML(graphData);
+  }
+  throw new Error(`Unknown format: ${format}`);
+}
+/**
+ * Escape the five XML special characters (& < > " ') as predefined entities.
+ *
+ * @param str - Raw text
+ * @returns Text safe to place in XML element content or attribute values
+ */
+function escapeXml(str: string): string {
+  const entities: Record<string, string> = {
+    "&": "&amp;",
+    "<": "&lt;",
+    ">": "&gt;",
+    '"': "&quot;",
+    "'": "&apos;",
+  };
+  // One pass over the input: every special character maps to exactly one entity
+  return str.replace(/[&<>"']/g, (ch) => entities[ch]);
+}
+/**
+ * Render graph data as a GraphML document.
+ * Declares the node keys (label, folder, tags) and edge keys (type, weight),
+ * then writes one <node> per node and one <edge> per edge; tags are joined
+ * with commas into a single string.
+ *
+ * @param data - Nodes and edges to serialize
+ * @returns The document as newline-joined text
+ */
+function toGraphML(data: GraphData): string {
+  const header = [
+    '<?xml version="1.0" encoding="UTF-8"?>',
+    '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">',
+    '  <key id="label" for="node" attr.name="label" attr.type="string"/>',
+    '  <key id="folder" for="node" attr.name="folder" attr.type="string"/>',
+    '  <key id="tags" for="node" attr.name="tags" attr.type="string"/>',
+    '  <key id="type" for="edge" attr.name="type" attr.type="string"/>',
+    '  <key id="weight" for="edge" attr.name="weight" attr.type="double"/>',
+    '  <graph id="G" edgedefault="directed">',
+  ];
+  const nodeLines = data.nodes.flatMap((node) => [
+    `    <node id="${escapeXml(node.id)}">`,
+    `      <data key="label">${escapeXml(node.label)}</data>`,
+    `      <data key="folder">${escapeXml(node.folder)}</data>`,
+    `      <data key="tags">${escapeXml(node.tags.join(","))}</data>`,
+    "    </node>",
+  ]);
+  const edgeLines = data.edges.flatMap((edge) => [
+    `    <edge source="${escapeXml(edge.source)}" target="${escapeXml(edge.target)}">`,
+    `      <data key="type">${edge.type}</data>`,
+    `      <data key="weight">${edge.weight}</data>`,
+    "    </edge>",
+  ]);
+  const footer = ["  </graph>", "</graphml>"];
+  return [...header, ...nodeLines, ...edgeLines, ...footer].join("\n");
+}

package/src/graph/extract.test.ts ADDED Viewed

@@ -0,0 +1,90 @@
+// src/graph/extract.test.ts
+import { describe, it, expect } from "vitest";
+import { extractTags, extractOutlinks, extractMetadata } from "./extract.js";
+describe("extractTags", () => { // hashtag extraction: lowercased, de-duplicated, code and hex colors skipped
+  it("extracts simple hashtags", () => {
+    const content = "This is a #project about #coding";
+    expect(extractTags(content)).toEqual(["project", "coding"]);
+  });
+  it("handles hyphenated tags", () => {
+    const content = "Working on #my-project and #some-idea";
+    expect(extractTags(content)).toEqual(["my-project", "some-idea"]);
+  });
+  it("normalizes to lowercase", () => {
+    const content = "#Project #IDEA #Mixed";
+    expect(extractTags(content)).toEqual(["project", "idea", "mixed"]);
+  });
+  it("deduplicates tags", () => { // first occurrence decides the position in the result
+    const content = "#project #idea #project";
+    expect(extractTags(content)).toEqual(["project", "idea"]);
+  });
+  it("returns empty array for no tags", () => {
+    expect(extractTags("No tags here")).toEqual([]);
+  });
+  it("ignores tags in code blocks", () => {
+    const content = "Real #tag\n```\n#not-a-tag\n```\nAnother #real";
+    expect(extractTags(content)).toEqual(["tag", "real"]);
+  });
+  it("ignores tags in inline code", () => {
+    const content = "Real #tag and `#code-tag` should ignore inline";
+    expect(extractTags(content)).toEqual(["tag"]);
+  });
+  it("ignores hex colors", () => { // fff / 000000 / a1b2c3 are treated as colors, not tags
+    const content = "Color #fff and #000000 and #a1b2c3 are not tags";
+    expect(extractTags(content)).toEqual([]);
+  });
+  it("keeps tags that contain letters mixed with numbers", () => { // each token has a character outside 0-9a-f
+    const content = "#project1 #2024goals #abc123xyz";
+    expect(extractTags(content)).toEqual(["project1", "2024goals", "abc123xyz"]);
+  });
+  it("extracts tag at string boundaries", () => {
+    expect(extractTags("#start of content")).toEqual(["start"]);
+    expect(extractTags("end of #content")).toEqual(["content"]);
+  });
+});
+describe("extractOutlinks", () => { // [[wiki-style]] link extraction: de-duplicated, code blocks skipped
+  it("extracts wiki-style links", () => {
+    const content = "See [[Meeting Notes]] and [[Project Plan]]";
+    expect(extractOutlinks(content)).toEqual(["Meeting Notes", "Project Plan"]);
+  });
+  it("handles links with special characters", () => {
+    const content = "Check [[Note with / slash]] and [[Note: with colon]]";
+    expect(extractOutlinks(content)).toEqual(["Note with / slash", "Note: with colon"]);
+  });
+  it("deduplicates links", () => {
+    const content = "[[Note]] and [[Other]] and [[Note]]";
+    expect(extractOutlinks(content)).toEqual(["Note", "Other"]);
+  });
+  it("returns empty array for no links", () => {
+    expect(extractOutlinks("No links here")).toEqual([]);
+  });
+  it("ignores links in code blocks", () => {
+    const content = "Real [[Link]]\n```\n[[not-a-link]]\n```";
+    expect(extractOutlinks(content)).toEqual(["Link"]);
+  });
+});
+describe("extractMetadata", () => { // combines both extractors into one result object
+  it("extracts both tags and outlinks", () => {
+    const content = "A #project note linking to [[Other Note]]";
+    expect(extractMetadata(content)).toEqual({
+      tags: ["project"],
+      outlinks: ["Other Note"],
+    });
+  });
+});

package/src/graph/extract.ts ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * Knowledge graph metadata extraction from note content.
+ */
+/** Remove code blocks and inline code to avoid extracting metadata from code. */
+function stripCodeBlocks(content: string): string {
+  return content
+    .replace(/```[\s\S]*?```/g, "") // fenced code blocks
+    .replace(/`[^`]+`/g, ""); // inline code
+}
+/**
+ * Check if a string is a hex color code (e.g., fff, 000000, a1b2c3).
+ * Only the lengths used for hex colors count: 3, 4, 6 or 8 hex digits.
+ * Other lengths (e.g. "decaf") are not colors even if every character is a
+ * hex digit. Real words of a color length such as "cafe" remain ambiguous
+ * and are still reported as colors.
+ */
+function isHexColor(value: string): boolean {
+  return /^(?:[0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})$/i.test(value);
+}
+/**
+ * Extract hashtags from content.
+ * Ignores tags inside code blocks, hex color codes and purely numeric tokens
+ * (such as "#12345").
+ *
+ * @param content - Raw note text
+ * @returns Unique tag names without the leading "#", lowercased, in order of
+ *   first appearance
+ */
+export function extractTags(content: string): string[] {
+  // Unicode letter/mark/number classes keep non-ASCII tags whole; ASCII-only
+  // \w would cut a tag at its first accented character.
+  const matches = stripCodeBlocks(content).match(/#[\p{L}\p{M}\p{N}_-]+/gu) ?? [];
+  const tags = matches
+    .map((t) => t.slice(1).toLowerCase())
+    .filter((t) => !isHexColor(t) && !/^\d+$/.test(t));
+  return [...new Set(tags)];
+}
+/**
+ * Collect the targets of wiki-style [[links]] found in content.
+ * Links inside code blocks are skipped.
+ *
+ * @param content - Raw note text
+ * @returns Unique link targets (the text between the brackets, unchanged),
+ *   in order of first appearance
+ */
+export function extractOutlinks(content: string): string[] {
+  const text = stripCodeBlocks(content);
+  // The capture group is exactly the text between "[[" and "]]"
+  const targets = Array.from(text.matchAll(/\[\[([^\]]+)\]\]/g), (found) => found[1]);
+  return Array.from(new Set(targets));
+}
+export interface NoteMetadata { // result shape of extractMetadata
+  tags: string[]; // as returned by extractTags
+  outlinks: string[]; // as returned by extractOutlinks
+}
+/**
+ * Gather the graph metadata of a note in one call.
+ *
+ * @param content - Raw note text
+ * @returns Tags from extractTags and outlinks from extractOutlinks
+ */
+export function extractMetadata(content: string): NoteMetadata {
+  const tags = extractTags(content);
+  const outlinks = extractOutlinks(content);
+  return { tags, outlinks };
+}