npm - @disco_trooper/apple-notes-mcp - Versions diffs - 1.1.0 → 1.3.0 - Mend

@disco_trooper/apple-notes-mcp 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

package/README.md +104 -24
package/package.json +11 -12
package/src/config/claude.test.ts +47 -0
package/src/config/claude.ts +106 -0
package/src/config/constants.ts +11 -2
package/src/config/paths.test.ts +40 -0
package/src/config/paths.ts +86 -0
package/src/db/arrow-fix.test.ts +101 -0
package/src/db/lancedb.test.ts +254 -2
package/src/db/lancedb.ts +385 -38
package/src/embeddings/cache.test.ts +150 -0
package/src/embeddings/cache.ts +204 -0
package/src/embeddings/index.ts +22 -4
package/src/embeddings/local.ts +57 -17
package/src/embeddings/openrouter.ts +233 -11
package/src/errors/index.test.ts +64 -0
package/src/errors/index.ts +62 -0
package/src/graph/export.test.ts +81 -0
package/src/graph/export.ts +163 -0
package/src/graph/extract.test.ts +90 -0
package/src/graph/extract.ts +52 -0
package/src/graph/queries.test.ts +156 -0
package/src/graph/queries.ts +224 -0
package/src/index.ts +309 -23
package/src/notes/conversion.ts +62 -0
package/src/notes/crud.test.ts +41 -8
package/src/notes/crud.ts +75 -64
package/src/notes/read.test.ts +58 -3
package/src/notes/read.ts +142 -210
package/src/notes/resolve.ts +174 -0
package/src/notes/tables.ts +69 -40
package/src/search/chunk-indexer.test.ts +353 -0
package/src/search/chunk-indexer.ts +207 -0
package/src/search/chunk-search.test.ts +327 -0
package/src/search/chunk-search.ts +298 -0
package/src/search/index.ts +4 -6
package/src/search/indexer.ts +164 -109
package/src/setup.ts +46 -67
package/src/types/index.ts +4 -0
package/src/utils/chunker.test.ts +182 -0
package/src/utils/chunker.ts +170 -0
package/src/utils/content-filter.test.ts +225 -0
package/src/utils/content-filter.ts +275 -0
package/src/utils/debug.ts +0 -2
package/src/utils/runtime.test.ts +70 -0
package/src/utils/runtime.ts +40 -0
package/src/utils/text.test.ts +32 -0
package/CLAUDE.md +0 -56
package/src/server.ts +0 -427

package/src/utils/content-filter.ts ADDED Viewed

@@ -0,0 +1,275 @@
+/**
+ * Content quality filter for RAG indexing.
+ * Detects and filters Base64-encoded, binary, and secret content.
+ */
+import { createDebugLogger } from "./debug.js";
+const debug = createDebugLogger("CONTENT_FILTER");
+/**
+ * Result of content filtering, as returned by `filterContent`.
+ */
+export interface FilterResult {
+  /**
+   * Outcome of filtering:
+   * - "index": the content passed every check and no cleaning step reported a change.
+   * - "filter": the content is usable, but at least one cleaning step
+   *   (Base64 removal or secret redaction) was applied; see `reasons`.
+   * - "skip": the content was rejected; `cleanedContent` is omitted.
+   */
+  action: "index" | "filter" | "skip";
+  /** Cleaned content (present when action is "index" or "filter", omitted on "skip") */
+  cleanedContent?: string;
+  /** Reasons for filtering/skipping; empty when action is "index" */
+  reasons: string[];
+}
+/**
+ * Calculate the Shannon entropy of a string, in bits per character.
+ * Higher entropy = more random/encoded content.
+ *
+ * Characters are counted as Unicode code points and that same count is used
+ * as the denominator, so strings containing surrogate pairs (emoji, etc.)
+ * produce probabilities that sum to 1.
+ *
+ * Typical values:
+ * - Normal text: 0.8 - 4.5
+ * - Base64: 5.0 - 6.0
+ * - Encrypted: 6.0+
+ *
+ * @param str - String to analyze
+ * @returns Entropy in bits per character; 0 for an empty string
+ */
+export function calculateEntropy(str: string): number {
+  if (!str) return 0;
+  const freq = new Map<string, number>();
+  // Track the code-point total alongside the histogram: `str.length` counts
+  // UTF-16 code units and would over-count astral characters.
+  let total = 0;
+  for (const char of str) {
+    freq.set(char, (freq.get(char) ?? 0) + 1);
+    total += 1;
+  }
+  let entropy = 0;
+  for (const count of freq.values()) {
+    const p = count / total;
+    entropy -= p * Math.log2(p);
+  }
+  return entropy;
+}
+/**
+ * Regex pattern for standard-alphabet Base64 content: a run of 40+ characters
+ * from `A-Z a-z 0-9 + /`, followed by up to two `=` padding characters.
+ * Global, so it finds every run in a string.
+ */
+const BASE64_PATTERN = /[A-Za-z0-9+/]{40,}={0,2}/g;
+/**
+ * Regex pattern for URL-safe Base64: a run of 40+ characters from
+ * `A-Z a-z 0-9 _ -`, followed by up to two `=` padding characters.
+ * Purely alphanumeric runs match both this and the standard pattern.
+ */
+const BASE64_URL_SAFE_PATTERN = /[A-Za-z0-9_-]{40,}={0,2}/g;
+/**
+ * Patterns for common secrets/tokens, keyed by the name used when a match
+ * is reported and redacted.
+ *
+ * Every pattern is global so that all occurrences are replaced, not just
+ * the first one.
+ */
+const SECRET_PATTERNS: Record<string, RegExp> = {
+  // Private keys (PEM / PGP armor). Matches the whole block through the END
+  // marker so the key material itself is redacted, not only the header line.
+  // When no END marker follows, only the BEGIN line is matched.
+  privateKey:
+    /-----BEGIN (?:RSA |DSA |EC |OPENSSH |PGP |ENCRYPTED )?PRIVATE KEY(?: BLOCK)?-----(?:[\s\S]*?-----END (?:RSA |DSA |EC |OPENSSH |PGP |ENCRYPTED )?PRIVATE KEY(?: BLOCK)?-----)?/g,
+  // JWT tokens
+  jwt: /eyJ[A-Za-z0-9-_=]+\.[A-Za-z0-9-_=]+\.?[A-Za-z0-9-_.+/=]*/g,
+  // AWS
+  awsAccessKey: /AKIA[0-9A-Z]{16}/g,
+  // GitHub
+  githubToken: /ghp_[a-zA-Z0-9]{36}/g,
+  githubFineGrained: /github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59}/g,
+  // Slack
+  slackToken: /xox[baprs]-[0-9a-zA-Z]{10,48}/g,
+  // Stripe
+  stripeKey: /sk_live_[0-9a-zA-Z]{24}/g,
+  // Database URIs with credentials
+  dbUri: /(?:mongodb|postgres(?:ql)?|mysql|redis):\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/g,
+};
+/**
+ * Heuristic test for whether a string segment looks Base64-encoded.
+ *
+ * A segment qualifies when it is at least 40 characters long, consists solely
+ * of standard or URL-safe Base64 alphabet characters (plus `=`), and has a
+ * Shannon entropy above 4.5.
+ */
+export function isLikelyBase64(str: string): boolean {
+  const longEnough = str.length >= 40;
+  const alphabetOnly = longEnough && /^[A-Za-z0-9+/=_-]+$/.test(str);
+  // Entropy is only computed once the cheaper checks have passed.
+  return alphabetOnly && calculateEntropy(str) > 4.5;
+}
+/**
+ * Calculate the fraction of a string that is covered by Base64-like content.
+ *
+ * Both the standard and the URL-safe pattern are scanned. Matches that pass
+ * `isLikelyBase64` are recorded as character spans and the union of those
+ * spans is measured, so a region matched by both patterns is counted once
+ * and a blob that appears several times is counted each time it appears.
+ *
+ * @param content - Text to inspect
+ * @returns Ratio in the range 0-1; 0 for an empty string
+ */
+export function getBase64Ratio(content: string): number {
+  if (content.length === 0) return 0;
+  const spans: Array<[number, number]> = [];
+  for (const pattern of [BASE64_PATTERN, BASE64_URL_SAFE_PATTERN]) {
+    // matchAll works on a clone of the regex, so the shared global patterns
+    // keep their lastIndex untouched.
+    for (const match of content.matchAll(pattern)) {
+      const text = match[0];
+      if (isLikelyBase64(text)) {
+        const start = match.index ?? 0;
+        spans.push([start, start + text.length]);
+      }
+    }
+  }
+  // Sweep the spans left to right, adding only the part of each span that
+  // lies beyond what has already been counted.
+  spans.sort((a, b) => a[0] - b[0]);
+  let covered = 0;
+  let cursor = 0;
+  for (const [start, end] of spans) {
+    if (end <= cursor) continue;
+    covered += end - Math.max(start, cursor);
+    cursor = end;
+  }
+  return covered / content.length;
+}
+/**
+ * Report whether the content contains a NUL byte or another ASCII control
+ * character. Tab, line feed and carriage return are treated as ordinary text.
+ */
+export function hasBinaryContent(content: string): boolean {
+  const controlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F]/;
+  return content.search(controlChars) !== -1;
+}
+/**
+ * Replace Base64 blocks in the content with the literal marker "[ENCODED]".
+ *
+ * The standard-alphabet pattern is applied first, then the URL-safe pattern
+ * runs over the result. A candidate is only replaced when `isLikelyBase64`
+ * accepts it; other matches are left as they were.
+ */
+export function removeBase64Blocks(content: string): string {
+  const maskIfEncoded = (segment: string): string =>
+    isLikelyBase64(segment) ? "[ENCODED]" : segment;
+  const withoutStandard = content.replace(BASE64_PATTERN, maskIfEncoded);
+  return withoutStandard.replace(BASE64_URL_SAFE_PATTERN, maskIfEncoded);
+}
+/**
+ * Replace every detected secret with a `[<NAME>_REDACTED]` placeholder.
+ *
+ * @param content - Text to scan
+ * @returns The redacted text plus the names of the patterns that matched
+ */
+export function redactSecrets(content: string): { content: string; secretsFound: string[] } {
+  const secretsFound: string[] = [];
+  let redacted = content;
+  for (const [name, pattern] of Object.entries(SECRET_PATTERNS)) {
+    const hit = pattern.test(redacted);
+    if (!hit) continue;
+    // Rewind the position that `test` leaves behind on global patterns.
+    pattern.lastIndex = 0;
+    const placeholder = `[${name.toUpperCase()}_REDACTED]`;
+    redacted = redacted.replace(pattern, placeholder);
+    secretsFound.push(name);
+  }
+  return { content: redacted, secretsFound };
+}
+/**
+ * Configuration for content filtering.
+ * Every field is optional; the defaults are listed in DEFAULT_CONFIG.
+ */
+export interface FilterConfig {
+  /** Content whose Base64 ratio is strictly greater than this is skipped (default: 0.5) */
+  maxBase64Ratio?: number;
+  /**
+   * Minimum meaningful content length after filtering (default: 50).
+   * Content left with fewer characters than this is skipped.
+   */
+  minContentLength?: number;
+  /** Whether to redact secrets (default: true) */
+  redactSecrets?: boolean;
+  /** Whether to remove Base64 blocks (default: true) */
+  removeBase64?: boolean;
+}
+const DEFAULT_CONFIG: Required<FilterConfig> = {
+  maxBase64Ratio: 0.5,
+  minContentLength: 50,
+  redactSecrets: true,
+  removeBase64: true,
+};
+/**
+ * Filter content for RAG indexing.
+ *
+ * Steps, in order:
+ * 1. Content with binary/control characters is skipped.
+ * 2. Content whose Base64 ratio exceeds `maxBase64Ratio` is skipped.
+ * 3. Base64 blocks are replaced with "[ENCODED]" when enabled and the ratio is above 10%.
+ * 4. Secrets are redacted when enabled.
+ * 5. Content with fewer than `minContentLength` characters left (placeholders
+ *    excluded) is skipped.
+ *
+ * @param content - Raw content to filter
+ * @param config - Filter configuration; omitted or `undefined` fields use the defaults
+ * @returns Filter result with action and cleaned content
+ */
+export function filterContent(
+  content: string,
+  config: FilterConfig = {}
+): FilterResult {
+  // Resolve each option individually: a plain object spread would let an
+  // explicit `undefined` clobber the default and silently disable the
+  // corresponding check (e.g. `{ redactSecrets: undefined }` turning redaction off).
+  const cfg: Required<FilterConfig> = {
+    maxBase64Ratio: config.maxBase64Ratio ?? DEFAULT_CONFIG.maxBase64Ratio,
+    minContentLength: config.minContentLength ?? DEFAULT_CONFIG.minContentLength,
+    redactSecrets: config.redactSecrets ?? DEFAULT_CONFIG.redactSecrets,
+    removeBase64: config.removeBase64 ?? DEFAULT_CONFIG.removeBase64,
+  };
+  const reasons: string[] = [];
+  // 1. Check for binary content - skip entirely
+  if (hasBinaryContent(content)) {
+    debug("Skipping content with binary characters");
+    return { action: "skip", reasons: ["Contains binary content"] };
+  }
+  // 2. Calculate Base64 ratio
+  const base64Ratio = getBase64Ratio(content);
+  debug(`Base64 ratio: ${(base64Ratio * 100).toFixed(1)}%`);
+  // Skip if too much encoded content
+  if (base64Ratio > cfg.maxBase64Ratio) {
+    debug(`Skipping content: ${(base64Ratio * 100).toFixed(1)}% Base64`);
+    return {
+      action: "skip",
+      reasons: [`${(base64Ratio * 100).toFixed(1)}% is Base64 encoded (threshold: ${(cfg.maxBase64Ratio * 100).toFixed(0)}%)`],
+    };
+  }
+  let cleanedContent = content;
+  // 3. Remove Base64 blocks if present and configured
+  if (cfg.removeBase64 && base64Ratio > 0.1) {
+    cleanedContent = removeBase64Blocks(cleanedContent);
+    reasons.push("Removed Base64 blocks");
+  }
+  // 4. Redact secrets if configured
+  if (cfg.redactSecrets) {
+    const { content: redacted, secretsFound } = redactSecrets(cleanedContent);
+    if (secretsFound.length > 0) {
+      cleanedContent = redacted;
+      reasons.push(`Redacted secrets: ${secretsFound.join(", ")}`);
+    }
+  }
+  // 5. Check if remaining content is meaningful (placeholders do not count)
+  const meaningfulContent = cleanedContent
+    .replace(/\[.*?_REDACTED\]|\[ENCODED\]/g, "")
+    .trim();
+  if (meaningfulContent.length < cfg.minContentLength) {
+    debug(`Skipping: insufficient content after filtering (${meaningfulContent.length} chars)`);
+    return {
+      action: "skip",
+      reasons: ["Insufficient meaningful content after filtering"],
+    };
+  }
+  // Determine action: any recorded cleaning step means the content was filtered
+  const action = reasons.length > 0 ? "filter" : "index";
+  return { action, cleanedContent, reasons };
+}
+/**
+ * Quick check if content should be indexed.
+ * Use this for fast pre-filtering before chunking.
+ *
+ * Rejects content that contains binary/control characters or whose Base64
+ * ratio exceeds the default `maxBase64Ratio`; no cleaning is performed.
+ */
+export function shouldIndexContent(content: string): boolean {
+  if (hasBinaryContent(content)) return false;
+  // Share the threshold with filterContent's default instead of repeating the literal.
+  return getBase64Ratio(content) <= DEFAULT_CONFIG.maxBase64Ratio;
+}

package/src/utils/debug.ts CHANGED Viewed

@@ -10,8 +10,6 @@ const COLORS = {
   dim: "\x1b[2m",
   cyan: "\x1b[36m",
   yellow: "\x1b[33m",
-  red: "\x1b[31m",
-  green: "\x1b[32m",
 } as const;
 /**

package/src/utils/runtime.test.ts ADDED Viewed

@@ -0,0 +1,70 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+describe("runtime checks", () => {
+  // Typed view of the global object, used to install and remove a fake `Bun` global.
+  const globals = globalThis as Record<string, unknown>;
+  beforeEach(() => {
+    vi.resetModules();
+  });
+  afterEach(() => {
+    // Runs even when an assertion throws, so a fake Bun global can never
+    // leak into later tests.
+    delete globals.Bun;
+  });
+  describe("isBunRuntime", () => {
+    it("should return false when Bun global is not defined", async () => {
+      // vitest runs in Node.js, so Bun is not defined
+      const { isBunRuntime } = await import("./runtime.js");
+      expect(isBunRuntime()).toBe(false);
+    });
+    it("should return true when Bun global is defined", async () => {
+      // Mock Bun global
+      globals.Bun = {};
+      const { isBunRuntime } = await import("./runtime.js");
+      expect(isBunRuntime()).toBe(true);
+    });
+  });
+  describe("checkBunRuntime", () => {
+    let mockExit: ReturnType<typeof vi.spyOn>;
+    let mockConsoleError: ReturnType<typeof vi.spyOn>;
+    beforeEach(() => {
+      // Make process.exit throw so the test process survives and the call is observable.
+      mockExit = vi.spyOn(process, "exit").mockImplementation(() => {
+        throw new Error("process.exit called");
+      });
+      mockConsoleError = vi.spyOn(console, "error").mockImplementation(() => {});
+    });
+    afterEach(() => {
+      mockExit.mockRestore();
+      mockConsoleError.mockRestore();
+    });
+    it("should exit with error message when Bun is not available", async () => {
+      const { checkBunRuntime } = await import("./runtime.js");
+      expect(() => checkBunRuntime()).toThrow("process.exit called");
+      expect(mockExit).toHaveBeenCalledWith(1);
+      expect(mockConsoleError).toHaveBeenCalled();
+    });
+    it("should not exit when Bun is available", async () => {
+      globals.Bun = {};
+      const { checkBunRuntime } = await import("./runtime.js");
+      expect(() => checkBunRuntime()).not.toThrow();
+      expect(mockExit).not.toHaveBeenCalled();
+    });
+  });
+  describe("isTTY", () => {
+    it("should return boolean", async () => {
+      const { isTTY } = await import("./runtime.js");
+      expect(typeof isTTY()).toBe("boolean");
+    });
+    it("should return false when stdin or stdout is not a TTY", async () => {
+      // Force stdin to report "not a TTY" so the outcome does not depend on
+      // how the test runner was launched, then restore the original state.
+      const original = Object.getOwnPropertyDescriptor(process.stdin, "isTTY");
+      Object.defineProperty(process.stdin, "isTTY", {
+        value: false,
+        configurable: true,
+        writable: true,
+      });
+      try {
+        const { isTTY } = await import("./runtime.js");
+        expect(isTTY()).toBe(false);
+      } finally {
+        if (original) {
+          Object.defineProperty(process.stdin, "isTTY", original);
+        } else {
+          delete (process.stdin as { isTTY?: boolean }).isTTY;
+        }
+      }
+    });
+  });
+});

package/src/utils/runtime.ts ADDED Viewed

@@ -0,0 +1,40 @@
+/**
+ * Runtime environment checks.
+ */
+/**
+ * Report whether the code is executing under Bun, detected by the presence
+ * of the `Bun` global.
+ */
+export function isBunRuntime(): boolean {
+  const bunGlobalType = typeof Bun;
+  return bunGlobalType !== "undefined";
+}
+/**
+ * Ensure the process is running under Bun.
+ *
+ * When it is not, prints installation instructions via console.error and
+ * terminates the process with exit code 1. This function does not throw;
+ * it returns normally only when Bun is detected.
+ */
+export function checkBunRuntime(): void {
+  if (!isBunRuntime()) {
+    console.error(`
+╭─────────────────────────────────────────────────────────────╮
+│  apple-notes-mcp requires Bun runtime                       │
+│                                                             │
+│  Install Bun:                                               │
+│    curl -fsSL https://bun.sh/install | bash                 │
+│                                                             │
+│  Or with Homebrew:                                          │
+│    brew install bun                                         │
+│                                                             │
+│  Then run again:                                            │
+│    apple-notes-mcp                                          │
+╰─────────────────────────────────────────────────────────────╯
+`);
+    process.exit(1);
+  }
+}
+/**
+ * Report whether the process is attached to an interactive terminal,
+ * i.e. both stdin and stdout are TTYs.
+ */
+export function isTTY(): boolean {
+  // stdout is only consulted once stdin is known to be a TTY.
+  if (process.stdin.isTTY !== true) {
+    return false;
+  }
+  return process.stdout.isTTY === true;
+}

package/src/utils/text.test.ts ADDED Viewed

@@ -0,0 +1,32 @@
+import { describe, it, expect } from "vitest";
+import { truncateForEmbedding } from "./text.js";
+import { MAX_INPUT_LENGTH } from "../config/constants.js";
+describe("truncateForEmbedding", () => {
+  // Inputs at or below the limit must pass through untouched.
+  it("should return text unchanged if within limit", () => {
+    const shortInput = "Short text";
+    expect(truncateForEmbedding(shortInput)).toBe(shortInput);
+  });
+  // Inputs over the default limit are cut to exactly MAX_INPUT_LENGTH characters.
+  it("should truncate text exceeding default limit", () => {
+    const oversized = "a".repeat(MAX_INPUT_LENGTH + 100);
+    const truncated = truncateForEmbedding(oversized);
+    expect(truncated.length).toBe(MAX_INPUT_LENGTH);
+    expect(truncated).toBe("a".repeat(MAX_INPUT_LENGTH));
+  });
+  // An explicit second argument overrides the default limit.
+  it("should use custom maxLength when provided", () => {
+    const truncated = truncateForEmbedding("Hello World", 5);
+    expect(truncated).toBe("Hello");
+  });
+  it("should handle empty string", () => {
+    const truncated = truncateForEmbedding("");
+    expect(truncated).toBe("");
+  });
+  it("should handle text exactly at limit", () => {
+    const atLimit = "a".repeat(MAX_INPUT_LENGTH);
+    expect(truncateForEmbedding(atLimit)).toBe(atLimit);
+  });
+});

package/CLAUDE.md DELETED Viewed

@@ -1,56 +0,0 @@
-# CLAUDE.md
-## Project Overview
-MCP server for Apple Notes with semantic search and CRUD operations.
-## Tech Stack
-- **Runtime**: Bun
-- **Language**: TypeScript
-- **Database**: LanceDB (vector store)
-- **Embeddings**: HuggingFace Transformers (local) or OpenRouter API
-- **Apple Notes**: JXA (JavaScript for Automation)
-## Commands
-```bash
-bun run start      # Start MCP server
-bun run setup      # Interactive setup wizard
-bun run dev        # Watch mode
-bun run check      # Type check
-bun run test       # Run tests (uses vitest, NOT bun test)
-```
-## Project Structure
-```
-src/
-├── index.ts          # MCP server entry (stdio transport)
-├── server.ts         # Smithery-compatible export
-├── setup.ts          # Interactive setup wizard
-├── config/           # Constants and env validation
-├── db/               # LanceDB vector store
-├── embeddings/       # Local and OpenRouter embeddings
-├── notes/            # Apple Notes CRUD via JXA
-├── search/           # Hybrid search and indexing
-└── utils/            # Debug logging, errors, text utils
-```
-## Key Patterns
-- **Dual embedding support**: Detects `OPENROUTER_API_KEY` to choose provider
-- **Hybrid search**: Combines vector + keyword search with RRF fusion
-- **Incremental indexing**: Only re-embeds changed notes
-- **Folder/title disambiguation**: Use `Folder/Note Title` format for duplicates
-## Testing
-Always use `bun run test` (vitest), never `bun test` (incompatible bun runner).
-## Environment Variables
-See README.md for full list. Key ones:
-- `OPENROUTER_API_KEY` - Enables cloud embeddings
-- `READONLY_MODE` - Blocks write operations
-- `DEBUG` - Enables debug logging