@disco_trooper/apple-notes-mcp 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +104 -24
  2. package/package.json +11 -12
  3. package/src/config/claude.test.ts +47 -0
  4. package/src/config/claude.ts +106 -0
  5. package/src/config/constants.ts +11 -2
  6. package/src/config/paths.test.ts +40 -0
  7. package/src/config/paths.ts +86 -0
  8. package/src/db/arrow-fix.test.ts +101 -0
  9. package/src/db/lancedb.test.ts +254 -2
  10. package/src/db/lancedb.ts +385 -38
  11. package/src/embeddings/cache.test.ts +150 -0
  12. package/src/embeddings/cache.ts +204 -0
  13. package/src/embeddings/index.ts +22 -4
  14. package/src/embeddings/local.ts +57 -17
  15. package/src/embeddings/openrouter.ts +233 -11
  16. package/src/errors/index.test.ts +64 -0
  17. package/src/errors/index.ts +62 -0
  18. package/src/graph/export.test.ts +81 -0
  19. package/src/graph/export.ts +163 -0
  20. package/src/graph/extract.test.ts +90 -0
  21. package/src/graph/extract.ts +52 -0
  22. package/src/graph/queries.test.ts +156 -0
  23. package/src/graph/queries.ts +224 -0
  24. package/src/index.ts +309 -23
  25. package/src/notes/conversion.ts +62 -0
  26. package/src/notes/crud.test.ts +41 -8
  27. package/src/notes/crud.ts +75 -64
  28. package/src/notes/read.test.ts +58 -3
  29. package/src/notes/read.ts +142 -210
  30. package/src/notes/resolve.ts +174 -0
  31. package/src/notes/tables.ts +69 -40
  32. package/src/search/chunk-indexer.test.ts +353 -0
  33. package/src/search/chunk-indexer.ts +207 -0
  34. package/src/search/chunk-search.test.ts +327 -0
  35. package/src/search/chunk-search.ts +298 -0
  36. package/src/search/index.ts +4 -6
  37. package/src/search/indexer.ts +164 -109
  38. package/src/setup.ts +46 -67
  39. package/src/types/index.ts +4 -0
  40. package/src/utils/chunker.test.ts +182 -0
  41. package/src/utils/chunker.ts +170 -0
  42. package/src/utils/content-filter.test.ts +225 -0
  43. package/src/utils/content-filter.ts +275 -0
  44. package/src/utils/debug.ts +0 -2
  45. package/src/utils/runtime.test.ts +70 -0
  46. package/src/utils/runtime.ts +40 -0
  47. package/src/utils/text.test.ts +32 -0
  48. package/CLAUDE.md +0 -56
  49. package/src/server.ts +0 -427
@@ -0,0 +1,275 @@
1
+ /**
2
+ * Content quality filter for RAG indexing.
3
+ * Detects and filters Base64-encoded, binary, and secret content.
4
+ */
5
+
6
+ import { createDebugLogger } from "./debug.js";
7
+
8
+ const debug = createDebugLogger("CONTENT_FILTER");
9
+
10
/**
 * Outcome of running a piece of content through the filter.
 */
export interface FilterResult {
  /**
   * What the caller should do with the content:
   * - "index": nothing was changed, index as-is
   * - "filter": something was removed or redacted, index `cleanedContent`
   * - "skip": do not index this content at all
   */
  action: "index" | "filter" | "skip";
  /** Content to index; present for "index" and "filter", absent for "skip" */
  cleanedContent?: string;
  /** Human-readable explanations of what was filtered or why it was skipped */
  reasons: string[];
}
21
+
22
/**
 * Calculate the Shannon entropy of a string, in bits per character.
 * Higher entropy = characters are spread more evenly = more random/encoded content.
 *
 * Characters are counted as Unicode code points, and the total used for the
 * probabilities is the number of code points (not `str.length`, which counts
 * UTF-16 code units and would skew the result for emoji and other astral
 * characters).
 *
 * Rule-of-thumb values (heuristics, not guarantees):
 * - Normal text: roughly 0.8 - 4.5
 * - Base64: roughly 5.0 - 6.0
 *
 * @param str - String to analyze
 * @returns Entropy in bits per character: 0 for an empty string or a single
 *   repeated character, at most log2(number of distinct characters)
 */
export function calculateEntropy(str: string): number {
  if (!str) return 0;

  // Tally occurrences per code point and count the code points as we go.
  const freq = new Map<string, number>();
  let total = 0;
  for (const char of str) {
    freq.set(char, (freq.get(char) ?? 0) + 1);
    total += 1;
  }

  let entropy = 0;
  for (const count of freq.values()) {
    const p = count / total;
    entropy -= p * Math.log2(p);
  }

  return entropy;
}
51
+
52
/**
 * Matches a run of 40 or more characters from the standard Base64 alphabet
 * (A-Z, a-z, 0-9, "+", "/"), optionally followed by up to two "=" padding
 * characters. Global, so every run in a string is found.
 */
const BASE64_PATTERN: RegExp = /[A-Za-z0-9+/]{40,}={0,2}/g;

/**
 * Same as above for the URL-safe Base64 alphabet, where "-" and "_" take
 * the place of "+" and "/".
 */
const BASE64_URL_SAFE_PATTERN: RegExp = /[A-Za-z0-9_-]{40,}={0,2}/g;
61
+
62
/**
 * Patterns for common secrets/tokens, keyed by the name used in the
 * `[NAME_REDACTED]` placeholder.
 *
 * Every pattern is global so that all occurrences in a document are matched.
 */
const SECRET_PATTERNS: Record<string, RegExp> = {
  // Private keys (PEM). Matches the BEGIN header and, when a matching END
  // footer follows, the whole block including the key material. If no footer
  // is present (truncated paste) only the header is matched.
  privateKey:
    /-----BEGIN (?:RSA |DSA |EC |OPENSSH |PGP )?PRIVATE KEY(?: BLOCK)?-----(?:[\s\S]*?-----END (?:RSA |DSA |EC |OPENSSH |PGP )?PRIVATE KEY(?: BLOCK)?-----)?/g,

  // JWT tokens: "eyJ" header segment, a payload segment, optional signature
  jwt: /eyJ[A-Za-z0-9-_=]+\.[A-Za-z0-9-_=]+\.?[A-Za-z0-9-_.+/=]*/g,

  // AWS access key IDs: "AKIA" followed by 16 upper-case letters/digits
  awsAccessKey: /AKIA[0-9A-Z]{16}/g,

  // GitHub tokens: "ghp_" + 36 chars, and fine-grained "github_pat_" tokens
  githubToken: /ghp_[a-zA-Z0-9]{36}/g,
  githubFineGrained: /github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59}/g,

  // Slack tokens: "xoxb-", "xoxa-", "xoxp-", "xoxr-" or "xoxs-" prefix
  slackToken: /xox[baprs]-[0-9a-zA-Z]{10,48}/g,

  // Stripe live secret keys
  stripeKey: /sk_live_[0-9a-zA-Z]{24}/g,

  // Database URIs that embed credentials (scheme://user:password@host...)
  dbUri: /(?:mongodb|postgres(?:ql)?|mysql|redis):\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/g,
};
88
+
89
/**
 * Heuristic test for whether a string segment looks Base64 encoded.
 *
 * A segment qualifies when it is at least 40 characters long, consists only
 * of characters from the standard or URL-safe Base64 alphabets (plus "="),
 * and has an entropy above 4.5.
 */
export function isLikelyBase64(str: string): boolean {
  const longEnough = str.length >= 40;
  const base64CharsOnly = longEnough && /^[A-Za-z0-9+/=_-]+$/.test(str);

  // Entropy is only computed once the cheaper checks have passed.
  return base64CharsOnly && calculateEntropy(str) > 4.5;
}
103
+
104
/**
 * Calculate the fraction of a string that is covered by Base64-like content.
 *
 * Both the standard and URL-safe patterns are applied, and only matches that
 * pass `isLikelyBase64` count. Matches are tracked by position and overlapping
 * ranges are merged, so each character is counted at most once (the URL-safe
 * pattern also matches alphanumeric sub-runs of a standard Base64 blob) and a
 * blob that appears several times is counted every time.
 *
 * @param content - Text to inspect
 * @returns Ratio between 0 and 1; 0 for empty content
 */
export function getBase64Ratio(content: string): number {
  if (content.length === 0) return 0;

  // Collect [start, end) ranges of every likely-Base64 match.
  const spans: Array<[number, number]> = [];
  for (const pattern of [BASE64_PATTERN, BASE64_URL_SAFE_PATTERN]) {
    // matchAll starts from the pattern's lastIndex; make sure that is 0.
    pattern.lastIndex = 0;
    for (const match of content.matchAll(pattern)) {
      const text = match[0];
      if (isLikelyBase64(text)) {
        const start = match.index ?? 0;
        spans.push([start, start + text.length]);
      }
    }
  }

  // Sweep the ranges left to right, counting only not-yet-covered characters.
  spans.sort((a, b) => a[0] - b[0]);
  let covered = 0;
  let coveredUpTo = 0;
  for (const [start, end] of spans) {
    if (end <= coveredUpTo) continue;
    covered += end - Math.max(start, coveredUpTo);
    coveredUpTo = end;
  }

  return covered / content.length;
}
123
+
124
/**
 * Report whether the content contains a NUL byte or another ASCII control
 * character. Tab, line feed and carriage return are allowed.
 */
export function hasBinaryContent(content: string): boolean {
  for (let i = 0; i < content.length; i++) {
    const code = content.charCodeAt(i);
    // Control range is 0x00-0x1F; 0x09 (tab), 0x0A (LF) and 0x0D (CR) are fine.
    if (code < 0x20 && code !== 0x09 && code !== 0x0a && code !== 0x0d) {
      return true;
    }
  }
  return false;
}
131
+
132
/**
 * Replace Base64 blocks in the content with the "[ENCODED]" placeholder.
 *
 * Standard Base64 runs are handled first, then URL-safe runs. A run is only
 * replaced when `isLikelyBase64` accepts it; anything else is left untouched.
 */
export function removeBase64Blocks(content: string): string {
  const maskIfEncoded = (candidate: string): string =>
    isLikelyBase64(candidate) ? "[ENCODED]" : candidate;

  return content
    .replace(BASE64_PATTERN, maskIfEncoded)
    .replace(BASE64_URL_SAFE_PATTERN, maskIfEncoded);
}
156
+
157
/**
 * Redact detected secrets in content.
 *
 * Each entry of `SECRET_PATTERNS` is applied in declaration order and every
 * match is replaced by `[<NAME>_REDACTED]`, where `<NAME>` is the upper-cased
 * pattern key.
 *
 * @param content - Text to scan
 * @returns The redacted text and the keys of the patterns that matched, in
 *   the order they were applied
 */
export function redactSecrets(content: string): { content: string; secretsFound: string[] } {
  let result = content;
  const secretsFound: string[] = [];

  for (const [name, pattern] of Object.entries(SECRET_PATTERNS)) {
    // Work on a global copy: this redacts every occurrence even if the source
    // pattern lacks the "g" flag, and never touches the shared pattern's
    // lastIndex state.
    const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
    const everyOccurrence = new RegExp(pattern.source, flags);
    const placeholder = `[${name.toUpperCase()}_REDACTED]`;

    let hits = 0;
    result = result.replace(everyOccurrence, () => {
      hits += 1;
      return placeholder;
    });

    if (hits > 0) {
      secretsFound.push(name);
    }
  }

  return { content: result, secretsFound };
}
175
+
176
/**
 * Options accepted by `filterContent`. Every field is optional; omitted
 * fields take the value from `DEFAULT_CONFIG`.
 */
export interface FilterConfig {
  /** Base64 ratio above which the content is skipped entirely (default: 0.5) */
  maxBase64Ratio?: number;
  /** Fewest characters that must remain after filtering for the content to be kept (default: 50) */
  minContentLength?: number;
  /** Replace detected secrets with placeholders (default: true) */
  redactSecrets?: boolean;
  /** Replace Base64 blocks with a placeholder (default: true) */
  removeBase64?: boolean;
}

/** Values used for any `FilterConfig` field the caller leaves out. */
const DEFAULT_CONFIG: Required<FilterConfig> = {
  maxBase64Ratio: 0.5,
  minContentLength: 50,
  redactSecrets: true,
  removeBase64: true,
};
196
+
197
/**
 * Filter content for RAG indexing.
 *
 * Steps, in order:
 * 1. Content with binary/control characters is skipped.
 * 2. Content whose Base64 ratio exceeds `maxBase64Ratio` is skipped.
 * 3. If enabled and the Base64 ratio is above 10%, Base64 blocks are replaced.
 * 4. If enabled, detected secrets are redacted.
 * 5. Content with fewer than `minContentLength` characters left (placeholders
 *    excluded) is skipped.
 *
 * @param content - Raw content to filter
 * @param config - Filter configuration; missing or `undefined` fields use the defaults
 * @returns "skip" with reasons, "filter" with the cleaned content and reasons,
 *   or "index" with the unchanged content and no reasons
 */
export function filterContent(
  content: string,
  config: FilterConfig = {}
): FilterResult {
  // Resolve each option on its own so an explicit `undefined` falls back to
  // the default instead of overriding it.
  const cfg: Required<FilterConfig> = {
    maxBase64Ratio: config.maxBase64Ratio ?? DEFAULT_CONFIG.maxBase64Ratio,
    minContentLength: config.minContentLength ?? DEFAULT_CONFIG.minContentLength,
    redactSecrets: config.redactSecrets ?? DEFAULT_CONFIG.redactSecrets,
    removeBase64: config.removeBase64 ?? DEFAULT_CONFIG.removeBase64,
  };
  const reasons: string[] = [];

  // 1. Check for binary content - skip entirely
  if (hasBinaryContent(content)) {
    debug("Skipping content with binary characters");
    return { action: "skip", reasons: ["Contains binary content"] };
  }

  // 2. Calculate Base64 ratio
  const base64Ratio = getBase64Ratio(content);
  const base64Percent = (base64Ratio * 100).toFixed(1);
  debug(`Base64 ratio: ${base64Percent}%`);

  // Skip if too much encoded content
  if (base64Ratio > cfg.maxBase64Ratio) {
    debug(`Skipping content: ${base64Percent}% Base64`);
    return {
      action: "skip",
      reasons: [`${base64Percent}% is Base64 encoded (threshold: ${(cfg.maxBase64Ratio * 100).toFixed(0)}%)`],
    };
  }

  let cleanedContent = content;

  // 3. Remove Base64 blocks if configured and they make up a noticeable share
  const minRatioForBase64Removal = 0.1;
  if (cfg.removeBase64 && base64Ratio > minRatioForBase64Removal) {
    cleanedContent = removeBase64Blocks(cleanedContent);
    reasons.push("Removed Base64 blocks");
  }

  // 4. Redact secrets if configured
  if (cfg.redactSecrets) {
    const { content: redacted, secretsFound } = redactSecrets(cleanedContent);
    if (secretsFound.length > 0) {
      cleanedContent = redacted;
      reasons.push(`Redacted secrets: ${secretsFound.join(", ")}`);
    }
  }

  // 5. Check if remaining content is meaningful.
  // Only strip the placeholders themselves: the name part is restricted to
  // upper-case letters, digits and "_" so a match cannot start at an unrelated
  // "[" earlier on the line and swallow real text.
  const meaningfulContent = cleanedContent
    .replace(/\[[A-Z0-9_]+_REDACTED\]|\[ENCODED\]/g, "")
    .trim();

  if (meaningfulContent.length < cfg.minContentLength) {
    debug(`Skipping: insufficient content after filtering (${meaningfulContent.length} chars)`);
    return {
      action: "skip",
      reasons: ["Insufficient meaningful content after filtering"],
    };
  }

  // Anything that was altered is reported as "filter"; untouched content as "index"
  const action = reasons.length > 0 ? "filter" : "index";

  return { action, cleanedContent, reasons };
}
265
+
266
/**
 * Quick check if content should be indexed.
 * Use this for fast pre-filtering before chunking.
 *
 * Applies the same two skip rules as `filterContent` with default settings:
 * binary/control characters, or a Base64 ratio above the default maximum.
 *
 * @param content - Raw content to check
 * @returns `false` when the content would be skipped by those rules
 */
export function shouldIndexContent(content: string): boolean {
  if (hasBinaryContent(content)) return false;
  // Use the shared default rather than a second copy of the threshold.
  return getBase64Ratio(content) <= DEFAULT_CONFIG.maxBase64Ratio;
}
@@ -10,8 +10,6 @@ const COLORS = {
10
10
  dim: "\x1b[2m",
11
11
  cyan: "\x1b[36m",
12
12
  yellow: "\x1b[33m",
13
- red: "\x1b[31m",
14
- green: "\x1b[32m",
15
13
  } as const;
16
14
 
17
15
  /**
@@ -0,0 +1,70 @@
1
+ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
2
+
3
describe("runtime checks", () => {
  // Loosely typed view of the global object so tests can install and remove
  // a fake `Bun` global.
  const globals = globalThis as Record<string, unknown>;

  beforeEach(() => {
    // Clear the module cache so each test imports a fresh ./runtime.js.
    vi.resetModules();
  });

  afterEach(() => {
    // Always remove the fake global, even when an assertion failed mid-test,
    // so it cannot leak into later tests.
    delete globals.Bun;
  });

  describe("isBunRuntime", () => {
    it("should return false when Bun global is not defined", async () => {
      // vitest runs in Node.js, so Bun is not defined
      const { isBunRuntime } = await import("./runtime.js");
      expect(isBunRuntime()).toBe(false);
    });

    it("should return true when Bun global is defined", async () => {
      // Mock Bun global (removed again by the shared afterEach)
      globals.Bun = {};
      const { isBunRuntime } = await import("./runtime.js");
      expect(isBunRuntime()).toBe(true);
    });
  });

  describe("checkBunRuntime", () => {
    let mockExit: ReturnType<typeof vi.spyOn>;
    let mockConsoleError: ReturnType<typeof vi.spyOn>;

    beforeEach(() => {
      // Make process.exit throw so the test process survives and the call is observable.
      mockExit = vi.spyOn(process, "exit").mockImplementation(() => {
        throw new Error("process.exit called");
      });
      // Silence the banner printed on failure.
      mockConsoleError = vi.spyOn(console, "error").mockImplementation(() => {});
    });

    afterEach(() => {
      mockExit.mockRestore();
      mockConsoleError.mockRestore();
    });

    it("should exit with error message when Bun is not available", async () => {
      const { checkBunRuntime } = await import("./runtime.js");
      expect(() => checkBunRuntime()).toThrow("process.exit called");
      expect(mockExit).toHaveBeenCalledWith(1);
      expect(mockConsoleError).toHaveBeenCalled();
    });

    it("should not exit when Bun is available", async () => {
      globals.Bun = {};
      const { checkBunRuntime } = await import("./runtime.js");
      expect(() => checkBunRuntime()).not.toThrow();
      expect(mockExit).not.toHaveBeenCalled();
    });
  });

  describe("isTTY", () => {
    it("should return boolean", async () => {
      const { isTTY } = await import("./runtime.js");
      expect(typeof isTTY()).toBe("boolean");
    });

    it("should return false when stdin or stdout is not a TTY", async () => {
      // In CI/test environments, typically not a TTY
      const { isTTY } = await import("./runtime.js");
      // The result depends on the environment, so this only checks that a
      // strict boolean comes back.
      const result = isTTY();
      expect(result === true || result === false).toBe(true);
    });
  });
});
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Runtime environment checks.
3
+ */
4
+
5
/**
 * Report whether a `Bun` global is defined, i.e. the code is running under Bun.
 */
export function isBunRuntime(): boolean {
  // `typeof` is safe on an undeclared global; it never throws.
  const bunGlobalType = typeof Bun;
  return bunGlobalType !== "undefined";
}
11
+
12
/**
 * Ensure the process is running under Bun.
 *
 * When it is not, prints install instructions to stderr and terminates the
 * process with exit code 1 (it does not throw). Returns normally otherwise.
 */
export function checkBunRuntime(): void {
  if (isBunRuntime()) {
    return;
  }

  console.error(`
╭─────────────────────────────────────────────────────────────╮
│ apple-notes-mcp requires Bun runtime │
│ │
│ Install Bun: │
│ curl -fsSL https://bun.sh/install | bash │
│ │
│ Or with Homebrew: │
│ brew install bun │
│ │
│ Then run again: │
│ apple-notes-mcp │
╰─────────────────────────────────────────────────────────────╯
`);
  process.exit(1);
}
34
+
35
/**
 * Report whether the process is attached to an interactive terminal:
 * both stdin and stdout must be TTYs.
 */
export function isTTY(): boolean {
  // Check stdin first; stdout is only consulted when stdin is a TTY.
  if (process.stdin.isTTY !== true) {
    return false;
  }
  return process.stdout.isTTY === true;
}
@@ -0,0 +1,32 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { truncateForEmbedding } from "./text.js";
3
+ import { MAX_INPUT_LENGTH } from "../config/constants.js";
4
+
5
describe("truncateForEmbedding", () => {
  it("should return text unchanged if within limit", () => {
    const short = "Short text";
    expect(truncateForEmbedding(short)).toBe(short);
  });

  it("should truncate text exceeding default limit", () => {
    // Input is 100 characters over the default limit.
    const atLimit = "a".repeat(MAX_INPUT_LENGTH);
    const truncated = truncateForEmbedding("a".repeat(MAX_INPUT_LENGTH + 100));
    expect(truncated.length).toBe(MAX_INPUT_LENGTH);
    expect(truncated).toBe(atLimit);
  });

  it("should use custom maxLength when provided", () => {
    expect(truncateForEmbedding("Hello World", 5)).toBe("Hello");
  });

  it("should handle empty string", () => {
    const empty = "";
    expect(truncateForEmbedding(empty)).toBe("");
  });

  it("should handle text exactly at limit", () => {
    // Boundary case: nothing should be cut off.
    const atLimit = "a".repeat(MAX_INPUT_LENGTH);
    expect(truncateForEmbedding(atLimit)).toBe(atLimit);
  });
});
package/CLAUDE.md DELETED
@@ -1,56 +0,0 @@
1
- # CLAUDE.md
2
-
3
- ## Project Overview
4
-
5
- MCP server for Apple Notes with semantic search and CRUD operations.
6
-
7
- ## Tech Stack
8
-
9
- - **Runtime**: Bun
10
- - **Language**: TypeScript
11
- - **Database**: LanceDB (vector store)
12
- - **Embeddings**: HuggingFace Transformers (local) or OpenRouter API
13
- - **Apple Notes**: JXA (JavaScript for Automation)
14
-
15
- ## Commands
16
-
17
- ```bash
18
- bun run start # Start MCP server
19
- bun run setup # Interactive setup wizard
20
- bun run dev # Watch mode
21
- bun run check # Type check
22
- bun run test # Run tests (uses vitest, NOT bun test)
23
- ```
24
-
25
- ## Project Structure
26
-
27
- ```
28
- src/
29
- ├── index.ts # MCP server entry (stdio transport)
30
- ├── server.ts # Smithery-compatible export
31
- ├── setup.ts # Interactive setup wizard
32
- ├── config/ # Constants and env validation
33
- ├── db/ # LanceDB vector store
34
- ├── embeddings/ # Local and OpenRouter embeddings
35
- ├── notes/ # Apple Notes CRUD via JXA
36
- ├── search/ # Hybrid search and indexing
37
- └── utils/ # Debug logging, errors, text utils
38
- ```
39
-
40
- ## Key Patterns
41
-
42
- - **Dual embedding support**: Detects `OPENROUTER_API_KEY` to choose provider
43
- - **Hybrid search**: Combines vector + keyword search with RRF fusion
44
- - **Incremental indexing**: Only re-embeds changed notes
45
- - **Folder/title disambiguation**: Use `Folder/Note Title` format for duplicates
46
-
47
- ## Testing
48
-
49
- Always use `bun run test` (vitest), never `bun test` (incompatible bun runner).
50
-
51
- ## Environment Variables
52
-
53
- See README.md for full list. Key ones:
54
- - `OPENROUTER_API_KEY` - Enables cloud embeddings
55
- - `READONLY_MODE` - Blocks write operations
56
- - `DEBUG` - Enables debug logging