@disco_trooper/apple-notes-mcp 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +136 -24
  2. package/package.json +13 -9
  3. package/src/config/claude.test.ts +47 -0
  4. package/src/config/claude.ts +106 -0
  5. package/src/config/constants.ts +11 -2
  6. package/src/config/paths.test.ts +40 -0
  7. package/src/config/paths.ts +86 -0
  8. package/src/db/arrow-fix.test.ts +101 -0
  9. package/src/db/lancedb.test.ts +209 -2
  10. package/src/db/lancedb.ts +373 -7
  11. package/src/embeddings/cache.test.ts +150 -0
  12. package/src/embeddings/cache.ts +204 -0
  13. package/src/embeddings/index.ts +21 -2
  14. package/src/embeddings/local.ts +61 -10
  15. package/src/embeddings/openrouter.ts +233 -11
  16. package/src/graph/export.test.ts +81 -0
  17. package/src/graph/export.ts +163 -0
  18. package/src/graph/extract.test.ts +90 -0
  19. package/src/graph/extract.ts +52 -0
  20. package/src/graph/queries.test.ts +156 -0
  21. package/src/graph/queries.ts +224 -0
  22. package/src/index.ts +376 -10
  23. package/src/notes/crud.test.ts +148 -3
  24. package/src/notes/crud.ts +250 -5
  25. package/src/notes/read.ts +83 -68
  26. package/src/search/chunk-indexer.test.ts +353 -0
  27. package/src/search/chunk-indexer.ts +254 -0
  28. package/src/search/chunk-search.test.ts +327 -0
  29. package/src/search/chunk-search.ts +298 -0
  30. package/src/search/indexer.ts +151 -109
  31. package/src/search/refresh.test.ts +173 -0
  32. package/src/search/refresh.ts +151 -0
  33. package/src/setup.ts +46 -67
  34. package/src/utils/chunker.test.ts +182 -0
  35. package/src/utils/chunker.ts +170 -0
  36. package/src/utils/content-filter.test.ts +225 -0
  37. package/src/utils/content-filter.ts +275 -0
  38. package/src/utils/runtime.test.ts +70 -0
  39. package/src/utils/runtime.ts +40 -0
@@ -0,0 +1,327 @@
1
import { describe, it, expect, vi, beforeEach } from "vitest";
import {
  rrfScore,
  deduplicateByNote,
  filterByFolder,
  searchChunks,
  type ChunkSearchResult,
} from "./chunk-search.js";

// Mock dependencies
// Embedding lookup always resolves to a fixed 3-dim vector so tests are
// deterministic and never touch a real model.
vi.mock("../embeddings/index.js", () => ({
  getEmbedding: vi.fn().mockResolvedValue([0.1, 0.2, 0.3]),
}));

// Chunk store is stubbed per-test via vi.mocked(getChunkStore).
vi.mock("../db/lancedb.js", () => ({
  getChunkStore: vi.fn(),
}));

import { getChunkStore } from "../db/lancedb.js";

describe("rrfScore", () => {
  it("calculates RRF score correctly", () => {
    // RRF formula: 1 / (k + rank) where k = 60
    expect(rrfScore(1)).toBeCloseTo(1 / 61, 5);
    expect(rrfScore(10)).toBeCloseTo(1 / 70, 5);
  });

  it("returns smaller scores for higher ranks", () => {
    // Score must be strictly monotonically decreasing in rank.
    expect(rrfScore(1)).toBeGreaterThan(rrfScore(10));
    expect(rrfScore(10)).toBeGreaterThan(rrfScore(100));
  });

  it("returns correct score for rank 0", () => {
    expect(rrfScore(0)).toBeCloseTo(1 / 60, 5);
  });
});

describe("deduplicateByNote", () => {
  it("keeps best-scoring chunk per note", () => {
    const chunks: ChunkSearchResult[] = [
      {
        note_id: "note1",
        note_title: "Note 1",
        folder: "Work",
        matchedChunk: "chunk 1",
        matchedChunkIndex: 0,
        score: 0.8,
        modified: "2024-01-01",
      },
      {
        note_id: "note1",
        note_title: "Note 1",
        folder: "Work",
        matchedChunk: "chunk 2",
        matchedChunkIndex: 1,
        score: 0.9, // Higher score
        modified: "2024-01-01",
      },
      {
        note_id: "note2",
        note_title: "Note 2",
        folder: "Personal",
        matchedChunk: "chunk 1",
        matchedChunkIndex: 0,
        score: 0.7,
        modified: "2024-01-02",
      },
    ];

    const result = deduplicateByNote(chunks);

    expect(result).toHaveLength(2);
    // note1 should have the higher scoring chunk (0.9)
    const note1 = result.find((r) => r.note_id === "note1");
    expect(note1?.score).toBe(0.9);
    expect(note1?.matchedChunkIndex).toBe(1);
  });

  it("sorts by score descending", () => {
    // Input deliberately out of score order to exercise the sort.
    const chunks: ChunkSearchResult[] = [
      {
        note_id: "note1",
        note_title: "Note 1",
        folder: "Work",
        matchedChunk: "chunk 1",
        matchedChunkIndex: 0,
        score: 0.5,
        modified: "2024-01-01",
      },
      {
        note_id: "note2",
        note_title: "Note 2",
        folder: "Personal",
        matchedChunk: "chunk 1",
        matchedChunkIndex: 0,
        score: 0.9,
        modified: "2024-01-02",
      },
      {
        note_id: "note3",
        note_title: "Note 3",
        folder: "Work",
        matchedChunk: "chunk 1",
        matchedChunkIndex: 0,
        score: 0.7,
        modified: "2024-01-03",
      },
    ];

    const result = deduplicateByNote(chunks);

    expect(result).toHaveLength(3);
    expect(result[0].score).toBe(0.9);
    expect(result[1].score).toBe(0.7);
    expect(result[2].score).toBe(0.5);
  });

  it("handles empty array", () => {
    const result = deduplicateByNote([]);
    expect(result).toHaveLength(0);
  });
});

describe("filterByFolder", () => {
  // Shared fixture: three notes across "Work", "Personal", and a nested
  // "Work/Projects" folder (the nested one must NOT match "work").
  const mockChunks: ChunkSearchResult[] = [
    {
      note_id: "note1",
      note_title: "Note 1",
      folder: "Work",
      matchedChunk: "content",
      matchedChunkIndex: 0,
      score: 1,
      modified: "2024-01-01",
    },
    {
      note_id: "note2",
      note_title: "Note 2",
      folder: "Personal",
      matchedChunk: "content",
      matchedChunkIndex: 0,
      score: 0.9,
      modified: "2024-01-01",
    },
    {
      note_id: "note3",
      note_title: "Note 3",
      folder: "Work/Projects",
      matchedChunk: "content",
      matchedChunkIndex: 0,
      score: 0.8,
      modified: "2024-01-01",
    },
  ];

  it("filters by exact folder name (case insensitive)", () => {
    const filtered = filterByFolder(mockChunks, "work");
    expect(filtered).toHaveLength(1);
    expect(filtered[0].note_title).toBe("Note 1");
  });

  it("returns all results when folder is undefined", () => {
    const filtered = filterByFolder(mockChunks, undefined);
    expect(filtered).toHaveLength(3);
  });

  it("returns empty array when no matches", () => {
    const filtered = filterByFolder(mockChunks, "NonExistent");
    expect(filtered).toHaveLength(0);
  });
});

describe("searchChunks", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("handles empty query", async () => {
    const result = await searchChunks("");
    expect(result).toHaveLength(0);
  });

  it("handles whitespace-only query", async () => {
    const result = await searchChunks("   ");
    expect(result).toHaveLength(0);
  });

  it("deduplicates results by note", async () => {
    // Store returns two chunks for note1 and one for note2; the search
    // result should surface each note once, keeping note1's best chunk.
    const mockStore = {
      searchChunks: vi.fn().mockResolvedValue([
        {
          chunk_id: "note1_chunk_0",
          note_id: "note1",
          note_title: "Note 1",
          folder: "Work",
          chunk_index: 0,
          total_chunks: 2,
          content: "first chunk",
          modified: "2024-01-01",
          score: 0.8,
        },
        {
          chunk_id: "note1_chunk_1",
          note_id: "note1",
          note_title: "Note 1",
          folder: "Work",
          chunk_index: 1,
          total_chunks: 2,
          content: "second chunk",
          modified: "2024-01-01",
          score: 0.9,
        },
        {
          chunk_id: "note2_chunk_0",
          note_id: "note2",
          note_title: "Note 2",
          folder: "Personal",
          chunk_index: 0,
          total_chunks: 1,
          content: "only chunk",
          modified: "2024-01-02",
          score: 0.7,
        },
      ]),
      searchChunksFTS: vi.fn().mockResolvedValue([]),
    };

    vi.mocked(getChunkStore).mockReturnValue(mockStore as unknown as ReturnType<typeof getChunkStore>);

    const results = await searchChunks("test query", { mode: "semantic" });

    // Should have 2 unique notes, not 3 chunks
    expect(results).toHaveLength(2);
    // note1 should have the higher scoring chunk (index 1, score 0.9)
    const note1 = results.find((r) => r.note_id === "note1");
    expect(note1?.matchedChunkIndex).toBe(1);
  });

  it("applies folder filter", async () => {
    const mockStore = {
      searchChunks: vi.fn().mockResolvedValue([
        {
          chunk_id: "note1_chunk_0",
          note_id: "note1",
          note_title: "Note 1",
          folder: "Work",
          chunk_index: 0,
          total_chunks: 1,
          content: "work content",
          modified: "2024-01-01",
          score: 0.9,
        },
        {
          chunk_id: "note2_chunk_0",
          note_id: "note2",
          note_title: "Note 2",
          folder: "Personal",
          chunk_index: 0,
          total_chunks: 1,
          content: "personal content",
          modified: "2024-01-02",
          score: 0.8,
        },
      ]),
      searchChunksFTS: vi.fn().mockResolvedValue([]),
    };

    vi.mocked(getChunkStore).mockReturnValue(mockStore as unknown as ReturnType<typeof getChunkStore>);

    const results = await searchChunks("test query", {
      mode: "semantic",
      folder: "Work",
    });

    expect(results).toHaveLength(1);
    expect(results[0].folder).toBe("Work");
  });

  it("respects limit option", async () => {
    // Three distinct notes come back from the store; limit: 2 should cap
    // the final result count after dedup.
    const mockStore = {
      searchChunks: vi.fn().mockResolvedValue([
        {
          chunk_id: "note1_chunk_0",
          note_id: "note1",
          note_title: "Note 1",
          folder: "Work",
          chunk_index: 0,
          total_chunks: 1,
          content: "content 1",
          modified: "2024-01-01",
          score: 0.9,
        },
        {
          chunk_id: "note2_chunk_0",
          note_id: "note2",
          note_title: "Note 2",
          folder: "Work",
          chunk_index: 0,
          total_chunks: 1,
          content: "content 2",
          modified: "2024-01-02",
          score: 0.8,
        },
        {
          chunk_id: "note3_chunk_0",
          note_id: "note3",
          note_title: "Note 3",
          folder: "Work",
          chunk_index: 0,
          total_chunks: 1,
          content: "content 3",
          modified: "2024-01-03",
          score: 0.7,
        },
      ]),
      searchChunksFTS: vi.fn().mockResolvedValue([]),
    };

    vi.mocked(getChunkStore).mockReturnValue(mockStore as unknown as ReturnType<typeof getChunkStore>);

    const results = await searchChunks("test query", {
      mode: "semantic",
      limit: 2,
    });

    expect(results).toHaveLength(2);
  });
});
@@ -0,0 +1,298 @@
1
+ /**
2
+ * Chunk-based search for Parent Document Retriever pattern.
3
+ *
4
+ * Searches individual chunks but returns results deduplicated by note,
5
+ * showing the best-matching chunk for each note.
6
+ *
7
+ * Uses Reciprocal Rank Fusion (RRF) to merge results from:
8
+ * - Vector search (semantic similarity)
9
+ * - Full-text search (keyword matching)
10
+ */
11
+
12
+ import { getEmbedding } from "../embeddings/index.js";
13
+ import { getEmbeddingCache } from "../embeddings/cache.js";
14
+ import { getChunkStore, type ChunkSearchResult as DBChunkSearchResult } from "../db/lancedb.js";
15
+ import {
16
+ DEFAULT_SEARCH_LIMIT,
17
+ HYBRID_SEARCH_MIN_FETCH,
18
+ RRF_K,
19
+ } from "../config/constants.js";
20
+ import { createDebugLogger } from "../utils/debug.js";
21
+
22
// Module-level debug logger, namespaced "CHUNK_SEARCH" for this module.
const debug = createDebugLogger("CHUNK_SEARCH");
24
+
25
/**
 * Options for chunk search operations.
 */
export interface ChunkSearchOptions {
  /** Restrict results to this folder (case-insensitive exact match) */
  folder?: string;
  /** Maximum number of results (default: DEFAULT_SEARCH_LIMIT) */
  limit?: number;
  /** Search mode: hybrid (RRF merge), keyword (FTS), or semantic (vector); default: hybrid */
  mode?: "hybrid" | "keyword" | "semantic";
}
36
+
37
/**
 * Search result for chunk-based search.
 * Returns one result per note with the best-matching chunk.
 */
export interface ChunkSearchResult {
  /** Apple Notes unique identifier */
  note_id: string;
  /** Note title */
  note_title: string;
  /** Folder containing the note */
  folder: string;
  /** The best-matching chunk content */
  matchedChunk: string;
  /** Index of the matched chunk within the note */
  matchedChunkIndex: number;
  /** Relevance score (higher = more relevant) */
  score: number;
  /** Last modified date (ISO string) */
  modified: string;
}
57
+
58
+ /**
59
+ * Calculate RRF score for a result at a given rank.
60
+ * Formula: 1 / (k + rank)
61
+ * where k is a constant (typically 60) and rank is 0-indexed.
62
+ */
63
+ export function rrfScore(rank: number): number {
64
+ return 1 / (RRF_K + rank);
65
+ }
66
+
67
+ /**
68
+ * Filter results by folder if specified.
69
+ * Case-insensitive folder matching.
70
+ */
71
+ export function filterByFolder(
72
+ chunks: ChunkSearchResult[],
73
+ folder?: string
74
+ ): ChunkSearchResult[] {
75
+ if (!folder) {
76
+ return chunks;
77
+ }
78
+
79
+ const normalizedFolder = folder.toLowerCase();
80
+ return chunks.filter(
81
+ (r) => r.folder.toLowerCase() === normalizedFolder
82
+ );
83
+ }
84
+
85
+ /**
86
+ * Deduplicate chunks by note_id, keeping only the best-scoring chunk for each note.
87
+ * Returns results sorted by score in descending order.
88
+ */
89
+ export function deduplicateByNote(
90
+ chunks: ChunkSearchResult[]
91
+ ): ChunkSearchResult[] {
92
+ // Group by note_id, keeping the highest scoring chunk
93
+ const bestByNote = new Map<string, ChunkSearchResult>();
94
+
95
+ for (const chunk of chunks) {
96
+ const existing = bestByNote.get(chunk.note_id);
97
+ if (!existing || chunk.score > existing.score) {
98
+ bestByNote.set(chunk.note_id, chunk);
99
+ }
100
+ }
101
+
102
+ // Convert to array and sort by score descending
103
+ return Array.from(bestByNote.values()).sort((a, b) => b.score - a.score);
104
+ }
105
+
106
+ /**
107
+ * Convert DB chunk result to ChunkSearchResult format.
108
+ */
109
+ function toChunkSearchResult(
110
+ dbResult: DBChunkSearchResult
111
+ ): ChunkSearchResult {
112
+ return {
113
+ note_id: dbResult.note_id,
114
+ note_title: dbResult.note_title,
115
+ folder: dbResult.folder,
116
+ matchedChunk: dbResult.content,
117
+ matchedChunkIndex: dbResult.chunk_index,
118
+ score: dbResult.score,
119
+ modified: dbResult.modified,
120
+ };
121
+ }
122
+
123
+ /**
124
+ * Get cached or compute embedding for query.
125
+ */
126
+ async function getCachedQueryEmbedding(query: string): Promise<number[]> {
127
+ const cache = getEmbeddingCache();
128
+ return cache.getOrCompute(query, getEmbedding);
129
+ }
130
+
131
+ /**
132
+ * Perform vector-only search on chunks.
133
+ */
134
+ async function vectorSearch(
135
+ query: string,
136
+ limit: number
137
+ ): Promise<ChunkSearchResult[]> {
138
+ debug(`Vector search: "${query}" (limit: ${limit})`);
139
+
140
+ const store = getChunkStore();
141
+ const queryVector = await getCachedQueryEmbedding(query);
142
+
143
+ const results = await store.searchChunks(queryVector, limit);
144
+ return results.map(toChunkSearchResult);
145
+ }
146
+
147
+ /**
148
+ * Perform full-text search only on chunks.
149
+ */
150
+ async function keywordSearch(
151
+ query: string,
152
+ limit: number
153
+ ): Promise<ChunkSearchResult[]> {
154
+ debug(`FTS search: "${query}" (limit: ${limit})`);
155
+
156
+ const store = getChunkStore();
157
+ const results = await store.searchChunksFTS(query, limit);
158
+ return results.map(toChunkSearchResult);
159
+ }
160
+
161
+ /**
162
+ * Perform hybrid search combining vector and FTS results using RRF.
163
+ */
164
+ async function hybridSearch(
165
+ query: string,
166
+ limit: number
167
+ ): Promise<ChunkSearchResult[]> {
168
+ debug(`Hybrid search: "${query}" (limit: ${limit})`);
169
+
170
+ const store = getChunkStore();
171
+
172
+ // Fetch more results for RRF merging
173
+ const fetchLimit = Math.max(limit * 2, HYBRID_SEARCH_MIN_FETCH);
174
+
175
+ // Run both searches in parallel (use cached embedding)
176
+ const [queryVector, ftsResults] = await Promise.all([
177
+ getCachedQueryEmbedding(query),
178
+ store.searchChunksFTS(query, fetchLimit).catch(() => [] as DBChunkSearchResult[]),
179
+ ]);
180
+
181
+ const vectorResults = await store.searchChunks(queryVector, fetchLimit);
182
+
183
+ debug(`Vector results: ${vectorResults.length}, FTS results: ${ftsResults.length}`);
184
+
185
+ // Merge results using Reciprocal Rank Fusion
186
+ // Use chunk_id as key since we want to combine scores for the same chunk
187
+ const scoreMap = new Map<string, number>();
188
+ const contentMap = new Map<string, DBChunkSearchResult>();
189
+
190
+ // Process vector search results
191
+ vectorResults.forEach((item, rank) => {
192
+ const key = item.chunk_id;
193
+ scoreMap.set(key, (scoreMap.get(key) || 0) + rrfScore(rank));
194
+ contentMap.set(key, item);
195
+ });
196
+
197
+ // Process FTS results
198
+ ftsResults.forEach((item, rank) => {
199
+ const key = item.chunk_id;
200
+ scoreMap.set(key, (scoreMap.get(key) || 0) + rrfScore(rank));
201
+ if (!contentMap.has(key)) {
202
+ contentMap.set(key, item);
203
+ }
204
+ });
205
+
206
+ // Sort by combined RRF score and convert to ChunkSearchResult
207
+ const merged = Array.from(scoreMap.entries())
208
+ .sort((a, b) => b[1] - a[1])
209
+ .map(([key, score]) => {
210
+ const dbResult = contentMap.get(key)!;
211
+ return toChunkSearchResult({ ...dbResult, score });
212
+ });
213
+
214
+ return merged;
215
+ }
216
+
217
+ /**
218
+ * Search notes using chunk-based approach.
219
+ *
220
+ * Searches individual chunks for better relevance, then deduplicates
221
+ * by note to return one result per note with the best-matching chunk.
222
+ *
223
+ * @param query - Search query string
224
+ * @param options - Search configuration options
225
+ * @returns Array of search results sorted by relevance, one per note
226
+ *
227
+ * @example
228
+ * ```typescript
229
+ * // Basic hybrid search
230
+ * const results = await searchChunks("project ideas");
231
+ *
232
+ * // Keyword-only search in specific folder
233
+ * const results = await searchChunks("meeting", {
234
+ * mode: "keyword",
235
+ * folder: "Work",
236
+ * limit: 10,
237
+ * });
238
+ *
239
+ * // Semantic search
240
+ * const results = await searchChunks("concepts similar to machine learning", {
241
+ * mode: "semantic",
242
+ * });
243
+ * ```
244
+ */
245
+ export async function searchChunks(
246
+ query: string,
247
+ options: ChunkSearchOptions = {}
248
+ ): Promise<ChunkSearchResult[]> {
249
+ const {
250
+ folder,
251
+ limit = DEFAULT_SEARCH_LIMIT,
252
+ mode = "hybrid",
253
+ } = options;
254
+
255
+ if (!query || query.trim().length === 0) {
256
+ debug("Empty query, returning empty results");
257
+ return [];
258
+ }
259
+
260
+ const trimmedQuery = query.trim();
261
+
262
+ debug(`searchChunks: "${trimmedQuery}" mode=${mode} folder=${folder || "all"} limit=${limit}`);
263
+
264
+ // Fetch more results than needed because:
265
+ // 1. Deduplication may reduce count
266
+ // 2. Folder filtering may reduce count
267
+ const fetchMultiplier = folder ? 3 : 2;
268
+ const fetchLimit = Math.max(limit * fetchMultiplier, HYBRID_SEARCH_MIN_FETCH);
269
+
270
+ let rawResults: ChunkSearchResult[];
271
+
272
+ switch (mode) {
273
+ case "keyword":
274
+ rawResults = await keywordSearch(trimmedQuery, fetchLimit);
275
+ break;
276
+
277
+ case "semantic":
278
+ rawResults = await vectorSearch(trimmedQuery, fetchLimit);
279
+ break;
280
+
281
+ case "hybrid":
282
+ default:
283
+ rawResults = await hybridSearch(trimmedQuery, fetchLimit);
284
+ break;
285
+ }
286
+
287
+ // Apply folder filter
288
+ const filtered = filterByFolder(rawResults, folder);
289
+
290
+ // Deduplicate by note (keep best chunk per note)
291
+ const deduplicated = deduplicateByNote(filtered);
292
+
293
+ // Apply limit
294
+ const results = deduplicated.slice(0, limit);
295
+
296
+ debug(`Returning ${results.length} results (from ${rawResults.length} chunks)`);
297
+ return results;
298
+ }