@disco_trooper/apple-notes-mcp 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. package/README.md +104 -24
  2. package/package.json +11 -12
  3. package/src/config/claude.test.ts +47 -0
  4. package/src/config/claude.ts +106 -0
  5. package/src/config/constants.ts +11 -2
  6. package/src/config/paths.test.ts +40 -0
  7. package/src/config/paths.ts +86 -0
  8. package/src/db/arrow-fix.test.ts +101 -0
  9. package/src/db/lancedb.test.ts +254 -2
  10. package/src/db/lancedb.ts +385 -38
  11. package/src/embeddings/cache.test.ts +150 -0
  12. package/src/embeddings/cache.ts +204 -0
  13. package/src/embeddings/index.ts +22 -4
  14. package/src/embeddings/local.ts +57 -17
  15. package/src/embeddings/openrouter.ts +233 -11
  16. package/src/errors/index.test.ts +64 -0
  17. package/src/errors/index.ts +62 -0
  18. package/src/graph/export.test.ts +81 -0
  19. package/src/graph/export.ts +163 -0
  20. package/src/graph/extract.test.ts +90 -0
  21. package/src/graph/extract.ts +52 -0
  22. package/src/graph/queries.test.ts +156 -0
  23. package/src/graph/queries.ts +224 -0
  24. package/src/index.ts +309 -23
  25. package/src/notes/conversion.ts +62 -0
  26. package/src/notes/crud.test.ts +41 -8
  27. package/src/notes/crud.ts +75 -64
  28. package/src/notes/read.test.ts +58 -3
  29. package/src/notes/read.ts +142 -210
  30. package/src/notes/resolve.ts +174 -0
  31. package/src/notes/tables.ts +69 -40
  32. package/src/search/chunk-indexer.test.ts +353 -0
  33. package/src/search/chunk-indexer.ts +207 -0
  34. package/src/search/chunk-search.test.ts +327 -0
  35. package/src/search/chunk-search.ts +298 -0
  36. package/src/search/index.ts +4 -6
  37. package/src/search/indexer.ts +164 -109
  38. package/src/setup.ts +46 -67
  39. package/src/types/index.ts +4 -0
  40. package/src/utils/chunker.test.ts +182 -0
  41. package/src/utils/chunker.ts +170 -0
  42. package/src/utils/content-filter.test.ts +225 -0
  43. package/src/utils/content-filter.ts +275 -0
  44. package/src/utils/debug.ts +0 -2
  45. package/src/utils/runtime.test.ts +70 -0
  46. package/src/utils/runtime.ts +40 -0
  47. package/src/utils/text.test.ts +32 -0
  48. package/CLAUDE.md +0 -56
  49. package/src/server.ts +0 -427
@@ -0,0 +1,327 @@
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
+ import {
3
+ rrfScore,
4
+ deduplicateByNote,
5
+ filterByFolder,
6
+ searchChunks,
7
+ type ChunkSearchResult,
8
+ } from "./chunk-search.js";
9
+
10
+ // Mock dependencies
11
+ vi.mock("../embeddings/index.js", () => ({
12
+ getEmbedding: vi.fn().mockResolvedValue([0.1, 0.2, 0.3]),
13
+ }));
14
+
15
+ vi.mock("../db/lancedb.js", () => ({
16
+ getChunkStore: vi.fn(),
17
+ }));
18
+
19
+ import { getChunkStore } from "../db/lancedb.js";
20
+
21
+ describe("rrfScore", () => {
22
+ it("calculates RRF score correctly", () => {
23
+ // RRF formula: 1 / (k + rank) where k = 60
24
+ expect(rrfScore(1)).toBeCloseTo(1 / 61, 5);
25
+ expect(rrfScore(10)).toBeCloseTo(1 / 70, 5);
26
+ });
27
+
28
+ it("returns smaller scores for higher ranks", () => {
29
+ expect(rrfScore(1)).toBeGreaterThan(rrfScore(10));
30
+ expect(rrfScore(10)).toBeGreaterThan(rrfScore(100));
31
+ });
32
+
33
+ it("returns correct score for rank 0", () => {
34
+ expect(rrfScore(0)).toBeCloseTo(1 / 60, 5);
35
+ });
36
+ });
37
+
38
+ describe("deduplicateByNote", () => {
39
+ it("keeps best-scoring chunk per note", () => {
40
+ const chunks: ChunkSearchResult[] = [
41
+ {
42
+ note_id: "note1",
43
+ note_title: "Note 1",
44
+ folder: "Work",
45
+ matchedChunk: "chunk 1",
46
+ matchedChunkIndex: 0,
47
+ score: 0.8,
48
+ modified: "2024-01-01",
49
+ },
50
+ {
51
+ note_id: "note1",
52
+ note_title: "Note 1",
53
+ folder: "Work",
54
+ matchedChunk: "chunk 2",
55
+ matchedChunkIndex: 1,
56
+ score: 0.9, // Higher score
57
+ modified: "2024-01-01",
58
+ },
59
+ {
60
+ note_id: "note2",
61
+ note_title: "Note 2",
62
+ folder: "Personal",
63
+ matchedChunk: "chunk 1",
64
+ matchedChunkIndex: 0,
65
+ score: 0.7,
66
+ modified: "2024-01-02",
67
+ },
68
+ ];
69
+
70
+ const result = deduplicateByNote(chunks);
71
+
72
+ expect(result).toHaveLength(2);
73
+ // note1 should have the higher scoring chunk (0.9)
74
+ const note1 = result.find((r) => r.note_id === "note1");
75
+ expect(note1?.score).toBe(0.9);
76
+ expect(note1?.matchedChunkIndex).toBe(1);
77
+ });
78
+
79
+ it("sorts by score descending", () => {
80
+ const chunks: ChunkSearchResult[] = [
81
+ {
82
+ note_id: "note1",
83
+ note_title: "Note 1",
84
+ folder: "Work",
85
+ matchedChunk: "chunk 1",
86
+ matchedChunkIndex: 0,
87
+ score: 0.5,
88
+ modified: "2024-01-01",
89
+ },
90
+ {
91
+ note_id: "note2",
92
+ note_title: "Note 2",
93
+ folder: "Personal",
94
+ matchedChunk: "chunk 1",
95
+ matchedChunkIndex: 0,
96
+ score: 0.9,
97
+ modified: "2024-01-02",
98
+ },
99
+ {
100
+ note_id: "note3",
101
+ note_title: "Note 3",
102
+ folder: "Work",
103
+ matchedChunk: "chunk 1",
104
+ matchedChunkIndex: 0,
105
+ score: 0.7,
106
+ modified: "2024-01-03",
107
+ },
108
+ ];
109
+
110
+ const result = deduplicateByNote(chunks);
111
+
112
+ expect(result).toHaveLength(3);
113
+ expect(result[0].score).toBe(0.9);
114
+ expect(result[1].score).toBe(0.7);
115
+ expect(result[2].score).toBe(0.5);
116
+ });
117
+
118
+ it("handles empty array", () => {
119
+ const result = deduplicateByNote([]);
120
+ expect(result).toHaveLength(0);
121
+ });
122
+ });
123
+
124
+ describe("filterByFolder", () => {
125
+ const mockChunks: ChunkSearchResult[] = [
126
+ {
127
+ note_id: "note1",
128
+ note_title: "Note 1",
129
+ folder: "Work",
130
+ matchedChunk: "content",
131
+ matchedChunkIndex: 0,
132
+ score: 1,
133
+ modified: "2024-01-01",
134
+ },
135
+ {
136
+ note_id: "note2",
137
+ note_title: "Note 2",
138
+ folder: "Personal",
139
+ matchedChunk: "content",
140
+ matchedChunkIndex: 0,
141
+ score: 0.9,
142
+ modified: "2024-01-01",
143
+ },
144
+ {
145
+ note_id: "note3",
146
+ note_title: "Note 3",
147
+ folder: "Work/Projects",
148
+ matchedChunk: "content",
149
+ matchedChunkIndex: 0,
150
+ score: 0.8,
151
+ modified: "2024-01-01",
152
+ },
153
+ ];
154
+
155
+ it("filters by exact folder name (case insensitive)", () => {
156
+ const filtered = filterByFolder(mockChunks, "work");
157
+ expect(filtered).toHaveLength(1);
158
+ expect(filtered[0].note_title).toBe("Note 1");
159
+ });
160
+
161
+ it("returns all results when folder is undefined", () => {
162
+ const filtered = filterByFolder(mockChunks, undefined);
163
+ expect(filtered).toHaveLength(3);
164
+ });
165
+
166
+ it("returns empty array when no matches", () => {
167
+ const filtered = filterByFolder(mockChunks, "NonExistent");
168
+ expect(filtered).toHaveLength(0);
169
+ });
170
+ });
171
+
172
+ describe("searchChunks", () => {
173
+ beforeEach(() => {
174
+ vi.clearAllMocks();
175
+ });
176
+
177
+ it("handles empty query", async () => {
178
+ const result = await searchChunks("");
179
+ expect(result).toHaveLength(0);
180
+ });
181
+
182
+ it("handles whitespace-only query", async () => {
183
+ const result = await searchChunks(" ");
184
+ expect(result).toHaveLength(0);
185
+ });
186
+
187
+ it("deduplicates results by note", async () => {
188
+ const mockStore = {
189
+ searchChunks: vi.fn().mockResolvedValue([
190
+ {
191
+ chunk_id: "note1_chunk_0",
192
+ note_id: "note1",
193
+ note_title: "Note 1",
194
+ folder: "Work",
195
+ chunk_index: 0,
196
+ total_chunks: 2,
197
+ content: "first chunk",
198
+ modified: "2024-01-01",
199
+ score: 0.8,
200
+ },
201
+ {
202
+ chunk_id: "note1_chunk_1",
203
+ note_id: "note1",
204
+ note_title: "Note 1",
205
+ folder: "Work",
206
+ chunk_index: 1,
207
+ total_chunks: 2,
208
+ content: "second chunk",
209
+ modified: "2024-01-01",
210
+ score: 0.9,
211
+ },
212
+ {
213
+ chunk_id: "note2_chunk_0",
214
+ note_id: "note2",
215
+ note_title: "Note 2",
216
+ folder: "Personal",
217
+ chunk_index: 0,
218
+ total_chunks: 1,
219
+ content: "only chunk",
220
+ modified: "2024-01-02",
221
+ score: 0.7,
222
+ },
223
+ ]),
224
+ searchChunksFTS: vi.fn().mockResolvedValue([]),
225
+ };
226
+
227
+ vi.mocked(getChunkStore).mockReturnValue(mockStore as unknown as ReturnType<typeof getChunkStore>);
228
+
229
+ const results = await searchChunks("test query", { mode: "semantic" });
230
+
231
+ // Should have 2 unique notes, not 3 chunks
232
+ expect(results).toHaveLength(2);
233
+ // note1 should have the higher scoring chunk (index 1, score 0.9)
234
+ const note1 = results.find((r) => r.note_id === "note1");
235
+ expect(note1?.matchedChunkIndex).toBe(1);
236
+ });
237
+
238
+ it("applies folder filter", async () => {
239
+ const mockStore = {
240
+ searchChunks: vi.fn().mockResolvedValue([
241
+ {
242
+ chunk_id: "note1_chunk_0",
243
+ note_id: "note1",
244
+ note_title: "Note 1",
245
+ folder: "Work",
246
+ chunk_index: 0,
247
+ total_chunks: 1,
248
+ content: "work content",
249
+ modified: "2024-01-01",
250
+ score: 0.9,
251
+ },
252
+ {
253
+ chunk_id: "note2_chunk_0",
254
+ note_id: "note2",
255
+ note_title: "Note 2",
256
+ folder: "Personal",
257
+ chunk_index: 0,
258
+ total_chunks: 1,
259
+ content: "personal content",
260
+ modified: "2024-01-02",
261
+ score: 0.8,
262
+ },
263
+ ]),
264
+ searchChunksFTS: vi.fn().mockResolvedValue([]),
265
+ };
266
+
267
+ vi.mocked(getChunkStore).mockReturnValue(mockStore as unknown as ReturnType<typeof getChunkStore>);
268
+
269
+ const results = await searchChunks("test query", {
270
+ mode: "semantic",
271
+ folder: "Work",
272
+ });
273
+
274
+ expect(results).toHaveLength(1);
275
+ expect(results[0].folder).toBe("Work");
276
+ });
277
+
278
+ it("respects limit option", async () => {
279
+ const mockStore = {
280
+ searchChunks: vi.fn().mockResolvedValue([
281
+ {
282
+ chunk_id: "note1_chunk_0",
283
+ note_id: "note1",
284
+ note_title: "Note 1",
285
+ folder: "Work",
286
+ chunk_index: 0,
287
+ total_chunks: 1,
288
+ content: "content 1",
289
+ modified: "2024-01-01",
290
+ score: 0.9,
291
+ },
292
+ {
293
+ chunk_id: "note2_chunk_0",
294
+ note_id: "note2",
295
+ note_title: "Note 2",
296
+ folder: "Work",
297
+ chunk_index: 0,
298
+ total_chunks: 1,
299
+ content: "content 2",
300
+ modified: "2024-01-02",
301
+ score: 0.8,
302
+ },
303
+ {
304
+ chunk_id: "note3_chunk_0",
305
+ note_id: "note3",
306
+ note_title: "Note 3",
307
+ folder: "Work",
308
+ chunk_index: 0,
309
+ total_chunks: 1,
310
+ content: "content 3",
311
+ modified: "2024-01-03",
312
+ score: 0.7,
313
+ },
314
+ ]),
315
+ searchChunksFTS: vi.fn().mockResolvedValue([]),
316
+ };
317
+
318
+ vi.mocked(getChunkStore).mockReturnValue(mockStore as unknown as ReturnType<typeof getChunkStore>);
319
+
320
+ const results = await searchChunks("test query", {
321
+ mode: "semantic",
322
+ limit: 2,
323
+ });
324
+
325
+ expect(results).toHaveLength(2);
326
+ });
327
+ });
@@ -0,0 +1,298 @@
1
+ /**
2
+ * Chunk-based search for Parent Document Retriever pattern.
3
+ *
4
+ * Searches individual chunks but returns results deduplicated by note,
5
+ * showing the best-matching chunk for each note.
6
+ *
7
+ * Uses Reciprocal Rank Fusion (RRF) to merge results from:
8
+ * - Vector search (semantic similarity)
9
+ * - Full-text search (keyword matching)
10
+ */
11
+
12
+ import { getEmbedding } from "../embeddings/index.js";
13
+ import { getEmbeddingCache } from "../embeddings/cache.js";
14
+ import { getChunkStore, type ChunkSearchResult as DBChunkSearchResult } from "../db/lancedb.js";
15
+ import {
16
+ DEFAULT_SEARCH_LIMIT,
17
+ HYBRID_SEARCH_MIN_FETCH,
18
+ RRF_K,
19
+ } from "../config/constants.js";
20
+ import { createDebugLogger } from "../utils/debug.js";
21
+
22
+ // Debug logging
23
+ const debug = createDebugLogger("CHUNK_SEARCH");
24
+
25
+ /**
26
+ * Options for chunk search operations.
27
+ */
28
+ export interface ChunkSearchOptions {
29
+ /** Filter by folder name */
30
+ folder?: string;
31
+ /** Maximum number of results (default: 20) */
32
+ limit?: number;
33
+ /** Search mode: hybrid, keyword, or semantic (default: hybrid) */
34
+ mode?: "hybrid" | "keyword" | "semantic";
35
+ }
36
+
37
+ /**
38
+ * Search result for chunk-based search.
39
+ * Returns one result per note with the best-matching chunk.
40
+ */
41
+ export interface ChunkSearchResult {
42
+ /** Apple Notes unique identifier */
43
+ note_id: string;
44
+ /** Note title */
45
+ note_title: string;
46
+ /** Folder containing the note */
47
+ folder: string;
48
+ /** The best-matching chunk content */
49
+ matchedChunk: string;
50
+ /** Index of the matched chunk within the note */
51
+ matchedChunkIndex: number;
52
+ /** Relevance score (higher = more relevant) */
53
+ score: number;
54
+ /** Last modified date (ISO string) */
55
+ modified: string;
56
+ }
57
+
58
+ /**
59
+ * Calculate RRF score for a result at a given rank.
60
+ * Formula: 1 / (k + rank)
61
+ * where k is a constant (typically 60) and rank is 0-indexed.
62
+ */
63
+ export function rrfScore(rank: number): number {
64
+ return 1 / (RRF_K + rank);
65
+ }
66
+
67
+ /**
68
+ * Filter results by folder if specified.
69
+ * Case-insensitive folder matching.
70
+ */
71
+ export function filterByFolder(
72
+ chunks: ChunkSearchResult[],
73
+ folder?: string
74
+ ): ChunkSearchResult[] {
75
+ if (!folder) {
76
+ return chunks;
77
+ }
78
+
79
+ const normalizedFolder = folder.toLowerCase();
80
+ return chunks.filter(
81
+ (r) => r.folder.toLowerCase() === normalizedFolder
82
+ );
83
+ }
84
+
85
+ /**
86
+ * Deduplicate chunks by note_id, keeping only the best-scoring chunk for each note.
87
+ * Returns results sorted by score in descending order.
88
+ */
89
+ export function deduplicateByNote(
90
+ chunks: ChunkSearchResult[]
91
+ ): ChunkSearchResult[] {
92
+ // Group by note_id, keeping the highest scoring chunk
93
+ const bestByNote = new Map<string, ChunkSearchResult>();
94
+
95
+ for (const chunk of chunks) {
96
+ const existing = bestByNote.get(chunk.note_id);
97
+ if (!existing || chunk.score > existing.score) {
98
+ bestByNote.set(chunk.note_id, chunk);
99
+ }
100
+ }
101
+
102
+ // Convert to array and sort by score descending
103
+ return Array.from(bestByNote.values()).sort((a, b) => b.score - a.score);
104
+ }
105
+
106
+ /**
107
+ * Convert DB chunk result to ChunkSearchResult format.
108
+ */
109
+ function toChunkSearchResult(
110
+ dbResult: DBChunkSearchResult
111
+ ): ChunkSearchResult {
112
+ return {
113
+ note_id: dbResult.note_id,
114
+ note_title: dbResult.note_title,
115
+ folder: dbResult.folder,
116
+ matchedChunk: dbResult.content,
117
+ matchedChunkIndex: dbResult.chunk_index,
118
+ score: dbResult.score,
119
+ modified: dbResult.modified,
120
+ };
121
+ }
122
+
123
+ /**
124
+ * Get cached or compute embedding for query.
125
+ */
126
+ async function getCachedQueryEmbedding(query: string): Promise<number[]> {
127
+ const cache = getEmbeddingCache();
128
+ return cache.getOrCompute(query, getEmbedding);
129
+ }
130
+
131
+ /**
132
+ * Perform vector-only search on chunks.
133
+ */
134
+ async function vectorSearch(
135
+ query: string,
136
+ limit: number
137
+ ): Promise<ChunkSearchResult[]> {
138
+ debug(`Vector search: "${query}" (limit: ${limit})`);
139
+
140
+ const store = getChunkStore();
141
+ const queryVector = await getCachedQueryEmbedding(query);
142
+
143
+ const results = await store.searchChunks(queryVector, limit);
144
+ return results.map(toChunkSearchResult);
145
+ }
146
+
147
+ /**
148
+ * Perform full-text search only on chunks.
149
+ */
150
+ async function keywordSearch(
151
+ query: string,
152
+ limit: number
153
+ ): Promise<ChunkSearchResult[]> {
154
+ debug(`FTS search: "${query}" (limit: ${limit})`);
155
+
156
+ const store = getChunkStore();
157
+ const results = await store.searchChunksFTS(query, limit);
158
+ return results.map(toChunkSearchResult);
159
+ }
160
+
161
+ /**
162
+ * Perform hybrid search combining vector and FTS results using RRF.
163
+ */
164
+ async function hybridSearch(
165
+ query: string,
166
+ limit: number
167
+ ): Promise<ChunkSearchResult[]> {
168
+ debug(`Hybrid search: "${query}" (limit: ${limit})`);
169
+
170
+ const store = getChunkStore();
171
+
172
+ // Fetch more results for RRF merging
173
+ const fetchLimit = Math.max(limit * 2, HYBRID_SEARCH_MIN_FETCH);
174
+
175
+ // Run both searches in parallel (use cached embedding)
176
+ const [queryVector, ftsResults] = await Promise.all([
177
+ getCachedQueryEmbedding(query),
178
+ store.searchChunksFTS(query, fetchLimit).catch(() => [] as DBChunkSearchResult[]),
179
+ ]);
180
+
181
+ const vectorResults = await store.searchChunks(queryVector, fetchLimit);
182
+
183
+ debug(`Vector results: ${vectorResults.length}, FTS results: ${ftsResults.length}`);
184
+
185
+ // Merge results using Reciprocal Rank Fusion
186
+ // Use chunk_id as key since we want to combine scores for the same chunk
187
+ const scoreMap = new Map<string, number>();
188
+ const contentMap = new Map<string, DBChunkSearchResult>();
189
+
190
+ // Process vector search results
191
+ vectorResults.forEach((item, rank) => {
192
+ const key = item.chunk_id;
193
+ scoreMap.set(key, (scoreMap.get(key) || 0) + rrfScore(rank));
194
+ contentMap.set(key, item);
195
+ });
196
+
197
+ // Process FTS results
198
+ ftsResults.forEach((item, rank) => {
199
+ const key = item.chunk_id;
200
+ scoreMap.set(key, (scoreMap.get(key) || 0) + rrfScore(rank));
201
+ if (!contentMap.has(key)) {
202
+ contentMap.set(key, item);
203
+ }
204
+ });
205
+
206
+ // Sort by combined RRF score and convert to ChunkSearchResult
207
+ const merged = Array.from(scoreMap.entries())
208
+ .sort((a, b) => b[1] - a[1])
209
+ .map(([key, score]) => {
210
+ const dbResult = contentMap.get(key)!;
211
+ return toChunkSearchResult({ ...dbResult, score });
212
+ });
213
+
214
+ return merged;
215
+ }
216
+
217
+ /**
218
+ * Search notes using chunk-based approach.
219
+ *
220
+ * Searches individual chunks for better relevance, then deduplicates
221
+ * by note to return one result per note with the best-matching chunk.
222
+ *
223
+ * @param query - Search query string
224
+ * @param options - Search configuration options
225
+ * @returns Array of search results sorted by relevance, one per note
226
+ *
227
+ * @example
228
+ * ```typescript
229
+ * // Basic hybrid search
230
+ * const results = await searchChunks("project ideas");
231
+ *
232
+ * // Keyword-only search in specific folder
233
+ * const results = await searchChunks("meeting", {
234
+ * mode: "keyword",
235
+ * folder: "Work",
236
+ * limit: 10,
237
+ * });
238
+ *
239
+ * // Semantic search
240
+ * const results = await searchChunks("concepts similar to machine learning", {
241
+ * mode: "semantic",
242
+ * });
243
+ * ```
244
+ */
245
+ export async function searchChunks(
246
+ query: string,
247
+ options: ChunkSearchOptions = {}
248
+ ): Promise<ChunkSearchResult[]> {
249
+ const {
250
+ folder,
251
+ limit = DEFAULT_SEARCH_LIMIT,
252
+ mode = "hybrid",
253
+ } = options;
254
+
255
+ if (!query || query.trim().length === 0) {
256
+ debug("Empty query, returning empty results");
257
+ return [];
258
+ }
259
+
260
+ const trimmedQuery = query.trim();
261
+
262
+ debug(`searchChunks: "${trimmedQuery}" mode=${mode} folder=${folder || "all"} limit=${limit}`);
263
+
264
+ // Fetch more results than needed because:
265
+ // 1. Deduplication may reduce count
266
+ // 2. Folder filtering may reduce count
267
+ const fetchMultiplier = folder ? 3 : 2;
268
+ const fetchLimit = Math.max(limit * fetchMultiplier, HYBRID_SEARCH_MIN_FETCH);
269
+
270
+ let rawResults: ChunkSearchResult[];
271
+
272
+ switch (mode) {
273
+ case "keyword":
274
+ rawResults = await keywordSearch(trimmedQuery, fetchLimit);
275
+ break;
276
+
277
+ case "semantic":
278
+ rawResults = await vectorSearch(trimmedQuery, fetchLimit);
279
+ break;
280
+
281
+ case "hybrid":
282
+ default:
283
+ rawResults = await hybridSearch(trimmedQuery, fetchLimit);
284
+ break;
285
+ }
286
+
287
+ // Apply folder filter
288
+ const filtered = filterByFolder(rawResults, folder);
289
+
290
+ // Deduplicate by note (keep best chunk per note)
291
+ const deduplicated = deduplicateByNote(filtered);
292
+
293
+ // Apply limit
294
+ const results = deduplicated.slice(0, limit);
295
+
296
+ debug(`Returning ${results.length} results (from ${rawResults.length} chunks)`);
297
+ return results;
298
+ }
@@ -37,9 +37,6 @@ export interface SearchOptions {
37
37
  include_content?: boolean;
38
38
  }
39
39
 
40
- // SearchResult is imported from ../types/index.js
41
- // RRF_K is imported from ../config/constants.js
42
-
43
40
  /**
44
41
  * Calculate RRF score for a result at a given rank.
45
42
  * Formula: 1 / (k + rank)
@@ -163,15 +160,16 @@ async function hybridSearch(
163
160
  const contentMap = new Map<string, DBSearchResult>();
164
161
 
165
162
  // Process vector search results
163
+ // Use id as key to avoid collisions with duplicate titles in different folders
166
164
  vectorResults.forEach((item, rank) => {
167
- const key = item.title;
165
+ const key = item.id ?? item.title;
168
166
  scoreMap.set(key, (scoreMap.get(key) || 0) + rrfScore(rank));
169
167
  contentMap.set(key, item);
170
168
  });
171
169
 
172
170
  // Process FTS results
173
171
  ftsResults.forEach((item, rank) => {
174
- const key = item.title;
172
+ const key = item.id ?? item.title;
175
173
  scoreMap.set(key, (scoreMap.get(key) || 0) + rrfScore(rank));
176
174
  if (!contentMap.has(key)) {
177
175
  contentMap.set(key, item);
@@ -257,6 +255,7 @@ export async function searchNotes(
257
255
  // Transform to SearchResult format
258
256
  const results: SearchResult[] = dbResults.map((r) => {
259
257
  const result: SearchResult = {
258
+ id: r.id,
260
259
  title: r.title,
261
260
  folder: r.folder,
262
261
  preview: generatePreview(r.content),
@@ -276,7 +275,6 @@ export async function searchNotes(
276
275
  }
277
276
 
278
277
  // Re-export types for convenience
279
- export type { SearchMode as Mode };
280
278
  export type { SearchResult } from "../types/index.js";
281
279
 
282
280
  // Export utility functions for testing