@disco_trooper/apple-notes-mcp 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -24
- package/package.json +11 -12
- package/src/config/claude.test.ts +47 -0
- package/src/config/claude.ts +106 -0
- package/src/config/constants.ts +11 -2
- package/src/config/paths.test.ts +40 -0
- package/src/config/paths.ts +86 -0
- package/src/db/arrow-fix.test.ts +101 -0
- package/src/db/lancedb.test.ts +254 -2
- package/src/db/lancedb.ts +385 -38
- package/src/embeddings/cache.test.ts +150 -0
- package/src/embeddings/cache.ts +204 -0
- package/src/embeddings/index.ts +22 -4
- package/src/embeddings/local.ts +57 -17
- package/src/embeddings/openrouter.ts +233 -11
- package/src/errors/index.test.ts +64 -0
- package/src/errors/index.ts +62 -0
- package/src/graph/export.test.ts +81 -0
- package/src/graph/export.ts +163 -0
- package/src/graph/extract.test.ts +90 -0
- package/src/graph/extract.ts +52 -0
- package/src/graph/queries.test.ts +156 -0
- package/src/graph/queries.ts +224 -0
- package/src/index.ts +309 -23
- package/src/notes/conversion.ts +62 -0
- package/src/notes/crud.test.ts +41 -8
- package/src/notes/crud.ts +75 -64
- package/src/notes/read.test.ts +58 -3
- package/src/notes/read.ts +142 -210
- package/src/notes/resolve.ts +174 -0
- package/src/notes/tables.ts +69 -40
- package/src/search/chunk-indexer.test.ts +353 -0
- package/src/search/chunk-indexer.ts +207 -0
- package/src/search/chunk-search.test.ts +327 -0
- package/src/search/chunk-search.ts +298 -0
- package/src/search/index.ts +4 -6
- package/src/search/indexer.ts +164 -109
- package/src/setup.ts +46 -67
- package/src/types/index.ts +4 -0
- package/src/utils/chunker.test.ts +182 -0
- package/src/utils/chunker.ts +170 -0
- package/src/utils/content-filter.test.ts +225 -0
- package/src/utils/content-filter.ts +275 -0
- package/src/utils/debug.ts +0 -2
- package/src/utils/runtime.test.ts +70 -0
- package/src/utils/runtime.ts +40 -0
- package/src/utils/text.test.ts +32 -0
- package/CLAUDE.md +0 -56
- package/src/server.ts +0 -427
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
rrfScore,
|
|
4
|
+
deduplicateByNote,
|
|
5
|
+
filterByFolder,
|
|
6
|
+
searchChunks,
|
|
7
|
+
type ChunkSearchResult,
|
|
8
|
+
} from "./chunk-search.js";
|
|
9
|
+
|
|
10
|
+
// Mock dependencies
// NOTE(review): vitest hoists vi.mock() calls above all imports, so the
// import of getChunkStore below receives the mocked module — verify this
// matches the project's vitest config (no `hoisted: false` overrides).
vi.mock("../embeddings/index.js", () => ({
  // Fixed 3-dim vector keeps the semantic-search path deterministic.
  getEmbedding: vi.fn().mockResolvedValue([0.1, 0.2, 0.3]),
}));

vi.mock("../db/lancedb.js", () => ({
  // Each test installs its own store via vi.mocked(getChunkStore).
  getChunkStore: vi.fn(),
}));

import { getChunkStore } from "../db/lancedb.js";
|
|
20
|
+
|
|
21
|
+
// Unit tests for the Reciprocal Rank Fusion scoring helper.
describe("rrfScore", () => {
  it("calculates RRF score correctly", () => {
    // RRF formula: 1 / (k + rank) where k = 60
    expect(rrfScore(1)).toBeCloseTo(1 / 61, 5);
    expect(rrfScore(10)).toBeCloseTo(1 / 70, 5);
  });

  it("returns smaller scores for higher ranks", () => {
    // Monotonically decreasing in rank.
    expect(rrfScore(1)).toBeGreaterThan(rrfScore(10));
    expect(rrfScore(10)).toBeGreaterThan(rrfScore(100));
  });

  it("returns correct score for rank 0", () => {
    // Rank is 0-indexed, so the best possible score is 1/k.
    expect(rrfScore(0)).toBeCloseTo(1 / 60, 5);
  });
});
|
|
37
|
+
|
|
38
|
+
// Tests for per-note deduplication: one result per note, best chunk wins.
describe("deduplicateByNote", () => {
  it("keeps best-scoring chunk per note", () => {
    const chunks: ChunkSearchResult[] = [
      {
        note_id: "note1",
        note_title: "Note 1",
        folder: "Work",
        matchedChunk: "chunk 1",
        matchedChunkIndex: 0,
        score: 0.8,
        modified: "2024-01-01",
      },
      {
        note_id: "note1",
        note_title: "Note 1",
        folder: "Work",
        matchedChunk: "chunk 2",
        matchedChunkIndex: 1,
        score: 0.9, // Higher score
        modified: "2024-01-01",
      },
      {
        note_id: "note2",
        note_title: "Note 2",
        folder: "Personal",
        matchedChunk: "chunk 1",
        matchedChunkIndex: 0,
        score: 0.7,
        modified: "2024-01-02",
      },
    ];

    const result = deduplicateByNote(chunks);

    expect(result).toHaveLength(2);
    // note1 should have the higher scoring chunk (0.9)
    const note1 = result.find((r) => r.note_id === "note1");
    expect(note1?.score).toBe(0.9);
    expect(note1?.matchedChunkIndex).toBe(1);
  });

  it("sorts by score descending", () => {
    const chunks: ChunkSearchResult[] = [
      {
        note_id: "note1",
        note_title: "Note 1",
        folder: "Work",
        matchedChunk: "chunk 1",
        matchedChunkIndex: 0,
        score: 0.5,
        modified: "2024-01-01",
      },
      {
        note_id: "note2",
        note_title: "Note 2",
        folder: "Personal",
        matchedChunk: "chunk 1",
        matchedChunkIndex: 0,
        score: 0.9,
        modified: "2024-01-02",
      },
      {
        note_id: "note3",
        note_title: "Note 3",
        folder: "Work",
        matchedChunk: "chunk 1",
        matchedChunkIndex: 0,
        score: 0.7,
        modified: "2024-01-03",
      },
    ];

    const result = deduplicateByNote(chunks);

    // All notes are distinct, so nothing is dropped — only reordered.
    expect(result).toHaveLength(3);
    expect(result[0].score).toBe(0.9);
    expect(result[1].score).toBe(0.7);
    expect(result[2].score).toBe(0.5);
  });

  it("handles empty array", () => {
    const result = deduplicateByNote([]);
    expect(result).toHaveLength(0);
  });
});
|
|
123
|
+
|
|
124
|
+
// Tests for folder filtering: exact, case-insensitive match on folder name.
describe("filterByFolder", () => {
  const mockChunks: ChunkSearchResult[] = [
    {
      note_id: "note1",
      note_title: "Note 1",
      folder: "Work",
      matchedChunk: "content",
      matchedChunkIndex: 0,
      score: 1,
      modified: "2024-01-01",
    },
    {
      note_id: "note2",
      note_title: "Note 2",
      folder: "Personal",
      matchedChunk: "content",
      matchedChunkIndex: 0,
      score: 0.9,
      modified: "2024-01-01",
    },
    {
      // Nested folder — must NOT match a filter of "work" (exact match only).
      note_id: "note3",
      note_title: "Note 3",
      folder: "Work/Projects",
      matchedChunk: "content",
      matchedChunkIndex: 0,
      score: 0.8,
      modified: "2024-01-01",
    },
  ];

  it("filters by exact folder name (case insensitive)", () => {
    const filtered = filterByFolder(mockChunks, "work");
    expect(filtered).toHaveLength(1);
    expect(filtered[0].note_title).toBe("Note 1");
  });

  it("returns all results when folder is undefined", () => {
    const filtered = filterByFolder(mockChunks, undefined);
    expect(filtered).toHaveLength(3);
  });

  it("returns empty array when no matches", () => {
    const filtered = filterByFolder(mockChunks, "NonExistent");
    expect(filtered).toHaveLength(0);
  });
});
|
|
171
|
+
|
|
172
|
+
// End-to-end tests of the searchChunks entry point against a mocked store.
// All tests use mode "semantic" so only the mocked searchChunks path runs.
describe("searchChunks", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("handles empty query", async () => {
    const result = await searchChunks("");
    expect(result).toHaveLength(0);
  });

  it("handles whitespace-only query", async () => {
    const result = await searchChunks(" ");
    expect(result).toHaveLength(0);
  });

  it("deduplicates results by note", async () => {
    // Store returns 3 chunks spanning 2 notes; note1 has two chunks.
    const mockStore = {
      searchChunks: vi.fn().mockResolvedValue([
        {
          chunk_id: "note1_chunk_0",
          note_id: "note1",
          note_title: "Note 1",
          folder: "Work",
          chunk_index: 0,
          total_chunks: 2,
          content: "first chunk",
          modified: "2024-01-01",
          score: 0.8,
        },
        {
          chunk_id: "note1_chunk_1",
          note_id: "note1",
          note_title: "Note 1",
          folder: "Work",
          chunk_index: 1,
          total_chunks: 2,
          content: "second chunk",
          modified: "2024-01-01",
          score: 0.9,
        },
        {
          chunk_id: "note2_chunk_0",
          note_id: "note2",
          note_title: "Note 2",
          folder: "Personal",
          chunk_index: 0,
          total_chunks: 1,
          content: "only chunk",
          modified: "2024-01-02",
          score: 0.7,
        },
      ]),
      searchChunksFTS: vi.fn().mockResolvedValue([]),
    };

    vi.mocked(getChunkStore).mockReturnValue(mockStore as unknown as ReturnType<typeof getChunkStore>);

    const results = await searchChunks("test query", { mode: "semantic" });

    // Should have 2 unique notes, not 3 chunks
    expect(results).toHaveLength(2);
    // note1 should have the higher scoring chunk (index 1, score 0.9)
    const note1 = results.find((r) => r.note_id === "note1");
    expect(note1?.matchedChunkIndex).toBe(1);
  });

  it("applies folder filter", async () => {
    const mockStore = {
      searchChunks: vi.fn().mockResolvedValue([
        {
          chunk_id: "note1_chunk_0",
          note_id: "note1",
          note_title: "Note 1",
          folder: "Work",
          chunk_index: 0,
          total_chunks: 1,
          content: "work content",
          modified: "2024-01-01",
          score: 0.9,
        },
        {
          chunk_id: "note2_chunk_0",
          note_id: "note2",
          note_title: "Note 2",
          folder: "Personal",
          chunk_index: 0,
          total_chunks: 1,
          content: "personal content",
          modified: "2024-01-02",
          score: 0.8,
        },
      ]),
      searchChunksFTS: vi.fn().mockResolvedValue([]),
    };

    vi.mocked(getChunkStore).mockReturnValue(mockStore as unknown as ReturnType<typeof getChunkStore>);

    const results = await searchChunks("test query", {
      mode: "semantic",
      folder: "Work",
    });

    expect(results).toHaveLength(1);
    expect(results[0].folder).toBe("Work");
  });

  it("respects limit option", async () => {
    // Store returns 3 distinct notes; limit should truncate to 2.
    const mockStore = {
      searchChunks: vi.fn().mockResolvedValue([
        {
          chunk_id: "note1_chunk_0",
          note_id: "note1",
          note_title: "Note 1",
          folder: "Work",
          chunk_index: 0,
          total_chunks: 1,
          content: "content 1",
          modified: "2024-01-01",
          score: 0.9,
        },
        {
          chunk_id: "note2_chunk_0",
          note_id: "note2",
          note_title: "Note 2",
          folder: "Work",
          chunk_index: 0,
          total_chunks: 1,
          content: "content 2",
          modified: "2024-01-02",
          score: 0.8,
        },
        {
          chunk_id: "note3_chunk_0",
          note_id: "note3",
          note_title: "Note 3",
          folder: "Work",
          chunk_index: 0,
          total_chunks: 1,
          content: "content 3",
          modified: "2024-01-03",
          score: 0.7,
        },
      ]),
      searchChunksFTS: vi.fn().mockResolvedValue([]),
    };

    vi.mocked(getChunkStore).mockReturnValue(mockStore as unknown as ReturnType<typeof getChunkStore>);

    const results = await searchChunks("test query", {
      mode: "semantic",
      limit: 2,
    });

    expect(results).toHaveLength(2);
  });
});
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunk-based search for Parent Document Retriever pattern.
|
|
3
|
+
*
|
|
4
|
+
* Searches individual chunks but returns results deduplicated by note,
|
|
5
|
+
* showing the best-matching chunk for each note.
|
|
6
|
+
*
|
|
7
|
+
* Uses Reciprocal Rank Fusion (RRF) to merge results from:
|
|
8
|
+
* - Vector search (semantic similarity)
|
|
9
|
+
* - Full-text search (keyword matching)
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { getEmbedding } from "../embeddings/index.js";
|
|
13
|
+
import { getEmbeddingCache } from "../embeddings/cache.js";
|
|
14
|
+
import { getChunkStore, type ChunkSearchResult as DBChunkSearchResult } from "../db/lancedb.js";
|
|
15
|
+
import {
|
|
16
|
+
DEFAULT_SEARCH_LIMIT,
|
|
17
|
+
HYBRID_SEARCH_MIN_FETCH,
|
|
18
|
+
RRF_K,
|
|
19
|
+
} from "../config/constants.js";
|
|
20
|
+
import { createDebugLogger } from "../utils/debug.js";
|
|
21
|
+
|
|
22
|
+
// Debug logging, namespaced "CHUNK_SEARCH" (see ../utils/debug.js)
const debug = createDebugLogger("CHUNK_SEARCH");
|
|
24
|
+
|
|
25
|
+
/**
 * Options for chunk search operations.
 */
export interface ChunkSearchOptions {
  /** Filter by folder name (exact, case-insensitive match) */
  folder?: string;
  /** Maximum number of results (default: 20) */
  limit?: number;
  /** Search mode: hybrid, keyword, or semantic (default: hybrid) */
  mode?: "hybrid" | "keyword" | "semantic";
}
|
|
36
|
+
|
|
37
|
+
/**
 * Search result for chunk-based search.
 * Returns one result per note with the best-matching chunk.
 */
export interface ChunkSearchResult {
  /** Apple Notes unique identifier */
  note_id: string;
  /** Note title */
  note_title: string;
  /** Folder containing the note */
  folder: string;
  /** The best-matching chunk content */
  matchedChunk: string;
  /** Index of the matched chunk within the note */
  matchedChunkIndex: number;
  /**
   * Relevance score (higher = more relevant).
   * In hybrid mode this is a combined RRF score, not a raw similarity —
   * scores from different modes are not comparable with each other.
   */
  score: number;
  /** Last modified date (ISO string) */
  modified: string;
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Calculate RRF score for a result at a given rank.
|
|
60
|
+
* Formula: 1 / (k + rank)
|
|
61
|
+
* where k is a constant (typically 60) and rank is 0-indexed.
|
|
62
|
+
*/
|
|
63
|
+
export function rrfScore(rank: number): number {
|
|
64
|
+
return 1 / (RRF_K + rank);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Filter results by folder if specified.
|
|
69
|
+
* Case-insensitive folder matching.
|
|
70
|
+
*/
|
|
71
|
+
export function filterByFolder(
|
|
72
|
+
chunks: ChunkSearchResult[],
|
|
73
|
+
folder?: string
|
|
74
|
+
): ChunkSearchResult[] {
|
|
75
|
+
if (!folder) {
|
|
76
|
+
return chunks;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
const normalizedFolder = folder.toLowerCase();
|
|
80
|
+
return chunks.filter(
|
|
81
|
+
(r) => r.folder.toLowerCase() === normalizedFolder
|
|
82
|
+
);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Deduplicate chunks by note_id, keeping only the best-scoring chunk for each note.
|
|
87
|
+
* Returns results sorted by score in descending order.
|
|
88
|
+
*/
|
|
89
|
+
export function deduplicateByNote(
|
|
90
|
+
chunks: ChunkSearchResult[]
|
|
91
|
+
): ChunkSearchResult[] {
|
|
92
|
+
// Group by note_id, keeping the highest scoring chunk
|
|
93
|
+
const bestByNote = new Map<string, ChunkSearchResult>();
|
|
94
|
+
|
|
95
|
+
for (const chunk of chunks) {
|
|
96
|
+
const existing = bestByNote.get(chunk.note_id);
|
|
97
|
+
if (!existing || chunk.score > existing.score) {
|
|
98
|
+
bestByNote.set(chunk.note_id, chunk);
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// Convert to array and sort by score descending
|
|
103
|
+
return Array.from(bestByNote.values()).sort((a, b) => b.score - a.score);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Convert DB chunk result to ChunkSearchResult format.
|
|
108
|
+
*/
|
|
109
|
+
function toChunkSearchResult(
|
|
110
|
+
dbResult: DBChunkSearchResult
|
|
111
|
+
): ChunkSearchResult {
|
|
112
|
+
return {
|
|
113
|
+
note_id: dbResult.note_id,
|
|
114
|
+
note_title: dbResult.note_title,
|
|
115
|
+
folder: dbResult.folder,
|
|
116
|
+
matchedChunk: dbResult.content,
|
|
117
|
+
matchedChunkIndex: dbResult.chunk_index,
|
|
118
|
+
score: dbResult.score,
|
|
119
|
+
modified: dbResult.modified,
|
|
120
|
+
};
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
/**
|
|
124
|
+
* Get cached or compute embedding for query.
|
|
125
|
+
*/
|
|
126
|
+
async function getCachedQueryEmbedding(query: string): Promise<number[]> {
|
|
127
|
+
const cache = getEmbeddingCache();
|
|
128
|
+
return cache.getOrCompute(query, getEmbedding);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* Perform vector-only search on chunks.
|
|
133
|
+
*/
|
|
134
|
+
async function vectorSearch(
|
|
135
|
+
query: string,
|
|
136
|
+
limit: number
|
|
137
|
+
): Promise<ChunkSearchResult[]> {
|
|
138
|
+
debug(`Vector search: "${query}" (limit: ${limit})`);
|
|
139
|
+
|
|
140
|
+
const store = getChunkStore();
|
|
141
|
+
const queryVector = await getCachedQueryEmbedding(query);
|
|
142
|
+
|
|
143
|
+
const results = await store.searchChunks(queryVector, limit);
|
|
144
|
+
return results.map(toChunkSearchResult);
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Perform full-text search only on chunks.
|
|
149
|
+
*/
|
|
150
|
+
async function keywordSearch(
|
|
151
|
+
query: string,
|
|
152
|
+
limit: number
|
|
153
|
+
): Promise<ChunkSearchResult[]> {
|
|
154
|
+
debug(`FTS search: "${query}" (limit: ${limit})`);
|
|
155
|
+
|
|
156
|
+
const store = getChunkStore();
|
|
157
|
+
const results = await store.searchChunksFTS(query, limit);
|
|
158
|
+
return results.map(toChunkSearchResult);
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* Perform hybrid search combining vector and FTS results using RRF.
|
|
163
|
+
*/
|
|
164
|
+
async function hybridSearch(
|
|
165
|
+
query: string,
|
|
166
|
+
limit: number
|
|
167
|
+
): Promise<ChunkSearchResult[]> {
|
|
168
|
+
debug(`Hybrid search: "${query}" (limit: ${limit})`);
|
|
169
|
+
|
|
170
|
+
const store = getChunkStore();
|
|
171
|
+
|
|
172
|
+
// Fetch more results for RRF merging
|
|
173
|
+
const fetchLimit = Math.max(limit * 2, HYBRID_SEARCH_MIN_FETCH);
|
|
174
|
+
|
|
175
|
+
// Run both searches in parallel (use cached embedding)
|
|
176
|
+
const [queryVector, ftsResults] = await Promise.all([
|
|
177
|
+
getCachedQueryEmbedding(query),
|
|
178
|
+
store.searchChunksFTS(query, fetchLimit).catch(() => [] as DBChunkSearchResult[]),
|
|
179
|
+
]);
|
|
180
|
+
|
|
181
|
+
const vectorResults = await store.searchChunks(queryVector, fetchLimit);
|
|
182
|
+
|
|
183
|
+
debug(`Vector results: ${vectorResults.length}, FTS results: ${ftsResults.length}`);
|
|
184
|
+
|
|
185
|
+
// Merge results using Reciprocal Rank Fusion
|
|
186
|
+
// Use chunk_id as key since we want to combine scores for the same chunk
|
|
187
|
+
const scoreMap = new Map<string, number>();
|
|
188
|
+
const contentMap = new Map<string, DBChunkSearchResult>();
|
|
189
|
+
|
|
190
|
+
// Process vector search results
|
|
191
|
+
vectorResults.forEach((item, rank) => {
|
|
192
|
+
const key = item.chunk_id;
|
|
193
|
+
scoreMap.set(key, (scoreMap.get(key) || 0) + rrfScore(rank));
|
|
194
|
+
contentMap.set(key, item);
|
|
195
|
+
});
|
|
196
|
+
|
|
197
|
+
// Process FTS results
|
|
198
|
+
ftsResults.forEach((item, rank) => {
|
|
199
|
+
const key = item.chunk_id;
|
|
200
|
+
scoreMap.set(key, (scoreMap.get(key) || 0) + rrfScore(rank));
|
|
201
|
+
if (!contentMap.has(key)) {
|
|
202
|
+
contentMap.set(key, item);
|
|
203
|
+
}
|
|
204
|
+
});
|
|
205
|
+
|
|
206
|
+
// Sort by combined RRF score and convert to ChunkSearchResult
|
|
207
|
+
const merged = Array.from(scoreMap.entries())
|
|
208
|
+
.sort((a, b) => b[1] - a[1])
|
|
209
|
+
.map(([key, score]) => {
|
|
210
|
+
const dbResult = contentMap.get(key)!;
|
|
211
|
+
return toChunkSearchResult({ ...dbResult, score });
|
|
212
|
+
});
|
|
213
|
+
|
|
214
|
+
return merged;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
/**
|
|
218
|
+
* Search notes using chunk-based approach.
|
|
219
|
+
*
|
|
220
|
+
* Searches individual chunks for better relevance, then deduplicates
|
|
221
|
+
* by note to return one result per note with the best-matching chunk.
|
|
222
|
+
*
|
|
223
|
+
* @param query - Search query string
|
|
224
|
+
* @param options - Search configuration options
|
|
225
|
+
* @returns Array of search results sorted by relevance, one per note
|
|
226
|
+
*
|
|
227
|
+
* @example
|
|
228
|
+
* ```typescript
|
|
229
|
+
* // Basic hybrid search
|
|
230
|
+
* const results = await searchChunks("project ideas");
|
|
231
|
+
*
|
|
232
|
+
* // Keyword-only search in specific folder
|
|
233
|
+
* const results = await searchChunks("meeting", {
|
|
234
|
+
* mode: "keyword",
|
|
235
|
+
* folder: "Work",
|
|
236
|
+
* limit: 10,
|
|
237
|
+
* });
|
|
238
|
+
*
|
|
239
|
+
* // Semantic search
|
|
240
|
+
* const results = await searchChunks("concepts similar to machine learning", {
|
|
241
|
+
* mode: "semantic",
|
|
242
|
+
* });
|
|
243
|
+
* ```
|
|
244
|
+
*/
|
|
245
|
+
export async function searchChunks(
|
|
246
|
+
query: string,
|
|
247
|
+
options: ChunkSearchOptions = {}
|
|
248
|
+
): Promise<ChunkSearchResult[]> {
|
|
249
|
+
const {
|
|
250
|
+
folder,
|
|
251
|
+
limit = DEFAULT_SEARCH_LIMIT,
|
|
252
|
+
mode = "hybrid",
|
|
253
|
+
} = options;
|
|
254
|
+
|
|
255
|
+
if (!query || query.trim().length === 0) {
|
|
256
|
+
debug("Empty query, returning empty results");
|
|
257
|
+
return [];
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
const trimmedQuery = query.trim();
|
|
261
|
+
|
|
262
|
+
debug(`searchChunks: "${trimmedQuery}" mode=${mode} folder=${folder || "all"} limit=${limit}`);
|
|
263
|
+
|
|
264
|
+
// Fetch more results than needed because:
|
|
265
|
+
// 1. Deduplication may reduce count
|
|
266
|
+
// 2. Folder filtering may reduce count
|
|
267
|
+
const fetchMultiplier = folder ? 3 : 2;
|
|
268
|
+
const fetchLimit = Math.max(limit * fetchMultiplier, HYBRID_SEARCH_MIN_FETCH);
|
|
269
|
+
|
|
270
|
+
let rawResults: ChunkSearchResult[];
|
|
271
|
+
|
|
272
|
+
switch (mode) {
|
|
273
|
+
case "keyword":
|
|
274
|
+
rawResults = await keywordSearch(trimmedQuery, fetchLimit);
|
|
275
|
+
break;
|
|
276
|
+
|
|
277
|
+
case "semantic":
|
|
278
|
+
rawResults = await vectorSearch(trimmedQuery, fetchLimit);
|
|
279
|
+
break;
|
|
280
|
+
|
|
281
|
+
case "hybrid":
|
|
282
|
+
default:
|
|
283
|
+
rawResults = await hybridSearch(trimmedQuery, fetchLimit);
|
|
284
|
+
break;
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
// Apply folder filter
|
|
288
|
+
const filtered = filterByFolder(rawResults, folder);
|
|
289
|
+
|
|
290
|
+
// Deduplicate by note (keep best chunk per note)
|
|
291
|
+
const deduplicated = deduplicateByNote(filtered);
|
|
292
|
+
|
|
293
|
+
// Apply limit
|
|
294
|
+
const results = deduplicated.slice(0, limit);
|
|
295
|
+
|
|
296
|
+
debug(`Returning ${results.length} results (from ${rawResults.length} chunks)`);
|
|
297
|
+
return results;
|
|
298
|
+
}
|
package/src/search/index.ts
CHANGED
|
@@ -37,9 +37,6 @@ export interface SearchOptions {
|
|
|
37
37
|
include_content?: boolean;
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
-
// SearchResult is imported from ../types/index.js
|
|
41
|
-
// RRF_K is imported from ../config/constants.js
|
|
42
|
-
|
|
43
40
|
/**
|
|
44
41
|
* Calculate RRF score for a result at a given rank.
|
|
45
42
|
* Formula: 1 / (k + rank)
|
|
@@ -163,15 +160,16 @@ async function hybridSearch(
|
|
|
163
160
|
const contentMap = new Map<string, DBSearchResult>();
|
|
164
161
|
|
|
165
162
|
// Process vector search results
|
|
163
|
+
// Use id as key to avoid collisions with duplicate titles in different folders
|
|
166
164
|
vectorResults.forEach((item, rank) => {
|
|
167
|
-
const key = item.title;
|
|
165
|
+
const key = item.id ?? item.title;
|
|
168
166
|
scoreMap.set(key, (scoreMap.get(key) || 0) + rrfScore(rank));
|
|
169
167
|
contentMap.set(key, item);
|
|
170
168
|
});
|
|
171
169
|
|
|
172
170
|
// Process FTS results
|
|
173
171
|
ftsResults.forEach((item, rank) => {
|
|
174
|
-
const key = item.title;
|
|
172
|
+
const key = item.id ?? item.title;
|
|
175
173
|
scoreMap.set(key, (scoreMap.get(key) || 0) + rrfScore(rank));
|
|
176
174
|
if (!contentMap.has(key)) {
|
|
177
175
|
contentMap.set(key, item);
|
|
@@ -257,6 +255,7 @@ export async function searchNotes(
|
|
|
257
255
|
// Transform to SearchResult format
|
|
258
256
|
const results: SearchResult[] = dbResults.map((r) => {
|
|
259
257
|
const result: SearchResult = {
|
|
258
|
+
id: r.id,
|
|
260
259
|
title: r.title,
|
|
261
260
|
folder: r.folder,
|
|
262
261
|
preview: generatePreview(r.content),
|
|
@@ -276,7 +275,6 @@ export async function searchNotes(
|
|
|
276
275
|
}
|
|
277
276
|
|
|
278
277
|
// Re-export types for convenience
|
|
279
|
-
export type { SearchMode as Mode };
|
|
280
278
|
export type { SearchResult } from "../types/index.js";
|
|
281
279
|
|
|
282
280
|
// Export utility functions for testing
|