@disco_trooper/apple-notes-mcp 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -24
- package/package.json +10 -8
- package/src/config/claude.test.ts +47 -0
- package/src/config/claude.ts +106 -0
- package/src/config/constants.ts +11 -2
- package/src/config/paths.test.ts +40 -0
- package/src/config/paths.ts +86 -0
- package/src/db/arrow-fix.test.ts +101 -0
- package/src/db/lancedb.test.ts +209 -2
- package/src/db/lancedb.ts +345 -7
- package/src/embeddings/cache.test.ts +150 -0
- package/src/embeddings/cache.ts +204 -0
- package/src/embeddings/index.ts +21 -2
- package/src/embeddings/local.ts +61 -10
- package/src/embeddings/openrouter.ts +233 -11
- package/src/graph/export.test.ts +81 -0
- package/src/graph/export.ts +163 -0
- package/src/graph/extract.test.ts +90 -0
- package/src/graph/extract.ts +52 -0
- package/src/graph/queries.test.ts +156 -0
- package/src/graph/queries.ts +224 -0
- package/src/index.ts +249 -9
- package/src/notes/crud.test.ts +26 -2
- package/src/notes/crud.ts +43 -5
- package/src/notes/read.ts +83 -68
- package/src/search/chunk-indexer.test.ts +353 -0
- package/src/search/chunk-indexer.ts +207 -0
- package/src/search/chunk-search.test.ts +327 -0
- package/src/search/chunk-search.ts +298 -0
- package/src/search/indexer.ts +151 -109
- package/src/setup.ts +46 -67
- package/src/utils/chunker.test.ts +182 -0
- package/src/utils/chunker.ts +170 -0
- package/src/utils/content-filter.test.ts +225 -0
- package/src/utils/content-filter.ts +275 -0
- package/src/utils/runtime.test.ts +70 -0
- package/src/utils/runtime.ts +40 -0
package/src/db/lancedb.test.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
2
|
-
import { LanceDBStore } from "./lancedb.js";
|
|
3
|
-
import type { NoteRecord } from "./lancedb.js";
|
|
2
|
+
import { LanceDBStore, ChunkStore } from "./lancedb.js";
|
|
3
|
+
import type { NoteRecord, ChunkRecord } from "./lancedb.js";
|
|
4
4
|
import * as fs from "node:fs";
|
|
5
5
|
import * as path from "node:path";
|
|
6
6
|
|
|
@@ -28,6 +28,9 @@ describe("LanceDBStore", () => {
|
|
|
28
28
|
created: new Date().toISOString(),
|
|
29
29
|
indexed_at: new Date().toISOString(),
|
|
30
30
|
vector: Array(384).fill(0.1),
|
|
31
|
+
// LanceDB requires at least one element to infer the list type
|
|
32
|
+
tags: ["test-tag"],
|
|
33
|
+
outlinks: ["test-link"],
|
|
31
34
|
});
|
|
32
35
|
|
|
33
36
|
describe("index and getByTitle", () => {
|
|
@@ -184,3 +187,207 @@ describe("LanceDBStore", () => {
|
|
|
184
187
|
});
|
|
185
188
|
});
|
|
186
189
|
});
|
|
190
|
+
|
|
191
|
+
// Integration tests for ChunkStore. Every test runs against its own on-disk
// LanceDB directory, which is removed again in afterEach.
describe("ChunkStore", () => {
  let chunkStore: ChunkStore;
  let testDbPath: string;

  beforeEach(() => {
    testDbPath = path.join("/tmp", `lancedb-chunk-test-${Date.now()}`);
    chunkStore = new ChunkStore(testDbPath);
  });

  afterEach(() => {
    if (fs.existsSync(testDbPath)) {
      fs.rmSync(testDbPath, { recursive: true, force: true });
    }
  });

  /**
   * Builds a fully populated ChunkRecord fixture.
   *
   * tags/outlinks are non-empty by default; tests that exercise the
   * empty-list path override them explicitly.
   */
  const createTestChunk = (
    noteId: string,
    chunkIndex: number,
    totalChunks: number,
    content?: string
  ): ChunkRecord => ({
    chunk_id: `${noteId}_chunk_${chunkIndex}`,
    note_id: noteId,
    note_title: `Note ${noteId}`,
    folder: "Test",
    chunk_index: chunkIndex,
    total_chunks: totalChunks,
    content: content ?? `Chunk ${chunkIndex} content for note ${noteId}`,
    vector: Array(384).fill(0.1),
    created: new Date().toISOString(),
    modified: new Date().toISOString(),
    indexed_at: new Date().toISOString(),
    tags: ["test-tag"],
    outlinks: ["test-link"],
  });

  describe("indexChunks", () => {
    it("indexes chunks and allows retrieval", async () => {
      const chunks = [
        createTestChunk("note-1", 0, 2),
        createTestChunk("note-1", 1, 2),
        createTestChunk("note-2", 0, 1),
      ];

      await chunkStore.indexChunks(chunks);
      const count = await chunkStore.count();

      expect(count).toBe(3);
    });

    it("handles empty chunks array", async () => {
      await chunkStore.indexChunks([]);
      const count = await chunkStore.count();
      expect(count).toBe(0);
    });

    it("handles chunks with empty tags and outlinks", async () => {
      const chunks = [
        { ...createTestChunk("note-1", 0, 1), tags: [], outlinks: [] },
      ];

      await chunkStore.indexChunks(chunks);
      const count = await chunkStore.count();
      expect(count).toBe(1);

      // The count alone cannot detect a leaked type-inference placeholder,
      // so read the row back and check the lists are really empty.
      const stored = await chunkStore.getChunksByNoteId("note-1");
      expect(stored).toHaveLength(1);
      expect(stored[0].tags).toEqual([]);
      expect(stored[0].outlinks).toEqual([]);
    });
  });

  describe("searchChunks", () => {
    it("returns results based on vector similarity", async () => {
      const chunks = [
        createTestChunk("note-1", 0, 2),
        createTestChunk("note-1", 1, 2),
      ];
      await chunkStore.indexChunks(chunks);

      const queryVector = Array(384).fill(0.1);
      const results = await chunkStore.searchChunks(queryVector, 2);

      expect(results).toHaveLength(2);
      expect(results[0]).toHaveProperty("chunk_id");
      expect(results[0]).toHaveProperty("note_id");
      expect(results[0]).toHaveProperty("score");
    });
  });

  describe("searchChunksFTS", () => {
    it("returns results matching query text", async () => {
      const chunks = [
        createTestChunk("note-1", 0, 1, "Meeting notes about project planning"),
        createTestChunk("note-2", 0, 1, "Shopping list for groceries"),
      ];
      await chunkStore.indexChunks(chunks);
      await chunkStore.rebuildFtsIndex();

      const results = await chunkStore.searchChunksFTS("Meeting", 10);
      expect(results.length).toBeGreaterThanOrEqual(1);
      expect(results[0].content).toContain("Meeting");
    });

    it("returns empty array for no matches", async () => {
      const chunks = [createTestChunk("note-1", 0, 1)];
      await chunkStore.indexChunks(chunks);
      await chunkStore.rebuildFtsIndex();

      const results = await chunkStore.searchChunksFTS("nonexistentquery12345", 10);
      expect(results).toHaveLength(0);
    });
  });

  describe("getChunksByNoteId", () => {
    it("returns all chunks for a note sorted by chunk_index", async () => {
      // Deliberately inserted out of order to exercise the sort.
      const chunks = [
        createTestChunk("note-1", 2, 3),
        createTestChunk("note-1", 0, 3),
        createTestChunk("note-1", 1, 3),
        createTestChunk("note-2", 0, 1),
      ];
      await chunkStore.indexChunks(chunks);

      const noteChunks = await chunkStore.getChunksByNoteId("note-1");

      expect(noteChunks).toHaveLength(3);
      expect(noteChunks[0].chunk_index).toBe(0);
      expect(noteChunks[1].chunk_index).toBe(1);
      expect(noteChunks[2].chunk_index).toBe(2);
    });

    it("returns empty array for non-existent note", async () => {
      await chunkStore.indexChunks([createTestChunk("note-1", 0, 1)]);

      const chunks = await chunkStore.getChunksByNoteId("non-existent");
      expect(chunks).toHaveLength(0);
    });
  });

  describe("deleteNoteChunks", () => {
    it("deletes all chunks for a note", async () => {
      const chunks = [
        createTestChunk("note-1", 0, 2),
        createTestChunk("note-1", 1, 2),
        createTestChunk("note-2", 0, 1),
      ];
      await chunkStore.indexChunks(chunks);

      await chunkStore.deleteNoteChunks("note-1");

      const remaining = await chunkStore.count();
      expect(remaining).toBe(1);

      const note1Chunks = await chunkStore.getChunksByNoteId("note-1");
      expect(note1Chunks).toHaveLength(0);

      const note2Chunks = await chunkStore.getChunksByNoteId("note-2");
      expect(note2Chunks).toHaveLength(1);
    });

    it("does not throw when deleting non-existent note chunks", async () => {
      await chunkStore.indexChunks([createTestChunk("note-1", 0, 1)]);
      await expect(chunkStore.deleteNoteChunks("non-existent")).resolves.not.toThrow();
    });
  });

  describe("count", () => {
    it("returns correct count", async () => {
      const chunks = [
        createTestChunk("note-1", 0, 2),
        createTestChunk("note-1", 1, 2),
        createTestChunk("note-2", 0, 1),
      ];
      await chunkStore.indexChunks(chunks);

      expect(await chunkStore.count()).toBe(3);
    });

    it("returns 0 for empty store", async () => {
      expect(await chunkStore.count()).toBe(0);
    });
  });

  describe("clear", () => {
    it("removes all chunks", async () => {
      await chunkStore.indexChunks([
        createTestChunk("note-1", 0, 1),
        createTestChunk("note-2", 0, 1),
      ]);
      expect(await chunkStore.count()).toBe(2);

      await chunkStore.clear();
      expect(await chunkStore.count()).toBe(0);
    });
  });

  describe("rebuildFtsIndex", () => {
    it("rebuilds FTS index without error", async () => {
      await chunkStore.indexChunks([
        createTestChunk("note-1", 0, 1),
        createTestChunk("note-2", 0, 1),
      ]);

      await expect(chunkStore.rebuildFtsIndex()).resolves.not.toThrow();
    });
  });
});
|
package/src/db/lancedb.ts
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
import * as lancedb from "@lancedb/lancedb";
|
|
2
|
-
import path from "node:path";
|
|
3
|
-
import os from "node:os";
|
|
4
2
|
import { validateTitle, escapeForFilter } from "./validation.js";
|
|
5
3
|
import type { DBSearchResult as SearchResult } from "../types/index.js";
|
|
6
4
|
import { createDebugLogger } from "../utils/debug.js";
|
|
5
|
+
import { getDataDir } from "../config/paths.js";
|
|
7
6
|
|
|
8
7
|
// Schema for stored notes
|
|
9
8
|
export interface NoteRecord {
|
|
@@ -15,9 +14,30 @@ export interface NoteRecord {
|
|
|
15
14
|
created: string; // ISO date
|
|
16
15
|
modified: string; // ISO date
|
|
17
16
|
indexed_at: string; // ISO date - when embedding was generated
|
|
17
|
+
// Knowledge Graph fields
|
|
18
|
+
tags: string[]; // Extracted #hashtags (without #)
|
|
19
|
+
outlinks: string[]; // Extracted [[wiki-links]] titles
|
|
18
20
|
[key: string]: unknown; // Index signature for LanceDB compatibility
|
|
19
21
|
}
|
|
20
22
|
|
|
23
|
+
/**
 * Row schema for a single chunk of a note
 * (Parent Document Retriever pattern).
 */
export interface ChunkRecord {
  /** Chunk key, formatted as `${note_id}_chunk_${index}`. */
  chunk_id: string;
  /** Apple ID of the parent note. */
  note_id: string;
  /** Parent note title, kept for display and deduplication. */
  note_title: string;
  folder: string;
  /** Zero-based position of this chunk within its note (0, 1, 2...). */
  chunk_index: number;
  /** Total number of chunks in this note. */
  total_chunks: number;
  /** Text content of the chunk. */
  content: string;
  vector: number[];
  /** ISO date, taken from the parent note. */
  created: string;
  /** ISO date, taken from the parent note. */
  modified: string;
  /** ISO date. */
  indexed_at: string;
  /** Tags taken from the parent note. */
  tags: string[];
  /** Outlinks taken from the parent note. */
  outlinks: string[];
  /** Index signature for LanceDB. */
  [key: string]: unknown;
}
|
|
40
|
+
|
|
21
41
|
// SearchResult is imported from ../types/index.js as DBSearchResult
|
|
22
42
|
export type { SearchResult };
|
|
23
43
|
|
|
@@ -61,7 +81,7 @@ export class LanceDBStore implements VectorStore {
|
|
|
61
81
|
private readonly tableName = "notes";
|
|
62
82
|
|
|
63
83
|
constructor(dataDir?: string) {
|
|
64
|
-
this.dbPath = dataDir ||
|
|
84
|
+
this.dbPath = dataDir || getDataDir();
|
|
65
85
|
}
|
|
66
86
|
|
|
67
87
|
private async ensureConnection(): Promise<lancedb.Connection> {
|
|
@@ -103,9 +123,71 @@ export class LanceDBStore implements VectorStore {
|
|
|
103
123
|
debug("Table drop skipped (table may not exist):", error);
|
|
104
124
|
}
|
|
105
125
|
|
|
126
|
+
// Arrow type inference requires the FIRST record to have non-empty arrays.
|
|
127
|
+
// Strategy: Reorder records so the first has non-empty tags/outlinks,
|
|
128
|
+
// or add placeholders that stay in the data (filtered on read).
|
|
129
|
+
const processedRecords = records.map((r) => ({
|
|
130
|
+
...r,
|
|
131
|
+
tags: r.tags ?? [],
|
|
132
|
+
outlinks: r.outlinks ?? [],
|
|
133
|
+
}));
|
|
134
|
+
|
|
135
|
+
// Track if we added placeholders
|
|
136
|
+
let addedTagPlaceholder = false;
|
|
137
|
+
let addedOutlinkPlaceholder = false;
|
|
138
|
+
|
|
139
|
+
if (processedRecords.length > 0) {
|
|
140
|
+
// Ensure FIRST record has non-empty tags for type inference
|
|
141
|
+
if (processedRecords[0].tags.length === 0) {
|
|
142
|
+
// Try to find a record with tags and swap
|
|
143
|
+
const tagIdx = processedRecords.findIndex(r => r.tags.length > 0);
|
|
144
|
+
if (tagIdx > 0) {
|
|
145
|
+
// Swap first record with the one that has tags
|
|
146
|
+
[processedRecords[0], processedRecords[tagIdx]] =
|
|
147
|
+
[processedRecords[tagIdx], processedRecords[0]];
|
|
148
|
+
} else {
|
|
149
|
+
// No record has tags - add placeholder
|
|
150
|
+
processedRecords[0].tags = ["__type_placeholder__"];
|
|
151
|
+
addedTagPlaceholder = true;
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// Ensure FIRST record has non-empty outlinks for type inference
|
|
156
|
+
if (processedRecords[0].outlinks.length === 0) {
|
|
157
|
+
// Try to find a record with outlinks and copy its structure
|
|
158
|
+
const outlinkIdx = processedRecords.findIndex(r => r.outlinks.length > 0);
|
|
159
|
+
if (outlinkIdx === -1) {
|
|
160
|
+
// No record has outlinks - add placeholder
|
|
161
|
+
processedRecords[0].outlinks = ["__type_placeholder__"];
|
|
162
|
+
addedOutlinkPlaceholder = true;
|
|
163
|
+
} else {
|
|
164
|
+
// Copy first outlink to first record temporarily, then remove
|
|
165
|
+
processedRecords[0].outlinks = ["__type_placeholder__"];
|
|
166
|
+
addedOutlinkPlaceholder = true;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
debug(`Creating table with ${processedRecords.length} records (tag placeholder: ${addedTagPlaceholder}, outlink placeholder: ${addedOutlinkPlaceholder})`);
|
|
172
|
+
|
|
106
173
|
// Create new table with records
|
|
107
|
-
|
|
108
|
-
|
|
174
|
+
this.table = await db.createTable(this.tableName, processedRecords);
|
|
175
|
+
|
|
176
|
+
// Remove placeholders by deleting and re-inserting the first record
|
|
177
|
+
if (addedTagPlaceholder || addedOutlinkPlaceholder) {
|
|
178
|
+
const firstRecord = processedRecords[0];
|
|
179
|
+
const cleanRecord = {
|
|
180
|
+
...firstRecord,
|
|
181
|
+
tags: addedTagPlaceholder ? [] : firstRecord.tags,
|
|
182
|
+
outlinks: addedOutlinkPlaceholder ? [] : firstRecord.outlinks,
|
|
183
|
+
};
|
|
184
|
+
|
|
185
|
+
// Delete the record with placeholders
|
|
186
|
+
await this.table.delete(`id = '${escapeForFilter(firstRecord.id)}'`);
|
|
187
|
+
// Re-insert without placeholders
|
|
188
|
+
await this.table.add([cleanRecord]);
|
|
189
|
+
debug("Removed type inference placeholders via delete+insert");
|
|
190
|
+
}
|
|
109
191
|
|
|
110
192
|
// Create FTS index for hybrid search
|
|
111
193
|
debug("Creating FTS index on content");
|
|
@@ -217,15 +299,18 @@ export class LanceDBStore implements VectorStore {
|
|
|
217
299
|
|
|
218
300
|
const results = await table.query().toArray();
|
|
219
301
|
|
|
220
|
-
return results.map((row) => ({
|
|
302
|
+
return results.map((row): NoteRecord => ({
|
|
221
303
|
id: (row.id as string) ?? "",
|
|
222
304
|
title: row.title as string,
|
|
223
305
|
content: row.content as string,
|
|
224
|
-
vector: row.vector as number
|
|
306
|
+
vector: Array.isArray(row.vector) ? row.vector : Array.from(row.vector as Iterable<number>),
|
|
225
307
|
folder: row.folder as string,
|
|
226
308
|
created: row.created as string,
|
|
227
309
|
modified: row.modified as string,
|
|
228
310
|
indexed_at: row.indexed_at as string,
|
|
311
|
+
// Arrow Vectors need explicit conversion to JS arrays
|
|
312
|
+
tags: Array.isArray(row.tags) ? row.tags : Array.from(row.tags as Iterable<string>),
|
|
313
|
+
outlinks: Array.isArray(row.outlinks) ? row.outlinks : Array.from(row.outlinks as Iterable<string>),
|
|
229
314
|
}));
|
|
230
315
|
}
|
|
231
316
|
|
|
@@ -270,3 +355,256 @@ export function getVectorStore(): VectorStore {
|
|
|
270
355
|
}
|
|
271
356
|
return storeInstance;
|
|
272
357
|
}
|
|
358
|
+
|
|
359
|
+
/** Shape of one hit returned by the chunk search methods. */
export interface ChunkSearchResult {
  chunk_id: string;
  note_id: string;
  note_title: string;
  folder: string;
  chunk_index: number;
  total_chunks: number;
  content: string;
  modified: string;
  score: number;
}

/**
 * Map a raw chunk row onto a ChunkSearchResult.
 *
 * The score is derived purely from the row's position in the result list:
 * `1 / (1 + index)`, so the first row scores 1, the second 0.5, and so on.
 *
 * @param row - Raw database row keyed by column name.
 * @param index - Zero-based rank of the row in the result list.
 * @returns The row's chunk fields plus the rank-based score.
 */
function rowToChunkSearchResult(row: Record<string, unknown>, index: number): ChunkSearchResult {
  const text = (column: string): string => row[column] as string;
  const int = (column: string): number => row[column] as number;

  const hit: ChunkSearchResult = {
    chunk_id: text("chunk_id"),
    note_id: text("note_id"),
    note_title: text("note_title"),
    folder: text("folder"),
    chunk_index: int("chunk_index"),
    total_chunks: int("total_chunks"),
    content: text("content"),
    modified: text("modified"),
    score: 1 / (1 + index),
  };
  return hit;
}
|
|
388
|
+
|
|
389
|
+
/**
 * LanceDB-backed store for note chunks (Parent Document Retriever pattern).
 *
 * The connection and the table handle are opened lazily and cached on the
 * instance. All chunks live in a single table named "chunks".
 */
export class ChunkStore {
  /** Temporary list element used so Arrow can infer the list column type. */
  private static readonly TYPE_PLACEHOLDER = "__type_placeholder__";

  private db: lancedb.Connection | null = null;
  private table: lancedb.Table | null = null;
  private readonly dbPath: string;
  private readonly tableName = "chunks";

  /**
   * @param dataDir - Directory of the LanceDB database; falls back to
   *   `getDataDir()` when omitted or empty.
   */
  constructor(dataDir?: string) {
    this.dbPath = dataDir || getDataDir();
  }

  /**
   * Convert a list-like column value into a plain JS array.
   * Arrays are returned as-is, null/undefined become an empty array (instead
   * of making `Array.from` throw), anything else is treated as an iterable.
   */
  private static toPlainArray<T>(value: unknown): T[] {
    if (Array.isArray(value)) {
      return value as T[];
    }
    if (value == null) {
      return [];
    }
    return Array.from(value as Iterable<T>);
  }

  /** Return the cached connection, connecting on first use. */
  private async ensureConnection(): Promise<lancedb.Connection> {
    if (!this.db) {
      debug(`ChunkStore: Connecting to LanceDB at ${this.dbPath}`);
      this.db = await lancedb.connect(this.dbPath);
    }
    return this.db;
  }

  /**
   * Return the cached table handle, opening the table on first use.
   *
   * @throws Error when the chunk table cannot be opened.
   */
  private async ensureTable(): Promise<lancedb.Table> {
    if (!this.table) {
      const db = await this.ensureConnection();
      try {
        this.table = await db.openTable(this.tableName);
        debug(`ChunkStore: Opened existing table: ${this.tableName}`);
      } catch (error) {
        debug(`ChunkStore: Table ${this.tableName} not found. Error:`, error);
        throw new Error("Chunk index not found. Run index-notes first.");
      }
    }
    return this.table;
  }

  /** (Re)create the full-text-search index on the `content` column. */
  private async createFtsIndex(table: lancedb.Table): Promise<void> {
    await table.createIndex("content", {
      config: lancedb.Index.fts(),
      replace: true,
    });
  }

  /**
   * Replace the whole chunk table with `chunks` and build its FTS index.
   *
   * An empty input is a no-op: the existing table, if any, is left untouched.
   *
   * @param chunks - Chunks to store; the input array and its records are not
   *   mutated.
   */
  async indexChunks(chunks: ChunkRecord[]): Promise<void> {
    if (chunks.length === 0) {
      debug("ChunkStore: No chunks to index");
      return;
    }

    const db = await this.ensureConnection();

    // Drop existing table if exists
    try {
      await db.dropTable(this.tableName);
      debug(`ChunkStore: Dropped existing table: ${this.tableName}`);
    } catch (error) {
      debug("ChunkStore: Table drop skipped (table may not exist):", error);
    }
    // Any cached handle now refers to the dropped table. Forget it so a
    // failure in createTable below cannot leave a stale handle behind.
    this.table = null;

    // Arrow type inference requires the FIRST record to have non-empty arrays.
    // Work on shallow copies so the caller's records are never modified.
    const processedChunks = chunks.map((c) => ({
      ...c,
      tags: c.tags ?? [],
      outlinks: c.outlinks ?? [],
    }));

    let addedTagPlaceholder = false;
    let addedOutlinkPlaceholder = false;

    // Tags: prefer moving a chunk that already has tags to the front;
    // only fall back to a placeholder when no chunk has any tag.
    const tagIdx = processedChunks.findIndex((c) => c.tags.length > 0);
    if (tagIdx > 0) {
      [processedChunks[0], processedChunks[tagIdx]] =
        [processedChunks[tagIdx], processedChunks[0]];
    } else if (tagIdx === -1) {
      processedChunks[0].tags = [ChunkStore.TYPE_PLACEHOLDER];
      addedTagPlaceholder = true;
    }

    // Outlinks: the (possibly swapped) first chunk always gets a placeholder
    // when its list is empty, whether or not other chunks have outlinks.
    if (processedChunks[0].outlinks.length === 0) {
      processedChunks[0].outlinks = [ChunkStore.TYPE_PLACEHOLDER];
      addedOutlinkPlaceholder = true;
    }

    debug(`ChunkStore: Creating table with ${processedChunks.length} chunks (tag placeholder: ${addedTagPlaceholder}, outlink placeholder: ${addedOutlinkPlaceholder})`);

    // Create new table with chunks
    const table = await db.createTable(this.tableName, processedChunks);
    this.table = table;

    // Remove placeholders by deleting and re-inserting the first chunk
    if (addedTagPlaceholder || addedOutlinkPlaceholder) {
      const firstChunk = processedChunks[0];
      const cleanChunk = {
        ...firstChunk,
        tags: addedTagPlaceholder ? [] : firstChunk.tags,
        outlinks: addedOutlinkPlaceholder ? [] : firstChunk.outlinks,
      };

      // Delete the chunk with placeholders
      await table.delete(`chunk_id = '${escapeForFilter(firstChunk.chunk_id)}'`);
      // Re-insert without placeholders
      await table.add([cleanChunk]);
      debug("ChunkStore: Removed type inference placeholders via delete+insert");
    }

    // Create FTS index for hybrid search
    debug("ChunkStore: Creating FTS index on content");
    await this.createFtsIndex(table);

    debug(`ChunkStore: Indexed ${chunks.length} chunks`);
  }

  /**
   * Vector search over the chunk table.
   *
   * @param queryVector - Query embedding.
   * @param limit - Maximum number of rows to return.
   * @returns Hits in result order, each with a rank-based score.
   * @throws Error when the chunk table cannot be opened.
   */
  async searchChunks(queryVector: number[], limit: number): Promise<ChunkSearchResult[]> {
    const table = await this.ensureTable();

    const results = await table
      .search(queryVector)
      .limit(limit)
      .toArray();

    return results.map(rowToChunkSearchResult);
  }

  /**
   * Full-text search over chunk content.
   *
   * A failing query is logged and reported as an empty result list; a
   * missing table still throws.
   *
   * @param query - Full-text query string.
   * @param limit - Maximum number of rows to return.
   * @throws Error when the chunk table cannot be opened.
   */
  async searchChunksFTS(query: string, limit: number): Promise<ChunkSearchResult[]> {
    const table = await this.ensureTable();

    try {
      const results = await table
        .query()
        .fullTextSearch(query)
        .limit(limit)
        .toArray();

      return results.map(rowToChunkSearchResult);
    } catch (error) {
      debug("ChunkStore: FTS search failed, returning empty results. Error:", error);
      return [];
    }
  }

  /**
   * Fetch every chunk of one note, ordered by ascending `chunk_index`.
   *
   * @param noteId - Parent note ID; escaped before it is put into the filter.
   * @returns The note's chunks, or an empty array when none match.
   * @throws Error when the chunk table cannot be opened.
   */
  async getChunksByNoteId(noteId: string): Promise<ChunkRecord[]> {
    const table = await this.ensureTable();
    const escapedNoteId = escapeForFilter(noteId);

    const results = await table
      .query()
      .where(`note_id = '${escapedNoteId}'`)
      .toArray();

    // List columns may come back as non-array iterables (or be missing), so
    // normalise them to plain JS arrays.
    const chunks = results.map((row): ChunkRecord => ({
      chunk_id: row.chunk_id as string,
      note_id: row.note_id as string,
      note_title: row.note_title as string,
      folder: row.folder as string,
      chunk_index: row.chunk_index as number,
      total_chunks: row.total_chunks as number,
      content: row.content as string,
      vector: ChunkStore.toPlainArray<number>(row.vector),
      created: row.created as string,
      modified: row.modified as string,
      indexed_at: row.indexed_at as string,
      tags: ChunkStore.toPlainArray<string>(row.tags),
      outlinks: ChunkStore.toPlainArray<string>(row.outlinks),
    }));

    return chunks.sort((a, b) => a.chunk_index - b.chunk_index);
  }

  /**
   * Delete every chunk that belongs to the given note.
   *
   * @param noteId - Parent note ID; escaped before it is put into the filter.
   * @throws Error when the chunk table cannot be opened.
   */
  async deleteNoteChunks(noteId: string): Promise<void> {
    const table = await this.ensureTable();
    const escapedNoteId = escapeForFilter(noteId);
    await table.delete(`note_id = '${escapedNoteId}'`);
    debug(`ChunkStore: Deleted chunks for note: ${noteId}`);
  }

  /**
   * Number of stored chunks. Any failure (including a missing table) is
   * logged and reported as 0.
   */
  async count(): Promise<number> {
    try {
      const table = await this.ensureTable();
      return await table.countRows();
    } catch (error) {
      debug("ChunkStore: Count failed (table may not exist):", error);
      return 0;
    }
  }

  /** Drop the chunk table; a failing drop is logged and otherwise ignored. */
  async clear(): Promise<void> {
    const db = await this.ensureConnection();
    try {
      await db.dropTable(this.tableName);
      this.table = null;
      debug("ChunkStore: Cleared table");
    } catch (error) {
      debug("ChunkStore: Clear skipped (table may not exist):", error);
    }
  }

  /**
   * Recreate the FTS index on the `content` column.
   *
   * @throws Error when the chunk table cannot be opened.
   */
  async rebuildFtsIndex(): Promise<void> {
    const table = await this.ensureTable();
    debug("ChunkStore: Rebuilding FTS index on content");
    await this.createFtsIndex(table);
    debug("ChunkStore: FTS index rebuilt");
  }
}
|
|
601
|
+
|
|
602
|
+
// Module-level ChunkStore, created lazily by getChunkStore()
let chunkStoreInstance: ChunkStore | null = null;

/**
 * Return the module's ChunkStore, constructing it with the default data
 * directory on the first call and reusing that instance afterwards.
 */
export function getChunkStore(): ChunkStore {
  const existing = chunkStoreInstance;
  if (existing) {
    return existing;
  }
  const created = new ChunkStore();
  chunkStoreInstance = created;
  return created;
}
|