@disco_trooper/apple-notes-mcp 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -24
- package/package.json +11 -12
- package/src/config/claude.test.ts +47 -0
- package/src/config/claude.ts +106 -0
- package/src/config/constants.ts +11 -2
- package/src/config/paths.test.ts +40 -0
- package/src/config/paths.ts +86 -0
- package/src/db/arrow-fix.test.ts +101 -0
- package/src/db/lancedb.test.ts +254 -2
- package/src/db/lancedb.ts +385 -38
- package/src/embeddings/cache.test.ts +150 -0
- package/src/embeddings/cache.ts +204 -0
- package/src/embeddings/index.ts +22 -4
- package/src/embeddings/local.ts +57 -17
- package/src/embeddings/openrouter.ts +233 -11
- package/src/errors/index.test.ts +64 -0
- package/src/errors/index.ts +62 -0
- package/src/graph/export.test.ts +81 -0
- package/src/graph/export.ts +163 -0
- package/src/graph/extract.test.ts +90 -0
- package/src/graph/extract.ts +52 -0
- package/src/graph/queries.test.ts +156 -0
- package/src/graph/queries.ts +224 -0
- package/src/index.ts +309 -23
- package/src/notes/conversion.ts +62 -0
- package/src/notes/crud.test.ts +41 -8
- package/src/notes/crud.ts +75 -64
- package/src/notes/read.test.ts +58 -3
- package/src/notes/read.ts +142 -210
- package/src/notes/resolve.ts +174 -0
- package/src/notes/tables.ts +69 -40
- package/src/search/chunk-indexer.test.ts +353 -0
- package/src/search/chunk-indexer.ts +207 -0
- package/src/search/chunk-search.test.ts +327 -0
- package/src/search/chunk-search.ts +298 -0
- package/src/search/index.ts +4 -6
- package/src/search/indexer.ts +164 -109
- package/src/setup.ts +46 -67
- package/src/types/index.ts +4 -0
- package/src/utils/chunker.test.ts +182 -0
- package/src/utils/chunker.ts +170 -0
- package/src/utils/content-filter.test.ts +225 -0
- package/src/utils/content-filter.ts +275 -0
- package/src/utils/debug.ts +0 -2
- package/src/utils/runtime.test.ts +70 -0
- package/src/utils/runtime.ts +40 -0
- package/src/utils/text.test.ts +32 -0
- package/CLAUDE.md +0 -56
- package/src/server.ts +0 -427
package/src/db/lancedb.ts
CHANGED
@@ -1,12 +1,12 @@
 import * as lancedb from "@lancedb/lancedb";
-import path from "node:path";
-import os from "node:os";
 import { validateTitle, escapeForFilter } from "./validation.js";
 import type { DBSearchResult as SearchResult } from "../types/index.js";
 import { createDebugLogger } from "../utils/debug.js";
+import { getDataDir } from "../config/paths.js";

 // Schema for stored notes
 export interface NoteRecord {
+  id: string; // Apple Notes unique identifier
   title: string;
   content: string;
   vector: number[];
@@ -14,9 +14,30 @@ export interface NoteRecord {
   created: string; // ISO date
   modified: string; // ISO date
   indexed_at: string; // ISO date - when embedding was generated
+  // Knowledge Graph fields
+  tags: string[]; // Extracted #hashtags (without #)
+  outlinks: string[]; // Extracted [[wiki-links]] titles
   [key: string]: unknown; // Index signature for LanceDB compatibility
 }

+// Schema for chunked notes (Parent Document Retriever pattern)
+export interface ChunkRecord {
+  chunk_id: string; // `${note_id}_chunk_${index}`
+  note_id: string; // Parent note Apple ID
+  note_title: string; // For display and deduplication
+  folder: string;
+  chunk_index: number; // 0, 1, 2...
+  total_chunks: number; // Total chunks in this note
+  content: string; // Chunk content
+  vector: number[];
+  created: string; // ISO date (from parent)
+  modified: string; // ISO date (from parent)
+  indexed_at: string; // ISO date
+  tags: string[]; // From parent note
+  outlinks: string[]; // From parent note
+  [key: string]: unknown; // Index signature for LanceDB
+}
+
 // SearchResult is imported from ../types/index.js as DBSearchResult
 export type { SearchResult };

@@ -32,11 +53,26 @@ export interface VectorStore {
   getAll(): Promise<NoteRecord[]>;
   count(): Promise<number>;
   clear(): Promise<void>;
+  rebuildFtsIndex(): Promise<void>;
 }

 // Debug logging
 const debug = createDebugLogger("DB");

+/**
+ * Convert a database row to a SearchResult with rank-based score.
+ */
+function rowToSearchResult(row: Record<string, unknown>, index: number): SearchResult {
+  return {
+    id: row.id as string | undefined,
+    title: row.title as string,
+    folder: row.folder as string,
+    content: row.content as string,
+    modified: row.modified as string,
+    score: 1 / (1 + index),
+  };
+}
+
 // LanceDB implementation
 export class LanceDBStore implements VectorStore {
   private db: lancedb.Connection | null = null;
@@ -45,7 +81,7 @@ export class LanceDBStore implements VectorStore {
   private readonly tableName = "notes";

   constructor(dataDir?: string) {
-    this.dbPath = dataDir ||
+    this.dbPath = dataDir || getDataDir();
   }

   private async ensureConnection(): Promise<lancedb.Connection> {
@@ -83,13 +119,75 @@ export class LanceDBStore implements VectorStore {
     try {
       await db.dropTable(this.tableName);
       debug(`Dropped existing table: ${this.tableName}`);
-    } catch {
-
+    } catch (error) {
+      debug("Table drop skipped (table may not exist):", error);
     }

+    // Arrow type inference requires the FIRST record to have non-empty arrays.
+    // Strategy: Reorder records so the first has non-empty tags/outlinks,
+    // or add placeholders that stay in the data (filtered on read).
+    const processedRecords = records.map((r) => ({
+      ...r,
+      tags: r.tags ?? [],
+      outlinks: r.outlinks ?? [],
+    }));
+
+    // Track if we added placeholders
+    let addedTagPlaceholder = false;
+    let addedOutlinkPlaceholder = false;
+
+    if (processedRecords.length > 0) {
+      // Ensure FIRST record has non-empty tags for type inference
+      if (processedRecords[0].tags.length === 0) {
+        // Try to find a record with tags and swap
+        const tagIdx = processedRecords.findIndex(r => r.tags.length > 0);
+        if (tagIdx > 0) {
+          // Swap first record with the one that has tags
+          [processedRecords[0], processedRecords[tagIdx]] =
+            [processedRecords[tagIdx], processedRecords[0]];
+        } else {
+          // No record has tags - add placeholder
+          processedRecords[0].tags = ["__type_placeholder__"];
+          addedTagPlaceholder = true;
+        }
+      }
+
+      // Ensure FIRST record has non-empty outlinks for type inference
+      if (processedRecords[0].outlinks.length === 0) {
+        // Try to find a record with outlinks and copy its structure
+        const outlinkIdx = processedRecords.findIndex(r => r.outlinks.length > 0);
+        if (outlinkIdx === -1) {
+          // No record has outlinks - add placeholder
+          processedRecords[0].outlinks = ["__type_placeholder__"];
+          addedOutlinkPlaceholder = true;
+        } else {
+          // Copy first outlink to first record temporarily, then remove
+          processedRecords[0].outlinks = ["__type_placeholder__"];
+          addedOutlinkPlaceholder = true;
+        }
+      }
+    }
+
+    debug(`Creating table with ${processedRecords.length} records (tag placeholder: ${addedTagPlaceholder}, outlink placeholder: ${addedOutlinkPlaceholder})`);
+
     // Create new table with records
-
-
+    this.table = await db.createTable(this.tableName, processedRecords);
+
+    // Remove placeholders by deleting and re-inserting the first record
+    if (addedTagPlaceholder || addedOutlinkPlaceholder) {
+      const firstRecord = processedRecords[0];
+      const cleanRecord = {
+        ...firstRecord,
+        tags: addedTagPlaceholder ? [] : firstRecord.tags,
+        outlinks: addedOutlinkPlaceholder ? [] : firstRecord.outlinks,
+      };
+
+      // Delete the record with placeholders
+      await this.table.delete(`id = '${escapeForFilter(firstRecord.id)}'`);
+      // Re-insert without placeholders
+      await this.table.add([cleanRecord]);
+      debug("Removed type inference placeholders via delete+insert");
+    }

     // Create FTS index for hybrid search
     debug("Creating FTS index on content");
@@ -106,13 +204,8 @@

     // Add new record first (LanceDB allows duplicates with same title)
     // This ensures we never lose data - if add fails, old record still exists
-
-
-      debug(`Added new version of record: ${record.title}`);
-    } catch (addError) {
-      // If add fails, old record still exists, throw original error
-      throw addError;
-    }
+    await table.add([record]);
+    debug(`Added new version of record: ${record.title}`);

     // Now delete old record(s) - use indexed_at to identify which is old
     const validTitle = validateTitle(record.title);
@@ -165,35 +258,21 @@
       .limit(limit)
       .toArray();

-    return results.map(
-      title: row.title as string,
-      folder: row.folder as string,
-      content: row.content as string,
-      modified: row.modified as string,
-      score: 1 / (1 + index), // Simple rank-based score
-    }));
+    return results.map(rowToSearchResult);
   }

   async searchFTS(query: string, limit: number): Promise<SearchResult[]> {
     const table = await this.ensureTable();

     try {
-
-
-
-        .search(query, { queryType: "fts" })
+      const results = await table
+        .query()
+        .fullTextSearch(query)
         .limit(limit)
         .toArray();

-      return results.map(
-        title: row.title as string,
-        folder: row.folder as string,
-        content: row.content as string,
-        modified: row.modified as string,
-        score: 1 / (1 + index),
-      }));
+      return results.map(rowToChunkSearchResult);
     } catch (error) {
-      // FTS might fail if no index or no matches
       debug("FTS search failed, returning empty results. Error:", error);
       return [];
     }
@@ -220,14 +299,18 @@

     const results = await table.query().toArray();

-    return results.map((row) => ({
+    return results.map((row): NoteRecord => ({
+      id: (row.id as string) ?? "",
       title: row.title as string,
       content: row.content as string,
-      vector: row.vector as number
+      vector: Array.isArray(row.vector) ? row.vector : Array.from(row.vector as Iterable<number>),
       folder: row.folder as string,
       created: row.created as string,
       modified: row.modified as string,
       indexed_at: row.indexed_at as string,
+      // Arrow Vectors need explicit conversion to JS arrays
+      tags: Array.isArray(row.tags) ? row.tags : Array.from(row.tags as Iterable<string>),
+      outlinks: Array.isArray(row.outlinks) ? row.outlinks : Array.from(row.outlinks as Iterable<string>),
     }));
   }

@@ -235,7 +318,8 @@
     try {
       const table = await this.ensureTable();
       return await table.countRows();
-    } catch {
+    } catch (error) {
+      debug("Count failed (table may not exist):", error);
       return 0;
     }
   }
@@ -246,10 +330,20 @@
       await db.dropTable(this.tableName);
       this.table = null;
       debug("Cleared table");
-    } catch {
-
+    } catch (error) {
+      debug("Clear skipped (table may not exist):", error);
     }
   }
+
+  async rebuildFtsIndex(): Promise<void> {
+    const table = await this.ensureTable();
+    debug("Rebuilding FTS index on content");
+    await table.createIndex("content", {
+      config: lancedb.Index.fts(),
+      replace: true,
+    });
+    debug("FTS index rebuilt");
+  }
 }

 // Singleton instance
@@ -261,3 +355,256 @@ export function getVectorStore(): VectorStore {
   }
   return storeInstance;
 }
+
+// Search result type for chunks
+export interface ChunkSearchResult {
+  chunk_id: string;
+  note_id: string;
+  note_title: string;
+  folder: string;
+  chunk_index: number;
+  total_chunks: number;
+  content: string;
+  modified: string;
+  score: number;
+}
+
+/**
+ * Convert a chunk database row to a ChunkSearchResult with rank-based score.
+ */
+function rowToChunkSearchResult(row: Record<string, unknown>, index: number): ChunkSearchResult {
+  return {
+    chunk_id: row.chunk_id as string,
+    note_id: row.note_id as string,
+    note_title: row.note_title as string,
+    folder: row.folder as string,
+    chunk_index: row.chunk_index as number,
+    total_chunks: row.total_chunks as number,
+    content: row.content as string,
+    modified: row.modified as string,
+    score: 1 / (1 + index),
+  };
+}
+
+// ChunkStore for Parent Document Retriever pattern
+export class ChunkStore {
+  private db: lancedb.Connection | null = null;
+  private table: lancedb.Table | null = null;
+  private readonly dbPath: string;
+  private readonly tableName = "chunks";
+
+  constructor(dataDir?: string) {
+    this.dbPath = dataDir || getDataDir();
+  }
+
+  private async ensureConnection(): Promise<lancedb.Connection> {
+    if (!this.db) {
+      debug(`ChunkStore: Connecting to LanceDB at ${this.dbPath}`);
+      this.db = await lancedb.connect(this.dbPath);
+    }
+    return this.db;
+  }
+
+  private async ensureTable(): Promise<lancedb.Table> {
+    if (!this.table) {
+      const db = await this.ensureConnection();
+      try {
+        this.table = await db.openTable(this.tableName);
+        debug(`ChunkStore: Opened existing table: ${this.tableName}`);
+      } catch (error) {
+        debug(`ChunkStore: Table ${this.tableName} not found. Error:`, error);
+        throw new Error("Chunk index not found. Run index-notes first.");
+      }
+    }
+    return this.table;
+  }
+
+  async indexChunks(chunks: ChunkRecord[]): Promise<void> {
+    if (chunks.length === 0) {
+      debug("ChunkStore: No chunks to index");
+      return;
+    }
+
+    const db = await this.ensureConnection();
+
+    // Drop existing table if exists
+    try {
+      await db.dropTable(this.tableName);
+      debug(`ChunkStore: Dropped existing table: ${this.tableName}`);
+    } catch (error) {
+      debug("ChunkStore: Table drop skipped (table may not exist):", error);
+    }
+
+    // Arrow type inference requires the FIRST record to have non-empty arrays.
+    // Same strategy as LanceDBStore
+    const processedChunks = chunks.map((c) => ({
+      ...c,
+      tags: c.tags ?? [],
+      outlinks: c.outlinks ?? [],
+    }));
+
+    let addedTagPlaceholder = false;
+    let addedOutlinkPlaceholder = false;
+
+    if (processedChunks.length > 0) {
+      // Ensure FIRST chunk has non-empty tags for type inference
+      if (processedChunks[0].tags.length === 0) {
+        const tagIdx = processedChunks.findIndex(c => c.tags.length > 0);
+        if (tagIdx > 0) {
+          [processedChunks[0], processedChunks[tagIdx]] =
+            [processedChunks[tagIdx], processedChunks[0]];
+        } else {
+          processedChunks[0].tags = ["__type_placeholder__"];
+          addedTagPlaceholder = true;
+        }
+      }
+
+      // Ensure FIRST chunk has non-empty outlinks for type inference
+      if (processedChunks[0].outlinks.length === 0) {
+        const outlinkIdx = processedChunks.findIndex(c => c.outlinks.length > 0);
+        if (outlinkIdx === -1) {
+          processedChunks[0].outlinks = ["__type_placeholder__"];
+          addedOutlinkPlaceholder = true;
+        } else {
+          processedChunks[0].outlinks = ["__type_placeholder__"];
+          addedOutlinkPlaceholder = true;
+        }
+      }
+    }
+
+    debug(`ChunkStore: Creating table with ${processedChunks.length} chunks (tag placeholder: ${addedTagPlaceholder}, outlink placeholder: ${addedOutlinkPlaceholder})`);
+
+    // Create new table with chunks
+    this.table = await db.createTable(this.tableName, processedChunks);
+
+    // Remove placeholders by deleting and re-inserting the first chunk
+    if (addedTagPlaceholder || addedOutlinkPlaceholder) {
+      const firstChunk = processedChunks[0];
+      const cleanChunk = {
+        ...firstChunk,
+        tags: addedTagPlaceholder ? [] : firstChunk.tags,
+        outlinks: addedOutlinkPlaceholder ? [] : firstChunk.outlinks,
+      };
+
+      // Delete the chunk with placeholders
+      await this.table.delete(`chunk_id = '${escapeForFilter(firstChunk.chunk_id)}'`);
+      // Re-insert without placeholders
+      await this.table.add([cleanChunk]);
+      debug("ChunkStore: Removed type inference placeholders via delete+insert");
+    }

+    // Create FTS index for hybrid search
+    debug("ChunkStore: Creating FTS index on content");
+    await this.table.createIndex("content", {
+      config: lancedb.Index.fts(),
+      replace: true,
+    });
+
+    debug(`ChunkStore: Indexed ${chunks.length} chunks`);
+  }
+
+  async searchChunks(queryVector: number[], limit: number): Promise<ChunkSearchResult[]> {
+    const table = await this.ensureTable();
+
+    const results = await table
+      .search(queryVector)
+      .limit(limit)
+      .toArray();
+
+    return results.map(rowToChunkSearchResult);
+  }
+
+  async searchChunksFTS(query: string, limit: number): Promise<ChunkSearchResult[]> {
+    const table = await this.ensureTable();
+
+    try {
+      const results = await table
+        .query()
+        .fullTextSearch(query)
+        .limit(limit)
+        .toArray();
+
+      return results.map(rowToChunkSearchResult);
+    } catch (error) {
+      debug("ChunkStore: FTS search failed, returning empty results. Error:", error);
+      return [];
+    }
+  }
+
+  async getChunksByNoteId(noteId: string): Promise<ChunkRecord[]> {
+    const table = await this.ensureTable();
+    const escapedNoteId = escapeForFilter(noteId);
+
+    const results = await table
+      .query()
+      .where(`note_id = '${escapedNoteId}'`)
+      .toArray();
+
+    // Convert and sort by chunk_index
+    const chunks = results.map((row): ChunkRecord => ({
+      chunk_id: row.chunk_id as string,
+      note_id: row.note_id as string,
+      note_title: row.note_title as string,
+      folder: row.folder as string,
+      chunk_index: row.chunk_index as number,
+      total_chunks: row.total_chunks as number,
+      content: row.content as string,
+      vector: Array.isArray(row.vector) ? row.vector : Array.from(row.vector as Iterable<number>),
+      created: row.created as string,
+      modified: row.modified as string,
+      indexed_at: row.indexed_at as string,
+      tags: Array.isArray(row.tags) ? row.tags : Array.from(row.tags as Iterable<string>),
+      outlinks: Array.isArray(row.outlinks) ? row.outlinks : Array.from(row.outlinks as Iterable<string>),
+    }));
+
+    return chunks.sort((a, b) => a.chunk_index - b.chunk_index);
+  }
+
+  async deleteNoteChunks(noteId: string): Promise<void> {
+    const table = await this.ensureTable();
+    const escapedNoteId = escapeForFilter(noteId);
+    await table.delete(`note_id = '${escapedNoteId}'`);
+    debug(`ChunkStore: Deleted chunks for note: ${noteId}`);
+  }
+
+  async count(): Promise<number> {
+    try {
+      const table = await this.ensureTable();
+      return await table.countRows();
+    } catch (error) {
+      debug("ChunkStore: Count failed (table may not exist):", error);
+      return 0;
+    }
+  }
+
+  async clear(): Promise<void> {
+    const db = await this.ensureConnection();
+    try {
+      await db.dropTable(this.tableName);
+      this.table = null;
+      debug("ChunkStore: Cleared table");
+    } catch (error) {
+      debug("ChunkStore: Clear skipped (table may not exist):", error);
+    }
+  }
+
+  async rebuildFtsIndex(): Promise<void> {
+    const table = await this.ensureTable();
+    debug("ChunkStore: Rebuilding FTS index on content");
+    await table.createIndex("content", {
+      config: lancedb.Index.fts(),
+      replace: true,
+    });
+    debug("ChunkStore: FTS index rebuilt");
+  }
+}
+
+// Singleton instance for ChunkStore
+let chunkStoreInstance: ChunkStore | null = null;
+
+export function getChunkStore(): ChunkStore {
+  if (!chunkStoreInstance) {
+    chunkStoreInstance = new ChunkStore();
+  }
+  return chunkStoreInstance;
+}
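Note on the "Parent Document Retriever" pattern named in the comments above: search runs over small chunks, and callers can then pull back every chunk of a matching note to reconstruct the full parent. The following is a rough usage sketch against the ChunkStore API shown in this diff; it is not code from the package. The embedQuery helper is hypothetical, and the package's actual retrieval flow presumably lives in src/search/chunk-search.ts and may differ.

import { getChunkStore } from "./lancedb.js";

// Hypothetical embedding helper for this sketch; the real entry point lives under src/embeddings/.
declare function embedQuery(query: string): Promise<number[]>;

async function searchWithParentRetrieval(query: string, limit = 10) {
  const chunkStore = getChunkStore();

  // 1. Vector search over chunks (small, focused slices of notes)
  const queryVector = await embedQuery(query);
  const hits = await chunkStore.searchChunks(queryVector, limit);

  // 2. Deduplicate by parent note and fetch every chunk of each matching note
  const noteIds = [...new Set(hits.map((hit) => hit.note_id))];
  return Promise.all(
    noteIds.map(async (noteId) => {
      const chunks = await chunkStore.getChunksByNoteId(noteId); // returned sorted by chunk_index
      return {
        noteId,
        title: chunks[0]?.note_title ?? "",
        content: chunks.map((c) => c.content).join("\n"),
      };
    }),
  );
}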
package/src/embeddings/cache.test.ts
ADDED

@@ -0,0 +1,150 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { getEmbeddingCache, resetEmbeddingCache } from "./cache.js";
+
+describe("EmbeddingCache", () => {
+  beforeEach(() => {
+    resetEmbeddingCache();
+  });
+
+  describe("get/set", () => {
+    it("returns undefined for uncached query", () => {
+      const cache = getEmbeddingCache();
+      expect(cache.get("test query")).toBeUndefined();
+    });
+
+    it("returns cached embedding", () => {
+      const cache = getEmbeddingCache();
+      const embedding = [0.1, 0.2, 0.3];
+
+      cache.set("test query", embedding);
+      expect(cache.get("test query")).toEqual(embedding);
+    });
+
+    it("normalizes queries for better hit rate", () => {
+      const cache = getEmbeddingCache();
+      const embedding = [0.1, 0.2, 0.3];
+
+      cache.set("Test Query", embedding);
+      // Should match with different casing/spacing
+      expect(cache.get("test query")).toEqual(embedding);
+      expect(cache.get(" TEST QUERY ")).toEqual(embedding);
+    });
+  });
+
+  describe("getOrCompute", () => {
+    it("calls compute function on cache miss", async () => {
+      const cache = getEmbeddingCache();
+      const computeFn = vi.fn().mockResolvedValue([0.1, 0.2, 0.3]);
+
+      const result = await cache.getOrCompute("test query", computeFn);
+
+      expect(computeFn).toHaveBeenCalledWith("test query");
+      expect(result).toEqual([0.1, 0.2, 0.3]);
+    });
+
+    it("returns cached value without calling compute", async () => {
+      const cache = getEmbeddingCache();
+      const embedding = [0.1, 0.2, 0.3];
+      cache.set("test query", embedding);
+
+      const computeFn = vi.fn().mockResolvedValue([0.4, 0.5, 0.6]);
+      const result = await cache.getOrCompute("test query", computeFn);
+
+      expect(computeFn).not.toHaveBeenCalled();
+      expect(result).toEqual(embedding);
+    });
+
+    it("caches computed value for subsequent calls", async () => {
+      const cache = getEmbeddingCache();
+      const computeFn = vi.fn().mockResolvedValue([0.1, 0.2, 0.3]);
+
+      await cache.getOrCompute("test query", computeFn);
+      await cache.getOrCompute("test query", computeFn);
+
+      expect(computeFn).toHaveBeenCalledTimes(1);
+    });
+  });
+
+  describe("LRU eviction", () => {
+    it("evicts oldest entry when at capacity", () => {
+      // Create cache with small size for testing
+      resetEmbeddingCache();
+      const cache = getEmbeddingCache();
+      // We can't easily change max size, but we can test stats
+
+      // Fill cache with entries
+      for (let i = 0; i < 5; i++) {
+        cache.set(`query ${i}`, [i]);
+      }
+
+      const stats = cache.getStats();
+      expect(stats.size).toBeGreaterThan(0);
+    });
+  });
+
+  describe("stats", () => {
+    it("tracks hits and misses", () => {
+      const cache = getEmbeddingCache();
+      cache.set("query1", [0.1]);
+
+      cache.get("query1"); // hit
+      cache.get("query2"); // miss
+      cache.get("query1"); // hit
+      cache.get("query3"); // miss
+
+      const stats = cache.getStats();
+      expect(stats.hits).toBe(2);
+      expect(stats.misses).toBe(2);
+      expect(stats.hitRate).toBe(0.5);
+    });
+  });
+
+  describe("clear", () => {
+    it("clears all cached embeddings", () => {
+      const cache = getEmbeddingCache();
+      cache.set("query1", [0.1]);
+      cache.set("query2", [0.2]);
+
+      cache.clear();
+
+      expect(cache.get("query1")).toBeUndefined();
+      expect(cache.get("query2")).toBeUndefined();
+      expect(cache.getStats().size).toBe(0);
+    });
+
+    it("resets stats on clear", () => {
+      const cache = getEmbeddingCache();
+      cache.set("query1", [0.1]);
+      cache.get("query1");
+      cache.get("query2");
+
+      cache.clear();
+
+      const stats = cache.getStats();
+      expect(stats.hits).toBe(0);
+      expect(stats.misses).toBe(0);
+    });
+  });
+
+  describe("model version", () => {
+    it("invalidates cache when model version changes", () => {
+      const cache = getEmbeddingCache();
+      cache.set("query1", [0.1]);
+
+      cache.setModelVersion("new-model-v2");
+
+      expect(cache.get("query1")).toBeUndefined();
+    });
+
+    it("does not invalidate if version unchanged", () => {
+      const cache = getEmbeddingCache();
+      cache.set("query1", [0.1]);
+
+      cache.setModelVersion("default"); // Same as initial
+      cache.setModelVersion("default"); // Same again
+
+      // Cache should still have the value
+      expect(cache.get("query1")).toEqual([0.1]);
+    });
+  });
+});
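These tests pin down the cache's contract (normalized keys, getOrCompute, a bounded size with LRU eviction, hit/miss stats, and invalidation when the embedding model changes) without showing the implementation. Below is a minimal sketch of a class that would satisfy them, assuming a Map-based LRU; the package's actual src/embeddings/cache.ts may differ in max size, normalization rules, and stats fields.

// Illustrative only; not the package's implementation.
class EmbeddingCacheSketch {
  private entries = new Map<string, number[]>(); // Map preserves insertion order, so it can act as an LRU
  private hits = 0;
  private misses = 0;
  private modelVersion = "default";

  constructor(private readonly maxSize = 100) {}

  // Lowercase, trim, and collapse whitespace so near-identical queries share one entry
  private normalize(query: string): string {
    return query.trim().toLowerCase().replace(/\s+/g, " ");
  }

  get(query: string): number[] | undefined {
    const key = this.normalize(query);
    const value = this.entries.get(key);
    if (value === undefined) {
      this.misses++;
      return undefined;
    }
    // Refresh recency by re-inserting the entry at the end
    this.entries.delete(key);
    this.entries.set(key, value);
    this.hits++;
    return value;
  }

  set(query: string, embedding: number[]): void {
    const key = this.normalize(query);
    if (this.entries.size >= this.maxSize && !this.entries.has(key)) {
      // Evict the least-recently-used entry (first key in insertion order)
      const oldest = this.entries.keys().next().value;
      if (oldest !== undefined) this.entries.delete(oldest);
    }
    this.entries.set(key, embedding);
  }

  async getOrCompute(query: string, compute: (q: string) => Promise<number[]>): Promise<number[]> {
    const cached = this.get(query);
    if (cached !== undefined) return cached;
    const embedding = await compute(query);
    this.set(query, embedding);
    return embedding;
  }

  setModelVersion(version: string): void {
    // A different embedding model makes cached vectors stale
    if (version !== this.modelVersion) {
      this.modelVersion = version;
      this.clear();
    }
  }

  clear(): void {
    this.entries.clear();
    this.hits = 0;
    this.misses = 0;
  }

  getStats(): { size: number; hits: number; misses: number; hitRate: number } {
    const total = this.hits + this.misses;
    return {
      size: this.entries.size,
      hits: this.hits,
      misses: this.misses,
      hitRate: total === 0 ? 0 : this.hits / total,
    };
  }
}

// Module-level accessors such as getEmbeddingCache()/resetEmbeddingCache() would then just
// manage a singleton instance of a class like this.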