@disco_trooper/apple-notes-mcp 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +136 -24
- package/package.json +13 -9
- package/src/config/claude.test.ts +47 -0
- package/src/config/claude.ts +106 -0
- package/src/config/constants.ts +11 -2
- package/src/config/paths.test.ts +40 -0
- package/src/config/paths.ts +86 -0
- package/src/db/arrow-fix.test.ts +101 -0
- package/src/db/lancedb.test.ts +209 -2
- package/src/db/lancedb.ts +373 -7
- package/src/embeddings/cache.test.ts +150 -0
- package/src/embeddings/cache.ts +204 -0
- package/src/embeddings/index.ts +21 -2
- package/src/embeddings/local.ts +61 -10
- package/src/embeddings/openrouter.ts +233 -11
- package/src/graph/export.test.ts +81 -0
- package/src/graph/export.ts +163 -0
- package/src/graph/extract.test.ts +90 -0
- package/src/graph/extract.ts +52 -0
- package/src/graph/queries.test.ts +156 -0
- package/src/graph/queries.ts +224 -0
- package/src/index.ts +376 -10
- package/src/notes/crud.test.ts +148 -3
- package/src/notes/crud.ts +250 -5
- package/src/notes/read.ts +83 -68
- package/src/search/chunk-indexer.test.ts +353 -0
- package/src/search/chunk-indexer.ts +254 -0
- package/src/search/chunk-search.test.ts +327 -0
- package/src/search/chunk-search.ts +298 -0
- package/src/search/indexer.ts +151 -109
- package/src/search/refresh.test.ts +173 -0
- package/src/search/refresh.ts +151 -0
- package/src/setup.ts +46 -67
- package/src/utils/chunker.test.ts +182 -0
- package/src/utils/chunker.ts +170 -0
- package/src/utils/content-filter.test.ts +225 -0
- package/src/utils/content-filter.ts +275 -0
- package/src/utils/runtime.test.ts +70 -0
- package/src/utils/runtime.ts +40 -0
package/src/db/lancedb.ts
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
import * as lancedb from "@lancedb/lancedb";
|
|
2
|
-
import path from "node:path";
|
|
3
|
-
import os from "node:os";
|
|
4
2
|
import { validateTitle, escapeForFilter } from "./validation.js";
|
|
5
3
|
import type { DBSearchResult as SearchResult } from "../types/index.js";
|
|
6
4
|
import { createDebugLogger } from "../utils/debug.js";
|
|
5
|
+
import { getDataDir } from "../config/paths.js";
|
|
7
6
|
|
|
8
7
|
// Schema for stored notes
|
|
9
8
|
export interface NoteRecord {
|
|
@@ -15,9 +14,30 @@ export interface NoteRecord {
|
|
|
15
14
|
created: string; // ISO date
|
|
16
15
|
modified: string; // ISO date
|
|
17
16
|
indexed_at: string; // ISO date - when embedding was generated
|
|
17
|
+
// Knowledge Graph fields
|
|
18
|
+
tags: string[]; // Extracted #hashtags (without #)
|
|
19
|
+
outlinks: string[]; // Extracted [[wiki-links]] titles
|
|
18
20
|
[key: string]: unknown; // Index signature for LanceDB compatibility
|
|
19
21
|
}
|
|
20
22
|
|
|
23
|
+
/**
 * Row schema for chunked notes (Parent Document Retriever pattern).
 *
 * Each row is one chunk of a parent note and repeats the parent's metadata.
 */
export interface ChunkRecord {
  /** Unique chunk key, formatted as `${note_id}_chunk_${index}`. */
  chunk_id: string;
  /** Apple ID of the parent note. */
  note_id: string;
  /** Parent note title, used for display and deduplication. */
  note_title: string;
  folder: string;
  /** Zero-based position of this chunk within its note (0, 1, 2...). */
  chunk_index: number;
  /** Total number of chunks the parent note was split into. */
  total_chunks: number;
  /** Text content of this chunk. */
  content: string;
  vector: number[];
  /** ISO date, copied from the parent note. */
  created: string;
  /** ISO date, copied from the parent note. */
  modified: string;
  /** ISO date. */
  indexed_at: string;
  /** Tags taken from the parent note. */
  tags: string[];
  /** Outlinks taken from the parent note. */
  outlinks: string[];
  /** Index signature for LanceDB. */
  [key: string]: unknown;
}
|
|
40
|
+
|
|
21
41
|
// SearchResult is imported from ../types/index.js as DBSearchResult
|
|
22
42
|
export type { SearchResult };
|
|
23
43
|
|
|
@@ -61,7 +81,7 @@ export class LanceDBStore implements VectorStore {
|
|
|
61
81
|
private readonly tableName = "notes";
|
|
62
82
|
|
|
63
83
|
constructor(dataDir?: string) {
|
|
64
|
-
this.dbPath = dataDir ||
|
|
84
|
+
this.dbPath = dataDir || getDataDir();
|
|
65
85
|
}
|
|
66
86
|
|
|
67
87
|
private async ensureConnection(): Promise<lancedb.Connection> {
|
|
@@ -103,9 +123,71 @@ export class LanceDBStore implements VectorStore {
|
|
|
103
123
|
debug("Table drop skipped (table may not exist):", error);
|
|
104
124
|
}
|
|
105
125
|
|
|
126
|
+
// Arrow type inference requires the FIRST record to have non-empty arrays.
|
|
127
|
+
// Strategy: Reorder records so the first has non-empty tags/outlinks,
|
|
128
|
+
// or add placeholders that are removed again (delete + re-insert) right after the table is created.
|
|
129
|
+
const processedRecords = records.map((r) => ({
|
|
130
|
+
...r,
|
|
131
|
+
tags: r.tags ?? [],
|
|
132
|
+
outlinks: r.outlinks ?? [],
|
|
133
|
+
}));
|
|
134
|
+
|
|
135
|
+
// Track if we added placeholders
|
|
136
|
+
let addedTagPlaceholder = false;
|
|
137
|
+
let addedOutlinkPlaceholder = false;
|
|
138
|
+
|
|
139
|
+
if (processedRecords.length > 0) {
|
|
140
|
+
// Ensure FIRST record has non-empty tags for type inference
|
|
141
|
+
if (processedRecords[0].tags.length === 0) {
|
|
142
|
+
// Try to find a record with tags and swap
|
|
143
|
+
const tagIdx = processedRecords.findIndex(r => r.tags.length > 0);
|
|
144
|
+
if (tagIdx > 0) {
|
|
145
|
+
// Swap first record with the one that has tags
|
|
146
|
+
[processedRecords[0], processedRecords[tagIdx]] =
|
|
147
|
+
[processedRecords[tagIdx], processedRecords[0]];
|
|
148
|
+
} else {
|
|
149
|
+
// No record has tags - add placeholder
|
|
150
|
+
processedRecords[0].tags = ["__type_placeholder__"];
|
|
151
|
+
addedTagPlaceholder = true;
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// Ensure FIRST record has non-empty outlinks for type inference
|
|
156
|
+
if (processedRecords[0].outlinks.length === 0) {
|
|
157
|
+
// Try to find a record with outlinks and copy its structure
|
|
158
|
+
const outlinkIdx = processedRecords.findIndex(r => r.outlinks.length > 0);
|
|
159
|
+
if (outlinkIdx === -1) {
|
|
160
|
+
// No record has outlinks - add placeholder
|
|
161
|
+
processedRecords[0].outlinks = ["__type_placeholder__"];
|
|
162
|
+
addedOutlinkPlaceholder = true;
|
|
163
|
+
} else {
|
|
164
|
+
// Another record has outlinks, but no swap is done here: use the placeholder as well (removed after table creation)
|
|
165
|
+
processedRecords[0].outlinks = ["__type_placeholder__"];
|
|
166
|
+
addedOutlinkPlaceholder = true;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
debug(`Creating table with ${processedRecords.length} records (tag placeholder: ${addedTagPlaceholder}, outlink placeholder: ${addedOutlinkPlaceholder})`);
|
|
172
|
+
|
|
106
173
|
// Create new table with records
|
|
107
|
-
|
|
108
|
-
|
|
174
|
+
this.table = await db.createTable(this.tableName, processedRecords);
|
|
175
|
+
|
|
176
|
+
// Remove placeholders by deleting and re-inserting the first record
|
|
177
|
+
if (addedTagPlaceholder || addedOutlinkPlaceholder) {
|
|
178
|
+
const firstRecord = processedRecords[0];
|
|
179
|
+
const cleanRecord = {
|
|
180
|
+
...firstRecord,
|
|
181
|
+
tags: addedTagPlaceholder ? [] : firstRecord.tags,
|
|
182
|
+
outlinks: addedOutlinkPlaceholder ? [] : firstRecord.outlinks,
|
|
183
|
+
};
|
|
184
|
+
|
|
185
|
+
// Delete the record with placeholders
|
|
186
|
+
await this.table.delete(`id = '${escapeForFilter(firstRecord.id)}'`);
|
|
187
|
+
// Re-insert without placeholders
|
|
188
|
+
await this.table.add([cleanRecord]);
|
|
189
|
+
debug("Removed type inference placeholders via delete+insert");
|
|
190
|
+
}
|
|
109
191
|
|
|
110
192
|
// Create FTS index for hybrid search
|
|
111
193
|
debug("Creating FTS index on content");
|
|
@@ -217,15 +299,18 @@ export class LanceDBStore implements VectorStore {
|
|
|
217
299
|
|
|
218
300
|
const results = await table.query().toArray();
|
|
219
301
|
|
|
220
|
-
return results.map((row) => ({
|
|
302
|
+
return results.map((row): NoteRecord => ({
|
|
221
303
|
id: (row.id as string) ?? "",
|
|
222
304
|
title: row.title as string,
|
|
223
305
|
content: row.content as string,
|
|
224
|
-
vector: row.vector as number
|
|
306
|
+
vector: Array.isArray(row.vector) ? row.vector : Array.from(row.vector as Iterable<number>),
|
|
225
307
|
folder: row.folder as string,
|
|
226
308
|
created: row.created as string,
|
|
227
309
|
modified: row.modified as string,
|
|
228
310
|
indexed_at: row.indexed_at as string,
|
|
311
|
+
// Arrow Vectors need explicit conversion to JS arrays
|
|
312
|
+
tags: Array.isArray(row.tags) ? row.tags : Array.from(row.tags as Iterable<string>),
|
|
313
|
+
outlinks: Array.isArray(row.outlinks) ? row.outlinks : Array.from(row.outlinks as Iterable<string>),
|
|
229
314
|
}));
|
|
230
315
|
}
|
|
231
316
|
|
|
@@ -270,3 +355,284 @@ export function getVectorStore(): VectorStore {
|
|
|
270
355
|
}
|
|
271
356
|
return storeInstance;
|
|
272
357
|
}
|
|
358
|
+
|
|
359
|
+
/**
 * Search result type for chunks.
 *
 * Carries the chunk's identifying and display fields plus a `score`;
 * the stored vector and the created/indexed timestamps are not included.
 */
export interface ChunkSearchResult {
  chunk_id: string;
  note_id: string;
  note_title: string;
  folder: string;
  chunk_index: number;
  total_chunks: number;
  content: string;
  modified: string;
  /** Relevance score attached to this hit. */
  score: number;
}
|
|
371
|
+
|
|
372
|
+
/**
 * Build a ChunkSearchResult from a raw chunk row.
 *
 * The score is purely rank-based: it depends only on the row's position in
 * the result list (`index` 0 -> 1, 1 -> 0.5, 2 -> 0.333...), not on any
 * distance or relevance value carried by the row itself.
 *
 * @param row - Raw row object as returned by the table query.
 * @param index - Zero-based position of the row in the result list.
 * @returns The row's chunk fields plus the rank-based score.
 */
function rowToChunkSearchResult(row: Record<string, unknown>, index: number): ChunkSearchResult {
  const {
    chunk_id,
    note_id,
    note_title,
    folder,
    chunk_index,
    total_chunks,
    content,
    modified,
  } = row;
  const rankScore = 1 / (index + 1);

  return {
    chunk_id: chunk_id as string,
    note_id: note_id as string,
    note_title: note_title as string,
    folder: folder as string,
    chunk_index: chunk_index as number,
    total_chunks: total_chunks as number,
    content: content as string,
    modified: modified as string,
    score: rankScore,
  };
}
|
|
388
|
+
|
|
389
|
+
// ChunkStore for Parent Document Retriever pattern
/**
 * LanceDB-backed storage for note chunks, kept in a table named "chunks".
 *
 * The connection and the table handle are opened lazily and cached on the
 * instance. Methods that need the table throw
 * "Chunk index not found. Run index-notes first." when it cannot be opened,
 * except `count()`, which reports `0` instead.
 */
export class ChunkStore {
  /** Marker written into empty list columns of the first row during table creation. */
  private static readonly TYPE_PLACEHOLDER = "__type_placeholder__";
  /** Maximum number of note ids combined into a single `IN (...)` delete filter. */
  private static readonly DELETE_BATCH_SIZE = 500;

  private db: lancedb.Connection | null = null;
  private table: lancedb.Table | null = null;
  private readonly dbPath: string;
  private readonly tableName = "chunks";

  /**
   * @param dataDir - Database directory; when omitted (or empty) the path
   *   returned by `getDataDir()` is used.
   */
  constructor(dataDir?: string) {
    this.dbPath = dataDir || getDataDir();
  }

  /** Convert a list column value (JS array or other iterable, e.g. an Arrow Vector) to a JS array; null/undefined become []. */
  private static toJsArray<T>(value: unknown): T[] {
    if (value == null) {
      return [];
    }
    return Array.isArray(value) ? (value as T[]) : Array.from(value as Iterable<T>);
  }

  /** Return copies of the chunks with missing `tags` / `outlinks` defaulted to empty arrays. */
  private static withListDefaults(chunks: ChunkRecord[]): ChunkRecord[] {
    return chunks.map((c) => ({
      ...c,
      tags: c.tags ?? [],
      outlinks: c.outlinks ?? [],
    }));
  }

  /** Create (or replace) the full-text-search index on the `content` column. */
  private async createContentFtsIndex(table: lancedb.Table): Promise<void> {
    await table.createIndex("content", {
      config: lancedb.Index.fts(),
      replace: true,
    });
  }

  /** Open the LanceDB connection on first use and reuse it afterwards. */
  private async ensureConnection(): Promise<lancedb.Connection> {
    if (!this.db) {
      debug(`ChunkStore: Connecting to LanceDB at ${this.dbPath}`);
      this.db = await lancedb.connect(this.dbPath);
    }
    return this.db;
  }

  /**
   * Open the chunks table on first use and reuse the handle afterwards.
   *
   * @throws Error "Chunk index not found. Run index-notes first." when the
   *   table cannot be opened (the underlying error is only debug-logged).
   */
  private async ensureTable(): Promise<lancedb.Table> {
    if (!this.table) {
      const db = await this.ensureConnection();
      try {
        this.table = await db.openTable(this.tableName);
        debug(`ChunkStore: Opened existing table: ${this.tableName}`);
      } catch (error) {
        debug(`ChunkStore: Table ${this.tableName} not found. Error:`, error);
        throw new Error("Chunk index not found. Run index-notes first.");
      }
    }
    return this.table;
  }

  /**
   * Replace the whole chunks table with the given chunks and build its FTS index.
   *
   * Does nothing (and keeps any existing table) when `chunks` is empty.
   *
   * @param chunks - Chunks to store; the input array and its records are not mutated.
   */
  async indexChunks(chunks: ChunkRecord[]): Promise<void> {
    if (chunks.length === 0) {
      debug("ChunkStore: No chunks to index");
      return;
    }

    const db = await this.ensureConnection();

    // Forget the cached handle before dropping: if table creation fails below,
    // later calls must re-open (or report a missing index) instead of using a
    // handle that points at a dropped table.
    this.table = null;

    // Drop existing table if exists
    try {
      await db.dropTable(this.tableName);
      debug(`ChunkStore: Dropped existing table: ${this.tableName}`);
    } catch (error) {
      debug("ChunkStore: Table drop skipped (table may not exist):", error);
    }

    // Arrow type inference requires the FIRST record to have non-empty arrays.
    const processedChunks = ChunkStore.withListDefaults(chunks);

    let addedTagPlaceholder = false;
    let addedOutlinkPlaceholder = false;

    // Tags: prefer moving a chunk that really has tags to the front; only
    // fall back to the placeholder when no chunk has any tag.
    if (processedChunks[0].tags.length === 0) {
      const tagIdx = processedChunks.findIndex((c) => c.tags.length > 0);
      if (tagIdx > 0) {
        [processedChunks[0], processedChunks[tagIdx]] =
          [processedChunks[tagIdx], processedChunks[0]];
      } else {
        processedChunks[0].tags = [ChunkStore.TYPE_PLACEHOLDER];
        addedTagPlaceholder = true;
      }
    }

    // Outlinks: no swap is attempted; an empty list on the first chunk
    // always gets the placeholder.
    if (processedChunks[0].outlinks.length === 0) {
      processedChunks[0].outlinks = [ChunkStore.TYPE_PLACEHOLDER];
      addedOutlinkPlaceholder = true;
    }

    debug(`ChunkStore: Creating table with ${processedChunks.length} chunks (tag placeholder: ${addedTagPlaceholder}, outlink placeholder: ${addedOutlinkPlaceholder})`);

    // Create new table with chunks
    const table = await db.createTable(this.tableName, processedChunks);
    this.table = table;

    // Remove placeholders by deleting and re-inserting the first chunk
    if (addedTagPlaceholder || addedOutlinkPlaceholder) {
      const firstChunk = processedChunks[0];
      const cleanChunk = {
        ...firstChunk,
        tags: addedTagPlaceholder ? [] : firstChunk.tags,
        outlinks: addedOutlinkPlaceholder ? [] : firstChunk.outlinks,
      };

      // Delete the chunk with placeholders
      await table.delete(`chunk_id = '${escapeForFilter(firstChunk.chunk_id)}'`);
      // Re-insert without placeholders
      await table.add([cleanChunk]);
      debug("ChunkStore: Removed type inference placeholders via delete+insert");
    }

    // Create FTS index for hybrid search
    debug("ChunkStore: Creating FTS index on content");
    await this.createContentFtsIndex(table);

    debug(`ChunkStore: Indexed ${chunks.length} chunks`);
  }

  /**
   * Vector search over the chunks table.
   *
   * @param queryVector - Query embedding.
   * @param limit - Maximum number of rows to return.
   * @returns Hits in the order returned by the table, scored by rank.
   */
  async searchChunks(queryVector: number[], limit: number): Promise<ChunkSearchResult[]> {
    const table = await this.ensureTable();

    const results = await table
      .search(queryVector)
      .limit(limit)
      .toArray();

    return results.map(rowToChunkSearchResult);
  }

  /**
   * Full-text search over chunk content.
   *
   * A failing query is debug-logged and yields an empty result; a missing
   * table still throws, because the table is opened outside the try block.
   *
   * @param query - Full-text query string.
   * @param limit - Maximum number of rows to return.
   */
  async searchChunksFTS(query: string, limit: number): Promise<ChunkSearchResult[]> {
    const table = await this.ensureTable();

    try {
      const results = await table
        .query()
        .fullTextSearch(query)
        .limit(limit)
        .toArray();

      return results.map(rowToChunkSearchResult);
    } catch (error) {
      debug("ChunkStore: FTS search failed, returning empty results. Error:", error);
      return [];
    }
  }

  /**
   * Load all chunks of one note, ordered by `chunk_index` ascending.
   *
   * @param noteId - Parent note id to filter on.
   */
  async getChunksByNoteId(noteId: string): Promise<ChunkRecord[]> {
    const table = await this.ensureTable();
    const escapedNoteId = escapeForFilter(noteId);

    const results = await table
      .query()
      .where(`note_id = '${escapedNoteId}'`)
      .toArray();

    // List columns may come back as Arrow Vectors (or null) and need explicit
    // conversion to JS arrays.
    const chunks = results.map((row): ChunkRecord => ({
      chunk_id: row.chunk_id as string,
      note_id: row.note_id as string,
      note_title: row.note_title as string,
      folder: row.folder as string,
      chunk_index: row.chunk_index as number,
      total_chunks: row.total_chunks as number,
      content: row.content as string,
      vector: ChunkStore.toJsArray<number>(row.vector),
      created: row.created as string,
      modified: row.modified as string,
      indexed_at: row.indexed_at as string,
      tags: ChunkStore.toJsArray<string>(row.tags),
      outlinks: ChunkStore.toJsArray<string>(row.outlinks),
    }));

    return chunks.sort((a, b) => a.chunk_index - b.chunk_index);
  }

  /**
   * Delete every chunk belonging to one note.
   *
   * @param noteId - Parent note id whose chunks are removed.
   */
  async deleteNoteChunks(noteId: string): Promise<void> {
    const table = await this.ensureTable();
    const escapedNoteId = escapeForFilter(noteId);
    await table.delete(`note_id = '${escapedNoteId}'`);
    debug(`ChunkStore: Deleted chunks for note: ${noteId}`);
  }

  /** Number of rows in the chunks table, or 0 when it cannot be opened or counted. */
  async count(): Promise<number> {
    try {
      const table = await this.ensureTable();
      return await table.countRows();
    } catch (error) {
      debug("ChunkStore: Count failed (table may not exist):", error);
      return 0;
    }
  }

  /** Drop the chunks table; a failing drop is debug-logged and otherwise ignored. */
  async clear(): Promise<void> {
    const db = await this.ensureConnection();
    // Forget the handle up front; if the drop fails and the table still
    // exists, the next access simply re-opens it.
    this.table = null;
    try {
      await db.dropTable(this.tableName);
      debug("ChunkStore: Cleared table");
    } catch (error) {
      debug("ChunkStore: Clear skipped (table may not exist):", error);
    }
  }

  /** Recreate the FTS index on `content`, replacing an existing one. */
  async rebuildFtsIndex(): Promise<void> {
    const table = await this.ensureTable();
    debug("ChunkStore: Rebuilding FTS index on content");
    await this.createContentFtsIndex(table);
    debug("ChunkStore: FTS index rebuilt");
  }

  /**
   * Delete chunks for multiple notes at once.
   *
   * Ids are de-duplicated and removed with `note_id IN (...)` filters in
   * batches, so the number of delete operations does not grow with every id.
   *
   * @param noteIds - Parent note ids; an empty list is a no-op.
   */
  async deleteChunksByNoteIds(noteIds: string[]): Promise<void> {
    if (noteIds.length === 0) return;

    const table = await this.ensureTable();
    const quotedIds = [...new Set(noteIds)].map((id) => `'${escapeForFilter(id)}'`);

    for (let start = 0; start < quotedIds.length; start += ChunkStore.DELETE_BATCH_SIZE) {
      const inList = quotedIds.slice(start, start + ChunkStore.DELETE_BATCH_SIZE).join(", ");
      await table.delete(`note_id IN (${inList})`);
    }
    debug(`ChunkStore: Deleted chunks for ${noteIds.length} notes`);
  }

  /**
   * Add chunks to existing table (for incremental updates).
   *
   * Missing `tags` / `outlinks` are defaulted to empty arrays, matching
   * `indexChunks`, and the FTS index is rebuilt afterwards.
   *
   * @param chunks - Chunks to append; an empty list is a no-op.
   */
  async addChunks(chunks: ChunkRecord[]): Promise<void> {
    if (chunks.length === 0) return;

    const table = await this.ensureTable();
    await table.add(ChunkStore.withListDefaults(chunks));
    debug(`ChunkStore: Added ${chunks.length} chunks`);

    // Rebuild FTS index after adding
    await this.rebuildFtsIndex();
  }
}
|
|
629
|
+
|
|
630
|
+
// Lazily created, module-wide ChunkStore instance
let chunkStoreInstance: ChunkStore | null = null;

/**
 * Return the shared ChunkStore, constructing it with default arguments on
 * the first call and handing back the same instance afterwards.
 */
export function getChunkStore(): ChunkStore {
  chunkStoreInstance ??= new ChunkStore();
  return chunkStoreInstance;
}
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
+
import { getEmbeddingCache, resetEmbeddingCache } from "./cache.js";
|
|
3
|
+
|
|
4
|
+
describe("EmbeddingCache", () => {
  // Reset the shared cache before every test case.
  beforeEach(() => {
    resetEmbeddingCache();
  });

  describe("get/set", () => {
    it("returns undefined for uncached query", () => {
      const cache = getEmbeddingCache();

      const lookup = cache.get("test query");

      expect(lookup).toBeUndefined();
    });

    it("returns cached embedding", () => {
      const cache = getEmbeddingCache();
      const vector = [0.1, 0.2, 0.3];

      cache.set("test query", vector);

      const lookup = cache.get("test query");
      expect(lookup).toEqual(vector);
    });

    it("normalizes queries for better hit rate", () => {
      const cache = getEmbeddingCache();
      const vector = [0.1, 0.2, 0.3];

      cache.set("Test Query", vector);

      // Lookups that differ only in casing / surrounding whitespace must hit
      const lowerCased = cache.get("test query");
      expect(lowerCased).toEqual(vector);
      const paddedUpperCased = cache.get(" TEST QUERY ");
      expect(paddedUpperCased).toEqual(vector);
    });
  });

  describe("getOrCompute", () => {
    it("calls compute function on cache miss", async () => {
      const cache = getEmbeddingCache();
      const compute = vi.fn().mockResolvedValue([0.1, 0.2, 0.3]);

      const computed = await cache.getOrCompute("test query", compute);

      expect(compute).toHaveBeenCalledWith("test query");
      expect(computed).toEqual([0.1, 0.2, 0.3]);
    });

    it("returns cached value without calling compute", async () => {
      const cache = getEmbeddingCache();
      const stored = [0.1, 0.2, 0.3];
      cache.set("test query", stored);

      // The compute result differs from the stored one, so a wrongly
      // triggered compute would be visible in the returned value as well.
      const compute = vi.fn().mockResolvedValue([0.4, 0.5, 0.6]);
      const served = await cache.getOrCompute("test query", compute);

      expect(compute).not.toHaveBeenCalled();
      expect(served).toEqual(stored);
    });

    it("caches computed value for subsequent calls", async () => {
      const cache = getEmbeddingCache();
      const compute = vi.fn().mockResolvedValue([0.1, 0.2, 0.3]);

      await cache.getOrCompute("test query", compute);
      await cache.getOrCompute("test query", compute);

      expect(compute).toHaveBeenCalledTimes(1);
    });
  });

  describe("LRU eviction", () => {
    it("evicts oldest entry when at capacity", () => {
      // NOTE(review): the capacity cannot be lowered from here, so this case
      // only checks that the stats report a non-empty cache after inserts;
      // it does not observe an actual eviction.
      resetEmbeddingCache();
      const cache = getEmbeddingCache();

      // Fill cache with entries
      [0, 1, 2, 3, 4].forEach((i) => {
        cache.set(`query ${i}`, [i]);
      });

      const { size } = cache.getStats();
      expect(size).toBeGreaterThan(0);
    });
  });

  describe("stats", () => {
    it("tracks hits and misses", () => {
      const cache = getEmbeddingCache();
      cache.set("query1", [0.1]);

      // hit, miss, hit, miss
      for (const query of ["query1", "query2", "query1", "query3"]) {
        cache.get(query);
      }

      const stats = cache.getStats();
      expect(stats.hits).toBe(2);
      expect(stats.misses).toBe(2);
      expect(stats.hitRate).toBe(0.5);
    });
  });

  describe("clear", () => {
    it("clears all cached embeddings", () => {
      const cache = getEmbeddingCache();
      cache.set("query1", [0.1]);
      cache.set("query2", [0.2]);

      cache.clear();

      const firstLookup = cache.get("query1");
      expect(firstLookup).toBeUndefined();
      const secondLookup = cache.get("query2");
      expect(secondLookup).toBeUndefined();
      expect(cache.getStats().size).toBe(0);
    });

    it("resets stats on clear", () => {
      const cache = getEmbeddingCache();
      cache.set("query1", [0.1]);
      // One hit and one miss recorded before clearing
      cache.get("query1");
      cache.get("query2");

      cache.clear();

      const stats = cache.getStats();
      expect(stats.hits).toBe(0);
      expect(stats.misses).toBe(0);
    });
  });

  describe("model version", () => {
    it("invalidates cache when model version changes", () => {
      const cache = getEmbeddingCache();
      cache.set("query1", [0.1]);

      cache.setModelVersion("new-model-v2");

      const lookup = cache.get("query1");
      expect(lookup).toBeUndefined();
    });

    it("does not invalidate if version unchanged", () => {
      const cache = getEmbeddingCache();
      cache.set("query1", [0.1]);

      // Same as initial, then the same value once more
      cache.setModelVersion("default");
      cache.setModelVersion("default");

      // Cache should still have the value
      const lookup = cache.get("query1");
      expect(lookup).toEqual([0.1]);
    });
  });
});
|