@disco_trooper/apple-notes-mcp 1.7.0 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -5
- package/package.json +1 -1
- package/src/config/constants.ts +5 -0
- package/src/config/env.test.ts +14 -0
- package/src/config/env.ts +2 -0
- package/src/db/lancedb.test.ts +14 -0
- package/src/db/lancedb.ts +37 -0
- package/src/index.ts +164 -16
- package/src/indexing/contracts.test.ts +13 -0
- package/src/indexing/contracts.ts +28 -0
- package/src/indexing/job-manager.test.ts +185 -0
- package/src/indexing/job-manager.ts +377 -0
- package/src/notes/crud.test.ts +33 -6
- package/src/notes/crud.ts +62 -7
- package/src/notes/read.test.ts +139 -5
- package/src/notes/read.ts +58 -5
- package/src/search/chunk-indexer.ts +69 -4
- package/src/search/indexer.progress.test.ts +75 -0
- package/src/search/indexer.ts +149 -38
- package/src/search/refresh-policy.test.ts +25 -0
- package/src/search/refresh-policy.ts +33 -0
- package/src/search/refresh.test.ts +146 -25
- package/src/search/refresh.ts +207 -47
- package/src/search/write-sync.test.ts +133 -0
- package/src/search/write-sync.ts +155 -0
package/src/search/indexer.ts
CHANGED
|
@@ -8,7 +8,11 @@
|
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
10
|
import { getEmbedding, getEmbeddingBatch } from "../embeddings/index.js";
|
|
11
|
-
import {
|
|
11
|
+
import {
|
|
12
|
+
getVectorStore,
|
|
13
|
+
type NoteRecord,
|
|
14
|
+
type IndexMetadataRecord,
|
|
15
|
+
} from "../db/lancedb.js";
|
|
12
16
|
import {
|
|
13
17
|
getAllNotesWithFallback,
|
|
14
18
|
getNoteByTitle,
|
|
@@ -18,6 +22,11 @@ import { truncateForEmbedding } from "../utils/text.js";
|
|
|
18
22
|
import { NoteNotFoundError } from "../errors/index.js";
|
|
19
23
|
import { extractMetadata } from "../graph/extract.js";
|
|
20
24
|
import { getEmbeddingBatchSize } from "../config/constants.js";
|
|
25
|
+
import {
|
|
26
|
+
type IndexRunOptions,
|
|
27
|
+
type IndexProgressEvent,
|
|
28
|
+
throwIfCancelled,
|
|
29
|
+
} from "../indexing/contracts.js";
|
|
21
30
|
|
|
22
31
|
/**
|
|
23
32
|
* Extract note title from folder/title key.
|
|
@@ -134,6 +143,21 @@ function chunks<T>(array: T[], size: number): T[][] {
|
|
|
134
143
|
return result;
|
|
135
144
|
}
|
|
136
145
|
|
|
146
|
+
/**
 * Report a progress update to the caller, if a listener was supplied.
 *
 * @param options - Run options; only `onProgress` is consulted here.
 * @param stage - Pipeline stage the update belongs to.
 * @param current - Position reached within the stage.
 * @param total - Upper bound for `current` within the stage.
 * @param message - Human-readable status text.
 */
function emitProgress(
  options: IndexRunOptions,
  stage: IndexProgressEvent["stage"],
  current: number,
  total: number,
  message: string
): void {
  // No listener registered: nothing to report.
  if (options.onProgress == null) {
    return;
  }

  const event: IndexProgressEvent = { stage, current, total, message };
  // Invoke through `options` so the callback sees the same receiver as before.
  options.onProgress(event);
}
|
|
160
|
+
|
|
137
161
|
/**
|
|
138
162
|
* Perform full reindexing of all notes.
|
|
139
163
|
* Drops existing index and rebuilds from scratch.
|
|
@@ -142,14 +166,19 @@ function chunks<T>(array: T[], size: number): T[][] {
|
|
|
142
166
|
* - Hybrid fallback for JXA fetch (single call → folder → note-by-note)
|
|
143
167
|
* - Streaming batch embedding (process & store in chunks to reduce memory)
|
|
144
168
|
*/
|
|
145
|
-
export async function fullIndex(): Promise<IndexResult> {
|
|
169
|
+
export async function fullIndex(options: IndexRunOptions = {}): Promise<IndexResult> {
|
|
146
170
|
const startTime = Date.now();
|
|
147
171
|
debug("Starting full index...");
|
|
148
172
|
|
|
173
|
+
throwIfCancelled(options.signal);
|
|
174
|
+
emitProgress(options, "fetch", 0, 1, "Fetching notes");
|
|
175
|
+
|
|
149
176
|
// Phase 1: Fetch all notes with hybrid fallback
|
|
150
177
|
debug("Phase 1: Fetching all notes (with fallback)...");
|
|
151
178
|
const { notes: allNotes, skipped: skippedNotes } = await getAllNotesWithFallback();
|
|
152
179
|
debug(`Fetched ${allNotes.length} notes, ${skippedNotes.length} skipped`);
|
|
180
|
+
emitProgress(options, "fetch", 1, 1, `Fetched ${allNotes.length} notes`);
|
|
181
|
+
throwIfCancelled(options.signal);
|
|
153
182
|
|
|
154
183
|
// Filter empty notes and prepare for embedding
|
|
155
184
|
const preparedNotes = allNotes
|
|
@@ -157,16 +186,14 @@ export async function fullIndex(): Promise<IndexResult> {
|
|
|
157
186
|
.filter((note): note is PreparedNote => note !== null);
|
|
158
187
|
|
|
159
188
|
debug(`Prepared ${preparedNotes.length} notes for embedding`);
|
|
189
|
+
emitProgress(options, "prepare", preparedNotes.length, allNotes.length, "Prepared notes for embedding");
|
|
190
|
+
throwIfCancelled(options.signal);
|
|
160
191
|
|
|
161
192
|
const store = getVectorStore();
|
|
162
193
|
|
|
163
|
-
// Phase 2:
|
|
164
|
-
debug("Phase 2: Clearing existing index...");
|
|
165
|
-
await store.clear();
|
|
166
|
-
|
|
167
|
-
// Phase 3: Stream process in batches
|
|
194
|
+
// Phase 2: Stream process in batches
|
|
168
195
|
const batchSize = getEmbeddingBatchSize();
|
|
169
|
-
debug(`Phase
|
|
196
|
+
debug(`Phase 2: Processing ${preparedNotes.length} notes in batches of ${batchSize}...`);
|
|
170
197
|
|
|
171
198
|
const batches = chunks(preparedNotes, batchSize);
|
|
172
199
|
const indexedAt = new Date().toISOString();
|
|
@@ -174,8 +201,17 @@ export async function fullIndex(): Promise<IndexResult> {
|
|
|
174
201
|
let isFirstBatch = true;
|
|
175
202
|
|
|
176
203
|
for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
|
|
204
|
+
throwIfCancelled(options.signal);
|
|
205
|
+
|
|
177
206
|
const batch = batches[batchIdx];
|
|
178
207
|
debug(`Batch ${batchIdx + 1}/${batches.length}: ${batch.length} notes`);
|
|
208
|
+
emitProgress(
|
|
209
|
+
options,
|
|
210
|
+
"embed",
|
|
211
|
+
batchIdx,
|
|
212
|
+
batches.length,
|
|
213
|
+
`Embedding batch ${batchIdx + 1}/${batches.length}`
|
|
214
|
+
);
|
|
179
215
|
|
|
180
216
|
// Generate embeddings for this batch
|
|
181
217
|
const textsToEmbed = batch.map((n) => n.truncatedContent);
|
|
@@ -186,6 +222,14 @@ export async function fullIndex(): Promise<IndexResult> {
|
|
|
186
222
|
debug(`Batch ${batchIdx + 1} embedding failed:`, error);
|
|
187
223
|
throw error;
|
|
188
224
|
}
|
|
225
|
+
throwIfCancelled(options.signal);
|
|
226
|
+
emitProgress(
|
|
227
|
+
options,
|
|
228
|
+
"embed",
|
|
229
|
+
batchIdx + 1,
|
|
230
|
+
batches.length,
|
|
231
|
+
`Embedded batch ${batchIdx + 1}/${batches.length}`
|
|
232
|
+
);
|
|
189
233
|
|
|
190
234
|
// Build records
|
|
191
235
|
const records = batch.map((note, i) =>
|
|
@@ -199,20 +243,31 @@ export async function fullIndex(): Promise<IndexResult> {
|
|
|
199
243
|
} else {
|
|
200
244
|
await store.addRecords(records);
|
|
201
245
|
}
|
|
246
|
+
throwIfCancelled(options.signal);
|
|
202
247
|
|
|
203
248
|
totalIndexed += records.length;
|
|
204
249
|
debug(`Batch ${batchIdx + 1} stored, total: ${totalIndexed}`);
|
|
250
|
+
emitProgress(
|
|
251
|
+
options,
|
|
252
|
+
"persist",
|
|
253
|
+
batchIdx + 1,
|
|
254
|
+
batches.length,
|
|
255
|
+
`Stored batch ${batchIdx + 1}/${batches.length}`
|
|
256
|
+
);
|
|
205
257
|
}
|
|
206
258
|
|
|
207
|
-
// Phase
|
|
208
|
-
debug("Phase
|
|
259
|
+
// Phase 3: Rebuild FTS index (once at end)
|
|
260
|
+
debug("Phase 3: Rebuilding FTS index...");
|
|
209
261
|
if (totalIndexed > 0) {
|
|
262
|
+
emitProgress(options, "rebuild-fts", 0, 1, "Rebuilding FTS index");
|
|
210
263
|
await store.rebuildFtsIndex();
|
|
264
|
+
emitProgress(options, "rebuild-fts", 1, 1, "FTS index rebuilt");
|
|
211
265
|
}
|
|
212
266
|
|
|
213
267
|
const timeMs = Date.now() - startTime;
|
|
214
268
|
const emptySkipped = allNotes.length - preparedNotes.length;
|
|
215
269
|
debug(`Full index complete: ${totalIndexed} indexed, ${emptySkipped} empty, ${skippedNotes.length} fetch-skipped, ${timeMs}ms`);
|
|
270
|
+
emitProgress(options, "done", 1, 1, "Full index completed");
|
|
216
271
|
|
|
217
272
|
return {
|
|
218
273
|
total: allNotes.length + skippedNotes.length,
|
|
@@ -228,29 +283,34 @@ export async function fullIndex(): Promise<IndexResult> {
|
|
|
228
283
|
* Only processes notes that have changed since last index.
|
|
229
284
|
* Uses batch fetch (getAllNotesWithFallback) instead of individual JXA calls.
|
|
230
285
|
*/
|
|
231
|
-
export async function incrementalIndex(): Promise<IndexResult> {
|
|
286
|
+
export async function incrementalIndex(options: IndexRunOptions = {}): Promise<IndexResult> {
|
|
232
287
|
const startTime = Date.now();
|
|
233
288
|
debug("Starting incremental index...");
|
|
234
289
|
|
|
290
|
+
throwIfCancelled(options.signal);
|
|
291
|
+
emitProgress(options, "fetch", 0, 1, "Fetching notes for incremental index");
|
|
292
|
+
|
|
235
293
|
const store = getVectorStore();
|
|
236
294
|
|
|
237
295
|
// Get existing indexed notes first
|
|
238
|
-
let existingRecords:
|
|
296
|
+
let existingRecords: IndexMetadataRecord[];
|
|
239
297
|
try {
|
|
240
|
-
existingRecords = await store.
|
|
298
|
+
existingRecords = await store.getIndexMetadata();
|
|
241
299
|
} catch (error) {
|
|
242
300
|
// No existing index, fall back to full index
|
|
243
301
|
debug("No existing index found, performing full index. Error:", error);
|
|
244
|
-
return fullIndex();
|
|
302
|
+
return fullIndex(options);
|
|
245
303
|
}
|
|
246
304
|
|
|
247
305
|
// Phase 1: Fetch ALL notes with content in batch (hybrid fallback)
|
|
248
306
|
debug("Phase 1: Fetching all notes with fallback...");
|
|
249
307
|
const { notes: allNotesWithContent, skipped: skippedNotes } = await getAllNotesWithFallback();
|
|
250
308
|
debug(`Fetched ${allNotesWithContent.length} notes, skipped ${skippedNotes.length}`);
|
|
309
|
+
emitProgress(options, "fetch", 1, 1, `Fetched ${allNotesWithContent.length} notes`);
|
|
310
|
+
throwIfCancelled(options.signal);
|
|
251
311
|
|
|
252
312
|
// Build lookup maps
|
|
253
|
-
const existingByKey = new Map<string,
|
|
313
|
+
const existingByKey = new Map<string, IndexMetadataRecord>();
|
|
254
314
|
for (const record of existingRecords) {
|
|
255
315
|
const key = `${record.folder}/${record.title}`;
|
|
256
316
|
existingByKey.set(key, record);
|
|
@@ -302,6 +362,7 @@ export async function incrementalIndex(): Promise<IndexResult> {
|
|
|
302
362
|
|
|
303
363
|
// Process additions and updates - notes already have content!
|
|
304
364
|
const toProcess = [...toAdd, ...toUpdate];
|
|
365
|
+
emitProgress(options, "prepare", toProcess.length, allNotesWithContent.length, "Prepared notes to process");
|
|
305
366
|
|
|
306
367
|
if (toProcess.length > 0) {
|
|
307
368
|
// Phase 2: Prepare notes for embedding (content already fetched)
|
|
@@ -309,6 +370,7 @@ export async function incrementalIndex(): Promise<IndexResult> {
|
|
|
309
370
|
const preparedNotes: PreparedNote[] = [];
|
|
310
371
|
|
|
311
372
|
for (const noteDetails of toProcess) {
|
|
373
|
+
throwIfCancelled(options.signal);
|
|
312
374
|
const prepared = prepareNoteForEmbedding(noteDetails);
|
|
313
375
|
if (prepared) {
|
|
314
376
|
preparedNotes.push(prepared);
|
|
@@ -318,37 +380,74 @@ export async function incrementalIndex(): Promise<IndexResult> {
|
|
|
318
380
|
if (preparedNotes.length > 0) {
|
|
319
381
|
// Phase 3: Generate embeddings in batch
|
|
320
382
|
debug(`Phase 3: Generating ${preparedNotes.length} embeddings in batch...`);
|
|
321
|
-
const
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
} catch (error) {
|
|
327
|
-
debug("Batch embedding failed:", error);
|
|
328
|
-
throw error;
|
|
329
|
-
}
|
|
383
|
+
const preparedBatches = chunks(preparedNotes, getEmbeddingBatchSize());
|
|
384
|
+
let persistedCount = 0;
|
|
385
|
+
|
|
386
|
+
for (let batchIdx = 0; batchIdx < preparedBatches.length; batchIdx++) {
|
|
387
|
+
throwIfCancelled(options.signal);
|
|
330
388
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
389
|
+
const batch = preparedBatches[batchIdx];
|
|
390
|
+
emitProgress(
|
|
391
|
+
options,
|
|
392
|
+
"embed",
|
|
393
|
+
batchIdx,
|
|
394
|
+
preparedBatches.length,
|
|
395
|
+
`Embedding batch ${batchIdx + 1}/${preparedBatches.length}`
|
|
396
|
+
);
|
|
334
397
|
|
|
335
|
-
|
|
336
|
-
const note = preparedNotes[i];
|
|
337
|
-
const record = buildNoteRecord(note, vectors[i], indexedAt);
|
|
398
|
+
const textsToEmbed = batch.map((n) => n.truncatedContent);
|
|
338
399
|
|
|
400
|
+
let vectors: number[][];
|
|
339
401
|
try {
|
|
340
|
-
await
|
|
402
|
+
vectors = await getEmbeddingBatch(textsToEmbed);
|
|
341
403
|
} catch (error) {
|
|
342
|
-
debug(
|
|
343
|
-
|
|
344
|
-
|
|
404
|
+
debug("Batch embedding failed:", error);
|
|
405
|
+
throw error;
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
emitProgress(
|
|
409
|
+
options,
|
|
410
|
+
"embed",
|
|
411
|
+
batchIdx + 1,
|
|
412
|
+
preparedBatches.length,
|
|
413
|
+
`Embedded batch ${batchIdx + 1}/${preparedBatches.length}`
|
|
414
|
+
);
|
|
415
|
+
|
|
416
|
+
// Phase 4: Update database
|
|
417
|
+
debug("Phase 4: Updating database...");
|
|
418
|
+
const indexedAt = new Date().toISOString();
|
|
419
|
+
|
|
420
|
+
for (let i = 0; i < batch.length; i++) {
|
|
421
|
+
throwIfCancelled(options.signal);
|
|
422
|
+
|
|
423
|
+
const note = batch[i];
|
|
424
|
+
const record = buildNoteRecord(note, vectors[i], indexedAt);
|
|
425
|
+
|
|
426
|
+
try {
|
|
427
|
+
await store.update(record);
|
|
428
|
+
} catch (error) {
|
|
429
|
+
debug(`Error updating ${note.title}:`, error);
|
|
430
|
+
failedNotes.push(`${note.folder}/${note.title}`);
|
|
431
|
+
errors++;
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
persistedCount += 1;
|
|
435
|
+
emitProgress(
|
|
436
|
+
options,
|
|
437
|
+
"persist",
|
|
438
|
+
persistedCount,
|
|
439
|
+
preparedNotes.length,
|
|
440
|
+
`Persisted ${persistedCount}/${preparedNotes.length} note updates`
|
|
441
|
+
);
|
|
345
442
|
}
|
|
346
443
|
}
|
|
347
444
|
}
|
|
348
445
|
}
|
|
349
446
|
|
|
350
447
|
// Process deletions
|
|
351
|
-
for (
|
|
448
|
+
for (let deleteIdx = 0; deleteIdx < toDelete.length; deleteIdx++) {
|
|
449
|
+
const key = toDelete[deleteIdx];
|
|
450
|
+
throwIfCancelled(options.signal);
|
|
352
451
|
try {
|
|
353
452
|
// Parse folder and title from key (e.g., "Work/Projects/My Note")
|
|
354
453
|
const lastSlash = key.lastIndexOf("/");
|
|
@@ -360,16 +459,27 @@ export async function incrementalIndex(): Promise<IndexResult> {
|
|
|
360
459
|
failedNotes.push(`DELETE: ${key}`);
|
|
361
460
|
errors++;
|
|
362
461
|
}
|
|
462
|
+
|
|
463
|
+
emitProgress(
|
|
464
|
+
options,
|
|
465
|
+
"delete",
|
|
466
|
+
deleteIdx + 1,
|
|
467
|
+
Math.max(toDelete.length, 1),
|
|
468
|
+
`Deleted ${deleteIdx + 1}/${toDelete.length} stale records`
|
|
469
|
+
);
|
|
363
470
|
}
|
|
364
471
|
|
|
365
472
|
// Rebuild FTS index if any changes were made
|
|
366
473
|
if (toAdd.length > 0 || toUpdate.length > 0 || toDelete.length > 0) {
|
|
367
474
|
debug("Rebuilding FTS index after incremental changes");
|
|
475
|
+
emitProgress(options, "rebuild-fts", 0, 1, "Rebuilding FTS index");
|
|
368
476
|
await store.rebuildFtsIndex();
|
|
477
|
+
emitProgress(options, "rebuild-fts", 1, 1, "FTS index rebuilt");
|
|
369
478
|
}
|
|
370
479
|
|
|
371
480
|
const timeMs = Date.now() - startTime;
|
|
372
481
|
debug(`Incremental index complete: ${timeMs}ms`);
|
|
482
|
+
emitProgress(options, "done", 1, 1, "Incremental index completed");
|
|
373
483
|
|
|
374
484
|
return {
|
|
375
485
|
total: allNotesWithContent.length,
|
|
@@ -420,10 +530,11 @@ export async function reindexNote(title: string): Promise<void> {
|
|
|
420
530
|
* Index notes based on mode.
|
|
421
531
|
*/
|
|
422
532
|
export async function indexNotes(
  mode: "full" | "incremental" = "incremental",
  options: IndexRunOptions = {}
): Promise<IndexResult> {
  // Select the indexing routine for the requested mode; anything other than
  // "full" runs the incremental path.
  const runIndex = mode === "full" ? fullIndex : incrementalIndex;

  // The same run options are forwarded to whichever routine was selected.
  return runIndex(options);
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { shouldAutoRefreshByTtl } from "./refresh-policy.js";
|
|
3
|
+
|
|
4
|
+
describe("shouldAutoRefreshByTtl", () => {
  // Fixed clock shared by every case so elapsed time is easy to read off.
  const nowMs = 2_000_000;
  // Indexed 1000 s before `nowMs`.
  const staleIndexedAtMs = 1_000_000;
  // Indexed 1 s before `nowMs`.
  const freshIndexedAtMs = 1_999_000;

  it("returns false when INDEX_TTL is not configured", () => {
    const decision = shouldAutoRefreshByTtl(undefined, nowMs, staleIndexedAtMs);
    expect(decision).toBe(false);
  });

  it("returns false when TTL is invalid", () => {
    // Non-numeric first, then zero: same order as the assertions always had.
    for (const rawTtl of ["abc", "0"]) {
      expect(shouldAutoRefreshByTtl(rawTtl, nowMs, staleIndexedAtMs)).toBe(false);
    }
  });

  it("returns false when TTL has not expired", () => {
    const decision = shouldAutoRefreshByTtl("3600", nowMs, freshIndexedAtMs);
    expect(decision).toBe(false);
  });

  it("returns true when TTL has expired", () => {
    const decision = shouldAutoRefreshByTtl("60", nowMs, staleIndexedAtMs);
    expect(decision).toBe(true);
  });

  it("returns true when index is empty and TTL is enabled", () => {
    const decision = shouldAutoRefreshByTtl("60", nowMs, null);
    expect(decision).toBe(true);
  });
});
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Refresh policy helpers for search-time auto-refresh.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Decide whether auto-refresh should run based on TTL.
 *
 * Rules:
 * - No TTL configured => disabled
 * - Invalid/zero TTL => disabled. The value must be a plain base-10 integer
 *   greater than zero (surrounding whitespace is ignored). Strings such as
 *   "60abc", "1.5" or "1e3" are rejected instead of being silently truncated
 *   to their leading digits.
 * - Empty index (no indexed timestamp, or a timestamp that is NaN) => enabled
 * - Otherwise run only when TTL has expired
 *
 * @param ttlSecondsRaw - Raw TTL setting, in seconds, as a string; `undefined`
 *   when not configured.
 * @param nowMs - Current time in milliseconds.
 * @param lastIndexedAtMs - Time of the last index in milliseconds (same clock
 *   as `nowMs`), or `null` when nothing has been indexed.
 * @returns `true` when an auto-refresh should be triggered.
 */
export function shouldAutoRefreshByTtl(
  ttlSecondsRaw: string | undefined,
  nowMs: number,
  lastIndexedAtMs: number | null
): boolean {
  const ttlText = ttlSecondsRaw?.trim();
  if (!ttlText) {
    return false;
  }

  // parseInt stops at the first non-digit, so "1e3" would become a 1-second
  // TTL and "60abc" would pass as 60. Require digits only before parsing.
  if (!/^\d+$/.test(ttlText)) {
    return false;
  }

  const ttlSeconds = Number.parseInt(ttlText, 10);
  if (!Number.isFinite(ttlSeconds) || ttlSeconds <= 0) {
    return false;
  }

  // A NaN timestamp would make the comparison below false forever; treat it
  // like a missing timestamp so the index gets refreshed.
  if (lastIndexedAtMs === null || Number.isNaN(lastIndexedAtMs)) {
    return true;
  }

  return nowMs - lastIndexedAtMs >= ttlSeconds * 1000;
}
|