@disco_trooper/apple-notes-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +56 -0
- package/LICENSE +21 -0
- package/README.md +216 -0
- package/package.json +61 -0
- package/src/config/constants.ts +41 -0
- package/src/config/env.test.ts +58 -0
- package/src/config/env.ts +25 -0
- package/src/db/lancedb.test.ts +141 -0
- package/src/db/lancedb.ts +263 -0
- package/src/db/validation.test.ts +76 -0
- package/src/db/validation.ts +57 -0
- package/src/embeddings/index.test.ts +54 -0
- package/src/embeddings/index.ts +111 -0
- package/src/embeddings/local.test.ts +70 -0
- package/src/embeddings/local.ts +191 -0
- package/src/embeddings/openrouter.test.ts +21 -0
- package/src/embeddings/openrouter.ts +285 -0
- package/src/index.ts +387 -0
- package/src/notes/crud.test.ts +199 -0
- package/src/notes/crud.ts +257 -0
- package/src/notes/read.test.ts +131 -0
- package/src/notes/read.ts +504 -0
- package/src/search/index.test.ts +52 -0
- package/src/search/index.ts +283 -0
- package/src/search/indexer.test.ts +42 -0
- package/src/search/indexer.ts +335 -0
- package/src/server.ts +386 -0
- package/src/setup.ts +540 -0
- package/src/types/index.ts +39 -0
- package/src/utils/debug.test.ts +41 -0
- package/src/utils/debug.ts +51 -0
- package/src/utils/errors.test.ts +29 -0
- package/src/utils/errors.ts +46 -0
- package/src/utils/text.ts +23 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid search combining vector similarity and full-text search.
|
|
3
|
+
*
|
|
4
|
+
* Uses Reciprocal Rank Fusion (RRF) to merge results from:
|
|
5
|
+
* - Vector search (semantic similarity)
|
|
6
|
+
* - Full-text search (keyword matching)
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { getEmbedding } from "../embeddings/index.js";
|
|
10
|
+
import { getVectorStore } from "../db/lancedb.js";
|
|
11
|
+
import type { DBSearchResult, SearchResult } from "../types/index.js";
|
|
12
|
+
import { DEFAULT_SEARCH_LIMIT, FOLDER_FILTER_MULTIPLIER, HYBRID_SEARCH_MIN_FETCH, PREVIEW_LENGTH, PREVIEW_TRUNCATE_RATIO, RRF_K } from "../config/constants.js";
|
|
13
|
+
import { createDebugLogger } from "../utils/debug.js";
|
|
14
|
+
|
|
15
|
+
// Debug logging
|
|
16
|
+
const debug = createDebugLogger("SEARCH");
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Search mode options.
|
|
20
|
+
* - hybrid: Combine vector + FTS with RRF (default)
|
|
21
|
+
* - keyword: Full-text search only
|
|
22
|
+
* - semantic: Vector search only
|
|
23
|
+
*/
|
|
24
|
+
export type SearchMode = "hybrid" | "keyword" | "semantic";
|
|
25
|
+
|
|
26
|
+
/**
 * Optional knobs accepted by `searchNotes`.
 *
 * Every field may be omitted; `searchNotes` fills in the defaults noted below.
 */
export interface SearchOptions {
  /**
   * Restrict results to notes whose folder equals this name.
   * The comparison is case-insensitive.
   */
  folder?: string;
  /** Upper bound on the number of returned results (defaults to DEFAULT_SEARCH_LIMIT). */
  limit?: number;
  /** Which search strategy to run: "hybrid" (default), "keyword", or "semantic". */
  mode?: SearchMode;
  /** When true, each result also carries the full note content (default: false). */
  include_content?: boolean;
}
|
|
39
|
+
|
|
40
|
+
// SearchResult is imported from ../types/index.js
|
|
41
|
+
// RRF_K is imported from ../config/constants.js
|
|
42
|
+
|
|
43
|
+
/**
 * Reciprocal Rank Fusion contribution of a single ranked hit.
 *
 * @param rank - Zero-based position of the hit within its result list.
 * @returns `1 / (RRF_K + rank)`, so better-ranked hits contribute more.
 */
function rrfScore(rank: number): number {
  const denominator = RRF_K + rank;
  return 1 / denominator;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Build a short single-line preview of a note body.
 *
 * @param content - Raw note text; falsy input yields an empty string.
 * @param maxLength - Maximum number of characters kept before the ellipsis
 *   (defaults to PREVIEW_LENGTH).
 * @returns The whitespace-collapsed text, shortened with a trailing "..."
 *   when it exceeds `maxLength`.
 */
function generatePreview(content: string, maxLength = PREVIEW_LENGTH): string {
  if (!content) return "";

  // Collapse every whitespace run (including newlines) into one space.
  const collapsed = content.replace(/\s+/g, " ").trim();
  if (collapsed.length <= maxLength) return collapsed;

  const head = collapsed.slice(0, maxLength);
  const cut = head.lastIndexOf(" ");

  // Only back up to the last space when that keeps enough of the text;
  // otherwise cut mid-word rather than return a very short preview.
  const body = cut > maxLength * PREVIEW_TRUNCATE_RATIO ? head.slice(0, cut) : head;
  return body + "...";
}
|
|
77
|
+
|
|
78
|
+
/**
 * Keep only the results that live in the requested folder.
 *
 * @param results - Items carrying a `folder` field.
 * @param folder - Folder name to match, case-insensitively. When omitted or
 *   empty, `results` is returned untouched (same array instance).
 * @returns A new array with the matching items, in their original order.
 */
function filterByFolder<T extends { folder: string }>(
  results: T[],
  folder?: string
): T[] {
  if (folder) {
    const wanted = folder.toLowerCase();
    const kept: T[] = [];
    for (const entry of results) {
      if (entry.folder.toLowerCase() === wanted) {
        kept.push(entry);
      }
    }
    return kept;
  }

  return results;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Semantic-only search: embed the query and hand the vector to `store.search`.
 *
 * @param query - Text to embed.
 * @param limit - Maximum number of results to return.
 * @param folder - Optional folder filter, applied after the store lookup.
 * @returns At most `limit` results, in the order the store returned them.
 */
async function vectorSearch(
  query: string,
  limit: number,
  folder?: string
): Promise<DBSearchResult[]> {
  debug(`Vector search: "${query}" (limit: ${limit})`);

  const store = getVectorStore();
  const queryVector = await getEmbedding(query);

  // The folder filter runs after the lookup, so over-fetch when it is active
  // to leave enough candidates once non-matching folders are dropped.
  let fetchLimit = limit;
  if (folder) {
    fetchLimit = limit * FOLDER_FILTER_MULTIPLIER;
  }

  const hits = await store.search(queryVector, fetchLimit);
  return filterByFolder(hits, folder).slice(0, limit);
}
|
|
115
|
+
|
|
116
|
+
/**
 * Keyword-only search backed by the store's full-text search (`searchFTS`).
 *
 * @param query - Query string passed to the full-text search unchanged.
 * @param limit - Maximum number of results to return.
 * @param folder - Optional folder filter, applied after the store lookup.
 * @returns At most `limit` results, in the order the store returned them.
 */
async function keywordSearch(
  query: string,
  limit: number,
  folder?: string
): Promise<DBSearchResult[]> {
  debug(`FTS search: "${query}" (limit: ${limit})`);

  const store = getVectorStore();

  // The folder filter runs after the lookup, so over-fetch when it is active
  // to leave enough candidates once non-matching folders are dropped.
  let fetchLimit = limit;
  if (folder) {
    fetchLimit = limit * FOLDER_FILTER_MULTIPLIER;
  }

  const hits = await store.searchFTS(query, fetchLimit);
  return filterByFolder(hits, folder).slice(0, limit);
}
|
|
135
|
+
|
|
136
|
+
/**
 * Hybrid search: run vector search and full-text search, then merge the two
 * ranked lists with Reciprocal Rank Fusion (RRF).
 *
 * A note that appears in both lists receives the sum of its per-list RRF
 * scores. The `score` field of every returned result is replaced by that
 * fused score.
 *
 * @param query - Search text, used both for embedding and for full-text search.
 * @param limit - Maximum number of results to return.
 * @param folder - Optional folder filter, applied after the merge.
 * @returns At most `limit` results, sorted by descending fused score.
 */
async function hybridSearch(
  query: string,
  limit: number,
  folder?: string
): Promise<DBSearchResult[]> {
  debug(`Hybrid search: "${query}" (limit: ${limit})`);

  const store = getVectorStore();

  // Fetch more than `limit` from each source so RRF has candidates to merge.
  // The folder filter runs after the merge, so over-fetch further when it is
  // active (same approach as the vector-only and keyword-only searches);
  // otherwise a filtered search can return fewer results than are available.
  const baseFetch = Math.max(limit * 2, HYBRID_SEARCH_MIN_FETCH);
  const fetchLimit = folder ? baseFetch * FOLDER_FILTER_MULTIPLIER : baseFetch;

  // Embedding and full-text search are independent, so run them concurrently.
  // A full-text failure is deliberately non-fatal (vector results alone are
  // still useful), but it is logged instead of being silently discarded.
  const [queryVector, ftsResults] = await Promise.all([
    getEmbedding(query),
    store.searchFTS(query, fetchLimit).catch((error: unknown) => {
      debug("FTS search failed, continuing with vector results only:", error);
      return [] as DBSearchResult[];
    }),
  ]);

  const vectorResults = await store.search(queryVector, fetchLimit);

  debug(`Vector results: ${vectorResults.length}, FTS results: ${ftsResults.length}`);

  // Fused entries keyed by folder + title. Keying by title alone would merge
  // distinct notes that share a title but live in different folders. The NUL
  // separator keeps ("A/B", "C") and ("A", "B/C") from colliding.
  const fused = new Map<string, { item: DBSearchResult; score: number }>();

  const accumulate = (hits: DBSearchResult[]): void => {
    hits.forEach((item, rank) => {
      const key = `${item.folder}\u0000${item.title}`;
      const entry = fused.get(key);
      if (entry) {
        entry.score += rrfScore(rank);
      } else {
        // First sighting wins, so the vector hit is kept when both lists
        // contain the note (vector results are accumulated first).
        fused.set(key, { item, score: rrfScore(rank) });
      }
    });
  };

  accumulate(vectorResults);
  accumulate(ftsResults);

  // Highest fused score first; ties keep insertion order (stable sort).
  const merged = Array.from(fused.values())
    .sort((a, b) => b.score - a.score)
    .map(({ item, score }) => ({ ...item, score }));

  return filterByFolder(merged, folder).slice(0, limit);
}
|
|
193
|
+
|
|
194
|
+
/**
 * Entry point for note search: picks the strategy for the requested mode and
 * converts the database rows into `SearchResult` objects.
 *
 * @param query - Search query string; an empty or whitespace-only query
 *   returns an empty array without touching the store.
 * @param options - Search configuration (folder filter, limit, mode, and
 *   whether to include full content).
 * @returns Search results in the order produced by the selected strategy.
 *
 * @example
 * ```typescript
 * // Basic hybrid search
 * const results = await searchNotes("project ideas");
 *
 * // Keyword-only search in specific folder
 * const results = await searchNotes("meeting", {
 *   mode: "keyword",
 *   folder: "Work",
 *   limit: 10,
 * });
 *
 * // Semantic search with full content
 * const results = await searchNotes("concepts similar to machine learning", {
 *   mode: "semantic",
 *   include_content: true,
 * });
 * ```
 */
export async function searchNotes(
  query: string,
  options: SearchOptions = {}
): Promise<SearchResult[]> {
  const {
    folder,
    limit = DEFAULT_SEARCH_LIMIT,
    mode = "hybrid",
    include_content = false,
  } = options;

  const trimmedQuery = query ? query.trim() : "";
  if (trimmedQuery === "") {
    debug("Empty query, returning empty results");
    return [];
  }

  debug(`searchNotes: "${trimmedQuery}" mode=${mode} folder=${folder || "all"} limit=${limit}`);

  // Any mode other than "keyword" or "semantic" falls back to hybrid search.
  const runSearch =
    mode === "keyword"
      ? keywordSearch
      : mode === "semantic"
        ? vectorSearch
        : hybridSearch;

  const dbResults: DBSearchResult[] = await runSearch(trimmedQuery, limit, folder);

  // Shape each row for callers; full content is attached only on request.
  const results = dbResults.map((row): SearchResult => {
    const shaped: SearchResult = {
      title: row.title,
      folder: row.folder,
      preview: generatePreview(row.content),
      modified: row.modified,
      score: row.score,
    };
    return include_content ? { ...shaped, content: row.content } : shaped;
  });

  debug(`Returning ${results.length} results`);
  return results;
}
|
|
277
|
+
|
|
278
|
+
// Re-export types for convenience
|
|
279
|
+
export type { SearchMode as Mode };
|
|
280
|
+
export type { SearchResult } from "../types/index.js";
|
|
281
|
+
|
|
282
|
+
// Export utility functions for testing
|
|
283
|
+
export { rrfScore, generatePreview, filterByFolder };
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { extractTitleFromKey } from "./indexer.js";
|
|
3
|
+
|
|
4
|
+
describe("extractTitleFromKey", () => {
  // Each row: [test name, input key, expected title].
  const cases: Array<[string, string, string]> = [
    ["extracts title from simple folder/title key", "Work/My Note", "My Note"],
    ["extracts title from nested folder key", "Work/Projects/My Note", "My Note"],
    [
      "extracts title from deeply nested folder key",
      "Personal/Archive/2024/January/Meeting Notes",
      "Meeting Notes",
    ],
    ["handles single segment (no folder)", "My Note", "My Note"],
  ];

  for (const [name, key, expected] of cases) {
    it(name, () => {
      expect(extractTitleFromKey(key)).toBe(expected);
    });
  }
});
|
|
21
|
+
|
|
22
|
+
describe("delete key parsing", () => {
  // Splits a "folder/title" key at its last "/". The logic is inlined in this
  // test file rather than imported from the indexer.
  const splitKey = (key: string): { folder: string; title: string } => {
    const cut = key.lastIndexOf("/");
    return { folder: key.substring(0, cut), title: key.substring(cut + 1) };
  };

  it("should correctly parse folder and title from key", () => {
    const parsed = splitKey("Work/Projects/My Note");

    expect(parsed.folder).toBe("Work/Projects");
    expect(parsed.title).toBe("My Note");
  });

  it("should handle simple folder/title", () => {
    const parsed = splitKey("Personal/My Note");

    expect(parsed.folder).toBe("Personal");
    expect(parsed.title).toBe("My Note");
  });
});
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Note indexing pipeline for semantic search.
|
|
3
|
+
*
|
|
4
|
+
* Supports:
|
|
5
|
+
* - Full reindexing (drop all, reindex everything)
|
|
6
|
+
* - Incremental indexing (only changed notes)
|
|
7
|
+
* - Single note reindexing
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { getEmbedding } from "../embeddings/index.js";
|
|
11
|
+
import { getVectorStore, type NoteRecord } from "../db/lancedb.js";
|
|
12
|
+
import { getAllNotes, getNoteByFolderAndTitle, getNoteByTitle, type NoteInfo } from "../notes/read.js";
|
|
13
|
+
import { createDebugLogger } from "../utils/debug.js";
|
|
14
|
+
import { truncateForEmbedding } from "../utils/text.js";
|
|
15
|
+
import { EMBEDDING_DELAY_MS } from "../config/constants.js";
|
|
16
|
+
|
|
17
|
+
/**
 * Return the note title portion of a "folder/title" key.
 *
 * The title is everything after the last "/", so nested folder paths such as
 * "Work/Projects/My Note" yield "My Note". A key without any "/" is returned
 * unchanged.
 *
 * @param key - Key in "folder/title" form.
 * @returns The final "/"-separated segment of `key`.
 */
export function extractTitleFromKey(key: string): string {
  const cut = key.lastIndexOf("/");
  // With no slash, cut is -1 and slice(0) returns the whole key.
  return key.slice(cut + 1);
}
|
|
25
|
+
|
|
26
|
+
// Debug logging
|
|
27
|
+
const debug = createDebugLogger("INDEX");
|
|
28
|
+
|
|
29
|
+
/**
 * Summary returned by every indexing entry point.
 */
export interface IndexResult {
  /** Number of notes found in Apple Notes for this run. */
  total: number;
  /** Number of notes reported as indexed by this run. */
  indexed: number;
  /** Number of notes (or deletions) that failed. */
  errors: number;
  /** Wall-clock duration of the run, in milliseconds. */
  timeMs: number;
  /** Per-category counts; only populated by incremental indexing. */
  breakdown?: {
    added: number;
    updated: number;
    deleted: number;
    skipped: number;
  };
  /** Identifiers of the notes that failed, for debugging; omitted when none failed. */
  failedNotes?: string[];
}
|
|
51
|
+
|
|
52
|
+
/**
 * Pause for the given number of milliseconds.
 *
 * @param ms - Delay passed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
58
|
+
|
|
59
|
+
/**
 * Rebuild the index from scratch.
 *
 * Reads every note from Apple Notes, embeds each non-empty one, and passes
 * the complete record list to `store.index` in a single call. Notes that
 * cannot be fetched or embedded are counted as errors and listed in
 * `failedNotes`; empty notes are skipped without being counted as errors.
 *
 * @returns Totals for the run (`indexed` counts only the records built
 *   successfully).
 */
export async function fullIndex(): Promise<IndexResult> {
  const startedAt = Date.now();
  debug("Starting full index...");

  const notes = await getAllNotes();
  debug(`Found ${notes.length} notes in Apple Notes`);

  const records: NoteRecord[] = [];
  const failedNotes: string[] = [];
  let errors = 0;
  const lastPosition = notes.length - 1;

  for (const [position, noteInfo] of notes.entries()) {
    debug(`Processing ${position + 1}/${notes.length}: ${noteInfo.title}`);

    try {
      // Look the note up by folder and title separately so that titles
      // containing "/" are handled.
      const noteDetails = await getNoteByFolderAndTitle(noteInfo.folder, noteInfo.title);
      if (!noteDetails) {
        debug(`Could not fetch note: ${noteInfo.title}`);
        failedNotes.push(`${noteInfo.folder}/${noteInfo.title}`);
        errors++;
        continue;
      }

      // Nothing to embed for a blank note.
      if (!noteDetails.content.trim()) {
        debug(`Skipping empty note: ${noteInfo.title}`);
        continue;
      }

      // The embedding uses the truncated text; the record keeps the full text.
      const vector = await getEmbedding(truncateForEmbedding(noteDetails.content));

      records.push({
        title: noteDetails.title,
        content: noteDetails.content,
        vector,
        folder: noteDetails.folder,
        created: noteDetails.created,
        modified: noteDetails.modified,
        indexed_at: new Date().toISOString(),
      });

      // Space out embedding calls to avoid rate limiting; no wait after the last note.
      if (position < lastPosition) {
        await sleep(EMBEDDING_DELAY_MS);
      }
    } catch (error) {
      debug(`Error processing ${noteInfo.title}:`, error);
      failedNotes.push(`${noteInfo.folder}/${noteInfo.title}`);
      errors++;
    }
  }

  // Hand the whole batch to the vector store in one call.
  await getVectorStore().index(records);

  const timeMs = Date.now() - startedAt;
  debug(`Full index complete: ${records.length} indexed, ${errors} errors, ${timeMs}ms`);

  return {
    total: notes.length,
    indexed: records.length,
    errors,
    timeMs,
    failedNotes: failedNotes.length > 0 ? failedNotes : undefined,
  };
}
|
|
138
|
+
|
|
139
|
+
/**
 * Bring the index up to date without rebuilding it.
 *
 * Compares the notes currently in Apple Notes with the records already in
 * the store and then:
 * - adds notes that have no record yet,
 * - re-embeds notes whose `modified` time is later than the record's
 *   `indexed_at`,
 * - deletes records whose note no longer exists,
 * - leaves everything else untouched (counted as skipped).
 *
 * If the existing records cannot be read, the run falls back to `fullIndex()`.
 *
 * @returns Totals for the run. `indexed` and the `breakdown` counts report
 *   operations that actually succeeded; failures are counted in `errors` and
 *   listed in `failedNotes`. Empty notes are neither indexed nor counted as
 *   errors.
 */
export async function incrementalIndex(): Promise<IndexResult> {
  const startTime = Date.now();
  debug("Starting incremental index...");

  const store = getVectorStore();

  const currentNotes = await getAllNotes();
  debug(`Found ${currentNotes.length} notes in Apple Notes`);

  let existingRecords: NoteRecord[];
  try {
    existingRecords = await store.getAll();
  } catch (error) {
    // No readable index: rebuild everything instead.
    debug("No existing index found, performing full index. Error:", error);
    return fullIndex();
  }

  // Internal lookup key. A NUL separator is used because both folder paths
  // and titles may contain "/", which makes a "/"-joined key ambiguous.
  const keyOf = (folder: string, title: string): string => `${folder}\u0000${title}`;
  // Human-readable identifier used in logs and in `failedNotes`.
  const labelOf = (folder: string, title: string): string => `${folder}/${title}`;

  const existingByKey = new Map<string, NoteRecord>();
  for (const record of existingRecords) {
    existingByKey.set(keyOf(record.folder, record.title), record);
  }

  // Classify every current note as new, changed, or unchanged.
  const currentKeys = new Set<string>();
  const toAdd: NoteInfo[] = [];
  const toUpdate: NoteInfo[] = [];
  let skipped = 0;

  for (const note of currentNotes) {
    const key = keyOf(note.folder, note.title);
    currentKeys.add(key);

    const existing = existingByKey.get(key);
    if (!existing) {
      toAdd.push(note);
      continue;
    }

    // A note is stale when it was modified after it was last indexed.
    const currentModified = new Date(note.modified).getTime();
    const indexedAt = new Date(existing.indexed_at).getTime();
    if (currentModified > indexedAt) {
      toUpdate.push(note);
    } else {
      skipped++;
    }
  }

  // Records whose note no longer exists. The records themselves are kept so
  // the exact folder and title can be passed to the store later; splitting a
  // joined key at its last "/" breaks for titles that contain "/".
  const toDelete: NoteRecord[] = [];
  for (const [key, record] of existingByKey) {
    if (!currentKeys.has(key)) {
      toDelete.push(record);
    }
  }

  debug(`Incremental: add=${toAdd.length}, update=${toUpdate.length}, delete=${toDelete.length}, skip=${skipped}`);

  let errors = 0;
  const failedNotes: string[] = [];
  let added = 0;
  let updated = 0;
  let deleted = 0;

  // Additions first, then updates; both go through the same upsert path.
  const toProcess = [...toAdd, ...toUpdate];
  for (const [i, noteInfo] of toProcess.entries()) {
    const isAddition = i < toAdd.length;
    debug(`Processing ${i + 1}/${toProcess.length}: ${noteInfo.title}`);

    try {
      // Look the note up by folder and title separately so that titles
      // containing "/" are handled.
      const noteDetails = await getNoteByFolderAndTitle(noteInfo.folder, noteInfo.title);
      if (!noteDetails) {
        failedNotes.push(labelOf(noteInfo.folder, noteInfo.title));
        errors++;
        continue;
      }

      // Nothing to embed for a blank note.
      if (!noteDetails.content.trim()) {
        continue;
      }

      // The embedding uses the truncated text; the record keeps the full text.
      const content = truncateForEmbedding(noteDetails.content);
      const vector = await getEmbedding(content);

      const record: NoteRecord = {
        title: noteDetails.title,
        content: noteDetails.content,
        vector,
        folder: noteDetails.folder,
        created: noteDetails.created,
        modified: noteDetails.modified,
        indexed_at: new Date().toISOString(),
      };

      await store.update(record);
      if (isAddition) {
        added++;
      } else {
        updated++;
      }

      // Space out embedding calls to avoid rate limiting; no wait after the last note.
      if (i < toProcess.length - 1) {
        await sleep(EMBEDDING_DELAY_MS);
      }
    } catch (error) {
      debug(`Error processing ${noteInfo.title}:`, error);
      failedNotes.push(labelOf(noteInfo.folder, noteInfo.title));
      errors++;
    }
  }

  // Remove records for notes that no longer exist.
  for (const record of toDelete) {
    const label = labelOf(record.folder, record.title);
    try {
      await store.deleteByFolderAndTitle(record.folder, record.title);
      deleted++;
    } catch (error) {
      debug(`Error deleting ${label}:`, error);
      failedNotes.push(`DELETE: ${label}`);
      errors++;
    }
  }

  const timeMs = Date.now() - startTime;
  debug(`Incremental index complete: ${timeMs}ms`);

  return {
    total: currentNotes.length,
    indexed: added + updated,
    errors,
    timeMs,
    breakdown: {
      added,
      updated,
      deleted,
      skipped,
    },
    failedNotes: failedNotes.length > 0 ? failedNotes : undefined,
  };
}
|
|
290
|
+
|
|
291
|
+
/**
 * Re-embed one note, looked up by title, and write it to the vector store
 * via `store.update`.
 *
 * @param title - Title of the note to reindex.
 * @throws Error when no note with that title is found, or when the note's
 *   content is blank.
 */
export async function reindexNote(title: string): Promise<void> {
  debug(`Reindexing single note: ${title}`);

  const noteDetails = await getNoteByTitle(title);
  if (!noteDetails) {
    throw new Error(`Note not found: "${title}"`);
  }

  const { content: fullContent, folder, created, modified } = noteDetails;
  if (!fullContent.trim()) {
    throw new Error(`Note is empty: "${title}"`);
  }

  // The embedding uses the truncated text; the record keeps the full text.
  const vector = await getEmbedding(truncateForEmbedding(fullContent));

  const record: NoteRecord = {
    title: noteDetails.title,
    content: fullContent,
    vector,
    folder,
    created,
    modified,
    indexed_at: new Date().toISOString(),
  };

  await getVectorStore().update(record);

  debug(`Reindexed: ${title}`);
}
|
|
324
|
+
|
|
325
|
+
/**
 * Run the indexer in the requested mode.
 *
 * @param mode - "full" rebuilds the whole index; anything else (the default,
 *   "incremental") runs the incremental indexer.
 * @returns The result reported by the selected indexer.
 */
export async function indexNotes(
  mode: "full" | "incremental" = "incremental"
): Promise<IndexResult> {
  return mode === "full" ? fullIndex() : incrementalIndex();
}
|