@disco_trooper/apple-notes-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,283 @@
1
+ /**
2
+ * Hybrid search combining vector similarity and full-text search.
3
+ *
4
+ * Uses Reciprocal Rank Fusion (RRF) to merge results from:
5
+ * - Vector search (semantic similarity)
6
+ * - Full-text search (keyword matching)
7
+ */
8
+
9
+ import { getEmbedding } from "../embeddings/index.js";
10
+ import { getVectorStore } from "../db/lancedb.js";
11
+ import type { DBSearchResult, SearchResult } from "../types/index.js";
12
+ import { DEFAULT_SEARCH_LIMIT, FOLDER_FILTER_MULTIPLIER, HYBRID_SEARCH_MIN_FETCH, PREVIEW_LENGTH, PREVIEW_TRUNCATE_RATIO, RRF_K } from "../config/constants.js";
13
+ import { createDebugLogger } from "../utils/debug.js";
14
+
15
+ // Debug logging
16
+ const debug = createDebugLogger("SEARCH");
17
+
18
/**
 * Retrieval strategy accepted by `searchNotes`.
 *
 * - `"hybrid"`: vector and full-text results fused with RRF (the default)
 * - `"keyword"`: full-text search only
 * - `"semantic"`: vector search only
 */
export type SearchMode =
  | "hybrid"
  | "keyword"
  | "semantic";
25
+
26
/**
 * Optional settings accepted by `searchNotes`.
 */
export interface SearchOptions {
  /** Only return notes whose folder matches this name (compared case-insensitively). */
  folder?: string;
  /** Upper bound on the number of results; falls back to DEFAULT_SEARCH_LIMIT when omitted. */
  limit?: number;
  /** Retrieval strategy; falls back to "hybrid" when omitted. */
  mode?: SearchMode;
  /** When true, each result also carries the full note content; falls back to false. */
  include_content?: boolean;
}
39
+
40
+ // SearchResult is imported from ../types/index.js
41
+ // RRF_K is imported from ../config/constants.js
42
+
43
/**
 * Reciprocal Rank Fusion contribution of a single hit.
 *
 * @param rank - Zero-based position of the hit within its result list.
 * @returns `1 / (RRF_K + rank)`, so hits nearer the top contribute more.
 */
function rrfScore(rank: number): number {
  const denominator = RRF_K + rank;
  return 1 / denominator;
}
51
+
52
/**
 * Build a short, single-line preview of a note's content.
 *
 * Whitespace runs are collapsed to single spaces and the text is trimmed.
 * Text longer than `maxLength` is cut and suffixed with "...". The cut is
 * made at the last space when that space lies beyond
 * `maxLength * PREVIEW_TRUNCATE_RATIO`; otherwise the text is cut hard at
 * `maxLength`.
 *
 * @param content - Raw note content; empty input yields an empty string.
 * @param maxLength - Maximum preview length in UTF-16 code units, not
 *   counting the "..." suffix (defaults to PREVIEW_LENGTH).
 * @returns The preview text.
 */
function generatePreview(content: string, maxLength = PREVIEW_LENGTH): string {
  if (!content) {
    return "";
  }

  // Collapse all whitespace (including newlines) into single spaces.
  const cleaned = content.replace(/\s+/g, " ").trim();

  if (cleaned.length <= maxLength) {
    return cleaned;
  }

  let truncated = cleaned.slice(0, maxLength);
  const lastSpace = truncated.lastIndexOf(" ");

  // Prefer a word boundary, but only if it does not discard too much text.
  if (lastSpace > maxLength * PREVIEW_TRUNCATE_RATIO) {
    return truncated.slice(0, lastSpace) + "...";
  }

  // A hard cut can land between the two halves of a surrogate pair (e.g. an
  // emoji). Drop a dangling high surrogate so the preview stays well-formed.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    truncated = truncated.slice(0, -1);
  }

  return truncated + "...";
}
77
+
78
/**
 * Keep only the results that live in the requested folder.
 *
 * @param results - Items carrying a `folder` name.
 * @param folder - Folder to keep, compared case-insensitively. When omitted
 *   or empty, `results` is returned as-is (same array instance).
 * @returns The matching items in their original order.
 */
function filterByFolder<T extends { folder: string }>(
  results: T[],
  folder?: string
): T[] {
  if (folder) {
    const wanted = folder.toLowerCase();
    const kept: T[] = [];
    for (const entry of results) {
      if (entry.folder.toLowerCase() === wanted) {
        kept.push(entry);
      }
    }
    return kept;
  }

  return results;
}
94
+
95
/**
 * Semantic search: embed the query and look up nearest notes in the vector store.
 *
 * @param query - Text to embed.
 * @param limit - Maximum number of results to return.
 * @param folder - Optional folder filter, applied after the store lookup.
 * @returns At most `limit` results, in the order the store returned them.
 */
async function vectorSearch(
  query: string,
  limit: number,
  folder?: string
): Promise<DBSearchResult[]> {
  debug(`Vector search: "${query}" (limit: ${limit})`);

  const store = getVectorStore();
  const queryVector = await getEmbedding(query);

  // The folder filter runs after the lookup, so over-fetch to leave enough
  // candidates once non-matching folders are dropped.
  let fetchLimit = limit;
  if (folder) {
    fetchLimit = limit * FOLDER_FILTER_MULTIPLIER;
  }

  const hits = await store.search(queryVector, fetchLimit);
  return filterByFolder(hits, folder).slice(0, limit);
}
115
+
116
/**
 * Keyword search: query the store's full-text index only.
 *
 * @param query - Text passed to the full-text search.
 * @param limit - Maximum number of results to return.
 * @param folder - Optional folder filter, applied after the store lookup.
 * @returns At most `limit` results, in the order the store returned them.
 */
async function keywordSearch(
  query: string,
  limit: number,
  folder?: string
): Promise<DBSearchResult[]> {
  debug(`FTS search: "${query}" (limit: ${limit})`);

  const store = getVectorStore();

  // The folder filter runs after the lookup, so over-fetch to leave enough
  // candidates once non-matching folders are dropped.
  let fetchLimit = limit;
  if (folder) {
    fetchLimit = limit * FOLDER_FILTER_MULTIPLIER;
  }

  const hits = await store.searchFTS(query, fetchLimit);
  return filterByFolder(hits, folder).slice(0, limit);
}
135
+
136
/**
 * Hybrid search: fuse vector and full-text results with Reciprocal Rank Fusion.
 *
 * Each note's fused score is the sum of `rrfScore(rank)` over every list it
 * appears in. Notes are identified by folder AND title, so two notes that
 * share a title but live in different folders stay separate results.
 *
 * If the full-text search fails, the failure is logged and the search
 * continues with vector results only.
 *
 * @param query - Search text, used for both the embedding and the FTS query.
 * @param limit - Maximum number of results to return.
 * @param folder - Optional folder filter, applied after fusion.
 * @returns At most `limit` results sorted by descending fused score; the
 *   `score` field of each result holds the fused RRF score.
 */
async function hybridSearch(
  query: string,
  limit: number,
  folder?: string
): Promise<DBSearchResult[]> {
  debug(`Hybrid search: "${query}" (limit: ${limit})`);

  const store = getVectorStore();

  // Fetch more candidates than requested so the fusion has material to work
  // with. Over-fetch further when a folder filter will discard hits
  // afterwards, matching what the single-mode searches do.
  const baseFetchLimit = Math.max(limit * 2, HYBRID_SEARCH_MIN_FETCH);
  const fetchLimit = folder
    ? baseFetchLimit * FOLDER_FILTER_MULTIPLIER
    : baseFetchLimit;

  // The embedding request and the FTS query are independent; run them together.
  const [queryVector, ftsResults] = await Promise.all([
    getEmbedding(query),
    store.searchFTS(query, fetchLimit).catch((error: unknown) => {
      // Best effort: degrade to vector-only results, but leave a trace.
      debug("FTS search failed, continuing with vector results only:", error);
      return [] as DBSearchResult[];
    }),
  ]);

  const vectorResults = await store.search(queryVector, fetchLimit);

  debug(`Vector results: ${vectorResults.length}, FTS results: ${ftsResults.length}`);

  // Fused entries keyed by note identity. NUL cannot collide with "/" or any
  // other character that appears in folder names or titles.
  const fused = new Map<string, { item: DBSearchResult; score: number }>();

  const accumulate = (items: DBSearchResult[]): void => {
    items.forEach((item, rank) => {
      const key = `${item.folder}\u0000${item.title}`;
      const entry = fused.get(key);
      if (entry) {
        entry.score += rrfScore(rank);
      } else {
        fused.set(key, { item, score: rrfScore(rank) });
      }
    });
  };

  // Vector hits go first so their record is the one kept when both lists
  // contain the same note.
  accumulate(vectorResults);
  accumulate(ftsResults);

  const merged = Array.from(fused.values())
    .sort((a, b) => b.score - a.score)
    .map(({ item, score }) => ({ ...item, score }));

  return filterByFolder(merged, folder).slice(0, limit);
}
193
+
194
/**
 * Search notes with a configurable search mode.
 *
 * @param query - Search query string; blank queries return no results.
 * @param options - Search configuration options (see `SearchOptions`).
 *   `limit` is floored to an integer. A missing or non-finite limit falls
 *   back to DEFAULT_SEARCH_LIMIT, and a limit below 1 returns no results.
 * @returns Search results in the relevance order produced by the selected
 *   mode. `content` is only present when `include_content` is true.
 *
 * @example
 * ```typescript
 * // Basic hybrid search
 * const results = await searchNotes("project ideas");
 *
 * // Keyword-only search in specific folder
 * const results = await searchNotes("meeting", {
 *   mode: "keyword",
 *   folder: "Work",
 *   limit: 10,
 * });
 *
 * // Semantic search with full content
 * const results = await searchNotes("concepts similar to machine learning", {
 *   mode: "semantic",
 *   include_content: true,
 * });
 * ```
 */
export async function searchNotes(
  query: string,
  options: SearchOptions = {}
): Promise<SearchResult[]> {
  const {
    folder,
    limit = DEFAULT_SEARCH_LIMIT,
    mode = "hybrid",
    include_content = false,
  } = options;

  const trimmedQuery = query ? query.trim() : "";
  if (trimmedQuery.length === 0) {
    debug("Empty query, returning empty results");
    return [];
  }

  // Sanitize the limit before it reaches `slice(0, limit)` and the store:
  // a negative value would otherwise mean "all but the last N", and NaN or
  // fractional values are not meaningful result counts.
  const maxResults = Number.isFinite(limit) ? Math.floor(limit) : DEFAULT_SEARCH_LIMIT;
  if (maxResults < 1) {
    debug("Non-positive limit, returning empty results");
    return [];
  }

  debug(`searchNotes: "${trimmedQuery}" mode=${mode} folder=${folder || "all"} limit=${maxResults}`);

  let dbResults: DBSearchResult[];

  switch (mode) {
    case "keyword":
      dbResults = await keywordSearch(trimmedQuery, maxResults, folder);
      break;

    case "semantic":
      dbResults = await vectorSearch(trimmedQuery, maxResults, folder);
      break;

    case "hybrid":
    default:
      // Unknown modes from untyped callers fall back to hybrid search.
      dbResults = await hybridSearch(trimmedQuery, maxResults, folder);
      break;
  }

  // Transform store rows into the public SearchResult shape.
  const results: SearchResult[] = dbResults.map((r) => {
    const result: SearchResult = {
      title: r.title,
      folder: r.folder,
      preview: generatePreview(r.content),
      modified: r.modified,
      score: r.score,
    };

    if (include_content) {
      result.content = r.content;
    }

    return result;
  });

  debug(`Returning ${results.length} results`);
  return results;
}
277
+
278
+ // Re-export types for convenience
279
+ export type { SearchMode as Mode };
280
+ export type { SearchResult } from "../types/index.js";
281
+
282
+ // Export utility functions for testing
283
+ export { rrfScore, generatePreview, filterByFolder };
@@ -0,0 +1,42 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { extractTitleFromKey } from "./indexer.js";
3
+
4
// extractTitleFromKey should return the text after the last "/" of a key.
describe("extractTitleFromKey", () => {
  it("extracts title from simple folder/title key", () => {
    const title = extractTitleFromKey("Work/My Note");
    expect(title).toBe("My Note");
  });

  it("extracts title from nested folder key", () => {
    const title = extractTitleFromKey("Work/Projects/My Note");
    expect(title).toBe("My Note");
  });

  it("extracts title from deeply nested folder key", () => {
    const title = extractTitleFromKey("Personal/Archive/2024/January/Meeting Notes");
    expect(title).toBe("Meeting Notes");
  });

  it("handles single segment (no folder)", () => {
    const title = extractTitleFromKey("My Note");
    expect(title).toBe("My Note");
  });
});
21
+
22
// Checks the "split at the last slash" rule for folder/title keys.
describe("delete key parsing", () => {
  // Everything before the last "/" is the folder; everything after is the title.
  const splitAtLastSlash = (key: string): { folder: string; title: string } => {
    const cut = key.lastIndexOf("/");
    return {
      folder: key.substring(0, cut),
      title: key.substring(cut + 1),
    };
  };

  it("should correctly parse folder and title from key", () => {
    const { folder, title } = splitAtLastSlash("Work/Projects/My Note");

    expect(folder).toBe("Work/Projects");
    expect(title).toBe("My Note");
  });

  it("should handle simple folder/title", () => {
    const { folder, title } = splitAtLastSlash("Personal/My Note");

    expect(folder).toBe("Personal");
    expect(title).toBe("My Note");
  });
});
@@ -0,0 +1,335 @@
1
+ /**
2
+ * Note indexing pipeline for semantic search.
3
+ *
4
+ * Supports:
5
+ * - Full reindexing (drop all, reindex everything)
6
+ * - Incremental indexing (only changed notes)
7
+ * - Single note reindexing
8
+ */
9
+
10
+ import { getEmbedding } from "../embeddings/index.js";
11
+ import { getVectorStore, type NoteRecord } from "../db/lancedb.js";
12
+ import { getAllNotes, getNoteByFolderAndTitle, getNoteByTitle, type NoteInfo } from "../notes/read.js";
13
+ import { createDebugLogger } from "../utils/debug.js";
14
+ import { truncateForEmbedding } from "../utils/text.js";
15
+ import { EMBEDDING_DELAY_MS } from "../config/constants.js";
16
+
17
/**
 * Return the note title from a "folder/title" key.
 *
 * The title is whatever follows the last "/" in the key, so nested folder
 * paths are skipped. A key without "/" is returned unchanged.
 *
 * @param key - Key of the form "folder/.../title".
 * @returns The final "/"-separated segment of `key`.
 */
export function extractTitleFromKey(key: string): string {
  const lastSlash = key.lastIndexOf("/");
  return lastSlash === -1 ? key : key.slice(lastSlash + 1);
}
25
+
26
+ // Debug logging
27
+ const debug = createDebugLogger("INDEX");
28
+
29
/**
 * Summary returned by the indexing functions.
 */
export interface IndexResult {
  /** Number of notes found in Apple Notes for this run. */
  total: number;
  /** Number of notes indexed. */
  indexed: number;
  /** Number of failures encountered. */
  errors: number;
  /** Wall-clock duration of the run, in milliseconds. */
  timeMs: number;
  /** Per-category counts; only reported by incremental indexing. */
  breakdown?: {
    added: number;
    updated: number;
    deleted: number;
    skipped: number;
  };
  /** Identifiers of the notes that failed (for debugging); omitted when nothing failed. */
  failedNotes?: string[];
}
51
+
52
/**
 * Pause for a given amount of time.
 *
 * @param ms - Delay in milliseconds, passed to `setTimeout`.
 * @returns A promise that resolves once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
58
+
59
/**
 * Rebuild the index from every note in Apple Notes.
 *
 * Each note is fetched by folder and title, embedded, and collected into a
 * record; all records are then handed to the vector store in a single
 * `index` call. Notes with blank content are skipped without counting as
 * errors. Notes that cannot be fetched or embedded are counted in `errors`
 * and listed in `failedNotes`.
 *
 * @returns Totals for the run; `indexed` is the number of records stored.
 */
export async function fullIndex(): Promise<IndexResult> {
  const startedAt = Date.now();
  debug("Starting full index...");

  const notes = await getAllNotes();
  debug(`Found ${notes.length} notes in Apple Notes`);

  const records: NoteRecord[] = [];
  const failedNotes: string[] = [];
  let errors = 0;
  const lastPosition = notes.length - 1;

  for (const [position, noteInfo] of notes.entries()) {
    debug(`Processing ${position + 1}/${notes.length}: ${noteInfo.title}`);

    const recordFailure = (): void => {
      failedNotes.push(`${noteInfo.folder}/${noteInfo.title}`);
      errors++;
    };

    try {
      // Look the note up by folder and title separately so that titles
      // containing "/" are handled.
      const noteDetails = await getNoteByFolderAndTitle(noteInfo.folder, noteInfo.title);
      if (!noteDetails) {
        debug(`Could not fetch note: ${noteInfo.title}`);
        recordFailure();
        continue;
      }

      // Blank notes are not indexed.
      if (!noteDetails.content.trim()) {
        debug(`Skipping empty note: ${noteInfo.title}`);
        continue;
      }

      const vector = await getEmbedding(truncateForEmbedding(noteDetails.content));

      records.push({
        title: noteDetails.title,
        content: noteDetails.content,
        vector,
        folder: noteDetails.folder,
        created: noteDetails.created,
        modified: noteDetails.modified,
        indexed_at: new Date().toISOString(),
      });

      // Space out embedding requests to avoid rate limiting; no need to wait
      // after the final note.
      if (position < lastPosition) {
        await sleep(EMBEDDING_DELAY_MS);
      }
    } catch (error) {
      debug(`Error processing ${noteInfo.title}:`, error);
      recordFailure();
    }
  }

  // Hand every collected record to the vector store in one call.
  await getVectorStore().index(records);

  const timeMs = Date.now() - startedAt;
  debug(`Full index complete: ${records.length} indexed, ${errors} errors, ${timeMs}ms`);

  const result: IndexResult = {
    total: notes.length,
    indexed: records.length,
    errors,
    timeMs,
    failedNotes: failedNotes.length > 0 ? failedNotes : undefined,
  };
  return result;
}
138
+
139
/**
 * Perform incremental indexing.
 *
 * Compares the notes currently in Apple Notes with the records in the
 * vector store and then:
 * - adds notes that have no record yet,
 * - re-indexes notes whose `modified` time is later than the record's
 *   `indexed_at`,
 * - deletes records whose note no longer exists,
 * - leaves everything else untouched.
 *
 * Notes are matched by folder AND title. Deletions use the folder and title
 * stored on the record itself rather than re-parsing a "folder/title"
 * string, so notes whose titles contain "/" are removed correctly.
 *
 * If the existing records cannot be read, this falls back to `fullIndex()`.
 *
 * @returns Totals for the run. `indexed` counts the notes actually written
 *   to the store. `breakdown` reports how many notes were classified as
 *   added, updated, deleted or skipped. `failedNotes` lists failures as
 *   "folder/title", with deletions prefixed by "DELETE: ".
 */
export async function incrementalIndex(): Promise<IndexResult> {
  const startTime = Date.now();
  debug("Starting incremental index...");

  const store = getVectorStore();

  // Get all notes from Apple Notes
  const currentNotes = await getAllNotes();
  debug(`Found ${currentNotes.length} notes in Apple Notes`);

  // Get existing indexed notes
  let existingRecords: NoteRecord[];
  try {
    existingRecords = await store.getAll();
  } catch (error) {
    // No existing index, fall back to full index
    debug("No existing index found, performing full index. Error:", error);
    return fullIndex();
  }

  // Identity key for lookups. NUL cannot be confused with the "/" that may
  // appear in nested folder paths or in titles.
  const identityKey = (folder: string, title: string): string =>
    `${folder}\u0000${title}`;
  // Human-readable form used in logs and in `failedNotes`.
  const displayName = (folder: string, title: string): string =>
    `${folder}/${title}`;

  const existingByKey = new Map<string, NoteRecord>();
  for (const record of existingRecords) {
    existingByKey.set(identityKey(record.folder, record.title), record);
  }

  // Classify current notes.
  const currentKeys = new Set<string>();
  const toAdd: NoteInfo[] = [];
  const toUpdate: NoteInfo[] = [];
  let skipped = 0;

  for (const note of currentNotes) {
    const key = identityKey(note.folder, note.title);
    currentKeys.add(key);

    const existing = existingByKey.get(key);
    if (!existing) {
      toAdd.push(note);
      continue;
    }

    // Re-index only when the note changed after it was last indexed.
    const currentModified = new Date(note.modified).getTime();
    const indexedAt = new Date(existing.indexed_at).getTime();
    if (currentModified > indexedAt) {
      toUpdate.push(note);
    } else {
      skipped++;
    }
  }

  // Records whose note no longer exists. Keep the record itself so its
  // folder and title can be passed to the store verbatim.
  const toDelete: NoteRecord[] = [];
  for (const [key, record] of existingByKey) {
    if (!currentKeys.has(key)) {
      toDelete.push(record);
    }
  }

  debug(`Incremental: add=${toAdd.length}, update=${toUpdate.length}, delete=${toDelete.length}, skip=${skipped}`);

  let errors = 0;
  let indexed = 0;
  const failedNotes: string[] = [];

  // Process additions and updates
  const toProcess = [...toAdd, ...toUpdate];
  for (let i = 0; i < toProcess.length; i++) {
    const noteInfo = toProcess[i];
    debug(`Processing ${i + 1}/${toProcess.length}: ${noteInfo.title}`);

    try {
      // Use folder and title separately to handle "/" in titles
      const noteDetails = await getNoteByFolderAndTitle(noteInfo.folder, noteInfo.title);
      if (!noteDetails) {
        failedNotes.push(displayName(noteInfo.folder, noteInfo.title));
        errors++;
        continue;
      }

      // Blank notes are not indexed.
      if (!noteDetails.content.trim()) {
        continue;
      }

      const content = truncateForEmbedding(noteDetails.content);
      const vector = await getEmbedding(content);

      const record: NoteRecord = {
        title: noteDetails.title,
        content: noteDetails.content,
        vector,
        folder: noteDetails.folder,
        created: noteDetails.created,
        modified: noteDetails.modified,
        indexed_at: new Date().toISOString(),
      };

      await store.update(record);
      indexed++;

      // Space out embedding requests; no need to wait after the last one.
      if (i < toProcess.length - 1) {
        await sleep(EMBEDDING_DELAY_MS);
      }
    } catch (error) {
      debug(`Error processing ${noteInfo.title}:`, error);
      failedNotes.push(displayName(noteInfo.folder, noteInfo.title));
      errors++;
    }
  }

  // Process deletions
  for (const record of toDelete) {
    try {
      await store.deleteByFolderAndTitle(record.folder, record.title);
    } catch (error) {
      const name = displayName(record.folder, record.title);
      debug(`Error deleting ${name}:`, error);
      failedNotes.push(`DELETE: ${name}`);
      errors++;
    }
  }

  const timeMs = Date.now() - startTime;
  debug(`Incremental index complete: ${timeMs}ms`);

  return {
    total: currentNotes.length,
    indexed,
    errors,
    timeMs,
    breakdown: {
      added: toAdd.length,
      updated: toUpdate.length,
      deleted: toDelete.length,
      skipped,
    },
    failedNotes: failedNotes.length > 0 ? failedNotes : undefined,
  };
}
290
+
291
/**
 * Re-embed one note, looked up by title, and write it to the vector store.
 *
 * @param title - Title passed to `getNoteByTitle`.
 * @throws Error when no note is found for `title`, or when the note's
 *   content is blank.
 */
export async function reindexNote(title: string): Promise<void> {
  debug(`Reindexing single note: ${title}`);

  const note = await getNoteByTitle(title);
  if (!note) {
    throw new Error(`Note not found: "${title}"`);
  }

  const hasText = note.content.trim().length > 0;
  if (!hasText) {
    throw new Error(`Note is empty: "${title}"`);
  }

  const vector = await getEmbedding(truncateForEmbedding(note.content));

  const record: NoteRecord = {
    title: note.title,
    content: note.content,
    vector,
    folder: note.folder,
    created: note.created,
    modified: note.modified,
    indexed_at: new Date().toISOString(),
  };

  await getVectorStore().update(record);

  debug(`Reindexed: ${title}`);
}
324
+
325
/**
 * Run the indexer in the requested mode.
 *
 * @param mode - "full" runs `fullIndex()`; anything else (the default is
 *   "incremental") runs `incrementalIndex()`.
 * @returns The result of the selected indexing run.
 */
export async function indexNotes(
  mode: "full" | "incremental" = "incremental"
): Promise<IndexResult> {
  return mode === "full" ? fullIndex() : incrementalIndex();
}