@disco_trooper/apple-notes-mcp 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +136 -24
  2. package/package.json +13 -9
  3. package/src/config/claude.test.ts +47 -0
  4. package/src/config/claude.ts +106 -0
  5. package/src/config/constants.ts +11 -2
  6. package/src/config/paths.test.ts +40 -0
  7. package/src/config/paths.ts +86 -0
  8. package/src/db/arrow-fix.test.ts +101 -0
  9. package/src/db/lancedb.test.ts +209 -2
  10. package/src/db/lancedb.ts +373 -7
  11. package/src/embeddings/cache.test.ts +150 -0
  12. package/src/embeddings/cache.ts +204 -0
  13. package/src/embeddings/index.ts +21 -2
  14. package/src/embeddings/local.ts +61 -10
  15. package/src/embeddings/openrouter.ts +233 -11
  16. package/src/graph/export.test.ts +81 -0
  17. package/src/graph/export.ts +163 -0
  18. package/src/graph/extract.test.ts +90 -0
  19. package/src/graph/extract.ts +52 -0
  20. package/src/graph/queries.test.ts +156 -0
  21. package/src/graph/queries.ts +224 -0
  22. package/src/index.ts +376 -10
  23. package/src/notes/crud.test.ts +148 -3
  24. package/src/notes/crud.ts +250 -5
  25. package/src/notes/read.ts +83 -68
  26. package/src/search/chunk-indexer.test.ts +353 -0
  27. package/src/search/chunk-indexer.ts +254 -0
  28. package/src/search/chunk-search.test.ts +327 -0
  29. package/src/search/chunk-search.ts +298 -0
  30. package/src/search/indexer.ts +151 -109
  31. package/src/search/refresh.test.ts +173 -0
  32. package/src/search/refresh.ts +151 -0
  33. package/src/setup.ts +46 -67
  34. package/src/utils/chunker.test.ts +182 -0
  35. package/src/utils/chunker.ts +170 -0
  36. package/src/utils/content-filter.test.ts +225 -0
  37. package/src/utils/content-filter.ts +275 -0
  38. package/src/utils/runtime.test.ts +70 -0
  39. package/src/utils/runtime.ts +40 -0
package/src/embeddings/cache.ts
@@ -0,0 +1,204 @@
+ /**
+  * LRU Cache for query embeddings.
+  * Dramatically speeds up hybrid search by caching repeated queries.
+  */
+
+ import { createDebugLogger } from "../utils/debug.js";
+
+ const debug = createDebugLogger("EMBED_CACHE");
+
+ /**
+  * Simple LRU Cache implementation for embeddings.
+  */
+ class LRUCache<K, V> {
+   private cache = new Map<K, V>();
+   private readonly maxSize: number;
+
+   constructor(maxSize: number) {
+     this.maxSize = maxSize;
+   }
+
+   get(key: K): V | undefined {
+     const value = this.cache.get(key);
+     if (value !== undefined) {
+       // Move to end (most recently used)
+       this.cache.delete(key);
+       this.cache.set(key, value);
+     }
+     return value;
+   }
+
+   set(key: K, value: V): void {
+     // Delete if exists (to update position)
+     if (this.cache.has(key)) {
+       this.cache.delete(key);
+     }
+     // Evict oldest if at capacity
+     else if (this.cache.size >= this.maxSize) {
+       const firstKey = this.cache.keys().next().value;
+       if (firstKey !== undefined) {
+         this.cache.delete(firstKey);
+       }
+     }
+     this.cache.set(key, value);
+   }
+
+   has(key: K): boolean {
+     return this.cache.has(key);
+   }
+
+   clear(): void {
+     this.cache.clear();
+   }
+
+   get size(): number {
+     return this.cache.size;
+   }
+ }
+
+ /**
+  * Normalize query for better cache hit rate.
+  * - Lowercase
+  * - Trim whitespace
+  * - Collapse multiple spaces
+  */
+ function normalizeQuery(query: string): string {
+   return query.toLowerCase().trim().replace(/\s+/g, " ");
+ }
+
+ /**
+  * Cache statistics for monitoring.
+  */
+ export interface CacheStats {
+   hits: number;
+   misses: number;
+   size: number;
+   hitRate: number;
+ }
+
+ /**
+  * Embedding cache with LRU eviction.
+  */
+ class EmbeddingCache {
+   private cache: LRUCache<string, number[]>;
+   private modelVersion: string;
+   private hits = 0;
+   private misses = 0;
+
+   constructor(maxSize = 1000, modelVersion = "default") {
+     this.cache = new LRUCache(maxSize);
+     this.modelVersion = modelVersion;
+     debug(`Embedding cache initialized (max: ${maxSize})`);
+   }
+
+   /**
+    * Create cache key from query and model version.
+    */
+   private makeKey(query: string): string {
+     const normalized = normalizeQuery(query);
+     return `${this.modelVersion}:${normalized}`;
+   }
+
+   /**
+    * Get cached embedding for query.
+    * Returns undefined if not cached.
+    */
+   get(query: string): number[] | undefined {
+     const key = this.makeKey(query);
+     const cached = this.cache.get(key);
+
+     if (cached) {
+       this.hits++;
+       debug(`Cache HIT for "${query.slice(0, 30)}..." (hits: ${this.hits})`);
+       return cached;
+     }
+
+     this.misses++;
+     return undefined;
+   }
+
+   /**
+    * Store embedding in cache.
+    */
+   set(query: string, embedding: number[]): void {
+     const key = this.makeKey(query);
+     this.cache.set(key, embedding);
+     debug(`Cached embedding for "${query.slice(0, 30)}..." (size: ${this.cache.size})`);
+   }
+
+   /**
+    * Get or compute embedding using provided function.
+    * This is the main API for cached embedding retrieval.
+    */
+   async getOrCompute(
+     query: string,
+     computeFn: (q: string) => Promise<number[]>
+   ): Promise<number[]> {
+     const cached = this.get(query);
+     if (cached) {
+       return cached;
+     }
+
+     const embedding = await computeFn(query);
+     this.set(query, embedding);
+     return embedding;
+   }
+
+   /**
+    * Invalidate cache (e.g., when model changes).
+    */
+   clear(): void {
+     this.cache.clear();
+     this.hits = 0;
+     this.misses = 0;
+     debug("Cache cleared");
+   }
+
+   /**
+    * Update model version and clear cache.
+    */
+   setModelVersion(version: string): void {
+     if (version !== this.modelVersion) {
+       debug(`Model version changed: ${this.modelVersion} -> ${version}`);
+       this.modelVersion = version;
+       this.clear();
+     }
+   }
+
+   /**
+    * Get cache statistics.
+    */
+   getStats(): CacheStats {
+     const total = this.hits + this.misses;
+     return {
+       hits: this.hits,
+       misses: this.misses,
+       size: this.cache.size,
+       hitRate: total > 0 ? this.hits / total : 0,
+     };
+   }
+ }
+
+ // Singleton instance
+ let cacheInstance: EmbeddingCache | null = null;
+
+ /**
+  * Get the embedding cache singleton.
+  */
+ export function getEmbeddingCache(): EmbeddingCache {
+   if (!cacheInstance) {
+     // Max 1000 queries * ~1.5KB per embedding = ~1.5MB
+     cacheInstance = new EmbeddingCache(1000);
+   }
+   return cacheInstance;
+ }
+
+ /**
+  * Reset the cache (useful for testing).
+  */
+ export function resetEmbeddingCache(): void {
+   if (cacheInstance) {
+     cacheInstance.clear();
+   }
+   cacheInstance = null;
+ }
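
How this cache is meant to be consumed, as a minimal sketch (the embedQuery wrapper and its wiring are assumptions for illustration; getEmbeddingCache and getEmbedding are real exports from this diff):

    import { getEmbeddingCache } from "./cache.js";
    import { getEmbedding } from "./index.js";

    async function embedQuery(query: string): Promise<number[]> {
      // Computes on a miss; repeated queries (after lowercasing and
      // whitespace collapsing) return the cached vector directly.
      return getEmbeddingCache().getOrCompute(query, (q) => getEmbedding(q));
    }

Because the key embeds the model version, setModelVersion() both switches the key namespace and clears stale vectors.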
package/src/embeddings/index.ts
@@ -6,8 +6,8 @@
   * - Local HuggingFace (fallback)
   */
 
- import { getOpenRouterEmbedding, getOpenRouterDimensions } from "./openrouter.js";
- import { getLocalEmbedding, getLocalDimensions, getLocalModelName } from "./local.js";
+ import { getOpenRouterEmbedding, getOpenRouterDimensions, getOpenRouterEmbeddingBatch } from "./openrouter.js";
+ import { getLocalEmbedding, getLocalDimensions, getLocalModelName, getLocalEmbeddingBatch } from "./local.js";
  import { createDebugLogger } from "../utils/debug.js";
 
  // Debug logging
@@ -62,6 +62,23 @@ export async function getEmbedding(text: string): Promise<number[]> {
    }
  }
 
+ /**
+  * Generate embeddings for multiple texts in batch.
+  * Uses native batch API for both OpenRouter and local providers.
+  *
+  * @param texts - Array of texts to embed
+  * @returns Promise resolving to array of embedding vectors
+  */
+ export async function getEmbeddingBatch(texts: string[]): Promise<number[][]> {
+   const provider = getProvider();
+
+   if (provider === "openrouter") {
+     return getOpenRouterEmbeddingBatch(texts);
+   } else {
+     return getLocalEmbeddingBatch(texts);
+   }
+ }
+
  /**
   * Get the embedding dimensions for the current provider.
   *
@@ -100,10 +117,12 @@ export function getProviderDescription(): string {
  export {
    getOpenRouterEmbedding,
    getOpenRouterDimensions,
+   getOpenRouterEmbeddingBatch,
  } from "./openrouter.js";
 
  export {
    getLocalEmbedding,
+   getLocalEmbeddingBatch,
    getLocalDimensions,
    getLocalModelName,
    isModelLoaded,
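
A minimal sketch of calling the new batch entry point (the sample texts are invented; in the package the inputs would presumably come from the chunk indexer added in this release):

    import { getEmbeddingBatch } from "./embeddings/index.js";

    const texts = ["Meeting notes from Monday", "Grocery list", "Project roadmap"];
    // One call dispatches to the OpenRouter or local batch implementation;
    // vectors come back in the same order as the inputs.
    const vectors = await getEmbeddingBatch(texts);
    console.log(vectors.length); // 3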
package/src/embeddings/local.ts
@@ -25,7 +25,7 @@ const debug = createDebugLogger("LOCAL");
 
  // Lazy-loaded pipeline
  type FeatureExtractionPipeline = (
-   text: string,
+   text: string | string[],
    options?: { pooling?: string; normalize?: boolean }
  ) => Promise<{ tolist: () => number[][] }>;
 
@@ -40,6 +40,27 @@ function getModelName(): string {
    return process.env.EMBEDDING_MODEL || DEFAULT_MODEL;
  }
 
+ /**
+  * Check if the model is an E5 model that requires prefixed input.
+  */
+ function isE5Model(): boolean {
+   return getModelName().toLowerCase().includes("e5");
+ }
+
+ /**
+  * Prepare text for embedding by adding E5 prefix if needed.
+  */
+ function prepareText(text: string): string {
+   return isE5Model() ? `passage: ${text}` : text;
+ }
+
+ /**
+  * Prepare multiple texts for embedding by adding E5 prefix if needed.
+  */
+ function prepareTexts(texts: string[]): string[] {
+   return isE5Model() ? texts.map(t => `passage: ${t}`) : texts;
+ }
+
  /**
   * Lazy-load the HuggingFace transformers pipeline.
   * Only loads once, subsequent calls return the cached instance.
@@ -116,19 +137,11 @@ export async function getLocalEmbedding(text: string): Promise<number[]> {
    const startTime = Date.now();
 
    try {
-     // For e5 models, prepend "passage: " for document embedding
-     // or "query: " for search queries - using passage for general text
-     const modelName = getModelName();
-     const isE5Model = modelName.toLowerCase().includes("e5");
-     const inputText = isE5Model ? `passage: ${text}` : text;
-
-     // Run inference with mean pooling and normalization
-     const output = await pipe(inputText, {
+     const output = await pipe(prepareText(text), {
        pooling: "mean",
        normalize: true,
      });
 
-     // Extract the embedding vector
      const embedding = output.tolist()[0];
 
      const inferenceTime = Date.now() - startTime;
@@ -178,3 +191,41 @@ export function getLocalModelName(): string {
  export function isModelLoaded(): boolean {
    return pipelineInstance !== null;
  }
+
+ /**
+  * Generate embeddings for multiple texts in a single batch call.
+  * More efficient than calling getLocalEmbedding for each text individually.
+  *
+  * @param texts - Array of texts to embed
+  * @returns Promise resolving to array of embedding vectors
+  * @throws Error if model loading or inference fails
+  */
+ export async function getLocalEmbeddingBatch(texts: string[]): Promise<number[][]> {
+   if (!texts || texts.length === 0) {
+     return [];
+   }
+
+   const pipe = await getPipeline();
+
+   debug(`Generating batch embeddings for ${texts.length} texts`);
+   const startTime = Date.now();
+
+   try {
+     const output = await pipe(prepareTexts(texts), {
+       pooling: "mean",
+       normalize: true,
+     });
+
+     const embeddings = output.tolist() as number[][];
+
+     const inferenceTime = Date.now() - startTime;
+     debug(`Batch embeddings generated in ${inferenceTime}ms (${embeddings.length} vectors, ${embeddings[0]?.length ?? 0} dims)`);
+
+     return embeddings;
+   } catch (error) {
+     const message = error instanceof Error ? error.message : String(error);
+     debug(`Batch embedding generation failed: ${message}`);
+
+     throw new Error(`Failed to generate batch embeddings: ${message}`);
+   }
+ }
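
For context: as the deleted comment above notes, E5-family models are trained with asymmetric "query: "/"passage: " prefixes, and this refactor standardizes on the passage prefix for all inputs. A sketch of the resulting behavior (the model name is a hypothetical example; DEFAULT_MODEL is defined elsewhere in this file):

    // With EMBEDDING_MODEL containing "e5", e.g. "intfloat/multilingual-e5-small":
    prepareText("hello world");   // => "passage: hello world"
    prepareTexts(["a", "b"]);     // => ["passage: a", "passage: b"]

    // With a non-E5 model, inputs pass through unchanged:
    prepareText("hello world");   // => "hello world"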
package/src/embeddings/openrouter.ts
@@ -108,6 +108,27 @@ class OpenRouterError extends Error {
    }
  }
 
+ /** HTTP status codes that should not be retried */
+ const NON_RETRYABLE_STATUS_CODES = [400, 401, 403, 404];
+
+ /** Common headers for OpenRouter API requests */
+ const API_HEADERS = {
+   "Content-Type": "application/json",
+   "HTTP-Referer": "https://github.com/apple-notes-mcp",
+   "X-Title": "Apple Notes MCP",
+ } as const;
+
+ /**
+  * Check if an error should trigger a retry or fail immediately.
+  * Returns true if the error is non-retryable.
+  */
+ function isNonRetryableError(error: unknown): boolean {
+   if (error instanceof OpenRouterError && error.statusCode) {
+     return NON_RETRYABLE_STATUS_CODES.includes(error.statusCode);
+   }
+   return false;
+ }
+
  /**
   * Get embedding vector for text using OpenRouter API
   *
@@ -157,9 +178,7 @@ export async function getOpenRouterEmbedding(text: string): Promise<number[]> {
        method: "POST",
        headers: {
          Authorization: `Bearer ${OPENROUTER_API_KEY}`,
-         "Content-Type": "application/json",
-         "HTTP-Referer": "https://github.com/apple-notes-mcp",
-         "X-Title": "Apple Notes MCP",
+         ...API_HEADERS,
        },
        body: JSON.stringify({
          model: EMBEDDING_MODEL,
@@ -224,17 +243,12 @@ export async function getOpenRouterEmbedding(text: string): Promise<number[]> {
          `Request timed out after ${OPENROUTER_TIMEOUT_MS}ms`,
          408
        );
-       // Don't throw - fall through to retry logic below
      } else {
        lastError = error instanceof Error ? error : new Error(String(error));
 
-       // Don't retry on non-retryable errors
-       if (error instanceof OpenRouterError && error.statusCode) {
-         const nonRetryable = [400, 401, 403, 404];
-         if (nonRetryable.includes(error.statusCode)) {
-           debug(`Non-retryable error (${error.statusCode}), failing immediately`);
-           throw error;
-         }
+       if (isNonRetryableError(error)) {
+         debug(`Non-retryable error, failing immediately`);
+         throw error;
        }
      }
 
@@ -283,3 +297,211 @@ export function clearEmbeddingCache(): void {
  export function getEmbeddingCacheSize(): number {
    return embeddingCache.size;
  }
+
+ /**
+  * Batch size for embedding requests.
+  * OpenRouter supports up to 2048 inputs per request, but 50-100 is optimal.
+  */
+ const BATCH_SIZE = 50;
+
+ /**
+  * Number of concurrent batch API calls.
+  * Higher values increase throughput but may hit rate limits.
+  */
+ const CONCURRENT_BATCHES = 3;
+
+ /**
+  * Split an array into chunks of specified size.
+  */
+ function chunk<T>(array: T[], size: number): T[][] {
+   const chunks: T[][] = [];
+   for (let i = 0; i < array.length; i += size) {
+     chunks.push(array.slice(i, i + size));
+   }
+   return chunks;
+ }
+
+ /**
+  * Process a single batch of texts and return embeddings.
+  * Internal helper for concurrent batch processing.
+  */
+ async function processSingleBatch(
+   batchTexts: string[],
+   batchIndices: number[],
+   cacheKeys: string[],
+   results: (number[] | null)[],
+   batchNumber: number,
+   totalBatches: number
+ ): Promise<void> {
+   debug(`Processing batch ${batchNumber}/${totalBatches} (${batchTexts.length} texts)`);
+
+   let lastError: Error | null = null;
+
+   for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
+     const controller = new AbortController();
+     const timeoutId = setTimeout(() => controller.abort(), OPENROUTER_TIMEOUT_MS * 2);
+
+     try {
+       const response = await fetch(API_URL, {
+         method: "POST",
+         headers: {
+           Authorization: `Bearer ${OPENROUTER_API_KEY}`,
+           ...API_HEADERS,
+         },
+         body: JSON.stringify({
+           model: EMBEDDING_MODEL,
+           input: batchTexts,
+           dimensions: EMBEDDING_DIMS,
+         }),
+         signal: controller.signal,
+       });
+
+       if (response.status === 429) {
+         clearTimeout(timeoutId);
+         const waitTime = getBackoffDelay(attempt, RATE_LIMIT_BACKOFF_BASE_MS);
+         debug(`Batch ${batchNumber}: Rate limited (429), waiting ${waitTime}ms`);
+         await sleep(waitTime);
+         continue;
+       }
+
+       if (!response.ok) {
+         const errorBody = await response.text();
+         throw new OpenRouterError(
+           `OpenRouter API error: ${response.status} - ${errorBody}`,
+           response.status,
+           errorBody
+         );
+       }
+
+       const data = await response.json() as {
+         data?: Array<{ embedding?: number[]; index?: number }>;
+       };
+
+       if (!data?.data || data.data.length !== batchTexts.length) {
+         throw new OpenRouterError(
+           `Invalid API response: expected ${batchTexts.length} embeddings, got ${data?.data?.length ?? 0}`,
+           response.status,
+           JSON.stringify(data)
+         );
+       }
+
+       // Store results and cache them
+       for (let j = 0; j < data.data.length; j++) {
+         const embedding = data.data[j].embedding;
+         if (!embedding) {
+           throw new OpenRouterError(
+             `Missing embedding at index ${j}`,
+             response.status,
+             JSON.stringify(data)
+           );
+         }
+
+         results[batchIndices[j]] = embedding;
+         embeddingCache.set(cacheKeys[batchIndices[j]], embedding);
+       }
+
+       return; // Success
+     } catch (error) {
+       if (error instanceof Error && error.name === "AbortError") {
+         lastError = new OpenRouterError(
+           `Batch request timed out after ${OPENROUTER_TIMEOUT_MS * 2}ms`,
+           408
+         );
+       } else {
+         lastError = error instanceof Error ? error : new Error(String(error));
+
+         if (isNonRetryableError(error)) {
+           throw error;
+         }
+       }
+
+       if (attempt < MAX_RETRIES - 1) {
+         const waitTime = getBackoffDelay(attempt);
+         debug(`Batch ${batchNumber} error: ${lastError.message}, retrying in ${waitTime}ms`);
+         await sleep(waitTime);
+       }
+     } finally {
+       clearTimeout(timeoutId);
+     }
+   }
+
+   throw new OpenRouterError(
+     `Failed to get batch ${batchNumber} embeddings after ${MAX_RETRIES} attempts: ${lastError?.message}`
+   );
+ }
+
+ /**
+  * Get embedding vectors for multiple texts using concurrent batch API calls.
+  * Much faster than calling getOpenRouterEmbedding individually.
+  *
+  * @param texts - Array of input texts to embed
+  * @returns Promise resolving to array of embedding vectors
+  * @throws OpenRouterError if API call fails
+  */
+ export async function getOpenRouterEmbeddingBatch(texts: string[]): Promise<number[][]> {
+   if (!OPENROUTER_API_KEY) {
+     throw new OpenRouterError(
+       "OPENROUTER_API_KEY environment variable is not set"
+     );
+   }
+
+   if (texts.length === 0) {
+     return [];
+   }
+
+   // Truncate all inputs and check cache
+   const truncatedTexts = texts.map(t => truncateForEmbedding(t));
+   const cacheKeys = truncatedTexts.map(t => getCacheKey(t));
+
+   // Separate cached and uncached
+   const results: (number[] | null)[] = new Array(texts.length).fill(null);
+   const uncachedIndices: number[] = [];
+   const uncachedTexts: string[] = [];
+
+   for (let i = 0; i < truncatedTexts.length; i++) {
+     const cached = embeddingCache.get(cacheKeys[i]);
+     if (cached) {
+       results[i] = cached;
+     } else {
+       uncachedIndices.push(i);
+       uncachedTexts.push(truncatedTexts[i]);
+     }
+   }
+
+   debug(`Batch: ${texts.length} total, ${uncachedIndices.length} uncached`);
+
+   if (uncachedTexts.length === 0) {
+     return results as number[][];
+   }
+
+   // Split into batches
+   const textBatches = chunk(uncachedTexts, BATCH_SIZE);
+   const indexBatches = chunk(uncachedIndices, BATCH_SIZE);
+   const totalBatches = textBatches.length;
+
+   debug(`Processing ${totalBatches} batches with ${CONCURRENT_BATCHES} concurrent requests`);
+
+   // Process batches with concurrency limit
+   const batchGroups = chunk(
+     textBatches.map((texts, i) => ({ texts, indices: indexBatches[i], batchNumber: i + 1 })),
+     CONCURRENT_BATCHES
+   );
+
+   for (const group of batchGroups) {
+     await Promise.all(
+       group.map(batch =>
+         processSingleBatch(
+           batch.texts,
+           batch.indices,
+           cacheKeys,
+           results,
+           batch.batchNumber,
+           totalBatches
+         )
+       )
+     );
+   }
+
+   return results as number[][];
+ }
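
The concurrency control above is grouped batching rather than a sliding-window pool: chunks of CONCURRENT_BATCHES requests run via Promise.all, and each group finishes before the next starts, so a slow batch briefly stalls its group. A self-contained sketch of the same pattern (generic names, not from the package):

    // At most groupSize calls are in flight at any time.
    async function mapInGroups<T, R>(
      items: T[],
      groupSize: number,
      fn: (item: T) => Promise<R>
    ): Promise<R[]> {
      const out: R[] = [];
      for (let i = 0; i < items.length; i += groupSize) {
        const group = items.slice(i, i + groupSize);
        // All requests in a group run concurrently; the next group waits.
        out.push(...(await Promise.all(group.map(fn))));
      }
      return out;
    }

At three concurrent batches the stall is negligible; a larger pool would justify a proper worker queue.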
package/src/graph/export.test.ts
@@ -0,0 +1,81 @@
+ // src/graph/export.test.ts
+ import { describe, it, expect, vi, beforeEach } from "vitest";
+ import { exportGraph } from "./export.js";
+
+ // Create a shared mock store instance
+ const mockStore = {
+   getAll: vi.fn(),
+ };
+
+ vi.mock("../db/lancedb.js", () => ({
+   getVectorStore: vi.fn(() => mockStore),
+ }));
+
+ describe("exportGraph", () => {
+   beforeEach(() => {
+     vi.clearAllMocks();
+   });
+
+   describe("JSON format", () => {
+     it("exports nodes and edges", async () => {
+       mockStore.getAll.mockResolvedValue([
+         { id: "1", title: "Note A", folder: "Work", tags: ["project"], outlinks: ["Note B"], vector: [1, 0] },
+         { id: "2", title: "Note B", folder: "Work", tags: ["project"], outlinks: [], vector: [0, 1] },
+       ]);
+
+       const result = await exportGraph({ format: "json" }) as any;
+
+       expect(result).toHaveProperty("nodes");
+       expect(result).toHaveProperty("edges");
+       expect(result.nodes).toHaveLength(2);
+       expect(result.edges.some((e: any) => e.type === "link")).toBe(true);
+       expect(result.edges.some((e: any) => e.type === "tag")).toBe(true);
+     });
+
+     it("filters by folder", async () => {
+       mockStore.getAll.mockResolvedValue([
+         { id: "1", title: "Note A", folder: "Work", tags: [], outlinks: [], vector: [] },
+         { id: "2", title: "Note B", folder: "Personal", tags: [], outlinks: [], vector: [] },
+       ]);
+
+       const result = await exportGraph({ format: "json", folder: "Work" }) as any;
+
+       expect(result.nodes).toHaveLength(1);
+       expect(result.nodes[0].folder).toBe("Work");
+     });
+   });
+
+   describe("GraphML format", () => {
+     it("exports valid GraphML XML", async () => {
+       mockStore.getAll.mockResolvedValue([
+         { id: "1", title: "Note A", folder: "Work", tags: [], outlinks: ["Note B"], vector: [] },
+         { id: "2", title: "Note B", folder: "Work", tags: [], outlinks: [], vector: [] },
+       ]);
+
+       const result = await exportGraph({ format: "graphml" });
+
+       expect(typeof result).toBe("string");
+       expect(result).toContain('<?xml version="1.0"');
+       expect(result).toContain("<graphml");
+       expect(result).toContain("<node");
+       expect(result).toContain("<edge");
+       expect(result).toContain("</graphml>");
+     });
+
+     it("escapes special XML characters in GraphML", async () => {
+       mockStore.getAll.mockResolvedValue([
+         { id: "1", title: 'Note <with> & "special"', folder: "Work", tags: [], outlinks: [], vector: [] },
+       ]);
+       const result = await exportGraph({ format: "graphml" }) as string;
+       expect(result).toContain("&lt;with&gt;");
+       expect(result).toContain("&amp;");
+     });
+   });
+
+   describe("Unknown format", () => {
+     it("throws for unknown format", async () => {
+       mockStore.getAll.mockResolvedValue([]);
+       await expect(exportGraph({ format: "unknown" as any })).rejects.toThrow("Unknown format");
+     });
+   });
+ });