@disco_trooper/apple-notes-mcp 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -24
- package/package.json +10 -8
- package/src/config/claude.test.ts +47 -0
- package/src/config/claude.ts +106 -0
- package/src/config/constants.ts +11 -2
- package/src/config/paths.test.ts +40 -0
- package/src/config/paths.ts +86 -0
- package/src/db/arrow-fix.test.ts +101 -0
- package/src/db/lancedb.test.ts +209 -2
- package/src/db/lancedb.ts +345 -7
- package/src/embeddings/cache.test.ts +150 -0
- package/src/embeddings/cache.ts +204 -0
- package/src/embeddings/index.ts +21 -2
- package/src/embeddings/local.ts +61 -10
- package/src/embeddings/openrouter.ts +233 -11
- package/src/graph/export.test.ts +81 -0
- package/src/graph/export.ts +163 -0
- package/src/graph/extract.test.ts +90 -0
- package/src/graph/extract.ts +52 -0
- package/src/graph/queries.test.ts +156 -0
- package/src/graph/queries.ts +224 -0
- package/src/index.ts +249 -9
- package/src/notes/crud.test.ts +26 -2
- package/src/notes/crud.ts +43 -5
- package/src/notes/read.ts +83 -68
- package/src/search/chunk-indexer.test.ts +353 -0
- package/src/search/chunk-indexer.ts +207 -0
- package/src/search/chunk-search.test.ts +327 -0
- package/src/search/chunk-search.ts +298 -0
- package/src/search/indexer.ts +151 -109
- package/src/setup.ts +46 -67
- package/src/utils/chunker.test.ts +182 -0
- package/src/utils/chunker.ts +170 -0
- package/src/utils/content-filter.test.ts +225 -0
- package/src/utils/content-filter.ts +275 -0
- package/src/utils/runtime.test.ts +70 -0
- package/src/utils/runtime.ts +40 -0
|
@@ -108,6 +108,27 @@ class OpenRouterError extends Error {
|
|
|
108
108
|
}
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
+
/**
 * HTTP status codes for which repeating the same request is pointless.
 * Consulted by `isNonRetryableError` to fail fast instead of backing off.
 */
const NON_RETRYABLE_STATUS_CODES = [
  400, // Bad Request
  401, // Unauthorized
  403, // Forbidden
  404, // Not Found
];
|
|
113
|
+
|
|
114
|
+
/**
 * Static headers attached to every OpenRouter request.
 * The per-request `Authorization` header is added separately by each caller.
 */
const API_HEADERS = {
  "Content-Type": "application/json",
  // Attribution headers identifying this client to OpenRouter.
  "HTTP-Referer": "https://github.com/apple-notes-mcp",
  "X-Title": "Apple Notes MCP",
} as const;
|
|
120
|
+
|
|
121
|
+
/**
 * Decide whether a failure should abort the retry loop immediately.
 *
 * Only `OpenRouterError` instances carrying a truthy `statusCode` can be
 * classified as non-retryable; every other value (timeouts without a listed
 * code, network errors, non-Error throwables) is treated as retryable.
 *
 * @param error - Whatever was caught by the caller.
 * @returns `true` when the status code is listed in
 *   `NON_RETRYABLE_STATUS_CODES`, otherwise `false`.
 */
function isNonRetryableError(error: unknown): boolean {
  if (!(error instanceof OpenRouterError)) {
    return false;
  }

  const { statusCode } = error;
  if (!statusCode) {
    return false;
  }

  return NON_RETRYABLE_STATUS_CODES.includes(statusCode);
}
|
|
131
|
+
|
|
111
132
|
/**
|
|
112
133
|
* Get embedding vector for text using OpenRouter API
|
|
113
134
|
*
|
|
@@ -157,9 +178,7 @@ export async function getOpenRouterEmbedding(text: string): Promise<number[]> {
|
|
|
157
178
|
method: "POST",
|
|
158
179
|
headers: {
|
|
159
180
|
Authorization: `Bearer ${OPENROUTER_API_KEY}`,
|
|
160
|
-
|
|
161
|
-
"HTTP-Referer": "https://github.com/apple-notes-mcp",
|
|
162
|
-
"X-Title": "Apple Notes MCP",
|
|
181
|
+
...API_HEADERS,
|
|
163
182
|
},
|
|
164
183
|
body: JSON.stringify({
|
|
165
184
|
model: EMBEDDING_MODEL,
|
|
@@ -224,17 +243,12 @@ export async function getOpenRouterEmbedding(text: string): Promise<number[]> {
|
|
|
224
243
|
`Request timed out after ${OPENROUTER_TIMEOUT_MS}ms`,
|
|
225
244
|
408
|
|
226
245
|
);
|
|
227
|
-
// Don't throw - fall through to retry logic below
|
|
228
246
|
} else {
|
|
229
247
|
lastError = error instanceof Error ? error : new Error(String(error));
|
|
230
248
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
if (nonRetryable.includes(error.statusCode)) {
|
|
235
|
-
debug(`Non-retryable error (${error.statusCode}), failing immediately`);
|
|
236
|
-
throw error;
|
|
237
|
-
}
|
|
249
|
+
if (isNonRetryableError(error)) {
|
|
250
|
+
debug(`Non-retryable error, failing immediately`);
|
|
251
|
+
throw error;
|
|
238
252
|
}
|
|
239
253
|
}
|
|
240
254
|
|
|
@@ -283,3 +297,211 @@ export function clearEmbeddingCache(): void {
|
|
|
283
297
|
export function getEmbeddingCacheSize(): number {
  // Number of entries currently held by the module-level embedding cache.
  const { size } = embeddingCache;
  return size;
}
|
|
300
|
+
|
|
301
|
+
/**
 * Maximum number of texts sent in one embedding request.
 *
 * Per the original author's note, OpenRouter accepts up to 2048 inputs per
 * request but 50-100 is the sweet spot; this uses the low end of that range.
 */
const BATCH_SIZE = 50;
|
|
306
|
+
|
|
307
|
+
/**
 * How many batch requests are allowed in flight at the same time.
 *
 * Raising this increases throughput at the cost of a higher chance of
 * running into rate limiting.
 */
const CONCURRENT_BATCHES = 3;
|
|
312
|
+
|
|
313
|
+
/**
 * Split an array into consecutive chunks of at most `size` elements.
 *
 * Order is preserved, the input is not mutated, and the last chunk may be
 * shorter than `size`. An empty input yields an empty result.
 *
 * @param array - Items to partition.
 * @param size - Maximum chunk length; must be a positive integer.
 * @returns The chunks, in original order.
 * @throws RangeError if `size` is not a positive integer. Without this guard
 *   a step of 0 (or a negative step) would never advance past the end of the
 *   array and the loop would spin forever.
 */
function chunk<T>(array: T[], size: number): T[][] {
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError(`chunk size must be a positive integer, got ${size}`);
  }

  const chunks: T[][] = [];
  for (let start = 0; start < array.length; start += size) {
    chunks.push(array.slice(start, start + size));
  }
  return chunks;
}
|
|
323
|
+
|
|
324
|
+
/**
 * Embed one batch of texts with a single API request, retrying on failure.
 *
 * On success the embeddings are written into `results` and into the
 * module-level embedding cache; nothing is returned. Results and cache are
 * only touched after the whole response has been validated, so a malformed
 * response never leaves a half-written batch behind.
 *
 * @param batchTexts - Already-truncated texts to embed in this request.
 * @param batchIndices - For each entry of `batchTexts`, its position in the
 *   caller's overall input (used to index `results` and `cacheKeys`).
 * @param cacheKeys - Cache keys for the caller's overall input, indexed by
 *   overall position.
 * @param results - Shared output array, indexed by overall position.
 * @param batchNumber - 1-based batch number, used in log and error messages.
 * @param totalBatches - Total number of batches, used in log messages.
 * @throws OpenRouterError immediately for non-retryable HTTP statuses, or
 *   after `MAX_RETRIES` failed attempts.
 */
async function processSingleBatch(
  batchTexts: string[],
  batchIndices: number[],
  cacheKeys: string[],
  results: (number[] | null)[],
  batchNumber: number,
  totalBatches: number
): Promise<void> {
  debug(`Processing batch ${batchNumber}/${totalBatches} (${batchTexts.length} texts)`);

  // Batch requests get twice the single-request timeout.
  const timeoutMs = OPENROUTER_TIMEOUT_MS * 2;
  let lastError: Error | null = null;

  for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
    const isLastAttempt = attempt === MAX_RETRIES - 1;
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

    try {
      const response = await fetch(API_URL, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${OPENROUTER_API_KEY}`,
          ...API_HEADERS,
        },
        body: JSON.stringify({
          model: EMBEDDING_MODEL,
          input: batchTexts,
          dimensions: EMBEDDING_DIMS,
        }),
        signal: controller.signal,
      });

      if (response.status === 429) {
        // Stop the abort timer before sleeping so it cannot fire mid-backoff.
        clearTimeout(timeoutId);
        // Record the failure so the final error does not report "undefined"
        // when every attempt was rate limited.
        lastError = new OpenRouterError("Rate limited by OpenRouter (429)", 429);
        if (!isLastAttempt) {
          const waitTime = getBackoffDelay(attempt, RATE_LIMIT_BACKOFF_BASE_MS);
          debug(`Batch ${batchNumber}: Rate limited (429), waiting ${waitTime}ms`);
          await sleep(waitTime);
        }
        continue;
      }

      if (!response.ok) {
        const errorBody = await response.text();
        throw new OpenRouterError(
          `OpenRouter API error: ${response.status} - ${errorBody}`,
          response.status,
          errorBody
        );
      }

      const data = await response.json() as {
        data?: Array<{ embedding?: number[]; index?: number }>;
      };

      const items = data?.data;
      if (!items || items.length !== batchTexts.length) {
        throw new OpenRouterError(
          `Invalid API response: expected ${batchTexts.length} embeddings, got ${items?.length ?? 0}`,
          response.status,
          JSON.stringify(data)
        );
      }

      // Prefer the `index` reported by the API for each item, but only when
      // the indices form a valid permutation of 0..n-1; otherwise fall back
      // to response order, which is what this code relied on previously.
      const slots = items.map((item, position) => item.index ?? position);
      const seenSlots = new Set<number>();
      let indicesUsable = true;
      for (const slot of slots) {
        if (!Number.isInteger(slot) || slot < 0 || slot >= items.length || seenSlots.has(slot)) {
          indicesUsable = false;
          break;
        }
        seenSlots.add(slot);
      }
      if (!indicesUsable) {
        debug(`Batch ${batchNumber}: response indices unusable, assuming response order`);
      }

      // Validate every item before committing anything.
      const ordered: number[][] = new Array(items.length);
      items.forEach((item, position) => {
        const embedding = item.embedding;
        if (!embedding) {
          throw new OpenRouterError(
            `Missing embedding at index ${position}`,
            response.status,
            JSON.stringify(data)
          );
        }
        ordered[indicesUsable ? slots[position] : position] = embedding;
      });

      // Commit: store results and cache them.
      for (let j = 0; j < ordered.length; j++) {
        const overallIndex = batchIndices[j];
        results[overallIndex] = ordered[j];
        embeddingCache.set(cacheKeys[overallIndex], ordered[j]);
      }

      return; // Success
    } catch (error) {
      if (error instanceof Error && error.name === "AbortError") {
        lastError = new OpenRouterError(
          `Batch request timed out after ${timeoutMs}ms`,
          408
        );
      } else {
        lastError = error instanceof Error ? error : new Error(String(error));

        // Client-side errors will not get better on retry; surface them now.
        if (isNonRetryableError(error)) {
          throw error;
        }
      }

      if (!isLastAttempt) {
        const waitTime = getBackoffDelay(attempt);
        debug(`Batch ${batchNumber} error: ${lastError.message}, retrying in ${waitTime}ms`);
        await sleep(waitTime);
      }
    } finally {
      clearTimeout(timeoutId);
    }
  }

  throw new OpenRouterError(
    `Failed to get batch ${batchNumber} embeddings after ${MAX_RETRIES} attempts: ${lastError?.message}`
  );
}
|
|
433
|
+
|
|
434
|
+
/**
 * Embed many texts at once, reusing cached vectors and batching the rest.
 *
 * Each text is truncated and looked up in the embedding cache. Cache misses
 * are split into batches of `BATCH_SIZE`, and batches are sent in waves of at
 * most `CONCURRENT_BATCHES` parallel requests; a wave must finish before the
 * next one starts.
 *
 * @param texts - Array of input texts to embed
 * @returns Promise resolving to one embedding vector per input, in input order
 * @throws OpenRouterError if the API key is missing or an API call fails
 */
export async function getOpenRouterEmbeddingBatch(texts: string[]): Promise<number[][]> {
  if (!OPENROUTER_API_KEY) {
    throw new OpenRouterError(
      "OPENROUTER_API_KEY environment variable is not set"
    );
  }

  if (texts.length === 0) {
    return [];
  }

  // Normalise inputs first, then derive their cache keys.
  const truncatedTexts = texts.map(t => truncateForEmbedding(t));
  const cacheKeys = truncatedTexts.map(t => getCacheKey(t));

  // `results` is filled from the cache here and by the batch workers below.
  const results: (number[] | null)[] = new Array(texts.length).fill(null);
  const pendingIndices: number[] = [];
  const pendingTexts: string[] = [];

  truncatedTexts.forEach((text, position) => {
    const hit = embeddingCache.get(cacheKeys[position]);
    if (hit) {
      results[position] = hit;
      return;
    }
    pendingIndices.push(position);
    pendingTexts.push(text);
  });

  debug(`Batch: ${texts.length} total, ${pendingIndices.length} uncached`);

  if (pendingTexts.length === 0) {
    return results as number[][];
  }

  // Texts and their overall positions are chunked identically so that
  // batch i of one lines up with batch i of the other.
  const textBatches = chunk(pendingTexts, BATCH_SIZE);
  const indexBatches = chunk(pendingIndices, BATCH_SIZE);
  const totalBatches = textBatches.length;

  debug(`Processing ${totalBatches} batches with ${CONCURRENT_BATCHES} concurrent requests`);

  const jobs = textBatches.map((batchTexts, i) => ({
    texts: batchTexts,
    indices: indexBatches[i],
    batchNumber: i + 1,
  }));

  // Run the jobs wave by wave to cap the number of in-flight requests.
  for (const wave of chunk(jobs, CONCURRENT_BATCHES)) {
    const inFlight = wave.map(job =>
      processSingleBatch(
        job.texts,
        job.indices,
        cacheKeys,
        results,
        job.batchNumber,
        totalBatches
      )
    );
    await Promise.all(inFlight);
  }

  return results as number[][];
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
// src/graph/export.test.ts
|
|
2
|
+
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
3
|
+
import { exportGraph } from "./export.js";
|
|
4
|
+
|
|
5
|
+
// Create a shared mock store instance
|
|
6
|
+
const mockStore = {
|
|
7
|
+
getAll: vi.fn(),
|
|
8
|
+
};
|
|
9
|
+
|
|
10
|
+
vi.mock("../db/lancedb.js", () => ({
|
|
11
|
+
getVectorStore: vi.fn(() => mockStore),
|
|
12
|
+
}));
|
|
13
|
+
|
|
14
|
+
describe("exportGraph", () => {
|
|
15
|
+
beforeEach(() => {
|
|
16
|
+
vi.clearAllMocks();
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
describe("JSON format", () => {
|
|
20
|
+
it("exports nodes and edges", async () => {
|
|
21
|
+
mockStore.getAll.mockResolvedValue([
|
|
22
|
+
{ id: "1", title: "Note A", folder: "Work", tags: ["project"], outlinks: ["Note B"], vector: [1,0] },
|
|
23
|
+
{ id: "2", title: "Note B", folder: "Work", tags: ["project"], outlinks: [], vector: [0,1] },
|
|
24
|
+
]);
|
|
25
|
+
|
|
26
|
+
const result = await exportGraph({ format: "json" }) as any;
|
|
27
|
+
|
|
28
|
+
expect(result).toHaveProperty("nodes");
|
|
29
|
+
expect(result).toHaveProperty("edges");
|
|
30
|
+
expect(result.nodes).toHaveLength(2);
|
|
31
|
+
expect(result.edges.some((e: any) => e.type === "link")).toBe(true);
|
|
32
|
+
expect(result.edges.some((e: any) => e.type === "tag")).toBe(true);
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
it("filters by folder", async () => {
|
|
36
|
+
mockStore.getAll.mockResolvedValue([
|
|
37
|
+
{ id: "1", title: "Note A", folder: "Work", tags: [], outlinks: [], vector: [] },
|
|
38
|
+
{ id: "2", title: "Note B", folder: "Personal", tags: [], outlinks: [], vector: [] },
|
|
39
|
+
]);
|
|
40
|
+
|
|
41
|
+
const result = await exportGraph({ format: "json", folder: "Work" }) as any;
|
|
42
|
+
|
|
43
|
+
expect(result.nodes).toHaveLength(1);
|
|
44
|
+
expect(result.nodes[0].folder).toBe("Work");
|
|
45
|
+
});
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
describe("GraphML format", () => {
|
|
49
|
+
it("exports valid GraphML XML", async () => {
|
|
50
|
+
mockStore.getAll.mockResolvedValue([
|
|
51
|
+
{ id: "1", title: "Note A", folder: "Work", tags: [], outlinks: ["Note B"], vector: [] },
|
|
52
|
+
{ id: "2", title: "Note B", folder: "Work", tags: [], outlinks: [], vector: [] },
|
|
53
|
+
]);
|
|
54
|
+
|
|
55
|
+
const result = await exportGraph({ format: "graphml" });
|
|
56
|
+
|
|
57
|
+
expect(typeof result).toBe("string");
|
|
58
|
+
expect(result).toContain('<?xml version="1.0"');
|
|
59
|
+
expect(result).toContain("<graphml");
|
|
60
|
+
expect(result).toContain("<node");
|
|
61
|
+
expect(result).toContain("<edge");
|
|
62
|
+
expect(result).toContain("</graphml>");
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
it("escapes special XML characters in GraphML", async () => {
  mockStore.getAll.mockResolvedValue([
    { id: "1", title: 'Note <with> & "special"', folder: "Work", tags: [], outlinks: [], vector: [] },
  ]);
  const result = await exportGraph({ format: "graphml" }) as string;
  // The title must appear in entity-encoded form; asserting on the raw
  // characters would pass even if no escaping happened at all.
  expect(result).toContain("&lt;with&gt;");
  expect(result).toContain("&amp;");
  expect(result).not.toContain("<with>");
});
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
describe("Unknown format", () => {
|
|
76
|
+
it("throws for unknown format", async () => {
|
|
77
|
+
mockStore.getAll.mockResolvedValue([]);
|
|
78
|
+
await expect(exportGraph({ format: "unknown" as any })).rejects.toThrow("Unknown format");
|
|
79
|
+
});
|
|
80
|
+
});
|
|
81
|
+
});
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
// src/graph/export.ts
|
|
2
|
+
/**
|
|
3
|
+
* Knowledge graph export to various formats.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { getVectorStore } from "../db/lancedb.js";
|
|
7
|
+
import { createDebugLogger } from "../utils/debug.js";
|
|
8
|
+
import { GRAPH_LINK_WEIGHT, GRAPH_TAG_WEIGHT } from "../config/constants.js";
|
|
9
|
+
|
|
10
|
+
const debug = createDebugLogger("EXPORT");
|
|
11
|
+
|
|
12
|
+
/** Output formats understood by `exportGraph`. */
export type GraphFormat = "json" | "graphml";

/** A note represented as a graph vertex. */
export interface GraphNode {
  /** Identifier of the underlying record. */
  id: string;
  /** Display label; `exportGraph` fills this with the record title. */
  label: string;
  /** Folder the record belongs to. */
  folder: string;
  /** Tags attached to the record (empty when it has none). */
  tags: string[];
}

/** A relationship between two nodes, referenced by node id. */
export interface GraphEdge {
  source: string;
  target: string;
  /**
   * Kind of relationship. `exportGraph` in this module emits "link" and
   * "tag" edges; "similar" is part of the type but not produced there.
   */
  type: "link" | "tag" | "similar";
  /** Relative strength of the relationship. */
  weight: number;
}

/** Complete graph payload: every node plus every edge. */
export interface GraphData {
  nodes: GraphNode[];
  edges: GraphEdge[];
}

/** Options accepted by `exportGraph`. */
export interface ExportOptions {
  /** Desired output format. */
  format: GraphFormat;
  /** When set, only records in this folder (case-insensitive) are exported. */
  folder?: string;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Export the knowledge graph built from all stored notes.
 *
 * Nodes are the stored records (optionally restricted to one folder).
 * Edges come from two sources:
 * - "link": a record's outlink whose title matches another exported record
 *   (case-insensitive); directed from the linking record to the target.
 * - "tag": one edge per unordered pair of records sharing at least one tag.
 *
 * @param options - Output format and optional case-insensitive folder filter.
 * @returns A `GraphData` object for "json", or a GraphML document string for
 *   "graphml".
 * @throws Error with message `Unknown format: …` for any other format.
 */
export async function exportGraph(options: ExportOptions): Promise<GraphData | string> {
  const { format, folder } = options;

  debug(`Exporting graph in ${format} format`);

  const store = getVectorStore();
  let records = await store.getAll();

  // Filter by folder if specified
  if (folder) {
    const normalizedFolder = folder.toLowerCase();
    records = records.filter(r => r.folder.toLowerCase() === normalizedFolder);
  }

  // Build graph data
  const nodes: GraphNode[] = records.map(r => ({
    id: r.id,
    label: r.title,
    folder: r.folder,
    tags: r.tags ?? [],
  }));

  const edges: GraphEdge[] = [];

  // Index record ids by lower-cased title so each outlink resolves in O(1)
  // instead of scanning all records. The first record with a given title
  // wins, which is what a linear `find` over `records` would return.
  const idByTitle = new Map<string, string>();
  for (const record of records) {
    const titleKey = record.title.toLowerCase();
    if (!idByTitle.has(titleKey)) {
      idByTitle.set(titleKey, record.id);
    }
  }

  // Add link edges (targets outside the exported set are dropped)
  for (const record of records) {
    for (const linkTitle of record.outlinks ?? []) {
      const targetId = idByTitle.get(linkTitle.toLowerCase());
      if (targetId !== undefined) {
        edges.push({
          source: record.id,
          target: targetId,
          type: "link",
          weight: GRAPH_LINK_WEIGHT,
        });
      }
    }
  }

  // Add tag edges (notes sharing same tag)
  const tagGroups = new Map<string, string[]>();
  for (const record of records) {
    for (const tag of record.tags ?? []) {
      const members = tagGroups.get(tag);
      if (members) {
        members.push(record.id);
      } else {
        tagGroups.set(tag, [record.id]);
      }
    }
  }

  const seenTagEdges = new Set<string>();
  for (const noteIds of tagGroups.values()) {
    if (noteIds.length < 2) continue;
    for (let i = 0; i < noteIds.length; i++) {
      for (let j = i + 1; j < noteIds.length; j++) {
        const a = noteIds[i];
        const b = noteIds[j];
        // A record listing the same tag twice must not link to itself.
        if (a === b) continue;
        // Order-independent key. JSON encoding keeps the pair unambiguous
        // even when ids themselves contain the separator character.
        const edgeKey = JSON.stringify(a < b ? [a, b] : [b, a]);
        if (seenTagEdges.has(edgeKey)) continue;
        seenTagEdges.add(edgeKey);
        edges.push({
          source: a,
          target: b,
          type: "tag",
          weight: GRAPH_TAG_WEIGHT,
        });
      }
    }
  }

  const graphData: GraphData = { nodes, edges };

  if (format === "json") {
    return graphData;
  }

  if (format === "graphml") {
    return toGraphML(graphData);
  }

  throw new Error(`Unknown format: ${format}`);
}
|
|
122
|
+
|
|
123
|
+
/**
 * Make a string safe for use in XML text content and attribute values.
 *
 * The five characters with predefined XML entities (`&`, `<`, `>`, `"`, `'`)
 * are replaced by those entities in a single pass, so an ampersand produced
 * by one replacement is never escaped a second time. Control characters that
 * XML 1.0 does not allow anywhere in a document are removed, because they
 * cannot be represented even in escaped form.
 *
 * @param str - Raw text.
 * @returns The escaped text.
 */
function escapeXml(str: string): string {
  const entities: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };

  return str
    // Outside the XML 1.0 `Char` production (tab, LF and CR are legal).
    .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\uFFFE\uFFFF]/g, "")
    .replace(/[&<>"']/g, (ch) => entities[ch] ?? ch);
}
|
|
131
|
+
|
|
132
|
+
/**
 * Serialise graph data as a GraphML document.
 *
 * The document declares string keys for node label/folder/tags and edge
 * type, plus a double key for edge weight, then lists every node followed by
 * every edge inside a single graph whose `edgedefault` is "directed". Node
 * tags are written as one comma-joined string. All note-derived text is
 * passed through `escapeXml`.
 *
 * @param data - Nodes and edges to serialise.
 * @returns The document as newline-joined lines.
 */
function toGraphML(data: GraphData): string {
  const preamble: string[] = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">',
    ' <key id="label" for="node" attr.name="label" attr.type="string"/>',
    ' <key id="folder" for="node" attr.name="folder" attr.type="string"/>',
    ' <key id="tags" for="node" attr.name="tags" attr.type="string"/>',
    ' <key id="type" for="edge" attr.name="type" attr.type="string"/>',
    ' <key id="weight" for="edge" attr.name="weight" attr.type="double"/>',
    ' <graph id="G" edgedefault="directed">',
  ];

  const nodeLines = data.nodes.flatMap((node) => [
    ` <node id="${escapeXml(node.id)}">`,
    ` <data key="label">${escapeXml(node.label)}</data>`,
    ` <data key="folder">${escapeXml(node.folder)}</data>`,
    ` <data key="tags">${escapeXml(node.tags.join(","))}</data>`,
    " </node>",
  ]);

  const edgeLines = data.edges.flatMap((edge) => [
    ` <edge source="${escapeXml(edge.source)}" target="${escapeXml(edge.target)}">`,
    // `type` is a fixed literal union and `weight` a number, so neither
    // goes through escapeXml.
    ` <data key="type">${edge.type}</data>`,
    ` <data key="weight">${edge.weight}</data>`,
    " </edge>",
  ]);

  const closing = [" </graph>", "</graphml>"];

  return [...preamble, ...nodeLines, ...edgeLines, ...closing].join("\n");
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
// src/graph/extract.test.ts
|
|
2
|
+
import { describe, it, expect } from "vitest";
|
|
3
|
+
import { extractTags, extractOutlinks, extractMetadata } from "./extract.js";
|
|
4
|
+
|
|
5
|
+
describe("extractTags", () => {
|
|
6
|
+
it("extracts simple hashtags", () => {
|
|
7
|
+
const content = "This is a #project about #coding";
|
|
8
|
+
expect(extractTags(content)).toEqual(["project", "coding"]);
|
|
9
|
+
});
|
|
10
|
+
|
|
11
|
+
it("handles hyphenated tags", () => {
|
|
12
|
+
const content = "Working on #my-project and #some-idea";
|
|
13
|
+
expect(extractTags(content)).toEqual(["my-project", "some-idea"]);
|
|
14
|
+
});
|
|
15
|
+
|
|
16
|
+
it("normalizes to lowercase", () => {
|
|
17
|
+
const content = "#Project #IDEA #Mixed";
|
|
18
|
+
expect(extractTags(content)).toEqual(["project", "idea", "mixed"]);
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
it("deduplicates tags", () => {
|
|
22
|
+
const content = "#project #idea #project";
|
|
23
|
+
expect(extractTags(content)).toEqual(["project", "idea"]);
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
it("returns empty array for no tags", () => {
|
|
27
|
+
expect(extractTags("No tags here")).toEqual([]);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it("ignores tags in code blocks", () => {
|
|
31
|
+
const content = "Real #tag\n```\n#not-a-tag\n```\nAnother #real";
|
|
32
|
+
expect(extractTags(content)).toEqual(["tag", "real"]);
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
it("ignores tags in inline code", () => {
|
|
36
|
+
const content = "Real #tag and `#code-tag` should ignore inline";
|
|
37
|
+
expect(extractTags(content)).toEqual(["tag"]);
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
it("ignores hex colors", () => {
|
|
41
|
+
const content = "Color #fff and #000000 and #a1b2c3 are not tags";
|
|
42
|
+
expect(extractTags(content)).toEqual([]);
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
it("keeps tags that contain letters mixed with numbers", () => {
|
|
46
|
+
const content = "#project1 #2024goals #abc123xyz";
|
|
47
|
+
expect(extractTags(content)).toEqual(["project1", "2024goals", "abc123xyz"]);
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
it("extracts tag at string boundaries", () => {
|
|
51
|
+
expect(extractTags("#start of content")).toEqual(["start"]);
|
|
52
|
+
expect(extractTags("end of #content")).toEqual(["content"]);
|
|
53
|
+
});
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
describe("extractOutlinks", () => {
|
|
57
|
+
it("extracts wiki-style links", () => {
|
|
58
|
+
const content = "See [[Meeting Notes]] and [[Project Plan]]";
|
|
59
|
+
expect(extractOutlinks(content)).toEqual(["Meeting Notes", "Project Plan"]);
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
it("handles links with special characters", () => {
|
|
63
|
+
const content = "Check [[Note with / slash]] and [[Note: with colon]]";
|
|
64
|
+
expect(extractOutlinks(content)).toEqual(["Note with / slash", "Note: with colon"]);
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
it("deduplicates links", () => {
|
|
68
|
+
const content = "[[Note]] and [[Other]] and [[Note]]";
|
|
69
|
+
expect(extractOutlinks(content)).toEqual(["Note", "Other"]);
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
it("returns empty array for no links", () => {
|
|
73
|
+
expect(extractOutlinks("No links here")).toEqual([]);
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
it("ignores links in code blocks", () => {
|
|
77
|
+
const content = "Real [[Link]]\n```\n[[not-a-link]]\n```";
|
|
78
|
+
expect(extractOutlinks(content)).toEqual(["Link"]);
|
|
79
|
+
});
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
describe("extractMetadata", () => {
|
|
83
|
+
it("extracts both tags and outlinks", () => {
|
|
84
|
+
const content = "A #project note linking to [[Other Note]]";
|
|
85
|
+
expect(extractMetadata(content)).toEqual({
|
|
86
|
+
tags: ["project"],
|
|
87
|
+
outlinks: ["Other Note"],
|
|
88
|
+
});
|
|
89
|
+
});
|
|
90
|
+
});
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge graph metadata extraction from note content.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Drop code from note content so that metadata is not extracted from it.
 *
 * Fenced blocks (triple backticks, possibly spanning lines) are removed
 * first, then inline spans delimited by single backticks. Unterminated
 * fences or spans are left untouched.
 *
 * @param content - Raw note text.
 * @returns The text with code regions removed.
 */
function stripCodeBlocks(content: string): string {
  const withoutFences = content.replace(/```[\s\S]*?```/g, "");
  const withoutInline = withoutFences.replace(/`[^`]+`/g, "");
  return withoutInline;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Test whether a tag candidate looks like a hex colour code (e.g. fff,
 * 000000, a1b2c3) rather than a real tag.
 *
 * The check accepts any non-empty run of hexadecimal digits regardless of
 * length, so purely numeric values ("2024") and hex-only words ("decaf")
 * are classified as colours too.
 *
 * @param value - Candidate text without the leading `#`.
 * @returns `true` when every character is a hexadecimal digit.
 */
function isHexColor(value: string): boolean {
  const hexDigitsOnly = /^[0-9a-fA-F]+$/;
  return hexDigitsOnly.test(value);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Collect the hashtags mentioned in note content.
 *
 * Code blocks and inline code are stripped before matching. Each match is a
 * `#` followed by word characters or hyphens; the `#` is dropped and the
 * rest is lower-cased. Candidates that look like hex colour codes are
 * discarded, and duplicates are removed while keeping first-seen order.
 *
 * @param content - Raw note text.
 * @returns Unique, lower-cased tag names without the leading `#`.
 */
export function extractTags(content: string): string[] {
  const candidates = stripCodeBlocks(content).match(/#[\w-]+/g) || [];

  const unique = new Set<string>();
  for (const candidate of candidates) {
    const name = candidate.slice(1).toLowerCase();
    if (isHexColor(name)) {
      continue;
    }
    unique.add(name);
  }

  return [...unique];
}
|
|
28
|
+
|
|
29
|
+
/**
 * Collect the targets of wiki-style `[[links]]` in note content.
 *
 * Code blocks and inline code are stripped before matching. The surrounding
 * double brackets are removed from each match; link text is otherwise kept
 * verbatim (case and inner whitespace included). Duplicates are removed
 * while keeping first-seen order.
 *
 * @param content - Raw note text.
 * @returns Unique link targets.
 */
export function extractOutlinks(content: string): string[] {
  const bracketed = stripCodeBlocks(content).match(/\[\[([^\]]+)\]\]/g) || [];

  const unique = new Set<string>();
  for (const hit of bracketed) {
    // Trim the leading "[[" and the trailing "]]".
    unique.add(hit.slice(2, -2));
  }

  return [...unique];
}
|
|
38
|
+
|
|
39
|
+
/** Graph-relevant metadata extracted from a single note's content. */
export interface NoteMetadata {
  /** Hashtag names found in the content. */
  tags: string[];
  /** Targets of wiki-style links found in the content. */
  outlinks: string[];
}
|
|
43
|
+
|
|
44
|
+
/**
 * Run both extractors over the same content.
 *
 * @param content - Raw note text.
 * @returns The tags from `extractTags` and the link targets from
 *   `extractOutlinks`.
 */
export function extractMetadata(content: string): NoteMetadata {
  const tags = extractTags(content);
  const outlinks = extractOutlinks(content);
  return { tags, outlinks };
}
|