pkm-mcp-server 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/embeddings.js ADDED
@@ -0,0 +1,672 @@
1
+ import Database from "better-sqlite3";
2
+ import * as sqliteVec from "sqlite-vec";
3
+ import fs from "fs/promises";
4
+ import fsSync from "fs";
5
+ import path from "path";
6
+ import crypto from "crypto";
7
+ import { getAllMarkdownFiles } from "./utils.js";
8
+
9
// Embedding model configuration and index tuning parameters.
const EMBEDDING_MODEL = "text-embedding-3-large";
const EMBEDDING_DIMENSIONS = 3072;
const MAX_CHARS_PER_CHUNK = 8000; // ~2000 tokens per chunk
const BATCH_SIZE = 100; // max texts per OpenAI API call
const REINDEX_BATCH_SIZE = 10; // files per batch during startup sync
const DEBOUNCE_MS = 2000; // quiet period before reindexing a changed file
15
+
16
/**
 * Semantic similarity search over vault notes using OpenAI embeddings.
 * Stores embeddings in SQLite + sqlite-vec for fast KNN lookups.
 * Automatically indexes on startup and watches for file changes.
 */
export class SemanticIndex {
  /**
   * @param {Object} opts
   * @param {string} opts.vaultPath - absolute path to vault root
   * @param {string} opts.openaiApiKey - OpenAI API key for embeddings
   * @param {string} [opts.dbPath] - override path for the SQLite database
   */
  constructor({ vaultPath, openaiApiKey, dbPath }) {
    this.vaultPath = vaultPath;
    this.openaiApiKey = openaiApiKey;
    this.dbPath = dbPath || path.join(vaultPath, ".obsidian", "semantic-index.db");
    this.db = null;
    this.watcher = null;
    this._debounceTimers = new Map(); // relative path -> pending watcher timer
    this._syncState = { syncing: false, total: 0, done: 0 };
    this._inflight = new Set(); // reindex promises shutdown() must await
    this._abortController = null;
  }

  /** True once initialize() has opened the DB and an API key is configured. */
  get isAvailable() {
    return this.db !== null && !!this.openaiApiKey;
  }

  /**
   * Open the database, create the schema, start a non-blocking startup
   * sync, and begin watching the vault for file changes.
   */
  async initialize() {
    // Ensure the directory holding the DB exists (default: <vault>/.obsidian)
    const dbDir = path.dirname(this.dbPath);
    await fs.mkdir(dbDir, { recursive: true });

    // Open DB and load the sqlite-vec extension
    this.db = new Database(this.dbPath);
    sqliteVec.load(this.db);
    this.db.pragma("journal_mode = WAL");
    this.db.pragma("journal_size_limit = 32000000");

    // Create schema (idempotent)
    this.db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS vec_chunks USING vec0(
        embedding float[${EMBEDDING_DIMENSIONS}]
      );

      CREATE TABLE IF NOT EXISTS chunks (
        id INTEGER PRIMARY KEY,
        file_path TEXT NOT NULL,
        chunk_index INTEGER NOT NULL,
        heading TEXT,
        content_preview TEXT NOT NULL,
        UNIQUE(file_path, chunk_index)
      );

      CREATE TABLE IF NOT EXISTS files (
        path TEXT PRIMARY KEY,
        mtime_ms INTEGER NOT NULL,
        content_hash TEXT NOT NULL,
        chunk_count INTEGER NOT NULL,
        updated_at TEXT NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_path);
    `);

    // Start background sync (non-blocking; errors are logged, not thrown)
    this._abortController = new AbortController();
    this._startupSync().catch(err => {
      console.error(`Semantic index startup sync error: ${err.message}`);
    });

    // Start file watcher
    this._startWatcher();
  }

  /**
   * Stop the watcher, cancel the startup sync, clear debounce timers,
   * wait for in-flight reindex work to settle, then close the database.
   */
  async shutdown() {
    if (this._abortController) {
      this._abortController.abort();
      this._abortController = null;
    }
    if (this.watcher) {
      this.watcher.close();
      this.watcher = null;
    }
    for (const timer of this._debounceTimers.values()) {
      clearTimeout(timer);
    }
    this._debounceTimers.clear();
    if (this._inflight.size > 0) {
      await Promise.allSettled([...this._inflight]);
    }
    if (this.db) {
      this.db.close();
      this.db = null;
    }
  }

  /** Track a reindexFile call so shutdown() can await it. */
  _trackedReindex(relativePath) {
    const p = this.reindexFile(relativePath).finally(() => {
      this._inflight.delete(p);
    });
    this._inflight.add(p);
    return p;
  }

  /**
   * Search for semantically similar notes and return formatted results.
   * @param {Object} opts
   * @param {string} opts.query - natural language search query
   * @param {number} [opts.limit=5] - max results
   * @param {string} [opts.folder] - restrict to folder prefix
   * @param {number} [opts.threshold] - minimum similarity score (0-1)
   * @returns {Promise<string>} formatted results text
   * @throws {Error} when the index is not initialized or no API key is set
   */
  async search({ query, limit = 5, folder, threshold }) {
    if (!this.isAvailable) {
      throw new Error("Semantic index not available");
    }

    const results = await this._querySimilarChunks({ query, limit, folder, threshold });

    // Surface sync progress so callers know results may be incomplete
    let syncNote = "";
    if (this._syncState.syncing) {
      syncNote = `\n\n*Index syncing (${this._syncState.done}/${this._syncState.total} files)...*`;
    }

    if (results.length === 0) {
      return `No semantically related notes found.${syncNote}`;
    }

    const formatted = results.map(r => {
      const heading = r.heading ? ` > ${r.heading}` : "";
      return `**${r.path}**${heading} (score: ${r.score})\n${r.preview}`;
    }).join("\n\n");

    return `Found ${results.length} semantically related note${results.length === 1 ? "" : "s"}:\n\n${formatted}${syncNote}`;
  }

  /**
   * Search for semantically similar notes and return raw result objects.
   * @param {Object} opts
   * @param {string} opts.query - natural language search query
   * @param {number} [opts.limit=5] - max results
   * @param {string} [opts.folder] - restrict to folder prefix
   * @param {number} [opts.threshold] - minimum similarity score (0-1)
   * @param {Set<string>} [opts.excludeFiles] - file paths to exclude
   * @returns {Promise<Array<{path: string, heading: ?string, score: number, preview: string}>>}
   * @throws {Error} when the index is not initialized or no API key is set
   */
  async searchRaw({ query, limit = 5, folder, threshold, excludeFiles }) {
    if (!this.isAvailable) {
      throw new Error("Semantic index not available");
    }
    return this._querySimilarChunks({ query, limit, folder, threshold, excludeFiles });
  }

  /**
   * Shared KNN lookup behind search() and searchRaw(): embeds the query,
   * fetches nearest chunks from sqlite-vec, applies folder / threshold /
   * exclusion filters, and keeps only the best-scoring chunk per file.
   * @returns {Promise<Array<{path: string, heading: ?string, score: number, preview: string}>>}
   */
  async _querySimilarChunks({ query, limit, folder, threshold, excludeFiles }) {
    // Embed the query
    const [queryEmbedding] = await getEmbeddings([query], this.openaiApiKey);

    // KNN search via sqlite-vec
    const vecResults = this.db.prepare(`
      SELECT rowid, distance
      FROM vec_chunks
      WHERE embedding MATCH ?
      ORDER BY distance
      LIMIT ?
    `).all(
      new Float32Array(queryEmbedding),
      Math.min(limit * 3, 50) // overfetch to compensate for the filters below
    );

    // Join with chunk metadata
    const getChunk = this.db.prepare(`
      SELECT file_path, chunk_index, heading, content_preview
      FROM chunks WHERE id = ?
    `);

    const results = [];
    const seenFiles = new Set();
    for (const { rowid, distance } of vecResults) {
      if (results.length >= limit) break;

      const chunk = getChunk.get(rowid);
      if (!chunk) continue;

      // Folder filter (ensure prefix matches at directory boundary)
      if (folder) {
        const prefix = folder.endsWith("/") ? folder : folder + "/";
        if (!chunk.file_path.startsWith(prefix)) continue;
      }

      // Threshold filter (convert L2 to similarity: 1 - distance/2)
      const score = Math.max(0, Math.min(1, 1 - distance / 2));
      if (threshold && score < threshold) continue;

      if (excludeFiles?.has(chunk.file_path)) continue;

      // Deduplicate by file (show best chunk per file)
      if (seenFiles.has(chunk.file_path)) continue;
      seenFiles.add(chunk.file_path);

      results.push({
        path: chunk.file_path,
        heading: chunk.heading,
        score: Math.round(score * 1000) / 1000,
        preview: chunk.content_preview
      });
    }

    return results;
  }

  /**
   * (Re-)index a single file: chunk it, embed it, store in SQLite.
   * Skips work when the content hash is unchanged; removes the file
   * from the index if it no longer exists or has no indexable content.
   * @param {string} relativePath - vault-relative file path
   */
  async reindexFile(relativePath) {
    if (!this.db) return;

    const absPath = path.resolve(this.vaultPath, relativePath);
    let content;
    try {
      content = await fs.readFile(absPath, "utf-8");
    } catch (e) {
      if (e.code === "ENOENT") {
        this.removeFile(relativePath);
        return;
      }
      throw e;
    }

    const hash = contentHash(content);
    const stat = await fs.stat(absPath);

    // Check if unchanged
    const existing = this.db.prepare("SELECT content_hash FROM files WHERE path = ?").get(relativePath);
    if (existing && existing.content_hash === hash) return;

    // Chunk the note
    const chunks = chunkNote(content, relativePath);
    if (chunks.length === 0) {
      // Note has no indexable content — clean up any old chunks
      this.removeFile(relativePath);
      return;
    }

    // Get embeddings (failure is logged, not fatal — file stays stale)
    const texts = chunks.map(c => c.text);
    let embeddings;
    try {
      embeddings = await getEmbeddings(texts, this.openaiApiKey);
    } catch (e) {
      console.error(`Embedding error for ${relativePath}: ${e.message}`);
      return;
    }

    // Store in transaction
    const txn = this.db.transaction(() => {
      // Remove old chunks
      this._deleteChunks(relativePath);

      // Insert new chunks
      const insertChunk = this.db.prepare(`
        INSERT INTO chunks (file_path, chunk_index, heading, content_preview)
        VALUES (?, ?, ?, ?)
      `);
      const insertVec = this.db.prepare(`
        INSERT INTO vec_chunks (rowid, embedding) VALUES (?, ?)
      `);

      for (let i = 0; i < chunks.length; i++) {
        const result = insertChunk.run(
          relativePath,
          i,
          chunks[i].heading || null,
          chunks[i].preview
        );
        insertVec.run(BigInt(result.lastInsertRowid), new Float32Array(embeddings[i]));
      }

      // Update file record
      this.db.prepare(`
        INSERT OR REPLACE INTO files (path, mtime_ms, content_hash, chunk_count, updated_at)
        VALUES (?, ?, ?, ?, ?)
      `).run(
        relativePath,
        Math.floor(stat.mtimeMs),
        hash,
        chunks.length,
        new Date().toISOString()
      );
    });

    txn();
  }

  /**
   * Remove all chunks and metadata for a file from the index.
   * @param {string} relativePath - vault-relative file path
   */
  removeFile(relativePath) {
    if (!this.db) return;

    const txn = this.db.transaction(() => {
      this._deleteChunks(relativePath);
      this.db.prepare("DELETE FROM files WHERE path = ?").run(relativePath);
    });

    txn();
  }

  /**
   * Delete all chunk rows (vector + metadata) for a file.
   * Callers are expected to wrap this in a transaction.
   * @param {string} relativePath - vault-relative file path
   */
  _deleteChunks(relativePath) {
    const oldChunks = this.db.prepare(
      "SELECT id FROM chunks WHERE file_path = ?"
    ).all(relativePath);

    if (oldChunks.length === 0) return;

    const deleteVec = this.db.prepare("DELETE FROM vec_chunks WHERE rowid = ?");
    const deleteChunk = this.db.prepare("DELETE FROM chunks WHERE id = ?");
    for (const { id } of oldChunks) {
      deleteVec.run(BigInt(id)); // vec0 rowids are int64 — pass as BigInt
      deleteChunk.run(id);
    }
  }

  // --- Private methods ---

  /**
   * Bring the index up to date with the vault: reindex new/changed files
   * in batches, drop deleted ones. Progress is tracked in _syncState and
   * the loop exits early if shutdown() aborts.
   */
  async _startupSync() {
    this._syncState.syncing = true;

    try {
      // Get all vault .md files
      const vaultFiles = await getAllMarkdownFiles(this.vaultPath);

      // Get all indexed files
      const indexedFiles = new Map();
      for (const row of this.db.prepare("SELECT path, mtime_ms, content_hash FROM files").all()) {
        indexedFiles.set(row.path, row);
      }

      // Find files needing reindex (new files, or mtime mismatch)
      const toReindex = [];
      const vaultFileSet = new Set(vaultFiles);

      for (const relPath of vaultFiles) {
        const absPath = path.resolve(this.vaultPath, relPath);
        try {
          const stat = await fs.stat(absPath);
          const indexed = indexedFiles.get(relPath);
          if (!indexed || Math.floor(stat.mtimeMs) !== indexed.mtime_ms) {
            toReindex.push(relPath);
          }
        } catch {
          // File disappeared between listing and stat
        }
      }

      // Find deleted files
      for (const indexedPath of indexedFiles.keys()) {
        if (!vaultFileSet.has(indexedPath)) {
          this.removeFile(indexedPath);
        }
      }

      if (toReindex.length === 0) {
        console.error("Semantic index: up to date");
        return;
      }

      this._syncState.total = toReindex.length;
      this._syncState.done = 0;
      console.error(`Semantic index: syncing ${toReindex.length} files...`);

      // Process in batches
      for (let i = 0; i < toReindex.length; i += REINDEX_BATCH_SIZE) {
        if (this._abortController?.signal.aborted) {
          console.error("Semantic index: startup sync aborted");
          break;
        }
        const batch = toReindex.slice(i, i + REINDEX_BATCH_SIZE);
        const results = await Promise.allSettled(batch.map(f => this._trackedReindex(f)));
        const failures = results.filter(r => r.status === "rejected");
        if (failures.length > 0) {
          console.error(`Semantic index: ${failures.length} files failed in batch`);
        }
        this._syncState.done += batch.length;
        console.error(`Semantic index: syncing ${this._syncState.done}/${this._syncState.total} files...`);
      }

      console.error(`Semantic index: sync complete (${toReindex.length} files updated)`);
    } finally {
      this._syncState.syncing = false;
    }
  }

  /**
   * Watch the vault for .md changes and reindex (or remove) files after a
   * per-file debounce. Events during the startup sync are skipped — the
   * sync itself will pick those files up. Best-effort: if the platform
   * does not support recursive watching, we log and continue without it.
   */
  _startWatcher() {
    try {
      this.watcher = fsSync.watch(this.vaultPath, { recursive: true }, (eventType, filename) => {
        if (!filename) return;
        if (!filename.endsWith(".md")) return;
        if (this._syncState.syncing) return;

        // Ignore dotfiles/dot directories
        const parts = filename.split(path.sep);
        if (parts.some(p => p.startsWith("."))) return;

        // Normalize to forward slashes (consistent with vault paths)
        const relativePath = filename.split(path.sep).join("/");

        // Debounce per file
        if (this._debounceTimers.has(relativePath)) {
          clearTimeout(this._debounceTimers.get(relativePath));
          this._debounceTimers.delete(relativePath);
        }

        const timer = setTimeout(async () => {
          this._debounceTimers.delete(relativePath);
          try {
            // Check if file still exists
            await fs.access(path.resolve(this.vaultPath, relativePath));
            await this._trackedReindex(relativePath);
          } catch (e) {
            if (e.code === "ENOENT") {
              this.removeFile(relativePath);
            } else {
              console.error(`Watcher reindex error for ${relativePath}: ${e.message}`);
            }
          }
        }, DEBOUNCE_MS);

        this._debounceTimers.set(relativePath, timer);
      });

      this.watcher.on("error", (err) => {
        console.error(`File watcher error: ${err.message}. Stopping watcher.`);
        if (this.watcher) {
          this.watcher.close();
          this.watcher = null;
        }
      });
    } catch (err) {
      console.error(`Could not start file watcher: ${err.message}`);
    }
  }
}
511
+
512
+ // --- Module-level helpers ---
513
+
514
/**
 * Split a note's content into embeddable chunks.
 *
 * Strategy: strip YAML frontmatter, prefix every chunk with the note title
 * (derived from the filename) for context, keep short notes as one chunk,
 * and split long notes at "## " headings, then at paragraph breaks.
 *
 * @param {string} content - full note content (may include frontmatter)
 * @param {string} filePath - vault-relative path, used to derive the title
 * @returns {Array<{text: string, heading: ?string, preview: string}>}
 */
function chunkNote(content, filePath) {
  // Strip YAML frontmatter. The opening "---" must be the entire first line
  // and the closing delimiter must be a line that is exactly "---" — a lazy
  // indexOf("\n---") would also match lines that merely *start* with "---"
  // (e.g. "----" rules) and slice mid-line, leaving stray text in the body.
  let body = content;
  const fm = /^---\r?\n[\s\S]*?\r?\n---[ \t]*(?:\r?\n|$)/.exec(content);
  if (fm) {
    body = content.slice(fm[0].length).trim();
  }

  if (!body) return [];

  // Derive title from file path
  const title = path.basename(filePath, ".md");

  // Short note: single chunk
  if (body.length <= MAX_CHARS_PER_CHUNK) {
    return [{
      text: `# ${title}\n\n${body}`,
      heading: null,
      preview: getPreview(body)
    }];
  }

  // Long note: split at ## headings
  const sections = splitByHeadings(body);
  const chunks = [];

  for (const section of sections) {
    const sectionText = section.text.trim();
    if (!sectionText) continue;

    if (sectionText.length <= MAX_CHARS_PER_CHUNK) {
      chunks.push({
        text: `# ${title}\n\n${sectionText}`,
        heading: section.heading,
        preview: getPreview(sectionText)
      });
    } else {
      // Further split at paragraph breaks
      const paragraphChunks = splitByParagraphs(sectionText, MAX_CHARS_PER_CHUNK);
      for (let i = 0; i < paragraphChunks.length; i++) {
        chunks.push({
          text: `# ${title}\n\n${paragraphChunks[i]}`,
          heading: section.heading ? `${section.heading} (${i + 1})` : null,
          preview: getPreview(paragraphChunks[i])
        });
      }
    }
  }

  return chunks;
}
567
+
568
/**
 * Split a note body into sections at level-2 ("## ") headings.
 * Content before the first heading becomes a section with heading null;
 * each heading line is kept inside its own section's text.
 *
 * @param {string} body - note body (frontmatter already stripped)
 * @returns {Array<{heading: ?string, text: string}>}
 */
function splitByHeadings(body) {
  const sections = [];
  let heading = null;
  let buffer = [];

  const flush = () => {
    if (buffer.length > 0) {
      sections.push({ heading, text: buffer.join("\n") });
    }
  };

  for (const line of body.split("\n")) {
    if (line.startsWith("## ")) {
      flush();
      heading = line.replace(/^##\s+/, "");
      buffer = [line]; // heading line stays in the section text
    } else {
      buffer.push(line);
    }
  }
  flush();

  return sections;
}
592
+
593
/**
 * Greedily pack paragraphs (separated by blank lines) into chunks of at
 * most maxChars. A single paragraph longer than maxChars is kept whole —
 * it is never split mid-paragraph.
 *
 * @param {string} text - section text to split
 * @param {number} maxChars - soft upper bound per chunk
 * @returns {string[]} chunk strings
 */
function splitByParagraphs(text, maxChars) {
  const chunks = [];
  let acc = "";

  for (const para of text.split(/\n\n+/)) {
    // +2 accounts for the "\n\n" separator that joining would add
    const overflows = acc !== "" && acc.length + para.length + 2 > maxChars;
    if (overflows) {
      chunks.push(acc);
      acc = para;
    } else {
      acc = acc === "" ? para : `${acc}\n\n${para}`;
    }
  }

  if (acc !== "") {
    chunks.push(acc);
  }
  return chunks;
}
610
+
611
/**
 * Build a short plain-text preview: first 100 whitespace-separated words,
 * with markdown heading markers removed, ellipsized when truncated.
 *
 * @param {string} text - chunk text
 * @returns {string} preview snippet
 */
function getPreview(text) {
  // Drop leading "#"-markers so headings read as plain text
  const plain = text.replace(/^#+\s+/gm, "").trim();
  const snippet = plain.split(/\s+/).slice(0, 100).join(" ");
  if (snippet.length < plain.length) {
    return snippet + "...";
  }
  return snippet;
}
618
+
619
/**
 * Embed an array of texts, batching requests to stay within the OpenAI
 * per-call input limit. Results are returned in input order.
 *
 * @param {string[]} texts - texts to embed
 * @param {string} apiKey - OpenAI API key
 * @returns {Promise<number[][]>} one embedding vector per input text
 */
async function getEmbeddings(texts, apiKey) {
  const collected = [];

  for (let start = 0; start < texts.length; start += BATCH_SIZE) {
    const slice = texts.slice(start, start + BATCH_SIZE);
    const vectors = await callEmbeddingAPI(slice, apiKey);
    collected.push(...vectors);
  }

  return collected;
}
630
+
631
/**
 * Call the OpenAI embeddings endpoint for one batch of texts.
 *
 * Retries with exponential backoff on rate limits (429) and transient
 * server errors (5xx); other HTTP errors throw immediately. Previously
 * only 429 was retried and a retries<=0 call silently returned undefined.
 *
 * @param {string[]} texts - batch of input strings (<= BATCH_SIZE)
 * @param {string} apiKey - OpenAI API key
 * @param {number} [retries=3] - maximum number of attempts
 * @returns {Promise<number[][]>} embeddings in input order
 * @throws {Error} on non-retryable HTTP errors or when retries are exhausted
 */
async function callEmbeddingAPI(texts, apiKey, retries = 3) {
  let lastError = null;

  for (let attempt = 0; attempt < retries; attempt++) {
    const response = await fetch("https://api.openai.com/v1/embeddings", {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        model: EMBEDDING_MODEL,
        input: texts
      }),
      signal: AbortSignal.timeout(30000) // don't hang indexing on a stuck request
    });

    if (response.ok) {
      const data = await response.json();
      // The API may return items out of order; sort by index to restore input order
      return data.data
        .sort((a, b) => a.index - b.index)
        .map(d => d.embedding);
    }

    const errBody = await response.text();
    lastError = new Error(`OpenAI API error (${response.status}): ${errBody}`);

    // Rate limits and transient server errors get exponential backoff
    const retryable = response.status === 429 || response.status >= 500;
    if (retryable && attempt < retries - 1) {
      const delay = Math.pow(2, attempt) * 1000;
      console.error(`OpenAI API ${response.status}, retrying in ${delay}ms...`);
      await new Promise(resolve => setTimeout(resolve, delay));
      continue;
    }

    throw lastError;
  }

  // Only reachable when retries <= 0 — fail loudly instead of returning undefined
  throw lastError ?? new Error("callEmbeddingAPI: retries must be >= 1");
}
666
+
667
/**
 * SHA-256 hex digest of a note's content, used for change detection.
 * @param {string} text - note content
 * @returns {string} 64-character lowercase hex digest
 */
function contentHash(text) {
  const hasher = crypto.createHash("sha256");
  hasher.update(text);
  return hasher.digest("hex");
}
670
+
671
+ export { chunkNote, splitByHeadings, splitByParagraphs, getPreview, contentHash };
672
+