@voidwire/lore 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.ts CHANGED
@@ -36,6 +36,7 @@ import {
36
36
  captureNote,
37
37
  captureTeaching,
38
38
  captureObservation,
39
+ indexAndEmbed,
39
40
  semanticSearch,
40
41
  formatBriefSearch,
41
42
  hasEmbeddings,
@@ -522,7 +523,7 @@ Examples:
522
523
  // Capture Command
523
524
  // ============================================================================
524
525
 
525
- function handleCaptureTask(args: string[]): void {
526
+ async function handleCaptureTask(args: string[]): Promise<void> {
526
527
  const parsed = parseArgs(args);
527
528
 
528
529
  const required = ["topic", "name", "problem", "solution"];
@@ -546,18 +547,26 @@ function handleCaptureTask(args: string[]): void {
546
547
  };
547
548
 
548
549
  const result = captureTask(input);
549
- output(result);
550
550
 
551
- if (result.success) {
552
- console.error("✅ Task logged");
553
- process.exit(0);
551
+ if (result.success && result.event) {
552
+ try {
553
+ await indexAndEmbed([result.event]);
554
+ output(result);
555
+ console.error("✅ Task logged and indexed");
556
+ process.exit(0);
557
+ } catch (error) {
558
+ output(result);
559
+ console.error(`✅ Task logged (indexing failed: ${error})`);
560
+ process.exit(0);
561
+ }
554
562
  } else {
563
+ output(result);
555
564
  console.error(`❌ ${result.error}`);
556
565
  process.exit(2);
557
566
  }
558
567
  }
559
568
 
560
- function handleCaptureKnowledge(args: string[]): void {
569
+ async function handleCaptureKnowledge(args: string[]): Promise<void> {
561
570
  const parsed = parseArgs(args);
562
571
 
563
572
  const required = ["topic", "text", "subtype"];
@@ -573,18 +582,26 @@ function handleCaptureKnowledge(args: string[]): void {
573
582
  };
574
583
 
575
584
  const result = captureKnowledge(input);
576
- output(result);
577
585
 
578
- if (result.success) {
579
- console.error("✅ Knowledge logged");
580
- process.exit(0);
586
+ if (result.success && result.event) {
587
+ try {
588
+ await indexAndEmbed([result.event]);
589
+ output(result);
590
+ console.error("✅ Knowledge logged and indexed");
591
+ process.exit(0);
592
+ } catch (error) {
593
+ output(result);
594
+ console.error(`✅ Knowledge logged (indexing failed: ${error})`);
595
+ process.exit(0);
596
+ }
581
597
  } else {
598
+ output(result);
582
599
  console.error(`❌ ${result.error}`);
583
600
  process.exit(1);
584
601
  }
585
602
  }
586
603
 
587
- function handleCaptureNote(args: string[]): void {
604
+ async function handleCaptureNote(args: string[]): Promise<void> {
588
605
  const parsed = parseArgs(args);
589
606
 
590
607
  if (!parsed.has("text")) {
@@ -598,18 +615,26 @@ function handleCaptureNote(args: string[]): void {
598
615
  };
599
616
 
600
617
  const result = captureNote(input);
601
- output(result);
602
618
 
603
- if (result.success) {
604
- console.error("✅ Note logged");
605
- process.exit(0);
619
+ if (result.success && result.event) {
620
+ try {
621
+ await indexAndEmbed([result.event]);
622
+ output(result);
623
+ console.error("✅ Note logged and indexed");
624
+ process.exit(0);
625
+ } catch (error) {
626
+ output(result);
627
+ console.error(`✅ Note logged (indexing failed: ${error})`);
628
+ process.exit(0);
629
+ }
606
630
  } else {
631
+ output(result);
607
632
  console.error(`❌ ${result.error}`);
608
633
  process.exit(2);
609
634
  }
610
635
  }
611
636
 
612
- function handleCaptureTeaching(args: string[]): void {
637
+ async function handleCaptureTeaching(args: string[]): Promise<void> {
613
638
  const parsed = parseArgs(args);
614
639
 
615
640
  const required = ["topic", "confidence", "text"];
@@ -626,18 +651,26 @@ function handleCaptureTeaching(args: string[]): void {
626
651
  };
627
652
 
628
653
  const result = captureTeaching(input);
629
- output(result);
630
654
 
631
- if (result.success) {
632
- console.error("✅ Teaching logged");
633
- process.exit(0);
655
+ if (result.success && result.event) {
656
+ try {
657
+ await indexAndEmbed([result.event]);
658
+ output(result);
659
+ console.error("✅ Teaching logged and indexed");
660
+ process.exit(0);
661
+ } catch (error) {
662
+ output(result);
663
+ console.error(`✅ Teaching logged (indexing failed: ${error})`);
664
+ process.exit(0);
665
+ }
634
666
  } else {
667
+ output(result);
635
668
  console.error(`❌ ${result.error}`);
636
669
  process.exit(2);
637
670
  }
638
671
  }
639
672
 
640
- function handleCaptureObservation(args: string[]): void {
673
+ async function handleCaptureObservation(args: string[]): Promise<void> {
641
674
  const parsed = parseArgs(args);
642
675
 
643
676
  const required = ["topic", "subtype", "confidence", "text"];
@@ -655,18 +688,26 @@ function handleCaptureObservation(args: string[]): void {
655
688
  };
656
689
 
657
690
  const result = captureObservation(input);
658
- output(result);
659
691
 
660
- if (result.success) {
661
- console.error("✅ Observation logged");
662
- process.exit(0);
692
+ if (result.success && result.event) {
693
+ try {
694
+ await indexAndEmbed([result.event]);
695
+ output(result);
696
+ console.error("✅ Observation logged and indexed");
697
+ process.exit(0);
698
+ } catch (error) {
699
+ output(result);
700
+ console.error(`✅ Observation logged (indexing failed: ${error})`);
701
+ process.exit(0);
702
+ }
663
703
  } else {
704
+ output(result);
664
705
  console.error(`❌ ${result.error}`);
665
706
  process.exit(2);
666
707
  }
667
708
  }
668
709
 
669
- function handleCapture(args: string[]): void {
710
+ async function handleCapture(args: string[]): Promise<void> {
670
711
  if (hasFlag(args, "help")) {
671
712
  showCaptureHelp();
672
713
  }
@@ -682,19 +723,19 @@ function handleCapture(args: string[]): void {
682
723
 
683
724
  switch (captureType) {
684
725
  case "task":
685
- handleCaptureTask(captureArgs);
726
+ await handleCaptureTask(captureArgs);
686
727
  break;
687
728
  case "knowledge":
688
- handleCaptureKnowledge(captureArgs);
729
+ await handleCaptureKnowledge(captureArgs);
689
730
  break;
690
731
  case "note":
691
- handleCaptureNote(captureArgs);
732
+ await handleCaptureNote(captureArgs);
692
733
  break;
693
734
  case "teaching":
694
- handleCaptureTeaching(captureArgs);
735
+ await handleCaptureTeaching(captureArgs);
695
736
  break;
696
737
  case "observation":
697
- handleCaptureObservation(captureArgs);
738
+ await handleCaptureObservation(captureArgs);
698
739
  break;
699
740
  default:
700
741
  fail(
@@ -1052,7 +1093,7 @@ Examples:
1052
1093
  process.exit(0);
1053
1094
  }
1054
1095
 
1055
- function main(): void {
1096
+ async function main(): Promise<void> {
1056
1097
  const args = process.argv.slice(2);
1057
1098
 
1058
1099
  // Show global help only when no args or help is first arg
@@ -1065,7 +1106,7 @@ function main(): void {
1065
1106
 
1066
1107
  switch (command) {
1067
1108
  case "search":
1068
- handleSearch(commandArgs);
1109
+ await handleSearch(commandArgs);
1069
1110
  break;
1070
1111
  case "list":
1071
1112
  handleList(commandArgs);
@@ -1083,7 +1124,7 @@ function main(): void {
1083
1124
  handleAbout(commandArgs);
1084
1125
  break;
1085
1126
  case "capture":
1086
- handleCapture(commandArgs);
1127
+ await handleCapture(commandArgs);
1087
1128
  break;
1088
1129
  default:
1089
1130
  fail(
package/index.ts CHANGED
@@ -93,3 +93,6 @@ export {
93
93
  type SemanticResult,
94
94
  type SemanticSearchOptions,
95
95
  } from "./lib/semantic";
96
+
97
+ // Real-time indexing
98
+ export { indexAndEmbed } from "./lib/realtime";
package/lib/cache.ts ADDED
@@ -0,0 +1,86 @@
1
+ /**
2
+ * lib/cache.ts - Embedding cache utilities
3
+ *
4
+ * Hash-based caching to avoid re-embedding unchanged content.
5
+ * Used by real-time indexing and batch lore-embed-all.
6
+ */
7
+
8
+ import { createHash } from "crypto";
9
+ import type { Database } from "bun:sqlite";
10
+
11
/**
 * Compute the cache key for a piece of content.
 *
 * @param content - Text to fingerprint.
 * @returns Hex-encoded SHA-256 digest of `content`.
 */
export function hashContent(content: string): string {
  const hasher = createHash("sha256");
  hasher.update(content);
  return hasher.digest("hex");
}
17
+
18
/**
 * Look up a cached embedding by content hash.
 *
 * The blob is decoded as raw float32 values, mirroring how `cacheEmbedding`
 * serializes it.
 *
 * @param db - Open database containing the `embedding_cache` table.
 * @param hash - Content hash to look up.
 * @returns The embedding as a plain number array, or `null` when the hash is
 *   not cached or the stored blob is not a whole number of float32 values.
 */
export function getCachedEmbedding(
  db: Database,
  hash: string,
): number[] | null {
  const stmt = db.prepare(
    "SELECT embedding FROM embedding_cache WHERE hash = ?",
  );
  const row = stmt.get(hash) as
    | { embedding: Uint8Array }
    | null
    | undefined;

  if (!row) {
    return null;
  }

  const stored = row.embedding;
  if (stored.byteLength % Float32Array.BYTES_PER_ELEMENT !== 0) {
    // Truncated/corrupt blob: report a miss so the caller re-embeds and
    // overwrites the entry instead of crashing on decode.
    return null;
  }

  // Copy into a fresh, offset-0 buffer before reinterpreting. The blob may be
  // a view into a larger (or unaligned) ArrayBuffer, in which case reading
  // `stored.buffer` directly would decode unrelated bytes.
  const bytes = new Uint8Array(stored);
  const float32 = new Float32Array(bytes.buffer);
  return Array.from(float32);
}
39
+
40
/**
 * Persist an embedding in the cache, replacing any entry with the same hash.
 *
 * @param db - Open, writable database containing `embedding_cache`.
 * @param hash - Content hash used as the cache key.
 * @param embedding - Vector to store; written as raw float32 bytes.
 * @param model - Name of the model that produced the vector.
 */
export function cacheEmbedding(
  db: Database,
  hash: string,
  embedding: number[],
  model: string,
): void {
  // Float32 storage: 4 bytes per dimension.
  const floats = Float32Array.from(embedding);
  const blob = new Uint8Array(floats.buffer);

  const insert = db.prepare(`
    INSERT OR REPLACE INTO embedding_cache (hash, embedding, model, dims, created_at)
    VALUES (?, ?, ?, ?, ?)
  `);

  const dims = embedding.length;
  insert.run(hash, blob, model, dims, Date.now());
}
60
+
61
/**
 * Check whether an embedding exists in the cache without retrieving it.
 *
 * @param db - Open database containing the `embedding_cache` table.
 * @param hash - Content hash to look up.
 * @returns `true` when a row with this hash exists.
 */
export function hasEmbeddingCached(db: Database, hash: string): boolean {
  const stmt = db.prepare(
    "SELECT 1 FROM embedding_cache WHERE hash = ? LIMIT 1",
  );
  // Loose comparison on purpose: treat both `null` and `undefined` as
  // "no row" so a driver returning `undefined` is not reported as a hit.
  return stmt.get(hash) != null;
}
70
+
71
/**
 * Batch check which hashes are missing from the cache.
 *
 * Lookups are issued in fixed-size chunks so that very large batches never
 * exceed SQLite's per-statement bound-parameter limit.
 *
 * @param db - Open database containing the `embedding_cache` table.
 * @param hashes - Content hashes to check; may contain duplicates.
 * @returns The hashes that need embedding, in input order (duplicates in the
 *   input are preserved in the output).
 */
export function getMissingHashes(db: Database, hashes: string[]): string[] {
  if (hashes.length === 0) return [];

  // Conservative: below the historical default limit of 999 bound variables.
  const CHUNK_SIZE = 500;

  // Query each distinct hash only once.
  const distinct = [...new Set(hashes)];
  const cachedSet = new Set<string>();

  for (let start = 0; start < distinct.length; start += CHUNK_SIZE) {
    const chunk = distinct.slice(start, start + CHUNK_SIZE);
    const placeholders = chunk.map(() => "?").join(",");
    const stmt = db.prepare(
      `SELECT hash FROM embedding_cache WHERE hash IN (${placeholders})`,
    );
    const rows = stmt.all(...chunk) as { hash: string }[];
    for (const row of rows) {
      cachedSet.add(row.hash);
    }
  }

  return hashes.filter((candidate) => !cachedSet.has(candidate));
}
package/lib/capture.ts CHANGED
@@ -12,6 +12,7 @@ import { homedir } from "os";
12
12
  // Outcome of a capture call, as returned by writeEvent.
  export interface CaptureResult {
13
13
  // True when the event was appended to the log file.
  success: boolean;
14
14
  // Failure description; the CLI prints it after the ❌ marker.
  error?: string;
15
  // The timestamped event as it was written; set by writeEvent on success.
+ event?: CaptureEvent;
15
16
  // Open-ended extra fields.
  [key: string]: unknown;
16
17
  }
17
18
 
@@ -220,12 +221,15 @@ function writeEvent(event: CaptureEvent): CaptureResult {
220
221
  ensureLogDirectory();
221
222
 
222
223
  const logPath = getLogPath();
223
- const eventWithTimestamp = { ...event, timestamp: getTimestamp() };
224
+ const eventWithTimestamp = {
225
+ ...event,
226
+ timestamp: getTimestamp(),
227
+ } as CaptureEvent;
224
228
  const jsonLine = JSON.stringify(eventWithTimestamp) + "\n";
225
229
 
226
230
  try {
227
231
  appendFileSync(logPath, jsonLine, "utf8");
228
- return { success: true };
232
+ return { success: true, event: eventWithTimestamp };
229
233
  } catch (error) {
230
234
  return {
231
235
  success: false,
package/lib/db.ts ADDED
@@ -0,0 +1,64 @@
1
+ /**
2
+ * lib/db.ts - Shared database utilities
3
+ *
4
+ * Centralizes SQLite setup and database access for both
5
+ * semantic search and real-time indexing.
6
+ */
7
+
8
+ import { Database } from "bun:sqlite";
9
+ import { existsSync } from "fs";
10
+ import { homedir } from "os";
11
+
12
// Use Homebrew SQLite on macOS to enable extension loading (the SQLite that
// ships with macOS disables it, which would prevent loading sqlite-vec).
// Must run before any Database instances are created.
// Both Homebrew prefixes are probed: /opt/homebrew (Apple Silicon) and
// /usr/local (Intel). On other platforms neither exists and this is a no-op.
const HOMEBREW_SQLITE_CANDIDATES = [
  "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib",
  "/usr/local/opt/sqlite/lib/libsqlite3.dylib",
];
const HOMEBREW_SQLITE = HOMEBREW_SQLITE_CANDIDATES.find((candidate) =>
  existsSync(candidate),
);
if (HOMEBREW_SQLITE) {
  Database.setCustomSQLite(HOMEBREW_SQLITE);
}
18
+
19
/**
 * Resolve the location of the lore database file.
 *
 * @returns Path to `lore.db` under the current user's home directory.
 */
export function getDatabasePath(): string {
  const home = homedir();
  return home + "/.local/share/lore/lore.db";
}
25
+
26
/**
 * Open the lore database with the sqlite-vec extension loaded.
 *
 * The caller owns the returned handle and must close it.
 *
 * @param readonly - Open in readonly mode (default: false)
 * @returns An open database with sqlite-vec available.
 * @throws Error when the database file does not exist, when
 *   `SQLITE_VEC_PATH` is not set, or when the extension fails to load.
 */
export function openDatabase(readonly = false): Database {
  const dbPath = getDatabasePath();

  if (!existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
  }

  // Validate configuration before opening, so a missing env var can never
  // leave an open handle behind.
  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    throw new Error(
      'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
    );
  }

  const db = new Database(dbPath, { readonly });

  try {
    db.loadExtension(vecPath);
  } catch (error) {
    // The caller never receives the handle on failure, so release it here.
    db.close();
    throw error;
  }

  return db;
}
51
+
52
/**
 * Open the lore database as plain SQLite, without loading sqlite-vec
 * (sufficient for FTS5-only operations).
 *
 * @param readonly - Open in readonly mode (default: false)
 * @returns An open database handle; the caller must close it.
 * @throws Error when the database file does not exist.
 */
export function openDatabaseBasic(readonly = false): Database {
  const dbPath = getDatabasePath();

  if (existsSync(dbPath)) {
    return new Database(dbPath, { readonly });
  }

  throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
}
package/lib/list.ts CHANGED
@@ -71,7 +71,7 @@ const PROJECT_FIELD: Record<string, string> = {
71
71
  commits: "project",
72
72
  sessions: "project",
73
73
  tasks: "project",
74
- insights: "project",
74
+ insights: "topic",
75
75
  captures: "topic",
76
76
  teachings: "topic",
77
77
  learnings: "topic",
@@ -161,23 +161,28 @@ function queryPersonalType(
161
161
  type: string,
162
162
  limit?: number,
163
163
  ): ListEntry[] {
164
- // Query personal source, then filter by type in metadata
165
- const sql = limit
166
- ? `SELECT title, content, metadata FROM search WHERE source = 'personal' LIMIT ?`
167
- : `SELECT title, content, metadata FROM search WHERE source = 'personal'`;
164
+ // Filter by type in SQL, not JS - avoids LIMIT truncation bug
165
+ let sql = `
166
+ SELECT title, content, metadata FROM search
167
+ WHERE source = 'personal'
168
+ AND json_extract(metadata, '$.type') = ?
169
+ ORDER BY json_extract(metadata, '$.timestamp') DESC
170
+ `;
171
+ const params: (string | number)[] = [type];
168
172
 
169
- const stmt = db.prepare(sql);
170
- const rows = (limit ? stmt.all(limit * 10) : stmt.all()) as RawRow[]; // Over-fetch for filtering
173
+ if (limit) {
174
+ sql += " LIMIT ?";
175
+ params.push(limit);
176
+ }
171
177
 
172
- const filtered = rows
173
- .map((row) => ({
174
- title: row.title,
175
- content: row.content,
176
- metadata: JSON.parse(row.metadata || "{}"),
177
- }))
178
- .filter((entry) => entry.metadata.type === type);
178
+ const stmt = db.prepare(sql);
179
+ const rows = stmt.all(...params) as RawRow[];
179
180
 
180
- return limit ? filtered.slice(0, limit) : filtered;
181
+ return rows.map((row) => ({
182
+ title: row.title,
183
+ content: row.content,
184
+ metadata: JSON.parse(row.metadata || "{}"),
185
+ }));
181
186
  }
182
187
 
183
188
  /**
package/lib/projects.ts CHANGED
@@ -14,7 +14,7 @@ const PROJECT_FIELD: Record<string, string> = {
14
14
  commits: "project",
15
15
  sessions: "project",
16
16
  tasks: "project",
17
- insights: "project",
17
+ insights: "topic",
18
18
  captures: "topic",
19
19
  teachings: "topic",
20
20
  learnings: "topic",
package/lib/realtime.ts ADDED
@@ -0,0 +1,265 @@
1
+ /**
2
+ * lib/realtime.ts - Real-time indexing for captures
3
+ *
4
+ * Makes captures immediately searchable (keyword + semantic) without
5
+ * waiting for batch indexers.
6
+ *
7
+ * Usage:
8
+ * // CLI - single capture
9
+ * const result = captureKnowledge(input);
10
+ * if (result.success && result.event) await indexAndEmbed([result.event]);
11
+ *
12
+ * // Hook - batch captures
13
+ * const results = captures.map((cap) => captureKnowledge(cap));
14
+ * await indexAndEmbed(results.flatMap((r) => (r.event ? [r.event] : [])));
15
+ */
16
+
17
+ import { Database } from "bun:sqlite";
18
+ import { existsSync } from "fs";
19
+ import {
20
+ embedDocuments,
21
+ getDatabasePath,
22
+ MODEL_NAME,
23
+ EMBEDDING_DIM,
24
+ serializeEmbedding,
25
+ } from "./semantic.js";
26
+ import {
27
+ hashContent,
28
+ getCachedEmbedding,
29
+ cacheEmbedding,
30
+ getMissingHashes,
31
+ } from "./cache.js";
32
+ import type { CaptureEvent } from "./capture.js";
33
+
34
/**
 * Index and embed capture events for immediate searchability.
 *
 * 1. Insert into the FTS5 search table (instant keyword search)
 * 2. Generate embeddings, using the embedding cache
 * 3. Insert into the embeddings table (instant semantic search)
 *
 * Each write stage runs in its own transaction, so a failure inside a stage
 * never leaves a batch half-written. Stage 1 is committed before embedding
 * starts: if embedding fails, the events remain keyword-searchable and the
 * error propagates to the caller.
 *
 * @param events - Captured events to index; an empty array is a no-op.
 * @throws Error when the database file is missing, `SQLITE_VEC_PATH` is not
 *   set, or any insert/embedding step fails.
 */
export async function indexAndEmbed(events: CaptureEvent[]): Promise<void> {
  if (events.length === 0) return;

  const dbPath = getDatabasePath();
  if (!existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
  }

  // Check configuration before opening the database.
  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    throw new Error(
      'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
    );
  }

  const db = new Database(dbPath);

  try {
    // sqlite-vec is required for the embeddings table.
    db.loadExtension(vecPath);

    // 1. Insert into FTS5 and collect doc IDs (all-or-nothing for the batch).
    const insertSearchRows = db.transaction((): number[] =>
      events.map((event) => insertSearchEntry(db, event)),
    );
    const docIds = insertSearchRows();

    // 2. Generate embeddings with cache. This is async, so it stays outside
    //    any transaction.
    const contents = events.map((e) => getContentForEmbedding(e));
    const embeddings = await embedWithCache(db, contents);

    // 3. Insert embeddings (all-or-nothing for the batch).
    const insertVectors = db.transaction(() => {
      for (let i = 0; i < events.length; i++) {
        insertEmbedding(db, docIds[i], embeddings[i], events[i]);
      }
    });
    insertVectors();
  } finally {
    db.close();
  }
}
80
+
81
/**
 * Add one event to the FTS5 `search` table.
 *
 * @param db - Open, writable database.
 * @param event - Event to index.
 * @returns rowid of the new row (used as doc_id for embeddings)
 */
function insertSearchEntry(db: Database, event: CaptureEvent): number {
  // Column values, in the order of the INSERT column list below.
  const row = [
    getSourceForEvent(event),
    buildTitle(event),
    getContentForEmbedding(event),
    buildMetadata(event),
  ];

  const insert = db.prepare(`
    INSERT INTO search (source, title, content, metadata)
    VALUES (?, ?, ?, ?)
  `);

  const { lastInsertRowid } = insert.run(...row);
  return Number(lastInsertRowid);
}
99
+
100
/**
 * Translate an event type into the `source` value used in the search table.
 *
 * Unrecognized types fall back to "captures".
 */
function getSourceForEvent(event: CaptureEvent): string {
  const sourceByType = new Map<string, string>([
    ["knowledge", "captures"],
    ["teaching", "teachings"],
    ["observation", "observations"],
    ["insight", "insights"],
    ["learning", "learnings"],
    ["task", "tasks"],
    ["note", "captures"],
  ]);

  return sourceByType.get(event.type) ?? "captures";
}
123
+
124
/**
 * Build the title stored with an event's FTS5 entry.
 *
 * Missing fields fall back to per-type defaults ("general" topic, "medium"
 * confidence, and so on). Unrecognized event types get a generic
 * `[type] topic` title rather than `undefined`, mirroring the default branch
 * in `getSourceForEvent`.
 *
 * @param event - Event to describe.
 * @returns A non-empty title string.
 */
function buildTitle(event: CaptureEvent): string {
  const data = event.data as Record<string, unknown>;
  const topic = data.topic || "general";
  // Widen to string so the default branch can still read the type.
  const type: string = event.type;

  switch (type) {
    case "knowledge":
      return `[${data.subtype || "knowledge"}] ${topic}`;
    case "teaching":
      return `[${topic}] (${data.confidence || "medium"})`;
    case "observation":
      return `[${data.subtype || "pattern"}] ${topic}`;
    case "insight":
      return `[${data.subtype || "insight"}] ${topic}`;
    case "learning":
      return `[learning] ${topic}`;
    case "task":
      return `[task] ${topic}: ${data.name || "untitled"}`;
    case "note":
      return `[note] ${topic}`;
    default:
      return `[${type || "capture"}] ${topic}`;
  }
}
147
+
148
/**
 * Extract the text to index and embed for an event.
 *
 * Uses `data.content`, then `data.text`. When both are empty, falls back to
 * the non-empty `name` / `problem` / `solution` fields joined by newlines so
 * the event is not indexed as an empty document.
 *
 * NOTE(review): the fallback assumes task events carry name/problem/solution
 * instead of text (as buildMetadata suggests) — confirm the preferred format
 * against the batch indexer.
 *
 * @param event - Event to extract text from.
 * @returns The text to index; "" when the event carries no usable text.
 */
function getContentForEmbedding(event: CaptureEvent): string {
  const data = event.data as Record<string, unknown>;

  const primary = data.content || data.text;
  if (primary) {
    return String(primary);
  }

  const parts = [data.name, data.problem, data.solution].filter(
    (part): part is string => typeof part === "string" && part.trim() !== "",
  );
  return parts.join("\n");
}
155
+
156
/**
 * Serialize the metadata JSON stored alongside an event's search entry.
 *
 * Every event gets `topic`, `timestamp`, `date` (first 10 characters of the
 * timestamp, or "" when there is none) and `content`; type-specific fields
 * are appended after those.
 *
 * @param event - Event to describe.
 * @returns JSON string for the `metadata` column.
 */
function buildMetadata(event: CaptureEvent): string {
  const data = event.data as Record<string, unknown>;
  const { timestamp } = event;

  // Fields that only exist for particular event types.
  const typeSpecific = (): Record<string, unknown> => {
    switch (event.type) {
      case "knowledge":
        return { subtype: data.subtype };
      case "teaching":
        return {
          confidence: data.confidence,
          capture_source: data.source || "manual",
        };
      case "observation":
        return {
          subtype: data.subtype,
          confidence: data.confidence,
          capture_source: data.source || "auto",
        };
      case "insight":
        return { subtype: data.subtype, session_id: data.session_id };
      case "learning":
        return { persona: data.persona };
      case "task":
        return {
          name: data.name,
          problem: data.problem,
          solution: data.solution,
        };
      case "note":
        return { tags: data.tags };
      default:
        return {};
    }
  };

  return JSON.stringify({
    topic: data.topic || "general",
    timestamp,
    date: timestamp ? timestamp.substring(0, 10) : "",
    content: getContentForEmbedding(event),
    ...typeSpecific(),
  });
}
204
+
205
/**
 * Embed contents, consulting the embedding cache first.
 *
 * Only cache misses are sent to the model, and identical content appearing
 * more than once in the batch is embedded a single time. Newly generated
 * embeddings are written back to the cache under `MODEL_NAME`.
 *
 * @param db - Open, writable database holding the embedding cache.
 * @param contents - Texts to embed.
 * @returns One embedding per input, in input order. Duplicate inputs share
 *   the same array instance, so treat the vectors as read-only.
 * @throws Error when the embedder returns a different number of vectors than
 *   were requested.
 */
async function embedWithCache(
  db: Database,
  contents: string[],
): Promise<number[][]> {
  const results: (number[] | null)[] = new Array(contents.length).fill(null);

  // Hash once and reuse for both lookup and write-back.
  const hashes = contents.map((c) => hashContent(c));

  // Cache misses: hash -> every input index waiting for that embedding.
  const pending = new Map<string, number[]>();

  for (let i = 0; i < contents.length; i++) {
    const hash = hashes[i];

    const waiting = pending.get(hash);
    if (waiting) {
      // Same content already queued for embedding in this batch.
      waiting.push(i);
      continue;
    }

    const cached = getCachedEmbedding(db, hash);
    if (cached) {
      results[i] = cached;
    } else {
      pending.set(hash, [i]);
    }
  }

  // Embed cache misses
  if (pending.size > 0) {
    const misses = [...pending.entries()];
    const embeddings = await embedDocuments(
      misses.map(([, indices]) => contents[indices[0]]),
    );

    if (embeddings.length !== misses.length) {
      throw new Error(
        `Embedding count mismatch: requested ${misses.length}, got ${embeddings.length}`,
      );
    }

    misses.forEach(([hash, indices], k) => {
      const embedding = embeddings[k];
      cacheEmbedding(db, hash, embedding, MODEL_NAME);
      for (const idx of indices) {
        results[idx] = embedding;
      }
    });
  }

  return results as number[][];
}
243
+
244
/**
 * Store one embedding row for a search entry.
 *
 * The row is written with `chunk_idx` 0, tagged with the event's source and
 * topic.
 *
 * NOTE(review): a missing topic is stored as "" here, whereas buildMetadata
 * and buildTitle default it to "general" — confirm which value topic-filtered
 * semantic search should see.
 *
 * @param db - Open, writable database with sqlite-vec loaded.
 * @param docId - rowid of the matching `search` entry.
 * @param embedding - Vector to store.
 * @param event - Event the vector was generated from.
 */
function insertEmbedding(
  db: Database,
  docId: number,
  embedding: number[],
  event: CaptureEvent,
): void {
  const source = getSourceForEvent(event);
  const { topic: rawTopic } = event.data as Record<string, unknown>;
  const topic = String(rawTopic || "");
  const vectorBlob = serializeEmbedding(embedding);

  const insert = db.prepare(`
    INSERT INTO embeddings (doc_id, chunk_idx, source, topic, embedding)
    VALUES (?, 0, ?, ?, ?)
  `);

  insert.run(docId, source, topic, vectorBlob);
}
package/lib/search.ts CHANGED
@@ -27,6 +27,20 @@ function getDatabasePath(): string {
27
27
  return `${homedir()}/.local/share/lore/lore.db`;
28
28
  }
29
29
 
30
/**
 * Make a free-text query safe to pass to FTS5 MATCH.
 *
 * The query is split on whitespace and every term is wrapped in double
 * quotes (embedded quotes are doubled), so terms such as "real-time" are
 * not interpreted as FTS5 query syntax.
 *
 * @param query - Raw user query.
 * @returns Space-separated quoted terms; "" when the query has no terms.
 */
function escapeFts5Query(query: string): string {
  const quotedTerms: string[] = [];

  for (const term of query.split(/\s+/)) {
    // Leading/trailing whitespace produces empty pieces; drop them.
    if (term === "") continue;
    quotedTerms.push('"' + term.replace(/"/g, '""') + '"');
  }

  return quotedTerms.join(" ");
}
43
+
30
44
  /**
31
45
  * Search the Lore FTS5 database
32
46
  *
@@ -51,7 +65,7 @@ export function search(
51
65
  const limit = options.limit ?? 20;
52
66
 
53
67
  const conditions: string[] = ["search MATCH ?"];
54
- const params: (string | number)[] = [query];
68
+ const params: (string | number)[] = [escapeFts5Query(query)];
55
69
 
56
70
  if (options.source) {
57
71
  conditions.push("source = ?");
package/lib/semantic.ts CHANGED
@@ -4,22 +4,12 @@
4
4
  * Query embedding using @huggingface/transformers with nomic-embed-text-v1.5.
5
5
  * KNN search against sqlite-vec virtual table.
6
6
  * Uses Bun's built-in SQLite with sqlite-vec extension.
7
- *
8
- * Note: macOS ships Apple's SQLite which disables extension loading.
9
- * We use Homebrew's SQLite via setCustomSQLite() to enable sqlite-vec.
10
7
  */
11
8
 
12
9
  import { Database } from "bun:sqlite";
13
- import { homedir } from "os";
14
10
  import { existsSync } from "fs";
15
11
  import { pipeline } from "@huggingface/transformers";
16
-
17
- // Use Homebrew SQLite on macOS to enable extension loading
18
- // Must be called before any Database instances are created
19
- const HOMEBREW_SQLITE = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib";
20
- if (existsSync(HOMEBREW_SQLITE)) {
21
- Database.setCustomSQLite(HOMEBREW_SQLITE);
22
- }
12
+ import { getDatabasePath, openDatabase } from "./db.js";
23
13
 
24
14
  export interface SemanticResult {
25
15
  source: string;
@@ -43,7 +33,7 @@ const PROJECT_FIELD: Record<string, string> = {
43
33
  commits: "project",
44
34
  sessions: "project",
45
35
  tasks: "project",
46
- insights: "project",
36
+ insights: "topic",
47
37
  captures: "topic",
48
38
  teachings: "topic",
49
39
  learnings: "topic",
@@ -51,6 +41,7 @@ const PROJECT_FIELD: Record<string, string> = {
51
41
  };
52
42
 
53
43
  const MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5";
44
+ const EMBEDDING_DIM = 768;
54
45
 
55
46
  interface EmbeddingPipeline {
56
47
  (
@@ -64,10 +55,6 @@ interface EmbeddingPipeline {
64
55
  // Cache the pipeline to avoid reloading on every query
65
56
  let cachedPipeline: EmbeddingPipeline | null = null;
66
57
 
67
- function getDatabasePath(): string {
68
- return `${homedir()}/.local/share/lore/lore.db`;
69
- }
70
-
71
58
  /**
72
59
  * Get or create the embedding pipeline
73
60
  * Pipeline is cached after first load for performance
@@ -111,9 +98,9 @@ export async function embedQuery(query: string): Promise<number[]> {
111
98
  // Output is a Tensor, convert to array
112
99
  const embedding = Array.from(output.data as Float32Array);
113
100
 
114
- if (embedding.length !== 768) {
101
+ if (embedding.length !== EMBEDDING_DIM) {
115
102
  throw new Error(
116
- `Invalid embedding: expected 768 dims, got ${embedding.length}`,
103
+ `Invalid embedding: expected ${EMBEDDING_DIM} dims, got ${embedding.length}`,
117
104
  );
118
105
  }
119
106
 
@@ -121,33 +108,79 @@ export async function embedQuery(query: string): Promise<number[]> {
121
108
  }
122
109
 
123
110
  /**
124
- * Check if embeddings table has any data
111
+ * Embed a document string using local transformers.js model
112
+ * Uses "search_document: " prefix as required by nomic-embed-text
113
+ * @returns 768-dimensional embedding vector
125
114
  */
126
- export function hasEmbeddings(): boolean {
127
- const dbPath = getDatabasePath();
115
export async function embedDocument(text: string): Promise<number[]> {
  const embedder = await getEmbeddingPipeline();

  // Documents carry the "search_document: " task prefix; the result is
  // mean-pooled and normalized.
  const { data } = await embedder(`search_document: ${text}`, {
    pooling: "mean",
    normalize: true,
  });

  const vector = Array.from(data as Float32Array);

  // Guard against a model/config mismatch before anything is stored.
  if (vector.length === EMBEDDING_DIM) {
    return vector;
  }

  throw new Error(
    `Invalid embedding: expected ${EMBEDDING_DIM} dims, got ${vector.length}`,
  );
}
134
134
 
135
- try {
136
- // Load sqlite-vec extension
137
- const vecPath = process.env.SQLITE_VEC_PATH;
138
- if (!vecPath) {
139
- return false;
135
/**
 * Embed several documents.
 *
 * Delegates to `embedDocument` for each text, so the document prefix,
 * pooling options and dimension check are defined in exactly one place.
 * Texts are processed one at a time (transformers.js doesn't batch well);
 * the embedding pipeline is cached, so it is not reloaded per document.
 *
 * @param texts - Documents to embed; an empty array yields an empty result
 *   without loading the model.
 * @returns One embedding per input text, in input order.
 * @throws Error when any embedding has an unexpected dimension.
 */
export async function embedDocuments(texts: string[]): Promise<number[][]> {
  const results: number[][] = [];

  // Sequential on purpose — see the note above about batching.
  for (const text of texts) {
    results.push(await embedDocument(text));
  }

  return results;
}
143
168
 
144
- const stmt = db.prepare("SELECT COUNT(*) as count FROM embeddings");
145
- const result = stmt.get() as { count: number };
146
- return result.count > 0;
169
/**
 * Report whether the embeddings table contains at least one row.
 *
 * Best-effort: any failure (database cannot be opened, query fails) is
 * reported as `false` rather than thrown.
 */
export function hasEmbeddings(): boolean {
  try {
    const db = openDatabase(true);
    try {
      const row = db
        .prepare("SELECT COUNT(*) as count FROM embeddings")
        .get() as { count: number };
      return row.count > 0;
    } finally {
      // Runs on both the success and failure paths of the query.
      db.close();
    }
  } catch {
    return false;
  }
}
153
186
 
@@ -166,97 +199,53 @@ export async function semanticSearch(
166
199
  query: string,
167
200
  options: SemanticSearchOptions = {},
168
201
  ): Promise<SemanticResult[]> {
169
- const dbPath = getDatabasePath();
170
-
171
- if (!existsSync(dbPath)) {
172
- throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
173
- }
174
-
175
202
  // Get query embedding
176
203
  const queryEmbedding = await embedQuery(query);
177
204
  const queryBlob = serializeEmbedding(queryEmbedding);
178
205
 
179
- const db = new Database(dbPath, { readonly: true });
206
+ const db = openDatabase(true);
180
207
 
181
208
  try {
182
- // Load sqlite-vec extension
183
- const vecPath = process.env.SQLITE_VEC_PATH;
184
- if (!vecPath) {
185
- throw new Error(
186
- 'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
187
- );
188
- }
189
-
190
- db.loadExtension(vecPath);
191
-
192
209
  const limit = options.limit ?? 20;
193
210
 
194
211
  // KNN query - 1:1 mapping between search rows and embeddings
195
212
  // Content is pre-chunked at ingest time
213
+ // source/topic partition columns enable filtered KNN (filter BEFORE search)
196
214
  let sql: string;
197
215
  const params: (Uint8Array | string | number)[] = [queryBlob];
198
216
 
217
+ // Build KNN query with optional partition filters
218
+ const conditions = ["e.embedding MATCH ?", "k = ?"];
219
+ params.push(limit);
220
+
199
221
  if (options.source) {
200
- // Filter by e.source (partition column) for KNN pre-filtering
201
- // This filters BEFORE KNN, not after — critical for domain-specific search
202
- sql = `
203
- SELECT
204
- s.source,
205
- s.title,
206
- s.content,
207
- s.metadata,
208
- e.distance
209
- FROM embeddings e
210
- JOIN search s ON e.doc_id = s.rowid
211
- WHERE e.embedding MATCH ?
212
- AND k = ?
213
- AND e.source = ?
214
- ORDER BY e.distance
215
- LIMIT ?
216
- `;
217
- params.push(limit);
222
+ conditions.push("e.source = ?");
218
223
  params.push(options.source);
219
- params.push(limit);
220
- } else {
221
- sql = `
222
- SELECT
223
- s.source,
224
- s.title,
225
- s.content,
226
- s.metadata,
227
- e.distance
228
- FROM embeddings e
229
- JOIN search s ON e.doc_id = s.rowid
230
- WHERE e.embedding MATCH ?
231
- AND k = ?
232
- ORDER BY e.distance
233
- LIMIT ?
234
- `;
235
- params.push(limit);
236
- params.push(limit);
237
224
  }
238
225
 
239
- const stmt = db.prepare(sql);
240
- const results = stmt.all(...params) as SemanticResult[];
241
-
242
- // Post-filter by project if specified
243
- // KNN WHERE clause doesn't support json_extract on joined metadata,
244
- // so we filter after the query returns
245
226
  if (options.project) {
246
- return results.filter((result) => {
247
- const field = PROJECT_FIELD[result.source];
248
- if (!field) return false;
249
-
250
- try {
251
- const metadata = JSON.parse(result.metadata);
252
- return metadata[field] === options.project;
253
- } catch {
254
- // Skip results with malformed metadata
255
- return false;
256
- }
257
- });
227
+ conditions.push("e.topic = ?");
228
+ params.push(options.project);
258
229
  }
259
230
 
231
+ sql = `
232
+ SELECT
233
+ s.source,
234
+ s.title,
235
+ s.content,
236
+ s.metadata,
237
+ e.distance
238
+ FROM embeddings e
239
+ JOIN search s ON e.doc_id = s.rowid
240
+ WHERE ${conditions.join("\n AND ")}
241
+ ORDER BY e.distance
242
+ LIMIT ?
243
+ `;
244
+ params.push(limit);
245
+
246
+ const stmt = db.prepare(sql);
247
+ const results = stmt.all(...params) as SemanticResult[];
248
+
260
249
  return results;
261
250
  } finally {
262
251
  db.close();
@@ -349,3 +338,6 @@ export function formatBriefSearch(results: SemanticResult[]): string {
349
338
 
350
339
  return sections.join("\n\n");
351
340
  }
341
+
342
+ // Export constants and helpers for realtime.ts
343
+ export { MODEL_NAME, EMBEDDING_DIM, serializeEmbedding, getDatabasePath };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@voidwire/lore",
3
- "version": "0.6.0",
3
+ "version": "0.6.2",
4
4
  "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
5
5
  "type": "module",
6
6
  "main": "./index.ts",