@voidwire/lore 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.ts +75 -34
- package/index.ts +3 -0
- package/lib/cache.ts +86 -0
- package/lib/capture.ts +6 -2
- package/lib/db.ts +64 -0
- package/lib/list.ts +20 -15
- package/lib/projects.ts +1 -1
- package/lib/realtime.ts +265 -0
- package/lib/search.ts +15 -1
- package/lib/semantic.ts +98 -106
- package/package.json +1 -1
package/cli.ts
CHANGED
|
@@ -36,6 +36,7 @@ import {
|
|
|
36
36
|
captureNote,
|
|
37
37
|
captureTeaching,
|
|
38
38
|
captureObservation,
|
|
39
|
+
indexAndEmbed,
|
|
39
40
|
semanticSearch,
|
|
40
41
|
formatBriefSearch,
|
|
41
42
|
hasEmbeddings,
|
|
@@ -522,7 +523,7 @@ Examples:
|
|
|
522
523
|
// Capture Command
|
|
523
524
|
// ============================================================================
|
|
524
525
|
|
|
525
|
-
function handleCaptureTask(args: string[]): void {
|
|
526
|
+
async function handleCaptureTask(args: string[]): Promise<void> {
|
|
526
527
|
const parsed = parseArgs(args);
|
|
527
528
|
|
|
528
529
|
const required = ["topic", "name", "problem", "solution"];
|
|
@@ -546,18 +547,26 @@ function handleCaptureTask(args: string[]): void {
|
|
|
546
547
|
};
|
|
547
548
|
|
|
548
549
|
const result = captureTask(input);
|
|
549
|
-
output(result);
|
|
550
550
|
|
|
551
|
-
if (result.success) {
|
|
552
|
-
|
|
553
|
-
|
|
551
|
+
if (result.success && result.event) {
|
|
552
|
+
try {
|
|
553
|
+
await indexAndEmbed([result.event]);
|
|
554
|
+
output(result);
|
|
555
|
+
console.error("✅ Task logged and indexed");
|
|
556
|
+
process.exit(0);
|
|
557
|
+
} catch (error) {
|
|
558
|
+
output(result);
|
|
559
|
+
console.error(`✅ Task logged (indexing failed: ${error})`);
|
|
560
|
+
process.exit(0);
|
|
561
|
+
}
|
|
554
562
|
} else {
|
|
563
|
+
output(result);
|
|
555
564
|
console.error(`❌ ${result.error}`);
|
|
556
565
|
process.exit(2);
|
|
557
566
|
}
|
|
558
567
|
}
|
|
559
568
|
|
|
560
|
-
function handleCaptureKnowledge(args: string[]): void {
|
|
569
|
+
async function handleCaptureKnowledge(args: string[]): Promise<void> {
|
|
561
570
|
const parsed = parseArgs(args);
|
|
562
571
|
|
|
563
572
|
const required = ["topic", "text", "subtype"];
|
|
@@ -573,18 +582,26 @@ function handleCaptureKnowledge(args: string[]): void {
|
|
|
573
582
|
};
|
|
574
583
|
|
|
575
584
|
const result = captureKnowledge(input);
|
|
576
|
-
output(result);
|
|
577
585
|
|
|
578
|
-
if (result.success) {
|
|
579
|
-
|
|
580
|
-
|
|
586
|
+
if (result.success && result.event) {
|
|
587
|
+
try {
|
|
588
|
+
await indexAndEmbed([result.event]);
|
|
589
|
+
output(result);
|
|
590
|
+
console.error("✅ Knowledge logged and indexed");
|
|
591
|
+
process.exit(0);
|
|
592
|
+
} catch (error) {
|
|
593
|
+
output(result);
|
|
594
|
+
console.error(`✅ Knowledge logged (indexing failed: ${error})`);
|
|
595
|
+
process.exit(0);
|
|
596
|
+
}
|
|
581
597
|
} else {
|
|
598
|
+
output(result);
|
|
582
599
|
console.error(`❌ ${result.error}`);
|
|
583
600
|
process.exit(1);
|
|
584
601
|
}
|
|
585
602
|
}
|
|
586
603
|
|
|
587
|
-
function handleCaptureNote(args: string[]): void {
|
|
604
|
+
async function handleCaptureNote(args: string[]): Promise<void> {
|
|
588
605
|
const parsed = parseArgs(args);
|
|
589
606
|
|
|
590
607
|
if (!parsed.has("text")) {
|
|
@@ -598,18 +615,26 @@ function handleCaptureNote(args: string[]): void {
|
|
|
598
615
|
};
|
|
599
616
|
|
|
600
617
|
const result = captureNote(input);
|
|
601
|
-
output(result);
|
|
602
618
|
|
|
603
|
-
if (result.success) {
|
|
604
|
-
|
|
605
|
-
|
|
619
|
+
if (result.success && result.event) {
|
|
620
|
+
try {
|
|
621
|
+
await indexAndEmbed([result.event]);
|
|
622
|
+
output(result);
|
|
623
|
+
console.error("✅ Note logged and indexed");
|
|
624
|
+
process.exit(0);
|
|
625
|
+
} catch (error) {
|
|
626
|
+
output(result);
|
|
627
|
+
console.error(`✅ Note logged (indexing failed: ${error})`);
|
|
628
|
+
process.exit(0);
|
|
629
|
+
}
|
|
606
630
|
} else {
|
|
631
|
+
output(result);
|
|
607
632
|
console.error(`❌ ${result.error}`);
|
|
608
633
|
process.exit(2);
|
|
609
634
|
}
|
|
610
635
|
}
|
|
611
636
|
|
|
612
|
-
function handleCaptureTeaching(args: string[]): void {
|
|
637
|
+
async function handleCaptureTeaching(args: string[]): Promise<void> {
|
|
613
638
|
const parsed = parseArgs(args);
|
|
614
639
|
|
|
615
640
|
const required = ["topic", "confidence", "text"];
|
|
@@ -626,18 +651,26 @@ function handleCaptureTeaching(args: string[]): void {
|
|
|
626
651
|
};
|
|
627
652
|
|
|
628
653
|
const result = captureTeaching(input);
|
|
629
|
-
output(result);
|
|
630
654
|
|
|
631
|
-
if (result.success) {
|
|
632
|
-
|
|
633
|
-
|
|
655
|
+
if (result.success && result.event) {
|
|
656
|
+
try {
|
|
657
|
+
await indexAndEmbed([result.event]);
|
|
658
|
+
output(result);
|
|
659
|
+
console.error("✅ Teaching logged and indexed");
|
|
660
|
+
process.exit(0);
|
|
661
|
+
} catch (error) {
|
|
662
|
+
output(result);
|
|
663
|
+
console.error(`✅ Teaching logged (indexing failed: ${error})`);
|
|
664
|
+
process.exit(0);
|
|
665
|
+
}
|
|
634
666
|
} else {
|
|
667
|
+
output(result);
|
|
635
668
|
console.error(`❌ ${result.error}`);
|
|
636
669
|
process.exit(2);
|
|
637
670
|
}
|
|
638
671
|
}
|
|
639
672
|
|
|
640
|
-
function handleCaptureObservation(args: string[]): void {
|
|
673
|
+
async function handleCaptureObservation(args: string[]): Promise<void> {
|
|
641
674
|
const parsed = parseArgs(args);
|
|
642
675
|
|
|
643
676
|
const required = ["topic", "subtype", "confidence", "text"];
|
|
@@ -655,18 +688,26 @@ function handleCaptureObservation(args: string[]): void {
|
|
|
655
688
|
};
|
|
656
689
|
|
|
657
690
|
const result = captureObservation(input);
|
|
658
|
-
output(result);
|
|
659
691
|
|
|
660
|
-
if (result.success) {
|
|
661
|
-
|
|
662
|
-
|
|
692
|
+
if (result.success && result.event) {
|
|
693
|
+
try {
|
|
694
|
+
await indexAndEmbed([result.event]);
|
|
695
|
+
output(result);
|
|
696
|
+
console.error("✅ Observation logged and indexed");
|
|
697
|
+
process.exit(0);
|
|
698
|
+
} catch (error) {
|
|
699
|
+
output(result);
|
|
700
|
+
console.error(`✅ Observation logged (indexing failed: ${error})`);
|
|
701
|
+
process.exit(0);
|
|
702
|
+
}
|
|
663
703
|
} else {
|
|
704
|
+
output(result);
|
|
664
705
|
console.error(`❌ ${result.error}`);
|
|
665
706
|
process.exit(2);
|
|
666
707
|
}
|
|
667
708
|
}
|
|
668
709
|
|
|
669
|
-
function handleCapture(args: string[]): void {
|
|
710
|
+
async function handleCapture(args: string[]): Promise<void> {
|
|
670
711
|
if (hasFlag(args, "help")) {
|
|
671
712
|
showCaptureHelp();
|
|
672
713
|
}
|
|
@@ -682,19 +723,19 @@ function handleCapture(args: string[]): void {
|
|
|
682
723
|
|
|
683
724
|
switch (captureType) {
|
|
684
725
|
case "task":
|
|
685
|
-
handleCaptureTask(captureArgs);
|
|
726
|
+
await handleCaptureTask(captureArgs);
|
|
686
727
|
break;
|
|
687
728
|
case "knowledge":
|
|
688
|
-
handleCaptureKnowledge(captureArgs);
|
|
729
|
+
await handleCaptureKnowledge(captureArgs);
|
|
689
730
|
break;
|
|
690
731
|
case "note":
|
|
691
|
-
handleCaptureNote(captureArgs);
|
|
732
|
+
await handleCaptureNote(captureArgs);
|
|
692
733
|
break;
|
|
693
734
|
case "teaching":
|
|
694
|
-
handleCaptureTeaching(captureArgs);
|
|
735
|
+
await handleCaptureTeaching(captureArgs);
|
|
695
736
|
break;
|
|
696
737
|
case "observation":
|
|
697
|
-
handleCaptureObservation(captureArgs);
|
|
738
|
+
await handleCaptureObservation(captureArgs);
|
|
698
739
|
break;
|
|
699
740
|
default:
|
|
700
741
|
fail(
|
|
@@ -1052,7 +1093,7 @@ Examples:
|
|
|
1052
1093
|
process.exit(0);
|
|
1053
1094
|
}
|
|
1054
1095
|
|
|
1055
|
-
function main(): void {
|
|
1096
|
+
async function main(): Promise<void> {
|
|
1056
1097
|
const args = process.argv.slice(2);
|
|
1057
1098
|
|
|
1058
1099
|
// Show global help only when no args or help is first arg
|
|
@@ -1065,7 +1106,7 @@ function main(): void {
|
|
|
1065
1106
|
|
|
1066
1107
|
switch (command) {
|
|
1067
1108
|
case "search":
|
|
1068
|
-
handleSearch(commandArgs);
|
|
1109
|
+
await handleSearch(commandArgs);
|
|
1069
1110
|
break;
|
|
1070
1111
|
case "list":
|
|
1071
1112
|
handleList(commandArgs);
|
|
@@ -1083,7 +1124,7 @@ function main(): void {
|
|
|
1083
1124
|
handleAbout(commandArgs);
|
|
1084
1125
|
break;
|
|
1085
1126
|
case "capture":
|
|
1086
|
-
handleCapture(commandArgs);
|
|
1127
|
+
await handleCapture(commandArgs);
|
|
1087
1128
|
break;
|
|
1088
1129
|
default:
|
|
1089
1130
|
fail(
|
package/index.ts
CHANGED
package/lib/cache.ts
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/cache.ts - Embedding cache utilities
|
|
3
|
+
*
|
|
4
|
+
* Hash-based caching to avoid re-embedding unchanged content.
|
|
5
|
+
* Used by real-time indexing and batch lore-embed-all.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { createHash } from "crypto";
|
|
9
|
+
import type { Database } from "bun:sqlite";
|
|
10
|
+
|
|
11
|
+
/**
 * Compute the cache key for a piece of content.
 *
 * @param content - Text whose embedding is (or will be) cached
 * @returns Hex-encoded SHA-256 digest of `content`
 */
export function hashContent(content: string): string {
  const hasher = createHash("sha256");
  hasher.update(content);
  return hasher.digest("hex");
}
|
|
17
|
+
|
|
18
|
+
/**
 * Look up a cached embedding by content hash.
 *
 * @param db - Open database containing the `embedding_cache` table
 * @param hash - Content hash used as the cache key
 * @returns The embedding as a number array, or null when there is no usable
 *          cache row (no row, no blob, or a blob whose size is not a whole
 *          number of float32 values)
 */
export function getCachedEmbedding(
  db: Database,
  hash: string,
): number[] | null {
  const stmt = db.prepare(
    "SELECT embedding FROM embedding_cache WHERE hash = ?",
  );
  const row = stmt.get(hash) as { embedding: Uint8Array } | null;

  if (!row || !row.embedding) {
    return null;
  }

  const bytes = row.embedding;

  // A blob that is not a whole number of float32 values cannot be decoded;
  // report a miss so the caller re-embeds and overwrites the row.
  if (bytes.byteLength % Float32Array.BYTES_PER_ELEMENT !== 0) {
    return null;
  }

  // Copy into a fresh buffer before reinterpreting: the Uint8Array may be a
  // window into a larger ArrayBuffer, and `bytes.buffer` alone would ignore
  // its byteOffset/byteLength (and could be misaligned for Float32Array).
  const copy = new Uint8Array(bytes);
  return Array.from(new Float32Array(copy.buffer));
}
|
|
39
|
+
|
|
40
|
+
/**
 * Persist an embedding in the cache, replacing any existing row for `hash`.
 *
 * @param db - Open, writable database containing `embedding_cache`
 * @param hash - Content hash used as the cache key
 * @param embedding - Vector to store; written as a float32 blob
 * @param model - Name of the model that produced the vector
 */
export function cacheEmbedding(
  db: Database,
  hash: string,
  embedding: number[],
  model: string,
): void {
  // Float32 storage: reinterpret the float array's bytes as the blob.
  const floats = Float32Array.from(embedding);
  const blob = new Uint8Array(floats.buffer);
  const dims = embedding.length;
  const createdAt = Date.now();

  db.prepare(`
    INSERT OR REPLACE INTO embedding_cache (hash, embedding, model, dims, created_at)
    VALUES (?, ?, ?, ?, ?)
  `).run(hash, blob, model, dims, createdAt);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Check whether an embedding exists in the cache without retrieving it.
 *
 * @param db - Open database containing the `embedding_cache` table
 * @param hash - Content hash used as the cache key
 * @returns true when a row with this hash exists
 */
export function hasEmbeddingCached(db: Database, hash: string): boolean {
  const stmt = db.prepare(
    "SELECT 1 FROM embedding_cache WHERE hash = ? LIMIT 1",
  );
  // Loose comparison on purpose: "no row" must read as a miss whether the
  // driver reports it as null or as undefined.
  return stmt.get(hash) != null;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Batch check which hashes are missing from the cache.
 *
 * Lookups are issued in fixed-size chunks so that a large batch never puts
 * more bound parameters into one statement than SQLite allows.
 *
 * @param db - Open database containing the `embedding_cache` table
 * @param hashes - Content hashes to check
 * @returns the hashes that need embedding, in input order (duplicates kept)
 */
export function getMissingHashes(db: Database, hashes: string[]): string[] {
  if (hashes.length === 0) return [];

  // Conservative chunk size, below SQLite's historical 999-parameter default.
  const CHUNK_SIZE = 500;
  const cachedSet = new Set<string>();

  for (let start = 0; start < hashes.length; start += CHUNK_SIZE) {
    const chunk = hashes.slice(start, start + CHUNK_SIZE);
    const placeholders = chunk.map(() => "?").join(",");
    const stmt = db.prepare(
      `SELECT hash FROM embedding_cache WHERE hash IN (${placeholders})`,
    );
    const rows = stmt.all(...chunk) as { hash: string }[];
    for (const row of rows) {
      cachedSet.add(row.hash);
    }
  }

  return hashes.filter((h) => !cachedSet.has(h));
}
|
package/lib/capture.ts
CHANGED
|
@@ -12,6 +12,7 @@ import { homedir } from "os";
|
|
|
12
12
|
export interface CaptureResult {
|
|
13
13
|
success: boolean;
|
|
14
14
|
error?: string;
|
|
15
|
+
event?: CaptureEvent;
|
|
15
16
|
[key: string]: unknown;
|
|
16
17
|
}
|
|
17
18
|
|
|
@@ -220,12 +221,15 @@ function writeEvent(event: CaptureEvent): CaptureResult {
|
|
|
220
221
|
ensureLogDirectory();
|
|
221
222
|
|
|
222
223
|
const logPath = getLogPath();
|
|
223
|
-
const eventWithTimestamp = {
|
|
224
|
+
const eventWithTimestamp = {
|
|
225
|
+
...event,
|
|
226
|
+
timestamp: getTimestamp(),
|
|
227
|
+
} as CaptureEvent;
|
|
224
228
|
const jsonLine = JSON.stringify(eventWithTimestamp) + "\n";
|
|
225
229
|
|
|
226
230
|
try {
|
|
227
231
|
appendFileSync(logPath, jsonLine, "utf8");
|
|
228
|
-
return { success: true };
|
|
232
|
+
return { success: true, event: eventWithTimestamp };
|
|
229
233
|
} catch (error) {
|
|
230
234
|
return {
|
|
231
235
|
success: false,
|
package/lib/db.ts
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/db.ts - Shared database utilities
|
|
3
|
+
*
|
|
4
|
+
* Centralizes SQLite setup and database access for both
|
|
5
|
+
* semantic search and real-time indexing.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { Database } from "bun:sqlite";
|
|
9
|
+
import { existsSync } from "fs";
|
|
10
|
+
import { homedir } from "os";
|
|
11
|
+
|
|
12
|
+
// Extension loading on macOS relies on Homebrew's SQLite build, so switch to
// it when it is installed. This runs at module load because it must happen
// before any Database instance is created.
const HOMEBREW_SQLITE = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib";
const homebrewSqliteInstalled = existsSync(HOMEBREW_SQLITE);
if (homebrewSqliteInstalled) {
  Database.setCustomSQLite(HOMEBREW_SQLITE);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Resolve the location of the lore database.
 *
 * @returns `<home directory>/.local/share/lore/lore.db`
 */
export function getDatabasePath(): string {
  const home = homedir();
  return home + "/.local/share/lore/lore.db";
}
|
|
25
|
+
|
|
26
|
+
/**
 * Open the lore database with the sqlite-vec extension loaded.
 *
 * The caller owns the returned handle and is responsible for closing it.
 *
 * @param readonly - Open in readonly mode (default: false)
 * @returns an open Database with sqlite-vec loaded
 * @throws Error when the database file does not exist, when SQLITE_VEC_PATH
 *         is not set, or when the extension fails to load
 */
export function openDatabase(readonly = false): Database {
  const dbPath = getDatabasePath();

  if (!existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
  }

  // Validate configuration before opening, so a missing variable cannot
  // leave an open handle behind.
  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    throw new Error(
      'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
    );
  }

  const db = new Database(dbPath, { readonly });

  try {
    db.loadExtension(vecPath);
  } catch (error) {
    // The handle never reaches the caller on failure, so release it here.
    db.close();
    throw error;
  }

  return db;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Open the lore database without loading sqlite-vec, for operations that
 * only need FTS5.
 *
 * @param readonly - Open in readonly mode (default: false)
 * @returns an open Database handle
 * @throws Error when the database file does not exist
 */
export function openDatabaseBasic(readonly = false): Database {
  const dbPath = getDatabasePath();
  const databaseExists = existsSync(dbPath);

  if (databaseExists) {
    return new Database(dbPath, { readonly });
  }

  throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
}
|
package/lib/list.ts
CHANGED
|
@@ -71,7 +71,7 @@ const PROJECT_FIELD: Record<string, string> = {
|
|
|
71
71
|
commits: "project",
|
|
72
72
|
sessions: "project",
|
|
73
73
|
tasks: "project",
|
|
74
|
-
insights: "
|
|
74
|
+
insights: "topic",
|
|
75
75
|
captures: "topic",
|
|
76
76
|
teachings: "topic",
|
|
77
77
|
learnings: "topic",
|
|
@@ -161,23 +161,28 @@ function queryPersonalType(
|
|
|
161
161
|
type: string,
|
|
162
162
|
limit?: number,
|
|
163
163
|
): ListEntry[] {
|
|
164
|
-
//
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
164
|
+
// Filter by type in SQL, not JS - avoids LIMIT truncation bug
|
|
165
|
+
let sql = `
|
|
166
|
+
SELECT title, content, metadata FROM search
|
|
167
|
+
WHERE source = 'personal'
|
|
168
|
+
AND json_extract(metadata, '$.type') = ?
|
|
169
|
+
ORDER BY json_extract(metadata, '$.timestamp') DESC
|
|
170
|
+
`;
|
|
171
|
+
const params: (string | number)[] = [type];
|
|
168
172
|
|
|
169
|
-
|
|
170
|
-
|
|
173
|
+
if (limit) {
|
|
174
|
+
sql += " LIMIT ?";
|
|
175
|
+
params.push(limit);
|
|
176
|
+
}
|
|
171
177
|
|
|
172
|
-
const
|
|
173
|
-
|
|
174
|
-
title: row.title,
|
|
175
|
-
content: row.content,
|
|
176
|
-
metadata: JSON.parse(row.metadata || "{}"),
|
|
177
|
-
}))
|
|
178
|
-
.filter((entry) => entry.metadata.type === type);
|
|
178
|
+
const stmt = db.prepare(sql);
|
|
179
|
+
const rows = stmt.all(...params) as RawRow[];
|
|
179
180
|
|
|
180
|
-
return
|
|
181
|
+
return rows.map((row) => ({
|
|
182
|
+
title: row.title,
|
|
183
|
+
content: row.content,
|
|
184
|
+
metadata: JSON.parse(row.metadata || "{}"),
|
|
185
|
+
}));
|
|
181
186
|
}
|
|
182
187
|
|
|
183
188
|
/**
|
package/lib/projects.ts
CHANGED
package/lib/realtime.ts
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/realtime.ts - Real-time indexing for captures
|
|
3
|
+
*
|
|
4
|
+
* Makes captures immediately searchable (keyword + semantic) without
|
|
5
|
+
* waiting for batch indexers.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* // CLI - single capture
|
|
9
|
+
* const { event } = captureKnowledge(input); // capture returns a result; `event` is set on success
|
|
10
|
+
* await indexAndEmbed([event]);
|
|
11
|
+
*
|
|
12
|
+
* // Hook - batch captures
|
|
13
|
+
* const events = captures.flatMap(cap => captureKnowledge(cap).event ?? []);
|
|
14
|
+
* await indexAndEmbed(events);
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { Database } from "bun:sqlite";
|
|
18
|
+
import { existsSync } from "fs";
|
|
19
|
+
import {
|
|
20
|
+
embedDocuments,
|
|
21
|
+
getDatabasePath,
|
|
22
|
+
MODEL_NAME,
|
|
23
|
+
EMBEDDING_DIM,
|
|
24
|
+
serializeEmbedding,
|
|
25
|
+
} from "./semantic.js";
|
|
26
|
+
import {
|
|
27
|
+
hashContent,
|
|
28
|
+
getCachedEmbedding,
|
|
29
|
+
cacheEmbedding,
|
|
30
|
+
getMissingHashes,
|
|
31
|
+
} from "./cache.js";
|
|
32
|
+
import type { CaptureEvent } from "./capture.js";
|
|
33
|
+
|
|
34
|
+
/**
 * Index and embed capture events for immediate searchability.
 *
 * 1. Generate embeddings (through the embedding cache)
 * 2. In one transaction, insert each event into the FTS5 search table and
 *    its embedding into the embeddings table
 *
 * Embeddings are produced before any row is written, and the writes are
 * transactional, so a failure cannot leave a search row without its embedding
 * or create duplicates when the caller retries.
 *
 * @param events - Capture events to index; an empty array is a no-op
 * @throws Error when the database file does not exist, when SQLITE_VEC_PATH
 *         is not set, or when embedding or insertion fails
 */
export async function indexAndEmbed(events: CaptureEvent[]): Promise<void> {
  if (events.length === 0) return;

  const dbPath = getDatabasePath();
  if (!existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
  }

  // Check configuration before opening a handle.
  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    throw new Error(
      'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
    );
  }

  const db = new Database(dbPath);

  try {
    // sqlite-vec is required for the embeddings table
    db.loadExtension(vecPath);

    // 1. Generate embeddings first: nothing has been written to the index yet
    //    if this step throws.
    const contents = events.map((e) => getContentForEmbedding(e));
    const embeddings = await embedWithCache(db, contents);

    // 2. Write search rows and embeddings atomically. The search rowid is
    //    used as doc_id for the matching embedding.
    const writeAll = db.transaction(() => {
      events.forEach((event, i) => {
        const docId = insertSearchEntry(db, event);
        insertEmbedding(db, docId, embeddings[i], event);
      });
    });
    writeAll();
  } finally {
    db.close();
  }
}
|
|
80
|
+
|
|
81
|
+
/**
 * Add one capture event to the FTS5 `search` table.
 *
 * @param db - Open, writable database
 * @param event - Capture event to index
 * @returns rowid of the inserted entry (used as doc_id for embeddings)
 */
function insertSearchEntry(db: Database, event: CaptureEvent): number {
  const insert = db.prepare(`
    INSERT INTO search (source, title, content, metadata)
    VALUES (?, ?, ?, ?)
  `);

  const { lastInsertRowid } = insert.run(
    getSourceForEvent(event),
    buildTitle(event),
    getContentForEmbedding(event),
    buildMetadata(event),
  );

  return Number(lastInsertRowid);
}
|
|
99
|
+
|
|
100
|
+
/**
 * Map an event type to the source name used in the search table.
 *
 * Knowledge and note events, and any type not listed, map to "captures".
 */
function getSourceForEvent(event: CaptureEvent): string {
  const sourceByType = new Map<string, string>([
    ["knowledge", "captures"],
    ["teaching", "teachings"],
    ["observation", "observations"],
    ["insight", "insights"],
    ["learning", "learnings"],
    ["task", "tasks"],
    ["note", "captures"],
  ]);

  return sourceByType.get(event.type) ?? "captures";
}
|
|
123
|
+
|
|
124
|
+
/**
 * Build the title stored with an event's FTS5 entry.
 *
 * Missing fields fall back to a per-type placeholder ("general", "medium",
 * "untitled", ...). An event type not listed below gets `[<type>] <topic>`
 * rather than an undefined title.
 *
 * @param event - Capture event to title
 * @returns the title string; always defined
 */
function buildTitle(event: CaptureEvent): string {
  const data = event.data as Record<string, unknown>;
  // Widened copy so the fallback branch can still print the type.
  const kind: string = event.type;

  switch (kind) {
    case "knowledge":
      return `[${data.subtype || "knowledge"}] ${data.topic || "general"}`;
    case "teaching":
      return `[${data.topic || "general"}] (${data.confidence || "medium"})`;
    case "observation":
      return `[${data.subtype || "pattern"}] ${data.topic || "general"}`;
    case "insight":
      return `[${data.subtype || "insight"}] ${data.topic || "general"}`;
    case "learning":
      return `[learning] ${data.topic || "general"}`;
    case "task":
      return `[task] ${data.topic || "general"}: ${data.name || "untitled"}`;
    case "note":
      return `[note] ${data.topic || "general"}`;
    default:
      return `[${kind}] ${data.topic || "general"}`;
  }
}
|
|
147
|
+
|
|
148
|
+
/**
 * Extract the text that is indexed and embedded for an event.
 *
 * Uses `data.content`, then `data.text`. Events that carry neither fall back
 * to their non-empty `name`, `problem` and `solution` strings joined with
 * newlines, so they are not indexed as an empty string.
 * NOTE(review): confirm the fallback layout against the batch indexer's
 * format for task entries.
 *
 * @param event - Capture event
 * @returns the content string; "" when the event has no usable text
 */
function getContentForEmbedding(event: CaptureEvent): string {
  const data = event.data as Record<string, unknown>;

  const primary = data.content || data.text;
  if (primary) {
    return String(primary);
  }

  return [data.name, data.problem, data.solution]
    .filter((part) => typeof part === "string" && part.length > 0)
    .join("\n");
}
|
|
155
|
+
|
|
156
|
+
/**
 * Serialize the metadata stored alongside an event's search entry.
 *
 * Every entry carries topic, timestamp, date (first 10 characters of the
 * timestamp) and content; further fields are added depending on event type.
 *
 * @param event - Capture event
 * @returns metadata as a JSON string
 */
function buildMetadata(event: CaptureEvent): string {
  const data = event.data as Record<string, unknown>;
  const { timestamp } = event;

  const metadata: Record<string, unknown> = {
    topic: data.topic || "general",
    timestamp,
    date: timestamp ? timestamp.slice(0, 10) : "",
    content: getContentForEmbedding(event),
  };

  // Type-specific fields
  const kind = event.type;
  if (kind === "knowledge") {
    metadata.subtype = data.subtype;
  } else if (kind === "teaching") {
    metadata.confidence = data.confidence;
    metadata.capture_source = data.source || "manual";
  } else if (kind === "observation") {
    metadata.subtype = data.subtype;
    metadata.confidence = data.confidence;
    metadata.capture_source = data.source || "auto";
  } else if (kind === "insight") {
    metadata.subtype = data.subtype;
    metadata.session_id = data.session_id;
  } else if (kind === "learning") {
    metadata.persona = data.persona;
  } else if (kind === "task") {
    metadata.name = data.name;
    metadata.problem = data.problem;
    metadata.solution = data.solution;
  } else if (kind === "note") {
    metadata.tags = data.tags;
  }

  return JSON.stringify(metadata);
}
|
|
204
|
+
|
|
205
|
+
/**
 * Embed contents, consulting the embedding cache first.
 *
 * Only cache misses are sent to the model. A cached vector whose length is
 * not EMBEDDING_DIM is treated as a miss and overwritten. Identical contents
 * within one call are embedded once and share the resulting vector.
 *
 * @param db - Open, writable database containing the embedding cache
 * @param contents - Texts to embed
 * @returns one embedding per input, in input order
 */
async function embedWithCache(
  db: Database,
  contents: string[],
): Promise<number[][]> {
  const results: (number[] | null)[] = new Array(contents.length).fill(null);
  const hashes = contents.map((c) => hashContent(c));

  // hash -> every input index waiting on that hash's embedding
  const pending = new Map<string, number[]>();

  hashes.forEach((hash, i) => {
    const waiting = pending.get(hash);
    if (waiting) {
      waiting.push(i);
      return;
    }

    const cached = getCachedEmbedding(db, hash);
    if (cached && cached.length === EMBEDDING_DIM) {
      results[i] = cached;
    } else {
      pending.set(hash, [i]);
    }
  });

  // Embed cache misses, one model call per distinct content
  if (pending.size > 0) {
    const groups = Array.from(pending.entries());
    const embeddings = await embedDocuments(
      groups.map(([, indices]) => contents[indices[0]]),
    );

    groups.forEach(([hash, indices], g) => {
      const embedding = embeddings[g];
      for (const idx of indices) {
        results[idx] = embedding;
      }
      // Reuse the hash computed above instead of hashing the content again.
      cacheEmbedding(db, hash, embedding, MODEL_NAME);
    });
  }

  return results as number[][];
}
|
|
243
|
+
|
|
244
|
+
/**
 * Store one embedding in the `embeddings` table.
 *
 * The row is written with chunk_idx 0, the event's source name and its topic
 * ("" when the event has none).
 *
 * @param db - Open, writable database with sqlite-vec loaded
 * @param docId - rowid of the event's entry in the search table
 * @param embedding - Vector to store
 * @param event - Capture event the vector belongs to
 */
function insertEmbedding(
  db: Database,
  docId: number,
  embedding: number[],
  event: CaptureEvent,
): void {
  const { topic } = event.data as Record<string, unknown>;

  const insert = db.prepare(`
    INSERT INTO embeddings (doc_id, chunk_idx, source, topic, embedding)
    VALUES (?, 0, ?, ?, ?)
  `);

  insert.run(
    docId,
    getSourceForEvent(event),
    String(topic || ""),
    serializeEmbedding(embedding),
  );
}
|
package/lib/search.ts
CHANGED
|
@@ -27,6 +27,20 @@ function getDatabasePath(): string {
|
|
|
27
27
|
return `${homedir()}/.local/share/lore/lore.db`;
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
+
/**
 * Escape a user query for safe use with FTS5 MATCH.
 *
 * Each whitespace-separated term is wrapped in double quotes (embedded double
 * quotes are doubled) so FTS5 reads it as a string, not as query syntax; a
 * bare term such as real-time is otherwise open to syntax interpretation.
 *
 * @param query - Raw query text
 * @returns the quoted terms joined by single spaces; "" when there are none
 */
function escapeFts5Query(query: string): string {
  const quoted: string[] = [];

  for (const term of query.split(/\s+/)) {
    // Leading/trailing whitespace produces empty pieces; drop them.
    if (term === "") continue;
    quoted.push('"' + term.split('"').join('""') + '"');
  }

  return quoted.join(" ");
}
|
|
43
|
+
|
|
30
44
|
/**
|
|
31
45
|
* Search the Lore FTS5 database
|
|
32
46
|
*
|
|
@@ -51,7 +65,7 @@ export function search(
|
|
|
51
65
|
const limit = options.limit ?? 20;
|
|
52
66
|
|
|
53
67
|
const conditions: string[] = ["search MATCH ?"];
|
|
54
|
-
const params: (string | number)[] = [query];
|
|
68
|
+
const params: (string | number)[] = [escapeFts5Query(query)];
|
|
55
69
|
|
|
56
70
|
if (options.source) {
|
|
57
71
|
conditions.push("source = ?");
|
package/lib/semantic.ts
CHANGED
|
@@ -4,22 +4,12 @@
|
|
|
4
4
|
* Query embedding using @huggingface/transformers with nomic-embed-text-v1.5.
|
|
5
5
|
* KNN search against sqlite-vec virtual table.
|
|
6
6
|
* Uses Bun's built-in SQLite with sqlite-vec extension.
|
|
7
|
-
*
|
|
8
|
-
* Note: macOS ships Apple's SQLite which disables extension loading.
|
|
9
|
-
* We use Homebrew's SQLite via setCustomSQLite() to enable sqlite-vec.
|
|
10
7
|
*/
|
|
11
8
|
|
|
12
9
|
import { Database } from "bun:sqlite";
|
|
13
|
-
import { homedir } from "os";
|
|
14
10
|
import { existsSync } from "fs";
|
|
15
11
|
import { pipeline } from "@huggingface/transformers";
|
|
16
|
-
|
|
17
|
-
// Use Homebrew SQLite on macOS to enable extension loading
|
|
18
|
-
// Must be called before any Database instances are created
|
|
19
|
-
const HOMEBREW_SQLITE = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib";
|
|
20
|
-
if (existsSync(HOMEBREW_SQLITE)) {
|
|
21
|
-
Database.setCustomSQLite(HOMEBREW_SQLITE);
|
|
22
|
-
}
|
|
12
|
+
import { getDatabasePath, openDatabase } from "./db.js";
|
|
23
13
|
|
|
24
14
|
export interface SemanticResult {
|
|
25
15
|
source: string;
|
|
@@ -43,7 +33,7 @@ const PROJECT_FIELD: Record<string, string> = {
|
|
|
43
33
|
commits: "project",
|
|
44
34
|
sessions: "project",
|
|
45
35
|
tasks: "project",
|
|
46
|
-
insights: "
|
|
36
|
+
insights: "topic",
|
|
47
37
|
captures: "topic",
|
|
48
38
|
teachings: "topic",
|
|
49
39
|
learnings: "topic",
|
|
@@ -51,6 +41,7 @@ const PROJECT_FIELD: Record<string, string> = {
|
|
|
51
41
|
};
|
|
52
42
|
|
|
53
43
|
const MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5";
|
|
44
|
+
const EMBEDDING_DIM = 768;
|
|
54
45
|
|
|
55
46
|
interface EmbeddingPipeline {
|
|
56
47
|
(
|
|
@@ -64,10 +55,6 @@ interface EmbeddingPipeline {
|
|
|
64
55
|
// Cache the pipeline to avoid reloading on every query
|
|
65
56
|
let cachedPipeline: EmbeddingPipeline | null = null;
|
|
66
57
|
|
|
67
|
-
function getDatabasePath(): string {
|
|
68
|
-
return `${homedir()}/.local/share/lore/lore.db`;
|
|
69
|
-
}
|
|
70
|
-
|
|
71
58
|
/**
|
|
72
59
|
* Get or create the embedding pipeline
|
|
73
60
|
* Pipeline is cached after first load for performance
|
|
@@ -111,9 +98,9 @@ export async function embedQuery(query: string): Promise<number[]> {
|
|
|
111
98
|
// Output is a Tensor, convert to array
|
|
112
99
|
const embedding = Array.from(output.data as Float32Array);
|
|
113
100
|
|
|
114
|
-
if (embedding.length !==
|
|
101
|
+
if (embedding.length !== EMBEDDING_DIM) {
|
|
115
102
|
throw new Error(
|
|
116
|
-
`Invalid embedding: expected
|
|
103
|
+
`Invalid embedding: expected ${EMBEDDING_DIM} dims, got ${embedding.length}`,
|
|
117
104
|
);
|
|
118
105
|
}
|
|
119
106
|
|
|
@@ -121,33 +108,79 @@ export async function embedQuery(query: string): Promise<number[]> {
|
|
|
121
108
|
}
|
|
122
109
|
|
|
123
110
|
/**
|
|
124
|
-
*
|
|
111
|
+
* Embed a document string using local transformers.js model
|
|
112
|
+
* Uses "search_document: " prefix as required by nomic-embed-text
|
|
113
|
+
* @returns 768-dimensional embedding vector
|
|
125
114
|
*/
|
|
126
|
-
export function
|
|
127
|
-
const
|
|
115
|
+
export async function embedDocument(text: string): Promise<number[]> {
|
|
116
|
+
const embedder = await getEmbeddingPipeline();
|
|
128
117
|
|
|
129
|
-
|
|
130
|
-
|
|
118
|
+
const prefixedText = `search_document: ${text}`;
|
|
119
|
+
const output = await embedder(prefixedText, {
|
|
120
|
+
pooling: "mean",
|
|
121
|
+
normalize: true,
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
const embedding = Array.from(output.data as Float32Array);
|
|
125
|
+
|
|
126
|
+
if (embedding.length !== EMBEDDING_DIM) {
|
|
127
|
+
throw new Error(
|
|
128
|
+
`Invalid embedding: expected ${EMBEDDING_DIM} dims, got ${embedding.length}`,
|
|
129
|
+
);
|
|
131
130
|
}
|
|
132
131
|
|
|
133
|
-
|
|
132
|
+
return embedding;
|
|
133
|
+
}
|
|
134
134
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
135
|
+
/**
|
|
136
|
+
* Batch embed multiple documents
|
|
137
|
+
* More efficient than individual calls when embedding several documents
|
|
138
|
+
* @returns array of 768-dimensional embedding vectors
|
|
139
|
+
*/
|
|
140
|
+
export async function embedDocuments(texts: string[]): Promise<number[][]> {
|
|
141
|
+
if (texts.length === 0) return [];
|
|
142
|
+
|
|
143
|
+
const embedder = await getEmbeddingPipeline();
|
|
144
|
+
const results: number[][] = [];
|
|
145
|
+
|
|
146
|
+
// Process one at a time (transformers.js doesn't batch well)
|
|
147
|
+
// But we benefit from cached pipeline
|
|
148
|
+
for (const text of texts) {
|
|
149
|
+
const prefixedText = `search_document: ${text}`;
|
|
150
|
+
const output = await embedder(prefixedText, {
|
|
151
|
+
pooling: "mean",
|
|
152
|
+
normalize: true,
|
|
153
|
+
});
|
|
154
|
+
|
|
155
|
+
const embedding = Array.from(output.data as Float32Array);
|
|
156
|
+
|
|
157
|
+
if (embedding.length !== EMBEDDING_DIM) {
|
|
158
|
+
throw new Error(
|
|
159
|
+
`Invalid embedding: expected ${EMBEDDING_DIM} dims, got ${embedding.length}`,
|
|
160
|
+
);
|
|
140
161
|
}
|
|
141
162
|
|
|
142
|
-
|
|
163
|
+
results.push(embedding);
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
return results;
|
|
167
|
+
}
|
|
143
168
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
169
|
+
/**
|
|
170
|
+
* Check if embeddings table has any data
|
|
171
|
+
*/
|
|
172
|
+
export function hasEmbeddings(): boolean {
|
|
173
|
+
try {
|
|
174
|
+
const db = openDatabase(true);
|
|
175
|
+
try {
|
|
176
|
+
const stmt = db.prepare("SELECT COUNT(*) as count FROM embeddings");
|
|
177
|
+
const result = stmt.get() as { count: number };
|
|
178
|
+
return result.count > 0;
|
|
179
|
+
} finally {
|
|
180
|
+
db.close();
|
|
181
|
+
}
|
|
147
182
|
} catch {
|
|
148
183
|
return false;
|
|
149
|
-
} finally {
|
|
150
|
-
db.close();
|
|
151
184
|
}
|
|
152
185
|
}
|
|
153
186
|
|
|
@@ -166,97 +199,53 @@ export async function semanticSearch(
|
|
|
166
199
|
query: string,
|
|
167
200
|
options: SemanticSearchOptions = {},
|
|
168
201
|
): Promise<SemanticResult[]> {
|
|
169
|
-
const dbPath = getDatabasePath();
|
|
170
|
-
|
|
171
|
-
if (!existsSync(dbPath)) {
|
|
172
|
-
throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
|
|
173
|
-
}
|
|
174
|
-
|
|
175
202
|
// Get query embedding
|
|
176
203
|
const queryEmbedding = await embedQuery(query);
|
|
177
204
|
const queryBlob = serializeEmbedding(queryEmbedding);
|
|
178
205
|
|
|
179
|
-
const db =
|
|
206
|
+
const db = openDatabase(true);
|
|
180
207
|
|
|
181
208
|
try {
|
|
182
|
-
// Load sqlite-vec extension
|
|
183
|
-
const vecPath = process.env.SQLITE_VEC_PATH;
|
|
184
|
-
if (!vecPath) {
|
|
185
|
-
throw new Error(
|
|
186
|
-
'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
|
|
187
|
-
);
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
db.loadExtension(vecPath);
|
|
191
|
-
|
|
192
209
|
const limit = options.limit ?? 20;
|
|
193
210
|
|
|
194
211
|
// KNN query - 1:1 mapping between search rows and embeddings
|
|
195
212
|
// Content is pre-chunked at ingest time
|
|
213
|
+
// source/topic partition columns enable filtered KNN (filter BEFORE search)
|
|
196
214
|
let sql: string;
|
|
197
215
|
const params: (Uint8Array | string | number)[] = [queryBlob];
|
|
198
216
|
|
|
217
|
+
// Build KNN query with optional partition filters
|
|
218
|
+
const conditions = ["e.embedding MATCH ?", "k = ?"];
|
|
219
|
+
params.push(limit);
|
|
220
|
+
|
|
199
221
|
if (options.source) {
|
|
200
|
-
|
|
201
|
-
// This filters BEFORE KNN, not after — critical for domain-specific search
|
|
202
|
-
sql = `
|
|
203
|
-
SELECT
|
|
204
|
-
s.source,
|
|
205
|
-
s.title,
|
|
206
|
-
s.content,
|
|
207
|
-
s.metadata,
|
|
208
|
-
e.distance
|
|
209
|
-
FROM embeddings e
|
|
210
|
-
JOIN search s ON e.doc_id = s.rowid
|
|
211
|
-
WHERE e.embedding MATCH ?
|
|
212
|
-
AND k = ?
|
|
213
|
-
AND e.source = ?
|
|
214
|
-
ORDER BY e.distance
|
|
215
|
-
LIMIT ?
|
|
216
|
-
`;
|
|
217
|
-
params.push(limit);
|
|
222
|
+
conditions.push("e.source = ?");
|
|
218
223
|
params.push(options.source);
|
|
219
|
-
params.push(limit);
|
|
220
|
-
} else {
|
|
221
|
-
sql = `
|
|
222
|
-
SELECT
|
|
223
|
-
s.source,
|
|
224
|
-
s.title,
|
|
225
|
-
s.content,
|
|
226
|
-
s.metadata,
|
|
227
|
-
e.distance
|
|
228
|
-
FROM embeddings e
|
|
229
|
-
JOIN search s ON e.doc_id = s.rowid
|
|
230
|
-
WHERE e.embedding MATCH ?
|
|
231
|
-
AND k = ?
|
|
232
|
-
ORDER BY e.distance
|
|
233
|
-
LIMIT ?
|
|
234
|
-
`;
|
|
235
|
-
params.push(limit);
|
|
236
|
-
params.push(limit);
|
|
237
224
|
}
|
|
238
225
|
|
|
239
|
-
const stmt = db.prepare(sql);
|
|
240
|
-
const results = stmt.all(...params) as SemanticResult[];
|
|
241
|
-
|
|
242
|
-
// Post-filter by project if specified
|
|
243
|
-
// KNN WHERE clause doesn't support json_extract on joined metadata,
|
|
244
|
-
// so we filter after the query returns
|
|
245
226
|
if (options.project) {
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
if (!field) return false;
|
|
249
|
-
|
|
250
|
-
try {
|
|
251
|
-
const metadata = JSON.parse(result.metadata);
|
|
252
|
-
return metadata[field] === options.project;
|
|
253
|
-
} catch {
|
|
254
|
-
// Skip results with malformed metadata
|
|
255
|
-
return false;
|
|
256
|
-
}
|
|
257
|
-
});
|
|
227
|
+
conditions.push("e.topic = ?");
|
|
228
|
+
params.push(options.project);
|
|
258
229
|
}
|
|
259
230
|
|
|
231
|
+
sql = `
|
|
232
|
+
SELECT
|
|
233
|
+
s.source,
|
|
234
|
+
s.title,
|
|
235
|
+
s.content,
|
|
236
|
+
s.metadata,
|
|
237
|
+
e.distance
|
|
238
|
+
FROM embeddings e
|
|
239
|
+
JOIN search s ON e.doc_id = s.rowid
|
|
240
|
+
WHERE ${conditions.join("\n AND ")}
|
|
241
|
+
ORDER BY e.distance
|
|
242
|
+
LIMIT ?
|
|
243
|
+
`;
|
|
244
|
+
params.push(limit);
|
|
245
|
+
|
|
246
|
+
const stmt = db.prepare(sql);
|
|
247
|
+
const results = stmt.all(...params) as SemanticResult[];
|
|
248
|
+
|
|
260
249
|
return results;
|
|
261
250
|
} finally {
|
|
262
251
|
db.close();
|
|
@@ -349,3 +338,6 @@ export function formatBriefSearch(results: SemanticResult[]): string {
|
|
|
349
338
|
|
|
350
339
|
return sections.join("\n\n");
|
|
351
340
|
}
|
|
341
|
+
|
|
342
|
+
// Export constants and helpers for realtime.ts
|
|
343
|
+
export { MODEL_NAME, EMBEDDING_DIM, serializeEmbedding, getDatabasePath };
|