@voidwire/lore 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/cli.ts +85 -3
- package/index.ts +17 -0
- package/lib/atuin.ts +160 -0
- package/lib/semantic.ts +278 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -35,6 +35,20 @@ lore capture task|knowledge|note # Capture knowledge
|
|
|
35
35
|
- `--since <date>` — Filter by date (today, yesterday, this-week, YYYY-MM-DD)
|
|
36
36
|
- `--sources` — List indexed sources with counts
|
|
37
37
|
|
|
38
|
+
### Passthrough Sources
|
|
39
|
+
|
|
40
|
+
Some sources query external services rather than the local index:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
lore search prismis "kubernetes security" # Semantic search via prismis
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
| Source | Description | Requires |
|
|
47
|
+
|--------|-------------|----------|
|
|
48
|
+
| `prismis` | Semantic search across saved articles | prismis-daemon running |
|
|
49
|
+
|
|
50
|
+
Passthrough sources appear in `lore search --sources` with `type: "passthrough"`.
|
|
51
|
+
|
|
38
52
|
### List Options
|
|
39
53
|
|
|
40
54
|
- `--limit <n>` — Maximum entries
|
package/cli.ts
CHANGED
|
@@ -22,12 +22,16 @@
|
|
|
22
22
|
import {
|
|
23
23
|
search,
|
|
24
24
|
searchPrismis,
|
|
25
|
+
searchAtuin,
|
|
25
26
|
listSources,
|
|
26
27
|
list,
|
|
27
28
|
listDomains,
|
|
28
29
|
captureTask,
|
|
29
30
|
captureKnowledge,
|
|
30
31
|
captureNote,
|
|
32
|
+
semanticSearch,
|
|
33
|
+
isOllamaAvailable,
|
|
34
|
+
hasEmbeddings,
|
|
31
35
|
DOMAINS,
|
|
32
36
|
type SearchResult,
|
|
33
37
|
type ListResult,
|
|
@@ -72,8 +76,29 @@ function parseArgs(args: string[]): Map<string, string> {
|
|
|
72
76
|
return parsed;
|
|
73
77
|
}
|
|
74
78
|
|
|
79
|
+
// Flags that are pure switches: they never consume the argument that follows.
const BOOLEAN_FLAGS = new Set(["help", "sources", "domains", "exact"]);

/**
 * Extract the positional (non-flag) arguments from a raw argument list.
 *
 * Rules, applied left to right:
 * - `--flag=value` and any flag listed in BOOLEAN_FLAGS occupy one slot.
 * - Any other `--flag` also consumes the next argument as its value, unless
 *   that argument is missing or is itself a `--flag`.
 * - A bare `--` ends option parsing; everything after it is positional, even
 *   if it starts with `--`. (Previously `--` was treated as a value-taking
 *   flag and silently swallowed the next positional.)
 *
 * @param args Raw command-line arguments for the subcommand.
 * @returns The positional arguments in their original order.
 */
function getPositionalArgs(args: string[]): string[] {
  const result: string[] = [];
  let i = 0;

  while (i < args.length) {
    const arg = args[i];

    if (arg === "--") {
      // End-of-options marker: the remainder is positional verbatim.
      result.push(...args.slice(i + 1));
      break;
    }

    if (!arg.startsWith("--")) {
      result.push(arg);
      i += 1;
      continue;
    }

    // Strip the leading dashes and any inline "=value" to get the flag name.
    const [flag = ""] = arg.slice(2).split("=");
    const isSelfContained = BOOLEAN_FLAGS.has(flag) || arg.includes("=");
    const next = args[i + 1];
    const takesNextAsValue =
      !isSelfContained && next !== undefined && !next.startsWith("--");

    i += takesNextAsValue ? 2 : 1;
  }

  return result;
}
|
|
78
103
|
|
|
79
104
|
function hasFlag(args: string[], flag: string): boolean {
|
|
@@ -112,19 +137,21 @@ function fail(error: string, code: number = 1): never {
|
|
|
112
137
|
// Search Command
|
|
113
138
|
// ============================================================================
|
|
114
139
|
|
|
115
|
-
function handleSearch(args: string[]): void {
|
|
140
|
+
async function handleSearch(args: string[]): Promise<void> {
|
|
116
141
|
if (hasFlag(args, "help")) {
|
|
117
142
|
showSearchHelp();
|
|
118
143
|
}
|
|
119
144
|
|
|
120
145
|
const parsed = parseArgs(args);
|
|
121
146
|
const positional = getPositionalArgs(args);
|
|
147
|
+
const exact = hasFlag(args, "exact");
|
|
122
148
|
|
|
123
149
|
// Handle --sources flag
|
|
124
150
|
if (hasFlag(args, "sources")) {
|
|
125
151
|
const indexed = listSources();
|
|
126
152
|
const passthrough = [
|
|
127
153
|
{ source: "prismis", count: null, type: "passthrough" },
|
|
154
|
+
{ source: "atuin", count: null, type: "passthrough" },
|
|
128
155
|
];
|
|
129
156
|
const sources = [
|
|
130
157
|
...indexed.map((s) => ({ ...s, type: "indexed" })),
|
|
@@ -177,15 +204,63 @@ function handleSearch(args: string[]): void {
|
|
|
177
204
|
return;
|
|
178
205
|
}
|
|
179
206
|
|
|
207
|
+
// Handle atuin passthrough
|
|
208
|
+
if (source === "atuin") {
|
|
209
|
+
try {
|
|
210
|
+
const results = searchAtuin(query, { limit });
|
|
211
|
+
output({
|
|
212
|
+
success: true,
|
|
213
|
+
results,
|
|
214
|
+
count: results.length,
|
|
215
|
+
});
|
|
216
|
+
console.error(
|
|
217
|
+
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found`,
|
|
218
|
+
);
|
|
219
|
+
process.exit(0);
|
|
220
|
+
} catch (error) {
|
|
221
|
+
const message = error instanceof Error ? error.message : "Unknown error";
|
|
222
|
+
fail(message, 2);
|
|
223
|
+
}
|
|
224
|
+
return;
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
// Route semantic vs FTS5 based on --exact flag and availability
|
|
228
|
+
if (!exact) {
|
|
229
|
+
try {
|
|
230
|
+
const canUseSemantic = hasEmbeddings() && (await isOllamaAvailable());
|
|
231
|
+
if (canUseSemantic) {
|
|
232
|
+
const results = await semanticSearch(query, { source, limit });
|
|
233
|
+
output({
|
|
234
|
+
success: true,
|
|
235
|
+
results,
|
|
236
|
+
count: results.length,
|
|
237
|
+
mode: "semantic",
|
|
238
|
+
});
|
|
239
|
+
console.error(
|
|
240
|
+
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (semantic)`,
|
|
241
|
+
);
|
|
242
|
+
process.exit(0);
|
|
243
|
+
}
|
|
244
|
+
// Fall through to FTS5 if semantic not available
|
|
245
|
+
} catch (error) {
|
|
246
|
+
// Semantic search failed, fall back to FTS5
|
|
247
|
+
console.error(
|
|
248
|
+
`⚠️ Semantic search unavailable: ${error instanceof Error ? error.message : "Unknown error"}`,
|
|
249
|
+
);
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
// FTS5 path (default fallback or explicit --exact)
|
|
180
254
|
try {
|
|
181
255
|
const results = search(query, { source, limit, since });
|
|
182
256
|
output({
|
|
183
257
|
success: true,
|
|
184
258
|
results,
|
|
185
259
|
count: results.length,
|
|
260
|
+
mode: exact ? "exact" : "fts5",
|
|
186
261
|
});
|
|
187
262
|
console.error(
|
|
188
|
-
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found`,
|
|
263
|
+
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (${exact ? "exact" : "fts5"})`,
|
|
189
264
|
);
|
|
190
265
|
process.exit(0);
|
|
191
266
|
} catch (error) {
|
|
@@ -409,12 +484,14 @@ Usage:
|
|
|
409
484
|
lore capture task|knowledge|note Capture knowledge
|
|
410
485
|
|
|
411
486
|
Search Options:
|
|
487
|
+
--exact Use FTS5 text search (bypasses semantic search)
|
|
412
488
|
--limit <n> Maximum results (default: 20)
|
|
413
489
|
--since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
|
|
414
490
|
--sources List indexed sources with counts
|
|
415
491
|
|
|
416
492
|
Passthrough Sources:
|
|
417
493
|
prismis Semantic search via prismis daemon (requires prismis-daemon running)
|
|
494
|
+
atuin Shell history search (queries ~/.local/share/atuin/history.db directly)
|
|
418
495
|
|
|
419
496
|
List Options:
|
|
420
497
|
--limit <n> Maximum entries
|
|
@@ -458,6 +535,7 @@ Usage:
|
|
|
458
535
|
lore search --sources List indexed sources
|
|
459
536
|
|
|
460
537
|
Options:
|
|
538
|
+
--exact Use FTS5 text search (bypasses semantic search)
|
|
461
539
|
--limit <n> Maximum results (default: 20)
|
|
462
540
|
--since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
|
|
463
541
|
--sources List indexed sources with counts
|
|
@@ -479,12 +557,16 @@ Indexed Sources:
|
|
|
479
557
|
Passthrough Sources:
|
|
480
558
|
prismis Semantic search via prismis daemon
|
|
481
559
|
(requires prismis-daemon running)
|
|
560
|
+
atuin Shell history search
|
|
561
|
+
(queries ~/.local/share/atuin/history.db directly)
|
|
482
562
|
|
|
483
563
|
Examples:
|
|
484
564
|
lore search "authentication"
|
|
485
565
|
lore search blogs "typescript patterns"
|
|
486
566
|
lore search commits --since this-week "refactor"
|
|
567
|
+
lore search --exact "def process_data"
|
|
487
568
|
lore search prismis "kubernetes security"
|
|
569
|
+
lore search atuin "docker build"
|
|
488
570
|
`);
|
|
489
571
|
process.exit(0);
|
|
490
572
|
}
|
package/index.ts
CHANGED
|
@@ -34,6 +34,13 @@ export {
|
|
|
34
34
|
type PrismisSearchOptions,
|
|
35
35
|
} from "./lib/prismis";
|
|
36
36
|
|
|
37
|
+
// Atuin integration
|
|
38
|
+
export {
|
|
39
|
+
searchAtuin,
|
|
40
|
+
type AtuinSearchResult,
|
|
41
|
+
type AtuinSearchOptions,
|
|
42
|
+
} from "./lib/atuin";
|
|
43
|
+
|
|
37
44
|
// Capture
|
|
38
45
|
export {
|
|
39
46
|
captureKnowledge,
|
|
@@ -46,3 +53,13 @@ export {
|
|
|
46
53
|
type NoteInput,
|
|
47
54
|
type CaptureEvent,
|
|
48
55
|
} from "./lib/capture";
|
|
56
|
+
|
|
57
|
+
// Semantic search
|
|
58
|
+
export {
|
|
59
|
+
semanticSearch,
|
|
60
|
+
embedQuery,
|
|
61
|
+
isOllamaAvailable,
|
|
62
|
+
hasEmbeddings,
|
|
63
|
+
type SemanticResult,
|
|
64
|
+
type SemanticSearchOptions,
|
|
65
|
+
} from "./lib/semantic";
|
package/lib/atuin.ts
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Atuin shell history integration
|
|
3
|
+
*
|
|
4
|
+
* Queries Atuin SQLite database directly for shell command history.
|
|
5
|
+
* Filters sensitive commands containing passwords, tokens, secrets, API keys.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { existsSync } from "fs";
|
|
9
|
+
import { join } from "path";
|
|
10
|
+
import { Database } from "bun:sqlite";
|
|
11
|
+
|
|
12
|
+
export interface AtuinSearchResult {
|
|
13
|
+
source: string;
|
|
14
|
+
title: string;
|
|
15
|
+
content: string;
|
|
16
|
+
metadata: string;
|
|
17
|
+
rank: number;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export interface AtuinSearchOptions {
|
|
21
|
+
limit?: number;
|
|
22
|
+
cwd?: string;
|
|
23
|
+
exitCode?: number;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const ATUIN_DB_PATH = join(
|
|
27
|
+
process.env.HOME ?? "",
|
|
28
|
+
".local",
|
|
29
|
+
"share",
|
|
30
|
+
"atuin",
|
|
31
|
+
"history.db",
|
|
32
|
+
);
|
|
33
|
+
|
|
34
|
+
// Patterns that indicate sensitive data - exclude these commands
|
|
35
|
+
const SENSITIVE_PATTERNS = [
|
|
36
|
+
"%--password%",
|
|
37
|
+
"%--token%",
|
|
38
|
+
"%--secret%",
|
|
39
|
+
"%PASSWORD=%",
|
|
40
|
+
"%TOKEN=%",
|
|
41
|
+
"%SECRET=%",
|
|
42
|
+
"%API_KEY=%",
|
|
43
|
+
"%APIKEY=%",
|
|
44
|
+
"%_KEY=%",
|
|
45
|
+
"export %KEY%",
|
|
46
|
+
"export %TOKEN%",
|
|
47
|
+
"export %SECRET%",
|
|
48
|
+
"export %PASSWORD%",
|
|
49
|
+
"%X-API-Key:%",
|
|
50
|
+
"%Authorization:%",
|
|
51
|
+
"echo $%KEY%",
|
|
52
|
+
"echo $%TOKEN%",
|
|
53
|
+
"echo $%SECRET%",
|
|
54
|
+
];
|
|
55
|
+
|
|
56
|
+
interface AtuinRow {
|
|
57
|
+
command: string;
|
|
58
|
+
cwd: string;
|
|
59
|
+
exit: number;
|
|
60
|
+
duration: number;
|
|
61
|
+
timestamp: number;
|
|
62
|
+
hostname: string;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
 * Search Atuin shell history for commands containing `query`.
 *
 * Opens the Atuin SQLite database read-only, excludes soft-deleted rows and
 * any command matching one of SENSITIVE_PATTERNS, and returns the most recent
 * matches first.
 *
 * The query is matched as literal text: LIKE metacharacters (`%`, `_`) and the
 * escape character itself are escaped, so searching for `my_file` does not
 * also match `my-file`.
 *
 * @param query Substring to look for in the recorded command line.
 * @param options Optional `limit` (default 20), exact `cwd` match, and exact
 *   `exitCode` match.
 * @returns One result per matching history row, newest first. `rank` is the
 *   negated result index, so earlier (more recent) rows rank higher.
 * @throws Error if the Atuin database file does not exist.
 */
export function searchAtuin(
  query: string,
  options: AtuinSearchOptions = {},
): AtuinSearchResult[] {
  if (!existsSync(ATUIN_DB_PATH)) {
    throw new Error(
      `Atuin database not found at ${ATUIN_DB_PATH}. ` +
        "Install Atuin: https://atuin.sh",
    );
  }

  const db = new Database(ATUIN_DB_PATH, { readonly: true });
  const limit = options.limit ?? 20;

  try {
    // Escape LIKE metacharacters so user input is matched literally.
    const literalQuery = query.replace(/[\\%_]/g, "\\$&");

    // Conditions and their bound parameters are built side by side so the
    // placeholder order can never drift from the parameter order.
    const conditions = [
      "deleted_at IS NULL",
      "command LIKE ? ESCAPE '\\'",
      ...SENSITIVE_PATTERNS.map(() => "command NOT LIKE ?"),
    ];
    const params: (string | number)[] = [
      `%${literalQuery}%`,
      ...SENSITIVE_PATTERNS,
    ];

    if (options.cwd) {
      conditions.push("cwd = ?");
      params.push(options.cwd);
    }

    if (options.exitCode !== undefined) {
      conditions.push("exit = ?");
      params.push(options.exitCode);
    }

    params.push(limit);

    const sql = `
      SELECT command, cwd, exit, duration, timestamp, hostname
      FROM history
      WHERE ${conditions.join(" AND ")}
      ORDER BY timestamp DESC
      LIMIT ?
    `;

    const rows = db.prepare(sql).all(...params) as AtuinRow[];

    return rows.map((row, index) => {
      // Timestamp is treated as nanoseconds since the epoch; keep the date only.
      const timestampSec = Math.floor(row.timestamp / 1_000_000_000);
      const dateStr = new Date(timestampSec * 1000).toISOString().split("T")[0];

      // Duration is treated as nanoseconds; report milliseconds.
      const durationMs = Math.floor(row.duration / 1_000_000);

      // Titles are capped at 80 command characters (77 plus an ellipsis).
      const title =
        row.command.length > 80
          ? `[shell] ${row.command.slice(0, 77)}...`
          : `[shell] ${row.command}`;

      // The placeholder value "unknown" is normalized to an empty string.
      const cwd = row.cwd === "unknown" ? "" : row.cwd;

      return {
        source: "atuin",
        title,
        content: row.command,
        metadata: JSON.stringify({
          command: row.command,
          cwd,
          exit_code: row.exit,
          duration_ms: durationMs,
          date: dateStr,
          hostname: row.hostname,
        }),
        rank: -index, // Simple ranking by recency
      };
    });
  } finally {
    db.close();
  }
}
|
package/lib/semantic.ts
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/semantic.ts - Semantic search via Ollama embeddings
|
|
3
|
+
*
|
|
4
|
+
* Query embedding and KNN search against sqlite-vec virtual table.
|
|
5
|
+
* Uses Bun's built-in SQLite with sqlite-vec extension.
|
|
6
|
+
*
|
|
7
|
+
* Note: macOS ships Apple's SQLite which disables extension loading.
|
|
8
|
+
* We use Homebrew's SQLite via setCustomSQLite() to enable sqlite-vec.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { Database } from "bun:sqlite";
|
|
12
|
+
import { homedir } from "os";
|
|
13
|
+
import { existsSync, readFileSync } from "fs";
|
|
14
|
+
|
|
15
|
+
// Use Homebrew SQLite on macOS to enable extension loading
|
|
16
|
+
// Must be called before any Database instances are created
|
|
17
|
+
const HOMEBREW_SQLITE = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib";
|
|
18
|
+
if (existsSync(HOMEBREW_SQLITE)) {
|
|
19
|
+
Database.setCustomSQLite(HOMEBREW_SQLITE);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export interface SemanticResult {
|
|
23
|
+
source: string;
|
|
24
|
+
title: string;
|
|
25
|
+
content: string;
|
|
26
|
+
metadata: string;
|
|
27
|
+
distance: number;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
export interface SemanticSearchOptions {
|
|
31
|
+
source?: string;
|
|
32
|
+
limit?: number;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
interface EmbeddingConfig {
|
|
36
|
+
endpoint: string;
|
|
37
|
+
model: string;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
const DEFAULT_CONFIG: EmbeddingConfig = {
|
|
41
|
+
endpoint: "http://localhost:11434",
|
|
42
|
+
model: "nomic-embed-text",
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
/** Absolute path of the lore SQLite database under the user's home directory. */
function getDatabasePath(): string {
  const home = homedir();
  return home + "/.local/share/lore/lore.db";
}
|
|
48
|
+
|
|
49
|
+
/** Absolute path of the lore TOML config file under the user's home directory. */
function getConfigPath(): string {
  const home = homedir();
  return home + "/.config/lore/config.toml";
}
|
|
52
|
+
|
|
53
|
+
/**
 * Resolve the embedding endpoint and model from the lore config file.
 *
 * The endpoint is taken from `[embedding].endpoint`, falling back to
 * `[llm].api_base`. The model is taken from `[embedding].model`, falling back
 * to the default model. If the file is missing, cannot be read, or defines
 * neither endpoint key, DEFAULT_CONFIG is returned unchanged.
 *
 * Values are extracted with regular expressions rather than a TOML parser, so
 * only double-quoted string values are recognized.
 */
function loadEmbeddingConfig(): EmbeddingConfig {
  const path = getConfigPath();
  if (!existsSync(path)) {
    return DEFAULT_CONFIG;
  }

  try {
    const text = readFileSync(path, "utf-8");

    // Preferred key first, then the [llm] fallback.
    const endpoint =
      text.match(/\[embedding\][^[]*endpoint\s*=\s*"([^"]+)"/s)?.[1] ??
      text.match(/\[llm\][^[]*api_base\s*=\s*"([^"]+)"/s)?.[1];

    if (endpoint === undefined) {
      return DEFAULT_CONFIG;
    }

    // The model lookup is the same regardless of where the endpoint came from.
    const model =
      text.match(/\[embedding\][^[]*model\s*=\s*"([^"]+)"/s)?.[1] ??
      DEFAULT_CONFIG.model;

    return { endpoint, model };
  } catch {
    return DEFAULT_CONFIG;
  }
}
|
|
98
|
+
|
|
99
|
+
/**
 * Check whether Ollama answers at the configured endpoint.
 *
 * Issues `GET {endpoint}/api/tags` and aborts after 2 seconds.
 *
 * @returns `true` only for a successful (2xx) response; `false` on any
 *   non-OK status, network error, or timeout. Never rejects.
 */
export async function isOllamaAvailable(): Promise<boolean> {
  const config = loadEmbeddingConfig();
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 2000);

  try {
    const response = await fetch(`${config.endpoint}/api/tags`, {
      method: "GET",
      signal: controller.signal,
    });
    return response.ok;
  } catch {
    return false;
  } finally {
    // Always cancel the timer, including when fetch rejects immediately
    // (e.g. connection refused); otherwise it stays pending for 2 seconds.
    clearTimeout(timeout);
  }
}
|
|
119
|
+
|
|
120
|
+
/**
 * Turn a query string into an embedding vector via Ollama.
 *
 * Posts the configured model and the query to `{endpoint}/api/embeddings`.
 *
 * @param query Text to embed.
 * @returns The 768-dimensional embedding vector from the response.
 * @throws Error if the HTTP response is not OK, or if the response does not
 *   contain an array of exactly 768 values.
 */
export async function embedQuery(query: string): Promise<number[]> {
  const { endpoint, model } = loadEmbeddingConfig();

  const response = await fetch(`${endpoint}/api/embeddings`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model, prompt: query }),
  });

  if (!response.ok) {
    throw new Error(
      `Ollama API error: ${response.status} ${response.statusText}`,
    );
  }

  const payload = (await response.json()) as { embedding?: number[] };
  const embedding = payload.embedding;

  const isValid = Array.isArray(embedding) && embedding.length === 768;
  if (!isValid) {
    throw new Error(
      `Invalid embedding: expected 768 dims, got ${embedding?.length ?? 0}`,
    );
  }

  return embedding;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Report whether the embeddings table exists and holds at least one row.
 *
 * This is a best-effort probe: every failure mode yields `false` rather than
 * an exception. That includes a missing database file, an unset
 * SQLITE_VEC_PATH, a database that cannot be opened, an extension that fails
 * to load, and a missing table.
 *
 * @returns `true` only if the row count of `embeddings` is greater than zero.
 */
export function hasEmbeddings(): boolean {
  const dbPath = getDatabasePath();
  const vecPath = process.env.SQLITE_VEC_PATH;

  // Cheap preconditions first, so no database handle is opened needlessly.
  if (!vecPath || !existsSync(dbPath)) {
    return false;
  }

  let db: Database | undefined;
  try {
    // Opening inside the try keeps an open failure on the `false` path.
    db = new Database(dbPath, { readonly: true });
    db.loadExtension(vecPath);

    const row = db
      .prepare("SELECT COUNT(*) as count FROM embeddings")
      .get() as { count: number } | null;
    return (row?.count ?? 0) > 0;
  } catch {
    return false;
  } finally {
    db?.close();
  }
}
|
|
185
|
+
|
|
186
|
+
/**
 * Pack an embedding into the raw byte form passed to sqlite-vec.
 *
 * Each value is narrowed to a 32-bit float; the result is a byte view over
 * that float buffer (4 bytes per value, platform byte order).
 */
function serializeEmbedding(embedding: number[]): Uint8Array {
  const floats = Float32Array.from(embedding);
  const bytes = new Uint8Array(floats.buffer);
  return bytes;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Semantic search: embed the query and run a KNN lookup against the
 * `embeddings` table, joined to `search` for the document fields.
 *
 * The KNN step requests `limit * 3` nearest chunks, then groups by document
 * and keeps the smallest distance per document, so the final list has at most
 * `limit` distinct documents ordered by ascending distance.
 *
 * Local preconditions (database file, SQLITE_VEC_PATH) are checked before the
 * query is embedded, so a misconfigured environment fails without a network
 * round-trip to Ollama.
 *
 * @param query Natural-language query text.
 * @param options Optional `source` filter and `limit` (default 20).
 * @returns Matching documents with their best (minimum) chunk distance.
 * @throws Error if the database file is missing, SQLITE_VEC_PATH is unset,
 *   or embedding the query fails.
 */
export async function semanticSearch(
  query: string,
  options: SemanticSearchOptions = {},
): Promise<SemanticResult[]> {
  const dbPath = getDatabasePath();
  if (!existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
  }

  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    throw new Error(
      'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
    );
  }

  const queryBlob = serializeEmbedding(await embedQuery(query));
  const limit = options.limit ?? 20;

  // One statement for both cases; the source filter and its parameter are
  // added together so placeholders and bound values stay aligned.
  const sourceFilter = options.source ? "AND s.source = ?" : "";
  const sql = `
    SELECT
      s.source,
      s.title,
      s.content,
      s.metadata,
      MIN(e.distance) as distance
    FROM embeddings e
    JOIN search s ON e.doc_id = s.rowid
    WHERE e.embedding MATCH ?
      AND k = ?
      ${sourceFilter}
    GROUP BY s.rowid
    ORDER BY distance
    LIMIT ?
  `;

  const params: (Uint8Array | string | number)[] = [
    queryBlob,
    limit * 3, // Fetch more chunks than needed; grouping collapses them per document
    ...(options.source ? [options.source] : []),
    limit,
  ];

  const db = new Database(dbPath, { readonly: true });
  try {
    db.loadExtension(vecPath);
    return db.prepare(sql).all(...params) as SemanticResult[];
  } finally {
    db.close();
  }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@voidwire/lore",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.1.4",
|
|
4
4
|
"description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
"bun": ">=1.0.0"
|
|
43
43
|
},
|
|
44
44
|
"devDependencies": {
|
|
45
|
-
"bun-types": "
|
|
45
|
+
"bun-types": "1.3.5"
|
|
46
46
|
},
|
|
47
47
|
"scripts": {
|
|
48
48
|
"test": "bun test"
|