@voidwire/lore 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.ts +57 -12
- package/index.ts +10 -0
- package/lib/semantic.ts +278 -0
- package/package.json +5 -5
package/cli.ts
CHANGED
|
@@ -29,6 +29,9 @@ import {
|
|
|
29
29
|
captureTask,
|
|
30
30
|
captureKnowledge,
|
|
31
31
|
captureNote,
|
|
32
|
+
semanticSearch,
|
|
33
|
+
isOllamaAvailable,
|
|
34
|
+
hasEmbeddings,
|
|
32
35
|
DOMAINS,
|
|
33
36
|
type SearchResult,
|
|
34
37
|
type ListResult,
|
|
@@ -73,20 +76,27 @@ function parseArgs(args: string[]): Map<string, string> {
|
|
|
73
76
|
return parsed;
|
|
74
77
|
}
|
|
75
78
|
|
|
79
|
+
// Boolean flags that don't take values
|
|
80
|
+
const BOOLEAN_FLAGS = new Set(["help", "sources", "domains", "exact"]);
|
|
81
|
+
|
|
76
82
|
function getPositionalArgs(args: string[]): string[] {
|
|
77
83
|
const result: string[] = [];
|
|
78
|
-
let
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
skipNext = false;
|
|
82
|
-
continue;
|
|
83
|
-
}
|
|
84
|
+
let i = 0;
|
|
85
|
+
while (i < args.length) {
|
|
86
|
+
const arg = args[i];
|
|
84
87
|
if (arg.startsWith("--")) {
|
|
85
|
-
|
|
86
|
-
|
|
88
|
+
const flag = arg.slice(2).split("=")[0]; // Handle --flag=value format
|
|
89
|
+
if (BOOLEAN_FLAGS.has(flag) || arg.includes("=")) {
|
|
90
|
+
i += 1; // Boolean flag or --flag=value, skip only the flag
|
|
91
|
+
} else if (i + 1 < args.length && !args[i + 1].startsWith("--")) {
|
|
92
|
+
i += 2; // Flag with separate value, skip both
|
|
93
|
+
} else {
|
|
94
|
+
i += 1; // Flag at end or followed by another flag
|
|
95
|
+
}
|
|
87
96
|
continue;
|
|
88
97
|
}
|
|
89
98
|
result.push(arg);
|
|
99
|
+
i++;
|
|
90
100
|
}
|
|
91
101
|
return result;
|
|
92
102
|
}
|
|
@@ -127,13 +137,14 @@ function fail(error: string, code: number = 1): never {
|
|
|
127
137
|
// Search Command
|
|
128
138
|
// ============================================================================
|
|
129
139
|
|
|
130
|
-
function handleSearch(args: string[]): void {
|
|
140
|
+
async function handleSearch(args: string[]): Promise<void> {
|
|
131
141
|
if (hasFlag(args, "help")) {
|
|
132
142
|
showSearchHelp();
|
|
133
143
|
}
|
|
134
144
|
|
|
135
145
|
const parsed = parseArgs(args);
|
|
136
146
|
const positional = getPositionalArgs(args);
|
|
147
|
+
const exact = hasFlag(args, "exact");
|
|
137
148
|
|
|
138
149
|
// Handle --sources flag
|
|
139
150
|
if (hasFlag(args, "sources")) {
|
|
@@ -213,20 +224,51 @@ function handleSearch(args: string[]): void {
|
|
|
213
224
|
return;
|
|
214
225
|
}
|
|
215
226
|
|
|
227
|
+
// FTS5 path (explicit --exact only)
|
|
228
|
+
if (exact) {
|
|
229
|
+
try {
|
|
230
|
+
const results = search(query, { source, limit, since });
|
|
231
|
+
output({
|
|
232
|
+
success: true,
|
|
233
|
+
results,
|
|
234
|
+
count: results.length,
|
|
235
|
+
mode: "exact",
|
|
236
|
+
});
|
|
237
|
+
console.error(
|
|
238
|
+
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (exact)`,
|
|
239
|
+
);
|
|
240
|
+
process.exit(0);
|
|
241
|
+
} catch (error) {
|
|
242
|
+
const message = error instanceof Error ? error.message : "Unknown error";
|
|
243
|
+
fail(message, 2);
|
|
244
|
+
}
|
|
245
|
+
return;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// Semantic path (default) - fail if unavailable
|
|
249
|
+
if (!hasEmbeddings()) {
|
|
250
|
+
fail("No embeddings found. Run lore-embed-all first.", 2);
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
if (!(await isOllamaAvailable())) {
|
|
254
|
+
fail("Ollama not available. Start Ollama or check SQLITE_VEC_PATH.", 2);
|
|
255
|
+
}
|
|
256
|
+
|
|
216
257
|
try {
|
|
217
|
-
const results =
|
|
258
|
+
const results = await semanticSearch(query, { source, limit });
|
|
218
259
|
output({
|
|
219
260
|
success: true,
|
|
220
261
|
results,
|
|
221
262
|
count: results.length,
|
|
263
|
+
mode: "semantic",
|
|
222
264
|
});
|
|
223
265
|
console.error(
|
|
224
|
-
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found`,
|
|
266
|
+
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (semantic)`,
|
|
225
267
|
);
|
|
226
268
|
process.exit(0);
|
|
227
269
|
} catch (error) {
|
|
228
270
|
const message = error instanceof Error ? error.message : "Unknown error";
|
|
229
|
-
fail(message
|
|
271
|
+
fail(`Semantic search failed: ${message}`, 2);
|
|
230
272
|
}
|
|
231
273
|
}
|
|
232
274
|
|
|
@@ -445,6 +487,7 @@ Usage:
|
|
|
445
487
|
lore capture task|knowledge|note Capture knowledge
|
|
446
488
|
|
|
447
489
|
Search Options:
|
|
490
|
+
--exact Use FTS5 text search (bypasses semantic search)
|
|
448
491
|
--limit <n> Maximum results (default: 20)
|
|
449
492
|
--since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
|
|
450
493
|
--sources List indexed sources with counts
|
|
@@ -495,6 +538,7 @@ Usage:
|
|
|
495
538
|
lore search --sources List indexed sources
|
|
496
539
|
|
|
497
540
|
Options:
|
|
541
|
+
--exact Use FTS5 text search (bypasses semantic search)
|
|
498
542
|
--limit <n> Maximum results (default: 20)
|
|
499
543
|
--since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
|
|
500
544
|
--sources List indexed sources with counts
|
|
@@ -523,6 +567,7 @@ Examples:
|
|
|
523
567
|
lore search "authentication"
|
|
524
568
|
lore search blogs "typescript patterns"
|
|
525
569
|
lore search commits --since this-week "refactor"
|
|
570
|
+
lore search --exact "def process_data"
|
|
526
571
|
lore search prismis "kubernetes security"
|
|
527
572
|
lore search atuin "docker build"
|
|
528
573
|
`);
|
package/index.ts
CHANGED
|
@@ -53,3 +53,13 @@ export {
|
|
|
53
53
|
type NoteInput,
|
|
54
54
|
type CaptureEvent,
|
|
55
55
|
} from "./lib/capture";
|
|
56
|
+
|
|
57
|
+
// Semantic search
|
|
58
|
+
export {
|
|
59
|
+
semanticSearch,
|
|
60
|
+
embedQuery,
|
|
61
|
+
isOllamaAvailable,
|
|
62
|
+
hasEmbeddings,
|
|
63
|
+
type SemanticResult,
|
|
64
|
+
type SemanticSearchOptions,
|
|
65
|
+
} from "./lib/semantic";
|
package/lib/semantic.ts
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/semantic.ts - Semantic search via Ollama embeddings
|
|
3
|
+
*
|
|
4
|
+
* Query embedding and KNN search against sqlite-vec virtual table.
|
|
5
|
+
* Uses Bun's built-in SQLite with sqlite-vec extension.
|
|
6
|
+
*
|
|
7
|
+
* Note: macOS ships Apple's SQLite which disables extension loading.
|
|
8
|
+
* We use Homebrew's SQLite via setCustomSQLite() to enable sqlite-vec.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { Database } from "bun:sqlite";
|
|
12
|
+
import { homedir } from "os";
|
|
13
|
+
import { existsSync, readFileSync } from "fs";
|
|
14
|
+
|
|
15
|
+
// Use Homebrew SQLite on macOS to enable extension loading.
// Must run before any Database instance is created, which is why this lives
// at module scope rather than inside a function.
const HOMEBREW_SQLITE = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib";

// Homebrew installs under /opt/homebrew on Apple Silicon and under /usr/local
// on Intel Macs; probe both so Intel machines also get extension support.
const HOMEBREW_SQLITE_CANDIDATES: readonly string[] = [
  HOMEBREW_SQLITE,
  "/usr/local/opt/sqlite/lib/libsqlite3.dylib",
];

// The candidates are macOS dynamic libraries, so only probe for them there.
if (process.platform === "darwin") {
  const customSqlite = HOMEBREW_SQLITE_CANDIDATES.find((candidate) =>
    existsSync(candidate),
  );
  if (customSqlite !== undefined) {
    Database.setCustomSQLite(customSqlite);
  }
}
|
|
21
|
+
|
|
22
|
+
/**
 * One row returned by {@link semanticSearch}.
 *
 * `source`, `title`, `content` and `metadata` are read from the `search`
 * table; `distance` is the smallest embedding distance among the matched
 * chunks of that document (lower sorts first).
 */
export interface SemanticResult {
  /** Value of the `source` column of the matched `search` row. */
  source: string;
  /** Value of the `title` column of the matched `search` row. */
  title: string;
  /** Value of the `content` column of the matched `search` row. */
  content: string;
  /** Value of the `metadata` column, passed through as stored. */
  metadata: string;
  /** Minimum KNN distance across the document's matching chunks. */
  distance: number;
}
|
|
29
|
+
|
|
30
|
+
/** Optional filters accepted by {@link semanticSearch}. */
export interface SemanticSearchOptions {
  /** When set, only rows whose `search.source` equals this value are returned. */
  source?: string;
  /** Maximum number of documents to return; 20 when omitted. */
  limit?: number;
}
|
|
34
|
+
|
|
35
|
+
/** Connection settings for the embedding service. */
interface EmbeddingConfig {
  /** Base URL; `/api/tags` and `/api/embeddings` are appended to it. */
  endpoint: string;
  /** Model name sent in the embeddings request body. */
  model: string;
}

/** Settings used when config.toml is missing or has no usable values. */
const DEFAULT_CONFIG: EmbeddingConfig = {
  endpoint: "http://localhost:11434",
  model: "nomic-embed-text",
};
|
|
44
|
+
|
|
45
|
+
/** Absolute path of the lore SQLite database inside the user's home directory. */
function getDatabasePath(): string {
  const home = homedir();
  return home + "/.local/share/lore/lore.db";
}
|
|
48
|
+
|
|
49
|
+
/** Absolute path of lore's config.toml inside the user's home directory. */
function getConfigPath(): string {
  const home = homedir();
  return home + "/.config/lore/config.toml";
}
|
|
52
|
+
|
|
53
|
+
/**
 * Collect the quoted string values of one TOML table.
 *
 * This is a deliberately small line-based reader, not a TOML parser: it only
 * understands `[section]` headers and `key = "value"` / `key = 'value'`
 * pairs, and skips blank lines and `#` comments. Escape sequences inside
 * values are not interpreted.
 *
 * @param content - Raw text of the config file.
 * @param section - Table name without brackets, e.g. `embedding`.
 * @returns Map from key to value for that table (empty if the table is absent).
 */
function readTomlSectionStrings(
  content: string,
  section: string,
): Map<string, string> {
  const values = new Map<string, string>();
  let inSection = false;

  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line === "" || line.startsWith("#")) {
      continue; // Commented-out keys must not be picked up.
    }

    const header = line.match(/^\[\s*([^\[\]]+?)\s*\]\s*(?:#.*)?$/);
    if (header) {
      inSection = header[1] === section;
      continue;
    }
    if (line.startsWith("[")) {
      // Array-of-tables header or similar: whatever it is, it ends our table.
      inSection = false;
      continue;
    }
    if (!inSection) {
      continue;
    }

    // Anchored at line start so `fallback_endpoint` never matches `endpoint`.
    const pair = line.match(/^([A-Za-z0-9_-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/);
    const key = pair?.[1];
    if (pair && key !== undefined) {
      values.set(key, pair[2] ?? pair[3] ?? "");
    }
  }

  return values;
}

/**
 * Load embedding config from config.toml.
 *
 * Resolution order for the endpoint: `[embedding].endpoint`, then
 * `[llm].api_base`, then the built-in default. The model comes from
 * `[embedding].model` or the built-in default, independently of where the
 * endpoint came from. Trailing slashes are stripped from the endpoint so
 * callers can append `/api/...` safely.
 *
 * A missing or unreadable config file yields the defaults (best effort).
 *
 * @returns A fresh config object; mutating it does not affect the defaults.
 */
function loadEmbeddingConfig(): EmbeddingConfig {
  const configPath = getConfigPath();

  if (!existsSync(configPath)) {
    return { ...DEFAULT_CONFIG };
  }

  let content: string;
  try {
    content = readFileSync(configPath, "utf-8");
  } catch {
    // An unreadable config is treated like a missing one.
    return { ...DEFAULT_CONFIG };
  }

  const embedding = readTomlSectionStrings(content, "embedding");
  const llm = readTomlSectionStrings(content, "llm");

  // `||` rather than `??` on purpose: an empty string counts as "not set".
  const endpoint =
    embedding.get("endpoint") ||
    llm.get("api_base") ||
    DEFAULT_CONFIG.endpoint;
  const model = embedding.get("model") || DEFAULT_CONFIG.model;

  return { endpoint: endpoint.replace(/\/+$/, ""), model };
}
|
|
98
|
+
|
|
99
|
+
/**
 * Check if Ollama is available at the configured endpoint.
 *
 * Sends `GET {endpoint}/api/tags` and aborts the request after 2 seconds.
 *
 * @returns `true` only when the request completes with an OK status; any
 *   network error, timeout or non-OK status yields `false`.
 */
export async function isOllamaAvailable(): Promise<boolean> {
  const config = loadEmbeddingConfig();
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 2000);

  try {
    const response = await fetch(`${config.endpoint}/api/tags`, {
      method: "GET",
      signal: controller.signal,
    });
    return response.ok;
  } catch {
    return false;
  } finally {
    // Clear on every path: a fast failure (e.g. connection refused) must not
    // leave a live timer keeping the event loop busy for the full 2 seconds.
    clearTimeout(timeout);
  }
}
|
|
119
|
+
|
|
120
|
+
/**
 * Embed a query string using Ollama.
 *
 * Posts `{ model, prompt }` to `{endpoint}/api/embeddings` using the
 * configured endpoint and model.
 *
 * @param query - Text to embed; must contain at least one non-whitespace character.
 * @returns 768-dimensional embedding vector.
 * @throws Error if the query is blank, the HTTP response is not OK, or the
 *   response does not contain a 768-element array of finite numbers.
 */
export async function embedQuery(query: string): Promise<number[]> {
  const expectedDims = 768;

  // Reject blank input up front instead of surfacing a confusing
  // dimension-mismatch error from whatever the server returns for it.
  if (query.trim() === "") {
    throw new Error("Cannot embed an empty query");
  }

  const config = loadEmbeddingConfig();
  const url = `${config.endpoint}/api/embeddings`;

  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: config.model,
      prompt: query,
    }),
  });

  if (!response.ok) {
    // The body usually carries the actionable detail (e.g. model not pulled).
    let detail = "";
    try {
      detail = (await response.text()).trim();
    } catch {
      // Body unreadable; the status line alone will have to do.
    }
    const suffix = detail === "" ? "" : ` - ${detail}`;
    throw new Error(
      `Ollama API error: ${response.status} ${response.statusText}${suffix}`,
    );
  }

  const result: unknown = await response.json();
  const embedding: unknown =
    typeof result === "object" && result !== null
      ? (result as { embedding?: unknown }).embedding
      : undefined;

  if (!Array.isArray(embedding) || embedding.length !== expectedDims) {
    const got = Array.isArray(embedding) ? embedding.length : 0;
    throw new Error(
      `Invalid embedding: expected ${expectedDims} dims, got ${got}`,
    );
  }

  const isFiniteNumber = (value: unknown): boolean =>
    typeof value === "number" && Number.isFinite(value);
  if (!embedding.every(isFiniteNumber)) {
    throw new Error("Invalid embedding: contains non-numeric values");
  }

  return embedding as number[];
}
|
|
154
|
+
|
|
155
|
+
/**
 * Check if the embeddings table has any data.
 *
 * This is a best-effort probe that never throws. It returns `false` when the
 * database file does not exist, when `SQLITE_VEC_PATH` is not set, or when
 * opening the database, loading the extension or querying the table fails.
 * Callers therefore cannot distinguish "no embeddings yet" from
 * "sqlite-vec is not usable".
 *
 * @returns `true` only if at least one row can be read from `embeddings`.
 */
export function hasEmbeddings(): boolean {
  const vecPath = process.env.SQLITE_VEC_PATH;
  const dbPath = getDatabasePath();

  // Both checks are cheap; do them before touching the database at all.
  if (!vecPath || !existsSync(dbPath)) {
    return false;
  }

  let db: Database | undefined;
  try {
    // Opening is inside the try so a locked/corrupt/unreadable file yields
    // `false` like every other failure instead of escaping as an exception.
    db = new Database(dbPath, { readonly: true });
    db.loadExtension(vecPath);

    // Existence probe: one row is enough, no need to count the whole table.
    const row = db.prepare("SELECT 1 AS present FROM embeddings LIMIT 1").get();
    return row != null;
  } catch {
    return false;
  } finally {
    db?.close();
  }
}
|
|
185
|
+
|
|
186
|
+
/**
 * Pack an embedding into the raw float32 byte layout passed to sqlite-vec.
 *
 * Each number is narrowed to a 32-bit float; the result is the byte view of
 * that float array (4 bytes per element, platform byte order).
 */
function serializeEmbedding(embedding: number[]): Uint8Array {
  const floats = Float32Array.from(embedding);
  return new Uint8Array(floats.buffer, floats.byteOffset, floats.byteLength);
}
|
|
193
|
+
|
|
194
|
+
/**
 * Perform semantic search using KNN against the embeddings table.
 *
 * The query is embedded via {@link embedQuery}, matched against
 * `embeddings.embedding`, joined to `search` on `doc_id = rowid`, and grouped
 * per document so each document appears once with its smallest chunk distance.
 * Results are ordered by ascending distance.
 *
 * NOTE(review): the `source` filter is part of the same WHERE clause as the
 * KNN constraint; it appears to apply to the k nearest chunks rather than
 * before KNN selection, so a filtered search may return fewer than `limit`
 * rows. Confirm against sqlite-vec behaviour.
 *
 * @param query - Text to search for.
 * @param options - Optional `source` filter and `limit` (default 20).
 *   A non-finite limit falls back to 20; values below 1 return no rows.
 * @returns Up to `limit` results, best match first.
 * @throws Error if the database file is missing, `SQLITE_VEC_PATH` is not
 *   set, embedding the query fails, or the SQLite query fails.
 */
export async function semanticSearch(
  query: string,
  options: SemanticSearchOptions = {},
): Promise<SemanticResult[]> {
  // Upper bound sqlite-vec accepts for `k` in a KNN query (see its docs).
  const maxKnnK = 4096;
  // Chunks fetched per requested document, so grouping still fills `limit`.
  const overfetchFactor = 3;

  const dbPath = getDatabasePath();
  if (!existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
  }

  // Check local prerequisites before paying for the embedding round-trip.
  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    throw new Error(
      'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
    );
  }

  const requested = options.limit ?? 20;
  const limit = Number.isFinite(requested)
    ? Math.max(0, Math.trunc(requested))
    : 20;
  if (limit === 0) {
    return [];
  }
  // Unbounded `limit * 3` would make large limits fail inside sqlite-vec.
  const k = Math.min(limit * overfetchFactor, maxKnnK);

  // Get query embedding
  const queryEmbedding = await embedQuery(query);
  const queryBlob = serializeEmbedding(queryEmbedding);

  const db = new Database(dbPath, { readonly: true });

  try {
    db.loadExtension(vecPath);

    // KNN query with join to search table.
    // Group by doc_id to return best chunk per document.
    // Only the fixed clause text is interpolated; all values stay bound.
    const sourceClause = options.source ? "AND s.source = ?" : "";
    const sql = `
      SELECT
        s.source,
        s.title,
        s.content,
        s.metadata,
        MIN(e.distance) as distance
      FROM embeddings e
      JOIN search s ON e.doc_id = s.rowid
      WHERE e.embedding MATCH ?
        AND k = ?
        ${sourceClause}
      GROUP BY s.rowid
      ORDER BY distance
      LIMIT ?
    `;

    const params: (Uint8Array | string | number)[] = [queryBlob, k];
    if (options.source) {
      params.push(options.source);
    }
    params.push(limit);

    const stmt = db.prepare(sql);
    return stmt.all(...params) as SemanticResult[];
  } finally {
    db.close();
  }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@voidwire/lore",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
|
@@ -18,9 +18,6 @@
|
|
|
18
18
|
"README.md",
|
|
19
19
|
"LICENSE"
|
|
20
20
|
],
|
|
21
|
-
"scripts": {
|
|
22
|
-
"test": "bun test"
|
|
23
|
-
},
|
|
24
21
|
"keywords": [
|
|
25
22
|
"knowledge",
|
|
26
23
|
"search",
|
|
@@ -46,5 +43,8 @@
|
|
|
46
43
|
},
|
|
47
44
|
"devDependencies": {
|
|
48
45
|
"bun-types": "1.3.5"
|
|
46
|
+
},
|
|
47
|
+
"scripts": {
|
|
48
|
+
"test": "bun test"
|
|
49
49
|
}
|
|
50
|
-
}
|
|
50
|
+
}
|