@voidwire/lore 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.ts +52 -10
- package/index.ts +10 -0
- package/lib/semantic.ts +278 -0
- package/package.json +5 -5
package/cli.ts
CHANGED
|
@@ -29,6 +29,9 @@ import {
|
|
|
29
29
|
captureTask,
|
|
30
30
|
captureKnowledge,
|
|
31
31
|
captureNote,
|
|
32
|
+
semanticSearch,
|
|
33
|
+
isOllamaAvailable,
|
|
34
|
+
hasEmbeddings,
|
|
32
35
|
DOMAINS,
|
|
33
36
|
type SearchResult,
|
|
34
37
|
type ListResult,
|
|
@@ -73,20 +76,27 @@ function parseArgs(args: string[]): Map<string, string> {
|
|
|
73
76
|
return parsed;
|
|
74
77
|
}
|
|
75
78
|
|
|
79
|
+
// Boolean flags that don't take values
|
|
80
|
+
const BOOLEAN_FLAGS = new Set(["help", "sources", "domains", "exact"]);
|
|
81
|
+
|
|
76
82
|
function getPositionalArgs(args: string[]): string[] {
|
|
77
83
|
const result: string[] = [];
|
|
78
|
-
let
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
skipNext = false;
|
|
82
|
-
continue;
|
|
83
|
-
}
|
|
84
|
+
let i = 0;
|
|
85
|
+
while (i < args.length) {
|
|
86
|
+
const arg = args[i];
|
|
84
87
|
if (arg.startsWith("--")) {
|
|
85
|
-
|
|
86
|
-
|
|
88
|
+
const flag = arg.slice(2).split("=")[0]; // Handle --flag=value format
|
|
89
|
+
if (BOOLEAN_FLAGS.has(flag) || arg.includes("=")) {
|
|
90
|
+
i += 1; // Boolean flag or --flag=value, skip only the flag
|
|
91
|
+
} else if (i + 1 < args.length && !args[i + 1].startsWith("--")) {
|
|
92
|
+
i += 2; // Flag with separate value, skip both
|
|
93
|
+
} else {
|
|
94
|
+
i += 1; // Flag at end or followed by another flag
|
|
95
|
+
}
|
|
87
96
|
continue;
|
|
88
97
|
}
|
|
89
98
|
result.push(arg);
|
|
99
|
+
i++;
|
|
90
100
|
}
|
|
91
101
|
return result;
|
|
92
102
|
}
|
|
@@ -127,13 +137,14 @@ function fail(error: string, code: number = 1): never {
|
|
|
127
137
|
// Search Command
|
|
128
138
|
// ============================================================================
|
|
129
139
|
|
|
130
|
-
function handleSearch(args: string[]): void {
|
|
140
|
+
async function handleSearch(args: string[]): Promise<void> {
|
|
131
141
|
if (hasFlag(args, "help")) {
|
|
132
142
|
showSearchHelp();
|
|
133
143
|
}
|
|
134
144
|
|
|
135
145
|
const parsed = parseArgs(args);
|
|
136
146
|
const positional = getPositionalArgs(args);
|
|
147
|
+
const exact = hasFlag(args, "exact");
|
|
137
148
|
|
|
138
149
|
// Handle --sources flag
|
|
139
150
|
if (hasFlag(args, "sources")) {
|
|
@@ -213,15 +224,43 @@ function handleSearch(args: string[]): void {
|
|
|
213
224
|
return;
|
|
214
225
|
}
|
|
215
226
|
|
|
227
|
+
// Route semantic vs FTS5 based on --exact flag and availability
|
|
228
|
+
if (!exact) {
|
|
229
|
+
try {
|
|
230
|
+
const canUseSemantic = hasEmbeddings() && (await isOllamaAvailable());
|
|
231
|
+
if (canUseSemantic) {
|
|
232
|
+
const results = await semanticSearch(query, { source, limit });
|
|
233
|
+
output({
|
|
234
|
+
success: true,
|
|
235
|
+
results,
|
|
236
|
+
count: results.length,
|
|
237
|
+
mode: "semantic",
|
|
238
|
+
});
|
|
239
|
+
console.error(
|
|
240
|
+
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (semantic)`,
|
|
241
|
+
);
|
|
242
|
+
process.exit(0);
|
|
243
|
+
}
|
|
244
|
+
// Fall through to FTS5 if semantic not available
|
|
245
|
+
} catch (error) {
|
|
246
|
+
// Semantic search failed, fall back to FTS5
|
|
247
|
+
console.error(
|
|
248
|
+
`⚠️ Semantic search unavailable: ${error instanceof Error ? error.message : "Unknown error"}`,
|
|
249
|
+
);
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
// FTS5 path (default fallback or explicit --exact)
|
|
216
254
|
try {
|
|
217
255
|
const results = search(query, { source, limit, since });
|
|
218
256
|
output({
|
|
219
257
|
success: true,
|
|
220
258
|
results,
|
|
221
259
|
count: results.length,
|
|
260
|
+
mode: exact ? "exact" : "fts5",
|
|
222
261
|
});
|
|
223
262
|
console.error(
|
|
224
|
-
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found`,
|
|
263
|
+
`✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (${exact ? "exact" : "fts5"})`,
|
|
225
264
|
);
|
|
226
265
|
process.exit(0);
|
|
227
266
|
} catch (error) {
|
|
@@ -445,6 +484,7 @@ Usage:
|
|
|
445
484
|
lore capture task|knowledge|note Capture knowledge
|
|
446
485
|
|
|
447
486
|
Search Options:
|
|
487
|
+
--exact Use FTS5 text search (bypasses semantic search)
|
|
448
488
|
--limit <n> Maximum results (default: 20)
|
|
449
489
|
--since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
|
|
450
490
|
--sources List indexed sources with counts
|
|
@@ -495,6 +535,7 @@ Usage:
|
|
|
495
535
|
lore search --sources List indexed sources
|
|
496
536
|
|
|
497
537
|
Options:
|
|
538
|
+
--exact Use FTS5 text search (bypasses semantic search)
|
|
498
539
|
--limit <n> Maximum results (default: 20)
|
|
499
540
|
--since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
|
|
500
541
|
--sources List indexed sources with counts
|
|
@@ -523,6 +564,7 @@ Examples:
|
|
|
523
564
|
lore search "authentication"
|
|
524
565
|
lore search blogs "typescript patterns"
|
|
525
566
|
lore search commits --since this-week "refactor"
|
|
567
|
+
lore search --exact "def process_data"
|
|
526
568
|
lore search prismis "kubernetes security"
|
|
527
569
|
lore search atuin "docker build"
|
|
528
570
|
`);
|
package/index.ts
CHANGED
|
@@ -53,3 +53,13 @@ export {
|
|
|
53
53
|
type NoteInput,
|
|
54
54
|
type CaptureEvent,
|
|
55
55
|
} from "./lib/capture";
|
|
56
|
+
|
|
57
|
+
// Semantic search
|
|
58
|
+
export {
|
|
59
|
+
semanticSearch,
|
|
60
|
+
embedQuery,
|
|
61
|
+
isOllamaAvailable,
|
|
62
|
+
hasEmbeddings,
|
|
63
|
+
type SemanticResult,
|
|
64
|
+
type SemanticSearchOptions,
|
|
65
|
+
} from "./lib/semantic";
|
package/lib/semantic.ts
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/semantic.ts - Semantic search via Ollama embeddings
|
|
3
|
+
*
|
|
4
|
+
* Query embedding and KNN search against sqlite-vec virtual table.
|
|
5
|
+
* Uses Bun's built-in SQLite with sqlite-vec extension.
|
|
6
|
+
*
|
|
7
|
+
* Note: macOS ships Apple's SQLite which disables extension loading.
|
|
8
|
+
* We use Homebrew's SQLite via setCustomSQLite() to enable sqlite-vec.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { Database } from "bun:sqlite";
|
|
12
|
+
import { homedir } from "os";
|
|
13
|
+
import { existsSync, readFileSync } from "fs";
|
|
14
|
+
|
|
15
|
+
// Use Homebrew SQLite on macOS to enable extension loading (the SQLite that
// ships with macOS has extension loading disabled).
// Must be called before any Database instances are created.
// Homebrew installs under /opt/homebrew on Apple Silicon and under /usr/local
// on Intel Macs, so both locations are probed; the first one that exists wins.
const HOMEBREW_SQLITE = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib";
const HOMEBREW_SQLITE_CANDIDATES = [
  HOMEBREW_SQLITE,
  "/usr/local/opt/sqlite/lib/libsqlite3.dylib",
];
const homebrewSqlitePath = HOMEBREW_SQLITE_CANDIDATES.find((candidate) =>
  existsSync(candidate),
);
if (homebrewSqlitePath !== undefined) {
  Database.setCustomSQLite(homebrewSqlitePath);
}
|
|
21
|
+
|
|
22
|
+
/**
 * One row returned by a semantic search.
 *
 * The first four fields are read from the `search` table; `distance` is the
 * smallest embedding distance found for that row (lower means closer).
 */
export interface SemanticResult {
  /** Value of the `source` column of the matched row. */
  source: string;
  /** Value of the `title` column of the matched row. */
  title: string;
  /** Value of the `content` column of the matched row. */
  content: string;
  /** Value of the `metadata` column (presumably serialized JSON — confirm against the indexer). */
  metadata: string;
  /** Minimum KNN distance across the row's embeddings. */
  distance: number;
}
|
|
29
|
+
|
|
30
|
+
/** Optional filters accepted by the semantic search entry point. */
export interface SemanticSearchOptions {
  /** Restrict results to rows whose `source` column equals this value. */
  source?: string;
  /** Maximum number of results to return. */
  limit?: number;
}
|
|
34
|
+
|
|
35
|
+
/** Where to reach the embedding service and which model to request. */
interface EmbeddingConfig {
  /** Base URL of the service, e.g. "http://localhost:11434". */
  endpoint: string;
  /** Name of the embedding model sent with each request. */
  model: string;
}

/** Settings used when config.toml provides no usable endpoint. */
const DEFAULT_CONFIG: EmbeddingConfig = {
  model: "nomic-embed-text",
  endpoint: "http://localhost:11434",
};
|
|
44
|
+
|
|
45
|
+
/** Location of the lore SQLite database inside the current user's home directory. */
function getDatabasePath(): string {
  const home = homedir();
  return home + "/.local/share/lore/lore.db";
}
|
|
48
|
+
|
|
49
|
+
/** Location of the lore config.toml inside the current user's home directory. */
function getConfigPath(): string {
  const home = homedir();
  return home + "/.config/lore/config.toml";
}
|
|
52
|
+
|
|
53
|
+
/**
 * Read a double-quoted string value for `key` from the `[table]` section of
 * TOML text.
 *
 * The scan is line based: comment lines are skipped, a line of the form
 * `[name]` switches the current table, and a key only matches when the text
 * left of the first `=` is exactly `key`. This is a deliberately small subset
 * of TOML (no escapes, no multi-line or single-quoted strings).
 *
 * @returns the first matching non-empty value, or `undefined` if none is found
 */
function readTomlString(
  content: string,
  table: string,
  key: string,
): string | undefined {
  let inTable = false;

  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line === "" || line.startsWith("#")) {
      continue;
    }

    if (line.startsWith("[")) {
      // Only a well-formed `[name]` header changes the current table; other
      // lines starting with "[" (array continuations, `[[...]]`) are ignored.
      const header = line.match(/^\[\s*([A-Za-z0-9_.-]+)\s*\]\s*(?:#.*)?$/);
      if (header) {
        inTable = header[1] === table;
      }
      continue;
    }

    if (!inTable) {
      continue;
    }

    const eq = line.indexOf("=");
    if (eq === -1 || line.slice(0, eq).trim() !== key) {
      continue;
    }

    const value = line
      .slice(eq + 1)
      .trim()
      .match(/^"([^"]+)"/);
    if (value) {
      return value[1];
    }
  }

  return undefined;
}

/**
 * Load embedding config from config.toml.
 *
 * Resolution order for the endpoint: `[embedding].endpoint`, then
 * `[llm].api_base`, then the built-in default. The model comes from
 * `[embedding].model` and otherwise falls back to the default, independently
 * of where the endpoint came from. Trailing slashes are stripped from the
 * endpoint so callers can append `/api/...` safely.
 *
 * Never throws: a missing or unreadable config file yields the defaults.
 *
 * @returns a fresh config object (the shared default object is never handed out)
 */
function loadEmbeddingConfig(): EmbeddingConfig {
  const configPath = getConfigPath();

  if (!existsSync(configPath)) {
    return { ...DEFAULT_CONFIG };
  }

  let content: string;
  try {
    content = readFileSync(configPath, "utf-8");
  } catch {
    // Best effort: an unreadable config behaves like an absent one.
    return { ...DEFAULT_CONFIG };
  }

  const configuredEndpoint =
    readTomlString(content, "embedding", "endpoint") ??
    readTomlString(content, "llm", "api_base") ??
    DEFAULT_CONFIG.endpoint;
  const endpoint =
    configuredEndpoint.replace(/\/+$/, "") || DEFAULT_CONFIG.endpoint;

  const model =
    readTomlString(content, "embedding", "model") ?? DEFAULT_CONFIG.model;

  return { endpoint, model };
}
|
|
98
|
+
|
|
99
|
+
/**
 * Check if Ollama is available at the configured endpoint.
 *
 * Issues `GET {endpoint}/api/tags` and aborts the request after 2 seconds.
 *
 * @returns `true` only when the request completes with a 2xx status; any
 *          network error, abort, or non-2xx status yields `false`
 */
export async function isOllamaAvailable(): Promise<boolean> {
  const config = loadEmbeddingConfig();
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 2000);

  try {
    const response = await fetch(`${config.endpoint}/api/tags`, {
      method: "GET",
      signal: controller.signal,
    });
    return response.ok;
  } catch {
    return false;
  } finally {
    // Always release the timer, including when fetch rejects; otherwise it
    // stays pending and keeps the event loop alive until it fires.
    clearTimeout(timeout);
  }
}
|
|
119
|
+
|
|
120
|
+
/** Vector length this module accepts from the embedding model. */
const EMBEDDING_DIMENSIONS = 768;

/**
 * Embed a query string using Ollama.
 *
 * Sends `POST {endpoint}/api/embeddings` with the configured model and the
 * query as `prompt`, then validates the response before returning it.
 *
 * @param query - text to embed
 * @returns 768-dimensional embedding vector
 * @throws Error when the HTTP status is not 2xx, when the response has no
 *         `embedding` array of the expected length, or when that array
 *         contains anything other than finite numbers
 */
export async function embedQuery(query: string): Promise<number[]> {
  const config = loadEmbeddingConfig();
  const url = `${config.endpoint}/api/embeddings`;

  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: config.model,
      prompt: query,
    }),
  });

  if (!response.ok) {
    throw new Error(
      `Ollama API error: ${response.status} ${response.statusText}`,
    );
  }

  // The payload comes from an external service, so narrow it instead of
  // trusting a type assertion.
  const payload: unknown = await response.json();
  const embedding: unknown =
    typeof payload === "object" && payload !== null
      ? (payload as { embedding?: unknown }).embedding
      : undefined;

  if (!Array.isArray(embedding) || embedding.length !== EMBEDDING_DIMENSIONS) {
    const got = Array.isArray(embedding) ? embedding.length : 0;
    throw new Error(
      `Invalid embedding: expected ${EMBEDDING_DIMENSIONS} dims, got ${got}`,
    );
  }

  // Non-numeric entries would silently turn into NaN once packed as floats.
  const allFinite = embedding.every(
    (value): value is number =>
      typeof value === "number" && Number.isFinite(value),
  );
  if (!allFinite) {
    throw new Error("Invalid embedding: contains non-numeric values");
  }

  return embedding;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Check if the embeddings table has any data.
 *
 * This is a pure yes/no probe and never throws: it returns `false` when the
 * database file is missing, when `SQLITE_VEC_PATH` is not set, or when
 * opening the database, loading the extension, or running the count fails.
 *
 * @returns `true` when `embeddings` contains at least one row
 */
export function hasEmbeddings(): boolean {
  const dbPath = getDatabasePath();
  if (!existsSync(dbPath)) {
    return false;
  }

  // Without the sqlite-vec extension the table cannot be read, so skip
  // opening the database at all.
  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    return false;
  }

  let db: Database | undefined;
  try {
    // Opened inside the try so an open failure is reported as `false`.
    db = new Database(dbPath, { readonly: true });
    db.loadExtension(vecPath);

    const row = db
      .prepare("SELECT COUNT(*) as count FROM embeddings")
      .get() as { count: number } | null;
    return (row?.count ?? 0) > 0;
  } catch {
    return false;
  } finally {
    db?.close();
  }
}
|
|
185
|
+
|
|
186
|
+
/**
 * Pack an embedding into the byte blob bound to sqlite-vec queries.
 *
 * Each number is stored as a 32-bit float; the returned array is a byte view
 * over exactly those floats.
 */
function serializeEmbedding(embedding: number[]): Uint8Array {
  const floats = Float32Array.from(embedding);
  return new Uint8Array(floats.buffer, floats.byteOffset, floats.byteLength);
}
|
|
193
|
+
|
|
194
|
+
/**
 * Perform semantic search using KNN against the embeddings table.
 *
 * The query is embedded, matched against `embeddings` with sqlite-vec, joined
 * to `search` on `doc_id = rowid`, and grouped per `search` row so each row is
 * returned once with its smallest distance. Three times `limit` neighbours
 * are requested from the KNN step so that grouping (and the optional source
 * filter) still leaves enough rows to fill `limit`.
 *
 * All local preconditions are checked before the embedding request is made,
 * so a misconfigured environment fails fast without a network round trip.
 *
 * @param query - text to search for
 * @param options - optional `source` filter and `limit` (default 20)
 * @returns matching rows ordered by ascending distance
 * @throws Error when the database file is missing, `SQLITE_VEC_PATH` is not
 *         set, `limit` is not a positive integer, or embedding/querying fails
 */
export async function semanticSearch(
  query: string,
  options: SemanticSearchOptions = {},
): Promise<SemanticResult[]> {
  const dbPath = getDatabasePath();

  if (!existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
  }

  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    throw new Error(
      'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
    );
  }

  const limit = options.limit ?? 20;
  if (!Number.isInteger(limit) || limit < 1) {
    throw new Error(`Invalid limit: expected a positive integer, got ${limit}`);
  }

  // Get query embedding
  const queryEmbedding = await embedQuery(query);
  const queryBlob = serializeEmbedding(queryEmbedding);

  const db = new Database(dbPath, { readonly: true });

  try {
    db.loadExtension(vecPath);

    // The source filter is the only difference between the two query shapes,
    // so the statement is built once with an optional clause. Values are
    // always bound as parameters, never interpolated.
    const sourceClause = options.source ? "AND s.source = ?" : "";
    const sql = `
      SELECT
        s.source,
        s.title,
        s.content,
        s.metadata,
        MIN(e.distance) as distance
      FROM embeddings e
      JOIN search s ON e.doc_id = s.rowid
      WHERE e.embedding MATCH ?
        AND k = ?
        ${sourceClause}
      GROUP BY s.rowid
      ORDER BY distance
      LIMIT ?
    `;

    const neighbours = limit * 3; // Fetch more for grouping
    const params: (Uint8Array | string | number)[] = options.source
      ? [queryBlob, neighbours, options.source, limit]
      : [queryBlob, neighbours, limit];

    return db.prepare(sql).all(...params) as SemanticResult[];
  } finally {
    db.close();
  }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@voidwire/lore",
|
|
3
|
-
"version": "0.1.3",
|
|
3
|
+
"version": "0.1.4",
|
|
4
4
|
"description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
|
@@ -18,9 +18,6 @@
|
|
|
18
18
|
"README.md",
|
|
19
19
|
"LICENSE"
|
|
20
20
|
],
|
|
21
|
-
"scripts": {
|
|
22
|
-
"test": "bun test"
|
|
23
|
-
},
|
|
24
21
|
"keywords": [
|
|
25
22
|
"knowledge",
|
|
26
23
|
"search",
|
|
@@ -46,5 +43,8 @@
|
|
|
46
43
|
},
|
|
47
44
|
"devDependencies": {
|
|
48
45
|
"bun-types": "1.3.5"
|
|
46
|
+
},
|
|
47
|
+
"scripts": {
|
|
48
|
+
"test": "bun test"
|
|
49
49
|
}
|
|
50
|
-
}
|
|
50
|
+
}
|