@voidwire/lore 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/cli.ts +57 -12
  2. package/index.ts +10 -0
  3. package/lib/semantic.ts +278 -0
  4. package/package.json +5 -5
package/cli.ts CHANGED
@@ -29,6 +29,9 @@ import {
29
29
  captureTask,
30
30
  captureKnowledge,
31
31
  captureNote,
32
+ semanticSearch,
33
+ isOllamaAvailable,
34
+ hasEmbeddings,
32
35
  DOMAINS,
33
36
  type SearchResult,
34
37
  type ListResult,
@@ -73,20 +76,27 @@ function parseArgs(args: string[]): Map<string, string> {
73
76
  return parsed;
74
77
  }
75
78
 
// Flags that never consume a following value.
const BOOLEAN_FLAGS = new Set(["help", "sources", "domains", "exact"]);

/**
 * Collect the arguments that are neither `--flags` nor values consumed by a
 * flag.
 *
 * A `--flag` swallows the next argument as its value unless the flag is listed
 * in BOOLEAN_FLAGS, is written as `--flag=value`, is the last argument, or is
 * followed by another `--flag`.
 */
function getPositionalArgs(args: string[]): string[] {
  const positional: string[] = [];

  for (let index = 0; index < args.length; index += 1) {
    const current = args[index];

    if (!current.startsWith("--")) {
      positional.push(current);
      continue;
    }

    // "--name=value" carries its value inline; only the name part is looked up.
    const [name] = current.slice(2).split("=");
    if (BOOLEAN_FLAGS.has(name) || current.includes("=")) {
      continue;
    }

    // A separate value follows: step over it as well.
    const hasNext = index + 1 < args.length;
    if (hasNext && !args[index + 1].startsWith("--")) {
      index += 1;
    }
  }

  return positional;
}
@@ -127,13 +137,14 @@ function fail(error: string, code: number = 1): never {
127
137
  // Search Command
128
138
  // ============================================================================
129
139
 
130
- function handleSearch(args: string[]): void {
140
+ async function handleSearch(args: string[]): Promise<void> {
131
141
  if (hasFlag(args, "help")) {
132
142
  showSearchHelp();
133
143
  }
134
144
 
135
145
  const parsed = parseArgs(args);
136
146
  const positional = getPositionalArgs(args);
147
+ const exact = hasFlag(args, "exact");
137
148
 
138
149
  // Handle --sources flag
139
150
  if (hasFlag(args, "sources")) {
@@ -213,20 +224,51 @@ function handleSearch(args: string[]): void {
213
224
  return;
214
225
  }
215
226
 
227
+ // FTS5 path (explicit --exact only)
228
+ if (exact) {
229
+ try {
230
+ const results = search(query, { source, limit, since });
231
+ output({
232
+ success: true,
233
+ results,
234
+ count: results.length,
235
+ mode: "exact",
236
+ });
237
+ console.error(
238
+ `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (exact)`,
239
+ );
240
+ process.exit(0);
241
+ } catch (error) {
242
+ const message = error instanceof Error ? error.message : "Unknown error";
243
+ fail(message, 2);
244
+ }
245
+ return;
246
+ }
247
+
248
+ // Semantic path (default) - fail if unavailable
249
+ if (!hasEmbeddings()) {
250
+ fail("No embeddings found. Run lore-embed-all first.", 2);
251
+ }
252
+
253
+ if (!(await isOllamaAvailable())) {
254
+ fail("Ollama not available. Start Ollama or check SQLITE_VEC_PATH.", 2);
255
+ }
256
+
216
257
  try {
217
- const results = search(query, { source, limit, since });
258
+ const results = await semanticSearch(query, { source, limit });
218
259
  output({
219
260
  success: true,
220
261
  results,
221
262
  count: results.length,
263
+ mode: "semantic",
222
264
  });
223
265
  console.error(
224
- `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found`,
266
+ `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (semantic)`,
225
267
  );
226
268
  process.exit(0);
227
269
  } catch (error) {
228
270
  const message = error instanceof Error ? error.message : "Unknown error";
229
- fail(message, 2);
271
+ fail(`Semantic search failed: ${message}`, 2);
230
272
  }
231
273
  }
232
274
 
@@ -445,6 +487,7 @@ Usage:
445
487
  lore capture task|knowledge|note Capture knowledge
446
488
 
447
489
  Search Options:
490
+ --exact Use FTS5 text search (bypasses semantic search)
448
491
  --limit <n> Maximum results (default: 20)
449
492
  --since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
450
493
  --sources List indexed sources with counts
@@ -495,6 +538,7 @@ Usage:
495
538
  lore search --sources List indexed sources
496
539
 
497
540
  Options:
541
+ --exact Use FTS5 text search (bypasses semantic search)
498
542
  --limit <n> Maximum results (default: 20)
499
543
  --since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
500
544
  --sources List indexed sources with counts
@@ -523,6 +567,7 @@ Examples:
523
567
  lore search "authentication"
524
568
  lore search blogs "typescript patterns"
525
569
  lore search commits --since this-week "refactor"
570
+ lore search --exact "def process_data"
526
571
  lore search prismis "kubernetes security"
527
572
  lore search atuin "docker build"
528
573
  `);
package/index.ts CHANGED
@@ -53,3 +53,13 @@ export {
53
53
  type NoteInput,
54
54
  type CaptureEvent,
55
55
  } from "./lib/capture";
56
+
57
+ // Semantic search
58
+ export {
59
+ semanticSearch,
60
+ embedQuery,
61
+ isOllamaAvailable,
62
+ hasEmbeddings,
63
+ type SemanticResult,
64
+ type SemanticSearchOptions,
65
+ } from "./lib/semantic";
package/lib/semantic.ts ADDED
@@ -0,0 +1,278 @@
1
+ /**
2
+ * lib/semantic.ts - Semantic search via Ollama embeddings
3
+ *
4
+ * Query embedding and KNN search against sqlite-vec virtual table.
5
+ * Uses Bun's built-in SQLite with sqlite-vec extension.
6
+ *
7
+ * Note: macOS ships Apple's SQLite which disables extension loading.
8
+ * We use Homebrew's SQLite via setCustomSQLite() to enable sqlite-vec.
9
+ */
10
+
11
+ import { Database } from "bun:sqlite";
12
+ import { homedir } from "os";
13
+ import { existsSync, readFileSync } from "fs";
14
+
// macOS ships Apple's SQLite, which disables extension loading, so point Bun
// at a Homebrew build of SQLite when one is installed.
// Must run before any Database instance is created.
//
// Homebrew installs under /opt/homebrew on Apple Silicon and under /usr/local
// on Intel Macs; probe both and use the first library that exists.
const HOMEBREW_SQLITE = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib";
const HOMEBREW_SQLITE_CANDIDATES = [
  HOMEBREW_SQLITE,
  "/usr/local/opt/sqlite/lib/libsqlite3.dylib",
];
const homebrewSqlitePath = HOMEBREW_SQLITE_CANDIDATES.find((candidate) =>
  existsSync(candidate),
);
if (homebrewSqlitePath !== undefined) {
  Database.setCustomSQLite(homebrewSqlitePath);
}
21
+
/**
 * One row returned by semanticSearch(): the columns selected from the
 * `search` table plus the vector distance to the query.
 */
export interface SemanticResult {
  /** Value of the `source` column of the matched row. */
  source: string;
  /** Value of the `title` column of the matched row. */
  title: string;
  /** Value of the `content` column of the matched row. */
  content: string;
  /** Value of the `metadata` column of the matched row. */
  metadata: string;
  /** Smallest embedding distance among the matches grouped under this row. */
  distance: number;
}
29
+
/** Optional filters accepted by semanticSearch(). */
export interface SemanticSearchOptions {
  /** When set, only rows whose `source` column equals this value are returned. */
  source?: string;
  /** Maximum number of rows to return; semanticSearch() uses 20 when omitted. */
  limit?: number;
}
34
+
/** Where to send embedding requests and which model to ask for. */
interface EmbeddingConfig {
  /** Base URL of the embedding server; API paths are appended to it. */
  endpoint: string;
  /** Model name sent in the embedding request body. */
  model: string;
}

/** Settings used when config.toml is missing, unreadable, or sets nothing relevant. */
const DEFAULT_CONFIG: EmbeddingConfig = {
  endpoint: "http://localhost:11434",
  model: "nomic-embed-text",
};
44
+
/** Absolute path of the lore SQLite database under the user's home directory. */
function getDatabasePath(): string {
  const home = homedir();
  return [home, ".local", "share", "lore", "lore.db"].join("/");
}
48
+
/** Absolute path of the lore config.toml under the user's home directory. */
function getConfigPath(): string {
  const home = homedir();
  return [home, ".config", "lore", "config.toml"].join("/");
}
52
+
/**
 * Load embedding settings from the file at getConfigPath().
 *
 * Resolution order:
 * - endpoint: `[embedding].endpoint`, then `[llm].api_base`, then the default.
 * - model: `[embedding].model`, then the default.
 *
 * Endpoint and model are resolved independently, so a config that only sets
 * `[embedding].model` is honoured instead of being discarded together with
 * the missing endpoint.
 *
 * This is a small line-based reader, not a TOML parser: only non-empty,
 * double-quoted, single-line string values are recognised.
 *
 * @returns The resolved settings, or DEFAULT_CONFIG when the file is missing,
 *   unreadable, or sets none of the keys above.
 */
function loadEmbeddingConfig(): EmbeddingConfig {
  const configPath = getConfigPath();

  if (!existsSync(configPath)) {
    return DEFAULT_CONFIG;
  }

  let content: string;
  try {
    content = readFileSync(configPath, "utf-8");
  } catch {
    // Best effort: an unreadable config behaves like a missing one.
    return DEFAULT_CONFIG;
  }

  const endpoint =
    readTomlString(content, "embedding", "endpoint") ??
    readTomlString(content, "llm", "api_base");
  const model = readTomlString(content, "embedding", "model");

  if (endpoint === undefined && model === undefined) {
    return DEFAULT_CONFIG;
  }

  return {
    endpoint: endpoint ?? DEFAULT_CONFIG.endpoint,
    model: model ?? DEFAULT_CONFIG.model,
  };
}

/**
 * Return the double-quoted string assigned to `key` inside `[section]`, or
 * undefined when the section or key is absent.
 */
function readTomlString(
  content: string,
  section: string,
  key: string,
): string | undefined {
  let inSection = false;

  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();

    // Table headers ("[name]" or "[[name]]") switch the current section.
    // Requiring the whole line to be a header keeps array values such as
    // `hosts = ["a"]` from being mistaken for the start of a new section.
    const header = line.match(/^\[{1,2}\s*([\w.\- ]+?)\s*\]{1,2}\s*(?:#.*)?$/);
    if (header) {
      inSection = !line.startsWith("[[") && header[1] === section;
      continue;
    }

    // Skip other sections and commented-out lines.
    if (!inSection || line.startsWith("#")) {
      continue;
    }

    // The key name must match exactly, so "my_endpoint" is not read as "endpoint".
    const assignment = line.match(/^([A-Za-z0-9_-]+)\s*=\s*"([^"]+)"/);
    if (assignment && assignment[1] === key) {
      return assignment[2];
    }
  }

  return undefined;
}
98
+
/**
 * Check whether the configured embedding endpoint answers `GET /api/tags`.
 *
 * The request is aborted after 2 seconds. Any failure (network error, abort,
 * non-2xx status) is reported as `false` rather than thrown.
 *
 * @returns `true` when the endpoint responds with an OK status.
 */
export async function isOllamaAvailable(): Promise<boolean> {
  const config = loadEmbeddingConfig();
  // Tolerate a configured endpoint that ends in "/" so the URL has no "//".
  const baseUrl = config.endpoint.replace(/\/+$/, "");

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 2000);

  try {
    const response = await fetch(`${baseUrl}/api/tags`, {
      method: "GET",
      signal: controller.signal,
    });
    return response.ok;
  } catch {
    return false;
  } finally {
    // Clear the timer on every path, including a rejected fetch, so it does
    // not stay pending after the check has finished.
    clearTimeout(timeout);
  }
}
119
+
/**
 * Embed a query string by POSTing it to `/api/embeddings` on the configured
 * endpoint.
 *
 * @param query Text to embed; sent as the `prompt` field of the request body.
 * @returns 768-dimensional embedding vector
 * @throws Error when the server answers with a non-OK status, or when the
 *   response does not contain an `embedding` array of exactly 768 numbers.
 */
export async function embedQuery(query: string): Promise<number[]> {
  // Vector length this function accepts; anything else is rejected below.
  const expectedDimensions = 768;

  const config = loadEmbeddingConfig();
  // Tolerate a configured endpoint that ends in "/" so the URL has no "//".
  const baseUrl = config.endpoint.replace(/\/+$/, "");
  const url = `${baseUrl}/api/embeddings`;

  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: config.model,
      prompt: query,
    }),
  });

  if (!response.ok) {
    throw new Error(
      `Ollama API error: ${response.status} ${response.statusText}`,
    );
  }

  const result = (await response.json()) as { embedding?: number[] };
  const embedding = result.embedding;

  if (!Array.isArray(embedding) || embedding.length !== expectedDimensions) {
    throw new Error(
      `Invalid embedding: expected ${expectedDimensions} dims, got ${embedding?.length ?? 0}`,
    );
  }

  return embedding;
}
154
+
/**
 * Check whether the `embeddings` table contains at least one row.
 *
 * This is a pure probe: every failure (missing database, SQLITE_VEC_PATH not
 * set, database cannot be opened, extension cannot be loaded, table missing)
 * yields `false` instead of an exception.
 *
 * @returns `true` only when the row count of `embeddings` is greater than zero.
 */
export function hasEmbeddings(): boolean {
  const dbPath = getDatabasePath();
  if (!existsSync(dbPath)) {
    return false;
  }

  // Check the extension path before opening the database: without it the
  // function returns false anyway, so there is no reason to open a handle.
  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    return false;
  }

  let db: Database | undefined;
  try {
    // Opened inside the try so a failure to open is reported as false
    // rather than escaping as an exception.
    db = new Database(dbPath, { readonly: true });
    db.loadExtension(vecPath);

    const row = db.prepare("SELECT COUNT(*) as count FROM embeddings").get() as
      | { count: number }
      | null;
    return (row?.count ?? 0) > 0;
  } catch {
    return false;
  } finally {
    db?.close();
  }
}
185
+
/**
 * Pack an embedding into the byte blob bound to the sqlite-vec MATCH
 * parameter: each number is stored as a 32-bit float and the bytes of the
 * resulting buffer are returned.
 */
function serializeEmbedding(embedding: number[]): Uint8Array {
  const floats = Float32Array.from(embedding);
  const bytes = new Uint8Array(floats.buffer);
  return bytes;
}
193
+
/**
 * Perform semantic search using KNN against the `embeddings` table.
 *
 * The query is embedded with embedQuery(), matched against `embeddings`, and
 * joined to the `search` table. Rows are grouped per `search` row so each
 * document is returned once with its smallest distance, ordered by distance.
 *
 * @param query Text to search for.
 * @param options Optional `source` filter and `limit` (default 20).
 * @returns Up to `limit` rows ordered by ascending distance.
 * @throws Error when the database file does not exist, when SQLITE_VEC_PATH is
 *   not set, or when embedding the query or running the SQL fails.
 */
export async function semanticSearch(
  query: string,
  options: SemanticSearchOptions = {},
): Promise<SemanticResult[]> {
  const dbPath = getDatabasePath();

  if (!existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
  }

  // Validate local prerequisites before the network round trip to the
  // embedding server, so a missing extension path fails fast.
  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    throw new Error(
      'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
    );
  }

  // Get query embedding
  const queryBlob = serializeEmbedding(await embedQuery(query));

  const limit = options.limit ?? 20;
  const source = options.source;

  // One statement for both cases; the source filter is a fixed SQL fragment
  // and its value is always bound as a parameter, never interpolated.
  // NOTE(review): the source filter appears to apply to the k nearest
  // candidates, so a filtered search may return fewer than `limit` rows —
  // confirm against the sqlite-vec documentation.
  const sourceClause = source ? "AND s.source = ?" : "";
  const sql = `
    SELECT
      s.source,
      s.title,
      s.content,
      s.metadata,
      MIN(e.distance) as distance
    FROM embeddings e
    JOIN search s ON e.doc_id = s.rowid
    WHERE e.embedding MATCH ?
      AND k = ?
      ${sourceClause}
    GROUP BY s.rowid
    ORDER BY distance
    LIMIT ?
  `;

  // Parameter order mirrors the placeholders: MATCH, k, [source], LIMIT.
  const params: (Uint8Array | string | number)[] = [
    queryBlob,
    limit * 3, // Fetch more candidates than needed because grouping collapses chunks
  ];
  if (source) {
    params.push(source);
  }
  params.push(limit);

  const db = new Database(dbPath, { readonly: true });
  try {
    // Load sqlite-vec extension
    db.loadExtension(vecPath);
    return db.prepare(sql).all(...params) as SemanticResult[];
  } finally {
    db.close();
  }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@voidwire/lore",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
@@ -18,9 +18,6 @@
18
18
  "README.md",
19
19
  "LICENSE"
20
20
  ],
21
- "scripts": {
22
- "test": "bun test"
23
- },
24
21
  "keywords": [
25
22
  "knowledge",
26
23
  "search",
@@ -46,5 +43,8 @@
46
43
  },
47
44
  "devDependencies": {
48
45
  "bun-types": "1.3.5"
46
+ },
47
+ "scripts": {
48
+ "test": "bun test"
49
49
  }
50
- }
50
+ }