@voidwire/lore 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/cli.ts +52 -10
  2. package/index.ts +10 -0
  3. package/lib/semantic.ts +278 -0
  4. package/package.json +5 -5
package/cli.ts CHANGED
@@ -29,6 +29,9 @@ import {
29
29
  captureTask,
30
30
  captureKnowledge,
31
31
  captureNote,
32
+ semanticSearch,
33
+ isOllamaAvailable,
34
+ hasEmbeddings,
32
35
  DOMAINS,
33
36
  type SearchResult,
34
37
  type ListResult,
@@ -73,20 +76,27 @@ function parseArgs(args: string[]): Map<string, string> {
73
76
  return parsed;
74
77
  }
75
78
 
79
// Switch-style flags: they are either present or absent and never consume the
// argument that follows them.
const BOOLEAN_FLAGS = new Set(["help", "sources", "domains", "exact"]);

/**
 * Collect the positional (non-flag) arguments from a raw argument list.
 *
 * Anything starting with `--` is a flag and is dropped. A flag additionally
 * swallows the argument after it unless the flag is listed in BOOLEAN_FLAGS,
 * is written as `--flag=value`, is the last argument, or is followed by
 * another `--flag`.
 *
 * @param args - Raw command-line arguments.
 * @returns The positional arguments, in their original order.
 */
function getPositionalArgs(args: string[]): string[] {
  const positional: string[] = [];
  // True when the previous flag claimed the current argument as its value.
  let claimedByFlag = false;

  for (const [index, arg] of args.entries()) {
    if (claimedByFlag) {
      claimedByFlag = false;
      continue;
    }

    if (!arg.startsWith("--")) {
      positional.push(arg);
      continue;
    }

    // `--flag=value` carries its own value; boolean switches have none.
    // Without an "=", everything after the leading dashes is the flag name.
    if (arg.includes("=") || BOOLEAN_FLAGS.has(arg.slice(2))) {
      continue;
    }

    const next = args[index + 1];
    claimedByFlag = next !== undefined && !next.startsWith("--");
  }

  return positional;
}
@@ -127,13 +137,14 @@ function fail(error: string, code: number = 1): never {
127
137
  // Search Command
128
138
  // ============================================================================
129
139
 
130
- function handleSearch(args: string[]): void {
140
+ async function handleSearch(args: string[]): Promise<void> {
131
141
  if (hasFlag(args, "help")) {
132
142
  showSearchHelp();
133
143
  }
134
144
 
135
145
  const parsed = parseArgs(args);
136
146
  const positional = getPositionalArgs(args);
147
+ const exact = hasFlag(args, "exact");
137
148
 
138
149
  // Handle --sources flag
139
150
  if (hasFlag(args, "sources")) {
@@ -213,15 +224,43 @@ function handleSearch(args: string[]): void {
213
224
  return;
214
225
  }
215
226
 
227
+ // Route semantic vs FTS5 based on --exact flag and availability
228
+ if (!exact) {
229
+ try {
230
+ const canUseSemantic = hasEmbeddings() && (await isOllamaAvailable());
231
+ if (canUseSemantic) {
232
+ const results = await semanticSearch(query, { source, limit });
233
+ output({
234
+ success: true,
235
+ results,
236
+ count: results.length,
237
+ mode: "semantic",
238
+ });
239
+ console.error(
240
+ `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (semantic)`,
241
+ );
242
+ process.exit(0);
243
+ }
244
+ // Fall through to FTS5 if semantic not available
245
+ } catch (error) {
246
+ // Semantic search failed, fall back to FTS5
247
+ console.error(
248
+ `⚠️ Semantic search unavailable: ${error instanceof Error ? error.message : "Unknown error"}`,
249
+ );
250
+ }
251
+ }
252
+
253
+ // FTS5 path (default fallback or explicit --exact)
216
254
  try {
217
255
  const results = search(query, { source, limit, since });
218
256
  output({
219
257
  success: true,
220
258
  results,
221
259
  count: results.length,
260
+ mode: exact ? "exact" : "fts5",
222
261
  });
223
262
  console.error(
224
- `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found`,
263
+ `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (${exact ? "exact" : "fts5"})`,
225
264
  );
226
265
  process.exit(0);
227
266
  } catch (error) {
@@ -445,6 +484,7 @@ Usage:
445
484
  lore capture task|knowledge|note Capture knowledge
446
485
 
447
486
  Search Options:
487
+ --exact Use FTS5 text search (bypasses semantic search)
448
488
  --limit <n> Maximum results (default: 20)
449
489
  --since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
450
490
  --sources List indexed sources with counts
@@ -495,6 +535,7 @@ Usage:
495
535
  lore search --sources List indexed sources
496
536
 
497
537
  Options:
538
+ --exact Use FTS5 text search (bypasses semantic search)
498
539
  --limit <n> Maximum results (default: 20)
499
540
  --since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
500
541
  --sources List indexed sources with counts
@@ -523,6 +564,7 @@ Examples:
523
564
  lore search "authentication"
524
565
  lore search blogs "typescript patterns"
525
566
  lore search commits --since this-week "refactor"
567
+ lore search --exact "def process_data"
526
568
  lore search prismis "kubernetes security"
527
569
  lore search atuin "docker build"
528
570
  `);
package/index.ts CHANGED
@@ -53,3 +53,13 @@ export {
53
53
  type NoteInput,
54
54
  type CaptureEvent,
55
55
  } from "./lib/capture";
56
+
57
+ // Semantic search
58
+ export {
59
+ semanticSearch,
60
+ embedQuery,
61
+ isOllamaAvailable,
62
+ hasEmbeddings,
63
+ type SemanticResult,
64
+ type SemanticSearchOptions,
65
+ } from "./lib/semantic";
@@ -0,0 +1,278 @@
1
+ /**
2
+ * lib/semantic.ts - Semantic search via Ollama embeddings
3
+ *
4
+ * Query embedding and KNN search against sqlite-vec virtual table.
5
+ * Uses Bun's built-in SQLite with sqlite-vec extension.
6
+ *
7
+ * Note: macOS ships Apple's SQLite which disables extension loading.
8
+ * We use Homebrew's SQLite via setCustomSQLite() to enable sqlite-vec.
9
+ */
10
+
11
+ import { Database } from "bun:sqlite";
12
+ import { homedir } from "os";
13
+ import { existsSync, readFileSync } from "fs";
14
+
15
// macOS ships Apple's SQLite, which has extension loading disabled, so the
// sqlite-vec extension cannot be loaded into it. Point Bun at Homebrew's
// SQLite instead. This must run before any Database instance is created.
//
// Homebrew's default prefix is /opt/homebrew on Apple Silicon and /usr/local
// on Intel Macs; both are probed and the first library found is used. When
// neither exists the built-in SQLite is left untouched.
const HOMEBREW_SQLITE = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib";
const HOMEBREW_SQLITE_CANDIDATES = [
  HOMEBREW_SQLITE,
  "/usr/local/opt/sqlite/lib/libsqlite3.dylib",
];
const customSqlitePath = HOMEBREW_SQLITE_CANDIDATES.find((candidate) =>
  existsSync(candidate),
);
if (customSqlitePath !== undefined) {
  Database.setCustomSQLite(customSqlitePath);
}
21
+
22
/**
 * One document row produced by semanticSearch.
 *
 * The text fields are read from the `search` table; `distance` comes from the
 * KNN match against the `embeddings` table.
 */
export interface SemanticResult {
  /** Value of the row's `source` column. */
  source: string;
  /** Value of the row's `title` column. */
  title: string;
  /** Value of the row's `content` column. */
  content: string;
  /** Value of the row's `metadata` column, passed through as stored. */
  metadata: string;
  /** Smallest distance among the document's matched embeddings (results are sorted ascending on it). */
  distance: number;
}
29
+
30
/** Optional filters accepted by semanticSearch. */
export interface SemanticSearchOptions {
  /** When set, only rows whose `source` column equals this value are returned. */
  source?: string;
  /** Maximum number of documents to return; semanticSearch uses 20 when omitted. */
  limit?: number;
}
34
+
35
/** Connection settings for the embedding service. */
interface EmbeddingConfig {
  /** Base URL of the service; request paths such as `/api/tags` are appended to it. */
  endpoint: string;
  /** Model name sent in the body of embedding requests. */
  model: string;
}

// Settings used whenever config.toml is missing, unreadable, or does not name
// an endpoint.
const DEFAULT_CONFIG: EmbeddingConfig = {
  endpoint: "http://localhost:11434",
  model: "nomic-embed-text",
};
44
+
45
/**
 * Location of the lore SQLite database inside the current user's home
 * directory.
 *
 * @returns `<home>/.local/share/lore/lore.db`
 */
function getDatabasePath(): string {
  const home = homedir();
  return home + "/.local/share/lore/lore.db";
}
48
+
49
/**
 * Location of the lore configuration file inside the current user's home
 * directory.
 *
 * @returns `<home>/.config/lore/config.toml`
 */
function getConfigPath(): string {
  const home = homedir();
  return home + "/.config/lore/config.toml";
}
52
+
53
/**
 * Read one quoted string value (`key = "value"` or `key = 'value'`) from a
 * named table of a TOML document.
 *
 * This is a small line-based reader, not a TOML parser: it tracks the active
 * `[table]` header, skips blank and comment lines, and only recognises bare
 * keys with a non-empty single-line quoted value. Matching the key at the
 * start of a line means commented-out entries and keys that merely end in
 * the requested name are never picked up.
 *
 * @param content - Full text of the TOML file.
 * @param section - Table name without brackets, e.g. `embedding`.
 * @param key - Key to look up inside that table.
 * @returns The first matching value, or `undefined` when absent.
 */
function readTomlString(
  content: string,
  section: string,
  key: string,
): string | undefined {
  const headerPattern = /^(\[{1,2})\s*([^\[\]]+?)\s*\]{1,2}\s*(?:#.*)?$/;
  const assignmentPattern = /^([A-Za-z0-9_-]+)\s*=\s*(?:"([^"]+)"|'([^']+)')/;
  let inSection = false;

  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line === "" || line.startsWith("#")) {
      continue;
    }

    // A table header switches the active section; `[[array]]` headers never
    // count as the requested plain table.
    const header = headerPattern.exec(line);
    if (header) {
      inSection = header[1] === "[" && header[2] === section;
      continue;
    }

    if (!inSection) {
      continue;
    }

    const assignment = assignmentPattern.exec(line);
    if (assignment && assignment[1] === key) {
      return assignment[2] ?? assignment[3];
    }
  }

  return undefined;
}

/**
 * Load the embedding settings from config.toml.
 *
 * The endpoint is taken from `[embedding].endpoint`, then `[llm].api_base`,
 * then DEFAULT_CONFIG. The model is taken from `[embedding].model`, then
 * DEFAULT_CONFIG, independently of where the endpoint came from. Trailing
 * slashes are stripped from the endpoint so that appending `/api/...` never
 * produces a double slash.
 *
 * @returns The resolved settings; DEFAULT_CONFIG when the file is missing or
 *   cannot be read.
 */
function loadEmbeddingConfig(): EmbeddingConfig {
  const configPath = getConfigPath();

  if (!existsSync(configPath)) {
    return DEFAULT_CONFIG;
  }

  try {
    const content = readFileSync(configPath, "utf-8");

    const endpoint =
      readTomlString(content, "embedding", "endpoint") ??
      readTomlString(content, "llm", "api_base") ??
      DEFAULT_CONFIG.endpoint;
    const model =
      readTomlString(content, "embedding", "model") ?? DEFAULT_CONFIG.model;

    return {
      endpoint: endpoint.replace(/\/+$/, ""),
      model,
    };
  } catch {
    // Best effort: an unreadable config must not break search.
    return DEFAULT_CONFIG;
  }
}
98
+
99
/**
 * Check whether Ollama answers at the configured endpoint.
 *
 * Sends `GET <endpoint>/api/tags` and aborts the request after 2 seconds.
 *
 * @returns `true` when the response status is OK; `false` on any non-OK
 *   status, network error, or timeout. Never rejects because of the request.
 */
export async function isOllamaAvailable(): Promise<boolean> {
  const config = loadEmbeddingConfig();
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 2000);

  try {
    const response = await fetch(`${config.endpoint}/api/tags`, {
      method: "GET",
      signal: controller.signal,
    });
    return response.ok;
  } catch {
    return false;
  } finally {
    // Clear the timer on every path, including a rejected fetch, so a failed
    // probe does not leave a pending timer behind.
    clearTimeout(timeout);
  }
}
119
+
120
/**
 * Turn a query string into an embedding vector via Ollama.
 *
 * Posts the configured model and the query (as `prompt`) to
 * `<endpoint>/api/embeddings` and validates the `embedding` field of the
 * JSON response.
 *
 * @param query - Text to embed.
 * @returns The 768-element embedding vector.
 * @throws Error when the HTTP status is not OK, or when the response does not
 *   contain an array of exactly 768 elements.
 */
export async function embedQuery(query: string): Promise<number[]> {
  const { endpoint, model } = loadEmbeddingConfig();

  const response = await fetch(`${endpoint}/api/embeddings`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model, prompt: query }),
  });
  if (!response.ok) {
    const { status, statusText } = response;
    throw new Error(`Ollama API error: ${status} ${statusText}`);
  }

  const payload = (await response.json()) as { embedding?: number[] };
  const vector = payload.embedding;
  if (!Array.isArray(vector) || vector.length !== 768) {
    const received = vector?.length ?? 0;
    throw new Error(`Invalid embedding: expected 768 dims, got ${received}`);
  }

  return vector;
}
154
+
155
/**
 * Check whether the `embeddings` table holds at least one row.
 *
 * This is a best-effort probe: every failure mode is reported as `false`
 * rather than thrown, including an unset SQLITE_VEC_PATH, a missing database
 * file, a database that cannot be opened, an extension that cannot be loaded,
 * and a missing table.
 *
 * @returns `true` only when the row count could be read and is greater than 0.
 */
export function hasEmbeddings(): boolean {
  // Without the extension path the table cannot be queried, so skip opening
  // the database at all.
  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    return false;
  }

  const dbPath = getDatabasePath();
  if (!existsSync(dbPath)) {
    return false;
  }

  let db: Database | undefined;
  try {
    // Opened inside the try so that an open failure also yields `false`.
    db = new Database(dbPath, { readonly: true });
    db.loadExtension(vecPath);

    const row = db
      .prepare("SELECT COUNT(*) as count FROM embeddings")
      .get() as { count: number } | null;
    return (row?.count ?? 0) > 0;
  } catch {
    return false;
  } finally {
    db?.close();
  }
}
185
+
186
/**
 * Pack an embedding into the raw bytes bound to the sqlite-vec query.
 *
 * Each number is converted to a 32-bit float; the result is a byte view over
 * those floats (4 bytes per element, platform byte order).
 *
 * @param embedding - Vector components.
 * @returns A Uint8Array of length `embedding.length * 4`.
 */
function serializeEmbedding(embedding: number[]): Uint8Array {
  const floats = Float32Array.from(embedding);
  return new Uint8Array(floats.buffer, floats.byteOffset, floats.byteLength);
}
193
+
194
/**
 * Perform a semantic search: embed the query, then run a KNN match against
 * the `embeddings` table joined to the `search` table.
 *
 * Rows are grouped per `search` row so each document appears once with the
 * distance of its best-matching embedding, ordered by ascending distance.
 *
 * @param query - Text to search for.
 * @param options - Optional `source` filter and `limit` (default 20).
 * @returns Up to `limit` documents, nearest first.
 * @throws Error when the database file is missing, when SQLITE_VEC_PATH is
 *   not set, when embedding the query fails, or when SQLite rejects the
 *   extension or the query.
 */
export async function semanticSearch(
  query: string,
  options: SemanticSearchOptions = {},
): Promise<SemanticResult[]> {
  const dbPath = getDatabasePath();
  if (!existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
  }

  // Check local configuration before paying for the embedding request, so a
  // missing extension path fails immediately.
  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    throw new Error(
      'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
    );
  }

  const limit = options.limit ?? 20;
  const queryBlob = serializeEmbedding(await embedQuery(query));

  const db = new Database(dbPath, { readonly: true });
  try {
    db.loadExtension(vecPath);

    // Ask for three times as many neighbours as requested: several of them
    // may belong to the same document and collapse in the GROUP BY.
    const params: (Uint8Array | string | number)[] = [queryBlob, limit * 3];

    // Only a fixed SQL fragment is spliced in; every value stays a bound
    // parameter.
    // NOTE(review): the source filter appears to apply to the k nearest rows
    // rather than before the KNN match, so a filtered search may return fewer
    // than `limit` documents — confirm against sqlite-vec behaviour.
    let sourceClause = "";
    if (options.source) {
      sourceClause = "AND s.source = ?";
      params.push(options.source);
    }
    params.push(limit);

    const sql = `
      SELECT
        s.source,
        s.title,
        s.content,
        s.metadata,
        MIN(e.distance) as distance
      FROM embeddings e
      JOIN search s ON e.doc_id = s.rowid
      WHERE e.embedding MATCH ?
        AND k = ?
        ${sourceClause}
      GROUP BY s.rowid
      ORDER BY distance
      LIMIT ?
    `;

    return db.prepare(sql).all(...params) as SemanticResult[];
  } finally {
    db.close();
  }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@voidwire/lore",
3
- "version": "0.1.3",
3
+ "version": "0.1.4",
4
4
  "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
@@ -18,9 +18,6 @@
18
18
  "README.md",
19
19
  "LICENSE"
20
20
  ],
21
- "scripts": {
22
- "test": "bun test"
23
- },
24
21
  "keywords": [
25
22
  "knowledge",
26
23
  "search",
@@ -46,5 +43,8 @@
46
43
  },
47
44
  "devDependencies": {
48
45
  "bun-types": "1.3.5"
46
+ },
47
+ "scripts": {
48
+ "test": "bun test"
49
49
  }
50
- }
50
+ }