@voidwire/lore 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -35,6 +35,20 @@ lore capture task|knowledge|note # Capture knowledge
35
35
  - `--since <date>` — Filter by date (today, yesterday, this-week, YYYY-MM-DD)
36
36
  - `--sources` — List indexed sources with counts
37
37
 
38
+ ### Passthrough Sources
39
+
40
+ Some sources query external services rather than the local index:
41
+
42
+ ```bash
43
+ lore search prismis "kubernetes security" # Semantic search via prismis
44
+ ```
45
+
46
+ | Source | Description | Requires |
47
+ |--------|-------------|----------|
48
+ | `prismis` | Semantic search across saved articles | prismis-daemon running |
49
+
50
+ Passthrough sources appear in `lore search --sources` with `type: "passthrough"`.
51
+
38
52
  ### List Options
39
53
 
40
54
  - `--limit <n>` — Maximum entries
package/cli.ts CHANGED
@@ -22,12 +22,16 @@
22
22
  import {
23
23
  search,
24
24
  searchPrismis,
25
+ searchAtuin,
25
26
  listSources,
26
27
  list,
27
28
  listDomains,
28
29
  captureTask,
29
30
  captureKnowledge,
30
31
  captureNote,
32
+ semanticSearch,
33
+ isOllamaAvailable,
34
+ hasEmbeddings,
31
35
  DOMAINS,
32
36
  type SearchResult,
33
37
  type ListResult,
@@ -72,8 +76,29 @@ function parseArgs(args: string[]): Map<string, string> {
72
76
  return parsed;
73
77
  }
74
78
 
79
// Boolean flags that don't take values
const BOOLEAN_FLAGS = new Set(["help", "sources", "domains", "exact"]);

/**
 * Extract positional (non-flag) arguments from a raw argument list.
 *
 * Rules:
 * - `--flag=value` and flags listed in BOOLEAN_FLAGS consume only themselves.
 * - Any other `--flag` also consumes the following argument as its value,
 *   unless that argument is itself a `--flag` or the list ends.
 * - A bare `--` ends option parsing: everything after it is positional, even
 *   if it starts with `--` (e.g. searching for the literal text "--password").
 *
 * @param args Raw CLI arguments (without the command name).
 * @returns The positional arguments in their original order.
 */
function getPositionalArgs(args: string[]): string[] {
  const positional: string[] = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg === "--") {
      // End-of-options marker: the rest is positional verbatim.
      positional.push(...args.slice(i + 1));
      break;
    }

    if (!arg.startsWith("--")) {
      positional.push(arg);
      continue;
    }

    const flag = arg.slice(2).split("=")[0]; // Handle --flag=value format
    const standalone = BOOLEAN_FLAGS.has(flag) || arg.includes("=");
    const next = args[i + 1];

    // A value-taking flag swallows the next argument when one is present and
    // that argument is not another flag.
    if (!standalone && next !== undefined && !next.startsWith("--")) {
      i++;
    }
  }

  return positional;
}
78
103
 
79
104
  function hasFlag(args: string[], flag: string): boolean {
@@ -112,19 +137,21 @@ function fail(error: string, code: number = 1): never {
112
137
  // Search Command
113
138
  // ============================================================================
114
139
 
115
- function handleSearch(args: string[]): void {
140
+ async function handleSearch(args: string[]): Promise<void> {
116
141
  if (hasFlag(args, "help")) {
117
142
  showSearchHelp();
118
143
  }
119
144
 
120
145
  const parsed = parseArgs(args);
121
146
  const positional = getPositionalArgs(args);
147
+ const exact = hasFlag(args, "exact");
122
148
 
123
149
  // Handle --sources flag
124
150
  if (hasFlag(args, "sources")) {
125
151
  const indexed = listSources();
126
152
  const passthrough = [
127
153
  { source: "prismis", count: null, type: "passthrough" },
154
+ { source: "atuin", count: null, type: "passthrough" },
128
155
  ];
129
156
  const sources = [
130
157
  ...indexed.map((s) => ({ ...s, type: "indexed" })),
@@ -177,15 +204,63 @@ function handleSearch(args: string[]): void {
177
204
  return;
178
205
  }
179
206
 
207
+ // Handle atuin passthrough
208
+ if (source === "atuin") {
209
+ try {
210
+ const results = searchAtuin(query, { limit });
211
+ output({
212
+ success: true,
213
+ results,
214
+ count: results.length,
215
+ });
216
+ console.error(
217
+ `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found`,
218
+ );
219
+ process.exit(0);
220
+ } catch (error) {
221
+ const message = error instanceof Error ? error.message : "Unknown error";
222
+ fail(message, 2);
223
+ }
224
+ return;
225
+ }
226
+
227
+ // Route semantic vs FTS5 based on --exact flag and availability
228
+ if (!exact) {
229
+ try {
230
+ const canUseSemantic = hasEmbeddings() && (await isOllamaAvailable());
231
+ if (canUseSemantic) {
232
+ const results = await semanticSearch(query, { source, limit });
233
+ output({
234
+ success: true,
235
+ results,
236
+ count: results.length,
237
+ mode: "semantic",
238
+ });
239
+ console.error(
240
+ `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (semantic)`,
241
+ );
242
+ process.exit(0);
243
+ }
244
+ // Fall through to FTS5 if semantic not available
245
+ } catch (error) {
246
+ // Semantic search failed, fall back to FTS5
247
+ console.error(
248
+ `⚠️ Semantic search unavailable: ${error instanceof Error ? error.message : "Unknown error"}`,
249
+ );
250
+ }
251
+ }
252
+
253
+ // FTS5 path (default fallback or explicit --exact)
180
254
  try {
181
255
  const results = search(query, { source, limit, since });
182
256
  output({
183
257
  success: true,
184
258
  results,
185
259
  count: results.length,
260
+ mode: exact ? "exact" : "fts5",
186
261
  });
187
262
  console.error(
188
- `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found`,
263
+ `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (${exact ? "exact" : "fts5"})`,
189
264
  );
190
265
  process.exit(0);
191
266
  } catch (error) {
@@ -409,12 +484,14 @@ Usage:
409
484
  lore capture task|knowledge|note Capture knowledge
410
485
 
411
486
  Search Options:
487
+ --exact Use FTS5 text search (bypasses semantic search)
412
488
  --limit <n> Maximum results (default: 20)
413
489
  --since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
414
490
  --sources List indexed sources with counts
415
491
 
416
492
  Passthrough Sources:
417
493
  prismis Semantic search via prismis daemon (requires prismis-daemon running)
494
+ atuin Shell history search (queries ~/.local/share/atuin/history.db directly)
418
495
 
419
496
  List Options:
420
497
  --limit <n> Maximum entries
@@ -458,6 +535,7 @@ Usage:
458
535
  lore search --sources List indexed sources
459
536
 
460
537
  Options:
538
+ --exact Use FTS5 text search (bypasses semantic search)
461
539
  --limit <n> Maximum results (default: 20)
462
540
  --since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
463
541
  --sources List indexed sources with counts
@@ -479,12 +557,16 @@ Indexed Sources:
479
557
  Passthrough Sources:
480
558
  prismis Semantic search via prismis daemon
481
559
  (requires prismis-daemon running)
560
+ atuin Shell history search
561
+ (queries ~/.local/share/atuin/history.db directly)
482
562
 
483
563
  Examples:
484
564
  lore search "authentication"
485
565
  lore search blogs "typescript patterns"
486
566
  lore search commits --since this-week "refactor"
567
+ lore search --exact "def process_data"
487
568
  lore search prismis "kubernetes security"
569
+ lore search atuin "docker build"
488
570
  `);
489
571
  process.exit(0);
490
572
  }
package/index.ts CHANGED
@@ -34,6 +34,13 @@ export {
34
34
  type PrismisSearchOptions,
35
35
  } from "./lib/prismis";
36
36
 
37
+ // Atuin integration
38
+ export {
39
+ searchAtuin,
40
+ type AtuinSearchResult,
41
+ type AtuinSearchOptions,
42
+ } from "./lib/atuin";
43
+
37
44
  // Capture
38
45
  export {
39
46
  captureKnowledge,
@@ -46,3 +53,13 @@ export {
46
53
  type NoteInput,
47
54
  type CaptureEvent,
48
55
  } from "./lib/capture";
56
+
57
+ // Semantic search
58
+ export {
59
+ semanticSearch,
60
+ embedQuery,
61
+ isOllamaAvailable,
62
+ hasEmbeddings,
63
+ type SemanticResult,
64
+ type SemanticSearchOptions,
65
+ } from "./lib/semantic";
package/lib/atuin.ts ADDED
@@ -0,0 +1,160 @@
1
+ /**
2
+ * Atuin shell history integration
3
+ *
4
+ * Queries Atuin SQLite database directly for shell command history.
5
+ * Filters sensitive commands containing passwords, tokens, secrets, API keys.
6
+ */
7
+
8
+ import { existsSync } from "fs";
9
+ import { join } from "path";
10
+ import { Database } from "bun:sqlite";
11
+
12
+ export interface AtuinSearchResult {
13
+ source: string;
14
+ title: string;
15
+ content: string;
16
+ metadata: string;
17
+ rank: number;
18
+ }
19
+
20
+ export interface AtuinSearchOptions {
21
+ limit?: number;
22
+ cwd?: string;
23
+ exitCode?: number;
24
+ }
25
+
26
+ const ATUIN_DB_PATH = join(
27
+ process.env.HOME ?? "",
28
+ ".local",
29
+ "share",
30
+ "atuin",
31
+ "history.db",
32
+ );
33
+
34
+ // Patterns that indicate sensitive data - exclude these commands
35
+ const SENSITIVE_PATTERNS = [
36
+ "%--password%",
37
+ "%--token%",
38
+ "%--secret%",
39
+ "%PASSWORD=%",
40
+ "%TOKEN=%",
41
+ "%SECRET=%",
42
+ "%API_KEY=%",
43
+ "%APIKEY=%",
44
+ "%_KEY=%",
45
+ "export %KEY%",
46
+ "export %TOKEN%",
47
+ "export %SECRET%",
48
+ "export %PASSWORD%",
49
+ "%X-API-Key:%",
50
+ "%Authorization:%",
51
+ "echo $%KEY%",
52
+ "echo $%TOKEN%",
53
+ "echo $%SECRET%",
54
+ ];
55
+
56
+ interface AtuinRow {
57
+ command: string;
58
+ cwd: string;
59
+ exit: number;
60
+ duration: number;
61
+ timestamp: number;
62
+ hostname: string;
63
+ }
64
+
65
/**
 * Search Atuin shell history for commands containing `query`.
 *
 * The query is matched literally as a substring: LIKE metacharacters (`%`,
 * `_`) and the escape character itself are escaped, so a search for "50%" or
 * "my_var" does not behave like a wildcard pattern. Rows matching any of
 * SENSITIVE_PATTERNS and rows with a non-null `deleted_at` are excluded in
 * SQL, before the limit is applied.
 *
 * @param query Text to look for inside the recorded command line.
 * @param options Optional `limit` (default 20), exact `cwd` match, and exact
 *   `exitCode` match.
 * @returns Matching commands, most recent first. `rank` is the negated result
 *   index, so it decreases with age.
 * @throws Error when the Atuin database file does not exist.
 */
export function searchAtuin(
  query: string,
  options: AtuinSearchOptions = {},
): AtuinSearchResult[] {
  if (!existsSync(ATUIN_DB_PATH)) {
    throw new Error(
      `Atuin database not found at ${ATUIN_DB_PATH}. ` +
        "Install Atuin: https://atuin.sh",
    );
  }

  const db = new Database(ATUIN_DB_PATH, { readonly: true });
  const limit = options.limit ?? 20;

  try {
    // Escape LIKE metacharacters so user input is matched literally.
    const literalQuery = query.replace(/[\\%_]/g, "\\$&");

    // Conditions and their bound parameters are built side by side so the
    // placeholder order cannot drift from the parameter order.
    const conditions: string[] = [
      "deleted_at IS NULL",
      "command LIKE ? ESCAPE '\\'",
      ...SENSITIVE_PATTERNS.map(() => "command NOT LIKE ?"),
    ];
    const params: (string | number)[] = [
      `%${literalQuery}%`,
      ...SENSITIVE_PATTERNS,
    ];

    if (options.cwd) {
      conditions.push("cwd = ?");
      params.push(options.cwd);
    }

    if (options.exitCode !== undefined) {
      conditions.push("exit = ?");
      params.push(options.exitCode);
    }

    params.push(limit);

    const sql = `
      SELECT command, cwd, exit, duration, timestamp, hostname
      FROM history
      WHERE ${conditions.join(" AND ")}
      ORDER BY timestamp DESC
      LIMIT ?
    `;

    const rows = db.prepare(sql).all(...params) as AtuinRow[];

    return rows.map((row, index) => {
      // Timestamp is divided down from nanoseconds to whole seconds, then
      // rendered as a YYYY-MM-DD date.
      const timestampSec = Math.floor(row.timestamp / 1_000_000_000);
      const dateStr = new Date(timestampSec * 1000).toISOString().split("T")[0];

      // Duration is converted from nanoseconds to milliseconds.
      const durationMs = Math.floor(row.duration / 1_000_000);

      // Title is capped at 80 command characters (77 plus an ellipsis).
      const title =
        row.command.length > 80
          ? `[shell] ${row.command.slice(0, 77)}...`
          : `[shell] ${row.command}`;

      // The placeholder value "unknown" is normalized to an empty cwd.
      const cwd = row.cwd === "unknown" ? "" : row.cwd;

      return {
        source: "atuin",
        title,
        content: row.command,
        metadata: JSON.stringify({
          command: row.command,
          cwd,
          exit_code: row.exit,
          duration_ms: durationMs,
          date: dateStr,
          hostname: row.hostname,
        }),
        rank: -index, // Simple ranking by recency
      };
    });
  } finally {
    db.close();
  }
}
@@ -0,0 +1,278 @@
1
+ /**
2
+ * lib/semantic.ts - Semantic search via Ollama embeddings
3
+ *
4
+ * Query embedding and KNN search against sqlite-vec virtual table.
5
+ * Uses Bun's built-in SQLite with sqlite-vec extension.
6
+ *
7
+ * Note: macOS ships Apple's SQLite which disables extension loading.
8
+ * We use Homebrew's SQLite via setCustomSQLite() to enable sqlite-vec.
9
+ */
10
+
11
+ import { Database } from "bun:sqlite";
12
+ import { homedir } from "os";
13
+ import { existsSync, readFileSync } from "fs";
14
+
15
// Use Homebrew SQLite on macOS to enable extension loading.
// Must be called before any Database instances are created.
// Homebrew installs under /opt/homebrew on Apple Silicon and under /usr/local
// on Intel Macs, so both prefixes are probed; the first library found wins.
const HOMEBREW_SQLITE_CANDIDATES = [
  "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib",
  "/usr/local/opt/sqlite/lib/libsqlite3.dylib",
];
const HOMEBREW_SQLITE = HOMEBREW_SQLITE_CANDIDATES.find((candidate) =>
  existsSync(candidate),
);
if (HOMEBREW_SQLITE) {
  Database.setCustomSQLite(HOMEBREW_SQLITE);
}
21
+
22
+ export interface SemanticResult {
23
+ source: string;
24
+ title: string;
25
+ content: string;
26
+ metadata: string;
27
+ distance: number;
28
+ }
29
+
30
+ export interface SemanticSearchOptions {
31
+ source?: string;
32
+ limit?: number;
33
+ }
34
+
35
/** Where to reach the embedding service and which model to request. */
interface EmbeddingConfig {
  endpoint: string;
  model: string;
}

const DEFAULT_CONFIG: EmbeddingConfig = {
  endpoint: "http://localhost:11434",
  model: "nomic-embed-text",
};

/** Absolute path of the lore SQLite database under the user's home directory. */
function getDatabasePath(): string {
  return `${homedir()}/.local/share/lore/lore.db`;
}

/** Absolute path of the lore config file under the user's home directory. */
function getConfigPath(): string {
  return `${homedir()}/.config/lore/config.toml`;
}

/**
 * Read a double-quoted string value for `key` inside `[section]`.
 *
 * This is a deliberately small regex lookup, not a TOML parser. The section
 * header and the key must each start their line (leading spaces/tabs allowed),
 * so commented-out lines (`# endpoint = ...`) and keys that merely end with
 * the name (`my_endpoint`) are not matched. The scan stops at the next `[`
 * character, so a key placed after a value containing `[` is not found.
 */
function readTomlString(
  content: string,
  section: string,
  key: string,
): string | undefined {
  const pattern = new RegExp(
    `^[ \\t]*\\[${section}\\][^\\[]*?^[ \\t]*${key}[ \\t]*=[ \\t]*"([^"]+)"`,
    "m",
  );
  return content.match(pattern)?.[1];
}

/**
 * Load embedding config from config.toml.
 *
 * Endpoint and model are resolved independently:
 * - endpoint: `[embedding].endpoint`, else `[llm].api_base`, else the default
 * - model: `[embedding].model`, else the default
 *
 * A missing or unreadable config file yields the defaults. A fresh object is
 * returned on every call so callers cannot mutate the shared defaults.
 */
function loadEmbeddingConfig(): EmbeddingConfig {
  const configPath = getConfigPath();

  if (!existsSync(configPath)) {
    return { ...DEFAULT_CONFIG };
  }

  try {
    const content = readFileSync(configPath, "utf-8");

    const endpoint =
      readTomlString(content, "embedding", "endpoint") ??
      readTomlString(content, "llm", "api_base") ??
      DEFAULT_CONFIG.endpoint;
    const model =
      readTomlString(content, "embedding", "model") ?? DEFAULT_CONFIG.model;

    return { endpoint, model };
  } catch {
    // Best effort: an unreadable config behaves like a missing one.
    return { ...DEFAULT_CONFIG };
  }
}
98
+
99
/**
 * Check if Ollama is available at the configured endpoint.
 *
 * Issues a GET to `<endpoint>/api/tags` and aborts it after 2 seconds.
 *
 * @returns true when the request completes with an OK status; false on a
 *   non-OK status, a network error, or a timeout.
 */
export async function isOllamaAvailable(): Promise<boolean> {
  const config = loadEmbeddingConfig();
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 2000);

  try {
    const response = await fetch(`${config.endpoint}/api/tags`, {
      method: "GET",
      signal: controller.signal,
    });
    return response.ok;
  } catch {
    return false;
  } finally {
    // Always cancel the timer, including when fetch rejects early; otherwise
    // the pending timer lingers for the remainder of the 2 seconds.
    clearTimeout(timeout);
  }
}
119
+
120
/**
 * Turn a query string into an embedding by POSTing it to the configured
 * Ollama endpoint (`/api/embeddings`).
 *
 * @param query Text to embed.
 * @returns The embedding vector; it is required to have exactly 768 entries.
 * @throws Error when the HTTP response is not OK, or when the response does
 *   not contain a 768-element `embedding` array.
 */
export async function embedQuery(query: string): Promise<number[]> {
  const { endpoint, model } = loadEmbeddingConfig();

  const res = await fetch(`${endpoint}/api/embeddings`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model, prompt: query }),
  });

  if (!res.ok) {
    throw new Error(`Ollama API error: ${res.status} ${res.statusText}`);
  }

  const payload = (await res.json()) as { embedding?: number[] };
  const vector = payload.embedding;

  // Accept only a well-formed vector of the expected dimensionality.
  if (Array.isArray(vector) && vector.length === 768) {
    return vector;
  }

  throw new Error(
    `Invalid embedding: expected 768 dims, got ${vector?.length ?? 0}`,
  );
}
154
+
155
/**
 * Check if the embeddings table has any data.
 *
 * This is a best-effort probe: every failure mode reports false rather than
 * throwing. That covers a missing database file, an unset SQLITE_VEC_PATH, a
 * database that cannot be opened, an extension that cannot be loaded, and a
 * missing `embeddings` table.
 *
 * @returns true only when the `embeddings` table is readable and non-empty.
 */
export function hasEmbeddings(): boolean {
  const dbPath = getDatabasePath();

  if (!existsSync(dbPath)) {
    return false;
  }

  // Cheap precondition first: without the extension path there is no point
  // opening the database at all.
  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    return false;
  }

  let db: Database | undefined;

  try {
    // Opened inside the try so an open failure is reported as false like
    // every other failure here.
    db = new Database(dbPath, { readonly: true });
    db.loadExtension(vecPath);

    const row = db
      .prepare("SELECT COUNT(*) as count FROM embeddings")
      .get() as { count: number } | null;
    return (row?.count ?? 0) > 0;
  } catch {
    return false;
  } finally {
    db?.close();
  }
}
185
+
186
/**
 * Serialize an embedding to the raw float32 byte layout passed to sqlite-vec.
 *
 * Values are copied into a fresh Float32Array (so each number is rounded to
 * 32-bit precision) and the returned bytes view exactly that copy. Any
 * array-like of numbers is accepted, including readonly arrays and typed
 * arrays; the input is never mutated or aliased.
 *
 * @param embedding Vector components.
 * @returns A byte view of length `4 * embedding.length`.
 */
function serializeEmbedding(embedding: ArrayLike<number>): Uint8Array {
  const floats = new Float32Array(embedding);
  return new Uint8Array(floats.buffer, floats.byteOffset, floats.byteLength);
}
193
+
194
/**
 * Perform semantic search using KNN against the embeddings table.
 *
 * The query is embedded via `embedQuery`, then matched against
 * `embeddings.embedding` with `k = limit * 3` candidates. Candidates are
 * joined to the `search` table, grouped per document (best, i.e. smallest,
 * chunk distance wins) and the closest `limit` documents are returned.
 *
 * NOTE(review): when `options.source` is set, the source filter presumably
 * applies to the k nearest candidates rather than widening the candidate set,
 * so a source-filtered query may return fewer than `limit` rows — confirm
 * against sqlite-vec's KNN semantics.
 *
 * @param query Natural-language query text.
 * @param options Optional `source` filter and `limit` (default 20).
 * @returns Matching documents ordered by ascending distance.
 * @throws Error when the database file is missing, SQLITE_VEC_PATH is unset,
 *   or embedding the query fails.
 */
export async function semanticSearch(
  query: string,
  options: SemanticSearchOptions = {},
): Promise<SemanticResult[]> {
  const dbPath = getDatabasePath();

  if (!existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
  }

  // Validate local prerequisites before the network round-trip to the
  // embedding service, so a misconfiguration fails fast.
  const vecPath = process.env.SQLITE_VEC_PATH;
  if (!vecPath) {
    throw new Error(
      'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
    );
  }

  // Get query embedding
  const queryEmbedding = await embedQuery(query);
  const queryBlob = serializeEmbedding(queryEmbedding);

  const db = new Database(dbPath, { readonly: true });

  try {
    db.loadExtension(vecPath);

    const limit = options.limit ?? 20;
    const source = options.source;

    // One statement for both cases; the optional source clause and its
    // parameter are added together so placeholder order stays in sync.
    const sql = `
      SELECT
        s.source,
        s.title,
        s.content,
        s.metadata,
        MIN(e.distance) as distance
      FROM embeddings e
      JOIN search s ON e.doc_id = s.rowid
      WHERE e.embedding MATCH ?
        AND k = ?
        ${source ? "AND s.source = ?" : ""}
      GROUP BY s.rowid
      ORDER BY distance
      LIMIT ?
    `;

    const params: (Uint8Array | string | number)[] = [
      queryBlob,
      limit * 3, // Fetch more for grouping
      ...(source ? [source] : []),
      limit,
    ];

    return db.prepare(sql).all(...params) as SemanticResult[];
  } finally {
    db.close();
  }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@voidwire/lore",
3
- "version": "0.1.2",
3
+ "version": "0.1.4",
4
4
  "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
@@ -42,7 +42,7 @@
42
42
  "bun": ">=1.0.0"
43
43
  },
44
44
  "devDependencies": {
45
- "bun-types": "latest"
45
+ "bun-types": "1.3.5"
46
46
  },
47
47
  "scripts": {
48
48
  "test": "bun test"