@voidwire/lore 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.ts CHANGED
@@ -41,6 +41,9 @@ import {
41
41
  hybridSearch,
42
42
  formatBriefSearch,
43
43
  hasEmbeddings,
44
+ findPurgeMatches,
45
+ deleteEntries,
46
+ PURGEABLE_SOURCES,
44
47
  SOURCES,
45
48
  type SearchResult,
46
49
  type HybridResult,
@@ -55,6 +58,7 @@ import {
55
58
  type ObservationInput,
56
59
  type ObservationSubtype,
57
60
  type ObservationConfidence,
61
+ type PurgeableSource,
58
62
  } from "./index";
59
63
  import { isValidLoreType, LORE_TYPES } from "./lib/types";
60
64
  import { runIndexer } from "./lib/indexer";
@@ -103,6 +107,7 @@ const BOOLEAN_FLAGS = new Set([
103
107
  "brief",
104
108
  "list",
105
109
  "rebuild",
110
+ "force",
106
111
  ]);
107
112
 
108
113
  function getPositionalArgs(args: string[]): string[] {
@@ -611,6 +616,133 @@ Examples:
611
616
  process.exit(0);
612
617
  }
613
618
 
619
+ // ============================================================================
620
+ // Purge Command
621
+ // ============================================================================
622
+
623
/**
 * Ask on the controlling terminal whether `count` entries should be deleted.
 *
 * The prompt is written to stderr and the answer is read from /dev/tty, so
 * neither stdout nor stdin of the process is touched by the confirmation.
 *
 * @param count - Number of entries that are about to be deleted
 * @returns true only when the user answers "y" or "yes" (case-insensitive)
 * @throws Error when /dev/tty cannot be opened (no terminal available)
 */
function confirmPurgeOnTty(count: number): boolean {
  process.stderr.write(`Delete ${count} entries? (y/N) `);

  const fs = require("fs") as typeof import("fs");

  let fd: number;
  try {
    fd = fs.openSync("/dev/tty", "r");
  } catch {
    // Without a terminal there is nobody to answer; say how to proceed
    // instead of surfacing a bare ENOENT/ENXIO.
    throw new Error(
      "Cannot prompt for confirmation: no terminal available. Re-run with --force to skip the prompt.",
    );
  }

  try {
    const buf = Buffer.alloc(16);
    const bytesRead = fs.readSync(fd, buf, 0, buf.length, null);
    const answer = buf.toString("utf8", 0, bytesRead).trim().toLowerCase();
    return answer === "y" || answer === "yes";
  } finally {
    // Always release the descriptor, even when the read fails.
    fs.closeSync(fd);
  }
}

/**
 * Handle `lore purge`: find entries whose content matches --match, list them
 * on stderr, ask for confirmation (unless --force) and delete them.
 *
 * A JSON summary is emitted through `output()` in every outcome (no matches,
 * aborted, deleted). Errors raised while searching, prompting or deleting are
 * reported through `fail()` with code 2.
 *
 * @param args - Arguments following the `purge` command
 */
async function handlePurge(args: string[]): Promise<void> {
  if (hasFlag(args, "help")) {
    showPurgeHelp();
  }

  const parsed = parseArgs(args);
  const matchQuery = parsed.get("match");

  if (!matchQuery) {
    fail('Missing --match flag. Use: lore purge --match "content to find"');
  }

  const force = hasFlag(args, "force");

  // Resolve --source against the purgeable list; `find` yields a properly
  // typed value, so no type assertion on unvalidated input is needed.
  const sourceArg = parsed.get("source");
  const sourceFilter = PURGEABLE_SOURCES.find((s) => s === sourceArg);

  if (sourceArg && !sourceFilter) {
    fail(
      `Invalid source: ${sourceArg}. Purgeable sources: ${PURGEABLE_SOURCES.join(", ")}`,
    );
  }

  try {
    const matches = findPurgeMatches(matchQuery, { source: sourceFilter });

    if (matches.length === 0) {
      output({
        success: true,
        matches: 0,
        deleted: 0,
        message: "No matching entries found",
      });
      console.error("No matching entries found.");
      process.exit(0);
    }

    // Display matches
    console.error(`\nFound ${matches.length} matching entries:\n`);
    for (const m of matches) {
      const preview =
        m.content.length > 100 ? m.content.slice(0, 100) + "..." : m.content;
      console.error(`  [${m.rowid}] ${m.source}/${m.type}: ${preview}`);
    }
    console.error("");

    // Confirm unless --force
    if (!force && !confirmPurgeOnTty(matches.length)) {
      output({
        success: true,
        matches: matches.length,
        deleted: 0,
        message: "Aborted",
      });
      console.error("Aborted.");
      process.exit(0);
    }

    const result = deleteEntries(matches.map((m) => m.rowid));

    output({
      success: true,
      matches: matches.length,
      deleted: result.deleted,
      rowids: result.rowids,
    });
    console.error(`Purged ${result.deleted} entries from search + embeddings.`);
    process.exit(0);
  } catch (error) {
    const message = error instanceof Error ? error.message : "Unknown error";
    fail(message, 2);
  }
}
709
+
710
/**
 * Print the usage text for `lore purge` to stdout and exit with status 0.
 */
function showPurgeHelp(): void {
  const helpText = `
lore purge - Delete entries from purgeable sources

Usage:
lore purge --match "content string" Find and delete matching entries
lore purge --match "content" --force Delete without confirmation
lore purge --match "content" --source captures Filter by source

Purgeable Sources:
captures Quick captures (knowledge, decisions, gotchas, etc.)
observations Model observations about user patterns
teachings Teaching moments

Non-purgeable sources (blogs, commits, obsidian, etc.) are never affected.

Options:
--match <text> Content to search for (required, uses LIKE matching)
--source <src> Limit to specific purgeable source
--force Skip confirmation prompt
--help Show this help

Process:
1. Searches purgeable sources for entries containing the match text
2. Displays matches with rowid, source, type, and content preview
3. Asks for confirmation (unless --force)
4. Deletes from both FTS5 search table and vec0 embeddings table

Examples:
lore purge --match "install.sh overwrites config.toml"
lore purge --match "bracket prefix" --force
lore purge --match "stale gotcha" --source captures
`;

  console.log(helpText);
  process.exit(0);
}
745
+
614
746
  // ============================================================================
615
747
  // Capture Command
616
748
  // ============================================================================
@@ -916,6 +1048,7 @@ Usage:
916
1048
  lore about <project> Aggregate view of project knowledge
917
1049
  lore about <project> --brief Compact project summary
918
1050
  lore capture task|knowledge|note|teaching Capture knowledge
1051
+ lore purge --match "content" Delete matching entries
919
1052
  lore index [source] [--rebuild] [--list] Run indexers
920
1053
 
921
1054
  Search Options:
@@ -1303,9 +1436,12 @@ async function main(): Promise<void> {
1303
1436
  case "index":
1304
1437
  await handleIndex(commandArgs);
1305
1438
  break;
1439
+ case "purge":
1440
+ await handlePurge(commandArgs);
1441
+ break;
1306
1442
  default:
1307
1443
  fail(
1308
- `Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, or index`,
1444
+ `Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, purge, or index`,
1309
1445
  );
1310
1446
  }
1311
1447
  }
package/index.ts CHANGED
@@ -102,3 +102,13 @@ export { LoreType, LORE_TYPES, isValidLoreType } from "./lib/types";
102
102
 
103
103
  // Real-time indexing
104
104
  export { indexAndEmbed } from "./lib/realtime";
105
+
106
+ // Purge
107
+ export {
108
+ findPurgeMatches,
109
+ deleteEntries,
110
+ PURGEABLE_SOURCES,
111
+ type PurgeableSource,
112
+ type PurgeMatch,
113
+ type PurgeResult,
114
+ } from "./lib/purge";
package/lib/db.ts CHANGED
@@ -45,7 +45,9 @@ export function openDatabase(readonly = false): Database {
45
45
  throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
46
46
  }
47
47
 
48
- const db = new Database(dbPath, { readonly });
48
+ const db = readonly
49
+ ? new Database(dbPath, { readonly: true })
50
+ : new Database(dbPath);
49
51
 
50
52
  // Load sqlite-vec extension
51
53
  const vecPath = process.env.SQLITE_VEC_PATH;
@@ -71,5 +73,7 @@ export function openDatabaseBasic(readonly = false): Database {
71
73
  throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
72
74
  }
73
75
 
74
- return new Database(dbPath, { readonly });
76
+ return readonly
77
+ ? new Database(dbPath, { readonly: true })
78
+ : new Database(dbPath);
75
79
  }
package/lib/purge.ts ADDED
@@ -0,0 +1,125 @@
1
+ /**
2
+ * lib/purge.ts - Purge entries from lore database
3
+ *
4
+ * Search and delete entries from purgeable sources (captures, observations, teachings).
5
+ * Deletes from both FTS5 search table and vec0 embeddings table.
6
+ *
7
+ * Usage:
8
+ * const matches = findPurgeMatches(query);
9
+ * deleteEntries(matches.map(m => m.rowid));
10
+ */
11
+
12
+ import { Database } from "bun:sqlite";
13
+ import { existsSync } from "fs";
14
+ import { getDatabasePath, openDatabase } from "./db.js";
15
+
16
/**
 * Sources whose entries may be purged. Indexed sources (blogs, commits, etc.)
 * are deliberately absent and are never purgeable.
 */
export const PURGEABLE_SOURCES = [
  "captures",
  "observations",
  "teachings",
] as const;

/** Union of the source names listed in PURGEABLE_SOURCES. */
export type PurgeableSource = (typeof PURGEABLE_SOURCES)[number];

/** One row of the search table selected as a purge candidate. */
export interface PurgeMatch {
  /** Row identifier in the search table; passed to deleteEntries. */
  rowid: number;
  source: string;
  title: string;
  content: string;
  type: string;
}

/** Summary returned after a purge. */
export interface PurgeResult {
  /** Number of entries processed for deletion. */
  deleted: number;
  /** The rowids that were submitted for deletion. */
  rowids: number[];
}
37
+
38
+ /**
39
+ * Find entries matching query in purgeable sources using LIKE.
40
+ *
41
+ * Uses SQL LIKE instead of FTS5 MATCH because content strings
42
+ * often contain dots, dashes, and special characters that break
43
+ * FTS5 query syntax.
44
+ */
45
+ export function findPurgeMatches(
46
+ query: string,
47
+ options: { source?: PurgeableSource } = {},
48
+ ): PurgeMatch[] {
49
+ const db = openDatabase(true);
50
+
51
+ try {
52
+ const conditions: string[] = ["content LIKE ?"];
53
+ const params: (string | number)[] = [`%${query}%`];
54
+
55
+ if (options.source) {
56
+ conditions.push("source = ?");
57
+ params.push(options.source);
58
+ } else {
59
+ const placeholders = PURGEABLE_SOURCES.map(() => "?").join(", ");
60
+ conditions.push(`source IN (${placeholders})`);
61
+ params.push(...PURGEABLE_SOURCES);
62
+ }
63
+
64
+ const sql = `
65
+ SELECT rowid, source, title, content, type
66
+ FROM search
67
+ WHERE ${conditions.join(" AND ")}
68
+ ORDER BY rowid DESC
69
+ `;
70
+
71
+ const stmt = db.prepare(sql);
72
+ return stmt.all(...params) as PurgeMatch[];
73
+ } finally {
74
+ db.close();
75
+ }
76
+ }
77
+
78
+ /**
79
+ * Delete entries from both FTS5 search table and vec0 embeddings table.
80
+ *
81
+ * FTS5 DELETE: DELETE FROM search WHERE rowid = ?
82
+ * vec0 DELETE: DELETE FROM embeddings WHERE doc_id = ?
83
+ */
84
+ export function deleteEntries(rowids: number[]): PurgeResult {
85
+ if (rowids.length === 0) {
86
+ return { deleted: 0, rowids: [] };
87
+ }
88
+
89
+ // Open DB directly for read-write (matches realtime.ts pattern —
90
+ // openDatabase(false) triggers SQLITE_MISUSE with custom_sqlite)
91
+ const dbPath = getDatabasePath();
92
+ if (!existsSync(dbPath)) {
93
+ throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
94
+ }
95
+
96
+ const db = new Database(dbPath);
97
+
98
+ try {
99
+ // Load sqlite-vec extension for embeddings table access
100
+ const vecPath = process.env.SQLITE_VEC_PATH;
101
+ if (!vecPath) {
102
+ throw new Error(
103
+ 'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
104
+ );
105
+ }
106
+ db.loadExtension(vecPath);
107
+
108
+ const deleteSearch = db.prepare("DELETE FROM search WHERE rowid = ?");
109
+ const deleteEmbedding = db.prepare(
110
+ "DELETE FROM embeddings WHERE doc_id = ?",
111
+ );
112
+
113
+ let deleted = 0;
114
+
115
+ for (const rowid of rowids) {
116
+ deleteSearch.run(rowid);
117
+ deleteEmbedding.run(rowid);
118
+ deleted++;
119
+ }
120
+
121
+ return { deleted, rowids };
122
+ } finally {
123
+ db.close();
124
+ }
125
+ }
package/lib/semantic.ts CHANGED
@@ -13,6 +13,44 @@ import { getDatabasePath, openDatabase } from "./db.js";
13
13
  import { search as keywordSearch, type SearchResult } from "./search.js";
14
14
  import { getConfig } from "./config.js";
15
15
 
16
+ // ─── Embedding Server (server-first, in-process fallback) ────────────────────
17
+
18
// Base URL of the persistent embedding server; an unset or empty
// EMBED_SERVER_URL falls back to the local default.
const EMBED_SERVER = process.env.EMBED_SERVER_URL || "http://localhost:8090";

/**
 * Try the persistent embedding server first (warm: ~9ms vs 244ms in-process).
 *
 * POSTs `{ text, prefix }` as JSON to `${EMBED_SERVER}/embed` with a 500 ms
 * timeout. The response is accepted only when `embedding` is an array of
 * exactly EMBEDDING_DIM finite numbers.
 *
 * @param text - Text to embed
 * @param prefix - Task prefix forwarded to the server
 *   (callers pass "search_query" or "search_document")
 * @returns The embedding, or null on any failure (network error, timeout,
 *   non-2xx status, malformed payload) — caller falls back to in-process
 */
async function serverEmbed(
  text: string,
  prefix: string,
): Promise<number[] | null> {
  try {
    const resp = await fetch(`${EMBED_SERVER}/embed`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ text, prefix }),
      signal: AbortSignal.timeout(500),
    });
    if (!resp.ok) return null;

    // Treat the payload as untrusted and narrow it step by step.
    const data: unknown = await resp.json();
    if (typeof data !== "object" || data === null) return null;

    const embedding = (data as { embedding?: unknown }).embedding;
    if (!Array.isArray(embedding) || embedding.length !== EMBEDDING_DIM) {
      return null;
    }

    // Reject vectors containing nulls, strings, NaN or Infinity rather than
    // handing them to the vector search.
    const allFinite = embedding.every(
      (value) => typeof value === "number" && Number.isFinite(value),
    );
    if (!allFinite) return null;

    return embedding as number[];
  } catch {
    return null; // Server not running or timed out — fall back silently
  }
}
51
+
52
+ // ─── Types ───────────────────────────────────────────────────────────────────
53
+
16
54
  export interface SemanticResult {
17
55
  rowid: number;
18
56
  source: string;
@@ -76,6 +114,11 @@ async function getEmbeddingPipeline(): Promise<EmbeddingPipeline> {
76
114
  * @returns 768-dimensional embedding vector
77
115
  */
78
116
  export async function embedQuery(query: string): Promise<number[]> {
117
+ // Try persistent server first (~9ms warm vs 244ms in-process)
118
+ const serverResult = await serverEmbed(query, "search_query");
119
+ if (serverResult) return serverResult;
120
+
121
+ // Fall back to in-process model loading
79
122
  const embedder = await getEmbeddingPipeline();
80
123
 
81
124
  // nomic model requires "search_query: " prefix for queries
@@ -104,6 +147,11 @@ export async function embedQuery(query: string): Promise<number[]> {
104
147
  * @returns 768-dimensional embedding vector
105
148
  */
106
149
  export async function embedDocument(text: string): Promise<number[]> {
150
+ // Try persistent server first (~9ms warm vs 244ms in-process)
151
+ const serverResult = await serverEmbed(text, "search_document");
152
+ if (serverResult) return serverResult;
153
+
154
+ // Fall back to in-process model loading
107
155
  const embedder = await getEmbeddingPipeline();
108
156
 
109
157
  const prefixedText = `search_document: ${text}`;
@@ -131,12 +179,23 @@ export async function embedDocument(text: string): Promise<number[]> {
131
179
  export async function embedDocuments(texts: string[]): Promise<number[][]> {
132
180
  if (texts.length === 0) return [];
133
181
 
134
- const embedder = await getEmbeddingPipeline();
135
182
  const results: number[][] = [];
136
183
 
137
- // Process one at a time (transformers.js doesn't batch well)
138
- // But we benefit from cached pipeline
184
+ // Try persistent server first for each document
185
+ let serverAvailable = true;
139
186
  for (const text of texts) {
187
+ if (serverAvailable) {
188
+ const serverResult = await serverEmbed(text, "search_document");
189
+ if (serverResult) {
190
+ results.push(serverResult);
191
+ continue;
192
+ }
193
+ // Server failed — stop trying and fall back for remaining
194
+ serverAvailable = false;
195
+ }
196
+
197
+ // Fall back to in-process
198
+ const embedder = await getEmbeddingPipeline();
140
199
  const prefixedText = `search_document: ${text}`;
141
200
  const output = await embedder(prefixedText, {
142
201
  pooling: "mean",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@voidwire/lore",
3
- "version": "1.3.0",
3
+ "version": "1.5.0",
4
4
  "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
5
5
  "type": "module",
6
6
  "main": "./index.ts",