@voidwire/lore 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.ts CHANGED
@@ -691,15 +691,24 @@ async function handlePurge(args: string[]): Promise<void> {
691
691
  }
692
692
  }
693
693
 
694
- const result = deleteEntries(matches.map((m) => m.rowid));
694
+ const result = deleteEntries(
695
+ matches.map((m) => m.rowid),
696
+ matches.map((m) => m.content),
697
+ );
695
698
 
696
699
  output({
697
700
  success: true,
698
701
  matches: matches.length,
699
702
  deleted: result.deleted,
700
703
  rowids: result.rowids,
704
+ logEntriesRemoved: result.logEntriesRemoved,
701
705
  });
702
- console.error(`Purged ${result.deleted} entries from search + embeddings.`);
706
+ console.error(
707
+ `Purged ${result.deleted} entries from search + embeddings` +
708
+ (result.logEntriesRemoved > 0
709
+ ? ` + ${result.logEntriesRemoved} from log.jsonl.`
710
+ : `.`),
711
+ );
703
712
  process.exit(0);
704
713
  } catch (error) {
705
714
  const message = error instanceof Error ? error.message : "Unknown error";
@@ -0,0 +1,214 @@
1
+ /**
2
+ * lib/contradiction.ts - Write-time contradiction detection
3
+ *
4
+ * Uses hybrid search + local MLX model to classify new captures against
5
+ * existing entries. Determines whether a new entry should ADD (new info),
6
+ * NOOP (redundant), or DELETE+ADD (supersedes existing).
7
+ *
8
+ * Fail-open design: any error defaults to ADD — never blocks a capture.
9
+ *
10
+ * Usage:
11
+ * const candidates = await findCandidates(event);
12
+ * const result = await classifyContradiction(event, candidates);
13
+ * // result.action: "ADD" | "NOOP" | "DELETE+ADD"
14
+ * // result.deleteRowid: number (only set for DELETE+ADD)
15
+ */
16
+
17
+ import { hybridSearch, type HybridResult } from "./semantic.js";
18
+ import { PURGEABLE_SOURCES } from "./purge.js";
19
+ import type { CaptureEvent } from "./capture.js";
20
+
21
+ // ─── Types ──────────────────────────────────────────────────────────────────
22
+
23
+ export type ContradictionAction = "ADD" | "DELETE+ADD" | "NOOP";
24
+
25
+ export interface ContradictionResult {
26
+ action: ContradictionAction;
27
+ deleteRowid?: number;
28
+ }
29
+
30
+ // ─── Constants ──────────────────────────────────────────────────────────────
31
+
32
+ const MLX_URL = "http://localhost:8080/v1/chat/completions";
33
+ const MLX_MODEL = "mlx-community/Qwen2.5-7B-Instruct-4bit";
34
+ const MLX_TIMEOUT_MS = 1500;
35
+
36
+ const CANDIDATE_LIMIT = 5;
37
+
38
+ // Sources eligible for contradiction checking (same as purgeable)
39
+ const CONTRADICTION_SOURCES = new Set<string>(PURGEABLE_SOURCES);
40
+
41
+ // ─── Public API ─────────────────────────────────────────────────────────────
42
+
43
/**
 * Report whether contradiction detection applies to a source.
 *
 * @param source - Source name (for example, the value produced by getSourceForEvent)
 * @returns true when the name is a member of CONTRADICTION_SOURCES
 */
export function isContradictionCheckable(source: string): boolean {
  const eligible = CONTRADICTION_SOURCES.has(source);
  return eligible;
}
49
+
50
/**
 * Look up existing entries that the new event might duplicate or contradict.
 *
 * The event's text (data.content, falling back to data.text) is used as the
 * hybrid-search query. The search is narrowed to the event's source and, when
 * the event carries a topic, to that topic (passed as the `project` option).
 * At most CANDIDATE_LIMIT results are requested.
 *
 * Per the original author's notes: hybridSearch opens its own read
 * connection, so entries written earlier in the same (uncommitted)
 * indexAndEmbed batch are not visible here. That is accepted because
 * same-batch entries are unlikely to contradict each other.
 *
 * @param event - The capture event about to be indexed
 * @returns Candidate matches, or an empty array when the event has no text
 */
export async function findCandidates(
  event: CaptureEvent,
): Promise<HybridResult[]> {
  const payload = event.data as Record<string, unknown>;
  const queryText = String(payload.content || payload.text || "");
  const topicName = String(payload.topic || "");
  const sourceName = getSourceForEvent(event);

  // Nothing to search for — skip the lookup entirely
  if (queryText.length === 0) {
    return [];
  }

  const searchOptions = {
    source: sourceName,
    project: topicName || undefined,
    limit: CANDIDATE_LIMIT,
  };
  return hybridSearch(queryText, searchOptions);
}
77
+
78
/**
 * Classify a new event against existing candidates using the local MLX model.
 *
 * The model is asked to reply with one of:
 *   ADD            — new information not covered by candidates
 *   NOOP           — duplicate/redundant (already captured)
 *   DELETE <rowid> — new info supersedes a specific candidate
 *
 * Fail-open: any error (timeout, non-2xx status, parse failure, model down)
 * resolves to ADD, so a capture is never blocked by this check.
 *
 * Safety: a DELETE verdict is only honoured when the rowid is one of the
 * candidates that were actually shown to the model. A rowid outside that set
 * (hallucinated or mangled) is downgraded to ADD so that an unrelated entry
 * can never be deleted on the model's say-so.
 *
 * @param event - The capture event about to be indexed
 * @param candidates - Existing entries returned by findCandidates
 * @returns The action to take; deleteRowid is set only for "DELETE+ADD"
 */
export async function classifyContradiction(
  event: CaptureEvent,
  candidates: HybridResult[],
): Promise<ContradictionResult> {
  // No existing entries to compare against — always new information
  if (candidates.length === 0) {
    return { action: "ADD" };
  }

  const data = event.data as Record<string, unknown>;
  const content = String(data.content || data.text || "");
  const topic = String(data.topic || "");
  const source = getSourceForEvent(event);

  const candidateLines = candidates
    .map((c) => `[rowid: ${c.rowid}] ${c.content}`)
    .join("\n");

  const systemPrompt = `You classify knowledge contradictions. Reply with exactly one word: ADD, NOOP, or DELETE.
ADD: new information not covered by candidates
NOOP: duplicate or redundant (already captured)
DELETE: new information supersedes a candidate (also provide rowid)`;

  const userPrompt = `New entry (source: ${source}, topic: ${topic}):
${content}

Existing entries:
${candidateLines}

If DELETE, reply: DELETE <rowid>
Otherwise reply: ADD or NOOP`;

  try {
    const resp = await fetch(MLX_URL, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: MLX_MODEL,
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: userPrompt },
        ],
        max_tokens: 20,
        temperature: 0,
      }),
      // Abort the request if the model does not answer in time
      signal: AbortSignal.timeout(MLX_TIMEOUT_MS),
    });

    if (!resp.ok) {
      console.error(
        `[contradiction] MLX returned ${resp.status} — defaulting to ADD`,
      );
      return { action: "ADD" };
    }

    const json = (await resp.json()) as {
      choices?: Array<{ message?: { content?: string } }>;
    };

    const raw = json.choices?.[0]?.message?.content?.trim() || "";
    const verdict = parseClassification(raw);

    // Only allow deletion of an entry the model was actually shown
    if (verdict.action === "DELETE+ADD") {
      const isKnownCandidate = candidates.some(
        (c) => Number(c.rowid) === verdict.deleteRowid,
      );
      if (!isKnownCandidate) {
        console.error(
          `[contradiction] rowid ${verdict.deleteRowid} is not among the candidates — defaulting to ADD`,
        );
        return { action: "ADD" };
      }
    }

    return verdict;
  } catch (err) {
    // Timeout, network error, or model unavailable — fail open
    const message = err instanceof Error ? err.message : String(err);
    console.error(
      `[contradiction] classification failed (${message}) — defaulting to ADD`,
    );
    return { action: "ADD" };
  }
}
157
+
158
+ // ─── Internal Helpers ───────────────────────────────────────────────────────
159
+
160
/**
 * Parse the model's raw reply into a ContradictionResult.
 *
 * Matching is case-insensitive and ignores decoration chat models commonly
 * add around a one-word answer (surrounding whitespace, quotes, backticks,
 * asterisks, trailing punctuation), so "NOOP." and "`add`" are understood.
 *
 * Accepted forms:
 *   "ADD", "NOOP"
 *   "DELETE 123", "DELETE <123>", "DELETE [rowid: 123]", "DELETE rowid 123"
 *
 * The rowid must be a positive safe integer. Anything else is logged as
 * unparseable and defaults to ADD.
 *
 * @param raw - Reply text from the model
 * @returns The parsed action; deleteRowid is set only for "DELETE+ADD"
 */
function parseClassification(raw: string): ContradictionResult {
  const normalized = raw
    .toUpperCase()
    .replace(/^[\s"'`*]+/, "")
    .replace(/[\s"'`*.!,;:]+$/, "");

  if (normalized === "NOOP") {
    return { action: "NOOP" };
  }

  if (normalized === "ADD") {
    return { action: "ADD" };
  }

  // "DELETE", a separator, then the rowid — optionally wrapped in the
  // "<...>" / "[rowid: ...]" shapes that appear in the prompt itself.
  const deleteMatch = /^DELETE[\s:]+[<\[(#]?\s*(?:ROWID\s*[:=#]?\s*)?(\d+)/.exec(
    normalized,
  );
  const digits = deleteMatch?.[1];
  if (digits !== undefined) {
    const rowid = parseInt(digits, 10);
    // Reject 0 and digit runs too long to be represented exactly
    if (Number.isSafeInteger(rowid) && rowid > 0) {
      return { action: "DELETE+ADD", deleteRowid: rowid };
    }
  }

  // Unparseable — default to ADD
  console.error(
    `[contradiction] unparseable response "${raw}" — defaulting to ADD`,
  );
  return { action: "ADD" };
}
191
+
192
/**
 * Translate a capture event's type into the source name it is stored under.
 * Unrecognised types map to "captures".
 *
 * The original note states this mirrors getSourceForEvent in realtime.ts.
 *
 * @param event - Capture event whose `type` selects the source
 * @returns The source name for that event type
 */
function getSourceForEvent(event: CaptureEvent): string {
  const sourceByType = new Map<string, string>([
    ["knowledge", "captures"],
    ["teaching", "teachings"],
    ["observation", "observations"],
    ["insight", "insights"],
    ["learning", "learnings"],
    ["task", "flux"],
    ["note", "captures"],
  ]);

  return sourceByType.get(event.type) ?? "captures";
}
package/lib/indexer.ts CHANGED
@@ -40,6 +40,18 @@ export interface IndexerContext {
40
40
 
41
41
  export type IndexerFunction = (ctx: IndexerContext) => Promise<void>;
42
42
 
43
+ /**
44
+ * Sources excluded from --rebuild because the DB is their source of truth.
45
+ * These sources use real-time indexing (indexAndEmbed) with contradiction
46
+ * resolution. Rebuilding from log.jsonl would resurrect purged/superseded
47
+ * entries, breaking the contradiction resolution invariant.
48
+ */
49
+ export const REBUILD_EXCLUDED_SOURCES = new Set([
50
+ "captures",
51
+ "observations",
52
+ "teachings",
53
+ ]);
54
+
43
55
  /**
44
56
  * Check if a path is configured and exists on disk.
45
57
  * Logs a specific reason when the check fails:
@@ -231,6 +243,14 @@ export async function runIndexer(
231
243
  continue;
232
244
  }
233
245
 
246
+ // Skip rebuild-excluded sources — DB is source of truth, not log.jsonl
247
+ if (rebuild && REBUILD_EXCLUDED_SOURCES.has(src)) {
248
+ console.log(
249
+ `${src}: skipped (DB is source of truth — not rebuilt from log)`,
250
+ );
251
+ continue;
252
+ }
253
+
234
254
  console.log(`Indexing ${src}...`);
235
255
 
236
256
  // Clear source if rebuilding
package/lib/purge.ts CHANGED
@@ -10,8 +10,16 @@
10
10
  */
11
11
 
12
12
  import { Database } from "bun:sqlite";
13
- import { existsSync } from "fs";
13
+ import {
14
+ existsSync,
15
+ readFileSync,
16
+ writeFileSync,
17
+ renameSync,
18
+ unlinkSync,
19
+ } from "fs";
20
+ import { join } from "path";
14
21
  import { getDatabasePath, openDatabase } from "./db.js";
22
+ import { getConfig } from "./config.js";
15
23
 
16
24
  // Only these sources can be purged — indexed sources (blogs, commits, etc.) are never purgeable
17
25
  export const PURGEABLE_SOURCES = [
@@ -33,6 +41,7 @@ export interface PurgeMatch {
33
41
  export interface PurgeResult {
34
42
  deleted: number;
35
43
  rowids: number[];
44
+ logEntriesRemoved: number;
36
45
  }
37
46
 
38
47
  /**
@@ -76,14 +85,20 @@ export function findPurgeMatches(
76
85
  }
77
86
 
78
87
  /**
79
- * Delete entries from both FTS5 search table and vec0 embeddings table.
88
+ * Delete entries from FTS5 search table, vec0 embeddings table,
89
+ * and optionally clean matching lines from log.jsonl.
80
90
  *
81
- * FTS5 DELETE: DELETE FROM search WHERE rowid = ?
82
- * vec0 DELETE: DELETE FROM embeddings WHERE doc_id = ?
91
+ * @param rowids - Row IDs to delete from search + embeddings
92
+ * @param matchContents - Content strings from findPurgeMatches for log.jsonl filtering.
93
+ * Optional (default []); when empty, log.jsonl cleanup is skipped.
94
+ * This avoids a breaking change for existing callers.
83
95
  */
84
- export function deleteEntries(rowids: number[]): PurgeResult {
96
+ export function deleteEntries(
97
+ rowids: number[],
98
+ matchContents: string[] = [],
99
+ ): PurgeResult {
85
100
  if (rowids.length === 0) {
86
- return { deleted: 0, rowids: [] };
101
+ return { deleted: 0, rowids: [], logEntriesRemoved: 0 };
87
102
  }
88
103
 
89
104
  // Open DB directly for read-write (matches realtime.ts pattern —
@@ -118,8 +133,66 @@ export function deleteEntries(rowids: number[]): PurgeResult {
118
133
  deleted++;
119
134
  }
120
135
 
121
- return { deleted, rowids };
136
+ // Clean matching lines from log.jsonl (best-effort)
137
+ const logEntriesRemoved = purgeLogEntries(matchContents);
138
+
139
+ return { deleted, rowids, logEntriesRemoved };
122
140
  } finally {
123
141
  db.close();
124
142
  }
125
143
  }
144
+
145
/**
 * Remove lines from log.jsonl whose captured text contains any of the given
 * strings.
 *
 * Each line is parsed as JSON and its data.content (falling back to
 * data.text) is tested with a substring match against every needle. Lines
 * that cannot be parsed, or whose content is not a string, are kept.
 *
 * Empty or whitespace-only needles are ignored: an empty string is a
 * substring of every string, so honouring one would delete the entire log.
 *
 * The file is rewritten only when at least one line is actually removed. The
 * rewrite goes to a temp file that is then renamed over the log; if that
 * fails, the temp file is removed again.
 *
 * Cleanup is best-effort: the search-table content may be assembled and so
 * differ from the raw capture text, in which case nothing matches. Any
 * filesystem error is logged and reported as 0 removed — it never fails the
 * purge.
 *
 * @param matchContents - Content strings to filter out
 * @returns Number of lines removed
 */
function purgeLogEntries(matchContents: string[]): number {
  const needles = matchContents.filter(
    (mc) => typeof mc === "string" && mc.trim() !== "",
  );
  if (needles.length === 0) return 0;

  const logPath = join(getConfig().paths.data, "log.jsonl");
  const tmpPath = logPath + ".tmp";

  if (!existsSync(logPath)) return 0;

  try {
    // Clean up stale temp file from a prior crash
    if (existsSync(tmpPath)) {
      unlinkSync(tmpPath);
    }

    const lines = readFileSync(logPath, "utf-8").split("\n").filter(Boolean);
    const kept = lines.filter((line) => {
      try {
        const event = JSON.parse(line) as {
          data?: { content?: unknown; text?: unknown };
        };
        const content = event.data?.content || event.data?.text || "";
        if (typeof content !== "string") return true; // Nothing to match on
        return !needles.some((needle) => content.includes(needle));
      } catch {
        return true; // Keep unparseable lines
      }
    });

    const removed = lines.length - kept.length;
    // Nothing matched — leave the log untouched rather than rewriting it
    if (removed === 0) return 0;

    writeFileSync(
      tmpPath,
      kept.join("\n") + (kept.length > 0 ? "\n" : ""),
      "utf-8",
    );
    renameSync(tmpPath, logPath);

    return removed;
  } catch (err) {
    // log.jsonl cleanup is best-effort — never fail the purge
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[purge] log.jsonl cleanup failed (${message})`);
    try {
      // Do not leave a half-written temp file behind
      if (existsSync(tmpPath)) {
        unlinkSync(tmpPath);
      }
    } catch {
      // Ignore — the next run removes stale temp files before writing
    }
    return 0;
  }
}
package/lib/realtime.ts CHANGED
@@ -25,6 +25,11 @@ import {
25
25
  } from "./semantic.js";
26
26
  import { hashContent, getCachedEmbedding, cacheEmbedding } from "./cache.js";
27
27
  import type { CaptureEvent } from "./capture.js";
28
+ import {
29
+ isContradictionCheckable,
30
+ findCandidates,
31
+ classifyContradiction,
32
+ } from "./contradiction.js";
28
33
 
29
34
  /**
30
35
  * Index and embed capture events for immediate searchability
@@ -53,20 +58,67 @@ export async function indexAndEmbed(events: CaptureEvent[]): Promise<void> {
53
58
  }
54
59
  db.loadExtension(vecPath);
55
60
 
61
+ // 0. Contradiction detection — filter events before insert
62
+ // For purgeable sources, check if the new event contradicts or
63
+ // duplicates existing entries. NOOP skips the event, DELETE+ADD
64
+ // removes the old entry before inserting the new one.
65
+ const eventsToIndex: CaptureEvent[] = [];
66
+ for (const event of events) {
67
+ const source = getSourceForEvent(event);
68
+
69
+ if (isContradictionCheckable(source)) {
70
+ try {
71
+ const candidates = await findCandidates(event);
72
+ if (candidates.length > 0) {
73
+ const result = await classifyContradiction(event, candidates);
74
+
75
+ if (result.action === "NOOP") {
76
+ const data = event.data as Record<string, unknown>;
77
+ const topic = String(data.topic || "");
78
+ console.error(
79
+ `[contradiction] NOOP: skipped as redundant (topic: ${topic})`,
80
+ );
81
+ continue;
82
+ }
83
+
84
+ if (result.action === "DELETE+ADD" && result.deleteRowid) {
85
+ deleteSearchAndEmbedding(db, result.deleteRowid);
86
+ const data = event.data as Record<string, unknown>;
87
+ const topic = String(data.topic || "");
88
+ console.error(
89
+ `[contradiction] DELETE+ADD: removed rowid ${result.deleteRowid}, topic: ${topic}`,
90
+ );
91
+ }
92
+ // ADD falls through to normal insert
93
+ }
94
+ } catch (err) {
95
+ // Fail open — if contradiction check fails, proceed with ADD
96
+ const message = err instanceof Error ? err.message : String(err);
97
+ console.error(
98
+ `[contradiction] check failed (${message}) — proceeding with ADD`,
99
+ );
100
+ }
101
+ }
102
+
103
+ eventsToIndex.push(event);
104
+ }
105
+
106
+ if (eventsToIndex.length === 0) return;
107
+
56
108
  // 1. Insert into FTS5 and collect doc IDs
57
109
  const docIds: number[] = [];
58
- for (const event of events) {
110
+ for (const event of eventsToIndex) {
59
111
  const docId = insertSearchEntry(db, event);
60
112
  docIds.push(docId);
61
113
  }
62
114
 
63
115
  // 2. Generate embeddings with cache
64
- const contents = events.map((e) => getContentForEmbedding(e));
116
+ const contents = eventsToIndex.map((e) => getContentForEmbedding(e));
65
117
  const embeddings = await embedWithCache(db, contents);
66
118
 
67
119
  // 3. Insert embeddings
68
- for (let i = 0; i < events.length; i++) {
69
- insertEmbedding(db, docIds[i], embeddings[i], events[i]);
120
+ for (let i = 0; i < eventsToIndex.length; i++) {
121
+ insertEmbedding(db, docIds[i], embeddings[i], eventsToIndex[i]);
70
122
  }
71
123
  } finally {
72
124
  db.close();
@@ -104,6 +156,16 @@ function insertSearchEntry(db: Database, event: CaptureEvent): number {
104
156
  return Number(result.lastInsertRowid);
105
157
  }
106
158
 
159
/**
 * Remove a single entry from the search table and then from the embeddings
 * table. Contradiction resolution uses this to drop a superseded entry
 * before the replacement is inserted.
 *
 * @param db - Open database handle to run the deletes on
 * @param rowid - Row id in `search`; the same value is matched against
 *                `embeddings.doc_id`
 */
function deleteSearchAndEmbedding(db: Database, rowid: number): void {
  const deleteStatements = [
    "DELETE FROM search WHERE rowid = ?",
    "DELETE FROM embeddings WHERE doc_id = ?",
  ];

  for (const sql of deleteStatements) {
    db.prepare(sql).run(rowid);
  }
}
168
+
107
169
  /**
108
170
  * Map event type to source name used in search table
109
171
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@voidwire/lore",
3
- "version": "1.5.0",
3
+ "version": "1.6.0",
4
4
  "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
5
5
  "type": "module",
6
6
  "main": "./index.ts",