@voidwire/lore 1.4.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.ts +146 -1
- package/index.ts +10 -0
- package/lib/contradiction.ts +214 -0
- package/lib/db.ts +6 -2
- package/lib/indexer.ts +20 -0
- package/lib/purge.ts +198 -0
- package/lib/realtime.ts +66 -4
- package/package.json +1 -1
package/cli.ts
CHANGED
|
@@ -41,6 +41,9 @@ import {
|
|
|
41
41
|
hybridSearch,
|
|
42
42
|
formatBriefSearch,
|
|
43
43
|
hasEmbeddings,
|
|
44
|
+
findPurgeMatches,
|
|
45
|
+
deleteEntries,
|
|
46
|
+
PURGEABLE_SOURCES,
|
|
44
47
|
SOURCES,
|
|
45
48
|
type SearchResult,
|
|
46
49
|
type HybridResult,
|
|
@@ -55,6 +58,7 @@ import {
|
|
|
55
58
|
type ObservationInput,
|
|
56
59
|
type ObservationSubtype,
|
|
57
60
|
type ObservationConfidence,
|
|
61
|
+
type PurgeableSource,
|
|
58
62
|
} from "./index";
|
|
59
63
|
import { isValidLoreType, LORE_TYPES } from "./lib/types";
|
|
60
64
|
import { runIndexer } from "./lib/indexer";
|
|
@@ -103,6 +107,7 @@ const BOOLEAN_FLAGS = new Set([
|
|
|
103
107
|
"brief",
|
|
104
108
|
"list",
|
|
105
109
|
"rebuild",
|
|
110
|
+
"force",
|
|
106
111
|
]);
|
|
107
112
|
|
|
108
113
|
function getPositionalArgs(args: string[]): string[] {
|
|
@@ -611,6 +616,142 @@ Examples:
|
|
|
611
616
|
process.exit(0);
|
|
612
617
|
}
|
|
613
618
|
|
|
619
|
+
// ============================================================================
|
|
620
|
+
// Purge Command
|
|
621
|
+
// ============================================================================
|
|
622
|
+
|
|
623
|
+
/**
 * Ask a yes/no question on stderr and read the answer from the controlling
 * terminal (`/dev/tty`), so the prompt works even when stdin is piped.
 *
 * @param prompt - Text written to stderr before reading the answer.
 * @returns `true` only when the trimmed, lower-cased answer is "y" or "yes".
 * @throws Error when no terminal can be opened; the message tells the user to
 *   re-run with --force instead of surfacing a bare ENXIO/ENOENT.
 */
function confirmPurgeOnTty(prompt: string): boolean {
  const fs = require("fs") as typeof import("fs");

  let fd: number;
  try {
    fd = fs.openSync("/dev/tty", "r");
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(
      `Cannot prompt for confirmation (${reason}). Re-run with --force to skip the prompt.`,
    );
  }

  try {
    process.stderr.write(prompt);
    const buf = Buffer.alloc(16);
    const bytesRead = fs.readSync(fd, buf, 0, buf.length, null);
    const answer = buf.toString("utf8", 0, bytesRead).trim().toLowerCase();
    return answer === "y" || answer === "yes";
  } finally {
    // Always release the descriptor, even if the read fails.
    fs.closeSync(fd);
  }
}

/**
 * `lore purge` command: find entries whose content contains --match, show
 * them, confirm (unless --force), then delete them.
 *
 * Structured results go through `output(...)`; human-readable progress goes
 * to stderr. Every path ends in `process.exit(0)` or `fail(...)`; errors
 * raised while searching, prompting or deleting are reported via
 * `fail(message, 2)`.
 *
 * @param args - Raw CLI arguments following the `purge` command word.
 */
async function handlePurge(args: string[]): Promise<void> {
  if (hasFlag(args, "help")) {
    showPurgeHelp();
  }

  const parsed = parseArgs(args);
  const matchQuery = parsed.get("match");

  if (!matchQuery) {
    fail('Missing --match flag. Use: lore purge --match "content to find"');
  }

  const force = hasFlag(args, "force");
  const sourceFilter = parsed.get("source") as PurgeableSource | undefined;

  // Validate source filter if provided
  if (
    sourceFilter &&
    !PURGEABLE_SOURCES.includes(sourceFilter as PurgeableSource)
  ) {
    fail(
      `Invalid source: ${sourceFilter}. Purgeable sources: ${PURGEABLE_SOURCES.join(", ")}`,
    );
  }

  try {
    const matches = findPurgeMatches(matchQuery, { source: sourceFilter });

    if (matches.length === 0) {
      output({
        success: true,
        matches: 0,
        deleted: 0,
        message: "No matching entries found",
      });
      console.error("No matching entries found.");
      process.exit(0);
    }

    // Display matches (content preview capped at 100 characters)
    console.error(`\nFound ${matches.length} matching entries:\n`);
    for (const m of matches) {
      const preview =
        m.content.length > 100 ? m.content.slice(0, 100) + "..." : m.content;
      console.error(`  [${m.rowid}] ${m.source}/${m.type}: ${preview}`);
    }
    console.error("");

    // Confirm unless --force
    if (!force) {
      const confirmed = confirmPurgeOnTty(
        `Delete ${matches.length} entries? (y/N) `,
      );

      if (!confirmed) {
        output({
          success: true,
          matches: matches.length,
          deleted: 0,
          message: "Aborted",
        });
        console.error("Aborted.");
        process.exit(0);
      }
    }

    // Row ids drive the DB deletes; contents drive the log.jsonl cleanup.
    const result = deleteEntries(
      matches.map((m) => m.rowid),
      matches.map((m) => m.content),
    );

    output({
      success: true,
      matches: matches.length,
      deleted: result.deleted,
      rowids: result.rowids,
      logEntriesRemoved: result.logEntriesRemoved,
    });
    console.error(
      `Purged ${result.deleted} entries from search + embeddings` +
        (result.logEntriesRemoved > 0
          ? ` + ${result.logEntriesRemoved} from log.jsonl.`
          : `.`),
    );
    process.exit(0);
  } catch (error) {
    const message = error instanceof Error ? error.message : "Unknown error";
    fail(message, 2);
  }
}
|
|
718
|
+
|
|
719
|
+
/**
 * Print usage for `lore purge` to stdout and exit with status 0.
 *
 * The "Process" section lists every step the command performs, including the
 * best-effort log.jsonl cleanup that follows the database deletes.
 */
function showPurgeHelp(): void {
  console.log(`
lore purge - Delete entries from purgeable sources

Usage:
  lore purge --match "content string"        Find and delete matching entries
  lore purge --match "content" --force        Delete without confirmation
  lore purge --match "content" --source captures  Filter by source

Purgeable Sources:
  captures      Quick captures (knowledge, decisions, gotchas, etc.)
  observations  Model observations about user patterns
  teachings     Teaching moments

Non-purgeable sources (blogs, commits, obsidian, etc.) are never affected.

Options:
  --match <text>    Content to search for (required, uses LIKE matching)
  --source <src>    Limit to specific purgeable source
  --force           Skip confirmation prompt
  --help            Show this help

Process:
  1. Searches purgeable sources for entries containing the match text
  2. Displays matches with rowid, source, type, and content preview
  3. Asks for confirmation (unless --force)
  4. Deletes from both FTS5 search table and vec0 embeddings table
  5. Removes matching lines from log.jsonl (best-effort)

Examples:
  lore purge --match "install.sh overwrites config.toml"
  lore purge --match "bracket prefix" --force
  lore purge --match "stale gotcha" --source captures
`);
  process.exit(0);
}
|
|
754
|
+
|
|
614
755
|
// ============================================================================
|
|
615
756
|
// Capture Command
|
|
616
757
|
// ============================================================================
|
|
@@ -916,6 +1057,7 @@ Usage:
|
|
|
916
1057
|
lore about <project> Aggregate view of project knowledge
|
|
917
1058
|
lore about <project> --brief Compact project summary
|
|
918
1059
|
lore capture task|knowledge|note|teaching Capture knowledge
|
|
1060
|
+
lore purge --match "content" Delete matching entries
|
|
919
1061
|
lore index [source] [--rebuild] [--list] Run indexers
|
|
920
1062
|
|
|
921
1063
|
Search Options:
|
|
@@ -1303,9 +1445,12 @@ async function main(): Promise<void> {
|
|
|
1303
1445
|
case "index":
|
|
1304
1446
|
await handleIndex(commandArgs);
|
|
1305
1447
|
break;
|
|
1448
|
+
case "purge":
|
|
1449
|
+
await handlePurge(commandArgs);
|
|
1450
|
+
break;
|
|
1306
1451
|
default:
|
|
1307
1452
|
fail(
|
|
1308
|
-
`Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, or index`,
|
|
1453
|
+
`Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, purge, or index`,
|
|
1309
1454
|
);
|
|
1310
1455
|
}
|
|
1311
1456
|
}
|
package/index.ts
CHANGED
|
@@ -102,3 +102,13 @@ export { LoreType, LORE_TYPES, isValidLoreType } from "./lib/types";
|
|
|
102
102
|
|
|
103
103
|
// Real-time indexing
|
|
104
104
|
export { indexAndEmbed } from "./lib/realtime";
|
|
105
|
+
|
|
106
|
+
// Purge
|
|
107
|
+
export {
|
|
108
|
+
findPurgeMatches,
|
|
109
|
+
deleteEntries,
|
|
110
|
+
PURGEABLE_SOURCES,
|
|
111
|
+
type PurgeableSource,
|
|
112
|
+
type PurgeMatch,
|
|
113
|
+
type PurgeResult,
|
|
114
|
+
} from "./lib/purge";
|
|
package/lib/contradiction.ts
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/contradiction.ts - Write-time contradiction detection
|
|
3
|
+
*
|
|
4
|
+
* Uses hybrid search + local MLX model to classify new captures against
|
|
5
|
+
* existing entries. Determines whether a new entry should ADD (new info),
|
|
6
|
+
* NOOP (redundant), or DELETE+ADD (supersedes existing).
|
|
7
|
+
*
|
|
8
|
+
* Fail-open design: any error defaults to ADD — never blocks a capture.
|
|
9
|
+
*
|
|
10
|
+
* Usage:
|
|
11
|
+
* const candidates = await findCandidates(event);
|
|
12
|
+
* const result = await classifyContradiction(event, candidates);
|
|
13
|
+
* // result.action: "ADD" | "NOOP" | "DELETE+ADD"
|
|
14
|
+
* // result.deleteRowid: number (only set for DELETE+ADD)
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { hybridSearch, type HybridResult } from "./semantic.js";
|
|
18
|
+
import { PURGEABLE_SOURCES } from "./purge.js";
|
|
19
|
+
import type { CaptureEvent } from "./capture.js";
|
|
20
|
+
|
|
21
|
+
// ─── Types ──────────────────────────────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
export type ContradictionAction = "ADD" | "DELETE+ADD" | "NOOP";
|
|
24
|
+
|
|
25
|
+
export interface ContradictionResult {
|
|
26
|
+
action: ContradictionAction;
|
|
27
|
+
deleteRowid?: number;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// ─── Constants ──────────────────────────────────────────────────────────────
|
|
31
|
+
|
|
32
|
+
const MLX_URL = "http://localhost:8080/v1/chat/completions";
|
|
33
|
+
const MLX_MODEL = "mlx-community/Qwen2.5-7B-Instruct-4bit";
|
|
34
|
+
const MLX_TIMEOUT_MS = 1500;
|
|
35
|
+
|
|
36
|
+
const CANDIDATE_LIMIT = 5;
|
|
37
|
+
|
|
38
|
+
// Sources eligible for contradiction checking (same as purgeable)
|
|
39
|
+
const CONTRADICTION_SOURCES = new Set<string>(PURGEABLE_SOURCES);
|
|
40
|
+
|
|
41
|
+
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
/**
 * Report whether entries from `source` take part in contradiction detection.
 *
 * @param source - Source name to test (e.g. "captures").
 * @returns `true` when `source` is a member of CONTRADICTION_SOURCES.
 */
export function isContradictionCheckable(source: string): boolean {
  const eligible = CONTRADICTION_SOURCES.has(source);
  return eligible;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Look up existing entries that the new event might duplicate or supersede.
 *
 * The event's text (`data.content`, falling back to `data.text`) is run
 * through hybridSearch, scoped to the event's source and — when a topic is
 * present — to that topic as the project filter. At most CANDIDATE_LIMIT
 * results are requested.
 *
 * Per the original author's notes: hybridSearch reads through its own
 * read-only connection, so rows written by the current indexAndEmbed batch
 * may not be visible here; same-batch contradictions are accepted as unlikely.
 *
 * @param event - The capture event about to be indexed.
 * @returns Candidate matches, or an empty array when the event has no text.
 */
export async function findCandidates(
  event: CaptureEvent,
): Promise<HybridResult[]> {
  const payload = event.data as Record<string, unknown>;
  const text = String(payload.content || payload.text || "");
  const topic = String(payload.topic || "");
  const source = getSourceForEvent(event);

  // Nothing to search with — no candidates.
  if (text === "") {
    return [];
  }

  const searchOptions = {
    source,
    project: topic || undefined,
    limit: CANDIDATE_LIMIT,
  };
  return hybridSearch(text, searchOptions);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Classify a new event against existing candidates using the local MLX model.
 *
 * The model is asked to answer with one of:
 *   ADD            — new information not covered by candidates
 *   NOOP           — duplicate/redundant (already captured)
 *   DELETE <rowid> — new info supersedes a specific candidate
 *
 * Fail-open: any error (timeout, non-2xx response, parse failure, model down)
 * yields `{ action: "ADD" }`. A DELETE verdict is honoured only when its rowid
 * belongs to one of the supplied candidates; otherwise it also becomes ADD, so
 * a hallucinated rowid can never cause an unrelated row to be removed.
 *
 * @param event - The capture event about to be indexed.
 * @param candidates - Existing entries returned by findCandidates.
 * @returns The action to take, with `deleteRowid` set only for DELETE+ADD.
 */
export async function classifyContradiction(
  event: CaptureEvent,
  candidates: HybridResult[],
): Promise<ContradictionResult> {
  if (candidates.length === 0) {
    return { action: "ADD" };
  }

  const data = event.data as Record<string, unknown>;
  const content = String(data.content || data.text || "");
  const topic = String(data.topic || "");
  const source = getSourceForEvent(event);

  const candidateLines = candidates
    .map((c) => `[rowid: ${c.rowid}] ${c.content}`)
    .join("\n");

  const systemPrompt = `You classify knowledge contradictions. Reply with exactly one word: ADD, NOOP, or DELETE.
ADD: new information not covered by candidates
NOOP: duplicate or redundant (already captured)
DELETE: new information supersedes a candidate (also provide rowid)`;

  const userPrompt = `New entry (source: ${source}, topic: ${topic}):
${content}

Existing entries:
${candidateLines}

If DELETE, reply: DELETE <rowid>
Otherwise reply: ADD or NOOP`;

  try {
    const resp = await fetch(MLX_URL, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: MLX_MODEL,
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: userPrompt },
        ],
        max_tokens: 20,
        temperature: 0,
      }),
      // Abort the request if the model does not answer within the timeout.
      signal: AbortSignal.timeout(MLX_TIMEOUT_MS),
    });

    if (!resp.ok) {
      console.error(
        `[contradiction] MLX returned ${resp.status} — defaulting to ADD`,
      );
      return { action: "ADD" };
    }

    const json = (await resp.json()) as {
      choices?: Array<{ message?: { content?: string } }>;
    };

    const raw = json.choices?.[0]?.message?.content?.trim() || "";
    const verdict = parseClassification(raw);

    // Only rows that were actually offered as candidates may be deleted.
    if (verdict.action === "DELETE+ADD") {
      const targetRowid = verdict.deleteRowid;
      const isCandidate = candidates.some(
        (c) => Number(c.rowid) === targetRowid,
      );
      if (!isCandidate) {
        console.error(
          `[contradiction] model chose rowid ${targetRowid}, which is not a candidate — defaulting to ADD`,
        );
        return { action: "ADD" };
      }
    }

    return verdict;
  } catch (err) {
    // Timeout, network error, or model unavailable — fail open
    const message = err instanceof Error ? err.message : String(err);
    console.error(
      `[contradiction] classification failed (${message}) — defaulting to ADD`,
    );
    return { action: "ADD" };
  }
}
|
|
157
|
+
|
|
158
|
+
// ─── Internal Helpers ───────────────────────────────────────────────────────
|
|
159
|
+
|
|
160
|
+
/**
 * Parse the model's reply into a ContradictionResult.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Accepted:
 *   - "ADD" / "NOOP", optionally followed by punctuation ("NOOP.", "Add!")
 *   - "DELETE 123", and the bracketed/labelled variants a model may echo back
 *     from the prompt: "DELETE <123>", "DELETE [rowid: 123]", "DELETE rowid 123"
 *
 * Anything else — including a DELETE whose rowid is not a positive safe
 * integer — is logged and treated as ADD (fail-open).
 *
 * @param raw - Raw text returned by the model.
 * @returns The parsed action; `deleteRowid` is set only for DELETE+ADD.
 */
function parseClassification(raw: string): ContradictionResult {
  const normalized = raw.toUpperCase().trim();

  // Single-word verdicts: strip trailing punctuation/whitespace before comparing.
  const word = normalized.replace(/[\s.,;:!]+$/, "");
  if (word === "NOOP") {
    return { action: "NOOP" };
  }
  if (word === "ADD") {
    return { action: "ADD" };
  }

  // "DELETE <number>", tolerating an opening bracket and an optional "ROWID" label.
  const deleteMatch = normalized.match(
    /^DELETE\s*[<\[(]?\s*(?:ROWID\s*[:=]?\s*)?(\d+)/,
  );
  if (deleteMatch) {
    const rowid = Number.parseInt(deleteMatch[1], 10);
    // isSafeInteger also rejects NaN and digit runs too large to represent exactly.
    if (Number.isSafeInteger(rowid) && rowid > 0) {
      return { action: "DELETE+ADD", deleteRowid: rowid };
    }
  }

  // Unparseable — default to ADD
  console.error(
    `[contradiction] unparseable response "${raw}" — defaulting to ADD`,
  );
  return { action: "ADD" };
}
|
|
191
|
+
|
|
192
|
+
/**
 * Translate a capture event's `type` into the source name used for searching.
 *
 * Unrecognised types fall back to "captures". The original author notes that
 * this mapping mirrors the helper of the same name in realtime.ts.
 *
 * @param event - Capture event whose `type` selects the source.
 * @returns The source name for that event type.
 */
function getSourceForEvent(event: CaptureEvent): string {
  const sourceByType = new Map<string, string>([
    ["knowledge", "captures"],
    ["teaching", "teachings"],
    ["observation", "observations"],
    ["insight", "insights"],
    ["learning", "learnings"],
    ["task", "flux"],
    ["note", "captures"],
  ]);

  return sourceByType.get(event.type) ?? "captures";
}
|
package/lib/db.ts
CHANGED
|
@@ -45,7 +45,9 @@ export function openDatabase(readonly = false): Database {
|
|
|
45
45
|
throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
-
const db =
|
|
48
|
+
const db = readonly
|
|
49
|
+
? new Database(dbPath, { readonly: true })
|
|
50
|
+
: new Database(dbPath);
|
|
49
51
|
|
|
50
52
|
// Load sqlite-vec extension
|
|
51
53
|
const vecPath = process.env.SQLITE_VEC_PATH;
|
|
@@ -71,5 +73,7 @@ export function openDatabaseBasic(readonly = false): Database {
|
|
|
71
73
|
throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
|
|
72
74
|
}
|
|
73
75
|
|
|
74
|
-
return
|
|
76
|
+
return readonly
|
|
77
|
+
? new Database(dbPath, { readonly: true })
|
|
78
|
+
: new Database(dbPath);
|
|
75
79
|
}
|
package/lib/indexer.ts
CHANGED
|
@@ -40,6 +40,18 @@ export interface IndexerContext {
|
|
|
40
40
|
|
|
41
41
|
export type IndexerFunction = (ctx: IndexerContext) => Promise<void>;
|
|
42
42
|
|
|
43
|
+
/**
|
|
44
|
+
* Sources excluded from --rebuild because the DB is their source of truth.
|
|
45
|
+
* These sources use real-time indexing (indexAndEmbed) with contradiction
|
|
46
|
+
* resolution. Rebuilding from log.jsonl would resurrect purged/superseded
|
|
47
|
+
* entries, breaking the contradiction resolution invariant.
|
|
48
|
+
*/
|
|
49
|
+
export const REBUILD_EXCLUDED_SOURCES = new Set([
|
|
50
|
+
"captures",
|
|
51
|
+
"observations",
|
|
52
|
+
"teachings",
|
|
53
|
+
]);
|
|
54
|
+
|
|
43
55
|
/**
|
|
44
56
|
* Check if a path is configured and exists on disk.
|
|
45
57
|
* Logs a specific reason when the check fails:
|
|
@@ -231,6 +243,14 @@ export async function runIndexer(
|
|
|
231
243
|
continue;
|
|
232
244
|
}
|
|
233
245
|
|
|
246
|
+
// Skip rebuild-excluded sources — DB is source of truth, not log.jsonl
|
|
247
|
+
if (rebuild && REBUILD_EXCLUDED_SOURCES.has(src)) {
|
|
248
|
+
console.log(
|
|
249
|
+
`${src}: skipped (DB is source of truth — not rebuilt from log)`,
|
|
250
|
+
);
|
|
251
|
+
continue;
|
|
252
|
+
}
|
|
253
|
+
|
|
234
254
|
console.log(`Indexing ${src}...`);
|
|
235
255
|
|
|
236
256
|
// Clear source if rebuilding
|
package/lib/purge.ts
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/purge.ts - Purge entries from lore database
|
|
3
|
+
*
|
|
4
|
+
* Search and delete entries from purgeable sources (captures, observations, teachings).
|
|
5
|
+
* Deletes from both FTS5 search table and vec0 embeddings table.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* const matches = findPurgeMatches(query);
|
|
9
|
+
* deleteEntries(matches.map(m => m.rowid));
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { Database } from "bun:sqlite";
|
|
13
|
+
import {
|
|
14
|
+
existsSync,
|
|
15
|
+
readFileSync,
|
|
16
|
+
writeFileSync,
|
|
17
|
+
renameSync,
|
|
18
|
+
unlinkSync,
|
|
19
|
+
} from "fs";
|
|
20
|
+
import { join } from "path";
|
|
21
|
+
import { getDatabasePath, openDatabase } from "./db.js";
|
|
22
|
+
import { getConfig } from "./config.js";
|
|
23
|
+
|
|
24
|
+
// Only these sources can be purged — indexed sources (blogs, commits, etc.) are never purgeable
|
|
25
|
+
export const PURGEABLE_SOURCES = [
|
|
26
|
+
"captures",
|
|
27
|
+
"observations",
|
|
28
|
+
"teachings",
|
|
29
|
+
] as const;
|
|
30
|
+
|
|
31
|
+
export type PurgeableSource = (typeof PURGEABLE_SOURCES)[number];
|
|
32
|
+
|
|
33
|
+
export interface PurgeMatch {
|
|
34
|
+
rowid: number;
|
|
35
|
+
source: string;
|
|
36
|
+
title: string;
|
|
37
|
+
content: string;
|
|
38
|
+
type: string;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export interface PurgeResult {
|
|
42
|
+
deleted: number;
|
|
43
|
+
rowids: number[];
|
|
44
|
+
logEntriesRemoved: number;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/**
 * Find entries in purgeable sources whose content contains `query`.
 *
 * Uses SQL LIKE rather than FTS5 MATCH because content strings often contain
 * dots, dashes and other characters that break FTS5 query syntax. The query is
 * matched literally: `%`, `_` and `\` are escaped so they cannot act as LIKE
 * wildcards (otherwise `--match "%"` would select every purgeable row).
 *
 * @param query - Text to look for. An empty string matches nothing, because
 *   `LIKE '%%'` would otherwise match everything.
 * @param options - Optional filter; `source` restricts the search to a single
 *   purgeable source. Without it, all PURGEABLE_SOURCES are searched.
 * @returns Matching rows, newest rowid first.
 */
export function findPurgeMatches(
  query: string,
  options: { source?: PurgeableSource } = {},
): PurgeMatch[] {
  if (query === "") {
    return [];
  }

  const db = openDatabase(true);

  try {
    // Prefix each LIKE metacharacter with the escape character declared below.
    const literal = query.replace(/[\\%_]/g, (ch) => `\\${ch}`);

    const conditions: string[] = ["content LIKE ? ESCAPE '\\'"];
    const params: (string | number)[] = [`%${literal}%`];

    if (options.source) {
      conditions.push("source = ?");
      params.push(options.source);
    } else {
      const placeholders = PURGEABLE_SOURCES.map(() => "?").join(", ");
      conditions.push(`source IN (${placeholders})`);
      params.push(...PURGEABLE_SOURCES);
    }

    const sql = `
      SELECT rowid, source, title, content, type
      FROM search
      WHERE ${conditions.join(" AND ")}
      ORDER BY rowid DESC
    `;

    const stmt = db.prepare(sql);
    return stmt.all(...params) as PurgeMatch[];
  } finally {
    db.close();
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Delete entries from the FTS5 `search` table and the vec0 `embeddings` table,
 * then optionally remove matching lines from log.jsonl.
 *
 * All row deletions run inside one transaction, so a failure part-way through
 * rolls back instead of leaving `search` and `embeddings` out of step.
 *
 * @param rowids - Row IDs to delete from search + embeddings.
 * @param matchContents - Content strings (from findPurgeMatches) used to
 *   filter log.jsonl. Defaults to `[]`, in which case log cleanup is skipped.
 * @returns Count of rowids processed, the rowids themselves, and the number
 *   of log.jsonl lines removed.
 * @throws Error when the database file is missing or SQLITE_VEC_PATH is unset.
 */
export function deleteEntries(
  rowids: number[],
  matchContents: string[] = [],
): PurgeResult {
  if (rowids.length === 0) {
    return { deleted: 0, rowids: [], logEntriesRemoved: 0 };
  }

  // Open DB directly for read-write (matches realtime.ts pattern —
  // openDatabase(false) triggers SQLITE_MISUSE with custom_sqlite)
  const dbPath = getDatabasePath();
  if (!existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
  }

  const db = new Database(dbPath);

  try {
    // Load sqlite-vec extension for embeddings table access
    const vecPath = process.env.SQLITE_VEC_PATH;
    if (!vecPath) {
      throw new Error(
        'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
      );
    }
    db.loadExtension(vecPath);

    const deleteSearch = db.prepare("DELETE FROM search WHERE rowid = ?");
    const deleteEmbedding = db.prepare(
      "DELETE FROM embeddings WHERE doc_id = ?",
    );

    // One transaction for the whole batch: either every row goes, or none.
    const purgeRows = db.transaction((ids: number[]) => {
      let count = 0;
      for (const id of ids) {
        deleteSearch.run(id);
        deleteEmbedding.run(id);
        count++;
      }
      return count;
    });
    const deleted = purgeRows(rowids);

    // Clean matching lines from log.jsonl (best-effort)
    const logEntriesRemoved = purgeLogEntries(matchContents);

    return { deleted, rowids, logEntriesRemoved };
  } finally {
    db.close();
  }
}
|
|
144
|
+
|
|
145
|
+
/**
 * Remove lines from log.jsonl whose content contains any of the given strings.
 *
 * Each line is parsed as JSON and its `data.content` (falling back to
 * `data.text`) is tested with a substring match. Lines that cannot be parsed,
 * or whose content is not a string, are kept. Empty match strings are ignored:
 * an empty string is a substring of everything and would wipe the whole log.
 *
 * The file is rewritten only when at least one line was removed, via a temp
 * file followed by a rename. Cleanup is best-effort: any I/O error is logged
 * and reported as zero lines removed rather than failing the purge.
 *
 * @param matchContents - Content strings to filter out.
 * @returns Number of lines removed.
 */
function purgeLogEntries(matchContents: string[]): number {
  const needles = matchContents.filter((mc) => mc.length > 0);
  if (needles.length === 0) return 0;

  const logPath = join(getConfig().paths.data, "log.jsonl");
  const tmpPath = logPath + ".tmp";

  if (!existsSync(logPath)) return 0;

  try {
    // Clean up stale temp file from a prior crash
    if (existsSync(tmpPath)) {
      unlinkSync(tmpPath);
    }

    const lines = readFileSync(logPath, "utf-8").split("\n").filter(Boolean);
    const filtered = lines.filter((line) => {
      try {
        const event = JSON.parse(line) as {
          data?: { content?: unknown; text?: unknown };
        };
        const content = event.data?.content || event.data?.text || "";
        if (typeof content !== "string") {
          return true; // Keep lines whose content is not text
        }
        return !needles.some((needle) => content.includes(needle));
      } catch {
        return true; // Keep unparseable lines
      }
    });

    const removed = lines.length - filtered.length;
    if (removed === 0) {
      // Nothing matched — leave the log file untouched.
      return 0;
    }

    writeFileSync(
      tmpPath,
      filtered.join("\n") + (filtered.length > 0 ? "\n" : ""),
      "utf-8",
    );
    renameSync(tmpPath, logPath);

    return removed;
  } catch (err) {
    // log.jsonl cleanup is best-effort — never fail the purge
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[purge] log.jsonl cleanup failed (${message})`);
    return 0;
  }
}
|
package/lib/realtime.ts
CHANGED
|
@@ -25,6 +25,11 @@ import {
|
|
|
25
25
|
} from "./semantic.js";
|
|
26
26
|
import { hashContent, getCachedEmbedding, cacheEmbedding } from "./cache.js";
|
|
27
27
|
import type { CaptureEvent } from "./capture.js";
|
|
28
|
+
import {
|
|
29
|
+
isContradictionCheckable,
|
|
30
|
+
findCandidates,
|
|
31
|
+
classifyContradiction,
|
|
32
|
+
} from "./contradiction.js";
|
|
28
33
|
|
|
29
34
|
/**
|
|
30
35
|
* Index and embed capture events for immediate searchability
|
|
@@ -53,20 +58,67 @@ export async function indexAndEmbed(events: CaptureEvent[]): Promise<void> {
|
|
|
53
58
|
}
|
|
54
59
|
db.loadExtension(vecPath);
|
|
55
60
|
|
|
61
|
+
// 0. Contradiction detection — filter events before insert
|
|
62
|
+
// For purgeable sources, check if the new event contradicts or
|
|
63
|
+
// duplicates existing entries. NOOP skips the event, DELETE+ADD
|
|
64
|
+
// removes the old entry before inserting the new one.
|
|
65
|
+
const eventsToIndex: CaptureEvent[] = [];
|
|
66
|
+
for (const event of events) {
|
|
67
|
+
const source = getSourceForEvent(event);
|
|
68
|
+
|
|
69
|
+
if (isContradictionCheckable(source)) {
|
|
70
|
+
try {
|
|
71
|
+
const candidates = await findCandidates(event);
|
|
72
|
+
if (candidates.length > 0) {
|
|
73
|
+
const result = await classifyContradiction(event, candidates);
|
|
74
|
+
|
|
75
|
+
if (result.action === "NOOP") {
|
|
76
|
+
const data = event.data as Record<string, unknown>;
|
|
77
|
+
const topic = String(data.topic || "");
|
|
78
|
+
console.error(
|
|
79
|
+
`[contradiction] NOOP: skipped as redundant (topic: ${topic})`,
|
|
80
|
+
);
|
|
81
|
+
continue;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
if (result.action === "DELETE+ADD" && result.deleteRowid) {
|
|
85
|
+
deleteSearchAndEmbedding(db, result.deleteRowid);
|
|
86
|
+
const data = event.data as Record<string, unknown>;
|
|
87
|
+
const topic = String(data.topic || "");
|
|
88
|
+
console.error(
|
|
89
|
+
`[contradiction] DELETE+ADD: removed rowid ${result.deleteRowid}, topic: ${topic}`,
|
|
90
|
+
);
|
|
91
|
+
}
|
|
92
|
+
// ADD falls through to normal insert
|
|
93
|
+
}
|
|
94
|
+
} catch (err) {
|
|
95
|
+
// Fail open — if contradiction check fails, proceed with ADD
|
|
96
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
97
|
+
console.error(
|
|
98
|
+
`[contradiction] check failed (${message}) — proceeding with ADD`,
|
|
99
|
+
);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
eventsToIndex.push(event);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
if (eventsToIndex.length === 0) return;
|
|
107
|
+
|
|
56
108
|
// 1. Insert into FTS5 and collect doc IDs
|
|
57
109
|
const docIds: number[] = [];
|
|
58
|
-
for (const event of
|
|
110
|
+
for (const event of eventsToIndex) {
|
|
59
111
|
const docId = insertSearchEntry(db, event);
|
|
60
112
|
docIds.push(docId);
|
|
61
113
|
}
|
|
62
114
|
|
|
63
115
|
// 2. Generate embeddings with cache
|
|
64
|
-
const contents =
|
|
116
|
+
const contents = eventsToIndex.map((e) => getContentForEmbedding(e));
|
|
65
117
|
const embeddings = await embedWithCache(db, contents);
|
|
66
118
|
|
|
67
119
|
// 3. Insert embeddings
|
|
68
|
-
for (let i = 0; i <
|
|
69
|
-
insertEmbedding(db, docIds[i], embeddings[i],
|
|
120
|
+
for (let i = 0; i < eventsToIndex.length; i++) {
|
|
121
|
+
insertEmbedding(db, docIds[i], embeddings[i], eventsToIndex[i]);
|
|
70
122
|
}
|
|
71
123
|
} finally {
|
|
72
124
|
db.close();
|
|
@@ -104,6 +156,16 @@ function insertSearchEntry(db: Database, event: CaptureEvent): number {
|
|
|
104
156
|
return Number(result.lastInsertRowid);
|
|
105
157
|
}
|
|
106
158
|
|
|
159
|
+
/**
 * Remove one entry from both the FTS5 `search` table (by rowid) and the
 * vec0 `embeddings` table (by doc_id).
 *
 * Called by contradiction resolution to drop an entry that a new capture
 * supersedes.
 *
 * @param db - Open database handle to run the deletes on.
 * @param rowid - Row id of the entry to remove.
 */
function deleteSearchAndEmbedding(db: Database, rowid: number): void {
  const removeFromSearch = db.prepare("DELETE FROM search WHERE rowid = ?");
  removeFromSearch.run(rowid);

  const removeFromEmbeddings = db.prepare(
    "DELETE FROM embeddings WHERE doc_id = ?",
  );
  removeFromEmbeddings.run(rowid);
}
|
|
168
|
+
|
|
107
169
|
/**
|
|
108
170
|
* Map event type to source name used in search table
|
|
109
171
|
*/
|