@voidwire/lore 1.5.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.ts +42 -7
- package/index.ts +1 -0
- package/lib/contradiction.ts +213 -0
- package/lib/indexer.ts +20 -0
- package/lib/purge.ts +80 -7
- package/lib/realtime.ts +73 -6
- package/package.json +1 -1
package/cli.ts
CHANGED
|
@@ -59,6 +59,7 @@ import {
|
|
|
59
59
|
type ObservationSubtype,
|
|
60
60
|
type ObservationConfidence,
|
|
61
61
|
type PurgeableSource,
|
|
62
|
+
type ContradictionDecision,
|
|
62
63
|
} from "./index";
|
|
63
64
|
import { isValidLoreType, LORE_TYPES } from "./lib/types";
|
|
64
65
|
import { runIndexer } from "./lib/indexer";
|
|
@@ -164,6 +165,26 @@ function fail(error: string, code: number = 1): never {
|
|
|
164
165
|
process.exit(code);
|
|
165
166
|
}
|
|
166
167
|
|
|
168
|
+
/**
 * Report contradiction-resolution outcomes on stderr (CLI context).
 *
 * - NOOP: the capture was skipped as redundant.
 * - DELETE+ADD: an existing row was replaced by the new capture.
 * - ADD: silent, unless the decision carries an `error`, in which case the
 *   fallback reason is reported.
 *
 * @param decisions - Decision records to report, in order.
 */
function logContradictionDecisions(decisions: ContradictionDecision[]): void {
  decisions.forEach((decision) => {
    switch (decision.action) {
      case "NOOP":
        console.error(
          `[contradiction] NOOP: skipped as redundant (topic: ${decision.topic})`,
        );
        return;
      case "DELETE+ADD":
        console.error(
          `[contradiction] DELETE+ADD: replaced rowid ${decision.deleteRowid} (topic: ${decision.topic})`,
        );
        return;
      default:
        // Plain ADD is the expected path; only a recorded error is worth a line.
        if (decision.error) {
          console.error(`[contradiction] ADD (fallback): ${decision.error}`);
        }
    }
  });
}
|
|
187
|
+
|
|
167
188
|
// ============================================================================
|
|
168
189
|
// Search Command
|
|
169
190
|
// ============================================================================
|
|
@@ -691,15 +712,24 @@ async function handlePurge(args: string[]): Promise<void> {
|
|
|
691
712
|
}
|
|
692
713
|
}
|
|
693
714
|
|
|
694
|
-
const result = deleteEntries(
|
|
715
|
+
const result = deleteEntries(
|
|
716
|
+
matches.map((m) => m.rowid),
|
|
717
|
+
matches.map((m) => m.content),
|
|
718
|
+
);
|
|
695
719
|
|
|
696
720
|
output({
|
|
697
721
|
success: true,
|
|
698
722
|
matches: matches.length,
|
|
699
723
|
deleted: result.deleted,
|
|
700
724
|
rowids: result.rowids,
|
|
725
|
+
logEntriesRemoved: result.logEntriesRemoved,
|
|
701
726
|
});
|
|
702
|
-
console.error(
|
|
727
|
+
console.error(
|
|
728
|
+
`Purged ${result.deleted} entries from search + embeddings` +
|
|
729
|
+
(result.logEntriesRemoved > 0
|
|
730
|
+
? ` + ${result.logEntriesRemoved} from log.jsonl.`
|
|
731
|
+
: `.`),
|
|
732
|
+
);
|
|
703
733
|
process.exit(0);
|
|
704
734
|
} catch (error) {
|
|
705
735
|
const message = error instanceof Error ? error.message : "Unknown error";
|
|
@@ -774,8 +804,9 @@ async function handleCaptureTask(args: string[]): Promise<void> {
|
|
|
774
804
|
|
|
775
805
|
if (result.success && result.event) {
|
|
776
806
|
try {
|
|
777
|
-
await indexAndEmbed([result.event]);
|
|
807
|
+
const decisions = await indexAndEmbed([result.event]);
|
|
778
808
|
output(result);
|
|
809
|
+
logContradictionDecisions(decisions);
|
|
779
810
|
console.error("✅ Task logged and indexed");
|
|
780
811
|
process.exit(0);
|
|
781
812
|
} catch (error) {
|
|
@@ -809,8 +840,9 @@ async function handleCaptureKnowledge(args: string[]): Promise<void> {
|
|
|
809
840
|
|
|
810
841
|
if (result.success && result.event) {
|
|
811
842
|
try {
|
|
812
|
-
await indexAndEmbed([result.event]);
|
|
843
|
+
const decisions = await indexAndEmbed([result.event]);
|
|
813
844
|
output(result);
|
|
845
|
+
logContradictionDecisions(decisions);
|
|
814
846
|
console.error("✅ Knowledge logged and indexed");
|
|
815
847
|
process.exit(0);
|
|
816
848
|
} catch (error) {
|
|
@@ -842,8 +874,9 @@ async function handleCaptureNote(args: string[]): Promise<void> {
|
|
|
842
874
|
|
|
843
875
|
if (result.success && result.event) {
|
|
844
876
|
try {
|
|
845
|
-
await indexAndEmbed([result.event]);
|
|
877
|
+
const decisions = await indexAndEmbed([result.event]);
|
|
846
878
|
output(result);
|
|
879
|
+
logContradictionDecisions(decisions);
|
|
847
880
|
console.error("✅ Note logged and indexed");
|
|
848
881
|
process.exit(0);
|
|
849
882
|
} catch (error) {
|
|
@@ -878,8 +911,9 @@ async function handleCaptureTeaching(args: string[]): Promise<void> {
|
|
|
878
911
|
|
|
879
912
|
if (result.success && result.event) {
|
|
880
913
|
try {
|
|
881
|
-
await indexAndEmbed([result.event]);
|
|
914
|
+
const decisions = await indexAndEmbed([result.event]);
|
|
882
915
|
output(result);
|
|
916
|
+
logContradictionDecisions(decisions);
|
|
883
917
|
console.error("✅ Teaching logged and indexed");
|
|
884
918
|
process.exit(0);
|
|
885
919
|
} catch (error) {
|
|
@@ -915,8 +949,9 @@ async function handleCaptureObservation(args: string[]): Promise<void> {
|
|
|
915
949
|
|
|
916
950
|
if (result.success && result.event) {
|
|
917
951
|
try {
|
|
918
|
-
await indexAndEmbed([result.event]);
|
|
952
|
+
const decisions = await indexAndEmbed([result.event]);
|
|
919
953
|
output(result);
|
|
954
|
+
logContradictionDecisions(decisions);
|
|
920
955
|
console.error("✅ Observation logged and indexed");
|
|
921
956
|
process.exit(0);
|
|
922
957
|
} catch (error) {
|
package/lib/contradiction.ts
CHANGED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/contradiction.ts - Write-time contradiction detection
|
|
3
|
+
*
|
|
4
|
+
* Uses hybrid search + local MLX model to classify new captures against
|
|
5
|
+
* existing entries. Determines whether a new entry should ADD (new info),
|
|
6
|
+
* NOOP (redundant), or DELETE+ADD (supersedes existing).
|
|
7
|
+
*
|
|
8
|
+
* Fail-open design: any error defaults to ADD — never blocks a capture.
|
|
9
|
+
*
|
|
10
|
+
* Usage:
|
|
11
|
+
* const candidates = await findCandidates(event);
|
|
12
|
+
* const result = await classifyContradiction(event, candidates);
|
|
13
|
+
* // result.action: "ADD" | "NOOP" | "DELETE+ADD"
|
|
14
|
+
* // result.deleteRowid: number (only set for DELETE+ADD)
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { hybridSearch, type HybridResult } from "./semantic.js";
|
|
18
|
+
import { PURGEABLE_SOURCES } from "./purge.js";
|
|
19
|
+
import type { CaptureEvent } from "./capture.js";
|
|
20
|
+
|
|
21
|
+
// ─── Types ──────────────────────────────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
export type ContradictionAction = "ADD" | "DELETE+ADD" | "NOOP";
|
|
24
|
+
|
|
25
|
+
export interface ContradictionResult {
|
|
26
|
+
action: ContradictionAction;
|
|
27
|
+
deleteRowid?: number;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/** Decision record returned to callers for logging */
|
|
31
|
+
export interface ContradictionDecision {
|
|
32
|
+
action: ContradictionAction;
|
|
33
|
+
topic: string;
|
|
34
|
+
source: string;
|
|
35
|
+
deleteRowid?: number;
|
|
36
|
+
error?: string;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// ─── Constants ──────────────────────────────────────────────────────────────
|
|
40
|
+
|
|
41
|
+
const MLX_URL = "http://localhost:8080/v1/chat/completions";
|
|
42
|
+
const MLX_MODEL = "mlx-community/Qwen2.5-7B-Instruct-4bit";
|
|
43
|
+
const MLX_TIMEOUT_MS = 1500;
|
|
44
|
+
|
|
45
|
+
const CANDIDATE_LIMIT = 5;
|
|
46
|
+
|
|
47
|
+
// Sources eligible for contradiction checking (same as purgeable)
|
|
48
|
+
const CONTRADICTION_SOURCES = new Set<string>(PURGEABLE_SOURCES);
|
|
49
|
+
|
|
50
|
+
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
/**
 * Tell whether captures from `source` take part in contradiction detection.
 *
 * @param source - Source name as stored with the entry (e.g. "captures").
 * @returns `true` when `source` is a member of `CONTRADICTION_SOURCES`.
 */
export function isContradictionCheckable(source: string): boolean {
  const eligible = CONTRADICTION_SOURCES.has(source);
  return eligible;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Look up existing entries that could duplicate or be superseded by `event`.
 *
 * The event text (`data.content`, falling back to `data.text`) is used as the
 * hybrid-search query, scoped to the event's source and — when a topic is
 * present — to that topic via the `project` filter. At most `CANDIDATE_LIMIT`
 * results are requested.
 *
 * Notes carried over from the original author (not verifiable from this file):
 * hybridSearch is said to open its own read connection, which is safe under
 * WAL alongside the write connection in indexAndEmbed; rows inserted earlier
 * in the same indexAndEmbed batch are uncommitted and therefore not visible
 * here, which is considered acceptable.
 *
 * @param event - The capture about to be indexed.
 * @returns Matching entries, or an empty array when the event has no text.
 */
export async function findCandidates(
  event: CaptureEvent,
): Promise<HybridResult[]> {
  const fields = event.data as Record<string, unknown>;

  const query = String(fields.content || fields.text || "");
  if (query === "") {
    return [];
  }

  const topicFilter = String(fields.topic || "");

  return hybridSearch(query, {
    source: getSourceForEvent(event),
    project: topicFilter === "" ? undefined : topicFilter,
    limit: CANDIDATE_LIMIT,
  });
}
|
|
86
|
+
|
|
87
|
+
/**
 * Classify a new event against existing candidates using the local MLX model.
 *
 * The model is asked to reply with one of:
 *   ADD            — new information not covered by candidates
 *   NOOP           — duplicate/redundant (already captured)
 *   DELETE <rowid> — new info supersedes a specific candidate
 *
 * A DELETE is only honoured when the rowid is one of the candidates that were
 * actually shown to the model; a rowid outside that set (hallucinated or
 * echoed from the entry text) is treated as ADD so that an unrelated row can
 * never be removed.
 *
 * Fail-open: an empty candidate list, a non-OK HTTP status, a timeout
 * (`MLX_TIMEOUT_MS`), a network error or an unparseable reply all yield ADD.
 *
 * @param event - The capture being indexed.
 * @param candidates - Existing entries returned by `findCandidates`.
 * @returns The action to take; `deleteRowid` is set only for "DELETE+ADD".
 */
export async function classifyContradiction(
  event: CaptureEvent,
  candidates: HybridResult[],
): Promise<ContradictionResult> {
  if (candidates.length === 0) {
    return { action: "ADD" };
  }

  const data = event.data as Record<string, unknown>;
  const content = String(data.content || data.text || "");
  const topic = String(data.topic || "");
  const source = getSourceForEvent(event);

  const candidateLines = candidates
    .map((c) => `[rowid: ${c.rowid}] ${c.content}`)
    .join("\n");

  // Rowids the model is allowed to name in a DELETE reply.
  // NOTE(review): assumes HybridResult.rowid is numeric (or numeric-like) — confirm.
  const candidateRowids = new Set<number>(
    candidates.map((c) => Number(c.rowid)),
  );

  const systemPrompt = `You classify knowledge contradictions. Reply with exactly one word: ADD, NOOP, or DELETE.
ADD: new information not covered by candidates
NOOP: duplicate or redundant (already captured)
DELETE: new information supersedes a candidate (also provide rowid)`;

  const userPrompt = `New entry (source: ${source}, topic: ${topic}):
${content}

Existing entries:
${candidateLines}

If DELETE, reply: DELETE <rowid>
Otherwise reply: ADD or NOOP`;

  try {
    const resp = await fetch(MLX_URL, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: MLX_MODEL,
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: userPrompt },
        ],
        max_tokens: 20,
        temperature: 0,
      }),
      signal: AbortSignal.timeout(MLX_TIMEOUT_MS),
    });

    if (!resp.ok) {
      return { action: "ADD" };
    }

    const json = (await resp.json()) as {
      choices?: Array<{ message?: { content?: string } }>;
    };

    const raw = json.choices?.[0]?.message?.content?.trim() || "";
    return parseClassification(raw, candidateRowids);
  } catch {
    // Timeout, network error, or model unavailable — fail open
    return { action: "ADD" };
  }
}

// ─── Internal Helpers ───────────────────────────────────────────────────────

/**
 * Parse the model reply into a ContradictionResult.
 *
 * Accepted (case-insensitive, surrounding whitespace ignored):
 *   "ADD", "NOOP", "DELETE 123"
 * Anything else — including a DELETE whose rowid is not a positive safe
 * integer, or is not in `allowedRowids` when that set is given — yields ADD.
 *
 * @param raw - Raw text returned by the model.
 * @param allowedRowids - Optional allow-list of rowids that may be deleted.
 *   When omitted, any positive rowid is accepted.
 * @returns The parsed action.
 */
function parseClassification(
  raw: string,
  allowedRowids?: ReadonlySet<number>,
): ContradictionResult {
  const normalized = raw.toUpperCase().trim();

  if (normalized === "NOOP") {
    return { action: "NOOP" };
  }

  if (normalized === "ADD") {
    return { action: "ADD" };
  }

  // Match "DELETE <number>" pattern
  const digits = normalized.match(/^DELETE\s+(\d+)/)?.[1];
  if (digits !== undefined) {
    const rowid = parseInt(digits, 10);
    const isUsableRowid = Number.isSafeInteger(rowid) && rowid > 0;
    const isKnownCandidate =
      allowedRowids === undefined || allowedRowids.has(rowid);
    if (isUsableRowid && isKnownCandidate) {
      return { action: "DELETE+ADD", deleteRowid: rowid };
    }
  }

  // Unparseable or not permitted — default to ADD
  return { action: "ADD" };
}
|
|
190
|
+
|
|
191
|
+
/**
 * Event type → source name. Types not listed here resolve to "captures".
 */
const SOURCE_BY_EVENT_TYPE: ReadonlyMap<string, string> = new Map([
  ["knowledge", "captures"],
  ["teaching", "teachings"],
  ["observation", "observations"],
  ["insight", "insights"],
  ["learning", "learnings"],
  ["task", "flux"],
  ["note", "captures"],
]);

/**
 * Resolve the source name for a capture event from its `type`.
 * Intended to mirror the mapping of the same name in realtime.ts.
 *
 * @param event - The capture event.
 * @returns The source name; "captures" for any unrecognised type.
 */
function getSourceForEvent(event: CaptureEvent): string {
  return SOURCE_BY_EVENT_TYPE.get(event.type) ?? "captures";
}
|
package/lib/indexer.ts
CHANGED
|
@@ -40,6 +40,18 @@ export interface IndexerContext {
|
|
|
40
40
|
|
|
41
41
|
export type IndexerFunction = (ctx: IndexerContext) => Promise<void>;
|
|
42
42
|
|
|
43
|
+
/**
 * Sources that `--rebuild` must leave alone.
 *
 * For these sources the database, not log.jsonl, is the source of truth:
 * they are indexed in real time (indexAndEmbed) with contradiction
 * resolution, so re-deriving them from log.jsonl would bring back entries
 * that were purged or superseded.
 */
export const REBUILD_EXCLUDED_SOURCES = new Set<string>([
  "captures",
  "observations",
  "teachings",
]);
|
|
54
|
+
|
|
43
55
|
/**
|
|
44
56
|
* Check if a path is configured and exists on disk.
|
|
45
57
|
* Logs a specific reason when the check fails:
|
|
@@ -231,6 +243,14 @@ export async function runIndexer(
|
|
|
231
243
|
continue;
|
|
232
244
|
}
|
|
233
245
|
|
|
246
|
+
// Skip rebuild-excluded sources — DB is source of truth, not log.jsonl
|
|
247
|
+
if (rebuild && REBUILD_EXCLUDED_SOURCES.has(src)) {
|
|
248
|
+
console.log(
|
|
249
|
+
`${src}: skipped (DB is source of truth — not rebuilt from log)`,
|
|
250
|
+
);
|
|
251
|
+
continue;
|
|
252
|
+
}
|
|
253
|
+
|
|
234
254
|
console.log(`Indexing ${src}...`);
|
|
235
255
|
|
|
236
256
|
// Clear source if rebuilding
|
package/lib/purge.ts
CHANGED
|
@@ -10,8 +10,16 @@
|
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
12
|
import { Database } from "bun:sqlite";
|
|
13
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
existsSync,
|
|
15
|
+
readFileSync,
|
|
16
|
+
writeFileSync,
|
|
17
|
+
renameSync,
|
|
18
|
+
unlinkSync,
|
|
19
|
+
} from "fs";
|
|
20
|
+
import { join } from "path";
|
|
14
21
|
import { getDatabasePath, openDatabase } from "./db.js";
|
|
22
|
+
import { getConfig } from "./config.js";
|
|
15
23
|
|
|
16
24
|
// Only these sources can be purged — indexed sources (blogs, commits, etc.) are never purgeable
|
|
17
25
|
export const PURGEABLE_SOURCES = [
|
|
@@ -33,6 +41,7 @@ export interface PurgeMatch {
|
|
|
33
41
|
export interface PurgeResult {
|
|
34
42
|
deleted: number;
|
|
35
43
|
rowids: number[];
|
|
44
|
+
logEntriesRemoved: number;
|
|
36
45
|
}
|
|
37
46
|
|
|
38
47
|
/**
|
|
@@ -76,14 +85,20 @@ export function findPurgeMatches(
|
|
|
76
85
|
}
|
|
77
86
|
|
|
78
87
|
/**
|
|
79
|
-
* Delete entries from
|
|
88
|
+
* Delete entries from FTS5 search table, vec0 embeddings table,
|
|
89
|
+
* and optionally clean matching lines from log.jsonl.
|
|
80
90
|
*
|
|
81
|
-
*
|
|
82
|
-
*
|
|
91
|
+
* @param rowids - Row IDs to delete from search + embeddings
|
|
92
|
+
* @param matchContents - Content strings from findPurgeMatches for log.jsonl filtering.
|
|
93
|
+
* Optional (default []); when empty, log.jsonl cleanup is skipped.
|
|
94
|
+
* This avoids a breaking change for existing callers.
|
|
83
95
|
*/
|
|
84
|
-
export function deleteEntries(
|
|
96
|
+
export function deleteEntries(
|
|
97
|
+
rowids: number[],
|
|
98
|
+
matchContents: string[] = [],
|
|
99
|
+
): PurgeResult {
|
|
85
100
|
if (rowids.length === 0) {
|
|
86
|
-
return { deleted: 0, rowids: [] };
|
|
101
|
+
return { deleted: 0, rowids: [], logEntriesRemoved: 0 };
|
|
87
102
|
}
|
|
88
103
|
|
|
89
104
|
// Open DB directly for read-write (matches realtime.ts pattern —
|
|
@@ -118,8 +133,66 @@ export function deleteEntries(rowids: number[]): PurgeResult {
|
|
|
118
133
|
deleted++;
|
|
119
134
|
}
|
|
120
135
|
|
|
121
|
-
|
|
136
|
+
// Clean matching lines from log.jsonl (best-effort)
|
|
137
|
+
const logEntriesRemoved = purgeLogEntries(matchContents);
|
|
138
|
+
|
|
139
|
+
return { deleted, rowids, logEntriesRemoved };
|
|
122
140
|
} finally {
|
|
123
141
|
db.close();
|
|
124
142
|
}
|
|
125
143
|
}
|
|
144
|
+
|
|
145
|
+
/**
 * Remove lines from log.jsonl whose captured text contains any of the given
 * strings (best-effort companion to the search/embeddings delete).
 *
 * Each line is parsed as JSON and its `data.content` (falling back to
 * `data.text`) is tested with a substring match against every entry of
 * `matchContents`. Lines that are not valid JSON, or whose content is not a
 * string, are always kept.
 *
 * Empty strings in `matchContents` are ignored: `"…".includes("")` is true
 * for every string, so a single empty needle would otherwise erase the
 * whole log.
 *
 * The file is rewritten only when at least one line was removed, by writing
 * the surviving lines to `log.jsonl.tmp` and renaming it over the original.
 * Any failure is logged to stderr and reported as 0 removed; it never throws
 * from the file-handling section. A leftover temp file is cleaned up.
 *
 * Matching uses the raw capture content, not the assembled search-table
 * content, so entries whose indexed text differs from the logged text may
 * not be found.
 *
 * @param matchContents - Content strings to filter out.
 * @returns Number of lines removed from log.jsonl.
 */
function purgeLogEntries(matchContents: string[]): number {
  // Drop needles that would match everything (or nothing sensible).
  const needles = matchContents.filter(
    (mc) => typeof mc === "string" && mc.length > 0,
  );
  if (needles.length === 0) return 0;

  const logPath = join(getConfig().paths.data, "log.jsonl");
  const tmpPath = logPath + ".tmp";

  if (!existsSync(logPath)) return 0;

  try {
    // Clean up stale temp file from a prior crash
    if (existsSync(tmpPath)) {
      unlinkSync(tmpPath);
    }

    const lines = readFileSync(logPath, "utf-8").split("\n").filter(Boolean);
    const filtered = lines.filter((line) => {
      try {
        const event = JSON.parse(line) as {
          data?: { content?: unknown; text?: unknown };
        };
        const content = event.data?.content || event.data?.text || "";
        // Only plain strings are compared; anything else is kept untouched.
        if (typeof content !== "string") return true;
        return !needles.some((needle) => content.includes(needle));
      } catch {
        return true; // Keep unparseable lines
      }
    });

    const removed = lines.length - filtered.length;
    if (removed === 0) {
      // Nothing matched — leave the log file exactly as it is.
      return 0;
    }

    writeFileSync(
      tmpPath,
      filtered.join("\n") + (filtered.length > 0 ? "\n" : ""),
      "utf-8",
    );
    renameSync(tmpPath, logPath);

    return removed;
  } catch (err) {
    // log.jsonl cleanup is best-effort — never fail the purge
    try {
      if (existsSync(tmpPath)) {
        unlinkSync(tmpPath);
      }
    } catch {
      // Ignore: a stale temp file is removed on the next run.
    }
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[purge] log.jsonl cleanup failed (${message})`);
    return 0;
  }
}
|
package/lib/realtime.ts
CHANGED
|
@@ -25,6 +25,12 @@ import {
|
|
|
25
25
|
} from "./semantic.js";
|
|
26
26
|
import { hashContent, getCachedEmbedding, cacheEmbedding } from "./cache.js";
|
|
27
27
|
import type { CaptureEvent } from "./capture.js";
|
|
28
|
+
import {
|
|
29
|
+
isContradictionCheckable,
|
|
30
|
+
findCandidates,
|
|
31
|
+
classifyContradiction,
|
|
32
|
+
type ContradictionDecision,
|
|
33
|
+
} from "./contradiction.js";
|
|
28
34
|
|
|
29
35
|
/**
|
|
30
36
|
* Index and embed capture events for immediate searchability
|
|
@@ -33,8 +39,10 @@ import type { CaptureEvent } from "./capture.js";
|
|
|
33
39
|
* 2. Generate embeddings with cache (instant semantic search)
|
|
34
40
|
* 3. Insert into embeddings table
|
|
35
41
|
*/
|
|
36
|
-
export async function indexAndEmbed(
|
|
37
|
-
|
|
42
|
+
export async function indexAndEmbed(
|
|
43
|
+
events: CaptureEvent[],
|
|
44
|
+
): Promise<ContradictionDecision[]> {
|
|
45
|
+
if (events.length === 0) return [];
|
|
38
46
|
|
|
39
47
|
const dbPath = getDatabasePath();
|
|
40
48
|
if (!existsSync(dbPath)) {
|
|
@@ -53,21 +61,70 @@ export async function indexAndEmbed(events: CaptureEvent[]): Promise<void> {
|
|
|
53
61
|
}
|
|
54
62
|
db.loadExtension(vecPath);
|
|
55
63
|
|
|
64
|
+
// 0. Contradiction detection — filter events before insert
|
|
65
|
+
// For purgeable sources, check if the new event contradicts or
|
|
66
|
+
// duplicates existing entries. NOOP skips the event, DELETE+ADD
|
|
67
|
+
// removes the old entry before inserting the new one.
|
|
68
|
+
const eventsToIndex: CaptureEvent[] = [];
|
|
69
|
+
const decisions: ContradictionDecision[] = [];
|
|
70
|
+
for (const event of events) {
|
|
71
|
+
const source = getSourceForEvent(event);
|
|
72
|
+
const data = event.data as Record<string, unknown>;
|
|
73
|
+
const topic = String(data.topic || "");
|
|
74
|
+
|
|
75
|
+
if (isContradictionCheckable(source)) {
|
|
76
|
+
try {
|
|
77
|
+
const candidates = await findCandidates(event);
|
|
78
|
+
if (candidates.length > 0) {
|
|
79
|
+
const result = await classifyContradiction(event, candidates);
|
|
80
|
+
|
|
81
|
+
if (result.action === "NOOP") {
|
|
82
|
+
decisions.push({ action: "NOOP", topic, source });
|
|
83
|
+
continue;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if (result.action === "DELETE+ADD" && result.deleteRowid) {
|
|
87
|
+
deleteSearchAndEmbedding(db, result.deleteRowid);
|
|
88
|
+
decisions.push({
|
|
89
|
+
action: "DELETE+ADD",
|
|
90
|
+
topic,
|
|
91
|
+
source,
|
|
92
|
+
deleteRowid: result.deleteRowid,
|
|
93
|
+
});
|
|
94
|
+
} else {
|
|
95
|
+
decisions.push({ action: "ADD", topic, source });
|
|
96
|
+
}
|
|
97
|
+
// ADD falls through to normal insert
|
|
98
|
+
}
|
|
99
|
+
} catch (err) {
|
|
100
|
+
// Fail open — if contradiction check fails, proceed with ADD
|
|
101
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
102
|
+
decisions.push({ action: "ADD", topic, source, error: message });
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
eventsToIndex.push(event);
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
if (eventsToIndex.length === 0) return decisions;
|
|
110
|
+
|
|
56
111
|
// 1. Insert into FTS5 and collect doc IDs
|
|
57
112
|
const docIds: number[] = [];
|
|
58
|
-
for (const event of
|
|
113
|
+
for (const event of eventsToIndex) {
|
|
59
114
|
const docId = insertSearchEntry(db, event);
|
|
60
115
|
docIds.push(docId);
|
|
61
116
|
}
|
|
62
117
|
|
|
63
118
|
// 2. Generate embeddings with cache
|
|
64
|
-
const contents =
|
|
119
|
+
const contents = eventsToIndex.map((e) => getContentForEmbedding(e));
|
|
65
120
|
const embeddings = await embedWithCache(db, contents);
|
|
66
121
|
|
|
67
122
|
// 3. Insert embeddings
|
|
68
|
-
for (let i = 0; i <
|
|
69
|
-
insertEmbedding(db, docIds[i], embeddings[i],
|
|
123
|
+
for (let i = 0; i < eventsToIndex.length; i++) {
|
|
124
|
+
insertEmbedding(db, docIds[i], embeddings[i], eventsToIndex[i]);
|
|
70
125
|
}
|
|
126
|
+
|
|
127
|
+
return decisions;
|
|
71
128
|
} finally {
|
|
72
129
|
db.close();
|
|
73
130
|
}
|
|
@@ -104,6 +161,16 @@ function insertSearchEntry(db: Database, event: CaptureEvent): number {
|
|
|
104
161
|
return Number(result.lastInsertRowid);
|
|
105
162
|
}
|
|
106
163
|
|
|
164
|
+
/**
 * Remove one entry from the `search` table and its vector from the
 * `embeddings` table (matched on `doc_id`).
 * Called by contradiction resolution to drop a superseded entry.
 *
 * @param db - Open database handle to run the deletes on.
 * @param rowid - Row id of the search entry to remove.
 */
function deleteSearchAndEmbedding(db: Database, rowid: number): void {
  // Order matters only for parity with the original: search first, then embeddings.
  const removals = [
    "DELETE FROM search WHERE rowid = ?",
    "DELETE FROM embeddings WHERE doc_id = ?",
  ];
  for (const sql of removals) {
    db.prepare(sql).run(rowid);
  }
}
|
|
173
|
+
|
|
107
174
|
/**
|
|
108
175
|
* Map event type to source name used in search table
|
|
109
176
|
*/
|