clawmem 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +9 -4
- package/CLAUDE.md +9 -4
- package/README.md +21 -3
- package/SKILL.md +15 -1
- package/package.json +1 -1
- package/src/consolidation.ts +514 -40
- package/src/deductive-guardrails.ts +481 -0
- package/src/hooks/context-surfacing.ts +181 -3
- package/src/merge-guards.ts +266 -0
- package/src/text-similarity.ts +364 -0
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anti-contamination LLM synthesis wrapper (Ext 1).
|
|
3
|
+
*
|
|
4
|
+
* Three guardrails around Phase 3 deductive synthesis:
|
|
5
|
+
*
|
|
6
|
+
* 1. **Evidence filtering** — `collectRelevantEvidence` splits each
|
|
7
|
+
* source doc's `facts` + `narrative` into sentences and keeps only
|
|
8
|
+
* those with lexical overlap against the draft conclusion/premises.
|
|
9
|
+
* The filtered evidence is fed to the validation LLM so it sees
|
|
10
|
+
* only the parts of each source that actually matter.
|
|
11
|
+
*
|
|
12
|
+
* 2. **Relation context injection** — `buildSourceRelationContext`
|
|
13
|
+
* queries `memory_relations` for edges AMONG the cited source docs
|
|
14
|
+
* and formats them as structural context. This lets the LLM
|
|
15
|
+
* cross-reference the graph shape alongside the raw text.
|
|
16
|
+
*
|
|
17
|
+
* 3. **Contamination scan** — `scanConclusionContamination` is the
|
|
18
|
+
* primary safety check. It compares entities (or lexical anchors)
|
|
19
|
+
* mentioned by the draft conclusion against the set of entities
|
|
20
|
+
* present in the cited sources. Any mention of an entity that
|
|
21
|
+
* exists in the BROADER candidate pool but NOT in the sources is
|
|
22
|
+
* flagged as contamination — the LLM imported content from a doc
|
|
23
|
+
* it wasn't supposed to reference. Entity-aware first (uses
|
|
24
|
+
* `entity_mentions`), lexical fallback when entity state is thin.
|
|
25
|
+
*
|
|
26
|
+
* `validateDeductiveDraft` orchestrates all three: deterministic
|
|
27
|
+
* pre-checks → contamination scan → LLM validation/refinement. Never
|
|
28
|
+
* throws, LLM null is a soft fall-through that still honors the
|
|
29
|
+
* deterministic safety gates.
|
|
30
|
+
*
|
|
31
|
+
* Adapted from Thoth `dream_cycle.py:371-565` + `prompts.py:552-579`
|
|
32
|
+
* (THOTH_EXTRACTION_PLAN.md Extraction 1).
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
import type { Store } from "./store.ts";
|
|
36
|
+
import type { LLM } from "./llm.ts";
|
|
37
|
+
import { extractJsonFromLLM } from "./amem.ts";
|
|
38
|
+
import { extractSubjectAnchorsLexical } from "./text-similarity.ts";
|
|
39
|
+
|
|
40
|
+
// =============================================================================
|
|
41
|
+
// Types
|
|
42
|
+
// =============================================================================
|
|
43
|
+
|
|
44
|
+
/**
 * A deductive draft as emitted by the Phase 3 draft-generation LLM
 * call. Matches the shape of `extractJsonFromLLM` output for the
 * existing draft prompt.
 */
export interface DeductiveDraft {
  // Proposed deduced statement (validated before it is persisted).
  conclusion: string;
  // Supporting statements the draft cites for the conclusion.
  premises: string[];
  // Indices into the draft prompt's candidate list. Not read by the
  // validators in this module — presumably callers resolve them to
  // concrete docs before calling in. TODO confirm against caller.
  sourceIndices: number[];
}

/**
 * Minimal doc shape the guardrails need. Kept narrow so the module can
 * be tested without the full `Document` row type — any object with
 * `id`, `title`, and optionally `facts`/`narrative` satisfies it.
 */
export interface DocLike {
  id: number;
  title: string;
  facts?: string | null;
  narrative?: string | null;
}

// Why a draft was rejected (used for per-reason stats counters):
//   "empty"           — conclusion missing or trivially short
//   "invalid_indices" — fewer than 2 unique source docs
//   "contamination"   — conclusion references non-source entities/anchors
//   "unsupported"     — LLM judged the evidence insufficient
//   "null_llm"        — declared for LLM-unavailable rejections; NOTE(review):
//                       the validator visible in this file fallback-ACCEPTS on
//                       LLM failure, so this value is unused here — confirm
//                       whether another caller produces it.
export type ValidationRejectReason =
  | "empty"
  | "invalid_indices"
  | "contamination"
  | "unsupported"
  | "null_llm";

/** Outcome of `validateDeductiveDraft`. */
export interface DeductiveValidation {
  accepted: boolean;
  // Final (possibly LLM-refined) conclusion when accepted; on an
  // "unsupported" rejection, the LLM's revised conclusion when it gave one.
  conclusion?: string;
  // Final (possibly LLM-refined) premises when accepted.
  premises?: string[];
  reason?: ValidationRejectReason;
  // Entity names / lexical anchors that triggered a contamination reject.
  contaminationHits?: string[];
  // Which scan path produced the hits.
  contaminationMethod?: "entity" | "lexical";
  /**
   * True when `accepted === true` because the LLM validation path
   * failed (null result, throw, or malformed JSON) and the deterministic
   * pre-checks were treated as sufficient. Operators should track
   * this separately from LLM-affirmed acceptances — a high
   * fallback-accept rate means the LLM path is effectively disabled
   * and deductions are only gated by the deterministic guardrails.
   */
  fallbackAccepted?: boolean;
}
|
|
91
|
+
|
|
92
|
+
// =============================================================================
|
|
93
|
+
// Evidence filtering
|
|
94
|
+
// =============================================================================
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Split each source doc's `facts` + `narrative` into sentences, keep
|
|
98
|
+
* only sentences with lexical overlap against the draft conclusion or
|
|
99
|
+
* any premise (minimum 2 shared >3-char tokens). Returns the
|
|
100
|
+
* concatenated evidence text (for LLM context) and the raw sentence
|
|
101
|
+
* list (for further downstream validation or logging).
|
|
102
|
+
*
|
|
103
|
+
* Keeps evidence output bounded so long source docs don't blow the
|
|
104
|
+
* validation prompt budget.
|
|
105
|
+
*/
|
|
106
|
+
export function collectRelevantEvidence(
|
|
107
|
+
sourceDocs: DocLike[],
|
|
108
|
+
draft: DeductiveDraft
|
|
109
|
+
): { evidenceText: string; evidenceSentences: string[] } {
|
|
110
|
+
const draftTokens = new Set<string>();
|
|
111
|
+
const addTokens = (s: string) => {
|
|
112
|
+
for (const tok of s.toLowerCase().split(/\s+/)) {
|
|
113
|
+
if (tok.length > 3) draftTokens.add(tok);
|
|
114
|
+
}
|
|
115
|
+
};
|
|
116
|
+
addTokens(draft.conclusion);
|
|
117
|
+
for (const p of draft.premises ?? []) addTokens(p);
|
|
118
|
+
|
|
119
|
+
const relevant: string[] = [];
|
|
120
|
+
for (const doc of sourceDocs) {
|
|
121
|
+
const text = `${doc.facts ?? ""}\n${doc.narrative ?? ""}`;
|
|
122
|
+
const sentences = text
|
|
123
|
+
.split(/[.!?\n]+/)
|
|
124
|
+
.map((s) => s.trim())
|
|
125
|
+
.filter(Boolean);
|
|
126
|
+
for (const sentence of sentences) {
|
|
127
|
+
const sentenceTokens = new Set(
|
|
128
|
+
sentence
|
|
129
|
+
.toLowerCase()
|
|
130
|
+
.split(/\s+/)
|
|
131
|
+
.filter((t) => t.length > 3)
|
|
132
|
+
);
|
|
133
|
+
const overlap = [...sentenceTokens].filter((t) => draftTokens.has(t)).length;
|
|
134
|
+
if (overlap >= 2) {
|
|
135
|
+
relevant.push(`[doc#${doc.id}] ${sentence}`);
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
return {
|
|
141
|
+
evidenceText: relevant.join(". "),
|
|
142
|
+
evidenceSentences: relevant,
|
|
143
|
+
};
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// =============================================================================
|
|
147
|
+
// Source relation context
|
|
148
|
+
// =============================================================================
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Query `memory_relations` for edges AMONG the cited source docs and
|
|
152
|
+
* format them as a human-readable context string. Sorted by weight
|
|
153
|
+
* DESC, capped at `maxEdges` (default 10) to keep the prompt
|
|
154
|
+
* bounded. Returns the empty string when there are no edges or the
|
|
155
|
+
* query fails — callers treat that as "no structural context".
|
|
156
|
+
*/
|
|
157
|
+
export function buildSourceRelationContext(
|
|
158
|
+
store: Store,
|
|
159
|
+
sourceDocIds: number[],
|
|
160
|
+
maxEdges: number = 10
|
|
161
|
+
): string {
|
|
162
|
+
if (sourceDocIds.length < 2) return "";
|
|
163
|
+
|
|
164
|
+
const placeholders = sourceDocIds.map(() => "?").join(",");
|
|
165
|
+
let rows: {
|
|
166
|
+
source_id: number;
|
|
167
|
+
target_id: number;
|
|
168
|
+
relation_type: string;
|
|
169
|
+
weight: number;
|
|
170
|
+
}[];
|
|
171
|
+
try {
|
|
172
|
+
rows = store.db
|
|
173
|
+
.prepare(
|
|
174
|
+
`SELECT source_id, target_id, relation_type, weight
|
|
175
|
+
FROM memory_relations
|
|
176
|
+
WHERE source_id IN (${placeholders})
|
|
177
|
+
AND target_id IN (${placeholders})
|
|
178
|
+
ORDER BY weight DESC
|
|
179
|
+
LIMIT ?`
|
|
180
|
+
)
|
|
181
|
+
.all(...sourceDocIds, ...sourceDocIds, maxEdges) as typeof rows;
|
|
182
|
+
} catch {
|
|
183
|
+
return "";
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
if (rows.length === 0) return "";
|
|
187
|
+
|
|
188
|
+
return rows
|
|
189
|
+
.map(
|
|
190
|
+
(r) =>
|
|
191
|
+
`doc#${r.source_id} --[${r.relation_type} w=${r.weight.toFixed(2)}]--> doc#${r.target_id}`
|
|
192
|
+
)
|
|
193
|
+
.join("\n");
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// =============================================================================
|
|
197
|
+
// Contamination scan
|
|
198
|
+
// =============================================================================
|
|
199
|
+
|
|
200
|
+
/**
|
|
201
|
+
* Scan a draft conclusion for "contamination" — content that appears
|
|
202
|
+
* in the broader candidate pool but NOT in the cited source docs.
|
|
203
|
+
*
|
|
204
|
+
* Entity-aware first: queries `entity_mentions` for both the source
|
|
205
|
+
* docs and the pool. When an entity is mentioned by the pool but not
|
|
206
|
+
* by the sources, look up its canonical name in `entity_nodes` and
|
|
207
|
+
* search for it in the conclusion (whole-word match, case-insensitive).
|
|
208
|
+
*
|
|
209
|
+
* Lexical fallback: when either side has zero entity mentions, extract
|
|
210
|
+
* proper-noun anchors from source text and pool text, find the set
|
|
211
|
+
* exclusive to the pool, and check whether the conclusion mentions
|
|
212
|
+
* any of them.
|
|
213
|
+
*
|
|
214
|
+
* Returns the list of contamination hits (anchor strings or entity
|
|
215
|
+
* names) and which path produced them.
|
|
216
|
+
*/
|
|
217
|
+
export function scanConclusionContamination(
|
|
218
|
+
store: Store,
|
|
219
|
+
conclusion: string,
|
|
220
|
+
sourceDocIds: number[],
|
|
221
|
+
candidatePool: DocLike[]
|
|
222
|
+
): { hits: string[]; method: "entity" | "lexical" } {
|
|
223
|
+
const candidateIds = candidatePool.map((d) => d.id);
|
|
224
|
+
|
|
225
|
+
const sourceEntities = getEntitiesForDocs(store, sourceDocIds);
|
|
226
|
+
const poolEntities = getEntitiesForDocs(store, candidateIds);
|
|
227
|
+
|
|
228
|
+
if (sourceEntities !== null && poolEntities !== null) {
|
|
229
|
+
const sourceSet = new Set(sourceEntities);
|
|
230
|
+
const outsideEntities = poolEntities.filter((e) => !sourceSet.has(e));
|
|
231
|
+
if (outsideEntities.length === 0) {
|
|
232
|
+
return { hits: [], method: "entity" };
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
let names: { entity_id: string; name: string }[];
|
|
236
|
+
try {
|
|
237
|
+
const placeholders = outsideEntities.map(() => "?").join(",");
|
|
238
|
+
names = store.db
|
|
239
|
+
.prepare(
|
|
240
|
+
`SELECT entity_id, name FROM entity_nodes WHERE entity_id IN (${placeholders})`
|
|
241
|
+
)
|
|
242
|
+
.all(...outsideEntities) as typeof names;
|
|
243
|
+
} catch {
|
|
244
|
+
return scanLexicalContamination(conclusion, sourceDocIds, candidatePool);
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
const lowerConclusion = conclusion.toLowerCase();
|
|
248
|
+
const hitSet = new Set<string>();
|
|
249
|
+
for (const n of names) {
|
|
250
|
+
const nameLC = n.name.toLowerCase();
|
|
251
|
+
// Use a custom non-alnum boundary instead of `\b` because `\b` fails
|
|
252
|
+
// for names that BEGIN or END with punctuation (`auth-service`,
|
|
253
|
+
// `OAuth2.0`, `C++`, `.NET`). `\b` requires one side to be a word
|
|
254
|
+
// character, so a trailing `+` in `c++` followed by whitespace
|
|
255
|
+
// produces no match (both sides non-word).
|
|
256
|
+
//
|
|
257
|
+
// Lookbehind/lookahead on `[^a-z0-9]` (plus start/end anchors)
|
|
258
|
+
// correctly matches the name when surrounded by anything that
|
|
259
|
+
// isn't alphanumeric — including punctuation, whitespace, and
|
|
260
|
+
// string boundaries.
|
|
261
|
+
const regex = new RegExp(
|
|
262
|
+
`(?<=^|[^a-z0-9])${escapeRegex(nameLC)}(?=$|[^a-z0-9])`
|
|
263
|
+
);
|
|
264
|
+
if (regex.test(lowerConclusion)) {
|
|
265
|
+
hitSet.add(n.name);
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
return { hits: [...hitSet], method: "entity" };
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
return scanLexicalContamination(conclusion, sourceDocIds, candidatePool);
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
/**
|
|
275
|
+
* Get the canonical entity IDs mentioned by a set of docs. Returns
|
|
276
|
+
* null when the docs have no entity_mentions at all — caller should
|
|
277
|
+
* fall back to lexical scan (apples-to-apples comparison).
|
|
278
|
+
*/
|
|
279
|
+
function getEntitiesForDocs(store: Store, docIds: number[]): string[] | null {
|
|
280
|
+
if (docIds.length === 0) return [];
|
|
281
|
+
const placeholders = docIds.map(() => "?").join(",");
|
|
282
|
+
let rows: { entity_id: string }[];
|
|
283
|
+
try {
|
|
284
|
+
rows = store.db
|
|
285
|
+
.prepare(
|
|
286
|
+
`SELECT DISTINCT entity_id FROM entity_mentions WHERE doc_id IN (${placeholders})`
|
|
287
|
+
)
|
|
288
|
+
.all(...docIds) as typeof rows;
|
|
289
|
+
} catch {
|
|
290
|
+
return null;
|
|
291
|
+
}
|
|
292
|
+
if (rows.length === 0) return null;
|
|
293
|
+
return rows.map((r) => r.entity_id);
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
function scanLexicalContamination(
|
|
297
|
+
conclusion: string,
|
|
298
|
+
sourceDocIds: number[],
|
|
299
|
+
candidatePool: DocLike[]
|
|
300
|
+
): { hits: string[]; method: "lexical" } {
|
|
301
|
+
const sourceSet = new Set(sourceDocIds);
|
|
302
|
+
const sourceDocs = candidatePool.filter((d) => sourceSet.has(d.id));
|
|
303
|
+
const outsideDocs = candidatePool.filter((d) => !sourceSet.has(d.id));
|
|
304
|
+
|
|
305
|
+
const sourceText = sourceDocs
|
|
306
|
+
.map((d) => `${d.title}\n${d.facts ?? ""}\n${d.narrative ?? ""}`)
|
|
307
|
+
.join("\n");
|
|
308
|
+
const outsideText = outsideDocs
|
|
309
|
+
.map((d) => `${d.title}\n${d.facts ?? ""}\n${d.narrative ?? ""}`)
|
|
310
|
+
.join("\n");
|
|
311
|
+
|
|
312
|
+
const sourceAnchors = new Set(extractSubjectAnchorsLexical(sourceText));
|
|
313
|
+
const outsideAnchors = extractSubjectAnchorsLexical(outsideText);
|
|
314
|
+
|
|
315
|
+
const exclusiveOutside = outsideAnchors.filter((a) => !sourceAnchors.has(a));
|
|
316
|
+
if (exclusiveOutside.length === 0) {
|
|
317
|
+
return { hits: [], method: "lexical" };
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
const conclusionAnchors = new Set(extractSubjectAnchorsLexical(conclusion));
|
|
321
|
+
const hits = [...new Set(exclusiveOutside.filter((a) => conclusionAnchors.has(a)))];
|
|
322
|
+
return { hits, method: "lexical" };
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
function escapeRegex(s: string): string {
|
|
326
|
+
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
// =============================================================================
|
|
330
|
+
// Validation orchestrator
|
|
331
|
+
// =============================================================================
|
|
332
|
+
|
|
333
|
+
// Validation prompt for the Phase 3 deduction check. Placeholders are
// substituted by `validateDeductiveDraft`: {EVIDENCE} → filtered source
// sentences, {RELATIONS} → optional relation-context block ("" when none),
// {CONCLUSION}/{PREMISES} → the draft under review. The trailing
// "/no_think" is part of the runtime prompt text — do not edit the
// template body, only this comment.
const VALIDATION_PROMPT_TEMPLATE = `You are a logic validator. Check whether a proposed deductive conclusion is fully supported by the provided source evidence — and nothing beyond it.

Source evidence (only these observations are allowed):
{EVIDENCE}
{RELATIONS}
Proposed deduction:
Conclusion: {CONCLUSION}
Premises: {PREMISES}

Rules:
1. If the evidence does not fully support the conclusion, reject.
2. If the conclusion references anything NOT present in the source evidence, reject.
3. If the conclusion is supported but could be phrased more precisely, return a revised conclusion.

Respond with ONLY a JSON object:
{"accepted": true|false, "conclusion": "revised or original", "premises": ["revised or original"], "reason": "brief"}

Do not include any other text. /no_think`;
|
|
351
|
+
|
|
352
|
+
/**
|
|
353
|
+
* Validate a deductive draft against its source docs.
|
|
354
|
+
*
|
|
355
|
+
* Pipeline:
|
|
356
|
+
* 1. Deterministic pre-checks:
|
|
357
|
+
* - conclusion must be non-trivial (>= 10 chars after trim)
|
|
358
|
+
* - source docs must resolve to >= 2 unique ids
|
|
359
|
+
* 2. Contamination scan — reject immediately on any hit.
|
|
360
|
+
* 3. LLM validation/refinement. On null/malformed JSON, fall back to
|
|
361
|
+
* deterministic accept (the pre-checks already passed, so the
|
|
362
|
+
* draft is structurally valid).
|
|
363
|
+
*
|
|
364
|
+
* Never throws. Returns `accepted: false` with a `reason` on any
|
|
365
|
+
* rejection so the caller can track per-reason counters in stats.
|
|
366
|
+
*/
|
|
367
|
+
export async function validateDeductiveDraft(
|
|
368
|
+
store: Store,
|
|
369
|
+
llm: LLM,
|
|
370
|
+
draft: DeductiveDraft,
|
|
371
|
+
sourceDocs: DocLike[],
|
|
372
|
+
candidatePool: DocLike[]
|
|
373
|
+
): Promise<DeductiveValidation> {
|
|
374
|
+
// Pre-check 1: non-trivial conclusion
|
|
375
|
+
if (!draft.conclusion?.trim() || draft.conclusion.trim().length < 10) {
|
|
376
|
+
return { accepted: false, reason: "empty" };
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
// Pre-check 2: at least 2 unique source docs
|
|
380
|
+
const uniqueSourceIds = [...new Set(sourceDocs.map((d) => d.id))];
|
|
381
|
+
if (uniqueSourceIds.length < 2) {
|
|
382
|
+
return { accepted: false, reason: "invalid_indices" };
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
// Contamination scan
|
|
386
|
+
const contamination = scanConclusionContamination(
|
|
387
|
+
store,
|
|
388
|
+
draft.conclusion,
|
|
389
|
+
uniqueSourceIds,
|
|
390
|
+
candidatePool
|
|
391
|
+
);
|
|
392
|
+
if (contamination.hits.length > 0) {
|
|
393
|
+
return {
|
|
394
|
+
accepted: false,
|
|
395
|
+
reason: "contamination",
|
|
396
|
+
contaminationHits: contamination.hits,
|
|
397
|
+
contaminationMethod: contamination.method,
|
|
398
|
+
};
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
// LLM validation / refinement
|
|
402
|
+
const evidence = collectRelevantEvidence(sourceDocs, draft);
|
|
403
|
+
const relationContext = buildSourceRelationContext(store, uniqueSourceIds);
|
|
404
|
+
|
|
405
|
+
const evidenceBlock =
|
|
406
|
+
evidence.evidenceText ||
|
|
407
|
+
sourceDocs
|
|
408
|
+
.map(
|
|
409
|
+
(d) =>
|
|
410
|
+
`[doc#${d.id}] ${d.title}: ${(d.facts ?? "").slice(0, 200)} ${(d.narrative ?? "").slice(0, 200)}`
|
|
411
|
+
)
|
|
412
|
+
.join("\n");
|
|
413
|
+
|
|
414
|
+
const prompt = VALIDATION_PROMPT_TEMPLATE.replace("{EVIDENCE}", evidenceBlock)
|
|
415
|
+
.replace("{RELATIONS}", relationContext ? `\nRelations among sources:\n${relationContext}\n` : "")
|
|
416
|
+
.replace("{CONCLUSION}", draft.conclusion)
|
|
417
|
+
.replace("{PREMISES}", (draft.premises ?? []).join("; "));
|
|
418
|
+
|
|
419
|
+
let result;
|
|
420
|
+
try {
|
|
421
|
+
result = await llm.generate(prompt, { temperature: 0.2, maxTokens: 400 });
|
|
422
|
+
} catch {
|
|
423
|
+
// LLM call threw → deterministic accept (pre-checks already passed)
|
|
424
|
+
return {
|
|
425
|
+
accepted: true,
|
|
426
|
+
conclusion: draft.conclusion,
|
|
427
|
+
premises: draft.premises,
|
|
428
|
+
fallbackAccepted: true,
|
|
429
|
+
};
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
if (!result?.text) {
|
|
433
|
+
// LLM returned null (cooldown / remote down) → deterministic accept
|
|
434
|
+
return {
|
|
435
|
+
accepted: true,
|
|
436
|
+
conclusion: draft.conclusion,
|
|
437
|
+
premises: draft.premises,
|
|
438
|
+
fallbackAccepted: true,
|
|
439
|
+
};
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
const parsed = extractJsonFromLLM(result.text) as {
|
|
443
|
+
accepted?: unknown;
|
|
444
|
+
conclusion?: unknown;
|
|
445
|
+
premises?: unknown;
|
|
446
|
+
reason?: unknown;
|
|
447
|
+
} | null;
|
|
448
|
+
|
|
449
|
+
if (!parsed || typeof parsed.accepted !== "boolean") {
|
|
450
|
+
// Malformed → deterministic accept
|
|
451
|
+
return {
|
|
452
|
+
accepted: true,
|
|
453
|
+
conclusion: draft.conclusion,
|
|
454
|
+
premises: draft.premises,
|
|
455
|
+
fallbackAccepted: true,
|
|
456
|
+
};
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
if (!parsed.accepted) {
|
|
460
|
+
return {
|
|
461
|
+
accepted: false,
|
|
462
|
+
reason: "unsupported",
|
|
463
|
+
conclusion:
|
|
464
|
+
typeof parsed.conclusion === "string" ? parsed.conclusion : draft.conclusion,
|
|
465
|
+
};
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
// Accepted, possibly with LLM refinement
|
|
469
|
+
return {
|
|
470
|
+
accepted: true,
|
|
471
|
+
conclusion:
|
|
472
|
+
typeof parsed.conclusion === "string" && parsed.conclusion.trim()
|
|
473
|
+
? parsed.conclusion
|
|
474
|
+
: draft.conclusion,
|
|
475
|
+
premises:
|
|
476
|
+
Array.isArray(parsed.premises) &&
|
|
477
|
+
parsed.premises.every((p) => typeof p === "string")
|
|
478
|
+
? (parsed.premises as string[])
|
|
479
|
+
: draft.premises,
|
|
480
|
+
};
|
|
481
|
+
}
|
|
@@ -58,6 +58,17 @@ const NUDGE_INTERVAL = parseInt(process.env.CLAWMEM_NUDGE_INTERVAL || "15", 10);
|
|
|
58
58
|
const LIFECYCLE_HOOK_NAMES = ["memory_pin", "memory_forget", "memory_snooze", "lifecycle-archive"];
|
|
59
59
|
const NUDGE_TEXT = "You haven't managed memory recently. If vault-context is surfacing noise → snooze it. If a critical decision was just made → pin it. If stale knowledge appeared → forget it.";
|
|
60
60
|
|
|
61
|
+
// Ext 6a: Context instruction + relationship snippets
// The instruction is ALWAYS prepended when the hook emits context — it frames
// the surfaced facts as background knowledge the agent already holds, reducing
// prompt-level ambiguity. Relationship snippets are fetched from the vault
// knowledge graph for edges where BOTH endpoints are in the surfaced doc set.
const INSTRUCTION_TEXT = "Treat the following as background facts you already know unless the user corrects them.";
// Pre-rendered wrapper and its token cost, computed once at module load so the
// per-invocation budget math is a constant subtraction.
const INSTRUCTION_XML = `<instruction>${INSTRUCTION_TEXT}</instruction>`;
const INSTRUCTION_TOKEN_COST = estimateTokens(INSTRUCTION_XML);
// Cost of an empty <relationships> wrapper — charged before any snippet line
// is admitted so the wrapper itself can never push the payload over budget.
const RELATIONSHIPS_XML_OVERHEAD_TOKENS = estimateTokens("<relationships>\n\n</relationships>");
// Upper bound on relation edges fetched per invocation (pre-truncation cap).
const MAX_RELATION_SNIPPETS = 10;
|
|
71
|
+
|
|
61
72
|
// File path patterns to extract from prompts (E13 replacement: file-aware UserPromptSubmit)
|
|
62
73
|
const FILE_PATH_RE = /(?:^|\s)((?:\/[\w.@-]+)+(?:\.\w+)?|[\w.@-]+\.(?:ts|js|py|md|sh|yaml|yml|json|toml|rs|go|tsx|jsx|css|html))\b/g;
|
|
63
74
|
|
|
@@ -349,8 +360,13 @@ export async function contextSurfacing(
|
|
|
349
360
|
}
|
|
350
361
|
}
|
|
351
362
|
|
|
352
|
-
// Build context within token budget (profile-driven)
|
|
353
|
-
|
|
363
|
+
// Build context within token budget (profile-driven).
|
|
364
|
+
// Ext 6a: Reserve budget for the always-on instruction line so the final
|
|
365
|
+
// vault-context payload stays within `tokenBudget`. Relations are layered
|
|
366
|
+
// in afterward using whatever budget remains and are the first thing
|
|
367
|
+
// truncated when the payload would overflow.
|
|
368
|
+
const factsBudget = Math.max(0, tokenBudget - INSTRUCTION_TOKEN_COST);
|
|
369
|
+
const { context, paths, tokens } = buildContext(scored, prompt, factsBudget);
|
|
354
370
|
|
|
355
371
|
if (!context) {
|
|
356
372
|
logEmptyTurn(store, input);
|
|
@@ -417,9 +433,29 @@ export async function contextSurfacing(
|
|
|
417
433
|
// Memory nudge: periodically remind agent to use lifecycle tools
|
|
418
434
|
const nudge = NUDGE_INTERVAL > 0 ? shouldNudge(store) : null;
|
|
419
435
|
|
|
436
|
+
// Ext 6a: Enrich vault-context with instruction framing + optional
|
|
437
|
+
// relationship snippets sourced from memory_relations. Only edges where
|
|
438
|
+
// BOTH endpoints are in the surfaced doc set are included. The relations
|
|
439
|
+
// block is the first thing dropped when the payload would overflow budget.
|
|
440
|
+
//
|
|
441
|
+
// Budget accounting (Turn 11 fix): `tokens` from buildContext only sums per-
|
|
442
|
+
// entry bodies and misses both the `<facts>...</facts>` wrapper and the
|
|
443
|
+
// `\n\n---\n\n` separators between entries. Compute the wrapped-facts cost
|
|
444
|
+
// directly from the rendered string so the relationships block can never
|
|
445
|
+
// push the final `<vault-context>` inner payload past `tokenBudget`.
|
|
446
|
+
const surfacedDocIds = lookupSurfacedDocIds(store, paths);
|
|
447
|
+
const relationSnippets = fetchRelationSnippets(store, surfacedDocIds);
|
|
448
|
+
const factsBlockXml = `<facts>\n${context}\n</facts>`;
|
|
449
|
+
const factsWrappedTokens = estimateTokens(factsBlockXml);
|
|
450
|
+
const relationBudget = Math.max(
|
|
451
|
+
0,
|
|
452
|
+
tokenBudget - INSTRUCTION_TOKEN_COST - factsWrappedTokens
|
|
453
|
+
);
|
|
454
|
+
const vaultInner = buildVaultContextInner(context, relationSnippets, relationBudget);
|
|
455
|
+
|
|
420
456
|
const parts: string[] = [];
|
|
421
457
|
if (routingHint) parts.push(`<vault-routing>${routingHint}</vault-routing>`);
|
|
422
|
-
parts.push(`<vault-context>\n${
|
|
458
|
+
parts.push(`<vault-context>\n${vaultInner}\n</vault-context>`);
|
|
423
459
|
if (nudge) parts.push(`<vault-nudge>${NUDGE_TEXT}</vault-nudge>`);
|
|
424
460
|
|
|
425
461
|
return makeContextOutput("context-surfacing", parts.join("\n"));
|
|
@@ -522,6 +558,148 @@ function buildContext(
|
|
|
522
558
|
};
|
|
523
559
|
}
|
|
524
560
|
|
|
561
|
+
// =============================================================================
|
|
562
|
+
// Ext 6a: Relationship snippets + instruction framing
|
|
563
|
+
// =============================================================================
|
|
564
|
+
|
|
565
|
+
/**
 * Relationship snippet derived from a memory_relations edge whose source and
 * target are both active documents currently surfaced by the context hook.
 */
export interface RelationSnippet {
  // Title of the edge's source document (documents.title of source_id).
  sourceTitle: string;
  // Title of the edge's target document (documents.title of target_id).
  targetTitle: string;
  // Raw memory_relations.relation_type label, rendered verbatim.
  relationType: string;
}
|
|
574
|
+
|
|
575
|
+
/**
|
|
576
|
+
* Resolve surfaced display paths back to document ids so the relation query
|
|
577
|
+
* can filter memory_relations edges to the surfaced set. Silently drops paths
|
|
578
|
+
* that don't match an active row in the general vault (e.g. skill-vault paths
|
|
579
|
+
* or deactivated docs) — fail-open, never throws.
|
|
580
|
+
*/
|
|
581
|
+
export function lookupSurfacedDocIds(
|
|
582
|
+
store: Store,
|
|
583
|
+
displayPaths: string[]
|
|
584
|
+
): number[] {
|
|
585
|
+
if (displayPaths.length === 0) return [];
|
|
586
|
+
try {
|
|
587
|
+
const placeholders = displayPaths.map(() => "?").join(",");
|
|
588
|
+
const rows = store.db
|
|
589
|
+
.prepare(
|
|
590
|
+
`SELECT id FROM documents
|
|
591
|
+
WHERE active = 1
|
|
592
|
+
AND (collection || '/' || path) IN (${placeholders})`
|
|
593
|
+
)
|
|
594
|
+
.all(...displayPaths) as Array<{ id: number }>;
|
|
595
|
+
return rows.map((r) => r.id);
|
|
596
|
+
} catch {
|
|
597
|
+
return [];
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
/**
|
|
602
|
+
* Fetch relationship snippets for edges where BOTH endpoints are in the
|
|
603
|
+
* surfaced doc set. Returns an empty list on empty input, zero/one surfaced
|
|
604
|
+
* docs, self-loops, or any DB error (fail-open, never throws). Results are
|
|
605
|
+
* ordered by relation weight DESC then recency so the most salient edges
|
|
606
|
+
* survive budget truncation.
|
|
607
|
+
*/
|
|
608
|
+
export function fetchRelationSnippets(
|
|
609
|
+
store: Store,
|
|
610
|
+
surfacedDocIds: number[],
|
|
611
|
+
limit: number = MAX_RELATION_SNIPPETS
|
|
612
|
+
): RelationSnippet[] {
|
|
613
|
+
if (surfacedDocIds.length < 2) return [];
|
|
614
|
+
try {
|
|
615
|
+
const placeholders = surfacedDocIds.map(() => "?").join(",");
|
|
616
|
+
const rows = store.db
|
|
617
|
+
.prepare(
|
|
618
|
+
`SELECT mr.relation_type,
|
|
619
|
+
ds.title AS source_title,
|
|
620
|
+
dt.title AS target_title
|
|
621
|
+
FROM memory_relations mr
|
|
622
|
+
JOIN documents ds ON ds.id = mr.source_id AND ds.active = 1
|
|
623
|
+
JOIN documents dt ON dt.id = mr.target_id AND dt.active = 1
|
|
624
|
+
WHERE mr.source_id IN (${placeholders})
|
|
625
|
+
AND mr.target_id IN (${placeholders})
|
|
626
|
+
AND mr.source_id != mr.target_id
|
|
627
|
+
ORDER BY mr.weight DESC, mr.created_at DESC
|
|
628
|
+
LIMIT ?`
|
|
629
|
+
)
|
|
630
|
+
.all(...surfacedDocIds, ...surfacedDocIds, limit) as Array<{
|
|
631
|
+
relation_type: string;
|
|
632
|
+
source_title: string;
|
|
633
|
+
target_title: string;
|
|
634
|
+
}>;
|
|
635
|
+
return rows.map((r) => ({
|
|
636
|
+
sourceTitle: r.source_title,
|
|
637
|
+
targetTitle: r.target_title,
|
|
638
|
+
relationType: r.relation_type,
|
|
639
|
+
}));
|
|
640
|
+
} catch {
|
|
641
|
+
return [];
|
|
642
|
+
}
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
/**
|
|
646
|
+
* Render relationship snippets as bullet lines, sanitizing titles to block
|
|
647
|
+
* prompt-injection via metadata fields. Lines that become filtered-content
|
|
648
|
+
* markers after sanitization are dropped.
|
|
649
|
+
*/
|
|
650
|
+
export function renderRelationshipLines(
|
|
651
|
+
relations: RelationSnippet[]
|
|
652
|
+
): string[] {
|
|
653
|
+
const FILTERED = "[content filtered for security]";
|
|
654
|
+
const out: string[] = [];
|
|
655
|
+
for (const r of relations) {
|
|
656
|
+
const src = sanitizeSnippet(r.sourceTitle);
|
|
657
|
+
const tgt = sanitizeSnippet(r.targetTitle);
|
|
658
|
+
if (src === FILTERED || tgt === FILTERED) continue;
|
|
659
|
+
out.push(`- ${src} --[${r.relationType}]--> ${tgt}`);
|
|
660
|
+
}
|
|
661
|
+
return out;
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
/**
|
|
665
|
+
* Assemble the inner body of <vault-context>: always instruction + facts,
|
|
666
|
+
* optionally relationships when at least one line fits in the remaining
|
|
667
|
+
* budget. Relationships are the first thing dropped — if the relationships
|
|
668
|
+
* XML wrapper alone would exceed `remainingBudgetTokens`, the whole block
|
|
669
|
+
* is omitted rather than emitting an empty wrapper.
|
|
670
|
+
*/
|
|
671
|
+
export function buildVaultContextInner(
|
|
672
|
+
factsBlock: string,
|
|
673
|
+
relations: RelationSnippet[],
|
|
674
|
+
remainingBudgetTokens: number
|
|
675
|
+
): string {
|
|
676
|
+
const lines: string[] = [];
|
|
677
|
+
lines.push(INSTRUCTION_XML);
|
|
678
|
+
lines.push(`<facts>\n${factsBlock}\n</facts>`);
|
|
679
|
+
|
|
680
|
+
if (relations.length === 0 || remainingBudgetTokens <= 0) {
|
|
681
|
+
return lines.join("\n");
|
|
682
|
+
}
|
|
683
|
+
|
|
684
|
+
const relationLines = renderRelationshipLines(relations);
|
|
685
|
+
if (relationLines.length === 0) return lines.join("\n");
|
|
686
|
+
|
|
687
|
+
// The XML wrapper itself consumes tokens — if there's no room for even one
|
|
688
|
+
// line on top of the wrapper, drop the block entirely.
|
|
689
|
+
const fittedLines: string[] = [];
|
|
690
|
+
let used = RELATIONSHIPS_XML_OVERHEAD_TOKENS;
|
|
691
|
+
for (const line of relationLines) {
|
|
692
|
+
const lineTokens = estimateTokens(line + "\n");
|
|
693
|
+
if (used + lineTokens > remainingBudgetTokens) break;
|
|
694
|
+
fittedLines.push(line);
|
|
695
|
+
used += lineTokens;
|
|
696
|
+
}
|
|
697
|
+
if (fittedLines.length === 0) return lines.join("\n");
|
|
698
|
+
|
|
699
|
+
lines.push(`<relationships>\n${fittedLines.join("\n")}\n</relationships>`);
|
|
700
|
+
return lines.join("\n");
|
|
701
|
+
}
|
|
702
|
+
|
|
525
703
|
/**
|
|
526
704
|
* Check if the agent should be nudged to use lifecycle tools.
|
|
527
705
|
* Returns true if N+ context-surfacing invocations have occurred since the
|