clawmem 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +10 -5
- package/CLAUDE.md +10 -5
- package/README.md +34 -4
- package/SKILL.md +15 -1
- package/package.json +1 -1
- package/src/consolidation.ts +525 -40
- package/src/deductive-guardrails.ts +481 -0
- package/src/hooks/context-surfacing.ts +285 -16
- package/src/hooks/feedback-loop.ts +40 -0
- package/src/hooks.ts +8 -3
- package/src/mcp.ts +32 -1
- package/src/merge-guards.ts +266 -0
- package/src/recall-attribution.ts +182 -0
- package/src/recall-buffer.ts +85 -0
- package/src/store.ts +271 -12
- package/src/text-similarity.ts +364 -0
package/src/text-similarity.ts (new file)

@@ -0,0 +1,364 @@

```ts
/**
 * Text similarity + merge safety gate for consolidation.
 *
 * Prevents semantic collision between topics that share vocabulary but
 * refer to different subjects (e.g., two observations about "Dan" vs
 * "Dad" or "Bob" vs "Rob"). Adds a dual-threshold safety check after
 * the cheap Jaccard candidate-generation step.
 *
 * Entity-aware first: uses `entity_mentions` when both sides have canonical
 * entities resolved. Lexical fallback via proper-noun anchor regex when
 * either side lacks entity state. Strictest default when both sides are
 * empty (no anchors at all).
 *
 * Adapted from Thoth `dream_cycle.py:218-272` subject-name guard
 * (THOTH_EXTRACTION_PLAN.md Extraction 3).
 */

import type { Store } from "./store.ts";

// =============================================================================
// Config — dual-threshold merge safety
// =============================================================================

/**
 * NORMAL threshold: applies when anchor sets are compatible (subset or
 * high overlap — same primary subject) AND the gate is gating on text
 * similarity alone. Overridable via `CLAWMEM_MERGE_SCORE_NORMAL` env var
 * for operator calibration during rollout.
 *
 * ⚠ Threshold is inherited from Thoth's `dream_cycle.py:218-272` guard,
 * which uses Python's `difflib.SequenceMatcher` (character-level LCS).
 * ClawMem uses normalized character 3-gram cosine, which is systematically
 * harsher on benign rephrasings (word-order changes, synonym swaps). A
 * same-meaning paraphrase like "The team migrated auth to OAuth2 last
 * Friday" vs "Last Friday the team completed the auth migration to
 * OAuth2" lands around 0.5 in 3-gram cosine but near 0.85 in
 * SequenceMatcher. Consequence: merges will fragment more than Thoth
 * did. This is the SAFE trade-off — fragmentation > false merges — but
 * operators should tune via env var once they have real data.
 */
export const MERGE_SCORE_NORMAL = parseEnvFloat(
  "CLAWMEM_MERGE_SCORE_NORMAL",
  0.93
);

/**
 * STRICT threshold: applies in the strictest-default path (both sides
 * have zero anchors — no canonical entities, no proper-noun anchors).
 * Overridable via `CLAWMEM_MERGE_SCORE_STRICT`.
 *
 * Non-strictest-default paths use the hard-reject rule on materially
 * different anchors, not this threshold.
 */
export const MERGE_SCORE_STRICT = parseEnvFloat(
  "CLAWMEM_MERGE_SCORE_STRICT",
  0.98
);

function parseEnvFloat(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined) return fallback;
  const n = Number.parseFloat(raw);
  if (!Number.isFinite(n) || n < 0 || n > 1) return fallback;
  return n;
}
```
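The two thresholds above are read from the environment once, at module load. As a reference point (not part of the package diff), here is a minimal calibration sketch, assuming an ESM entry point with top-level await; it uses only the env var names and clamping rules defined above:

```ts
// Hypothetical calibration sketch. Overrides must be set before the module
// is first imported, because MERGE_SCORE_NORMAL / MERGE_SCORE_STRICT are
// evaluated at load time by parseEnvFloat.
process.env.CLAWMEM_MERGE_SCORE_NORMAL = "0.88"; // finite and within [0, 1]: accepted
process.env.CLAWMEM_MERGE_SCORE_STRICT = "1.5";  // out of range: falls back to the default

const { MERGE_SCORE_NORMAL, MERGE_SCORE_STRICT } = await import("./text-similarity.ts");
console.log(MERGE_SCORE_NORMAL); // 0.88
console.log(MERGE_SCORE_STRICT); // 0.98 (fallback)
```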
```ts
// =============================================================================
// Anchor extraction (entity-first, lexical fallback)
// =============================================================================

export type AnchorSource = "entity_mentions" | "lexical_fallback";

export interface ExtractedAnchors {
  entities: string[];
  method: AnchorSource;
}

/**
 * Get canonical entity IDs referenced by a set of source documents.
 * Returns `{ entities: [], method: 'lexical_fallback' }` when no entity
 * mentions exist for any of the given docs — the caller should then
 * fall back to lexical anchor extraction over the raw text.
 */
export function extractSourceDocEntities(
  store: Store,
  sourceDocIds: number[]
): ExtractedAnchors {
  if (sourceDocIds.length === 0) {
    return { entities: [], method: "lexical_fallback" };
  }

  const placeholders = sourceDocIds.map(() => "?").join(",");
  let rows: { entity_id: string }[];
  try {
    rows = store.db
      .prepare(
        `SELECT DISTINCT entity_id FROM entity_mentions WHERE doc_id IN (${placeholders})`
      )
      .all(...sourceDocIds) as { entity_id: string }[];
  } catch {
    return { entities: [], method: "lexical_fallback" };
  }

  if (rows.length === 0) {
    return { entities: [], method: "lexical_fallback" };
  }

  return {
    entities: rows.map((r) => r.entity_id),
    method: "entity_mentions",
  };
}

/**
 * Extract lexical subject anchors from raw text.
 *
 * Heuristic: capitalized tokens that are not common sentence-start words.
 * This is the fallback when `entity_mentions` is empty (the doc has not
 * been through entity enrichment yet, or is from the pre-entity era).
 */
export function extractSubjectAnchorsLexical(text: string): string[] {
  if (!text) return [];

  // Match capitalized tokens: CamelCase, UPPERCASE, Capitalized.
  // Minimum 2 chars to avoid matching stray initials at sentence start.
  const matches = text.match(/\b[A-Z][a-zA-Z0-9]{1,}\b/g) || [];

  // Filter common sentence-start capitalized words that aren't proper nouns
  const stopwords = new Set<string>([
    "the", "a", "an", "this", "that", "these", "those",
    "it", "we", "i", "he", "she", "they", "you", "me", "him", "her", "us", "them",
    "and", "but", "or", "not", "is", "was", "are", "were",
    "be", "been", "being", "have", "has", "had",
    "do", "does", "did", "will", "would", "should", "could", "can",
    "may", "might", "must", "shall",
    "in", "on", "at", "to", "for", "of", "with", "by", "from",
    "if", "then", "else", "when", "while", "where", "how", "why",
    "all", "any", "some", "no", "one", "two",
  ]);

  const normalized = new Set<string>();
  for (const token of matches) {
    const lower = token.toLowerCase();
    if (lower.length >= 2 && !stopwords.has(lower)) {
      normalized.add(lower);
    }
  }

  return [...normalized];
}
```
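To illustrate the lexical fallback (this block is not part of the package diff): the inputs below are invented, and the expected outputs are hand-traced from the regex and stopword filter above rather than produced by running the code:

```ts
// Hand-traced expectations for extractSubjectAnchorsLexical (not executed).
extractSubjectAnchorsLexical("The team told Dan that Bob Smith moved to Paris");
// → ["dan", "bob", "smith", "paris"]
//   "The" matches the capitalized-token regex but lowercases to a stopword,
//   so it is dropped; the rest are ≥2-char capitalized tokens, lowercased
//   and deduplicated in first-seen order.

extractSubjectAnchorsLexical("the auth service was migrated to oauth2 last friday");
// → []
//   No capitalized tokens means no anchors; if the other side is also empty,
//   the gate below falls through to the strictest-default path.
```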
```ts
// =============================================================================
// Normalized character 3-gram cosine similarity
// =============================================================================

/**
 * Character 3-gram cosine similarity.
 *
 * Robust to word-level permutation and punctuation; catches near-duplicate
 * statements that differ only in wording or whitespace. Returns 0.0..1.0.
 *
 * Chosen over Jaccard (used as the cheap first-stage filter) because
 * 3-gram cosine is tighter on paraphrase detection — it distinguishes
 * "Dan visited Paris" from "Dad visited Paris" while the Jaccard over
 * long-word sets would treat both as near-duplicates.
 */
export function normalizedCosine3Gram(a: string, b: string): number {
  const na = normalizeForTrigram(a);
  const nb = normalizeForTrigram(b);

  if (na.length === 0 || nb.length === 0) return 0;
  if (na === nb) return 1.0;

  const ta = trigramCounts(na);
  const tb = trigramCounts(nb);

  let dot = 0;
  for (const [gram, count] of ta) {
    const other = tb.get(gram);
    if (other) dot += count * other;
  }

  const ma = magnitude(ta);
  const mb = magnitude(tb);
  if (ma === 0 || mb === 0) return 0;

  return dot / (ma * mb);
}

function normalizeForTrigram(s: string): string {
  return s
    .toLowerCase()
    .replace(/[^a-z0-9 ]+/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}

function trigramCounts(s: string): Map<string, number> {
  const out = new Map<string, number>();
  if (s.length < 3) {
    out.set(s, 1);
    return out;
  }
  for (let i = 0; i <= s.length - 3; i++) {
    const gram = s.slice(i, i + 3);
    out.set(gram, (out.get(gram) || 0) + 1);
  }
  return out;
}

function magnitude(m: Map<string, number>): number {
  let sum = 0;
  for (const v of m.values()) sum += v * v;
  return Math.sqrt(sum);
}

// =============================================================================
// Anchor set comparison
// =============================================================================

/**
 * Determine whether two anchor sets "materially differ".
 *
 * Rules (all case-insensitive):
 * 1. Either side empty → NOT materially different (caller handles via
 *    strictest-default path).
 * 2. One set is a subset of the other → NOT materially different
 *    (allows `"Bob"` ↔ `"Bob Smith"`).
 * 3. Intersection empty → materially different (`"Dan"` vs `"Dad"`).
 * 4. Partial overlap → materially different when AT MOST half of the
 *    smaller set is shared (boundary `≤ 0.5` treated as material to
 *    fence off primary-subject mismatches like
 *    `[alice, auth-service]` vs `[bob, auth-service]` where the only
 *    shared anchor is the context, not the subject).
 */
export function anchorSetsMateriallyDiffer(a: string[], b: string[]): boolean {
  if (a.length === 0 || b.length === 0) return false;

  const setA = new Set(a.map((x) => x.toLowerCase()));
  const setB = new Set(b.map((x) => x.toLowerCase()));

  const aSubB = [...setA].every((x) => setB.has(x));
  const bSubA = [...setB].every((x) => setA.has(x));
  if (aSubB || bSubA) return false;

  const intersect = [...setA].filter((x) => setB.has(x));
  if (intersect.length === 0) return true;

  const smaller = Math.min(setA.size, setB.size);
  return intersect.length / smaller <= 0.5;
}
```
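Again for illustration only (not part of the package diff), the doc comment's own examples hand-traced through `anchorSetsMateriallyDiffer`:

```ts
// Hand-traced outcomes of the four rules above (not executed).
anchorSetsMateriallyDiffer(["Bob"], ["bob", "smith"]);
// → false (rule 2: {"bob"} is a subset of {"bob","smith"})

anchorSetsMateriallyDiffer(["dan"], ["dad"]);
// → true (rule 3: empty intersection)

anchorSetsMateriallyDiffer(["alice", "auth-service"], ["bob", "auth-service"]);
// → true (rule 4: 1 of 2 anchors shared, 0.5 ≤ 0.5 counts as material)

anchorSetsMateriallyDiffer(["alice", "bob", "auth"], ["alice", "bob", "billing"]);
// → false (rule 4: 2 of 3 anchors shared, ratio above 0.5)

// For normalizedCosine3Gram, exact values depend on the strings, but the
// shape follows from the definition: strings identical after normalization
// score exactly 1.0, a one-character subject swap ("Dan" → "Dad") loses a
// few trigrams and drops just below 1.0, and word-order paraphrases land
// well below the 0.93 NORMAL threshold, as the header comment notes.
```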
```ts
// =============================================================================
// Merge safety gate
// =============================================================================

export type MergeSafetyMethod = "entity_aware" | "lexical_only" | "strictest_default";

export interface MergeSafetyResult {
  accepted: boolean;
  score: number;
  threshold: number;
  reason: string;
  method: MergeSafetyMethod;
}

/**
 * Merge safety gate.
 *
 * Flow:
 * 1. Compute normalized character 3-gram cosine similarity between the
 *    candidate and existing observation texts.
 * 2. Extract anchor sets for both sides. Entity-aware first
 *    (`entity_mentions`), lexical fallback otherwise. If EITHER side
 *    lacks `entity_mentions` coverage, both sides fall back to lexical
 *    so the comparison is apples-to-apples.
 * 3. Decide:
 *    - Both anchor sets empty (strictest default) → accept iff
 *      `score >= MERGE_SCORE_STRICT`.
 *    - Anchors materially differ → **HARD REJECT regardless of text
 *      similarity**. This is the primary safety goal: two observations
 *      whose canonical subjects differ are never the same observation,
 *      even if the LLM emits identical wording. Historically the gate
 *      upgraded to a stricter threshold instead of hard-rejecting, but
 *      that allowed merges at score 1.0 when the LLM emitted templated
 *      text with no subject name.
 *    - Anchors compatible (subset or high overlap) → accept iff
 *      `score >= MERGE_SCORE_NORMAL`.
 */
export function passesMergeSafety(
  store: Store,
  candidateText: string,
  candidateSourceDocIds: number[],
  existingText: string,
  existingSourceDocIds: number[]
): MergeSafetyResult {
  const score = normalizedCosine3Gram(candidateText, existingText);

  const candEnt = extractSourceDocEntities(store, candidateSourceDocIds);
  const existEnt = extractSourceDocEntities(store, existingSourceDocIds);

  // Use entity-aware path only when BOTH sides have entity mentions —
  // otherwise the comparison is apples-to-oranges (one side is a set of
  // canonical IDs, the other is a set of lexical tokens).
  const bothEntity =
    candEnt.method === "entity_mentions" && existEnt.method === "entity_mentions";

  let anchorsA: string[];
  let anchorsB: string[];
  let method: MergeSafetyMethod;

  if (bothEntity) {
    anchorsA = candEnt.entities;
    anchorsB = existEnt.entities;
    method = "entity_aware";
  } else {
    anchorsA = extractSubjectAnchorsLexical(candidateText);
    anchorsB = extractSubjectAnchorsLexical(existingText);
    method = "lexical_only";
  }

  // Strictest default: both sides empty → no subject signal at all
  if (anchorsA.length === 0 && anchorsB.length === 0) {
    const threshold = MERGE_SCORE_STRICT;
    const accepted = score >= threshold;
    return {
      accepted,
      score,
      threshold,
      reason: accepted
        ? `strictest-default met (${score.toFixed(3)} >= ${threshold})`
        : `strictest-default unmet (${score.toFixed(3)} < ${threshold})`,
      method: "strictest_default",
    };
  }

  // Hard reject on materially different anchors — this is the primary
  // safety goal of the extraction. Applies to BOTH entity_aware and
  // lexical_only modes so the policy is uniform.
  if (anchorSetsMateriallyDiffer(anchorsA, anchorsB)) {
    return {
      accepted: false,
      score,
      // Reported threshold is STRICT only for operator logging; the
      // decision was hard-reject, not threshold-gated.
      threshold: MERGE_SCORE_STRICT,
      reason: `${method} materially different anchors — hard reject (score=${score.toFixed(3)})`,
      method,
    };
  }

  // Compatible anchors (subset or high overlap): gate on text similarity
  const threshold = MERGE_SCORE_NORMAL;
  const accepted = score >= threshold;
  return {
    accepted,
    score,
    threshold,
    reason: accepted
      ? `${method} aligned anchors — ${score.toFixed(3)} >= ${threshold}`
      : `${method} aligned anchors — ${score.toFixed(3)} < ${threshold}`,
    method,
  };
}
```
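Finally, a sketch of how a consolidation pass might consume the gate. The real call sites live in `package/src/consolidation.ts`, which this excerpt does not show, so the observation shape and the skip handling below are hypothetical:

```ts
// Hypothetical caller sketch — not taken from consolidation.ts.
import { passesMergeSafety } from "./text-similarity.ts";
import type { Store } from "./store.ts";

// Assumed shape: an observation with its text and the doc IDs it was built from.
interface ObservationLike {
  text: string;
  sourceDocIds: number[];
}

function maybeMerge(store: Store, candidate: ObservationLike, existing: ObservationLike): boolean {
  const verdict = passesMergeSafety(
    store,
    candidate.text,
    candidate.sourceDocIds,
    existing.text,
    existing.sourceDocIds
  );

  if (!verdict.accepted) {
    // verdict.reason names which rule fired (strictest-default, hard reject,
    // or threshold miss) together with the computed 3-gram cosine score.
    console.debug(`merge skipped [${verdict.method}]: ${verdict.reason}`);
    return false;
  }

  // Safe to merge: anchors are compatible and the text similarity cleared
  // the applicable threshold (NORMAL, or STRICT when neither side had anchors).
  return true;
}
```

Because hard rejects ignore the text score entirely, tuning `CLAWMEM_MERGE_SCORE_NORMAL` or `CLAWMEM_MERGE_SCORE_STRICT` only affects the two threshold-gated paths; the anchor-mismatch rejection is not operator-configurable in this version.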