clawmem 0.1.8 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,17 @@
1
1
  /**
2
2
  * ClawMem Consolidation Worker
3
3
  *
4
- * Background worker that enriches documents missing A-MEM metadata.
5
- * Runs periodically to backfill memory notes for documents indexed before A-MEM.
4
+ * Two-phase background worker:
5
+ * 1. A-MEM backfill: enriches documents missing memory notes
6
+ * 2. 3-tier consolidation: synthesizes clusters of related observations
7
+ * into higher-order consolidated observations with proof counts and trends
8
+ *
9
+ * Pattern H from ENHANCEMENT-PLAN.md (source: Hindsight consolidator.py)
6
10
  */
7
11
 
8
12
  import type { Store } from "./store.ts";
9
13
  import type { LlamaCpp } from "./llm.ts";
14
+ import { extractJsonFromLLM } from "./amem.ts";
10
15
 
11
16
  // =============================================================================
12
17
  // Types
@@ -18,19 +23,40 @@ interface DocumentToEnrich {
18
23
  title: string;
19
24
  }
20
25
 
26
/** Trend label stored in the `trend` column of a consolidated observation. */
export type TrendEnum =
  | 'NEW'
  | 'STABLE'
  | 'STRENGTHENING'
  | 'WEAKENING'
  | 'STALE';

/**
 * A row of the `consolidated_observations` table.
 *
 * NOTE(review): this module writes `source_doc_ids` with `JSON.stringify`, so a
 * row read straight from the database presumably carries JSON text in that
 * field rather than a decoded array — confirm before relying on `number[]`.
 */
export interface ConsolidatedObservation {
  id: number;
  observation: string;
  /** Number of source documents backing the observation. */
  proof_count: number;
  source_doc_ids: number[];
  trend: TrendEnum;
  status: string;
  created_at: string;
  updated_at: string;
  collection: string | null;
}

/** Unconsolidated observation documents that share one collection. */
interface ObservationCluster {
  docs: Array<{
    id: number;
    title: string;
    facts: string;
    context: string;
    modified_at: string;
  }>;
  collection: string;
}
44
+
21
45
  // =============================================================================
22
46
  // Worker State
23
47
  // =============================================================================
24
48
 
25
49
  let consolidationTimer: Timer | null = null;
26
50
  let isRunning = false;
51
+ let tickCount = 0;
27
52
 
28
53
  // =============================================================================
29
54
  // Worker Functions
30
55
  // =============================================================================
31
56
 
32
57
  /**
33
- * Starts the consolidation worker that enriches documents missing A-MEM metadata.
58
+ * Starts the consolidation worker that enriches documents missing A-MEM metadata
59
+ * and periodically consolidates observations.
34
60
  *
35
61
  * @param store - Store instance with A-MEM methods
36
62
  * @param llm - LLM instance for memory note construction
@@ -69,7 +95,7 @@ export function stopConsolidationWorker(): void {
69
95
  }
70
96
 
71
97
  /**
72
- * Single worker tick: find and enrich up to 3 documents missing A-MEM metadata.
98
+ * Single worker tick: A-MEM backfill + periodic observation consolidation.
73
99
  */
74
100
  async function tick(store: Store, llm: LlamaCpp): Promise<void> {
75
101
  // Reentrancy guard
@@ -79,45 +105,337 @@ async function tick(store: Store, llm: LlamaCpp): Promise<void> {
79
105
  }
80
106
 
81
107
  isRunning = true;
108
+ tickCount++;
82
109
 
83
110
  try {
84
- // Find documents missing A-MEM keywords (primary indicator of unenriched docs)
85
- const docs = store.db
86
- .prepare<DocumentToEnrich, []>(
87
- `SELECT id, hash, title
88
- FROM documents
89
- WHERE amem_keywords IS NULL AND active = 1
90
- ORDER BY created_at ASC
91
- LIMIT 3`
111
+ // Phase 1: A-MEM backfill (every tick)
112
+ await backfillAmem(store, llm);
113
+
114
+ // Phase 2: Observation consolidation (every 6th tick, ~30 min at default interval)
115
+ if (tickCount % 6 === 0) {
116
+ await consolidateObservations(store, llm);
117
+ }
118
+ } catch (err) {
119
+ console.error("[consolidation] Tick failed:", err);
120
+ } finally {
121
+ isRunning = false;
122
+ }
123
+ }
124
+
125
/**
 * Phase 1 of a worker tick: A-MEM backfill.
 *
 * Picks up to three active documents whose `amem_keywords` is NULL (oldest
 * `created_at` first). For each one it constructs a memory note, stores it, and
 * generates memory links. A failure on one document is logged and the loop
 * moves on to the next document in the batch.
 *
 * NOTE(review): a document that fails keeps `amem_keywords` NULL as far as this
 * function is concerned, so it would be selected again on the next call —
 * confirm whether persistent failures can hold up the head of the queue.
 *
 * @param store - Store exposing the database handle and the A-MEM methods
 * @param llm - LLM instance forwarded to note construction and link generation
 */
async function backfillAmem(store: Store, llm: LlamaCpp): Promise<void> {
  const selectPending = store.db.prepare<DocumentToEnrich, []>(
    `SELECT id, hash, title
     FROM documents
     WHERE amem_keywords IS NULL AND active = 1
     ORDER BY created_at ASC
     LIMIT 3`
  );
  const pending = selectPending.all();

  const total = pending.length;
  if (total === 0) {
    return;
  }

  console.log(`[consolidation] Enriching ${total} documents`);

  for (const pendingDoc of pending) {
    const docId = pendingDoc.id;
    try {
      const memoryNote = await store.constructMemoryNote(llm, docId);
      await store.storeMemoryNote(docId, memoryNote);
      await store.generateMemoryLinks(llm, docId);
      console.log(`[consolidation] Enriched doc ${docId} (${pendingDoc.title})`);
    } catch (failure) {
      // Log and continue so one bad document does not abort the batch.
      console.error(`[consolidation] Failed to enrich doc ${docId}:`, failure);
    }
  }
}
154
+
155
+ // =============================================================================
156
+ // Phase 2: 3-Tier Observation Consolidation
157
+ // =============================================================================
158
+
159
/**
 * Phase 2: group unconsolidated observation documents by collection and
 * synthesize each group into consolidated observations, then refresh trends.
 *
 * Selection: active documents with `content_type = 'observation'` and non-NULL
 * `facts` whose id is not referenced by the `source_doc_ids` of any active
 * consolidated observation. At most 50 rows are taken, ordered by collection
 * and then newest `modified_at` first.
 *
 * Each collection is synthesized separately so patterns are never merged
 * across collections. Groups with fewer than two documents are skipped. A
 * failure in one group is logged and does not stop the others.
 *
 * Trend labels are refreshed on every call, including when there is nothing
 * new to consolidate: the labels are age-based, so they have to keep advancing
 * (e.g. towards WEAKENING / STALE) precisely when no new evidence arrives.
 *
 * @param store - Store exposing the database handle
 * @param llm - LLM instance used to synthesize each group
 */
async function consolidateObservations(store: Store, llm: LlamaCpp): Promise<void> {
  console.log("[consolidation] Starting observation consolidation");

  // Find observation-type documents not yet consolidated
  const observations = store.db.prepare(`
    SELECT d.id, d.title, d.facts, d.amem_context as context, d.modified_at, d.collection
    FROM documents d
    WHERE d.active = 1
      AND d.content_type = 'observation'
      AND d.facts IS NOT NULL
      AND d.id NOT IN (
        SELECT value FROM (
          SELECT json_each.value as value
          FROM consolidated_observations co, json_each(co.source_doc_ids)
          WHERE co.status = 'active'
        )
      )
    ORDER BY d.collection, d.modified_at DESC
    LIMIT 50
  `).all() as { id: number; title: string; facts: string; context: string; modified_at: string; collection: string }[];

  if (observations.length === 0) {
    console.log("[consolidation] No unconsolidated observations found");
    // Existing consolidated observations still age, so their trends must be
    // re-evaluated even though there is nothing new to synthesize.
    updateTrends(store);
    return;
  }

  // Group by collection
  const clusters = new Map<string, ObservationCluster>();
  for (const obs of observations) {
    let cluster = clusters.get(obs.collection);
    if (!cluster) {
      cluster = { docs: [], collection: obs.collection };
      clusters.set(obs.collection, cluster);
    }
    cluster.docs.push(obs);
  }

  // Process each collection cluster
  for (const [collection, cluster] of clusters) {
    if (cluster.docs.length < 2) continue; // Need at least 2 observations to consolidate

    try {
      await synthesizeCluster(store, llm, cluster);
    } catch (err) {
      console.error(`[consolidation] Failed to consolidate cluster for ${collection}:`, err);
    }
  }

  // Update trends on existing consolidated observations
  updateTrends(store);

  console.log("[consolidation] Observation consolidation complete");
}
214
+
215
/**
 * Ask the LLM for recurring patterns in one cluster of observations and persist
 * each accepted pattern as a consolidated observation.
 *
 * The LLM reply is treated as untrusted. A pattern is accepted only if it is an
 * object with a non-blank string `observation` and a `source_indices` array
 * that resolves to at least two distinct documents of the cluster. Indices must
 * be integers in `1..cluster.docs.length`; anything else is ignored, and
 * repeated indices count once. The `proof_count` reported by the LLM is not
 * used: the stored proof count is always the number of distinct source
 * documents.
 *
 * If an active consolidated observation with similar wording already exists in
 * the same collection, its source list is merged with the new one and its text
 * and `updated_at` are refreshed. Otherwise a new row is inserted with trend
 * 'NEW' and status 'active'.
 *
 * Returns without writing anything when the LLM yields no result or the reply
 * does not contain a JSON array.
 *
 * @param store - Store exposing the database handle
 * @param llm - LLM instance used for generation
 * @param cluster - Observation documents of a single collection
 */
async function synthesizeCluster(
  store: Store,
  llm: LlamaCpp,
  cluster: ObservationCluster
): Promise<void> {
  const docsText = cluster.docs.map((d, i) =>
    `${i + 1}. [${d.modified_at}] "${d.title}"\n Facts: ${d.facts?.slice(0, 300) || 'none'}\n Context: ${d.context?.slice(0, 200) || 'none'}`
  ).join('\n\n');

  const prompt = `Analyze these ${cluster.docs.length} session observations and identify recurring patterns or cross-session themes.

Observations:
${docsText}

For each pattern you identify:
1. Write a clear, actionable observation (1-2 sentences)
2. Count how many source observations support it (proof_count)
3. List which source numbers (1-indexed) contribute

Return ONLY valid JSON array:
[
{
"observation": "Clear statement of the pattern",
"proof_count": 3,
"source_indices": [1, 3, 5]
}
]

Rules:
- Only include patterns supported by 2+ observations
- Be specific — "user frequently modifies X" > "user works on code"
- 1-5 patterns maximum
Return ONLY the JSON array. /no_think`;

  const result = await llm.generate(prompt, {
    temperature: 0.3,
    maxTokens: 500,
  });

  if (!result) return;

  const parsed: unknown = extractJsonFromLLM(result.text);
  if (!Array.isArray(parsed)) return;

  const docCount = cluster.docs.length;

  for (const candidate of parsed as unknown[]) {
    // Array elements come from the LLM and may be null, numbers, strings, ...
    if (typeof candidate !== 'object' || candidate === null) continue;

    const { observation, source_indices: sourceIndices } = candidate as {
      observation?: unknown;
      source_indices?: unknown;
    };
    if (typeof observation !== 'string' || observation.trim() === '') continue;
    if (!Array.isArray(sourceIndices)) continue;

    // Map 1-based source indices to doc IDs. Non-integer or out-of-range
    // indices are dropped, and the Set removes duplicates so a repeated index
    // cannot inflate the proof count.
    const sourceDocIds = [
      ...new Set(
        (sourceIndices as unknown[])
          .filter((i): i is number => typeof i === 'number' && Number.isInteger(i) && i >= 1 && i <= docCount)
          .map(i => cluster.docs[i - 1]!.id)
      ),
    ];

    if (sourceDocIds.length < 2) continue;

    // Check for existing similar consolidated observation (avoid duplicates)
    const existing = findSimilarConsolidation(store, observation, cluster.collection);
    if (existing) {
      // Update existing: merge source docs, recompute proof count from the merged set
      const existingSourceIds = parseSourceDocIds(existing.source_doc_ids, existing.id);
      const mergedIds = [...new Set([...existingSourceIds, ...sourceDocIds])];

      store.db.prepare(`
        UPDATE consolidated_observations
        SET proof_count = ?,
            source_doc_ids = ?,
            updated_at = datetime('now'),
            observation = ?
        WHERE id = ?
      `).run(mergedIds.length, JSON.stringify(mergedIds), observation, existing.id);

      console.log(`[consolidation] Updated observation #${existing.id}: proof_count=${mergedIds.length}`);
    } else {
      // Insert new consolidated observation
      store.db.prepare(`
        INSERT INTO consolidated_observations (observation, proof_count, source_doc_ids, trend, status, collection)
        VALUES (?, ?, ?, 'NEW', 'active', ?)
      `).run(observation, sourceDocIds.length, JSON.stringify(sourceDocIds), cluster.collection);

      console.log(`[consolidation] Created new observation: "${observation.slice(0, 60)}..." (proof=${sourceDocIds.length})`);
    }
  }
}

/**
 * Decode a stored `source_doc_ids` value (JSON text) into a list of numeric ids.
 * Empty input yields an empty list. Text that is not a JSON array is reported
 * with a warning and also treated as empty, so one corrupt row cannot abort the
 * processing of the remaining patterns.
 */
function parseSourceDocIds(raw: string | null | undefined, observationId: number): number[] {
  if (!raw) return [];
  try {
    const decoded: unknown = JSON.parse(raw);
    if (Array.isArray(decoded)) {
      return decoded.filter((value): value is number => typeof value === 'number');
    }
  } catch {
    // Fall through to the warning below.
  }
  console.warn(`[consolidation] Ignoring malformed source_doc_ids on observation #${observationId}`);
  return [];
}
99
305
 
100
- console.log(`[consolidation] Enriching ${docs.length} documents`);
306
/**
 * Look up an active consolidated observation in `collection` whose wording is
 * close to `observation`.
 *
 * Closeness is the Jaccard index over the sets of lower-cased,
 * whitespace-separated words longer than three characters. The first stored
 * row whose index exceeds 0.5 is returned; rows are examined in the order the
 * query yields them.
 *
 * @param store - Store exposing the database handle
 * @param observation - Candidate observation text
 * @param collection - Collection the candidate belongs to
 * @returns The matching row's `id` and raw `source_doc_ids` text, or null when nothing matches
 */
function findSimilarConsolidation(
  store: Store,
  observation: string,
  collection: string
): { id: number; source_doc_ids: string } | null {
  // Lower-case, split on whitespace, keep only words longer than 3 characters.
  const significantWords = (text: string): Set<string> => {
    const words = new Set<string>();
    for (const token of text.toLowerCase().split(/\s+/)) {
      if (token.length > 3) {
        words.add(token);
      }
    }
    return words;
  };

  const candidates = store.db.prepare(`
    SELECT id, observation, source_doc_ids
    FROM consolidated_observations
    WHERE status = 'active' AND collection = ?
  `).all(collection) as { id: number; observation: string; source_doc_ids: string }[];

  const queryWords = significantWords(observation);

  for (const candidate of candidates) {
    const candidateWords = significantWords(candidate.observation);

    let shared = 0;
    for (const word of queryWords) {
      if (candidateWords.has(word)) {
        shared += 1;
      }
    }

    // |A ∪ B| = |A| + |B| - |A ∩ B|
    const unionSize = queryWords.size + candidateWords.size - shared;
    const similarity = unionSize > 0 ? shared / unionSize : 0;

    if (similarity > 0.5) {
      return { id: candidate.id, source_doc_ids: candidate.source_doc_ids };
    }
  }

  return null;
}
336
+
337
/** Matches date-time text that carries no time-zone designator, e.g. "2024-05-01 12:30:00". */
const ZONELESS_TIMESTAMP = /^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?$/;

/**
 * Convert a stored timestamp to epoch milliseconds.
 *
 * SQLite's `datetime('now')` yields UTC text without a zone marker, whereas
 * `new Date("YYYY-MM-DD HH:MM:SS")` interprets such text as local time. Zoneless
 * values are therefore rewritten to ISO form with an explicit `Z` before
 * parsing. Any other format is handed to `Date` unchanged. Unparseable input
 * yields NaN.
 */
function sqliteTimestampToMs(value: string): number {
  const normalized = ZONELESS_TIMESTAMP.test(value) ? `${value.replace(' ', 'T')}Z` : value;
  return new Date(normalized).getTime();
}

/**
 * Recompute the trend label of every active consolidated observation and write
 * back the ones that changed.
 *
 * Rules, evaluated in this order (ages in days):
 *  - NEW:           created less than 7 days ago
 *  - STALE:         last updated more than 60 days ago
 *  - WEAKENING:     last updated more than 30 days ago
 *  - STRENGTHENING: proof_count of at least 4 and updated less than 14 days ago
 *  - STABLE:        everything else (including rows whose timestamps cannot be parsed)
 *
 * @param store - Store exposing the database handle
 */
function updateTrends(store: Store): void {
  const observations = store.db.prepare(`
    SELECT id, proof_count, source_doc_ids, trend, created_at, updated_at
    FROM consolidated_observations
    WHERE status = 'active'
  `).all() as {
    id: number; proof_count: number; source_doc_ids: string;
    trend: string; created_at: string; updated_at: string;
  }[];

  if (observations.length === 0) return;

  const now = Date.now();
  const DAY_MS = 86400000;

  // Prepared once and reused for every row that needs a new label.
  const setTrend = store.db.prepare(`UPDATE consolidated_observations SET trend = ? WHERE id = ?`);

  for (const obs of observations) {
    const createdAge = (now - sqliteTimestampToMs(obs.created_at)) / DAY_MS;
    const updatedAge = (now - sqliteTimestampToMs(obs.updated_at)) / DAY_MS;

    let newTrend: TrendEnum;
    if (createdAge < 7) {
      newTrend = 'NEW';
    } else if (updatedAge > 60) {
      newTrend = 'STALE';
    } else if (updatedAge > 30) {
      newTrend = 'WEAKENING';
    } else if (obs.proof_count >= 4 && updatedAge < 14) {
      newTrend = 'STRENGTHENING';
    } else {
      newTrend = 'STABLE';
    }

    // Skip the write when the label is already correct.
    if (newTrend !== obs.trend) {
      setTrend.run(newTrend, obs.id);
    }
  }
}
377
+
378
+ // =============================================================================
379
+ // Public API for MCP / CLI
380
+ // =============================================================================
381
+
382
/**
 * Read active consolidated observations, strongest evidence first.
 *
 * Rows are ordered by `proof_count` descending, then `updated_at` descending.
 * Each filter is applied only when its option is truthy, and `limit` falls back
 * to 20 when omitted or falsy.
 *
 * NOTE(review): rows are returned exactly as the database yields them. This
 * module stores `source_doc_ids` as JSON text, so that field is presumably a
 * string at runtime despite the `number[]` in the return type — confirm with
 * callers.
 *
 * @param store - Store exposing the database handle
 * @param options - Optional filters: `collection`, `trend`, `minProof` (minimum proof_count), `limit`
 * @returns Matching rows cast to ConsolidatedObservation
 */
export function getConsolidatedObservations(
  store: Store,
  options?: { collection?: string; trend?: TrendEnum; minProof?: number; limit?: number }
): ConsolidatedObservation[] {
  const sqlParts: string[] = [`SELECT * FROM consolidated_observations WHERE status = 'active'`];
  const bindings: Array<string | number> = [];

  const collection = options?.collection;
  if (collection) {
    sqlParts.push(` AND collection = ?`);
    bindings.push(collection);
  }

  const trend = options?.trend;
  if (trend) {
    sqlParts.push(` AND trend = ?`);
    bindings.push(trend);
  }

  const minProof = options?.minProof;
  if (minProof) {
    sqlParts.push(` AND proof_count >= ?`);
    bindings.push(minProof);
  }

  sqlParts.push(` ORDER BY proof_count DESC, updated_at DESC LIMIT ?`);
  bindings.push(options?.limit || 20);

  const statement = store.db.prepare(sqlParts.join(''));
  return statement.all(...bindings) as ConsolidatedObservation[];
}
410
+
411
/**
 * Run observation consolidation on demand (for CLI or MCP tool).
 *
 * Dry run: nothing is written. `clustersFound` carries the number of active
 * observation documents with non-NULL `facts` that no active consolidated
 * observation references — a document count, not a count of groups — and
 * `observationsCreated` is 0.
 *
 * Real run: performs one consolidation pass. `observationsCreated` is the
 * difference in the number of active consolidated observations before and
 * after the pass, so rows that were merely updated are not counted.
 * `clustersFound` is always 0 in this mode.
 *
 * @param store - Store exposing the database handle
 * @param llm - LLM instance used by the consolidation pass
 * @param dryRun - When true, only report how much unconsolidated material exists
 */
export async function runConsolidation(
  store: Store,
  llm: LlamaCpp,
  dryRun: boolean = false
): Promise<{ clustersFound: number; observationsCreated: number }> {
  // Current number of active consolidated observations.
  const countActive = (): number => {
    const row = store.db.prepare(`SELECT COUNT(*) as cnt FROM consolidated_observations WHERE status = 'active'`).get() as { cnt: number };
    return row.cnt;
  };

  if (!dryRun) {
    const activeBefore = countActive();
    await consolidateObservations(store, llm);
    const activeAfter = countActive();

    return { clustersFound: 0, observationsCreated: activeAfter - activeBefore };
  }

  // Dry run: count unconsolidated observations without touching anything.
  const pending = store.db.prepare(`
    SELECT COUNT(*) as cnt FROM documents
    WHERE active = 1 AND content_type = 'observation' AND facts IS NOT NULL
      AND id NOT IN (
        SELECT value FROM (
          SELECT json_each.value as value
          FROM consolidated_observations co, json_each(co.source_doc_ids)
          WHERE co.status = 'active'
        )
      )
  `).get() as { cnt: number };

  return { clustersFound: pending.cnt, observationsCreated: 0 };
}