clawmem 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -44,8 +44,6 @@ Runs fully local with no API keys and no cloud services. Integrates via Claude C
44
44
 
45
45
  ### v0.2.0 Enhancements
46
46
 
47
- Seven patterns extracted from competitor analysis ([Hindsight](https://github.com/vectorize-io/hindsight), [Hermes Agent](https://github.com/NousResearch/hermes-agent), [claude-mem](https://github.com/thedotmack/claude-mem)):
48
-
49
47
  - **Entity resolution + co-occurrence graph** — LLM entity extraction during A-MEM enrichment, canonical normalization via FTS5 + Levenshtein fuzzy matching, co-occurrence tracking, entity graph traversal for ENTITY intent queries
50
48
  - **MPFP graph retrieval** — Multi-Path Fact Propagation with meta-path patterns per intent, hop-synchronized edge cache, Forward Push with α=0.15 teleport probability. Replaces single-beam traversal for causal/entity/temporal queries.
51
49
  - **Temporal query extraction** — regex-based date range extraction from natural language queries ("last week", "March 2026"), wired as WHERE filters into BM25 and vector search
@@ -1046,6 +1044,8 @@ Built on the shoulders of:
1046
1044
  - [Beads](https://github.com/steveyegge/beads) — Dolt-backed issue tracker for AI agents
1047
1045
  - [claude-mem](https://github.com/thedotmack/claude-mem) — Claude Code memory integration reference
1048
1046
  - [Engram](https://github.com/Gentleman-Programming/engram) — observation dedup window, topic-key upsert pattern, temporal timeline navigation, duplicate metadata scoring signals
1047
+ - [Hermes Agent](https://github.com/NousResearch/hermes-agent) — memory nudge system (periodic lifecycle tool prompting)
1048
+ - [Hindsight](https://github.com/vectorize-io/hindsight) — entity resolution, MPFP graph traversal, temporal extraction, 3-tier consolidation, observation invalidation, 4-way parallel retrieval
1049
1049
  - [MAGMA](https://arxiv.org/abs/2501.13956) — multi-graph memory agent
1050
1050
  - [memory-lancedb-pro](https://github.com/CortexReach/memory-lancedb-pro) — retrieval gate, length normalization, MMR diversity, access reinforcement algorithms
1051
1051
  - [OpenViking](https://github.com/volcengine/OpenViking) — query decomposition patterns, collection-scoped retrieval, transaction-safe indexing
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmem",
3
- "version": "0.2.1",
3
+ "version": "0.2.2",
4
4
  "description": "On-device context engine and memory for AI agents. Claude Code and OpenClaw. Hooks + MCP server + hybrid RAG search.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/entity.ts CHANGED
@@ -8,6 +8,7 @@
8
8
  */
9
9
 
10
10
  import type { Database } from "bun:sqlite";
11
+ import { createHash } from "crypto";
11
12
  import type { LlamaCpp } from "./llm.ts";
12
13
  import { extractJsonFromLLM } from "./amem.ts";
13
14
 
@@ -299,12 +300,63 @@ export function trackCoOccurrences(
299
300
  // Entity Enrichment Pipeline (called during A-MEM postIndexEnrich)
300
301
  // =============================================================================
301
302
 
303
+ /**
304
+ * Compute extraction input hash from title + body.
305
+ * Captures the actual input to the LLM prompt — changes to either trigger re-extraction.
306
+ */
307
+ function computeInputHash(title: string, body: string): string {
308
+ return createHash('sha256').update(title + '\0' + body).digest('hex');
309
+ }
310
+
311
+ /**
312
+ * Clear all derived entity state for a document:
313
+ * mentions, co-occurrence contributions, entity edges, and mention counts.
314
+ */
315
+ function clearDocEntityState(db: Database, docId: number): void {
316
+ // Get entity IDs this doc mentions (before deletion)
317
+ const oldMentions = db.prepare(
318
+ `SELECT entity_id FROM entity_mentions WHERE doc_id = ?`
319
+ ).all(docId) as { entity_id: string }[];
320
+
321
+ // Delete mentions
322
+ db.prepare(`DELETE FROM entity_mentions WHERE doc_id = ?`).run(docId);
323
+
324
+ // Decrement mention_count for each entity
325
+ for (const m of oldMentions) {
326
+ db.prepare(`
327
+ UPDATE entity_nodes SET mention_count = MAX(0, mention_count - 1) WHERE entity_id = ?
328
+ `).run(m.entity_id);
329
+ }
330
+
331
+ // Remove entity edges involving this doc
332
+ db.prepare(`
333
+ DELETE FROM memory_relations WHERE (source_id = ? OR target_id = ?) AND relation_type = 'entity'
334
+ `).run(docId, docId);
335
+
336
+ // Decrement co-occurrence counts for entity pairs from this doc
337
+ if (oldMentions.length >= 2) {
338
+ const ids = oldMentions.map(m => m.entity_id);
339
+ for (let i = 0; i < ids.length; i++) {
340
+ for (let j = i + 1; j < ids.length; j++) {
341
+ const sorted = [ids[i]!, ids[j]!].sort();
342
+ db.prepare(`
343
+ UPDATE entity_cooccurrences SET count = MAX(0, count - 1)
344
+ WHERE entity_a = ? AND entity_b = ?
345
+ `).run(sorted[0]!, sorted[1]!);
346
+ }
347
+ }
348
+ // Clean up zero-count rows
349
+ db.prepare(`DELETE FROM entity_cooccurrences WHERE count <= 0`).run();
350
+ }
351
+ }
352
+
302
353
  /**
303
354
  * Full entity enrichment for a document:
304
- * 1. Extract entities via LLM
305
- * 2. Resolve each to canonical form
306
- * 3. Record mentions
307
- * 4. Track co-occurrences
355
+ * 1. Check enrichment state (skip if input unchanged)
356
+ * 2. Extract entities via LLM
357
+ * 3. Resolve each to canonical form
358
+ * 4. Record mentions + co-occurrences + entity edges
359
+ * 5. Persist enrichment state for idempotency
308
360
  *
309
361
  * @returns Number of entities resolved
310
362
  */
@@ -315,7 +367,7 @@ export async function enrichDocumentEntities(
315
367
  vault: string = 'default'
316
368
  ): Promise<number> {
317
369
  try {
318
- // Get document content
370
+ // Get document content (snapshot for extraction)
319
371
  const doc = db.prepare(`
320
372
  SELECT d.title, c.doc as body
321
373
  FROM documents d
@@ -328,14 +380,34 @@ export async function enrichDocumentEntities(
328
380
  return 0;
329
381
  }
330
382
 
331
- // Step 1: Extract entities
383
+ // Compute extraction input hash (title + body — the actual LLM prompt input)
384
+ const inputHash = computeInputHash(doc.title, doc.body);
385
+
386
+ // Check enrichment state — skip if already enriched with same input
387
+ const existingState = db.prepare(
388
+ `SELECT input_hash FROM entity_enrichment_state WHERE doc_id = ?`
389
+ ).get(docId) as { input_hash: string } | undefined;
390
+
391
+ if (existingState?.input_hash === inputHash) {
392
+ return 0; // Same input, already enriched — skip
393
+ }
394
+
395
+ // Step 1: Extract entities via LLM
332
396
  const entities = await extractEntities(llm, doc.title, doc.body);
333
- if (entities.length === 0) {
334
- console.log(`[entity] No entities found in docId ${docId}`);
397
+
398
+ // Recheck input hash before writing — abort if content changed during LLM call
399
+ const recheckHash = db.prepare(`
400
+ SELECT d.title, c.doc as body FROM documents d
401
+ JOIN content c ON c.hash = d.hash WHERE d.id = ? AND d.active = 1
402
+ `).get(docId) as { title: string; body: string } | null;
403
+
404
+ if (!recheckHash || computeInputHash(recheckHash.title, recheckHash.body) !== inputHash) {
405
+ console.log(`[entity] Document ${docId} changed during extraction — aborting`);
335
406
  return 0;
336
407
  }
337
408
 
338
- // Step 2-3: Deduplicate entities by name+type, then resolve and record mentions
409
+ // Step 3: Deduplicate entities by surface form, then resolve canonical IDs
410
+ // Done BEFORE transaction to avoid calling upsertEntity (which mutates counters) for dupes
339
411
  const seenKeys = new Set<string>();
340
412
  const uniqueEntities: ExtractedEntity[] = [];
341
413
  for (const entity of entities) {
@@ -346,36 +418,87 @@ export async function enrichDocumentEntities(
346
418
  }
347
419
  }
348
420
 
349
- const resolvedIds: string[] = [];
421
+ // Resolve canonical IDs first (read-only lookups, no counter mutation yet)
422
+ const resolvedPairs: { entity: ExtractedEntity; canonicalId: string }[] = [];
423
+ const seenCanonicalIds = new Set<string>();
350
424
  for (const entity of uniqueEntities) {
351
- const entityId = upsertEntity(db, entity.name, entity.type, vault);
352
- resolvedIds.push(entityId);
353
- recordEntityMention(db, entityId, docId, entity.name);
425
+ const canonicalId = resolveEntityCanonical(db, entity.name, entity.type, vault)
426
+ || makeEntityId(entity.name, entity.type, vault);
427
+ if (!seenCanonicalIds.has(canonicalId)) {
428
+ seenCanonicalIds.add(canonicalId);
429
+ resolvedPairs.push({ entity, canonicalId });
430
+ }
354
431
  }
355
432
 
356
- // Step 4: Track co-occurrences (deduplicated IDs prevent inflated pair counts)
357
- trackCoOccurrences(db, resolvedIds);
433
+ // All writes in a transaction — partial failure rolls back cleanly
434
+ try {
435
+ db.exec("BEGIN");
436
+
437
+ // Re-check enrichment state inside transaction (prevents concurrent overcount)
438
+ const txState = db.prepare(
439
+ `SELECT input_hash FROM entity_enrichment_state WHERE doc_id = ?`
440
+ ).get(docId) as { input_hash: string } | undefined;
358
441
 
359
- // Step 5: Create entity edges in memory_relations
360
- for (const entityId of resolvedIds) {
361
- // Find other documents mentioning this entity
362
- const otherDocs = db.prepare(`
363
- SELECT doc_id FROM entity_mentions
364
- WHERE entity_id = ? AND doc_id != ?
365
- LIMIT 10
366
- `).all(entityId, docId) as { doc_id: number }[];
442
+ if (txState?.input_hash === inputHash) {
443
+ db.exec("ROLLBACK");
444
+ return 0; // Another worker already committed this exact enrichment
445
+ }
446
+
447
+ // Clear old derived state if re-enriching (content changed)
448
+ if (txState || existingState) {
449
+ clearDocEntityState(db, docId);
450
+ }
367
451
 
368
- for (const other of otherDocs) {
369
- // Insert entity relation (unidirectional; graph traversal handles inbound for entity/semantic types)
452
+ if (entities.length === 0) {
370
453
  db.prepare(`
371
- INSERT OR IGNORE INTO memory_relations (source_id, target_id, relation_type, weight, metadata, created_at)
372
- VALUES (?, ?, 'entity', 0.7, ?, datetime('now'))
373
- `).run(docId, other.doc_id, JSON.stringify({ entity: entityId }));
454
+ INSERT OR REPLACE INTO entity_enrichment_state (doc_id, input_hash, enriched_at)
455
+ VALUES (?, ?, datetime('now'))
456
+ `).run(docId, inputHash);
457
+ db.exec("COMMIT");
458
+ console.log(`[entity] No entities found in docId ${docId}`);
459
+ return 0;
374
460
  }
375
- }
376
461
 
377
- console.log(`[entity] Enriched docId ${docId}: ${resolvedIds.length} entities, ${entities.length} extracted`);
378
- return resolvedIds.length;
462
+ // Now mutate counters — one upsert per unique canonical ID (no inflation)
463
+ const resolvedIds: string[] = [];
464
+ for (const { entity, canonicalId } of resolvedPairs) {
465
+ const entityId = upsertEntity(db, entity.name, entity.type, vault);
466
+ resolvedIds.push(entityId);
467
+ recordEntityMention(db, entityId, docId, entity.name);
468
+ }
469
+
470
+ // Step 4: Track co-occurrences (deduplicated by canonical ID)
471
+ trackCoOccurrences(db, resolvedIds);
472
+
473
+ // Step 5: Create entity edges in memory_relations
474
+ for (const entityId of resolvedIds) {
475
+ const otherDocs = db.prepare(`
476
+ SELECT doc_id FROM entity_mentions
477
+ WHERE entity_id = ? AND doc_id != ?
478
+ LIMIT 10
479
+ `).all(entityId, docId) as { doc_id: number }[];
480
+
481
+ for (const other of otherDocs) {
482
+ db.prepare(`
483
+ INSERT OR IGNORE INTO memory_relations (source_id, target_id, relation_type, weight, metadata, created_at)
484
+ VALUES (?, ?, 'entity', 0.7, ?, datetime('now'))
485
+ `).run(docId, other.doc_id, JSON.stringify({ entity: entityId }));
486
+ }
487
+ }
488
+
489
+ // Persist enrichment state LAST — only after all derived data written
490
+ db.prepare(`
491
+ INSERT OR REPLACE INTO entity_enrichment_state (doc_id, input_hash, enriched_at)
492
+ VALUES (?, ?, datetime('now'))
493
+ `).run(docId, inputHash);
494
+
495
+ db.exec("COMMIT");
496
+ console.log(`[entity] Enriched docId ${docId}: ${resolvedIds.length} entities, ${entities.length} extracted`);
497
+ return resolvedIds.length;
498
+ } catch (txErr) {
499
+ try { db.exec("ROLLBACK"); } catch { /* already rolled back */ }
500
+ throw txErr; // re-throw to outer catch
501
+ }
379
502
  } catch (err) {
380
503
  console.log(`[entity] Error enriching docId ${docId}:`, err);
381
504
  return 0;
package/src/store.ts CHANGED
@@ -711,6 +711,16 @@ function initializeDatabase(db: Database): void {
711
711
  // Entity FTS5 for fuzzy name lookup
712
712
  db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS entities_fts USING fts5(entity_id, name, entity_type)`);
713
713
 
714
+ // Entity enrichment state: tracks what input was used for extraction (idempotent --enrich)
715
+ db.exec(`
716
+ CREATE TABLE IF NOT EXISTS entity_enrichment_state (
717
+ doc_id INTEGER PRIMARY KEY,
718
+ input_hash TEXT NOT NULL,
719
+ enriched_at TEXT NOT NULL,
720
+ FOREIGN KEY (doc_id) REFERENCES documents(id) ON DELETE CASCADE
721
+ )
722
+ `);
723
+
714
724
  // 3-tier consolidation: observations synthesized from clusters of related facts
715
725
  db.exec(`
716
726
  CREATE TABLE IF NOT EXISTS consolidated_observations (