clawmem 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/entity.ts +154 -31
- package/src/store.ts +10 -0
package/README.md
CHANGED
|
@@ -44,8 +44,6 @@ Runs fully local with no API keys and no cloud services. Integrates via Claude C
|
|
|
44
44
|
|
|
45
45
|
### v0.2.0 Enhancements
|
|
46
46
|
|
|
47
|
-
Seven patterns extracted from competitor analysis ([Hindsight](https://github.com/vectorize-io/hindsight), [Hermes Agent](https://github.com/NousResearch/hermes-agent), [claude-mem](https://github.com/thedotmack/claude-mem)):
|
|
48
|
-
|
|
49
47
|
- **Entity resolution + co-occurrence graph** — LLM entity extraction during A-MEM enrichment, canonical normalization via FTS5 + Levenshtein fuzzy matching, co-occurrence tracking, entity graph traversal for ENTITY intent queries
|
|
50
48
|
- **MPFP graph retrieval** — Multi-Path Fact Propagation with meta-path patterns per intent, hop-synchronized edge cache, Forward Push with α=0.15 teleport probability. Replaces single-beam traversal for causal/entity/temporal queries.
|
|
51
49
|
- **Temporal query extraction** — regex-based date range extraction from natural language queries ("last week", "March 2026"), wired as WHERE filters into BM25 and vector search
|
|
@@ -1046,6 +1044,8 @@ Built on the shoulders of:
|
|
|
1046
1044
|
- [Beads](https://github.com/steveyegge/beads) — Dolt-backed issue tracker for AI agents
|
|
1047
1045
|
- [claude-mem](https://github.com/thedotmack/claude-mem) — Claude Code memory integration reference
|
|
1048
1046
|
- [Engram](https://github.com/Gentleman-Programming/engram) — observation dedup window, topic-key upsert pattern, temporal timeline navigation, duplicate metadata scoring signals
|
|
1047
|
+
- [Hermes Agent](https://github.com/NousResearch/hermes-agent) — memory nudge system (periodic lifecycle tool prompting)
|
|
1048
|
+
- [Hindsight](https://github.com/vectorize-io/hindsight) — entity resolution, MPFP graph traversal, temporal extraction, 3-tier consolidation, observation invalidation, 4-way parallel retrieval
|
|
1049
1049
|
- [MAGMA](https://arxiv.org/abs/2501.13956) — multi-graph memory agent
|
|
1050
1050
|
- [memory-lancedb-pro](https://github.com/CortexReach/memory-lancedb-pro) — retrieval gate, length normalization, MMR diversity, access reinforcement algorithms
|
|
1051
1051
|
- [OpenViking](https://github.com/volcengine/OpenViking) — query decomposition patterns, collection-scoped retrieval, transaction-safe indexing
|
package/package.json
CHANGED
package/src/entity.ts
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
10
|
import type { Database } from "bun:sqlite";
|
|
11
|
+
import { createHash } from "crypto";
|
|
11
12
|
import type { LlamaCpp } from "./llm.ts";
|
|
12
13
|
import { extractJsonFromLLM } from "./amem.ts";
|
|
13
14
|
|
|
@@ -299,12 +300,63 @@ export function trackCoOccurrences(
|
|
|
299
300
|
// Entity Enrichment Pipeline (called during A-MEM postIndexEnrich)
|
|
300
301
|
// =============================================================================
|
|
301
302
|
|
|
303
|
+
/**
|
|
304
|
+
* Compute extraction input hash from title + body.
|
|
305
|
+
* Captures the actual input to the LLM prompt — changes to either trigger re-extraction.
|
|
306
|
+
*/
|
|
307
|
+
function computeInputHash(title: string, body: string): string {
|
|
308
|
+
return createHash('sha256').update(title + '\0' + body).digest('hex');
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
/**
|
|
312
|
+
* Clear all derived entity state for a document:
|
|
313
|
+
* mentions, co-occurrence contributions, entity edges, and mention counts.
|
|
314
|
+
*/
|
|
315
|
+
function clearDocEntityState(db: Database, docId: number): void {
|
|
316
|
+
// Get entity IDs this doc mentions (before deletion)
|
|
317
|
+
const oldMentions = db.prepare(
|
|
318
|
+
`SELECT entity_id FROM entity_mentions WHERE doc_id = ?`
|
|
319
|
+
).all(docId) as { entity_id: string }[];
|
|
320
|
+
|
|
321
|
+
// Delete mentions
|
|
322
|
+
db.prepare(`DELETE FROM entity_mentions WHERE doc_id = ?`).run(docId);
|
|
323
|
+
|
|
324
|
+
// Decrement mention_count for each entity
|
|
325
|
+
for (const m of oldMentions) {
|
|
326
|
+
db.prepare(`
|
|
327
|
+
UPDATE entity_nodes SET mention_count = MAX(0, mention_count - 1) WHERE entity_id = ?
|
|
328
|
+
`).run(m.entity_id);
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
// Remove entity edges involving this doc
|
|
332
|
+
db.prepare(`
|
|
333
|
+
DELETE FROM memory_relations WHERE (source_id = ? OR target_id = ?) AND relation_type = 'entity'
|
|
334
|
+
`).run(docId, docId);
|
|
335
|
+
|
|
336
|
+
// Decrement co-occurrence counts for entity pairs from this doc
|
|
337
|
+
if (oldMentions.length >= 2) {
|
|
338
|
+
const ids = oldMentions.map(m => m.entity_id);
|
|
339
|
+
for (let i = 0; i < ids.length; i++) {
|
|
340
|
+
for (let j = i + 1; j < ids.length; j++) {
|
|
341
|
+
const sorted = [ids[i]!, ids[j]!].sort();
|
|
342
|
+
db.prepare(`
|
|
343
|
+
UPDATE entity_cooccurrences SET count = MAX(0, count - 1)
|
|
344
|
+
WHERE entity_a = ? AND entity_b = ?
|
|
345
|
+
`).run(sorted[0]!, sorted[1]!);
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
// Clean up zero-count rows
|
|
349
|
+
db.prepare(`DELETE FROM entity_cooccurrences WHERE count <= 0`).run();
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
|
|
302
353
|
/**
|
|
303
354
|
* Full entity enrichment for a document:
|
|
304
|
-
* 1.
|
|
305
|
-
* 2.
|
|
306
|
-
* 3.
|
|
307
|
-
* 4.
|
|
355
|
+
* 1. Check enrichment state (skip if input unchanged)
|
|
356
|
+
* 2. Extract entities via LLM
|
|
357
|
+
* 3. Resolve each to canonical form
|
|
358
|
+
* 4. Record mentions + co-occurrences + entity edges
|
|
359
|
+
* 5. Persist enrichment state for idempotency
|
|
308
360
|
*
|
|
309
361
|
* @returns Number of entities resolved
|
|
310
362
|
*/
|
|
@@ -315,7 +367,7 @@ export async function enrichDocumentEntities(
|
|
|
315
367
|
vault: string = 'default'
|
|
316
368
|
): Promise<number> {
|
|
317
369
|
try {
|
|
318
|
-
// Get document content
|
|
370
|
+
// Get document content (snapshot for extraction)
|
|
319
371
|
const doc = db.prepare(`
|
|
320
372
|
SELECT d.title, c.doc as body
|
|
321
373
|
FROM documents d
|
|
@@ -328,14 +380,34 @@ export async function enrichDocumentEntities(
|
|
|
328
380
|
return 0;
|
|
329
381
|
}
|
|
330
382
|
|
|
331
|
-
//
|
|
383
|
+
// Compute extraction input hash (title + body — the actual LLM prompt input)
|
|
384
|
+
const inputHash = computeInputHash(doc.title, doc.body);
|
|
385
|
+
|
|
386
|
+
// Check enrichment state — skip if already enriched with same input
|
|
387
|
+
const existingState = db.prepare(
|
|
388
|
+
`SELECT input_hash FROM entity_enrichment_state WHERE doc_id = ?`
|
|
389
|
+
).get(docId) as { input_hash: string } | undefined;
|
|
390
|
+
|
|
391
|
+
if (existingState?.input_hash === inputHash) {
|
|
392
|
+
return 0; // Same input, already enriched — skip
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
// Step 2: Extract entities via LLM
|
|
332
396
|
const entities = await extractEntities(llm, doc.title, doc.body);
|
|
333
|
-
|
|
334
|
-
|
|
397
|
+
|
|
398
|
+
// Recheck input hash before writing — abort if content changed during LLM call
|
|
399
|
+
const recheckHash = db.prepare(`
|
|
400
|
+
SELECT d.title, c.doc as body FROM documents d
|
|
401
|
+
JOIN content c ON c.hash = d.hash WHERE d.id = ? AND d.active = 1
|
|
402
|
+
`).get(docId) as { title: string; body: string } | null;
|
|
403
|
+
|
|
404
|
+
if (!recheckHash || computeInputHash(recheckHash.title, recheckHash.body) !== inputHash) {
|
|
405
|
+
console.log(`[entity] Document ${docId} changed during extraction — aborting`);
|
|
335
406
|
return 0;
|
|
336
407
|
}
|
|
337
408
|
|
|
338
|
-
// Step
|
|
409
|
+
// Step 3: Deduplicate entities by surface form, then resolve canonical IDs
|
|
410
|
+
// Done BEFORE transaction to avoid calling upsertEntity (which mutates counters) for dupes
|
|
339
411
|
const seenKeys = new Set<string>();
|
|
340
412
|
const uniqueEntities: ExtractedEntity[] = [];
|
|
341
413
|
for (const entity of entities) {
|
|
@@ -346,36 +418,87 @@ export async function enrichDocumentEntities(
|
|
|
346
418
|
}
|
|
347
419
|
}
|
|
348
420
|
|
|
349
|
-
|
|
421
|
+
// Resolve canonical IDs first (read-only lookups, no counter mutation yet)
|
|
422
|
+
const resolvedPairs: { entity: ExtractedEntity; canonicalId: string }[] = [];
|
|
423
|
+
const seenCanonicalIds = new Set<string>();
|
|
350
424
|
for (const entity of uniqueEntities) {
|
|
351
|
-
const
|
|
352
|
-
|
|
353
|
-
|
|
425
|
+
const canonicalId = resolveEntityCanonical(db, entity.name, entity.type, vault)
|
|
426
|
+
|| makeEntityId(entity.name, entity.type, vault);
|
|
427
|
+
if (!seenCanonicalIds.has(canonicalId)) {
|
|
428
|
+
seenCanonicalIds.add(canonicalId);
|
|
429
|
+
resolvedPairs.push({ entity, canonicalId });
|
|
430
|
+
}
|
|
354
431
|
}
|
|
355
432
|
|
|
356
|
-
//
|
|
357
|
-
|
|
433
|
+
// All writes in a transaction — partial failure rolls back cleanly
|
|
434
|
+
try {
|
|
435
|
+
db.exec("BEGIN");
|
|
436
|
+
|
|
437
|
+
// Re-check enrichment state inside transaction (prevents concurrent overcount)
|
|
438
|
+
const txState = db.prepare(
|
|
439
|
+
`SELECT input_hash FROM entity_enrichment_state WHERE doc_id = ?`
|
|
440
|
+
).get(docId) as { input_hash: string } | undefined;
|
|
358
441
|
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
442
|
+
if (txState?.input_hash === inputHash) {
|
|
443
|
+
db.exec("ROLLBACK");
|
|
444
|
+
return 0; // Another worker already committed this exact enrichment
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
// Clear old derived state if re-enriching (content changed)
|
|
448
|
+
if (txState || existingState) {
|
|
449
|
+
clearDocEntityState(db, docId);
|
|
450
|
+
}
|
|
367
451
|
|
|
368
|
-
|
|
369
|
-
// Insert entity relation (unidirectional; graph traversal handles inbound for entity/semantic types)
|
|
452
|
+
if (entities.length === 0) {
|
|
370
453
|
db.prepare(`
|
|
371
|
-
INSERT OR
|
|
372
|
-
VALUES (?, ?,
|
|
373
|
-
`).run(docId,
|
|
454
|
+
INSERT OR REPLACE INTO entity_enrichment_state (doc_id, input_hash, enriched_at)
|
|
455
|
+
VALUES (?, ?, datetime('now'))
|
|
456
|
+
`).run(docId, inputHash);
|
|
457
|
+
db.exec("COMMIT");
|
|
458
|
+
console.log(`[entity] No entities found in docId ${docId}`);
|
|
459
|
+
return 0;
|
|
374
460
|
}
|
|
375
|
-
}
|
|
376
461
|
|
|
377
|
-
|
|
378
|
-
|
|
462
|
+
// Now mutate counters — one upsert per unique canonical ID (no inflation)
|
|
463
|
+
const resolvedIds: string[] = [];
|
|
464
|
+
for (const { entity, canonicalId } of resolvedPairs) {
|
|
465
|
+
const entityId = upsertEntity(db, entity.name, entity.type, vault);
|
|
466
|
+
resolvedIds.push(entityId);
|
|
467
|
+
recordEntityMention(db, entityId, docId, entity.name);
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
// Step 4: Track co-occurrences (deduplicated by canonical ID)
|
|
471
|
+
trackCoOccurrences(db, resolvedIds);
|
|
472
|
+
|
|
473
|
+
// Step 5: Create entity edges in memory_relations
|
|
474
|
+
for (const entityId of resolvedIds) {
|
|
475
|
+
const otherDocs = db.prepare(`
|
|
476
|
+
SELECT doc_id FROM entity_mentions
|
|
477
|
+
WHERE entity_id = ? AND doc_id != ?
|
|
478
|
+
LIMIT 10
|
|
479
|
+
`).all(entityId, docId) as { doc_id: number }[];
|
|
480
|
+
|
|
481
|
+
for (const other of otherDocs) {
|
|
482
|
+
db.prepare(`
|
|
483
|
+
INSERT OR IGNORE INTO memory_relations (source_id, target_id, relation_type, weight, metadata, created_at)
|
|
484
|
+
VALUES (?, ?, 'entity', 0.7, ?, datetime('now'))
|
|
485
|
+
`).run(docId, other.doc_id, JSON.stringify({ entity: entityId }));
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
// Persist enrichment state LAST — only after all derived data written
|
|
490
|
+
db.prepare(`
|
|
491
|
+
INSERT OR REPLACE INTO entity_enrichment_state (doc_id, input_hash, enriched_at)
|
|
492
|
+
VALUES (?, ?, datetime('now'))
|
|
493
|
+
`).run(docId, inputHash);
|
|
494
|
+
|
|
495
|
+
db.exec("COMMIT");
|
|
496
|
+
console.log(`[entity] Enriched docId ${docId}: ${resolvedIds.length} entities, ${entities.length} extracted`);
|
|
497
|
+
return resolvedIds.length;
|
|
498
|
+
} catch (txErr) {
|
|
499
|
+
try { db.exec("ROLLBACK"); } catch { /* already rolled back */ }
|
|
500
|
+
throw txErr; // re-throw to outer catch
|
|
501
|
+
}
|
|
379
502
|
} catch (err) {
|
|
380
503
|
console.log(`[entity] Error enriching docId ${docId}:`, err);
|
|
381
504
|
return 0;
|
package/src/store.ts
CHANGED
|
@@ -711,6 +711,16 @@ function initializeDatabase(db: Database): void {
|
|
|
711
711
|
// Entity FTS5 for fuzzy name lookup
|
|
712
712
|
db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS entities_fts USING fts5(entity_id, name, entity_type)`);
|
|
713
713
|
|
|
714
|
+
// Entity enrichment state: tracks what input was used for extraction (idempotent --enrich)
|
|
715
|
+
db.exec(`
|
|
716
|
+
CREATE TABLE IF NOT EXISTS entity_enrichment_state (
|
|
717
|
+
doc_id INTEGER PRIMARY KEY,
|
|
718
|
+
input_hash TEXT NOT NULL,
|
|
719
|
+
enriched_at TEXT NOT NULL,
|
|
720
|
+
FOREIGN KEY (doc_id) REFERENCES documents(id) ON DELETE CASCADE
|
|
721
|
+
)
|
|
722
|
+
`);
|
|
723
|
+
|
|
714
724
|
// 3-tier consolidation: observations synthesized from clusters of related facts
|
|
715
725
|
db.exec(`
|
|
716
726
|
CREATE TABLE IF NOT EXISTS consolidated_observations (
|