clawmem 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -250,7 +250,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
250
250
  | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
251
251
  | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
252
252
  | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
253
- | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions with prior decisions |
253
+ | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, extracts SPO triples from decision/preference/milestone/problem facts. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
254
254
  | `handoff-generator` | Stop | — | LLM summarizes session → `_clawmem/agent/handoffs/` |
255
255
  | `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation) |
256
256
 
@@ -447,7 +447,7 @@ compositeScore = (0.10 × searchScore + 0.70 × recencyScore + 0.20 × confidenc
447
447
 
448
448
  | Content Type | Half-Life | Effect |
449
449
  |--------------|-----------|--------|
450
- | decision, preference, hub | ∞ | Never decay |
450
+ | decision, deductive, preference, hub | ∞ | Never decay |
451
451
  | antipattern | ∞ | Never decay — accumulated negative patterns persist |
452
452
  | project | 120 days | Slow decay |
453
453
  | research | 90 days | Moderate decay |
@@ -456,7 +456,7 @@ compositeScore = (0.10 × searchScore + 0.70 × recencyScore + 0.20 × confidenc
456
456
  | handoff | 30 days | Fast — recent matters most |
457
457
 
458
458
  Half-lives extend up to 3× for frequently-accessed memories (access reinforcement decays over 90 days).
459
- Attention decay: non-durable types (handoff, progress, conversation, note, project) lose 5% confidence per week without access. Decision/preference/hub/research/antipattern are exempt.
459
+ Attention decay: non-durable types (handoff, progress, conversation, note, project) lose 5% confidence per week without access. Decision/deductive/preference/hub/research/antipattern are exempt.
460
460
 
461
461
  ## Indexing & Graph Building
462
462
 
@@ -499,6 +499,7 @@ The `memory_relations` table is populated by multiple independent sources:
499
499
  | `buildSemanticGraph()` | semantic | `build_graphs` MCP tool (manual) | Pure cosine similarity. PK collision: `INSERT OR IGNORE` means A-MEM semantic edges take precedence if they exist first. |
500
500
  | Entity co-occurrence graph | entity | A-MEM enrichment (indexing) | LLM entity extraction → quality filters (title/length/blocklist/location validation) → type-agnostic canonical resolution within compatibility buckets (person, org, location, tech=project/service/tool/concept) → `entity_mentions` + `entity_cooccurrences` tables. Entity edges use IDF-based specificity scoring. Feeds ENTITY intent queries and MPFP `[entity, semantic]` patterns. |
501
501
  | `consolidated_observations` | supporting | Consolidation worker (background) | 3-tier consolidation: facts → observations → mental models. Observations track `proof_count`, `trend` (STABLE/STRENGTHENING/WEAKENING/STALE), and source links. |
502
+ | Deductive synthesis | supporting | Consolidation worker Phase 3 (background, every ~15 min) | Combines 2-3 related recent observations (decision/preference/milestone/problem, last 7 days) into `content_type='deductive'` documents with `source_doc_ids` provenance. First-class searchable docs with ∞ half-life. |
502
503
 
503
504
  **Edge collision:** Both `generateMemoryLinks()` and `buildSemanticGraph()` insert `relation_type='semantic'`. PK is `(source_id, target_id, relation_type)` — first writer wins.
504
505
 
package/CLAUDE.md CHANGED
@@ -250,7 +250,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
250
250
  | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
251
251
  | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
252
252
  | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
253
- | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions with prior decisions |
253
+ | `decision-extractor` | Stop | — | LLM extracts observations → `_clawmem/agent/observations/`, infers causal links, detects contradictions, extracts SPO triples from decision/preference/milestone/problem facts. Background consolidation worker synthesizes deductive observations from related facts (Phase 3, every ~15 min). |
254
254
  | `handoff-generator` | Stop | — | LLM summarizes session → `_clawmem/agent/handoffs/` |
255
255
  | `feedback-loop` | Stop | — | tracks referenced notes → boosts confidence, records usage relations + co-activations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation) |
256
256
 
@@ -447,7 +447,7 @@ compositeScore = (0.10 × searchScore + 0.70 × recencyScore + 0.20 × confidenc
447
447
 
448
448
  | Content Type | Half-Life | Effect |
449
449
  |--------------|-----------|--------|
450
- | decision, preference, hub | ∞ | Never decay |
450
+ | decision, deductive, preference, hub | ∞ | Never decay |
451
451
  | antipattern | ∞ | Never decay — accumulated negative patterns persist |
452
452
  | project | 120 days | Slow decay |
453
453
  | research | 90 days | Moderate decay |
@@ -456,7 +456,7 @@ compositeScore = (0.10 × searchScore + 0.70 × recencyScore + 0.20 × confidenc
456
456
  | handoff | 30 days | Fast — recent matters most |
457
457
 
458
458
  Half-lives extend up to 3× for frequently-accessed memories (access reinforcement decays over 90 days).
459
- Attention decay: non-durable types (handoff, progress, conversation, note, project) lose 5% confidence per week without access. Decision/preference/hub/research/antipattern are exempt.
459
+ Attention decay: non-durable types (handoff, progress, conversation, note, project) lose 5% confidence per week without access. Decision/deductive/preference/hub/research/antipattern are exempt.
460
460
 
461
461
  ## Indexing & Graph Building
462
462
 
@@ -499,6 +499,7 @@ The `memory_relations` table is populated by multiple independent sources:
499
499
  | `buildSemanticGraph()` | semantic | `build_graphs` MCP tool (manual) | Pure cosine similarity. PK collision: `INSERT OR IGNORE` means A-MEM semantic edges take precedence if they exist first. |
500
500
  | Entity co-occurrence graph | entity | A-MEM enrichment (indexing) | LLM entity extraction → quality filters (title/length/blocklist/location validation) → type-agnostic canonical resolution within compatibility buckets (person, org, location, tech=project/service/tool/concept) → `entity_mentions` + `entity_cooccurrences` tables. Entity edges use IDF-based specificity scoring. Feeds ENTITY intent queries and MPFP `[entity, semantic]` patterns. |
501
501
  | `consolidated_observations` | supporting | Consolidation worker (background) | 3-tier consolidation: facts → observations → mental models. Observations track `proof_count`, `trend` (STABLE/STRENGTHENING/WEAKENING/STALE), and source links. |
502
+ | Deductive synthesis | supporting | Consolidation worker Phase 3 (background, every ~15 min) | Combines 2-3 related recent observations (decision/preference/milestone/problem, last 7 days) into `content_type='deductive'` documents with `source_doc_ids` provenance. First-class searchable docs with ∞ half-life. |
502
503
 
503
504
  **Edge collision:** Both `generateMemoryLinks()` and `buildSemanticGraph()` insert `relation_type='semantic'`. PK is `(source_id, target_id, relation_type)` — first writer wins.
504
505
 
package/README.md CHANGED
@@ -823,6 +823,7 @@ For WHY and ENTITY queries, the search pipeline expands results through the memo
823
823
  | Type | Half-life | Baseline | Notes |
824
824
  |---|---|---|---|
825
825
  | `decision` | ∞ | 0.85 | Never decays |
826
+ | `deductive` | ∞ | 0.85 | Never decays — cross-session derived insights with source provenance |
826
827
  | `preference` | ∞ | 0.80 | Never decays — user preferences are durable facts |
827
828
  | `hub` | ∞ | 0.80 | Never decays |
828
829
  | `antipattern` | ∞ | 0.75 | Never decays — accumulated negative patterns persist |
@@ -835,7 +836,7 @@ For WHY and ENTITY queries, the search pipeline expands results through the memo
835
836
  | `progress` | 45 days | 0.50 | |
836
837
  | `note` | 60 days | 0.50 | Default |
837
838
 
838
- Content types are inferred from frontmatter or file path patterns. Half-lives extend up to 3× for frequently-accessed memories (access reinforcement, decays over 90 days). Non-durable types (handoff, progress, conversation, note, project) lose 5% confidence per week without access (attention decay). Decision/preference/hub/research/antipattern are exempt.
839
+ Content types are inferred from frontmatter or file path patterns. Half-lives extend up to 3× for frequently-accessed memories (access reinforcement, decays over 90 days). Non-durable types (handoff, progress, conversation, note, project) lose 5% confidence per week without access (attention decay). Decision/deductive/preference/hub/research/antipattern are exempt.
839
840
 
840
841
  **Quality scoring:** Each document gets a `quality_score` (0.0–1.0) computed during indexing based on length, structure (headings, lists), decision/correction keywords, and frontmatter richness. Applied as `qualityMultiplier = 0.7 + 0.6 × qualityScore` (range: 0.7× penalty to 1.3× boost).
841
842
 
@@ -1119,6 +1120,7 @@ Built on the shoulders of:
1119
1120
  - [Engram](https://github.com/Gentleman-Programming/engram) — observation dedup window, topic-key upsert pattern, temporal timeline navigation, duplicate metadata scoring signals
1120
1121
  - [Hermes Agent](https://github.com/NousResearch/hermes-agent) — MemoryProvider plugin integration, memory nudge system (periodic lifecycle tool prompting)
1121
1122
  - [Hindsight](https://github.com/vectorize-io/hindsight) — entity resolution, MPFP graph traversal, temporal extraction, 3-tier consolidation, observation invalidation, 4-way parallel retrieval
1123
+ - [Honcho](https://github.com/plastic-labs/honcho) — deductive observation synthesis patterns, surprisal-based anomaly scoring concept, embed-state self-healing, retrieval separation (raw vs derived)
1122
1124
  - [MAGMA](https://arxiv.org/abs/2501.13956) — multi-graph memory agent
1123
1125
  - [MemPalace](https://github.com/milla-jovovich/mempalace) — conversation import patterns, broadened observation taxonomy (preference/milestone/problem), session-bootstrap synthesis
1124
1126
  - [memory-lancedb-pro](https://github.com/CortexReach/memory-lancedb-pro) — retrieval gate, length normalization, MMR diversity, access reinforcement algorithms
package/SKILL.md CHANGED
@@ -451,7 +451,7 @@ compositeScore = (0.10 x searchScore + 0.70 x recencyScore + 0.20 x confidenceSc
451
451
 
452
452
  | Content Type | Half-Life | Effect |
453
453
  |--------------|-----------|--------|
454
- | decision, preference, hub | infinity | Never decay |
454
+ | decision, deductive, preference, hub | infinity | Never decay |
455
455
  | antipattern | infinity | Never decay — accumulated negative patterns persist |
456
456
  | project | 120 days | Slow decay |
457
457
  | research | 90 days | Moderate decay |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmem",
3
- "version": "0.5.1",
3
+ "version": "0.6.0",
4
4
  "description": "On-device context engine and memory for AI agents. Claude Code and OpenClaw. Hooks + MCP server + hybrid RAG search.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/clawmem.ts CHANGED
@@ -410,6 +410,9 @@ async function cmdEmbed(args: string[]) {
410
410
 
411
411
  const fragments = splitDocument(body, frontmatter);
412
412
  const docStart = Date.now();
413
+ const prevTotalFragments = totalFragments;
414
+ const prevFailedFragments = failedFragments;
415
+ let seq0Succeeded = false;
413
416
  console.error(` [${docIdx + 1}/${hashes.length}] ${basename(path)} (${fragments.length} frags, ${body.length} chars)`);
414
417
 
415
418
  if (isCloudEmbed) {
@@ -463,6 +466,7 @@ async function cmdEmbed(args: string[]) {
463
466
  result.model, new Date().toISOString(), frag.type, frag.label ?? undefined, canId
464
467
  );
465
468
  totalFragments++;
469
+ if (seq === 0) seq0Succeeded = true;
466
470
  } else {
467
471
  failedFragments++;
468
472
  }
@@ -491,6 +495,7 @@ async function cmdEmbed(args: string[]) {
491
495
  result.model, new Date().toISOString(), frag.type, frag.label ?? undefined, canId
492
496
  );
493
497
  totalFragments++;
498
+ if (seq === 0) seq0Succeeded = true;
494
499
  if (seq === 0 || (seq + 1) % 5 === 0 || seq === fragments.length - 1) {
495
500
  console.error(` frag ${seq + 1}/${fragments.length} (${frag.type}) ${fragMs}ms [${text.length} chars]`);
496
501
  }
@@ -505,6 +510,18 @@ async function cmdEmbed(args: string[]) {
505
510
  }
506
511
  }
507
512
 
513
+ // Track embed state per document — seq=0 (primary) must succeed for synced status
514
+ const docFragsOk = totalFragments - prevTotalFragments;
515
+ const docFragsFail = failedFragments - prevFailedFragments;
516
+ if (seq0Succeeded) {
517
+ s.markEmbedSynced(hash);
518
+ } else if (docFragsOk === 0 && docFragsFail > 0) {
519
+ s.markEmbedFailed(hash, "all fragments failed");
520
+ } else {
521
+ // seq=0 failed but some later fragments succeeded — mark failed so seq=0 gets retried
522
+ s.markEmbedFailed(hash, "primary fragment (seq=0) failed");
523
+ }
524
+
508
525
  embedded++;
509
526
  const docMs = Date.now() - docStart;
510
527
  const elapsed = ((Date.now() - batchStart) / 1000).toFixed(0);
@@ -1,17 +1,21 @@
1
1
  /**
2
2
  * ClawMem Consolidation Worker
3
3
  *
4
- * Two-phase background worker:
4
+ * Three-phase background worker:
5
5
  * 1. A-MEM backfill: enriches documents missing memory notes
6
6
  * 2. 3-tier consolidation: synthesizes clusters of related observations
7
7
  * into higher-order consolidated observations with proof counts and trends
8
+ * 3. Deductive synthesis: combines related recent observations into
9
+ * first-class deductive documents with source provenance
8
10
  *
9
11
  * Pattern H from ENHANCEMENT-PLAN.md (source: Hindsight consolidator.py)
12
+ * Deductive synthesis inspired by Honcho's Dreamer deduction specialist.
10
13
  */
11
14
 
12
15
  import type { Store } from "./store.ts";
13
16
  import type { LlamaCpp } from "./llm.ts";
14
17
  import { extractJsonFromLLM } from "./amem.ts";
18
+ import { hashContent } from "./indexer.ts";
15
19
 
16
20
  // =============================================================================
17
21
  // Types
@@ -115,6 +119,11 @@ async function tick(store: Store, llm: LlamaCpp): Promise<void> {
115
119
  if (tickCount % 6 === 0) {
116
120
  await consolidateObservations(store, llm);
117
121
  }
122
+
123
+ // Phase 3: Deductive synthesis (every 3rd tick, ~15 min at default interval)
124
+ if (tickCount % 3 === 0) {
125
+ await generateDeductiveObservations(store, llm);
126
+ }
118
127
  } catch (err) {
119
128
  console.error("[consolidation] Tick failed:", err);
120
129
  } finally {
@@ -375,6 +384,308 @@ function updateTrends(store: Store): void {
375
384
  }
376
385
  }
377
386
 
387
+ // =============================================================================
388
+ // Phase 3: Deductive Observation Synthesis
389
+ // =============================================================================
390
+
/**
 * Phase 3 of the consolidation worker: deductive observation synthesis.
 *
 * Loads up to 20 active documents whose content type is decision, preference,
 * milestone or problem, that carry observation data, were modified within the
 * last 7 days, and are not yet listed in the `source_doc_ids` of an active
 * deductive document. The LLM is asked to combine them into new conclusions.
 * Each accepted conclusion is written to the `_clawmem` collection under
 * `deductions/` with `content_type='deductive'`, `source_doc_ids` provenance
 * and one `supporting` edge per source in `memory_relations`.
 *
 * The LLM reply is treated as untrusted input: malformed items, out-of-range
 * or fractional indices and repeated indices are dropped instead of throwing,
 * and a deduction is only kept when it references at least two distinct
 * observations.
 *
 * @param store - Store whose database is read and written.
 * @param llm - Model used to propose deductions.
 * @returns Number of deductive documents created during this pass.
 */
async function generateDeductiveObservations(store: Store, llm: LlamaCpp): Promise<number> {
  type ObservationRow = {
    id: number; title: string; facts: string; narrative: string;
    observation_type: string; content_type: string; collection: string;
    path: string; modified_at: string;
  };
  type ExistingDeduction = { id: number; title: string | null; narrative: string | null };

  /** Lower-cased words longer than three characters, used for overlap checks. */
  const significantWords = (text: string): Set<string> =>
    new Set(text.toLowerCase().split(/\s+/).filter(w => w.length > 3));

  /** Jaccard similarity of two word sets; 0 when both sets are empty. */
  const jaccard = (a: Set<string>, b: Set<string>): number => {
    const unionSize = new Set([...a, ...b]).size;
    if (unionSize === 0) return 0;
    let shared = 0;
    for (const word of a) {
      if (b.has(word)) shared++;
    }
    return shared / unionSize;
  };

  // Find recent high-value observations not yet used in deductions
  const DEDUCTIVE_TYPES = ['decision', 'preference', 'milestone', 'problem'];
  const recentObs = store.db.prepare(`
    SELECT d.id, d.title, d.facts, d.narrative, d.observation_type, d.content_type,
           d.collection, d.path, d.modified_at
    FROM documents d
    WHERE d.active = 1
      AND d.content_type IN (${DEDUCTIVE_TYPES.map(() => '?').join(',')})
      AND d.observation_type IS NOT NULL
      AND d.facts IS NOT NULL
      AND d.modified_at >= datetime('now', '-7 days')
      AND d.id NOT IN (
        SELECT value FROM (
          SELECT json_each.value as value
          FROM documents dd, json_each(dd.source_doc_ids)
          WHERE dd.content_type = 'deductive' AND dd.active = 1
        )
      )
    ORDER BY d.modified_at DESC
    LIMIT 20
  `).all(...DEDUCTIVE_TYPES) as ObservationRow[];

  // A deduction needs at least two observations to combine.
  if (recentObs.length < 2) return 0;

  // Build context for LLM
  const obsText = recentObs.map((o, i) =>
    `[${i + 1}] (${o.content_type}/${o.observation_type}) "${o.title}"\n Facts: ${(o.facts || '').slice(0, 300)}\n Narrative: ${(o.narrative || '').slice(0, 200)}`
  ).join('\n\n');

  const prompt = `You are analyzing recent observations from a developer's work sessions. Find logical deductions that can be drawn by combining 2-3 observations.

A deduction combines facts from different observations into a NEW conclusion that isn't stated in any single observation alone.

Observations:
${obsText}

For each valid deduction:
1. State the conclusion clearly (1-2 sentences)
2. List the premises (which observations support it)
3. List the source indices (1-indexed)

Return ONLY valid JSON array:
[
  {
    "conclusion": "Clear deductive statement",
    "premises": ["Premise from obs 1", "Premise from obs 3"],
    "source_indices": [1, 3]
  }
]

Rules:
- Each deduction MUST combine 2+ different observations (not restate a single one)
- Only include conclusions with genuine logical basis
- Maximum 3 deductions
- If no valid deductions exist, return []
Return ONLY the JSON array. /no_think`;

  const result = await llm.generate(prompt, { temperature: 0.3, maxTokens: 500 });
  if (!result?.text) return 0;

  const parsed: unknown = extractJsonFromLLM(result.text);
  if (!Array.isArray(parsed)) return 0;

  // Prepared once; executed per deduction so the check reflects the current table contents.
  const selectExistingDeductions = store.db.prepare(`
    SELECT id, title, narrative FROM documents
    WHERE content_type = 'deductive' AND active = 1
    ORDER BY created_at DESC LIMIT 20
  `);

  let created = 0;
  const timestamp = new Date().toISOString();
  const dateStr = timestamp.slice(0, 10);

  for (const item of parsed) {
    // Shape validation: the reply is model output, so nothing about it is guaranteed.
    if (typeof item !== 'object' || item === null) continue;
    const candidate = item as { conclusion?: unknown; premises?: unknown; source_indices?: unknown };
    const conclusion = candidate.conclusion;
    if (typeof conclusion !== 'string' || conclusion.trim() === '') continue;
    if (!Array.isArray(candidate.source_indices)) continue;

    // Map 1-based indices to document ids. Numeric strings are accepted;
    // fractional, out-of-range and repeated indices are dropped.
    const sourceDocIds: number[] = [];
    for (const rawIndex of candidate.source_indices) {
      const index = typeof rawIndex === 'string' ? Number(rawIndex) : rawIndex;
      if (typeof index !== 'number' || !Number.isInteger(index)) continue;
      const obs = recentObs[index - 1];
      if (obs && !sourceDocIds.includes(obs.id)) sourceDocIds.push(obs.id);
    }
    if (sourceDocIds.length < 2) continue;

    const premises = Array.isArray(candidate.premises)
      ? candidate.premises.filter((p): p is string => typeof p === 'string')
      : [];

    // Check for duplicate deduction (Jaccard on conclusion text). The stored
    // title is only the first 80 characters of a conclusion, so the stored
    // narrative (the full conclusion) is compared as well.
    const existingDeductions = selectExistingDeductions.all() as ExistingDeduction[];
    const conclusionWords = significantWords(conclusion);
    const isDuplicate = existingDeductions.some(d =>
      jaccard(conclusionWords, significantWords(d.title ?? '')) > 0.5 ||
      jaccard(conclusionWords, significantWords(d.narrative ?? '')) > 0.5
    );
    if (isDuplicate) continue;

    // Build the deductive document
    const title = conclusion.slice(0, 80);
    const premisesText = premises.map(p => `- ${p}`).join('\n');
    const sourceRefs = sourceDocIds.map(id => {
      const obs = recentObs.find(o => o.id === id);
      return obs ? `- "${obs.title}" (${obs.content_type})` : `- doc#${id}`;
    }).join('\n');

    const body = [
      `---`,
      `content_type: deductive`,
      `tags: [auto-deduced, consolidation]`,
      `---`,
      ``,
      `# ${title}`,
      ``,
      conclusion,
      ``,
      `## Premises`,
      ``,
      premisesText,
      ``,
      `## Sources`,
      ``,
      sourceRefs,
      ``,
    ].join('\n');

    const dedPath = `deductions/${dateStr}-${sourceDocIds.join('-')}.md`;
    const hash = hashContent(body);

    try {
      store.insertContent(hash, body, timestamp);
      store.insertDocument("_clawmem", dedPath, title, hash, timestamp, timestamp);

      const doc = store.findActiveDocument("_clawmem", dedPath);
      if (!doc) {
        console.error(`[deductive] Inserted ${dedPath} but could not read it back; skipping metadata`);
        continue;
      }

      store.updateDocumentMeta(doc.id, {
        content_type: "deductive",
        confidence: 0.85,
      });
      store.updateObservationFields(dedPath, "_clawmem", {
        observation_type: "deductive",
        facts: JSON.stringify(premises),
        narrative: conclusion,
      });
      // Store source provenance
      store.db.prepare(`UPDATE documents SET source_doc_ids = ? WHERE id = ?`)
        .run(JSON.stringify(sourceDocIds), doc.id);

      // Create supporting edges in memory_relations. Best effort: a failed
      // edge is logged and must not undo the deduction itself.
      for (const sourceId of sourceDocIds) {
        try {
          store.db.prepare(`
            INSERT OR IGNORE INTO memory_relations (source_id, target_id, relation_type, weight, created_at)
            VALUES (?, ?, 'supporting', 0.85, datetime('now'))
          `).run(sourceId, doc.id);
        } catch (err) {
          console.error(`[deductive] Failed to link source ${sourceId} -> ${doc.id}:`, err);
        }
      }

      created++;
      console.log(`[deductive] Created: "${conclusion.slice(0, 60)}..." from ${sourceDocIds.length} sources`);
    } catch (err) {
      console.error(`[deductive] Failed to create deduction:`, err);
    }
  }

  return created;
}
571
+
/**
 * On-demand entry point for deductive synthesis (for CLI or MCP tool).
 *
 * Runs a single `generateDeductiveObservations` pass against the given store
 * and model, and wraps the resulting count in an object.
 *
 * @param store - Store passed through to the synthesis pass.
 * @param llm - Model passed through to the synthesis pass.
 * @returns `{ created }`, the number of deductive documents the pass created.
 */
export async function runDeductiveSynthesis(
  store: Store,
  llm: LlamaCpp,
): Promise<{ created: number }> {
  return { created: await generateDeductiveObservations(store, llm) };
}
582
+
583
+ // =============================================================================
584
+ // Surprisal Scoring (k-NN density anomaly detection)
585
+ // =============================================================================
586
+
/** Surprisal score of one observation document, as returned by `computeSurprisalScores`. */
export interface SurprisalResult {
  docId: number;
  title: string;
  path: string;
  collection: string;
  contentType: string;
  /** Mean distance to the nearest neighbours; higher = more anomalous. */
  avgNeighborDistance: number;
  /** Number of neighbours the average was taken over (at most `k`). */
  neighborCount: number;
}

/**
 * Compute surprisal scores for observation documents using k-NN average
 * neighbor distance in embedding space. High-surprisal observations are
 * anomalous — they don't fit existing patterns and deserve curator attention.
 *
 * Candidates are the 100 most recently modified active documents that have an
 * `observation_type` and a primary-fragment (seq=0) row in `content_vectors`.
 * Neighbours are looked up with a KNN query against the `vectors_vec` table
 * (sqlite-vec vec0 virtual table, per the original author's note); the query
 * is not restricted to observation documents, so any stored vector can be a
 * neighbour.
 *
 * The document's own vector is excluded by its `hash_seq` key rather than by a
 * distance threshold, so genuinely near-identical neighbours still count and
 * lower the score as they should.
 *
 * @param store - Store whose database is queried (read-only).
 * @param options - `collection` restricts candidates to one collection;
 *   `k` is the neighbour count (default 5); `minScore` drops results whose
 *   average distance is below it (default 0); `limit` caps the number of
 *   results returned (default 20).
 * @returns Results sorted by `avgNeighborDistance`, highest first. Empty when
 *   `k` is not a positive integer, there are fewer than `k + 1` candidates, or
 *   `vectors_vec` does not exist or cannot be queried.
 */
export function computeSurprisalScores(
  store: Store,
  options?: { collection?: string; limit?: number; k?: number; minScore?: number }
): SurprisalResult[] {
  const k = options?.k ?? 5;
  const limit = options?.limit ?? 20;
  const minScore = options?.minScore ?? 0;

  // A negative LIMIT means "no limit" in SQLite, so reject unusable k up front.
  if (!Number.isInteger(k) || k < 1) return [];

  // Get observation documents with embeddings (seq=0 = primary fragment)
  let sql = `
    SELECT d.id, d.title, d.path, d.collection, d.content_type,
           cv.hash || '_0' as hash_seq
    FROM documents d
    JOIN content_vectors cv ON d.hash = cv.hash AND cv.seq = 0
    WHERE d.active = 1
      AND d.observation_type IS NOT NULL
  `;
  const params: string[] = [];
  if (options?.collection) {
    sql += ` AND d.collection = ?`;
    params.push(options.collection);
  }
  sql += ` ORDER BY d.modified_at DESC LIMIT 100`;

  const docs = store.db.prepare(sql).all(...params) as {
    id: number; title: string; path: string; collection: string;
    content_type: string; hash_seq: string;
  }[];

  if (docs.length < k + 1) return []; // Not enough docs for meaningful k-NN

  // Check if vectors_vec exists
  const vecTable = store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
  if (!vecTable) return [];

  // Prepare the per-document statements once instead of once per document.
  // If they cannot be prepared, no document can be scored.
  let selectEmbedding;
  let selectNeighbors;
  try {
    selectEmbedding = store.db.prepare(
      `SELECT embedding FROM vectors_vec WHERE hash_seq = ?`
    );
    selectNeighbors = store.db.prepare(`
      SELECT hash_seq, distance
      FROM vectors_vec
      WHERE embedding MATCH ?
      ORDER BY distance
      LIMIT ?
    `);
  } catch {
    return [];
  }

  // For each doc, query its k nearest neighbors and compute average distance
  const results: SurprisalResult[] = [];

  for (const doc of docs) {
    try {
      // Get this doc's embedding from vectors_vec
      const vecRow = selectEmbedding.get(doc.hash_seq) as
        { embedding: Float32Array | number[] } | null | undefined;
      if (!vecRow?.embedding) continue;

      // Query k+1 nearest neighbors: one slot is normally taken by the doc itself.
      const hits = selectNeighbors.all(vecRow.embedding, k + 1) as
        { hash_seq: string; distance: number }[];

      // Drop the doc's own vector by key, then keep at most k neighbours.
      const neighbors = hits.filter(n => n.hash_seq !== doc.hash_seq).slice(0, k);
      if (neighbors.length === 0) continue;

      const avgDist = neighbors.reduce((sum, n) => sum + n.distance, 0) / neighbors.length;

      if (avgDist >= minScore) {
        results.push({
          docId: doc.id,
          title: doc.title,
          path: doc.path,
          collection: doc.collection,
          contentType: doc.content_type,
          avgNeighborDistance: avgDist,
          neighborCount: neighbors.length,
        });
      }
    } catch {
      // Skip docs that fail vector lookup (missing embedding, dimension mismatch)
      continue;
    }
  }

  // Sort by surprisal (highest first) and limit
  results.sort((a, b) => b.avgNeighborDistance - a.avgNeighborDistance);
  return results.slice(0, limit);
}
688
+
378
689
  // =============================================================================
379
690
  // Public API for MCP / CLI
380
691
  // =============================================================================
@@ -260,10 +260,11 @@ function getCurrentFocus(
260
260
  cutoff.setDate(cutoff.getDate() - DECISION_LOOKBACK_DAYS);
261
261
  const cutoffStr = cutoff.toISOString();
262
262
 
263
- // Gather recent decisions, preferences, and active problems
263
+ // Gather recent decisions, preferences, active problems, and deductive insights
264
264
  const decisions = store.getDocumentsByType("decision", 10);
265
265
  const preferences = store.getDocumentsByType("preference", 5);
266
266
  const problems = store.getDocumentsByType("problem", 5);
267
+ const deductions = store.getDocumentsByType("deductive", 5);
267
268
 
268
269
  // Rank by: pinned first, then recency, then access_count
269
270
  const now = Date.now();
@@ -285,7 +286,11 @@ function getCurrentFocus(
285
286
  // Preferences are durable — no date filter, just rank
286
287
  const rankedPrefs = [...preferences].sort((a, b) => rankDoc(b) - rankDoc(a));
287
288
 
288
- if (recentDecisions.length === 0 && rankedPrefs.length === 0 && activeProblems.length === 0) {
289
+ const recentDeductions = deductions
290
+ .filter(d => d.modifiedAt >= cutoffStr)
291
+ .sort((a, b) => rankDoc(b) - rankDoc(a));
292
+
293
+ if (recentDecisions.length === 0 && rankedPrefs.length === 0 && activeProblems.length === 0 && recentDeductions.length === 0) {
289
294
  return null;
290
295
  }
291
296
 
@@ -338,6 +343,19 @@ function getCurrentFocus(
338
343
  }
339
344
  }
340
345
 
346
+ // Cross-session deductions (derived insights with source provenance)
347
+ if (recentDeductions.length > 0) {
348
+ lines.push("**Derived Insights:**");
349
+ charCount += 24;
350
+ for (const d of recentDeductions) {
351
+ if (charCount >= maxChars) break;
352
+ const entry = `- ${d.title} (${d.modifiedAt.slice(0, 10)})`;
353
+ lines.push(entry);
354
+ paths.push(`${d.collection}/${d.path}`);
355
+ charCount += entry.length + 2;
356
+ }
357
+ }
358
+
341
359
  return lines.length > 1 ? { text: lines.join("\n"), paths } : null;
342
360
  }
343
361
 
package/src/memory.ts CHANGED
@@ -20,6 +20,7 @@ export const HALF_LIVES: Record<string, number> = {
20
20
  project: 120,
21
21
  preference: Infinity,
22
22
  decision: Infinity,
23
+ deductive: Infinity,
23
24
  hub: Infinity,
24
25
  };
25
26
 
@@ -29,6 +30,7 @@ export const HALF_LIVES: Record<string, number> = {
29
30
 
30
31
  export const TYPE_BASELINES: Record<string, number> = {
31
32
  decision: 0.85,
33
+ deductive: 0.85,
32
34
  preference: 0.80,
33
35
  hub: 0.80,
34
36
  problem: 0.75,
@@ -45,7 +47,7 @@ export const TYPE_BASELINES: Record<string, number> = {
45
47
  // Content Type Inference
46
48
  // =============================================================================
47
49
 
48
- export type ContentType = "decision" | "preference" | "hub" | "research" | "project" | "handoff" | "conversation" | "progress" | "milestone" | "problem" | "note";
50
+ export type ContentType = "decision" | "deductive" | "preference" | "hub" | "research" | "project" | "handoff" | "conversation" | "progress" | "milestone" | "problem" | "note";
49
51
 
50
52
  export function inferContentType(path: string, explicitType?: string): ContentType {
51
53
  if (explicitType && explicitType in TYPE_BASELINES) return explicitType as ContentType;
@@ -75,7 +77,7 @@ export type MemoryType = "episodic" | "semantic" | "procedural";
75
77
  */
76
78
  export function inferMemoryType(path: string, contentType: string, body?: string): MemoryType {
77
79
  if (["handoff", "progress", "conversation"].includes(contentType)) return "episodic";
78
- if (["decision", "hub", "research"].includes(contentType)) return "semantic";
80
+ if (["decision", "deductive", "hub", "research"].includes(contentType)) return "semantic";
79
81
  if (body && /\b(step\s+\d|workflow|recipe|how\s+to|procedure|runbook|playbook)\b/i.test(body)) return "procedural";
80
82
  if (path.includes("sop") || path.includes("runbook") || path.includes("playbook")) return "procedural";
81
83
  if (contentType === "antipattern") return "semantic";
@@ -150,7 +152,7 @@ export function confidenceScore(
150
152
  // Attention decay: reduce confidence if not accessed recently (5% per week)
151
153
  // Skipped for decay-exempt durable types (decision, deductive, hub, research, antipattern, preference); applied to all other content types
152
154
  // Also skip if last_accessed_at was backfilled from modified_at (no real access yet)
153
- const DECAY_EXEMPT_TYPES = new Set(["decision", "hub", "research", "antipattern", "preference"]);
155
+ const DECAY_EXEMPT_TYPES = new Set(["decision", "deductive", "hub", "research", "antipattern", "preference"]);
154
156
  let attentionDecay = 1.0;
155
157
  if (lastAccessedAt && !DECAY_EXEMPT_TYPES.has(contentType)) {
156
158
  const lastAccess = typeof lastAccessedAt === "string" ? new Date(lastAccessedAt) : lastAccessedAt;
package/src/store.ts CHANGED
@@ -544,6 +544,10 @@ function initializeDatabase(db: Database): void {
544
544
  ["skill_name", "ALTER TABLE documents ADD COLUMN skill_name TEXT"],
545
545
  ["obs_quality_score", "ALTER TABLE documents ADD COLUMN obs_quality_score REAL"],
546
546
  ["failure_reason", "ALTER TABLE documents ADD COLUMN failure_reason TEXT"],
547
+ ["source_doc_ids", "ALTER TABLE documents ADD COLUMN source_doc_ids TEXT"],
548
+ ["embed_state", "ALTER TABLE documents ADD COLUMN embed_state TEXT DEFAULT 'pending'"],
549
+ ["embed_error", "ALTER TABLE documents ADD COLUMN embed_error TEXT"],
550
+ ["embed_attempts", "ALTER TABLE documents ADD COLUMN embed_attempts INTEGER DEFAULT 0"],
547
551
  ];
548
552
  for (const [col, sql] of obsMigrations) {
549
553
  if (!colNames.has(col)) {
@@ -906,6 +910,11 @@ export type Store = {
906
910
  pinDocument: (collection: string, path: string, pinned: boolean) => void;
907
911
  snoozeDocument: (collection: string, path: string, until: string | null) => void;
908
912
 
913
+ // Embed state tracking
914
+ markEmbedSynced: (hash: string) => void;
915
+ markEmbedFailed: (hash: string, error: string) => void;
916
+ getEmbedStats: () => { pending: number; synced: number; failed: number };
917
+
909
918
  // Beads integration
910
919
  syncBeadsIssues: (projectDir: string) => Promise<{ synced: number; created: number; newDocIds: number[] }>;
911
920
  detectBeadsProject: (cwd: string) => string | null;
@@ -1078,6 +1087,24 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
1078
1087
  pinDocument: (collection: string, path: string, pinned: boolean) => pinDocumentFn(db, collection, path, pinned),
1079
1088
  snoozeDocument: (collection: string, path: string, until: string | null) => snoozeDocumentFn(db, collection, path, until),
1080
1089
 
1090
+ // Embed state tracking
1091
+ markEmbedSynced: (hash: string) => {
1092
+ db.prepare(`UPDATE documents SET embed_state = 'synced' WHERE hash = ? AND active = 1`).run(hash);
1093
+ },
1094
+ markEmbedFailed: (hash: string, error: string) => {
1095
+ db.prepare(`UPDATE documents SET embed_state = 'failed', embed_error = ?, embed_attempts = COALESCE(embed_attempts, 0) + 1 WHERE hash = ? AND active = 1`).run(error, hash);
1096
+ },
1097
+ getEmbedStats: () => {
1098
+ const stats = db.prepare(`
1099
+ SELECT
1100
+ SUM(CASE WHEN embed_state = 'pending' OR embed_state IS NULL THEN 1 ELSE 0 END) as pending,
1101
+ SUM(CASE WHEN embed_state = 'synced' THEN 1 ELSE 0 END) as synced,
1102
+ SUM(CASE WHEN embed_state = 'failed' THEN 1 ELSE 0 END) as failed
1103
+ FROM documents WHERE active = 1
1104
+ `).get() as { pending: number; synced: number; failed: number };
1105
+ return { pending: stats.pending || 0, synced: stats.synced || 0, failed: stats.failed || 0 };
1106
+ },
1107
+
1081
1108
  // Beads integration
1082
1109
  syncBeadsIssues: (projectDir: string) => syncBeadsIssues(db, projectDir),
1083
1110
  detectBeadsProject,
@@ -2924,12 +2951,17 @@ export function getHashesForEmbedding(db: Database): { hash: string; body: strin
2924
2951
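  * Returns hashes that have no content_vectors row with fragment_type set, or that lack the primary (seq=0) row; documents with 3 or more recorded embed attempts are excluded.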
2925
2952
  */
2926
2953
  export function getHashesNeedingFragments(db: Database): { hash: string; body: string; path: string; title: string; collection: string }[] {
2954
+ // Select docs that either have no fragments at all OR are missing the primary (seq=0) fragment.
2955
+ // The seq=0 embedding is critical — surprisal scoring, semantic graph, and health checks depend on it.
2927
2956
  return db.prepare(`
2928
2957
  SELECT d.hash, c.doc as body, MIN(d.path) as path, MIN(d.title) as title, MIN(d.collection) as collection
2929
2958
  FROM documents d
2930
2959
  JOIN content c ON d.hash = c.hash
2931
2960
  LEFT JOIN content_vectors v ON d.hash = v.hash AND v.fragment_type IS NOT NULL
2932
- WHERE d.active = 1 AND v.hash IS NULL
2961
+ LEFT JOIN content_vectors v0 ON d.hash = v0.hash AND v0.seq = 0
2962
+ WHERE d.active = 1
2963
+ AND (v.hash IS NULL OR v0.hash IS NULL)
2964
+ AND COALESCE(d.embed_attempts, 0) < 3
2933
2965
  GROUP BY d.hash
2934
2966
  `).all() as { hash: string; body: string; path: string; title: string; collection: string }[];
2935
2967
  }
@@ -2941,6 +2973,8 @@ export function getHashesNeedingFragments(db: Database): { hash: string; body: s
2941
2973
  export function clearAllEmbeddings(db: Database): void {
2942
2974
  db.exec(`DELETE FROM content_vectors`);
2943
2975
  db.exec(`DROP TABLE IF EXISTS vectors_vec`);
2976
+ // Reset embed state so failed docs get retried after force re-embed
2977
+ try { db.exec(`UPDATE documents SET embed_state = 'pending', embed_error = NULL, embed_attempts = 0 WHERE active = 1`); } catch { /* column may not exist yet */ }
2944
2978
  vecTableDimsCache.delete(db);
2945
2979
  }
2946
2980