clawmem 0.7.0 → 0.7.1

@@ -16,6 +16,18 @@ import type { Store } from "./store.ts";
 import type { LlamaCpp } from "./llm.ts";
 import { extractJsonFromLLM } from "./amem.ts";
 import { hashContent } from "./indexer.ts";
+import { passesMergeSafety } from "./text-similarity.ts";
+import {
+  checkContradiction,
+  isActionableContradiction,
+  resolveContradictionPolicy,
+  type ContradictionResult,
+} from "./merge-guards.ts";
+import {
+  validateDeductiveDraft,
+  type DeductiveDraft,
+  type DocLike,
+} from "./deductive-guardrails.ts";
 
 // =============================================================================
 // Types
@@ -29,6 +41,62 @@ interface DocumentToEnrich {
 
 export type TrendEnum = 'NEW' | 'STABLE' | 'STRENGTHENING' | 'WEAKENING' | 'STALE';
 
+/**
+ * Phase 3 deductive synthesis stats. Each counter is incremented at a
+ * specific decision point in `generateDeductiveObservations`, giving
+ * operators per-rejection-reason visibility into why drafts didn't land.
+ */
+export interface DeductiveSynthesisStats {
+  /** Final number of deductive documents written to disk + indexed */
+  created: number;
+  /** Recent observations passed to the draft-generation LLM */
+  considered: number;
+  /** Drafts returned by the draft-generation LLM (before validation) */
+  drafted: number;
+  /** Drafts accepted by validation (pre-dedupe count) */
+  accepted: number;
+  /** Drafts rejected by validation (sum of all reject reasons) */
+  rejected: number;
+  /** LLM `generate()` returned null (cooldown / remote down) — draft-gen + validation */
+  nullCalls: number;
+  /** Drafts rejected because the conclusion mentioned a non-source entity */
+  contaminationRejects: number;
+  /** Drafts rejected because sourceIndices didn't resolve to ≥2 unique source docs */
+  invalidIndexRejects: number;
+  /** Drafts rejected because the LLM validator said `accepted: false` */
+  unsupportedRejects: number;
+  /** Drafts rejected because the conclusion was empty/trivial */
+  emptyRejects: number;
+  /** Accepted drafts that were then skipped as deductive dedupe duplicates */
+  dedupSkipped: number;
+  /**
+   * Accepted drafts that went through the validator fail-open path
+   * (LLM null/throw/malformed JSON). These passed the deterministic
+   * pre-checks but were NOT affirmed by the LLM validator. A high
+   * ratio of this counter to `accepted` means the LLM path is
+   * effectively offline and deductions are only gated by the
+   * deterministic guardrails (empty, invalid_indices, contamination).
+   */
+  validatorFallbackAccepts: number;
+}
+
+function emptyDeductiveStats(considered: number = 0): DeductiveSynthesisStats {
+  return {
+    created: 0,
+    considered,
+    drafted: 0,
+    accepted: 0,
+    rejected: 0,
+    nullCalls: 0,
+    contaminationRejects: 0,
+    invalidIndexRejects: 0,
+    unsupportedRejects: 0,
+    emptyRejects: 0,
+    dedupSkipped: 0,
+    validatorFallbackAccepts: 0,
+  };
+}
+
 export interface ConsolidatedObservation {
   id: number;
   observation: string;
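The doc comment above already tells operators how these counters are meant to be read together. A minimal consumer sketch, illustrative only and assuming the type is re-exported from the package root (this diff only shows the module-level export):

    import type { DeductiveSynthesisStats } from "clawmem"; // assumed export path

    function reportDeductiveHealth(stats: DeductiveSynthesisStats): void {
      // Per the doc comment: a high fallback-to-accepted ratio means the LLM
      // validator is effectively offline and only deterministic guardrails ran.
      const fallbackRatio =
        stats.accepted > 0 ? stats.validatorFallbackAccepts / stats.accepted : 0;
      console.log(
        `considered=${stats.considered} drafted=${stats.drafted} ` +
        `accepted=${stats.accepted} rejected=${stats.rejected} created=${stats.created}`
      );
      if (fallbackRatio > 0.5) {
        console.warn(`validator fail-open ratio ${fallbackRatio.toFixed(2)}; LLM path likely down`);
      }
    }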
@@ -294,23 +362,45 @@ Return ONLY the JSON array. /no_think`;
 
     if (sourceDocIds.length < 2) continue;
 
-    // Check for existing similar consolidated observation (avoid duplicates)
-    const existing = findSimilarConsolidation(store, pattern.observation, cluster.collection);
+    // Check for existing similar consolidated observation (avoid duplicates).
+    // Two-stage gate: Jaccard shortlist + name-aware merge safety (Ext 3).
+    const existing = findSimilarConsolidation(
+      store,
+      pattern.observation,
+      cluster.collection,
+      sourceDocIds
+    );
 
     if (existing) {
-      // Update existing: merge source docs, increment proof count
-      const existingSourceIds: number[] = JSON.parse(existing.source_doc_ids as unknown as string || '[]');
-      const mergedIds = [...new Set([...existingSourceIds, ...sourceDocIds])];
-
-      store.db.prepare(`
-        UPDATE consolidated_observations
-        SET proof_count = ?,
-            source_doc_ids = ?,
-            updated_at = datetime('now'),
-            observation = ?
-        WHERE id = ?
-      `).run(mergedIds.length, JSON.stringify(mergedIds), pattern.observation, existing.id);
-
-      console.log(`[consolidation] Updated observation #${existing.id}: proof_count=${mergedIds.length}`);
+      // Ext 2: contradiction gate. Before merging into an existing
+      // consolidation, check whether the new observation contradicts
+      // the existing one. On actionable contradiction we do NOT merge;
+      // instead we insert the new row as a separate consolidation and
+      // apply the configured policy (link or supersede).
+      const contradiction = await checkContradiction(
+        llm,
+        existing.observation,
+        pattern.observation,
+        `collection: ${cluster.collection}`
+      );
+
+      if (isActionableContradiction(contradiction)) {
+        applyContradictoryConsolidation(
+          store,
+          existing,
+          pattern.observation,
+          sourceDocIds,
+          cluster.collection,
+          contradiction
+        );
+      } else {
+        const { mergedIds } = mergeIntoExistingConsolidation(
+          store,
+          existing,
+          sourceDocIds,
+          pattern.observation
+        );
+        console.log(`[consolidation] Updated observation #${existing.id}: proof_count=${mergedIds.length}`);
+      }
     } else {
       // Insert new consolidated observation
       store.db.prepare(`
@@ -323,23 +413,133 @@ Return ONLY the JSON array. /no_think`;
   }
 }
 
+/**
+ * Handle a contradictory Phase 2 merge attempt.
+ *
+ * Inserts the new observation as a separate active consolidation row and
+ * applies the resolved contradiction policy, atomically:
+ *
+ * - **link** (default): old row stays active (`status='active'`); sets
+ *   `invalidated_by = newId` as a backlink so operators can find the
+ *   contradiction via `SELECT * FROM consolidated_observations WHERE
+ *   invalidated_by IS NOT NULL AND invalidated_at IS NULL`.
+ * - **supersede**: sets `invalidated_at = now`, `invalidated_by = newId`,
+ *   `superseded_by = newId`, **AND `status = 'inactive'`** — the old
+ *   row stops surfacing via every consolidation reader (all of which
+ *   filter by `status = 'active'`). Subsequent recalls and merge
+ *   matches see only the new row.
+ *
+ * The INSERT + UPDATE pair runs inside a SQLite transaction so a
+ * failure on the UPDATE side rolls back the new row, preventing a
+ * dangling active consolidation with no backlink.
+ *
+ * Policy is resolved via `CLAWMEM_CONTRADICTION_POLICY=link|supersede`.
+ *
+ * Returns the new consolidation's id and the policy used.
+ */
+export function applyContradictoryConsolidation(
+  store: Store,
+  existing: { id: number; observation: string; source_doc_ids: string },
+  newObservation: string,
+  newSourceDocIds: number[],
+  collection: string,
+  contradiction: ContradictionResult
+): { newId: number; policy: "link" | "supersede" } {
+  const policy = resolveContradictionPolicy();
+
+  let newId = 0;
+  const tx = store.db.transaction(() => {
+    // Insert the new consolidation as a separate active row
+    const insertResult = store.db
+      .prepare(
+        `INSERT INTO consolidated_observations
+           (observation, proof_count, source_doc_ids, trend, status, collection)
+         VALUES (?, ?, ?, 'NEW', 'active', ?)`
+      )
+      .run(
+        newObservation,
+        newSourceDocIds.length,
+        JSON.stringify(newSourceDocIds),
+        collection
+      );
+    newId = Number(insertResult.lastInsertRowid);
+
+    // Apply the policy to the old row
+    if (policy === "supersede") {
+      // Mark the old row as fully inactive so existing readers
+      // (filter on `status = 'active'`) stop surfacing it, and set
+      // all three invalidation columns for operator queries.
+      store.db
+        .prepare(
+          `UPDATE consolidated_observations
+           SET invalidated_at = datetime('now'),
+               invalidated_by = ?,
+               superseded_by = ?,
+               status = 'inactive'
+           WHERE id = ?`
+        )
+        .run(newId, newId, existing.id);
+    } else {
+      // link: old row stays active, set backlink only
+      store.db
+        .prepare(
+          `UPDATE consolidated_observations
+           SET invalidated_by = ?
+           WHERE id = ?`
+        )
+        .run(newId, existing.id);
+    }
+  });
+  tx();
+
+  console.log(
+    `[consolidation] contradiction detected (policy=${policy} source=${contradiction.source} ` +
+    `confidence=${contradiction.confidence.toFixed(2)}): ` +
+    `existing #${existing.id} + new #${newId} — reason="${contradiction.reason ?? ""}"`
+  );
+
+  return { newId, policy };
+}
+
 /**
  * Find an existing consolidated observation similar to the given text.
- * Uses simple word overlap (Jaccard) to detect near-duplicates.
+ *
+ * Two-stage gate (Ext 3 — name-aware merge safety):
+ *   1. Jaccard > 0.5 on long-word sets — cheap candidate shortlist
+ *   2. Name-aware dual-threshold merge safety gate — entity-first, lexical
+ *      fallback, strictest default when both sides have no anchors
+ *
+ * Returns the highest-scoring candidate that passes BOTH gates, or null
+ * when no candidate passes. Previously the function returned the first
+ * Jaccard hit, which allowed semantic-collision merges between topics
+ * sharing vocabulary but referring to different subjects (e.g. "Dan" vs
+ * "Dad"). The second gate blocks those.
+ *
+ * Respects `CLAWMEM_MERGE_GUARD_DRY_RUN=true` — in dry-run mode the gate
+ * logs its decision for each candidate but does NOT block the merge; the
+ * first Jaccard hit is returned (legacy behavior). Use during rollout to
+ * observe gate decisions before enforcement.
  */
-function findSimilarConsolidation(
+export function findSimilarConsolidation(
   store: Store,
   observation: string,
-  collection: string
-): { id: number; source_doc_ids: string } | null {
+  collection: string,
+  candidateSourceDocIds: number[]
+): { id: number; observation: string; source_doc_ids: string } | null {
+  // ORDER BY id ASC makes "first shortlist hit" deterministic across
+  // SQLite plan changes — the dry-run legacy parity case relies on
+  // iterating rows in a stable insertion order.
   const existing = store.db.prepare(`
     SELECT id, observation, source_doc_ids
     FROM consolidated_observations
     WHERE status = 'active' AND collection = ?
+    ORDER BY id ASC
   `).all(collection) as { id: number; observation: string; source_doc_ids: string }[];
 
   const queryWords = new Set(observation.toLowerCase().split(/\s+/).filter(w => w.length > 3));
 
+  // Stage 1: Jaccard shortlist (broad candidate generation)
+  const shortlist: Array<{ row: typeof existing[number]; jaccard: number }> = [];
   for (const obs of existing) {
     const obsWords = new Set(obs.observation.toLowerCase().split(/\s+/).filter(w => w.length > 3));
     const intersection = [...queryWords].filter(w => obsWords.has(w)).length;
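The policy contract documented on `applyContradictoryConsolidation` above is easiest to see from the operator side. A sketch, assuming a `store` of the same shape used throughout this module and using only the columns named in the UPDATE statements of this hunk:

    // link (default): the old row stays active; live contradiction pairs are
    // found through the backlink, exactly as the doc comment suggests.
    const linked = store.db.prepare(`
      SELECT id, observation, invalidated_by
      FROM consolidated_observations
      WHERE invalidated_by IS NOT NULL AND invalidated_at IS NULL
    `).all();

    // supersede: the old row is retired; every reader filtering on
    // status = 'active' stops surfacing it.
    const retired = store.db.prepare(`
      SELECT id, observation, superseded_by
      FROM consolidated_observations
      WHERE status = 'inactive' AND superseded_by IS NOT NULL
    `).all();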
@@ -347,11 +547,128 @@ function findSimilarConsolidation(
     const jaccard = union > 0 ? intersection / union : 0;
 
     if (jaccard > 0.5) {
-      return { id: obs.id, source_doc_ids: obs.source_doc_ids };
+      shortlist.push({ row: obs, jaccard });
+    }
+  }
+
+  if (shortlist.length === 0) return null;
+
+  const dryRun = process.env.CLAWMEM_MERGE_GUARD_DRY_RUN === "true";
+
+  // Dry-run: preserve EXACT legacy behavior — return the first shortlist
+  // hit (the pre-Ext-3 code iterated the SELECT rows in order and returned
+  // on first Jaccard > 0.5), while still logging every candidate's gate
+  // decision for operator observation.
+  if (dryRun) {
+    for (const candidate of shortlist) {
+      const existingSourceIds = safeParseDocIds(candidate.row.source_doc_ids);
+      const result = passesMergeSafety(
+        store,
+        observation,
+        candidateSourceDocIds,
+        candidate.row.observation,
+        existingSourceIds
+      );
+      console.log(
+        `[consolidation] merge-safety[dry-run] id=${candidate.row.id} ` +
+        `jaccard=${candidate.jaccard.toFixed(2)} ` +
+        `score=${result.score.toFixed(3)} threshold=${result.threshold} ` +
+        `method=${result.method} accepted=${result.accepted} reason="${result.reason}"`
+      );
+    }
+    const first = shortlist[0]!;
+    return {
+      id: first.row.id,
+      observation: first.row.observation,
+      source_doc_ids: first.row.source_doc_ids,
+    };
+  }
+
+  // Stage 2: Merge safety gate — keep best candidate that passes
+  let best: { row: typeof existing[number]; gateScore: number } | null = null;
+  for (const candidate of shortlist) {
+    const existingSourceIds = safeParseDocIds(candidate.row.source_doc_ids);
+    const result = passesMergeSafety(
+      store,
+      observation,
+      candidateSourceDocIds,
+      candidate.row.observation,
+      existingSourceIds
+    );
+
+    if (!result.accepted) {
+      console.log(
+        `[consolidation] merge-safety rejected id=${candidate.row.id} ` +
+        `method=${result.method} score=${result.score.toFixed(3)} ` +
+        `threshold=${result.threshold} reason="${result.reason}"`
+      );
+      continue;
+    }
+
+    if (!best || result.score > best.gateScore) {
+      best = { row: candidate.row, gateScore: result.score };
     }
   }
 
-  return null;
+  if (!best) return null;
+  return {
+    id: best.row.id,
+    observation: best.row.observation,
+    source_doc_ids: best.row.source_doc_ids,
+  };
+}
+
+/**
+ * Safely parse a JSON array of doc IDs from a stored string column.
+ * Returns an empty array on any parse failure (null, empty string,
+ * malformed JSON, non-array JSON). Exported so tests can drive the
+ * exact parse path that the merge-update helper and findSimilarConsolidation
+ * both rely on.
+ */
+export function safeParseDocIds(raw: string | null | undefined): number[] {
+  try {
+    const parsed = JSON.parse(raw || "[]");
+    return Array.isArray(parsed) ? parsed.filter(x => typeof x === "number") : [];
+  } catch {
+    return [];
+  }
+}
+
+/**
+ * Merge a new set of source doc IDs into an existing consolidated
+ * observation row. Idempotent: deduplicates source IDs, updates proof
+ * count to the final de-duplicated size, refreshes observation text.
+ *
+ * Uses `safeParseDocIds` so a corrupted `source_doc_ids` value on the
+ * existing row (NULL, empty string, malformed JSON, non-array JSON)
+ * cannot crash the merge path. A corrupted existing row is treated as
+ * if it had no prior source IDs, and the merged list contains only the
+ * new IDs — recovering the row instead of losing the entire cluster.
+ *
+ * Extracted from `synthesizeCluster` so the update-path safety can be
+ * unit-tested directly (Ext 3 Low finding — review Turn 5).
+ */
+export function mergeIntoExistingConsolidation(
+  store: Store,
+  existing: { id: number; source_doc_ids: string },
+  newSourceDocIds: number[],
+  newObservation: string
+): { mergedIds: number[] } {
+  const existingSourceIds = safeParseDocIds(existing.source_doc_ids);
+  const mergedIds = [...new Set([...existingSourceIds, ...newSourceDocIds])];
+
+  store.db
+    .prepare(
+      `UPDATE consolidated_observations
+       SET proof_count = ?,
+           source_doc_ids = ?,
+           updated_at = datetime('now'),
+           observation = ?
+       WHERE id = ?`
+    )
+    .run(mergedIds.length, JSON.stringify(mergedIds), newObservation, existing.id);
+
+  return { mergedIds };
 }
 
 /**
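The behavior of `safeParseDocIds` follows directly from its body; for reference, every documented failure mode maps to an empty (or filtered) array:

    safeParseDocIds('[1, 2, 3]');   // → [1, 2, 3]
    safeParseDocIds(null);          // → []  (raw || "[]" parses to an empty array)
    safeParseDocIds("");            // → []
    safeParseDocIds("not json");    // → []  (JSON.parse throws, caught)
    safeParseDocIds('{"a": 1}');    // → []  (valid JSON, but not an array)
    safeParseDocIds('[1, "x", 2]'); // → [1, 2]  (non-numbers filtered out)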
@@ -407,7 +724,11 @@ function updateTrends(store: Store): void {
  * Only considers decision/preference/milestone/problem observations from the
  * last 7 days that haven't already been used as sources for deductions.
  */
-async function generateDeductiveObservations(store: Store, llm: LlamaCpp): Promise<number> {
+async function generateDeductiveObservations(
+  store: Store,
+  llm: LlamaCpp
+): Promise<DeductiveSynthesisStats> {
+  const stats = emptyDeductiveStats();
   // Find recent high-value observations not yet used in deductions
   const DEDUCTIVE_TYPES = ['decision', 'preference', 'milestone', 'problem'];
   const recentObs = store.db.prepare(`
@@ -434,7 +755,8 @@ async function generateDeductiveObservations(store: Store, llm: LlamaCpp): Promi
     path: string; modified_at: string;
   }[];
 
-  if (recentObs.length < 2) return 0;
+  stats.considered = recentObs.length;
+  if (recentObs.length < 2) return stats;
 
   // Build context for LLM
   const obsText = recentObs.map((o, i) =>
@@ -470,7 +792,11 @@ Rules:
 Return ONLY the JSON array. /no_think`;
 
   const result = await llm.generate(prompt, { temperature: 0.3, maxTokens: 500 });
-  if (!result?.text) return 0;
+  if (!result?.text) {
+    stats.nullCalls++;
+    console.log(`[deductive] draft-generation LLM null — skipping Phase 3 tick`);
+    return stats;
+  }
 
   const parsed = extractJsonFromLLM(result.text) as Array<{
     conclusion: string;
@@ -478,22 +804,115 @@ Return ONLY the JSON array. /no_think`;
     source_indices: number[];
   }> | null;
 
-  if (!Array.isArray(parsed)) return 0;
+  if (!Array.isArray(parsed)) return stats;
+
+  stats.drafted = parsed.length;
 
-  let created = 0;
   const timestamp = new Date().toISOString();
   const dateStr = timestamp.slice(0, 10);
 
   for (const deduction of parsed) {
-    if (!deduction.conclusion || !Array.isArray(deduction.source_indices) || deduction.source_indices.length < 2) continue;
+    if (!deduction.conclusion || !Array.isArray(deduction.source_indices) || deduction.source_indices.length < 2) {
+      stats.rejected++;
+      stats.invalidIndexRejects++;
+      continue;
+    }
 
-    const sourceDocIds = deduction.source_indices
-      .filter(i => i >= 1 && i <= recentObs.length)
-      .map(i => recentObs[i - 1]!.id);
+    const sourceDocIds = [...new Set(
+      deduction.source_indices
+        .filter(i => i >= 1 && i <= recentObs.length)
+        .map(i => recentObs[i - 1]!.id)
+    )];
 
-    if (sourceDocIds.length < 2) continue;
+    if (sourceDocIds.length < 2) {
+      stats.rejected++;
+      stats.invalidIndexRejects++;
+      continue;
+    }
+
+    // Ext 1: Anti-contamination validation. Build the source doc
+    // subset from recentObs, then run the guardrails:
+    //   1. Deterministic pre-checks (non-trivial conclusion, ≥2 sources)
+    //   2. Entity-aware / lexical-fallback contamination scan
+    //   3. LLM validation/refinement with filtered evidence + relation
+    //      context — on null/malformed, fall back to deterministic accept
+    const sourceDocs: DocLike[] = sourceDocIds
+      .map(id => recentObs.find(o => o.id === id))
+      .filter((d): d is typeof recentObs[number] => Boolean(d))
+      .map(d => ({
+        id: d.id,
+        title: d.title,
+        facts: d.facts,
+        narrative: d.narrative,
+      }));
+
+    const draft: DeductiveDraft = {
+      conclusion: deduction.conclusion,
+      premises: deduction.premises ?? [],
+      sourceIndices: deduction.source_indices,
+    };
+
+    const validation = await validateDeductiveDraft(
+      store,
+      llm,
+      draft,
+      sourceDocs,
+      recentObs.map(r => ({
+        id: r.id,
+        title: r.title,
+        facts: r.facts,
+        narrative: r.narrative,
+      }))
+    );
+
+    if (!validation.accepted) {
+      stats.rejected++;
+      switch (validation.reason) {
+        case "contamination":
+          stats.contaminationRejects++;
+          console.log(
+            `[deductive] rejected for contamination (method=${validation.contaminationMethod}): ` +
+            `hits=${(validation.contaminationHits ?? []).join(",")} — ` +
+            `"${deduction.conclusion.slice(0, 60)}..."`
+          );
+          break;
+        case "invalid_indices":
+          stats.invalidIndexRejects++;
+          break;
+        case "unsupported":
+          stats.unsupportedRejects++;
+          console.log(
+            `[deductive] rejected as unsupported by LLM validator: ` +
+            `"${deduction.conclusion.slice(0, 60)}..."`
+          );
+          break;
+        case "empty":
+          stats.emptyRejects++;
+          break;
+      }
+      continue;
+    }
 
-    // Check for duplicate deduction (Jaccard on conclusion text)
+    stats.accepted++;
+    if (validation.fallbackAccepted) {
+      stats.validatorFallbackAccepts++;
+    }
+    // Use validated (possibly LLM-refined) conclusion + premises from
+    // here on. This replaces the draft's original text for dedupe,
+    // persistence, and the sourceRefs block.
+    deduction.conclusion = validation.conclusion ?? deduction.conclusion;
+    deduction.premises = validation.premises ?? deduction.premises;
+
+    // Check for duplicate deduction (Jaccard on conclusion text) →
+    // contradiction gate (Ext 2). Evaluate ALL near-duplicates, not
+    // just the first one, so the decision is order-independent:
+    //
+    // - If ANY existing deduction is a non-contradictory duplicate,
+    //   skip the new deduction (something already says this).
+    // - Else if ANY existing deduction is an actionable contradiction,
+    //   KEEP the new deduction and link to EVERY contradictory match
+    //   via `contradicts` relations.
+    // - Else (no Jaccard matches at all) → insert as new.
     const existingDedups = store.db.prepare(`
       SELECT id, title FROM documents
       WHERE content_type = 'deductive' AND active = 1
@@ -501,14 +920,43 @@ Return ONLY the JSON array. /no_think`;
     `).all() as { id: number; title: string }[];
 
     const conclusionWords = new Set(deduction.conclusion.toLowerCase().split(/\s+/).filter(w => w.length > 3));
-    const isDuplicate = existingDedups.some(d => {
+    const jaccardDuplicates = existingDedups.filter(d => {
       const titleWords = new Set(d.title.toLowerCase().split(/\s+/).filter(w => w.length > 3));
       const intersection = [...conclusionWords].filter(w => titleWords.has(w)).length;
       const union = new Set([...conclusionWords, ...titleWords]).size;
       return union > 0 && intersection / union > 0.5;
     });
 
-    if (isDuplicate) continue;
+    const contradictoryDuplicates: { id: number; confidence: number; reason?: string }[] = [];
+    let hasNonContradictoryDuplicate = false;
+    for (const candidate of jaccardDuplicates) {
+      const contradiction = await checkContradiction(
+        llm,
+        candidate.title,
+        deduction.conclusion,
+        "deductive synthesis phase"
+      );
+      if (isActionableContradiction(contradiction)) {
+        contradictoryDuplicates.push({
+          id: candidate.id,
+          confidence: contradiction.confidence,
+          reason: contradiction.reason,
+        });
+      } else {
+        hasNonContradictoryDuplicate = true;
+        // Don't break — keep scanning to give operator full log coverage,
+        // but we already know we'll skip.
+      }
+    }
+
+    // Skip rule: if ANY non-contradictory duplicate exists, the new
+    // deduction is redundant regardless of any contradictions.
+    if (hasNonContradictoryDuplicate) {
+      stats.dedupSkipped++;
+      continue;
+    }
+    // Otherwise we either have no matches (fall through to insert as new)
+    // or only contradictory matches (insert + link).
 
     // Build the deductive document
     const premisesText = (deduction.premises || []).map(p => `- ${p}`).join('\n');
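The order-independent skip/link/insert rule introduced in this hunk can be restated as a small pure function; this is illustrative only, since the shipped code inlines the logic in the loop:

    type DedupOutcome = "skip" | "insert_and_link" | "insert_new";

    function classifyDedup(
      hasNonContradictoryDuplicate: boolean,
      contradictoryCount: number
    ): DedupOutcome {
      if (hasNonContradictoryDuplicate) return "skip";      // something already says this
      if (contradictoryCount > 0) return "insert_and_link"; // keep it, add contradicts relations
      return "insert_new";                                  // no Jaccard matches at all
    }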
@@ -569,7 +1017,34 @@ Return ONLY the JSON array. /no_think`;
         } catch { /* non-fatal */ }
       }
 
-      created++;
+      // Ext 2: If we kept this deduction because it contradicts one
+      // or more existing deductive docs, link them ALL via
+      // `contradicts` relations so operators can find every conflict
+      // via `SELECT * FROM memory_relations WHERE relation_type = 'contradicts'`.
+      // Uses the A-MEM convention plural form (P0-enforced).
+      if (contradictoryDuplicates.length > 0) {
+        const relStmt = store.db.prepare(
+          `INSERT OR IGNORE INTO memory_relations
+             (source_id, target_id, relation_type, weight, contradict_confidence, metadata, created_at)
+           VALUES (?, ?, 'contradicts', 0, ?, ?, datetime('now'))`
+        );
+        for (const contra of contradictoryDuplicates) {
+          try {
+            relStmt.run(
+              doc.id,
+              contra.id,
+              contra.confidence,
+              JSON.stringify({ reason: contra.reason ?? "" })
+            );
+            console.log(
+              `[deductive] contradiction linked: new #${doc.id} contradicts existing #${contra.id} ` +
+              `(confidence=${contra.confidence.toFixed(2)})`
+            );
+          } catch { /* non-fatal — the deduction itself still landed */ }
+        }
+      }
+
+      stats.created++;
       console.log(`[deductive] Created: "${deduction.conclusion.slice(0, 60)}..." from ${sourceDocIds.length} sources`);
       }
     } catch (err) {
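As the comment in this hunk notes, every conflict lands as a `contradicts` row in `memory_relations`. An operator query sketch, assuming only the columns used by the INSERT above plus the `id`/`title` columns the dedupe SELECT already reads from `documents`:

    const conflicts = store.db.prepare(`
      SELECT r.source_id, r.target_id, r.contradict_confidence, r.metadata,
             n.title AS new_title, o.title AS old_title
      FROM memory_relations r
      JOIN documents n ON n.id = r.source_id
      JOIN documents o ON o.id = r.target_id
      WHERE r.relation_type = 'contradicts'
    `).all();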
@@ -577,7 +1052,7 @@ Return ONLY the JSON array. /no_think`;
     }
   }
 
-  return created;
+  return stats;
 }
 
 /**
@@ -586,9 +1061,8 @@ Return ONLY the JSON array. /no_think`;
 export async function runDeductiveSynthesis(
   store: Store,
   llm: LlamaCpp,
-): Promise<{ created: number }> {
-  const created = await generateDeductiveObservations(store, llm);
-  return { created };
+): Promise<DeductiveSynthesisStats> {
+  return await generateDeductiveObservations(store, llm);
 }
 
 // =============================================================================
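Because the public return type of `runDeductiveSynthesis` changes here, callers written against 0.7.0 need a small migration. A sketch, assuming the same `store`/`llm` arguments and an enclosing async context:

    // 0.7.0: const { created } = await runDeductiveSynthesis(store, llm);
    const stats = await runDeductiveSynthesis(store, llm);
    console.log(
      `[phase3] created=${stats.created} of ${stats.drafted} drafts ` +
      `(accepted=${stats.accepted}, rejected=${stats.rejected}, dedupSkipped=${stats.dedupSkipped})`
    );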