PyPI - memorymaster - Versions diffs - 3.21.0__tar.gz → 3.22.0__tar.gz - Mend

memorymaster 3.21.0.tar.gz → 3.22.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (395)

{memorymaster-3.21.0/memorymaster.egg-info → memorymaster-3.22.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: memorymaster
-Version: 3.21.0
+Version: 3.22.0
 Summary: Production-grade memory reliability system for AI coding agents. Lifecycle-managed claims with citations, conflict detection, steward governance, and MCP integration.
 Author: wolverin0
 License: MIT
@@ -52,7 +52,7 @@ Lifecycle-managed claims with citations, conflict detection, steward governance,
 [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
 [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
-[![Tests](https://img.shields.io/badge/tests-2194-green.svg)]()
+[![Tests](https://img.shields.io/badge/tests-2214-green.svg)]()
 [![MCP Tools](https://img.shields.io/badge/MCP%20tools-24-purple.svg)]()
 [![CLI Commands](https://img.shields.io/badge/CLI%20commands-86-orange.svg)]()
 [![PyPI](https://img.shields.io/pypi/v/memorymaster.svg)](https://pypi.org/project/memorymaster/)
@@ -90,6 +90,8 @@ recent PR status, and sensitivity-filter invariants.
 - **Rule-shaped claims** (new in v3.21.0): prescriptive `when <trigger>, do <action> because <rationale>` claims (`ingest_rule` / `query_rules`) — the shape an agent needs to actually change behaviour next time, not just recall a fact
 - **Correction mining** (new in v3.21.0): `mine-rules` scans the verbatim transcript archive for user corrections and distills them into rule claims; the Stop hook also mines each session's latest correction automatically
 - **Versioned schema migrations** (new in v3.20.0): `migrate` applies SQLite/Postgres migrations with sha256 drift detection; incremental `export-delta` ships small claim deltas for cheap cross-machine sync
+- **Retrieval quality** (new in v3.22.0): floor-ratio boost gate (`MEMORYMASTER_BOOST_FLOOR_RATIO`) stops fresh-but-wrong claims outranking the true match; `query --explain` shows per-stage score attribution; an opt-in correctness-safe query cache (`MEMORYMASTER_QUERY_CACHE`) with a generation gate
+- **Semantic contradiction probe** (new in v3.22.0): `detect-contradictions` finds claims that genuinely contradict each other (beyond the deterministic same-subject conflict check) via an LLM judge with a Wilson-CI rate and verdict cache
 - **Steward governance**: multi-probe validators (filesystem, format, citation, semantic, tool) with proposal review
 - **Conflict resolution**: 5-tier auto (confidence > freshness > citations > LLM > manual)
 - **Auto-redaction** at ingest: JWT, GitHub tokens, Bearer, AWS keys, SSH keys, custom patterns

{memorymaster-3.21.0 → memorymaster-3.22.0}/README.md RENAMED Viewed

@@ -6,7 +6,7 @@ Lifecycle-managed claims with citations, conflict detection, steward governance,
 [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
 [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
-[![Tests](https://img.shields.io/badge/tests-2194-green.svg)]()
+[![Tests](https://img.shields.io/badge/tests-2214-green.svg)]()
 [![MCP Tools](https://img.shields.io/badge/MCP%20tools-24-purple.svg)]()
 [![CLI Commands](https://img.shields.io/badge/CLI%20commands-86-orange.svg)]()
 [![PyPI](https://img.shields.io/pypi/v/memorymaster.svg)](https://pypi.org/project/memorymaster/)
@@ -44,6 +44,8 @@ recent PR status, and sensitivity-filter invariants.
 - **Rule-shaped claims** (new in v3.21.0): prescriptive `when <trigger>, do <action> because <rationale>` claims (`ingest_rule` / `query_rules`) — the shape an agent needs to actually change behaviour next time, not just recall a fact
 - **Correction mining** (new in v3.21.0): `mine-rules` scans the verbatim transcript archive for user corrections and distills them into rule claims; the Stop hook also mines each session's latest correction automatically
 - **Versioned schema migrations** (new in v3.20.0): `migrate` applies SQLite/Postgres migrations with sha256 drift detection; incremental `export-delta` ships small claim deltas for cheap cross-machine sync
+- **Retrieval quality** (new in v3.22.0): floor-ratio boost gate (`MEMORYMASTER_BOOST_FLOOR_RATIO`) stops fresh-but-wrong claims outranking the true match; `query --explain` shows per-stage score attribution; an opt-in correctness-safe query cache (`MEMORYMASTER_QUERY_CACHE`) with a generation gate
+- **Semantic contradiction probe** (new in v3.22.0): `detect-contradictions` finds claims that genuinely contradict each other (beyond the deterministic same-subject conflict check) via an LLM judge with a Wilson-CI rate and verdict cache
 - **Steward governance**: multi-probe validators (filesystem, format, citation, semantic, tool) with proposal review
 - **Conflict resolution**: 5-tier auto (confidence > freshness > citations > LLM > manual)
 - **Auto-redaction** at ingest: JWT, GitHub tokens, Bearer, AWS keys, SSH keys, custom patterns

{memorymaster-3.21.0 → memorymaster-3.22.0}/memorymaster/__init__.py RENAMED Viewed

@@ -2,4 +2,4 @@
 __all__ = ["__version__"]
-__version__ = "3.21.0"
+__version__ = "3.22.0"

{memorymaster-3.21.0 → memorymaster-3.22.0}/memorymaster/cli.py RENAMED Viewed

@@ -185,6 +185,7 @@ def build_parser() -> argparse.ArgumentParser:
     query.add_argument("--scope-allowlist", default="", help="Comma-separated scopes to include (e.g. project,team_x)")
     query.add_argument("--as-of", default="", help="Temporal query: show claims valid at this ISO timestamp")
     query.add_argument("--auto-classify", action="store_true", help="Auto-classify query type and use optimal retrieval mode")
+    query.add_argument("--explain", action="store_true", help="Show per-stage score attribution (relevance vs. boosts, floor-gate status) for each result")
     context = sub.add_parser("context", help="Pack relevant claims into a token-budgeted context block for AI agents")
     context.add_argument("text", help="Query text describing what context is needed")
@@ -463,6 +464,13 @@ def build_parser() -> argparse.ArgumentParser:
     mine_rules_cmd.add_argument("--provider", default="claude_cli", help="LLM provider for this run (default: claude_cli)")
     mine_rules_cmd.add_argument("--reset", action="store_true", help="Clear the stored watermark before running (re-scan from the start)")
+    detect_contra = sub.add_parser("detect-contradictions", help="Find semantic contradictions between topically-similar claims via an LLM judge (v3.22)")
+    detect_contra.add_argument("--limit", type=int, default=200, help="Max claims to load for pair sampling")
+    detect_contra.add_argument("--sample", type=int, default=50, help="Max candidate pairs to judge this run (caps LLM calls)")
+    detect_contra.add_argument("--sim-low", dest="sim_low", type=float, default=0.60, help="Lower similarity-band bound (below = unrelated)")
+    detect_contra.add_argument("--sim-high", dest="sim_high", type=float, default=0.92, help="Upper similarity-band bound (at/above = near-duplicates, dedup's job)")
+    detect_contra.add_argument("--apply", action="store_true", help="Flag the lower-confidence claim of each contradicting pair as conflicted (reversible)")
     verify_cmd = sub.add_parser("verify-claims", help="Cross-check claims against current codebase")
     verify_cmd.add_argument("--scope", default="", help="Scope filter")
     verify_cmd.add_argument("--limit", type=int, default=200, help="Max claims to check")

{memorymaster-3.21.0 → memorymaster-3.22.0}/memorymaster/cli_handlers_basic.py RENAMED Viewed

@@ -832,6 +832,25 @@ def _handle_run_cycle(args: argparse.Namespace, service, parser: argparse.Argume
     return 0
+def _print_score_explanation(breakdown: dict | None) -> None:
+    """Render per-stage score attribution for `query --explain`.
+    Shows query-relevance vs. the metadata boost terms and whether the
+    floor-ratio gate suppressed the boosts for this result.
+    """
+    if not breakdown:
+        print("    explain: (no breakdown — legacy retrieval mode)")
+        return
+    terms = breakdown.get("boost_terms", {})
+    w = breakdown.get("weights", (0, 0, 0, 0))
+    applied = breakdown.get("boosts_applied", True)
+    gate = "applied" if applied else f"GATED (relevance < floor={breakdown.get('floor', 0.0):.3f})"
+    term_str = " ".join(f"{k}={v:+.3f}" for k, v in terms.items())
+    print(f"    explain: relevance={breakdown.get('relevance', 0.0):.3f} "
+          f"boosts={breakdown.get('boosts_total', 0.0):+.3f} [{gate}] -> final={breakdown.get('final', 0.0):.3f}")
+    print(f"             weights(l,c,f,v)={tuple(round(x, 2) for x in w)}  boost_terms: {term_str}")
 def _handle_query(args: argparse.Namespace, service, parser: argparse.ArgumentParser, effective_db: str) -> int:
     resolve_allow_sensitive_access(allow_sensitive=args.allow_sensitive, context="cli.query")
     if getattr(args, "as_of", ""):
@@ -876,6 +895,8 @@ def _handle_query(args: argparse.Namespace, service, parser: argparse.ArgumentPa
                   f"vec={sc['vector_score']:.3f} "
                   f"active={int(bool(ann.get('active')))} stale={int(bool(ann.get('stale')))} "
                   f"conflicted={int(bool(ann.get('conflicted')))} pinned={int(bool(ann.get('pinned')))}")
+            if getattr(args, "explain", False):
+                _print_score_explanation(row.get("breakdown"))
         print(f"rows={len(rows_data)}")
     return 0

{memorymaster-3.21.0 → memorymaster-3.22.0}/memorymaster/cli_handlers_curation.py RENAMED Viewed

@@ -368,6 +368,35 @@ def _handle_mine_rules(args: argparse.Namespace, service, parser: argparse.Argum
     return 0
+def _handle_detect_contradictions(args: argparse.Namespace, service, parser: argparse.ArgumentParser, effective_db: str) -> int:
+    from memorymaster.contradiction_probe import run_probe
+    t0 = time.perf_counter()
+    result = run_probe(
+        effective_db, service,
+        limit=getattr(args, "limit", 200),
+        sample=getattr(args, "sample", 50),
+        sim_low=getattr(args, "sim_low", 0.60),
+        sim_high=getattr(args, "sim_high", 0.92),
+        apply=getattr(args, "apply", False),
+    )
+    elapsed_ms = (time.perf_counter() - t0) * 1000
+    if args.json_output:
+        print(_json_envelope(result, query_ms=elapsed_ms))
+    else:
+        ci = result["rate_ci"]
+        abort = f", ABORTED ({result['aborted_reason']})" if result.get("aborted_reason") else ""
+        print(
+            f"Contradictions: {result['contradictions']}/{result['judged']} judged "
+            f"(rate={result['rate']:.2f} CI95=[{ci[0]:.2f},{ci[1]:.2f}]) from "
+            f"{result['candidate_pairs']} candidate pairs; cache_hits={result['cache_hits']}, "
+            f"errors={result['judge_errors']}, flagged={result['flagged_conflicted']}{abort} ({elapsed_ms:.0f}ms)"
+        )
+        for f in result["found"][:20]:
+            print(f"  [{f['severity']}] claims {f['claim_a_id']} <> {f['claim_b_id']} "
+                  f"(sim={f['similarity']}): {f['reason']}")
+    return 0
 def _handle_wiki_breakdown(args: argparse.Namespace, service, parser: argparse.ArgumentParser, effective_db: str) -> int:
     from memorymaster.wiki_engine import breakdown
     t0 = time.perf_counter()
@@ -729,6 +758,7 @@ COMMAND_HANDLERS: dict[str, object] = {
     "bases-generate": _handle_bases_generate,
     "mine-transcript": _handle_mine_transcript,
     "mine-rules": _handle_mine_rules,
+    "detect-contradictions": _handle_detect_contradictions,
     "verify-claims": _handle_verify_claims,
     "extract-entities": _handle_extract_entities,
     "entity-stats": _handle_entity_stats,

{memorymaster-3.21.0 → memorymaster-3.22.0}/memorymaster/config.py RENAMED Viewed

@@ -222,6 +222,11 @@ class Config:
     llm_rerank: bool = False
     rrf_tiebreaker_enabled: bool = False
     rrf_tiebreaker_threshold: float = 0.01
+    # Floor-ratio gate (gbrain v0.35.6 "hybrid.floor_ratio"): metadata boosts
+    # (confidence/freshness/tier/pinned) only apply to candidates whose
+    # query-relevance (lexical+vector) is >= boost_floor_ratio * top relevance.
+    # 0.0 = disabled (boosts always apply) — preserves pre-v3.22 behaviour.
+    boost_floor_ratio: float = 0.0
     # --- Initial confidence priors calibrated from validator outcomes ---
     default_initial_confidence: float = DEFAULT_INITIAL_CONFIDENCE
@@ -397,6 +402,7 @@ def load_config(config_path: str | Path | None = None) -> Config:
     _apply_env_bool(overrides, "MEMORYMASTER_LLM_RERANK", "llm_rerank")
     _apply_env_bool(overrides, "MEMORYMASTER_RRF_TIEBREAKER", "rrf_tiebreaker_enabled")
     _apply_env_float(overrides, "MEMORYMASTER_RRF_TIEBREAKER_THRESHOLD", "rrf_tiebreaker_threshold")
+    _apply_env_float(overrides, "MEMORYMASTER_BOOST_FLOOR_RATIO", "boost_floor_ratio")
     _apply_env_retrieval_profiles(overrides)
     # Filter to only valid Config fields

memorymaster-3.22.0/memorymaster/contradiction_probe.py ADDED Viewed

@@ -0,0 +1,326 @@
+"""Suspected-contradictions probe (v3.22, ported from gbrain v0.32.6).
+MemoryMaster's deterministic conflict detection (conflict_resolver,
+jobs/dedup.find_conflicts) only catches claims with the SAME subject+predicate
+and a different object_value. It misses *semantic* contradictions phrased
+differently — e.g. "the API is rate-limited at 100 req/min" vs "there is no
+rate limit on the API". This probe finds those:
+1. Sample topically-similar claim pairs via embedding cosine similarity in a
+   band (similar enough to be about the same thing, not near-duplicates).
+2. Cheap pre-filter to skip pairs the deterministic path owns or that are
+   already linked by supersession.
+3. Ask an LLM whether the pair genuinely contradicts (severity-scored), with a
+   persistent verdict cache so re-runs don't re-pay.
+4. Report a contradiction rate with a Wilson 95% confidence interval (judge
+   errors counted in the denominator so the rate stays honest).
+It does NOT auto-resolve. Default is a dry-run report; ``apply=True`` flags the
+lower-confidence claim of each contradicting pair as ``conflicted`` (the
+needs-human-arbitration state) via the lifecycle helper — never raw SQL, never
+archive/supersede.
+"""
+from __future__ import annotations
+import logging
+import math
+import os
+import sqlite3
+from datetime import datetime, timezone
+from typing import Any
+from memorymaster import llm_budget, llm_provider
+from memorymaster.embeddings import EmbeddingProvider, cosine_similarity, create_best_provider
+from memorymaster.lifecycle import transition_claim
+from memorymaster.models import Claim
+logger = logging.getLogger(__name__)
+PROMPT_VERSION = "v1"
+_SKIP_STATUSES = {"superseded", "archived"}
+_PROMPT = """You compare two memory claims and decide if they CONTRADICT each other.
+A contradiction means both cannot be true at the same time about the same thing.
+Topically related but compatible claims do NOT contradict. Different subjects do
+NOT contradict.
+Output ONE JSON object and nothing else:
+{"contradicts": true|false, "severity": "low"|"medium"|"high", "reason": "<one short clause>"}
+If they do not contradict, return {"contradicts": false, "severity": "low", "reason": ""}.
+No markdown, no commentary."""
+# ---------------------------------------------------------------------------
+# Verdict cache
+# ---------------------------------------------------------------------------
+_VERDICT_DDL = """
+CREATE TABLE IF NOT EXISTS contradiction_verdicts (
+    claim_a_id INTEGER NOT NULL,
+    claim_b_id INTEGER NOT NULL,
+    model TEXT NOT NULL,
+    prompt_version TEXT NOT NULL,
+    contradicts INTEGER NOT NULL,
+    severity TEXT,
+    reason TEXT,
+    created_at TEXT NOT NULL,
+    PRIMARY KEY (claim_a_id, claim_b_id, model, prompt_version)
+)
+""".strip()
+def _canonical_pair(a_id: int, b_id: int) -> tuple[int, int]:
+    """Order a pair so the symmetric (a,b)/(b,a) cache to one row."""
+    return (a_id, b_id) if a_id <= b_id else (b_id, a_id)
+def _ensure_verdict_table(conn: sqlite3.Connection) -> None:
+    conn.execute(_VERDICT_DDL)
+    conn.commit()
+def _cache_get(conn: sqlite3.Connection, a_id: int, b_id: int, model: str) -> dict | None:
+    lo, hi = _canonical_pair(a_id, b_id)
+    row = conn.execute(
+        """SELECT contradicts, severity, reason FROM contradiction_verdicts
+           WHERE claim_a_id = ? AND claim_b_id = ? AND model = ? AND prompt_version = ?""",
+        (lo, hi, model, PROMPT_VERSION),
+    ).fetchone()
+    if row is None:
+        return None
+    return {"contradicts": bool(row[0]), "severity": row[1] or "low", "reason": row[2] or "", "cached": True}
+def _cache_put(conn: sqlite3.Connection, a_id: int, b_id: int, model: str, verdict: dict) -> None:
+    lo, hi = _canonical_pair(a_id, b_id)
+    conn.execute(
+        """INSERT OR REPLACE INTO contradiction_verdicts
+           (claim_a_id, claim_b_id, model, prompt_version, contradicts, severity, reason, created_at)
+           VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
+        (lo, hi, model, PROMPT_VERSION, int(bool(verdict.get("contradicts"))),
+         verdict.get("severity", "low"), verdict.get("reason", ""),
+         datetime.now(timezone.utc).isoformat()),
+    )
+    conn.commit()
+# ---------------------------------------------------------------------------
+# Pair sampling
+# ---------------------------------------------------------------------------
+def _embed_text(claim: Claim) -> str:
+    if claim.subject and claim.predicate:
+        return f"{claim.subject} {claim.predicate} {claim.object_value or ''} {claim.text}"
+    return claim.text
+def _same_subject_predicate(a: Claim, b: Claim) -> bool:
+    return (
+        bool(a.subject) and bool(a.predicate)
+        and (a.subject or "").strip().lower() == (b.subject or "").strip().lower()
+        and (a.predicate or "").strip().lower() == (b.predicate or "").strip().lower()
+    )
+def _already_linked(a: Claim, b: Claim) -> bool:
+    """Pair is already resolved by supersession — the deterministic path owns it."""
+    return (
+        a.supersedes_claim_id == b.id or b.supersedes_claim_id == a.id
+        or a.replaced_by_claim_id == b.id or b.replaced_by_claim_id == a.id
+    )
+def _prefiltered(a: Claim, b: Claim) -> bool:
+    """Cheap skip BEFORE the LLM: deterministic-domain or already-resolved pairs."""
+    if a.status in _SKIP_STATUSES or b.status in _SKIP_STATUSES:
+        return True
+    if _same_subject_predicate(a, b):
+        return True  # conflict_resolver / find_conflicts already own these
+    if _already_linked(a, b):
+        return True
+    return False
+def sample_candidate_pairs(
+    claims: list[Claim],
+    provider: EmbeddingProvider,
+    *,
+    sim_low: float = 0.60,
+    sim_high: float = 0.92,
+    limit: int | None = None,
+) -> list[tuple[Claim, Claim, float]]:
+    """Return (a, b, similarity) pairs in the [sim_low, sim_high) band.
+    The band is the key idea: below ``sim_low`` the claims are unrelated (can't
+    contradict); at/above ``sim_high`` they're near-duplicates (dedup's job).
+    In between is where genuine contradictions live.
+    """
+    usable = [c for c in claims if c.status not in _SKIP_STATUSES]
+    if len(usable) < 2:
+        return []
+    embeddings = [provider.embed(_embed_text(c)) for c in usable]
+    pairs: list[tuple[Claim, Claim, float]] = []
+    for i in range(len(usable)):
+        for j in range(i + 1, len(usable)):
+            if _prefiltered(usable[i], usable[j]):
+                continue
+            sim = cosine_similarity(embeddings[i], embeddings[j])
+            if sim_low <= sim < sim_high:
+                pairs.append((usable[i], usable[j], round(sim, 4)))
+    pairs.sort(key=lambda p: -p[2])
+    if limit is not None:
+        pairs = pairs[:limit]
+    return pairs
+# ---------------------------------------------------------------------------
+# LLM judge
+# ---------------------------------------------------------------------------
+def _judge_llm(a: Claim, b: Claim) -> dict | None:
+    """Ask the LLM whether a and b contradict. Returns a verdict dict or None
+    on parse/empty failure. May raise LLMBudgetExceeded."""
+    body = f"Claim A: {a.text}\nClaim B: {b.text}"
+    raw = llm_provider.call_llm(_PROMPT, body)
+    if not raw or not raw.strip():
+        return None
+    for item in llm_provider.parse_json_response(raw):
+        if isinstance(item, dict) and "contradicts" in item:
+            return {
+                "contradicts": bool(item.get("contradicts")),
+                "severity": (item.get("severity") or "low").strip().lower(),
+                "reason": (item.get("reason") or "").strip(),
+                "cached": False,
+            }
+    return None
+def _model_key() -> str:
+    provider = os.environ.get("MEMORYMASTER_LLM_PROVIDER", "google").strip().lower()
+    model = os.environ.get("MEMORYMASTER_LLM_MODEL", "").strip() or "default"
+    return f"{provider}:{model}"
+# ---------------------------------------------------------------------------
+# Wilson confidence interval
+# ---------------------------------------------------------------------------
+def wilson_interval(successes: int, n: int, z: float = 1.96) -> tuple[float, float]:
+    """Wilson score 95% CI for a binomial proportion. Returns (low, high).
+    Used for the contradiction rate so a handful of judged pairs doesn't read
+    as a precise number. ``n`` includes judge errors (counted as non-success).
+    """
+    if n <= 0:
+        return (0.0, 0.0)
+    phat = successes / n
+    denom = 1.0 + z * z / n
+    center = (phat + z * z / (2 * n)) / denom
+    margin = (z * math.sqrt((phat * (1 - phat) + z * z / (4 * n)) / n)) / denom
+    return (max(0.0, center - margin), min(1.0, center + margin))
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+def run_probe(
+    db_path: str,
+    service: Any,
+    *,
+    limit: int | None = 200,
+    sample: int | None = 50,
+    sim_low: float = 0.60,
+    sim_high: float = 0.92,
+    apply: bool = False,
+    provider: EmbeddingProvider | None = None,
+) -> dict[str, Any]:
+    """Sample similar claim pairs, judge contradictions (cached + budget-capped),
+    and report a Wilson-bounded contradiction rate.
+    Args:
+        limit: max claims to load for pair sampling (oldest-first cap upstream).
+        sample: max candidate pairs to judge this run.
+        apply: if True, flag the lower-confidence claim of each contradicting
+            pair as ``conflicted`` (reversible; never archives/supersedes).
+    """
+    if "://" in str(db_path):
+        raise ValueError("contradiction probe is SQLite-only")
+    stats: dict[str, Any] = {
+        "claims_scanned": 0,
+        "candidate_pairs": 0,
+        "judged": 0,
+        "cache_hits": 0,
+        "llm_calls": 0,
+        "judge_errors": 0,
+        "contradictions": 0,
+        "flagged_conflicted": 0,
+        "aborted_reason": None,
+        "rate": 0.0,
+        "rate_ci": [0.0, 0.0],
+        "found": [],
+    }
+    claims = service.store.list_claims(limit=limit or 1000, include_citations=False)
+    stats["claims_scanned"] = len(claims)
+    prov = provider or create_best_provider()
+    pairs = sample_candidate_pairs(claims, prov, sim_low=sim_low, sim_high=sim_high, limit=sample)
+    stats["candidate_pairs"] = len(pairs)
+    if not pairs:
+        return stats
+    model = _model_key()
+    conn = sqlite3.connect(db_path)
+    try:
+        _ensure_verdict_table(conn)
+        with llm_budget.cycle_scope() as budget:
+            for a, b, sim in pairs:
+                verdict = _cache_get(conn, a.id, b.id, model)
+                if verdict is not None:
+                    stats["cache_hits"] += 1
+                else:
+                    try:
+                        verdict = _judge_llm(a, b)
+                    except llm_budget.LLMBudgetExceeded as exc:
+                        stats["aborted_reason"] = exc.reason
+                        break
+                    stats["llm_calls"] += 1
+                    if verdict is None:
+                        stats["judge_errors"] += 1
+                        stats["judged"] += 1
+                        continue
+                    _cache_put(conn, a.id, b.id, model, verdict)
+                stats["judged"] += 1
+                if verdict["contradicts"]:
+                    stats["contradictions"] += 1
+                    loser, winner = (a, b) if a.confidence <= b.confidence else (b, a)
+                    stats["found"].append({
+                        "claim_a_id": a.id, "claim_b_id": b.id, "similarity": sim,
+                        "severity": verdict["severity"], "reason": verdict["reason"],
+                        "flag_candidate_id": loser.id,
+                    })
+                    if apply:
+                        transition_claim(
+                            service.store, loser.id, "conflicted",
+                            reason=f"contradiction_probe: contradicts claim {winner.id} ({verdict['reason']})",
+                            event_type="transition",
+                        )
+                        stats["flagged_conflicted"] += 1
+            if budget.aborted_reason and not stats["aborted_reason"]:
+                stats["aborted_reason"] = budget.aborted_reason
+    finally:
+        conn.close()
+    n = stats["judged"]
+    if n > 0:
+        stats["rate"] = round(stats["contradictions"] / n, 4)
+        lo, hi = wilson_interval(stats["contradictions"], n)
+        stats["rate_ci"] = [round(lo, 4), round(hi, 4)]
+    return stats

memorymaster-3.22.0/memorymaster/migrations/0003_contradiction_verdicts.py ADDED Viewed

@@ -0,0 +1,42 @@
+"""0003_contradiction_verdicts — LLM verdict cache for the contradiction probe.
+The suspected-contradictions probe (:mod:`memorymaster.contradiction_probe`)
+asks an LLM whether two topically-similar claims contradict. Judging is the
+expensive step, so verdicts are cached keyed on the (canonical-ordered) claim
+pair + model + prompt_version: re-running the probe never re-pays for a pair
+already judged by the same model/prompt. A prompt_version bump invalidates the
+cache for that pair automatically (new key).
+"""
+from __future__ import annotations
+VERSION = 3
+DESCRIPTION = "contradiction_verdicts cache for the suspected-contradictions probe"
+_DDL = """
+CREATE TABLE IF NOT EXISTS contradiction_verdicts (
+    claim_a_id INTEGER NOT NULL,
+    claim_b_id INTEGER NOT NULL,
+    model TEXT NOT NULL,
+    prompt_version TEXT NOT NULL,
+    contradicts INTEGER NOT NULL,
+    severity TEXT,
+    reason TEXT,
+    created_at TEXT NOT NULL,
+    PRIMARY KEY (claim_a_id, claim_b_id, model, prompt_version)
+)
+""".strip()
+def apply_sqlite(conn) -> None:
+    conn.execute(_DDL)
+    commit = getattr(conn, "commit", None)
+    if callable(commit):
+        commit()
+def apply_postgres(conn) -> None:
+    cur = conn.cursor()
+    cur.execute(_DDL)
+    commit = getattr(conn, "commit", None)
+    if callable(commit):
+        commit()

memorymaster-3.22.0/memorymaster/migrations/0004_query_cache.py ADDED Viewed

@@ -0,0 +1,114 @@
+"""0004_query_cache — correctness-safe result cache for retrieval (gbrain v0.40.3).
+Two tables plus write-triggers on ``claims``:
+- ``cache_meta`` holds a single monotonic ``corpus_generation`` counter.
+- INSERT/DELETE and column-scoped UPDATE triggers on ``claims`` bump that
+  counter, so any *retrieval-relevant* claim write advances the generation. The
+  UPDATE trigger deliberately EXCLUDES ``access_count``/``last_accessed`` —
+  otherwise recording an access on every query would invalidate the cache it
+  just served. A cache row is valid only if the generation it was written at
+  still equals the current corpus generation.
+- ``query_cache`` stores serialized retrieval results keyed by a hash that
+  folds in the query, params, AND the retrieval config fingerprint (so a
+  weight/mode/floor change also invalidates).
+The cache is opt-in (``MEMORYMASTER_QUERY_CACHE=1``); the triggers are always
+active so the generation stays accurate, but a single-row integer bump per
+claim write is negligible.
+"""
+from __future__ import annotations
+VERSION = 4
+DESCRIPTION = "query_cache + cache_meta + claims generation triggers (correctness-safe recall cache)"
+_SQLITE_TABLES = """
+CREATE TABLE IF NOT EXISTS cache_meta (
+    key TEXT PRIMARY KEY,
+    value INTEGER NOT NULL
+);
+INSERT OR IGNORE INTO cache_meta(key, value) VALUES ('corpus_generation', 0);
+CREATE TABLE IF NOT EXISTS query_cache (
+    cache_key TEXT PRIMARY KEY,
+    result_json TEXT NOT NULL,
+    generation INTEGER NOT NULL,
+    created_at TEXT NOT NULL
+);
+""".strip()
+# Triggers depend on the claims table; created only when it exists (it always
+# does in a real DB — baseline schema precedes migrations — but the migration
+# unit tests apply on a bare connection).
+_SQLITE_TRIGGERS = """
+CREATE TRIGGER IF NOT EXISTS claims_gen_ai AFTER INSERT ON claims BEGIN
+    UPDATE cache_meta SET value = value + 1 WHERE key = 'corpus_generation';
+END;
+CREATE TRIGGER IF NOT EXISTS claims_gen_au AFTER UPDATE OF
+    text, normalized_text, subject, predicate, object_value, scope,
+    confidence, status, pinned, tier, volatility, valid_from, valid_until,
+    archived_at, updated_at, last_validated_at
+ON claims BEGIN
+    UPDATE cache_meta SET value = value + 1 WHERE key = 'corpus_generation';
+END;
+CREATE TRIGGER IF NOT EXISTS claims_gen_ad AFTER DELETE ON claims BEGIN
+    UPDATE cache_meta SET value = value + 1 WHERE key = 'corpus_generation';
+END;
+""".strip()
+_POSTGRES_TABLES = """
+CREATE TABLE IF NOT EXISTS cache_meta (
+    key TEXT PRIMARY KEY,
+    value BIGINT NOT NULL
+);
+INSERT INTO cache_meta(key, value) VALUES ('corpus_generation', 0)
+    ON CONFLICT (key) DO NOTHING;
+CREATE TABLE IF NOT EXISTS query_cache (
+    cache_key TEXT PRIMARY KEY,
+    result_json TEXT NOT NULL,
+    generation BIGINT NOT NULL,
+    created_at TEXT NOT NULL
+);
+""".strip()
+_POSTGRES_TRIGGERS = """
+CREATE OR REPLACE FUNCTION mm_bump_corpus_generation() RETURNS trigger AS $$
+BEGIN
+    UPDATE cache_meta SET value = value + 1 WHERE key = 'corpus_generation';
+    RETURN NULL;
+END;
+$$ LANGUAGE plpgsql;
+DROP TRIGGER IF EXISTS claims_gen_ins_del ON claims;
+CREATE TRIGGER claims_gen_ins_del
+    AFTER INSERT OR DELETE ON claims
+    FOR EACH STATEMENT EXECUTE FUNCTION mm_bump_corpus_generation();
+DROP TRIGGER IF EXISTS claims_gen_upd ON claims;
+CREATE TRIGGER claims_gen_upd
+    AFTER UPDATE OF text, normalized_text, subject, predicate, object_value,
+        scope, confidence, status, pinned, tier, volatility, valid_from,
+        valid_until, archived_at, updated_at, last_validated_at ON claims
+    FOR EACH STATEMENT EXECUTE FUNCTION mm_bump_corpus_generation();
+""".strip()
+def apply_sqlite(conn) -> None:
+    conn.executescript(_SQLITE_TABLES)
+    has_claims = conn.execute(
+        "SELECT 1 FROM sqlite_master WHERE type='table' AND name='claims'"
+    ).fetchone()
+    if has_claims:
+        conn.executescript(_SQLITE_TRIGGERS)
+    commit = getattr(conn, "commit", None)
+    if callable(commit):
+        commit()
+def apply_postgres(conn) -> None:
+    cur = conn.cursor()
+    cur.execute(_POSTGRES_TABLES)
+    cur.execute("SELECT to_regclass('claims')")
+    row = cur.fetchone()
+    if row and row[0] is not None:
+        cur.execute(_POSTGRES_TRIGGERS)
+    commit = getattr(conn, "commit", None)
+    if callable(commit):
+        commit()

memorymaster 3.21.0__tar.gz → 3.22.0__tar.gz

memorymaster 3.21.0.tar.gz → 3.22.0.tar.gz