agmem-0.1.2-py3-none-any.whl → agmem-0.1.4-py3-none-any.whl
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/METADATA +144 -14
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/RECORD +48 -28
- memvcs/cli.py +10 -0
- memvcs/commands/add.py +6 -0
- memvcs/commands/audit.py +59 -0
- memvcs/commands/clone.py +7 -0
- memvcs/commands/daemon.py +45 -0
- memvcs/commands/distill.py +24 -0
- memvcs/commands/federated.py +59 -0
- memvcs/commands/fsck.py +31 -0
- memvcs/commands/garden.py +22 -0
- memvcs/commands/gc.py +66 -0
- memvcs/commands/merge.py +55 -1
- memvcs/commands/prove.py +66 -0
- memvcs/commands/pull.py +27 -0
- memvcs/commands/resolve.py +130 -0
- memvcs/commands/timeline.py +27 -0
- memvcs/commands/verify.py +74 -23
- memvcs/commands/when.py +27 -0
- memvcs/core/audit.py +124 -0
- memvcs/core/compression_pipeline.py +157 -0
- memvcs/core/consistency.py +9 -9
- memvcs/core/crypto_verify.py +291 -0
- memvcs/core/distiller.py +47 -29
- memvcs/core/encryption.py +169 -0
- memvcs/core/federated.py +147 -0
- memvcs/core/gardener.py +47 -29
- memvcs/core/ipfs_remote.py +200 -0
- memvcs/core/knowledge_graph.py +77 -5
- memvcs/core/llm/__init__.py +10 -0
- memvcs/core/llm/anthropic_provider.py +50 -0
- memvcs/core/llm/base.py +27 -0
- memvcs/core/llm/factory.py +30 -0
- memvcs/core/llm/openai_provider.py +36 -0
- memvcs/core/merge.py +36 -23
- memvcs/core/objects.py +39 -19
- memvcs/core/pack.py +278 -0
- memvcs/core/privacy_budget.py +63 -0
- memvcs/core/remote.py +229 -3
- memvcs/core/repository.py +82 -2
- memvcs/core/temporal_index.py +9 -0
- memvcs/core/trust.py +103 -0
- memvcs/core/vector_store.py +15 -1
- memvcs/core/zk_proofs.py +158 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/WHEEL +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/top_level.txt +0 -0
memvcs/commands/verify.py
CHANGED

```diff
@@ -1,7 +1,7 @@
 """
-agmem verify - Belief consistency
+agmem verify - Belief consistency and cryptographic commit verification.
 
-Scans semantic memories for logical contradictions.
+Scans semantic memories for logical contradictions; optionally verifies commit Merkle/signatures.
 """
 
 import argparse
@@ -12,10 +12,10 @@ from ..core.consistency import ConsistencyChecker, ConsistencyResult
 
 
 class VerifyCommand:
-    """Verify belief consistency of
+    """Verify belief consistency and/or cryptographic integrity of commits."""
 
     name = "verify"
-    help = "Scan semantic memories for
+    help = "Scan semantic memories for contradictions; optionally verify commit signatures"
 
     @staticmethod
     def add_arguments(parser: argparse.ArgumentParser):
@@ -23,8 +23,17 @@ class VerifyCommand:
             "--consistency",
             "-c",
             action="store_true",
-
-
+            help="Check semantic memories for contradictions",
+        )
+        parser.add_argument(
+            "--crypto",
+            action="store_true",
+            help="Verify Merkle tree and signatures for commits",
+        )
+        parser.add_argument(
+            "--ref",
+            metavar="REF",
+            help="Commit or ref to verify (with --crypto); default HEAD",
         )
         parser.add_argument(
             "--llm",
@@ -32,28 +41,70 @@ class VerifyCommand:
             help="Use LLM for triple extraction (requires OpenAI)",
         )
 
+    @staticmethod
+    def _run_crypto_verify(repo, ref: str = None) -> int:
+        """Run cryptographic verification. Returns 0 if all OK, 1 on failure."""
+        from ..core.crypto_verify import verify_commit, load_public_key
+
+        if ref:
+            commit_hash = repo.resolve_ref(ref)
+            if not commit_hash:
+                print(f"Ref not found: {ref}")
+                return 1
+        else:
+            head = repo.refs.get_head()
+            if head["type"] == "branch":
+                commit_hash = repo.refs.get_branch_commit(head["value"])
+            else:
+                commit_hash = head.get("value")
+        if not commit_hash:
+            print("No commit to verify (empty repo).")
+            return 0
+        pub = load_public_key(repo.mem_dir)
+        ok, err = verify_commit(
+            repo.object_store, commit_hash, public_key_pem=pub, mem_dir=repo.mem_dir
+        )
+        if ok:
+            print(f"Commit {commit_hash[:8]} verified (Merkle + signature OK).")
+            return 0
+        print(f"Commit {commit_hash[:8]} verification failed: {err}")
+        return 1
+
     @staticmethod
     def execute(args) -> int:
         repo, code = require_repo()
         if code != 0:
             return code
 
-
-
+        run_consistency = args.consistency
+        run_crypto = args.crypto
+        if not run_consistency and not run_crypto:
+            run_consistency = True
 
-
-        if result.valid:
-            print("No contradictions found.")
-            return 0
+        exit_code = 0
 
-
-
-
-
-
-        )
-
-
-        )
-
-
+        if run_crypto:
+            if VerifyCommand._run_crypto_verify(repo, args.ref) != 0:
+                exit_code = 1
+
+        if run_consistency:
+            checker = ConsistencyChecker(repo, llm_provider="openai" if args.llm else None)
+            result = checker.check(use_llm=args.llm)
+
+            print(f"Checked {result.files_checked} semantic file(s)")
+            if result.valid:
+                print("No contradictions found.")
+            else:
+                exit_code = 1
+                print(f"\nFound {len(result.contradictions)} contradiction(s):")
+                for i, c in enumerate(result.contradictions, 1):
+                    print(f"\n[{i}] {c.reason}")
+                    print(
+                        f"  {c.triple1.source}:{c.triple1.line}: {c.triple1.subject} {c.triple1.predicate} {c.triple1.obj}"
+                    )
+                    print(
+                        f"  {c.triple2.source}:{c.triple2.line}: {c.triple2.subject} {c.triple2.predicate} {c.triple2.obj}"
+                    )
+                print("\nUse 'agmem repair --strategy confidence' to attempt auto-fix.")
+
+        return exit_code
```
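To see the reworked dual-mode behavior end to end, here is a minimal driver sketch (not part of the package): it builds the same `argparse.Namespace` that `add_arguments` would produce and calls `VerifyCommand.execute`, which now returns 0 only when every requested check passes. On the CLI this corresponds to `agmem verify --consistency --crypto`.

```python
# Minimal sketch, assuming a repository initialized by agmem in the cwd.
# The Namespace fields mirror add_arguments() above.
import argparse

from memvcs.commands.verify import VerifyCommand

args = argparse.Namespace(consistency=True, crypto=True, ref=None, llm=False)
exit_code = VerifyCommand.execute(args)  # 0 = all checks passed, 1 = any failure
```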
memvcs/commands/when.py
CHANGED

```diff
@@ -34,6 +34,18 @@ class WhenCommand:
             default=10,
             help="Max commits to report (default: 10)",
         )
+        parser.add_argument(
+            "--from",
+            dest="from_ts",
+            metavar="ISO",
+            help="Start of time range (ISO 8601)",
+        )
+        parser.add_argument(
+            "--to",
+            dest="to_ts",
+            metavar="ISO",
+            help="End of time range (ISO 8601)",
+        )
 
     @staticmethod
     def execute(args) -> int:
@@ -48,6 +60,17 @@
 
         fact_lower = args.fact.lower()
         file_filter = args.file.replace("current/", "").lstrip("/") if args.file else None
+        from_ts = getattr(args, "from_ts", None)
+        to_ts = getattr(args, "to_ts", None)
+        commits_in_range = None
+        if from_ts and to_ts:
+            try:
+                from ..core.temporal_index import TemporalIndex
+                ti = TemporalIndex(repo.mem_dir, repo.object_store)
+                range_entries = ti.range_query(from_ts, to_ts)
+                commits_in_range = {ch for _, ch in range_entries}
+            except Exception:
+                pass
 
         # Walk commit history from HEAD
         head = repo.refs.get_head()
@@ -63,6 +86,10 @@
             if commit_hash in seen:
                 break
             seen.add(commit_hash)
+            if commits_in_range is not None and commit_hash not in commits_in_range:
+                commit = Commit.load(repo.object_store, commit_hash)
+                commit_hash = commit.parents[0] if commit and commit.parents else None
+                continue
 
             commit = Commit.load(repo.object_store, commit_hash)
             if not commit:
```
memvcs/core/audit.py
ADDED

```diff
@@ -0,0 +1,124 @@
+"""
+Tamper-evident audit trail for agmem.
+
+Append-only, hash-chained log of significant operations.
+"""
+
+import datetime
+import hashlib
+import hmac
+import json
+import os
+from pathlib import Path
+from typing import Optional, List, Dict, Any, Tuple
+
+
+def _audit_dir(mem_dir: Path) -> Path:
+    return mem_dir / "audit"
+
+
+def _log_path(mem_dir: Path) -> Path:
+    return _audit_dir(mem_dir) / "log"
+
+
+def _get_previous_hash(mem_dir: Path) -> str:
+    """Read last line of audit log and return its entry hash, or empty for first entry."""
+    path = _log_path(mem_dir)
+    if not path.exists():
+        return ""
+    lines = path.read_text().strip().split("\n")
+    if not lines:
+        return ""
+    # Format per line: entry_hash\tpayload_json
+    for line in reversed(lines):
+        line = line.strip()
+        if not line:
+            continue
+        if "\t" in line:
+            return line.split("\t", 1)[0]
+        return ""
+    return ""
+
+
+def _hash_entry(prev_hash: str, payload: str) -> str:
+    """Compute this entry's hash: SHA-256(prev_hash + payload)."""
+    return hashlib.sha256((prev_hash + payload).encode()).hexdigest()
+
+
+def append_audit(
+    mem_dir: Path,
+    operation: str,
+    details: Optional[Dict[str, Any]] = None,
+) -> None:
+    """
+    Append a tamper-evident audit entry. Write synchronously.
+    Each entry: entry_hash TAB payload_json (payload has timestamp, operation, details, prev_hash).
+    """
+    mem_dir = Path(mem_dir)
+    _audit_dir(mem_dir).mkdir(parents=True, exist_ok=True)
+    path = _log_path(mem_dir)
+    prev_hash = _get_previous_hash(mem_dir)
+    payload = {
+        "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
+        "operation": operation,
+        "details": details or {},
+        "prev_hash": prev_hash,
+    }
+    payload_str = json.dumps(payload, sort_keys=True)
+    entry_hash = _hash_entry(prev_hash, payload_str)
+    line = f"{entry_hash}\t{payload_str}\n"
+    with open(path, "a", encoding="utf-8") as f:
+        f.write(line)
+        f.flush()
+        try:
+            os.fsync(f.fileno())
+        except (AttributeError, OSError):
+            pass
+
+
+def read_audit(mem_dir: Path, max_entries: int = 1000) -> List[Dict[str, Any]]:
+    """Read audit log entries (newest first). Each entry has entry_hash, prev_hash, timestamp, operation, details."""
+    path = _log_path(mem_dir)
+    if not path.exists():
+        return []
+    entries = []
+    for line in reversed(path.read_text().strip().split("\n")):
+        line = line.strip()
+        if not line:
+            continue
+        if "\t" not in line:
+            continue
+        entry_hash, payload_str = line.split("\t", 1)
+        try:
+            payload = json.loads(payload_str)
+        except json.JSONDecodeError:
+            continue
+        payload["entry_hash"] = entry_hash
+        entries.append(payload)
+        if len(entries) >= max_entries:
+            break
+    return entries
+
+
+def verify_audit(mem_dir: Path) -> Tuple[bool, Optional[int]]:
+    """
+    Verify the audit log chain. Returns (valid, first_bad_index).
+    first_bad_index is 0-based index of first entry that fails chain verification.
+    """
+    path = _log_path(mem_dir)
+    if not path.exists():
+        return (True, None)
+    lines = path.read_text().strip().split("\n")
+    prev_hash = ""
+    for i, line in enumerate(lines):
+        line = line.strip()
+        if not line:
+            continue
+        if "\t" not in line:
+            return (False, i)
+        entry_hash, payload_str = line.split("\t", 1)
+        expected_hash = _hash_entry(prev_hash, payload_str)
+        if not hmac.compare_digest(entry_hash, expected_hash):
+            return (False, i)
+        prev_hash = entry_hash
+    return (True, None)
```
memvcs/core/compression_pipeline.py
ADDED

````diff
@@ -0,0 +1,157 @@
+"""
+Enhanced semantic compression pipeline for agmem (#11).
+
+Multi-stage: chunk -> fact extraction -> dedup -> embed -> tiered storage.
+Hybrid retrieval (keyword + vector) is in memvcs.retrieval.strategies.HybridStrategy.
+"""
+
+import hashlib
+import re
+from pathlib import Path
+from typing import List, Optional, Tuple, Any
+
+from .constants import MEMORY_TYPES
+
+CHUNK_SIZE_DEFAULT = 512
+CHUNK_OVERLAP = 64
+DEDUP_HASH_ALGO = "sha256"
+TIER_HOT_DAYS = 7
+
+
+def chunk_by_size(text: str, size: int = CHUNK_SIZE_DEFAULT, overlap: int = CHUNK_OVERLAP) -> List[str]:
+    """Split text into chunks by character size with optional overlap."""
+    if not text or size <= 0:
+        return []
+    chunks = []
+    start = 0
+    while start < len(text):
+        end = min(start + size, len(text))
+        chunk = text[start:end].strip()
+        if chunk:
+            chunks.append(chunk)
+        start = end - overlap if end < len(text) else len(text)
+    return chunks
+
+
+def chunk_by_sentences(text: str, max_chunk_chars: int = 512) -> List[str]:
+    """Split text into chunks by sentence boundaries, up to max_chunk_chars per chunk."""
+    if not text:
+        return []
+    sentences = re.split(r'(?<=[.!?])\s+', text)
+    chunks = []
+    current = []
+    current_len = 0
+    for s in sentences:
+        s = s.strip()
+        if not s:
+            continue
+        if current_len + len(s) + 1 <= max_chunk_chars:
+            current.append(s)
+            current_len += len(s) + 1
+        else:
+            if current:
+                chunks.append(" ".join(current))
+            current = [s]
+            current_len = len(s) + 1
+    if current:
+        chunks.append(" ".join(current))
+    return chunks
+
+
+def extract_facts_from_chunk(chunk: str) -> List[str]:
+    """Extract fact-like lines (bullets or short statements). Reuse distiller logic in callers if needed."""
+    facts = []
+    for line in chunk.splitlines():
+        line = line.strip()
+        if not line or line.startswith("#"):
+            continue
+        if line.startswith("- ") and len(line) > 10:
+            facts.append(line)
+        elif len(line) > 20 and len(line) < 300 and not line.startswith("```"):
+            facts.append(line)
+    return facts[:15]
+
+
+def dedup_by_hash(items: List[str]) -> List[Tuple[str, str]]:
+    """Return (item, hash_hex) for unique items by content hash. Order preserved, first occurrence kept."""
+    seen_hashes = set()
+    result = []
+    for item in items:
+        h = hashlib.new(DEDUP_HASH_ALGO, item.encode()).hexdigest()
+        if h not in seen_hashes:
+            seen_hashes.add(h)
+            result.append((item, h))
+    return result
+
+
+def dedup_by_similarity_threshold(
+    items: List[str], vector_store: Any, threshold: float = 0.95
+) -> List[str]:
+    """Filter items by embedding similarity; keep first of clusters above threshold. Requires vector_store."""
+    if not items or vector_store is None:
+        return items
+    try:
+        embeddings = vector_store.embed(items)
+        kept = [items[0]]
+        for i in range(1, len(items)):
+            sims = [vector_store.similarity(embeddings[i], vector_store.embed([kept[j]])[0]) for j in range(len(kept))]
+            if not any(s >= threshold for s in sims):
+                kept.append(items[i])
+        return kept
+    except Exception:
+        return items
+
+
+class CompressionPipeline:
+    """
+    Multi-stage compression: chunk -> optional fact extraction -> dedup -> optional embed -> tiered storage.
+    Wire to vector_store and retrieval for hybrid recall.
+    """
+
+    def __init__(
+        self,
+        chunk_size: int = CHUNK_SIZE_DEFAULT,
+        use_sentences: bool = True,
+        extract_facts: bool = False,
+        dedup_hash: bool = True,
+        vector_store: Optional[Any] = None,
+        tier_by_recency: bool = True,
+    ):
+        self.chunk_size = chunk_size
+        self.use_sentences = use_sentences
+        self.extract_facts = extract_facts
+        self.dedup_hash = dedup_hash
+        self.vector_store = vector_store
+        self.tier_by_recency = tier_by_recency
+
+    def chunk(self, text: str) -> List[str]:
+        """Chunk text by size or sentences."""
+        if self.use_sentences:
+            return chunk_by_sentences(text, max_chunk_chars=self.chunk_size)
+        return chunk_by_size(text, size=self.chunk_size)
+
+    def run(self, text: str, path: Optional[Path] = None) -> List[Tuple[str, str, Optional[str]]]:
+        """
+        Run pipeline: chunk -> optional fact extraction -> dedup.
+        Returns list of (content, content_hash, tier) where tier is "hot" or "cold" or None.
+        """
+        chunks = self.chunk(text)
+        if self.extract_facts:
+            facts = []
+            for c in chunks:
+                facts.extend(extract_facts_from_chunk(c))
+            chunks = facts if facts else chunks
+        if self.dedup_hash:
+            chunk_tuples = dedup_by_hash(chunks)
+        else:
+            chunk_tuples = [(c, hashlib.new(DEDUP_HASH_ALGO, c.encode()).hexdigest()) for c in chunks]
+        tier = None
+        if self.tier_by_recency and path and path.exists():
+            try:
+                mtime = path.stat().st_mtime
+                from datetime import datetime, timezone
+                age_days = (datetime.now(timezone.utc).timestamp() - mtime) / 86400
+                tier = "hot" if age_days <= TIER_HOT_DAYS else "cold"
+            except Exception:
+                pass
+        return [(c, h, tier) for c, h in chunk_tuples]
````
memvcs/core/consistency.py
CHANGED

```diff
@@ -100,23 +100,23 @@ class ConsistencyChecker:
         return triples
 
     def _extract_triples_llm(self, content: str, source: str) -> List[Triple]:
-        """Extract triples using LLM."""
+        """Extract triples using LLM (multi-provider)."""
         try:
-            import
+            from .llm import get_provider
 
-
-
-
+            provider = get_provider(provider_name=self.llm_provider)
+            if not provider:
+                return []
+            text = provider.complete(
+                [
                     {
                         "role": "system",
-                        "content": "Extract factual statements as (subject, predicate, object) triples. "
-                        "One per line, format: SUBJECT | PREDICATE | OBJECT",
+                        "content": "Extract factual statements as (subject, predicate, object) triples. One per line, format: SUBJECT | PREDICATE | OBJECT",
                     },
                     {"role": "user", "content": content[:3000]},
                 ],
                 max_tokens=500,
             )
-            text = response.choices[0].message.content
             triples = []
             for i, line in enumerate(text.splitlines(), 1):
                 if "|" in line:
@@ -138,7 +138,7 @@ class ConsistencyChecker:
 
     def extract_triples(self, content: str, source: str, use_llm: bool = False) -> List[Triple]:
         """Extract triples from content."""
-        if use_llm and self.llm_provider
+        if use_llm and self.llm_provider:
             t = self._extract_triples_llm(content, source)
             if t:
                 return t
```