agmem 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/METADATA +144 -14
  2. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/RECORD +48 -28
  3. memvcs/cli.py +10 -0
  4. memvcs/commands/add.py +6 -0
  5. memvcs/commands/audit.py +59 -0
  6. memvcs/commands/clone.py +7 -0
  7. memvcs/commands/daemon.py +45 -0
  8. memvcs/commands/distill.py +24 -0
  9. memvcs/commands/federated.py +59 -0
  10. memvcs/commands/fsck.py +31 -0
  11. memvcs/commands/garden.py +22 -0
  12. memvcs/commands/gc.py +66 -0
  13. memvcs/commands/merge.py +55 -1
  14. memvcs/commands/prove.py +66 -0
  15. memvcs/commands/pull.py +27 -0
  16. memvcs/commands/resolve.py +130 -0
  17. memvcs/commands/timeline.py +27 -0
  18. memvcs/commands/verify.py +74 -23
  19. memvcs/commands/when.py +27 -0
  20. memvcs/core/audit.py +124 -0
  21. memvcs/core/compression_pipeline.py +157 -0
  22. memvcs/core/consistency.py +9 -9
  23. memvcs/core/crypto_verify.py +291 -0
  24. memvcs/core/distiller.py +47 -29
  25. memvcs/core/encryption.py +169 -0
  26. memvcs/core/federated.py +147 -0
  27. memvcs/core/gardener.py +47 -29
  28. memvcs/core/ipfs_remote.py +200 -0
  29. memvcs/core/knowledge_graph.py +77 -5
  30. memvcs/core/llm/__init__.py +10 -0
  31. memvcs/core/llm/anthropic_provider.py +50 -0
  32. memvcs/core/llm/base.py +27 -0
  33. memvcs/core/llm/factory.py +30 -0
  34. memvcs/core/llm/openai_provider.py +36 -0
  35. memvcs/core/merge.py +36 -23
  36. memvcs/core/objects.py +39 -19
  37. memvcs/core/pack.py +278 -0
  38. memvcs/core/privacy_budget.py +63 -0
  39. memvcs/core/remote.py +229 -3
  40. memvcs/core/repository.py +82 -2
  41. memvcs/core/temporal_index.py +9 -0
  42. memvcs/core/trust.py +103 -0
  43. memvcs/core/vector_store.py +15 -1
  44. memvcs/core/zk_proofs.py +158 -0
  45. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/WHEEL +0 -0
  46. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/entry_points.txt +0 -0
  47. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/licenses/LICENSE +0 -0
  48. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/top_level.txt +0 -0
memvcs/commands/verify.py CHANGED
@@ -1,7 +1,7 @@
 """
-agmem verify - Belief consistency checker.
+agmem verify - Belief consistency and cryptographic commit verification.

-Scans semantic memories for logical contradictions.
+Scans semantic memories for logical contradictions; optionally verifies commit Merkle/signatures.
 """

 import argparse
@@ -12,10 +12,10 @@ from ..core.consistency import ConsistencyChecker, ConsistencyResult


 class VerifyCommand:
-    """Verify belief consistency of semantic memories."""
+    """Verify belief consistency and/or cryptographic integrity of commits."""

     name = "verify"
-    help = "Scan semantic memories for logical contradictions"
+    help = "Scan semantic memories for contradictions; optionally verify commit signatures"

     @staticmethod
     def add_arguments(parser: argparse.ArgumentParser):
@@ -23,8 +23,17 @@ class VerifyCommand:
             "--consistency",
             "-c",
             action="store_true",
-            default=True,
-            help="Check for contradictions (default)",
+            help="Check semantic memories for contradictions",
+        )
+        parser.add_argument(
+            "--crypto",
+            action="store_true",
+            help="Verify Merkle tree and signatures for commits",
+        )
+        parser.add_argument(
+            "--ref",
+            metavar="REF",
+            help="Commit or ref to verify (with --crypto); default HEAD",
         )
         parser.add_argument(
             "--llm",
@@ -32,28 +41,70 @@ class VerifyCommand:
             help="Use LLM for triple extraction (requires OpenAI)",
         )

+    @staticmethod
+    def _run_crypto_verify(repo, ref: str = None) -> int:
+        """Run cryptographic verification. Returns 0 if all OK, 1 on failure."""
+        from ..core.crypto_verify import verify_commit, load_public_key
+
+        if ref:
+            commit_hash = repo.resolve_ref(ref)
+            if not commit_hash:
+                print(f"Ref not found: {ref}")
+                return 1
+        else:
+            head = repo.refs.get_head()
+            if head["type"] == "branch":
+                commit_hash = repo.refs.get_branch_commit(head["value"])
+            else:
+                commit_hash = head.get("value")
+            if not commit_hash:
+                print("No commit to verify (empty repo).")
+                return 0
+        pub = load_public_key(repo.mem_dir)
+        ok, err = verify_commit(
+            repo.object_store, commit_hash, public_key_pem=pub, mem_dir=repo.mem_dir
+        )
+        if ok:
+            print(f"Commit {commit_hash[:8]} verified (Merkle + signature OK).")
+            return 0
+        print(f"Commit {commit_hash[:8]} verification failed: {err}")
+        return 1
+
     @staticmethod
     def execute(args) -> int:
         repo, code = require_repo()
         if code != 0:
             return code

-        checker = ConsistencyChecker(repo, llm_provider="openai" if args.llm else None)
-        result = checker.check(use_llm=args.llm)
+        run_consistency = args.consistency
+        run_crypto = args.crypto
+        if not run_consistency and not run_crypto:
+            run_consistency = True

-        print(f"Checked {result.files_checked} semantic file(s)")
-        if result.valid:
-            print("No contradictions found.")
-            return 0
+        exit_code = 0

-        print(f"\nFound {len(result.contradictions)} contradiction(s):")
-        for i, c in enumerate(result.contradictions, 1):
-            print(f"\n[{i}] {c.reason}")
-            print(
-                f" {c.triple1.source}:{c.triple1.line}: {c.triple1.subject} {c.triple1.predicate} {c.triple1.obj}"
-            )
-            print(
-                f" {c.triple2.source}:{c.triple2.line}: {c.triple2.subject} {c.triple2.predicate} {c.triple2.obj}"
-            )
-        print("\nUse 'agmem repair --strategy confidence' to attempt auto-fix.")
-        return 1
+        if run_crypto:
+            if VerifyCommand._run_crypto_verify(repo, args.ref) != 0:
+                exit_code = 1
+
+        if run_consistency:
+            checker = ConsistencyChecker(repo, llm_provider="openai" if args.llm else None)
+            result = checker.check(use_llm=args.llm)
+
+            print(f"Checked {result.files_checked} semantic file(s)")
+            if result.valid:
+                print("No contradictions found.")
+            else:
+                exit_code = 1
+                print(f"\nFound {len(result.contradictions)} contradiction(s):")
+                for i, c in enumerate(result.contradictions, 1):
+                    print(f"\n[{i}] {c.reason}")
+                    print(
+                        f" {c.triple1.source}:{c.triple1.line}: {c.triple1.subject} {c.triple1.predicate} {c.triple1.obj}"
+                    )
+                    print(
+                        f" {c.triple2.source}:{c.triple2.line}: {c.triple2.subject} {c.triple2.predicate} {c.triple2.obj}"
+                    )
+                print("\nUse 'agmem repair --strategy confidence' to attempt auto-fix.")
+
+        return exit_code
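For context, a minimal sketch of how VerifyCommand's new flags compose when wired into an argparse CLI. The prog name and argv below are illustrative rather than taken from memvcs/cli.py, and running it assumes require_repo() finds a repository in the working directory:

import argparse

from memvcs.commands.verify import VerifyCommand

parser = argparse.ArgumentParser(prog="agmem")
sub = parser.add_subparsers(dest="command")
verify = sub.add_parser(VerifyCommand.name, help=VerifyCommand.help)
VerifyCommand.add_arguments(verify)

# Neither flag given -> consistency check alone (the old default behavior);
# --crypto alone verifies HEAD's Merkle tree and signature; passing both
# runs both checks and exits 1 if either fails.
args = parser.parse_args(["verify", "--crypto"])
raise SystemExit(VerifyCommand.execute(args))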
memvcs/commands/when.py CHANGED
@@ -34,6 +34,18 @@ class WhenCommand:
             default=10,
             help="Max commits to report (default: 10)",
         )
+        parser.add_argument(
+            "--from",
+            dest="from_ts",
+            metavar="ISO",
+            help="Start of time range (ISO 8601)",
+        )
+        parser.add_argument(
+            "--to",
+            dest="to_ts",
+            metavar="ISO",
+            help="End of time range (ISO 8601)",
+        )

     @staticmethod
     def execute(args) -> int:
@@ -48,6 +60,17 @@

         fact_lower = args.fact.lower()
         file_filter = args.file.replace("current/", "").lstrip("/") if args.file else None
+        from_ts = getattr(args, "from_ts", None)
+        to_ts = getattr(args, "to_ts", None)
+        commits_in_range = None
+        if from_ts and to_ts:
+            try:
+                from ..core.temporal_index import TemporalIndex
+                ti = TemporalIndex(repo.mem_dir, repo.object_store)
+                range_entries = ti.range_query(from_ts, to_ts)
+                commits_in_range = {ch for _, ch in range_entries}
+            except Exception:
+                pass

         # Walk commit history from HEAD
         head = repo.refs.get_head()
@@ -63,6 +86,10 @@
             if commit_hash in seen:
                 break
             seen.add(commit_hash)
+            if commits_in_range is not None and commit_hash not in commits_in_range:
+                commit = Commit.load(repo.object_store, commit_hash)
+                commit_hash = commit.parents[0] if commit and commit.parents else None
+                continue

             commit = Commit.load(repo.object_store, commit_hash)
             if not commit:
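The new --from/--to filter reduces to a set lookup against the temporal index. A hypothetical helper mirroring that logic, under the assumption (implied by the set comprehension above) that range_query yields (timestamp, commit_hash) pairs:

def commits_between(repo, start_iso: str, end_iso: str) -> set:
    """Hypothetical helper mirroring WhenCommand's range filter."""
    from memvcs.core.temporal_index import TemporalIndex

    ti = TemporalIndex(repo.mem_dir, repo.object_store)
    # Collect commits the index recorded inside the ISO 8601 window.
    return {commit_hash for _, commit_hash in ti.range_query(start_iso, end_iso)}

Commits outside the returned set are skipped during the walk, but the traversal still follows parent links, so older matches remain reachable.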
memvcs/core/audit.py ADDED
@@ -0,0 +1,124 @@
+"""
+Tamper-evident audit trail for agmem.
+
+Append-only, hash-chained log of significant operations.
+"""
+
+import datetime
+import hashlib
+import hmac
+import json
+import os
+from pathlib import Path
+from typing import Optional, List, Dict, Any, Tuple
+
+
+def _audit_dir(mem_dir: Path) -> Path:
+    return mem_dir / "audit"
+
+
+def _log_path(mem_dir: Path) -> Path:
+    return _audit_dir(mem_dir) / "log"
+
+
+def _get_previous_hash(mem_dir: Path) -> str:
+    """Read last line of audit log and return its entry hash, or empty for first entry."""
+    path = _log_path(mem_dir)
+    if not path.exists():
+        return ""
+    lines = path.read_text().strip().split("\n")
+    if not lines:
+        return ""
+    # Format per line: entry_hash\tpayload_json
+    for line in reversed(lines):
+        line = line.strip()
+        if not line:
+            continue
+        if "\t" in line:
+            return line.split("\t", 1)[0]
+        return ""
+    return ""
+
+
+def _hash_entry(prev_hash: str, payload: str) -> str:
+    """Compute this entry's hash: SHA-256(prev_hash + payload)."""
+    return hashlib.sha256((prev_hash + payload).encode()).hexdigest()
+
+
+def append_audit(
+    mem_dir: Path,
+    operation: str,
+    details: Optional[Dict[str, Any]] = None,
+) -> None:
+    """
+    Append a tamper-evident audit entry. Write synchronously.
+    Each entry: entry_hash TAB payload_json (payload has timestamp, operation, details, prev_hash).
+    """
+    mem_dir = Path(mem_dir)
+    _audit_dir(mem_dir).mkdir(parents=True, exist_ok=True)
+    path = _log_path(mem_dir)
+    prev_hash = _get_previous_hash(mem_dir)
+    payload = {
+        "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
+        "operation": operation,
+        "details": details or {},
+        "prev_hash": prev_hash,
+    }
+    payload_str = json.dumps(payload, sort_keys=True)
+    entry_hash = _hash_entry(prev_hash, payload_str)
+    line = f"{entry_hash}\t{payload_str}\n"
+    with open(path, "a", encoding="utf-8") as f:
+        f.write(line)
+        f.flush()
+        try:
+            os.fsync(f.fileno())
+        except (AttributeError, OSError):
+            pass
+
+
+def read_audit(mem_dir: Path, max_entries: int = 1000) -> List[Dict[str, Any]]:
+    """Read audit log entries (newest first). Each entry has entry_hash, prev_hash, timestamp, operation, details."""
+    path = _log_path(mem_dir)
+    if not path.exists():
+        return []
+    entries = []
+    for line in reversed(path.read_text().strip().split("\n")):
+        line = line.strip()
+        if not line:
+            continue
+        if "\t" not in line:
+            continue
+        entry_hash, payload_str = line.split("\t", 1)
+        try:
+            payload = json.loads(payload_str)
+        except json.JSONDecodeError:
+            continue
+        payload["entry_hash"] = entry_hash
+        entries.append(payload)
+        if len(entries) >= max_entries:
+            break
+    return entries
+
+
+def verify_audit(mem_dir: Path) -> Tuple[bool, Optional[int]]:
+    """
+    Verify the audit log chain. Returns (valid, first_bad_index).
+    first_bad_index is 0-based index of first entry that fails chain verification.
+    """
+    path = _log_path(mem_dir)
+    if not path.exists():
+        return (True, None)
+    lines = path.read_text().strip().split("\n")
+    prev_hash = ""
+    for i, line in enumerate(lines):
+        line = line.strip()
+        if not line:
+            continue
+        if "\t" not in line:
+            return (False, i)
+        entry_hash, payload_str = line.split("\t", 1)
+        expected_hash = _hash_entry(prev_hash, payload_str)
+        if not hmac.compare_digest(entry_hash, expected_hash):
+            return (False, i)
+        prev_hash = entry_hash
+    return (True, None)
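A short usage sketch for the new audit trail, using only the functions above; the .mem path and the operation names are illustrative:

from pathlib import Path

from memvcs.core.audit import append_audit, read_audit, verify_audit

mem_dir = Path(".mem")  # hypothetical repository metadata directory

# Each line stores SHA-256(prev_hash + payload_json) as its entry hash,
# so editing or deleting any earlier line breaks every later hash.
append_audit(mem_dir, "commit", {"branch": "main"})
append_audit(mem_dir, "gc", {"objects_pruned": 4})

ok, first_bad = verify_audit(mem_dir)  # (True, None) while the chain is intact
for entry in read_audit(mem_dir, max_entries=5):  # newest first
    print(entry["timestamp"], entry["operation"], entry["entry_hash"][:8])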
memvcs/core/compression_pipeline.py ADDED
@@ -0,0 +1,157 @@
+"""
+Enhanced semantic compression pipeline for agmem (#11).
+
+Multi-stage: chunk -> fact extraction -> dedup -> embed -> tiered storage.
+Hybrid retrieval (keyword + vector) is in memvcs.retrieval.strategies.HybridStrategy.
+"""
+
+import hashlib
+import re
+from pathlib import Path
+from typing import List, Optional, Tuple, Any
+
+from .constants import MEMORY_TYPES
+
+CHUNK_SIZE_DEFAULT = 512
+CHUNK_OVERLAP = 64
+DEDUP_HASH_ALGO = "sha256"
+TIER_HOT_DAYS = 7
+
+
+def chunk_by_size(text: str, size: int = CHUNK_SIZE_DEFAULT, overlap: int = CHUNK_OVERLAP) -> List[str]:
+    """Split text into chunks by character size with optional overlap."""
+    if not text or size <= 0:
+        return []
+    chunks = []
+    start = 0
+    while start < len(text):
+        end = min(start + size, len(text))
+        chunk = text[start:end].strip()
+        if chunk:
+            chunks.append(chunk)
+        start = end - overlap if end < len(text) else len(text)
+    return chunks
+
+
+def chunk_by_sentences(text: str, max_chunk_chars: int = 512) -> List[str]:
+    """Split text into chunks by sentence boundaries, up to max_chunk_chars per chunk."""
+    if not text:
+        return []
+    sentences = re.split(r'(?<=[.!?])\s+', text)
+    chunks = []
+    current = []
+    current_len = 0
+    for s in sentences:
+        s = s.strip()
+        if not s:
+            continue
+        if current_len + len(s) + 1 <= max_chunk_chars:
+            current.append(s)
+            current_len += len(s) + 1
+        else:
+            if current:
+                chunks.append(" ".join(current))
+            current = [s]
+            current_len = len(s) + 1
+    if current:
+        chunks.append(" ".join(current))
+    return chunks
+
+
+def extract_facts_from_chunk(chunk: str) -> List[str]:
+    """Extract fact-like lines (bullets or short statements). Reuse distiller logic in callers if needed."""
+    facts = []
+    for line in chunk.splitlines():
+        line = line.strip()
+        if not line or line.startswith("#"):
+            continue
+        if line.startswith("- ") and len(line) > 10:
+            facts.append(line)
+        elif len(line) > 20 and len(line) < 300 and not line.startswith("```"):
+            facts.append(line)
+    return facts[:15]
+
+
+def dedup_by_hash(items: List[str]) -> List[Tuple[str, str]]:
+    """Return (item, hash_hex) for unique items by content hash. Order preserved, first occurrence kept."""
+    seen_hashes = set()
+    result = []
+    for item in items:
+        h = hashlib.new(DEDUP_HASH_ALGO, item.encode()).hexdigest()
+        if h not in seen_hashes:
+            seen_hashes.add(h)
+            result.append((item, h))
+    return result
+
+
+def dedup_by_similarity_threshold(
+    items: List[str], vector_store: Any, threshold: float = 0.95
+) -> List[str]:
+    """Filter items by embedding similarity; keep first of clusters above threshold. Requires vector_store."""
+    if not items or vector_store is None:
+        return items
+    try:
+        embeddings = vector_store.embed(items)
+        kept = [items[0]]
+        for i in range(1, len(items)):
+            sims = [vector_store.similarity(embeddings[i], vector_store.embed([kept[j]])[0]) for j in range(len(kept))]
+            if not any(s >= threshold for s in sims):
+                kept.append(items[i])
+        return kept
+    except Exception:
+        return items
+
+
+class CompressionPipeline:
+    """
+    Multi-stage compression: chunk -> optional fact extraction -> dedup -> optional embed -> tiered storage.
+    Wire to vector_store and retrieval for hybrid recall.
+    """
+
+    def __init__(
+        self,
+        chunk_size: int = CHUNK_SIZE_DEFAULT,
+        use_sentences: bool = True,
+        extract_facts: bool = False,
+        dedup_hash: bool = True,
+        vector_store: Optional[Any] = None,
+        tier_by_recency: bool = True,
+    ):
+        self.chunk_size = chunk_size
+        self.use_sentences = use_sentences
+        self.extract_facts = extract_facts
+        self.dedup_hash = dedup_hash
+        self.vector_store = vector_store
+        self.tier_by_recency = tier_by_recency
+
+    def chunk(self, text: str) -> List[str]:
+        """Chunk text by size or sentences."""
+        if self.use_sentences:
+            return chunk_by_sentences(text, max_chunk_chars=self.chunk_size)
+        return chunk_by_size(text, size=self.chunk_size)
+
+    def run(self, text: str, path: Optional[Path] = None) -> List[Tuple[str, str, Optional[str]]]:
+        """
+        Run pipeline: chunk -> optional fact extraction -> dedup.
+        Returns list of (content, content_hash, tier) where tier is "hot" or "cold" or None.
+        """
+        chunks = self.chunk(text)
+        if self.extract_facts:
+            facts = []
+            for c in chunks:
+                facts.extend(extract_facts_from_chunk(c))
+            chunks = facts if facts else chunks
+        if self.dedup_hash:
+            chunk_tuples = dedup_by_hash(chunks)
+        else:
+            chunk_tuples = [(c, hashlib.new(DEDUP_HASH_ALGO, c.encode()).hexdigest()) for c in chunks]
+        tier = None
+        if self.tier_by_recency and path and path.exists():
+            try:
+                mtime = path.stat().st_mtime
+                from datetime import datetime, timezone
+                age_days = (datetime.now(timezone.utc).timestamp() - mtime) / 86400
+                tier = "hot" if age_days <= TIER_HOT_DAYS else "cold"
+            except Exception:
+                pass
+        return [(c, h, tier) for c, h in chunk_tuples]
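A short usage sketch for the pipeline; the note path is hypothetical, and with no vector_store supplied only the hash-based dedup stage runs:

from pathlib import Path

from memvcs.core.compression_pipeline import CompressionPipeline

pipeline = CompressionPipeline(chunk_size=256, extract_facts=True)
note = Path("current/semantic/preferences.md")  # hypothetical memory file

# run() yields (content, sha256_hex, tier): tier is "hot" for files touched
# within TIER_HOT_DAYS (7 days), "cold" otherwise, or None without a path.
for content, content_hash, tier in pipeline.run(note.read_text(), path=note):
    print(tier, content_hash[:8], content[:60])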
memvcs/core/consistency.py CHANGED
@@ -100,23 +100,23 @@ class ConsistencyChecker:
         return triples

     def _extract_triples_llm(self, content: str, source: str) -> List[Triple]:
-        """Extract triples using LLM."""
+        """Extract triples using LLM (multi-provider)."""
         try:
-            import openai
+            from .llm import get_provider

-            response = openai.chat.completions.create(
-                model="gpt-3.5-turbo",
-                messages=[
+            provider = get_provider(provider_name=self.llm_provider)
+            if not provider:
+                return []
+            text = provider.complete(
+                [
                     {
                         "role": "system",
-                        "content": "Extract factual statements as (subject, predicate, object) triples. "
-                        "One per line, format: SUBJECT | PREDICATE | OBJECT",
+                        "content": "Extract factual statements as (subject, predicate, object) triples. One per line, format: SUBJECT | PREDICATE | OBJECT",
                     },
                     {"role": "user", "content": content[:3000]},
                 ],
                 max_tokens=500,
             )
-            text = response.choices[0].message.content
             triples = []
             for i, line in enumerate(text.splitlines(), 1):
                 if "|" in line:
@@ -138,7 +138,7 @@ class ConsistencyChecker:

     def extract_triples(self, content: str, source: str, use_llm: bool = False) -> List[Triple]:
         """Extract triples from content."""
-        if use_llm and self.llm_provider == "openai":
+        if use_llm and self.llm_provider:
             t = self._extract_triples_llm(content, source)
             if t:
                 return t
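With the provider factory in place, triple extraction is no longer tied to OpenAI. A sketch of the provider API as used above, assuming get_provider returns None when the requested backend is not configured; "anthropic" as a provider name and the system prompt are illustrative:

from memvcs.core.llm import get_provider

# provider_name mirrors ConsistencyChecker's llm_provider value; the
# anthropic_provider.py and openai_provider.py modules ship in this release.
provider = get_provider(provider_name="anthropic")
if provider:
    text = provider.complete(
        [
            {"role": "system", "content": "Reply with SUBJECT | PREDICATE | OBJECT lines."},
            {"role": "user", "content": "Alice prefers tea over coffee."},
        ],
        max_tokens=100,
    )
    print(text)  # raw completion text, one candidate triple per line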