agmem 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/METADATA +157 -16
- agmem-0.1.3.dist-info/RECORD +105 -0
- memvcs/__init__.py +1 -1
- memvcs/cli.py +45 -31
- memvcs/commands/__init__.py +9 -9
- memvcs/commands/add.py +83 -76
- memvcs/commands/audit.py +59 -0
- memvcs/commands/blame.py +46 -53
- memvcs/commands/branch.py +13 -33
- memvcs/commands/checkout.py +27 -32
- memvcs/commands/clean.py +18 -23
- memvcs/commands/clone.py +11 -1
- memvcs/commands/commit.py +40 -39
- memvcs/commands/daemon.py +109 -76
- memvcs/commands/decay.py +77 -0
- memvcs/commands/diff.py +56 -57
- memvcs/commands/distill.py +90 -0
- memvcs/commands/federated.py +53 -0
- memvcs/commands/fsck.py +86 -61
- memvcs/commands/garden.py +40 -35
- memvcs/commands/gc.py +51 -0
- memvcs/commands/graph.py +41 -48
- memvcs/commands/init.py +16 -24
- memvcs/commands/log.py +25 -40
- memvcs/commands/merge.py +69 -27
- memvcs/commands/pack.py +129 -0
- memvcs/commands/prove.py +66 -0
- memvcs/commands/pull.py +31 -1
- memvcs/commands/push.py +4 -2
- memvcs/commands/recall.py +145 -0
- memvcs/commands/reflog.py +13 -22
- memvcs/commands/remote.py +1 -0
- memvcs/commands/repair.py +66 -0
- memvcs/commands/reset.py +23 -33
- memvcs/commands/resolve.py +130 -0
- memvcs/commands/resurrect.py +82 -0
- memvcs/commands/search.py +3 -4
- memvcs/commands/serve.py +2 -1
- memvcs/commands/show.py +66 -36
- memvcs/commands/stash.py +34 -34
- memvcs/commands/status.py +27 -35
- memvcs/commands/tag.py +23 -47
- memvcs/commands/test.py +30 -44
- memvcs/commands/timeline.py +111 -0
- memvcs/commands/tree.py +26 -27
- memvcs/commands/verify.py +110 -0
- memvcs/commands/when.py +115 -0
- memvcs/core/access_index.py +167 -0
- memvcs/core/audit.py +124 -0
- memvcs/core/config_loader.py +3 -1
- memvcs/core/consistency.py +214 -0
- memvcs/core/crypto_verify.py +280 -0
- memvcs/core/decay.py +185 -0
- memvcs/core/diff.py +158 -143
- memvcs/core/distiller.py +277 -0
- memvcs/core/encryption.py +169 -0
- memvcs/core/federated.py +86 -0
- memvcs/core/gardener.py +176 -145
- memvcs/core/hooks.py +48 -14
- memvcs/core/ipfs_remote.py +39 -0
- memvcs/core/knowledge_graph.py +135 -138
- memvcs/core/llm/__init__.py +10 -0
- memvcs/core/llm/anthropic_provider.py +50 -0
- memvcs/core/llm/base.py +27 -0
- memvcs/core/llm/factory.py +30 -0
- memvcs/core/llm/openai_provider.py +36 -0
- memvcs/core/merge.py +260 -170
- memvcs/core/objects.py +110 -101
- memvcs/core/pack.py +92 -0
- memvcs/core/pii_scanner.py +147 -146
- memvcs/core/privacy_budget.py +63 -0
- memvcs/core/refs.py +132 -115
- memvcs/core/remote.py +38 -0
- memvcs/core/repository.py +254 -164
- memvcs/core/schema.py +155 -113
- memvcs/core/staging.py +60 -65
- memvcs/core/storage/__init__.py +20 -18
- memvcs/core/storage/base.py +74 -70
- memvcs/core/storage/gcs.py +70 -68
- memvcs/core/storage/local.py +42 -40
- memvcs/core/storage/s3.py +105 -110
- memvcs/core/temporal_index.py +121 -0
- memvcs/core/test_runner.py +101 -93
- memvcs/core/trust.py +103 -0
- memvcs/core/vector_store.py +56 -36
- memvcs/core/zk_proofs.py +26 -0
- memvcs/integrations/mcp_server.py +1 -3
- memvcs/integrations/web_ui/server.py +25 -26
- memvcs/retrieval/__init__.py +22 -0
- memvcs/retrieval/base.py +54 -0
- memvcs/retrieval/pack.py +128 -0
- memvcs/retrieval/recaller.py +105 -0
- memvcs/retrieval/strategies.py +314 -0
- memvcs/utils/__init__.py +3 -3
- memvcs/utils/helpers.py +52 -52
- agmem-0.1.1.dist-info/RECORD +0 -67
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/WHEEL +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Access index for agmem - tracks recall access patterns for importance weighting and decay.
|
|
3
|
+
|
|
4
|
+
Stores access log and recall cache in .mem/index.json.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import hashlib
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import List, Dict, Any, Optional
|
|
12
|
+
|
|
13
|
+
# Maximum entries in access log before compaction; the log is trimmed
# down to the newest ACCESS_LOG_MAX entries once it grows past this.
ACCESS_LOG_MAX = 10_000


class AccessIndex:
    """Tracks access patterns for recall, importance, and decay.

    State persists as JSON at ``<mem_dir>/index.json`` with two top-level
    keys: ``access_log`` (append-only list of access records, trimmed at
    ACCESS_LOG_MAX) and ``recall_cache`` (bounded cache of recall results).
    """

    def __init__(self, mem_dir: Path):
        self.mem_dir = Path(mem_dir)
        self.index_path = self.mem_dir / "index.json"
        # In-memory copy of the index; loaded lazily on first use.
        self._data: Optional[Dict[str, Any]] = None

    def _load(self) -> Dict[str, Any]:
        """Load index from disk (cached after the first call)."""
        if self._data is not None:
            return self._data
        if self.index_path.exists():
            try:
                self._data = json.loads(self.index_path.read_text(encoding="utf-8"))
            except (json.JSONDecodeError, OSError):
                # Corrupt or unreadable index file: start fresh rather than crash.
                self._data = self._default_structure()
        else:
            self._data = self._default_structure()
        return self._data

    def _default_structure(self) -> Dict[str, Any]:
        """Return default (empty) index structure."""
        return {
            "version": 1,
            "access_log": [],
            "recall_cache": {},
        }

    def _save(self, data: Optional[Dict[str, Any]] = None) -> None:
        """Save index to disk and refresh the in-memory copy.

        Args:
            data: Structure to persist; defaults to the cached ``_data``.
        """
        # Fix: the original used `data = data or self._data`, which treated an
        # explicitly passed falsy dict (e.g. {}) as "not given" and silently
        # saved stale cached state instead. Fall back only when omitted.
        if data is None:
            data = self._data
        if data is None:
            # Nothing loaded and nothing given: no-op.
            return
        self.mem_dir.mkdir(parents=True, exist_ok=True)
        self.index_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
        self._data = data

    def record_access(self, path: str, commit: str, timestamp: Optional[str] = None) -> None:
        """
        Record that a memory file was accessed (e.g., during recall).

        Args:
            path: File path relative to current/
            commit: Commit hash at time of access
            timestamp: ISO 8601 timestamp (default: now, UTC, 'Z'-suffixed)
        """
        data = self._load()
        if timestamp is None:
            # datetime.utcnow() is deprecated since Python 3.12; use an
            # aware clock but keep the original naive-ISO + "Z" format.
            from datetime import timezone

            timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
        data["access_log"].append({"path": path, "commit": commit, "timestamp": timestamp})
        self._trim_access_log_if_needed(data)
        self._save()

    def _trim_access_log_if_needed(self, data: Dict[str, Any]) -> None:
        """Drop the oldest entries once the log exceeds ACCESS_LOG_MAX."""
        if len(data.get("access_log", [])) > ACCESS_LOG_MAX:
            data["access_log"] = data["access_log"][-ACCESS_LOG_MAX:]

    def get_access_count(self, path: Optional[str] = None, commit: Optional[str] = None) -> int:
        """
        Get access count for a path and/or commit.

        Args:
            path: Filter by path (None = any)
            commit: Filter by commit (None = any)

        Returns:
            Number of matching access entries
        """
        data = self._load()
        count = 0
        for entry in data.get("access_log", []):
            if path is not None and entry.get("path") != path:
                continue
            if commit is not None and entry.get("commit") != commit:
                continue
            count += 1
        return count

    def get_recent_accesses(
        self,
        limit: int = 100,
        path: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Get most recent access entries.

        Args:
            limit: Max entries to return (<= 0 returns an empty list)
            path: Filter by path (None = any)

        Returns:
            List of access entries (newest first)
        """
        if limit <= 0:
            # Fix: the original slice entries[-0:] returned the WHOLE log
            # for limit=0 instead of nothing.
            return []
        data = self._load()
        entries = data.get("access_log", [])
        if path is not None:
            entries = [e for e in entries if e.get("path") == path]
        return list(reversed(entries[-limit:]))

    def get_access_counts_by_path(self) -> Dict[str, int]:
        """Aggregate access counts per path (for importance weighting)."""
        data = self._load()
        counts: Dict[str, int] = {}
        for entry in data.get("access_log", []):
            p = entry.get("path", "")
            counts[p] = counts.get(p, 0) + 1
        return counts

    def get_cache_key(self, context: str, strategy: str, limit: int, exclude: List[str]) -> str:
        """Compute a deterministic cache key for recall results.

        `exclude` is sorted so equivalent exclude sets hash identically.
        """
        payload = f"{context}|{strategy}|{limit}|{','.join(sorted(exclude))}"
        return hashlib.sha256(payload.encode()).hexdigest()

    def get_cached_recall(
        self,
        context: str,
        strategy: str,
        limit: int,
        exclude: List[str],
    ) -> Optional[Dict[str, Any]]:
        """Get cached recall results if available (None on cache miss)."""
        key = self.get_cache_key(context, strategy, limit, exclude)
        data = self._load()
        cache = data.get("recall_cache", {})
        return cache.get(key)

    def set_cached_recall(
        self,
        context: str,
        strategy: str,
        limit: int,
        exclude: List[str],
        results: List[Dict[str, Any]],
    ) -> None:
        """Cache recall results under the computed cache key.

        Evicts the 50 oldest entries (by cached_at) once the cache
        exceeds 100 entries, then persists the index.
        """
        key = self.get_cache_key(context, strategy, limit, exclude)
        data = self._load()
        if "recall_cache" not in data:
            data["recall_cache"] = {}
        from datetime import timezone

        data["recall_cache"][key] = {
            "results": results,
            # Aware clock, legacy naive-ISO + "Z" wire format (see record_access).
            "cached_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
        }
        # Limit cache size: evict the oldest half when over capacity.
        cache = data["recall_cache"]
        if len(cache) > 100:
            oldest = sorted(cache.items(), key=lambda x: x[1].get("cached_at", ""))[:50]
            for k, _ in oldest:
                del cache[k]
        self._save()
memvcs/core/audit.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tamper-evident audit trail for agmem.
|
|
3
|
+
|
|
4
|
+
Append-only, hash-chained log of significant operations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import datetime
|
|
8
|
+
import hashlib
|
|
9
|
+
import hmac
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Optional, List, Dict, Any, Tuple
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _audit_dir(mem_dir: Path) -> Path:
    """Return the directory that holds the audit log."""
    return mem_dir / "audit"


def _log_path(mem_dir: Path) -> Path:
    """Return the path of the append-only audit log file."""
    return _audit_dir(mem_dir) / "log"


def _utc_now_iso() -> str:
    """Current UTC time as a naive ISO-8601 string with trailing 'Z'.

    Uses a timezone-aware clock (datetime.utcnow() is deprecated since
    Python 3.12) while preserving the original on-disk timestamp format.
    """
    now = datetime.datetime.now(datetime.timezone.utc)
    return now.replace(tzinfo=None).isoformat() + "Z"


def _get_previous_hash(mem_dir: Path) -> str:
    """Read the newest log line and return its entry hash ('' for first entry).

    Scans backwards past blank lines; the first malformed (tab-less) line
    yields '' so a new chain effectively restarts after corruption.
    """
    path = _log_path(mem_dir)
    if not path.exists():
        return ""
    # Format per line: entry_hash TAB payload_json
    # (str.split always yields at least one element, so no empty-list check.)
    for line in reversed(path.read_text().strip().split("\n")):
        line = line.strip()
        if not line:
            continue
        if "\t" in line:
            return line.split("\t", 1)[0]
        return ""
    return ""


def _hash_entry(prev_hash: str, payload: str) -> str:
    """Compute this entry's chain hash: SHA-256(prev_hash + payload)."""
    return hashlib.sha256((prev_hash + payload).encode()).hexdigest()


def append_audit(
    mem_dir: Path,
    operation: str,
    details: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Append a tamper-evident audit entry. Writes synchronously (fsync).

    Each line is ``entry_hash TAB payload_json`` where the payload carries
    timestamp, operation, details and prev_hash, and entry_hash chains
    over the previous entry's hash.

    NOTE(review): there is no inter-process locking here; concurrent
    writers could interleave lines and break the chain — confirm
    single-writer usage in callers.
    """
    mem_dir = Path(mem_dir)
    _audit_dir(mem_dir).mkdir(parents=True, exist_ok=True)
    path = _log_path(mem_dir)
    prev_hash = _get_previous_hash(mem_dir)
    payload = {
        "timestamp": _utc_now_iso(),
        "operation": operation,
        "details": details or {},
        "prev_hash": prev_hash,
    }
    # sort_keys makes the serialization (and thus the hash) deterministic.
    payload_str = json.dumps(payload, sort_keys=True)
    entry_hash = _hash_entry(prev_hash, payload_str)
    with open(path, "a", encoding="utf-8") as f:
        f.write(f"{entry_hash}\t{payload_str}\n")
        f.flush()
        try:
            # Push the entry to stable storage; best-effort on platforms or
            # file objects that do not support fsync.
            os.fsync(f.fileno())
        except (AttributeError, OSError):
            pass


def read_audit(mem_dir: Path, max_entries: int = 1000) -> List[Dict[str, Any]]:
    """Read audit log entries, newest first.

    Each returned dict carries entry_hash, prev_hash, timestamp,
    operation and details; blank or malformed lines are skipped.
    """
    path = _log_path(mem_dir)
    if not path.exists():
        return []
    entries: List[Dict[str, Any]] = []
    for line in reversed(path.read_text().strip().split("\n")):
        line = line.strip()
        if not line or "\t" not in line:
            continue
        entry_hash, payload_str = line.split("\t", 1)
        try:
            payload = json.loads(payload_str)
        except json.JSONDecodeError:
            continue
        payload["entry_hash"] = entry_hash
        entries.append(payload)
        if len(entries) >= max_entries:
            break
    return entries


def verify_audit(mem_dir: Path) -> Tuple[bool, Optional[int]]:
    """
    Verify the audit log chain. Returns (valid, first_bad_index).

    first_bad_index is the 0-based line index of the first entry that is
    malformed or whose hash does not chain correctly; None when the
    whole chain verifies (or the log does not exist).
    """
    path = _log_path(mem_dir)
    if not path.exists():
        return (True, None)
    prev_hash = ""
    for i, line in enumerate(path.read_text().strip().split("\n")):
        line = line.strip()
        if not line:
            continue
        if "\t" not in line:
            return (False, i)
        entry_hash, payload_str = line.split("\t", 1)
        expected_hash = _hash_entry(prev_hash, payload_str)
        # Constant-time comparison of the stored vs recomputed hash.
        if not hmac.compare_digest(entry_hash, expected_hash):
            return (False, i)
        prev_hash = entry_hash
    return (True, None)
memvcs/core/config_loader.py
CHANGED
|
@@ -13,6 +13,7 @@ from typing import Any, Dict, List, Optional
|
|
|
13
13
|
|
|
14
14
|
try:
|
|
15
15
|
import yaml
|
|
16
|
+
|
|
16
17
|
YAML_AVAILABLE = True
|
|
17
18
|
except ImportError:
|
|
18
19
|
YAML_AVAILABLE = False
|
|
@@ -116,7 +117,8 @@ def _apply_gcs_credentials_path(config: Dict[str, Any], repo_root: Optional[Path
|
|
|
116
117
|
config[CONFIG_CLOUD][CONFIG_CLOUD_GCS]["credentials_path"] = resolved
|
|
117
118
|
else:
|
|
118
119
|
config[CONFIG_CLOUD][CONFIG_CLOUD_GCS] = {
|
|
119
|
-
k: v
|
|
120
|
+
k: v
|
|
121
|
+
for k, v in config[CONFIG_CLOUD][CONFIG_CLOUD_GCS].items()
|
|
120
122
|
if k != "credentials_path"
|
|
121
123
|
}
|
|
122
124
|
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Consistency checker - belief consistency for agmem semantic memories.
|
|
3
|
+
|
|
4
|
+
Extracts (subject, predicate, object) triples and detects logical contradictions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import List, Dict, Any, Optional, Tuple
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
|
|
12
|
+
from .constants import MEMORY_TYPES
|
|
13
|
+
from .schema import FrontmatterParser
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class Triple:
    """A (subject, predicate, object) fact extracted from memory text."""

    subject: str  # subject phrase (lowercased/truncated by the extractors)
    predicate: str  # relation, e.g. "likes", "uses", "is"
    obj: str  # object phrase (truncated to 100 chars by the extractors)
    confidence: float  # extractor confidence (0.5-0.8 in this module)
    source: str  # source file path the triple was extracted from
    line: int  # 1-based line number within the source content
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class Contradiction:
    """A detected contradiction between two extracted triples."""

    triple1: Triple  # first conflicting triple
    triple2: Triple  # second conflicting triple
    reason: str  # human-readable explanation, e.g. "likes vs dislikes"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class ConsistencyResult:
    """Result of a consistency check over semantic memories."""

    valid: bool  # True when no contradictions were detected
    contradictions: List[Contradiction] = field(default_factory=list)
    triples: List[Triple] = field(default_factory=list)  # all extracted triples
    files_checked: int = 0  # number of semantic .md files scanned
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Pairs of mutually exclusive predicates: a triple using one side
# contradicts a triple using the other side over the same subject/object
# pair. The check is symmetric in both pair order and predicate order.
INVERSE_PREDICATES = [
    ("likes", "dislikes"),
    ("prefers", "avoids"),
    ("uses", "avoids"),
    ("enables", "disables"),
    ("true", "false"),
]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class ConsistencyChecker:
    """Detects logical contradictions in semantic memories.

    Triples are extracted from markdown files under current/semantic/
    (regex heuristics, or optionally an LLM provider), then compared
    pairwise for inverse-predicate conflicts.
    """

    def __init__(self, repo: Any, llm_provider: Optional[str] = None):
        # NOTE(review): `repo` is assumed to expose a `.root` Path
        # (agmem repository object) — confirm against callers.
        self.repo = repo
        self.llm_provider = llm_provider
        self.current_dir = repo.root / "current"

    def _extract_triples_simple(self, content: str, source: str) -> List[Triple]:
        """Simple heuristic extraction of triples from text.

        Args:
            content: Raw file text to scan line by line.
            source: Path recorded on each triple as its provenance.

        Returns:
            List of extracted triples (possibly empty).
        """
        triples = []
        for i, line in enumerate(content.splitlines(), 1):
            line = line.strip()
            # Skip blanks, markdown headings, and frontmatter fences.
            if not line or line.startswith("#") or line.startswith("---"):
                continue
            # Pattern: "user prefers X", "user likes Y", "X uses Y"
            m = re.search(r"(user|agent)\s+(prefers|likes|uses|avoids|dislikes)\s+(.+)", line, re.I)
            if m:
                subj, pred, obj = m.group(1), m.group(2), m.group(3).strip()
                triples.append(
                    Triple(
                        subject=subj.lower(),
                        predicate=pred.lower(),
                        # Objects are truncated to bound memory/compare cost.
                        obj=obj[:100],
                        confidence=0.6,
                        source=source,
                        line=i,
                    )
                )
            # Pattern: "X is Y" — note a line can yield BOTH a verb triple
            # above and an "is" triple here.
            m = re.search(r"^(.+?)\s+is\s+(.+?)(?:\.|$)", line)
            if m:
                subj, obj = m.group(1).strip(), m.group(2).strip()
                triples.append(
                    Triple(
                        subject=subj[:50],
                        predicate="is",
                        obj=obj[:100],
                        confidence=0.5,  # weaker signal than the verb patterns
                        source=source,
                        line=i,
                    )
                )
        return triples

    def _extract_triples_llm(self, content: str, source: str) -> List[Triple]:
        """Extract triples using an LLM (multi-provider).

        Returns [] on any failure (no provider, provider error, import
        error) so callers can fall back to the heuristic extractor.
        """
        try:
            from .llm import get_provider

            provider = get_provider(provider_name=self.llm_provider)
            if not provider:
                return []
            text = provider.complete(
                [
                    {
                        "role": "system",
                        "content": "Extract factual statements as (subject, predicate, object) triples. One per line, format: SUBJECT | PREDICATE | OBJECT",
                    },
                    # Content is truncated to keep the prompt bounded.
                    {"role": "user", "content": content[:3000]},
                ],
                max_tokens=500,
            )
            triples = []
            # Here `i` is the line number within the LLM RESPONSE, not the
            # source file — a known imprecision of this path.
            for i, line in enumerate(text.splitlines(), 1):
                if "|" in line:
                    # maxsplit=2 keeps any extra '|' inside the object part.
                    parts = [p.strip() for p in line.split("|", 2)]
                    if len(parts) >= 3:
                        triples.append(
                            Triple(
                                subject=parts[0][:50],
                                predicate=parts[1][:30],
                                obj=parts[2][:100],
                                confidence=0.8,
                                source=source,
                                line=i,
                            )
                        )
            return triples
        except Exception:
            # Best-effort: LLM extraction is optional; never propagate.
            return []

    def extract_triples(self, content: str, source: str, use_llm: bool = False) -> List[Triple]:
        """Extract triples from content.

        Tries the LLM path when requested and configured; falls back to
        the regex heuristics when the LLM yields nothing.
        """
        if use_llm and self.llm_provider:
            t = self._extract_triples_llm(content, source)
            if t:
                return t
        return self._extract_triples_simple(content, source)

    def _are_inverse(self, pred1: str, pred2: str) -> bool:
        """Check if predicates are inverses (order-insensitive)."""
        for a, b in INVERSE_PREDICATES:
            if (pred1 == a and pred2 == b) or (pred1 == b and pred2 == a):
                return True
        return False

    def _same_subject_object(self, t1: Triple, t2: Triple) -> bool:
        """Check if triples refer to the same subject and object.

        Also matches when subject and object are swapped between the two
        triples (symmetric comparison).
        """
        s1, o1 = t1.subject.lower(), t1.obj.lower()
        s2, o2 = t2.subject.lower(), t2.obj.lower()
        return (s1 == s2 and o1 == o2) or (s1 == o2 and o1 == s2)

    def detect_contradictions(self, triples: List[Triple]) -> List[Contradiction]:
        """Detect contradictions among triples.

        O(n^2) pairwise scan; each unordered pair is compared once.
        """
        contradictions = []
        for i, t1 in enumerate(triples):
            for t2 in triples[i + 1 :]:
                if self._same_subject_object(t1, t2) and self._are_inverse(
                    t1.predicate, t2.predicate
                ):
                    contradictions.append(
                        Contradiction(
                            triple1=t1,
                            triple2=t2,
                            reason=f"{t1.predicate} vs {t2.predicate}",
                        )
                    )
        return contradictions

    def check(self, use_llm: bool = False) -> ConsistencyResult:
        """Check consistency of semantic memories.

        Scans current/semantic/**/*.md, extracts triples from each file,
        and reports any inverse-predicate contradictions. A missing
        current/ or semantic/ directory is trivially consistent.
        """
        triples = []
        files_checked = 0

        if not self.current_dir.exists():
            return ConsistencyResult(valid=True, files_checked=0)

        semantic_dir = self.current_dir / "semantic"
        if not semantic_dir.exists():
            return ConsistencyResult(valid=True, files_checked=0)

        for f in semantic_dir.rglob("*.md"):
            if not f.is_file():
                continue
            try:
                rel = str(f.relative_to(self.current_dir))
                content = f.read_text(encoding="utf-8", errors="replace")
            except Exception:
                # Unreadable file: skip it rather than abort the whole check.
                continue
            files_checked += 1
            triples.extend(self.extract_triples(content, rel, use_llm))

        contradictions = self.detect_contradictions(triples)
        return ConsistencyResult(
            valid=len(contradictions) == 0,
            contradictions=contradictions,
            triples=triples,
            files_checked=files_checked,
        )

    def repair(self, strategy: str = "confidence") -> ConsistencyResult:
        """Attempt to auto-fix contradictions using strategy.

        Currently report-only: re-runs check() (with LLM extraction when
        strategy == "llm") and returns the result unchanged.
        """
        result = self.check(use_llm=(strategy == "llm"))
        if result.valid:
            return result
        # For now, repair just reports - actual fix would modify files
        return result
|