agmem-0.1.1-py3-none-any.whl → agmem-0.1.2-py3-none-any.whl
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/METADATA +20 -3
- agmem-0.1.2.dist-info/RECORD +86 -0
- memvcs/__init__.py +1 -1
- memvcs/cli.py +35 -31
- memvcs/commands/__init__.py +9 -9
- memvcs/commands/add.py +77 -76
- memvcs/commands/blame.py +46 -53
- memvcs/commands/branch.py +13 -33
- memvcs/commands/checkout.py +27 -32
- memvcs/commands/clean.py +18 -23
- memvcs/commands/clone.py +4 -1
- memvcs/commands/commit.py +40 -39
- memvcs/commands/daemon.py +81 -76
- memvcs/commands/decay.py +77 -0
- memvcs/commands/diff.py +56 -57
- memvcs/commands/distill.py +74 -0
- memvcs/commands/fsck.py +55 -61
- memvcs/commands/garden.py +28 -37
- memvcs/commands/graph.py +41 -48
- memvcs/commands/init.py +16 -24
- memvcs/commands/log.py +25 -40
- memvcs/commands/merge.py +16 -28
- memvcs/commands/pack.py +129 -0
- memvcs/commands/pull.py +4 -1
- memvcs/commands/push.py +4 -2
- memvcs/commands/recall.py +145 -0
- memvcs/commands/reflog.py +13 -22
- memvcs/commands/remote.py +1 -0
- memvcs/commands/repair.py +66 -0
- memvcs/commands/reset.py +23 -33
- memvcs/commands/resurrect.py +82 -0
- memvcs/commands/search.py +3 -4
- memvcs/commands/serve.py +2 -1
- memvcs/commands/show.py +66 -36
- memvcs/commands/stash.py +34 -34
- memvcs/commands/status.py +27 -35
- memvcs/commands/tag.py +23 -47
- memvcs/commands/test.py +30 -44
- memvcs/commands/timeline.py +111 -0
- memvcs/commands/tree.py +26 -27
- memvcs/commands/verify.py +59 -0
- memvcs/commands/when.py +115 -0
- memvcs/core/access_index.py +167 -0
- memvcs/core/config_loader.py +3 -1
- memvcs/core/consistency.py +214 -0
- memvcs/core/decay.py +185 -0
- memvcs/core/diff.py +158 -143
- memvcs/core/distiller.py +277 -0
- memvcs/core/gardener.py +164 -132
- memvcs/core/hooks.py +48 -14
- memvcs/core/knowledge_graph.py +134 -138
- memvcs/core/merge.py +248 -171
- memvcs/core/objects.py +95 -96
- memvcs/core/pii_scanner.py +147 -146
- memvcs/core/refs.py +132 -115
- memvcs/core/repository.py +174 -164
- memvcs/core/schema.py +155 -113
- memvcs/core/staging.py +60 -65
- memvcs/core/storage/__init__.py +20 -18
- memvcs/core/storage/base.py +74 -70
- memvcs/core/storage/gcs.py +70 -68
- memvcs/core/storage/local.py +42 -40
- memvcs/core/storage/s3.py +105 -110
- memvcs/core/temporal_index.py +112 -0
- memvcs/core/test_runner.py +101 -93
- memvcs/core/vector_store.py +41 -35
- memvcs/integrations/mcp_server.py +1 -3
- memvcs/integrations/web_ui/server.py +25 -26
- memvcs/retrieval/__init__.py +22 -0
- memvcs/retrieval/base.py +54 -0
- memvcs/retrieval/pack.py +128 -0
- memvcs/retrieval/recaller.py +105 -0
- memvcs/retrieval/strategies.py +314 -0
- memvcs/utils/__init__.py +3 -3
- memvcs/utils/helpers.py +52 -52
- agmem-0.1.1.dist-info/RECORD +0 -67
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/WHEEL +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/top_level.txt +0 -0
memvcs/commands/when.py
ADDED
@@ -0,0 +1,115 @@
+"""
+agmem when - Find when a specific fact was learned.
+"""
+
+import argparse
+from pathlib import Path
+
+from ..commands.base import require_repo
+from ..core.objects import Commit, Tree, Blob
+
+
+class WhenCommand:
+    """Find when a fact was learned in memory history."""
+
+    name = "when"
+    help = "Find when a specific fact was learned"
+
+    @staticmethod
+    def add_arguments(parser: argparse.ArgumentParser):
+        parser.add_argument(
+            "fact",
+            nargs="?",
+            help="Fact or text to search for (e.g., 'user prefers dark mode')",
+        )
+        parser.add_argument(
+            "--file",
+            "-f",
+            help="Limit search to specific file (e.g., semantic/preferences.md)",
+        )
+        parser.add_argument(
+            "--limit",
+            "-n",
+            type=int,
+            default=10,
+            help="Max commits to report (default: 10)",
+        )
+
+    @staticmethod
+    def execute(args) -> int:
+        repo, code = require_repo()
+        if code != 0:
+            return code
+
+        if not args.fact:
+            print("Error: Fact to search for is required.")
+            print('Usage: agmem when "fact to find" [--file path]')
+            return 1
+
+        fact_lower = args.fact.lower()
+        file_filter = args.file.replace("current/", "").lstrip("/") if args.file else None
+
+        # Walk commit history from HEAD
+        head = repo.refs.get_head()
+        commit_hash = (
+            repo.refs.get_branch_commit(head["value"])
+            if head["type"] == "branch"
+            else head.get("value")
+        )
+
+        found = []
+        seen = set()
+        while commit_hash and len(found) < args.limit:
+            if commit_hash in seen:
+                break
+            seen.add(commit_hash)
+
+            commit = Commit.load(repo.object_store, commit_hash)
+            if not commit:
+                break
+
+            tree = repo.get_commit_tree(commit_hash)
+            if not tree:
+                commit_hash = commit.parents[0] if commit.parents else None
+                continue
+
+            # Check each file in tree
+            for entry in tree.entries:
+                path = entry.path + "/" + entry.name if entry.path else entry.name
+                if file_filter and path != file_filter:
+                    continue
+                if entry.obj_type != "blob":
+                    continue
+                blob = Blob.load(repo.object_store, entry.hash)
+                if not blob:
+                    continue
+                try:
+                    content = blob.content.decode("utf-8", errors="replace")
+                except Exception:
+                    continue
+                if fact_lower in content.lower():
+                    found.append(
+                        {
+                            "commit": commit_hash,
+                            "path": path,
+                            "timestamp": commit.timestamp,
+                            "author": commit.author,
+                            "message": commit.message,
+                        }
+                    )
+                    break  # One match per commit
+
+            commit_hash = commit.parents[0] if commit.parents else None
+
+        if not found:
+            scope = f" in {file_filter}" if file_filter else ""
+            print(f'No commits found containing "{args.fact}"{scope}')
+            return 0
+
+        print(f'Fact "{args.fact}" found in {len(found)} commit(s):')
+        print()
+        for i, m in enumerate(found, 1):
+            print(f"[{i}] {m['commit'][:8]} {m['timestamp']} - {m['path']}")
+            print(f"    {m['message'][:60]}")
+        print()
+        return 0
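For orientation: WhenCommand walks parent links back from HEAD and reports the newest commits whose tree contains the fact text. A minimal sketch of driving it directly, assuming the module path matches the file path and that the CLI dispatcher wires argparse this way (the fact and file here are hypothetical):

import argparse

from memvcs.commands.when import WhenCommand

parser = argparse.ArgumentParser(prog="agmem when")
WhenCommand.add_arguments(parser)
args = parser.parse_args(["user prefers dark mode", "--file", "semantic/preferences.md"])
exit_code = WhenCommand.execute(args)  # prints up to --limit matching commits, newest first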
memvcs/core/access_index.py
ADDED
@@ -0,0 +1,167 @@
+"""
+Access index for agmem - tracks recall access patterns for importance weighting and decay.
+
+Stores access log and recall cache in .mem/index.json.
+"""
+
+import json
+import hashlib
+from datetime import datetime
+from pathlib import Path
+from typing import List, Dict, Any, Optional
+
+# Maximum entries in access log before compaction
+ACCESS_LOG_MAX = 10_000
+
+
+class AccessIndex:
+    """Tracks access patterns for recall, importance, and decay."""
+
+    def __init__(self, mem_dir: Path):
+        self.mem_dir = Path(mem_dir)
+        self.index_path = self.mem_dir / "index.json"
+        self._data: Optional[Dict[str, Any]] = None
+
+    def _load(self) -> Dict[str, Any]:
+        """Load index from disk."""
+        if self._data is not None:
+            return self._data
+        if self.index_path.exists():
+            try:
+                self._data = json.loads(self.index_path.read_text(encoding="utf-8"))
+            except (json.JSONDecodeError, OSError):
+                self._data = self._default_structure()
+        else:
+            self._data = self._default_structure()
+        return self._data
+
+    def _default_structure(self) -> Dict[str, Any]:
+        """Return default index structure."""
+        return {
+            "version": 1,
+            "access_log": [],
+            "recall_cache": {},
+        }
+
+    def _save(self, data: Optional[Dict[str, Any]] = None) -> None:
+        """Save index to disk."""
+        data = data or self._data
+        if data is None:
+            return
+        self.mem_dir.mkdir(parents=True, exist_ok=True)
+        self.index_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
+        self._data = data
+
+    def record_access(self, path: str, commit: str, timestamp: Optional[str] = None) -> None:
+        """
+        Record that a memory file was accessed (e.g., during recall).
+
+        Args:
+            path: File path relative to current/
+            commit: Commit hash at time of access
+            timestamp: ISO 8601 timestamp (default: now)
+        """
+        data = self._load()
+        ts = timestamp or datetime.utcnow().isoformat() + "Z"
+        data["access_log"].append({"path": path, "commit": commit, "timestamp": ts})
+        self._trim_access_log_if_needed(data)
+        self._save()
+
+    def _trim_access_log_if_needed(self, data: Dict[str, Any]) -> None:
+        if len(data.get("access_log", [])) > ACCESS_LOG_MAX:
+            data["access_log"] = data["access_log"][-ACCESS_LOG_MAX:]
+
+    def get_access_count(self, path: Optional[str] = None, commit: Optional[str] = None) -> int:
+        """
+        Get access count for a path and/or commit.
+
+        Args:
+            path: Filter by path (None = any)
+            commit: Filter by commit (None = any)
+
+        Returns:
+            Number of matching access entries
+        """
+        data = self._load()
+        entries = data.get("access_log", [])
+        count = 0
+        for entry in entries:
+            if path is not None and entry.get("path") != path:
+                continue
+            if commit is not None and entry.get("commit") != commit:
+                continue
+            count += 1
+        return count
+
+    def get_recent_accesses(
+        self,
+        limit: int = 100,
+        path: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
+        """
+        Get most recent access entries.
+
+        Args:
+            limit: Max entries to return
+            path: Filter by path (None = any)
+
+        Returns:
+            List of access entries (newest first)
+        """
+        data = self._load()
+        entries = data.get("access_log", [])
+        if path is not None:
+            entries = [e for e in entries if e.get("path") == path]
+        return list(reversed(entries[-limit:]))
+
+    def get_access_counts_by_path(self) -> Dict[str, int]:
+        """Aggregate access counts per path (for importance weighting)."""
+        data = self._load()
+        counts: Dict[str, int] = {}
+        for entry in data.get("access_log", []):
+            p = entry.get("path", "")
+            counts[p] = counts.get(p, 0) + 1
+        return counts
+
+    def get_cache_key(self, context: str, strategy: str, limit: int, exclude: List[str]) -> str:
+        """Compute cache key for recall results."""
+        payload = f"{context}|{strategy}|{limit}|{','.join(sorted(exclude))}"
+        return hashlib.sha256(payload.encode()).hexdigest()
+
+    def get_cached_recall(
+        self,
+        context: str,
+        strategy: str,
+        limit: int,
+        exclude: List[str],
+    ) -> Optional[Dict[str, Any]]:
+        """Get cached recall results if available."""
+        key = self.get_cache_key(context, strategy, limit, exclude)
+        data = self._load()
+        cache = data.get("recall_cache", {})
+        return cache.get(key)
+
+    def set_cached_recall(
+        self,
+        context: str,
+        strategy: str,
+        limit: int,
+        exclude: List[str],
+        results: List[Dict[str, Any]],
+    ) -> None:
+        """Cache recall results."""
+        key = self.get_cache_key(context, strategy, limit, exclude)
+        data = self._load()
+        if "recall_cache" not in data:
+            data["recall_cache"] = {}
+        data["recall_cache"][key] = {
+            "results": results,
+            "cached_at": datetime.utcnow().isoformat() + "Z",
+        }
+        # Limit cache size
+        cache = data["recall_cache"]
+        if len(cache) > 100:
+            oldest = sorted(cache.items(), key=lambda x: x[1].get("cached_at", ""))[:50]
+            for k, _ in oldest:
+                del cache[k]
+        self._save()
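A short sketch of the index in use, restricted to the methods defined above; the paths and commit hashes are made up for illustration:

from pathlib import Path

from memvcs.core.access_index import AccessIndex

idx = AccessIndex(Path(".mem"))
idx.record_access("semantic/preferences.md", commit="a1b2c3d4")
idx.record_access("semantic/preferences.md", commit="e5f6a7b8")

idx.get_access_count(path="semantic/preferences.md")  # 2
idx.get_recent_accesses(limit=1)[0]["commit"]         # "e5f6a7b8" (newest first)
idx.get_access_counts_by_path()                       # {"semantic/preferences.md": 2}

# Recall results are keyed by a SHA-256 of (context, strategy, limit, exclude).
idx.set_cached_recall("dark mode", "keyword", 5, [], results=[{"path": "semantic/preferences.md"}])
idx.get_cached_recall("dark mode", "keyword", 5, [])["results"]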
memvcs/core/config_loader.py
CHANGED
@@ -13,6 +13,7 @@ from typing import Any, Dict, List, Optional
 
 try:
     import yaml
+
     YAML_AVAILABLE = True
 except ImportError:
     YAML_AVAILABLE = False
@@ -116,7 +117,8 @@ def _apply_gcs_credentials_path(config: Dict[str, Any], repo_root: Optional[Path
         config[CONFIG_CLOUD][CONFIG_CLOUD_GCS]["credentials_path"] = resolved
     else:
         config[CONFIG_CLOUD][CONFIG_CLOUD_GCS] = {
-            k: v
+            k: v
+            for k, v in config[CONFIG_CLOUD][CONFIG_CLOUD_GCS].items()
             if k != "credentials_path"
        }
 
memvcs/core/consistency.py
ADDED
@@ -0,0 +1,214 @@
+"""
+Consistency checker - belief consistency for agmem semantic memories.
+
+Extracts (subject, predicate, object) triples and detects logical contradictions.
+"""
+
+import re
+from pathlib import Path
+from typing import List, Dict, Any, Optional, Tuple
+from dataclasses import dataclass, field
+
+from .constants import MEMORY_TYPES
+from .schema import FrontmatterParser
+
+
+@dataclass
+class Triple:
+    """A (subject, predicate, object) triple."""
+
+    subject: str
+    predicate: str
+    obj: str
+    confidence: float
+    source: str
+    line: int
+
+
+@dataclass
+class Contradiction:
+    """A detected contradiction."""
+
+    triple1: Triple
+    triple2: Triple
+    reason: str
+
+
+@dataclass
+class ConsistencyResult:
+    """Result of consistency check."""
+
+    valid: bool
+    contradictions: List[Contradiction] = field(default_factory=list)
+    triples: List[Triple] = field(default_factory=list)
+    files_checked: int = 0
+
+
+# Inverse predicate pairs (A likes B vs B disliked by A)
+INVERSE_PREDICATES = [
+    ("likes", "dislikes"),
+    ("prefers", "avoids"),
+    ("uses", "avoids"),
+    ("enables", "disables"),
+    ("true", "false"),
+]
+
+
+class ConsistencyChecker:
+    """Detects logical contradictions in semantic memories."""
+
+    def __init__(self, repo: Any, llm_provider: Optional[str] = None):
+        self.repo = repo
+        self.llm_provider = llm_provider
+        self.current_dir = repo.root / "current"
+
+    def _extract_triples_simple(self, content: str, source: str) -> List[Triple]:
+        """Simple heuristic extraction of triples from text."""
+        triples = []
+        for i, line in enumerate(content.splitlines(), 1):
+            line = line.strip()
+            if not line or line.startswith("#") or line.startswith("---"):
+                continue
+            # Pattern: "user prefers X", "user likes Y", "X uses Y"
+            m = re.search(r"(user|agent)\s+(prefers|likes|uses|avoids|dislikes)\s+(.+)", line, re.I)
+            if m:
+                subj, pred, obj = m.group(1), m.group(2), m.group(3).strip()
+                triples.append(
+                    Triple(
+                        subject=subj.lower(),
+                        predicate=pred.lower(),
+                        obj=obj[:100],
+                        confidence=0.6,
+                        source=source,
+                        line=i,
+                    )
+                )
+            # Pattern: "X is Y"
+            m = re.search(r"^(.+?)\s+is\s+(.+?)(?:\.|$)", line)
+            if m:
+                subj, obj = m.group(1).strip(), m.group(2).strip()
+                triples.append(
+                    Triple(
+                        subject=subj[:50],
+                        predicate="is",
+                        obj=obj[:100],
+                        confidence=0.5,
+                        source=source,
+                        line=i,
+                    )
+                )
+        return triples
+
+    def _extract_triples_llm(self, content: str, source: str) -> List[Triple]:
+        """Extract triples using LLM."""
+        try:
+            import openai
+
+            response = openai.chat.completions.create(
+                model="gpt-3.5-turbo",
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "Extract factual statements as (subject, predicate, object) triples. "
+                        "One per line, format: SUBJECT | PREDICATE | OBJECT",
+                    },
+                    {"role": "user", "content": content[:3000]},
+                ],
+                max_tokens=500,
+            )
+            text = response.choices[0].message.content
+            triples = []
+            for i, line in enumerate(text.splitlines(), 1):
+                if "|" in line:
+                    parts = [p.strip() for p in line.split("|", 2)]
+                    if len(parts) >= 3:
+                        triples.append(
+                            Triple(
+                                subject=parts[0][:50],
+                                predicate=parts[1][:30],
+                                obj=parts[2][:100],
+                                confidence=0.8,
+                                source=source,
+                                line=i,
+                            )
+                        )
+            return triples
+        except Exception:
+            return []
+
+    def extract_triples(self, content: str, source: str, use_llm: bool = False) -> List[Triple]:
+        """Extract triples from content."""
+        if use_llm and self.llm_provider == "openai":
+            t = self._extract_triples_llm(content, source)
+            if t:
+                return t
+        return self._extract_triples_simple(content, source)
+
+    def _are_inverse(self, pred1: str, pred2: str) -> bool:
+        """Check if predicates are inverses."""
+        for a, b in INVERSE_PREDICATES:
+            if (pred1 == a and pred2 == b) or (pred1 == b and pred2 == a):
+                return True
+        return False
+
+    def _same_subject_object(self, t1: Triple, t2: Triple) -> bool:
+        """Check if triples refer to same subject and object."""
+        s1, o1 = t1.subject.lower(), t1.obj.lower()
+        s2, o2 = t2.subject.lower(), t2.obj.lower()
+        return (s1 == s2 and o1 == o2) or (s1 == o2 and o1 == s2)
+
+    def detect_contradictions(self, triples: List[Triple]) -> List[Contradiction]:
+        """Detect contradictions among triples."""
+        contradictions = []
+        for i, t1 in enumerate(triples):
+            for t2 in triples[i + 1 :]:
+                if self._same_subject_object(t1, t2) and self._are_inverse(
+                    t1.predicate, t2.predicate
+                ):
+                    contradictions.append(
+                        Contradiction(
+                            triple1=t1,
+                            triple2=t2,
+                            reason=f"{t1.predicate} vs {t2.predicate}",
+                        )
+                    )
+        return contradictions
+
+    def check(self, use_llm: bool = False) -> ConsistencyResult:
+        """Check consistency of semantic memories."""
+        triples = []
+        files_checked = 0
+
+        if not self.current_dir.exists():
+            return ConsistencyResult(valid=True, files_checked=0)
+
+        semantic_dir = self.current_dir / "semantic"
+        if not semantic_dir.exists():
+            return ConsistencyResult(valid=True, files_checked=0)
+
+        for f in semantic_dir.rglob("*.md"):
+            if not f.is_file():
+                continue
+            try:
+                rel = str(f.relative_to(self.current_dir))
+                content = f.read_text(encoding="utf-8", errors="replace")
+            except Exception:
+                continue
+            files_checked += 1
+            triples.extend(self.extract_triples(content, rel, use_llm))
+
+        contradictions = self.detect_contradictions(triples)
+        return ConsistencyResult(
+            valid=len(contradictions) == 0,
+            contradictions=contradictions,
+            triples=triples,
+            files_checked=files_checked,
+        )
+
+    def repair(self, strategy: str = "confidence") -> ConsistencyResult:
+        """Attempt to auto-fix contradictions using strategy."""
+        result = self.check(use_llm=(strategy == "llm"))
+        if result.valid:
+            return result
+        # For now, repair just reports - actual fix would modify files
+        return result
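The contradiction logic only compares Triple fields, so it can be exercised without a real repository; a sketch, using a stub object that supplies the .root attribute __init__ expects (the triples are invented for illustration):

from pathlib import Path
from types import SimpleNamespace

from memvcs.core.consistency import ConsistencyChecker, Triple

checker = ConsistencyChecker(SimpleNamespace(root=Path(".")))
t1 = Triple("user", "prefers", "dark mode", 0.6, "semantic/preferences.md", 3)
t2 = Triple("user", "avoids", "dark mode", 0.6, "semantic/ui.md", 7)

# ("prefers", "avoids") is an INVERSE_PREDICATES pair and both triples share
# subject and object, so one Contradiction with reason "prefers vs avoids" results.
for c in checker.detect_contradictions([t1, t2]):
    print(c.triple1.source, "vs", c.triple2.source, "-", c.reason)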
memvcs/core/decay.py
ADDED
@@ -0,0 +1,185 @@
+"""
+Decay engine - memory decay and forgetting for agmem.
+
+Mimics human forgetting: irrelevant details fade, important ones strengthen.
+Ebbinghaus-inspired time decay + retrieval-induced enhancement.
+"""
+
+import math
+import shutil
+from pathlib import Path
+from typing import List, Dict, Any, Optional, Tuple
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+
+from .constants import MEMORY_TYPES
+from .access_index import AccessIndex
+from .objects import Commit
+from .schema import FrontmatterParser
+
+
+@dataclass
+class DecayConfig:
+    """Configuration for decay engine."""
+
+    episodic_half_life_days: int = 30
+    semantic_min_importance: float = 0.3
+    access_count_threshold: int = 2
+    forgetting_dir: str = "forgetting"
+
+
+@dataclass
+class DecayCandidate:
+    """A memory candidate for decay (archiving)."""
+
+    path: str
+    memory_type: str
+    importance: float
+    last_access_days: Optional[float]
+    access_count: int
+    decay_score: float
+    reason: str
+
+
+class DecayEngine:
+    """Computes decay scores and archives low-importance memories."""
+
+    def __init__(self, repo: Any, config: Optional[DecayConfig] = None):
+        self.repo = repo
+        self.config = config or DecayConfig()
+        self.access_index = AccessIndex(repo.mem_dir)
+        self.forgetting_dir = repo.mem_dir / self.config.forgetting_dir
+        self.current_dir = repo.current_dir
+
+    def _get_importance(self, path: str, content: str) -> float:
+        """Get importance from frontmatter or default."""
+        fm, _ = FrontmatterParser.parse(content)
+        if fm and fm.importance is not None:
+            return float(fm.importance)
+        if fm and fm.confidence_score is not None:
+            return float(fm.confidence_score)
+        return 0.5
+
+    def _get_access_info(self, path: str) -> Tuple[int, Optional[float]]:
+        """Get access count and days since last access."""
+        counts = self.access_index.get_access_counts_by_path()
+        count = counts.get(path, 0)
+        recent = self.access_index.get_recent_accesses(limit=1, path=path)
+        if not recent:
+            return count, None
+        ts_str = recent[0].get("timestamp", "")
+        if not ts_str:
+            return count, None
+        try:
+            if ts_str.endswith("Z"):
+                ts_str = ts_str[:-1] + "+00:00"
+            last = datetime.fromisoformat(ts_str)
+            days = (datetime.utcnow() - last.replace(tzinfo=None)).total_seconds() / 86400
+            return count, days
+        except Exception:
+            return count, None
+
+    def compute_decay_score(
+        self,
+        path: str,
+        content: str,
+        memory_type: str,
+    ) -> DecayCandidate:
+        """
+        Compute decay score for a memory.
+
+        Higher score = more likely to decay (archive).
+        Time decay: importance * 0.5^(days/half_life) when never accessed.
+        Retrieval-induced enhancement: access boosts strength (lower decay).
+        """
+        importance = self._get_importance(path, content)
+        access_count, last_access_days = self._get_access_info(path)
+
+        decay_score = 0.0
+        reason = ""
+
+        if "episodic" in memory_type.lower():
+            half_life = self.config.episodic_half_life_days
+            if last_access_days is not None:
+                decay_score = 1.0 - (importance * math.pow(0.5, last_access_days / half_life))
+                if access_count < self.config.access_count_threshold:
+                    decay_score += 0.2
+                reason = f"episodic: {last_access_days:.0f}d since access, imp={importance:.2f}"
+            else:
+                decay_score = 0.5
+                reason = "episodic: never accessed"
+        else:
+            if importance < self.config.semantic_min_importance:
+                decay_score = 1.0 - importance
+                reason = f"semantic: low importance {importance:.2f}"
+            elif (
+                access_count < self.config.access_count_threshold
+                and last_access_days
+                and last_access_days > 60
+            ):
+                decay_score = 0.4
+                reason = "semantic: rarely accessed"
+
+        return DecayCandidate(
+            path=path,
+            memory_type=memory_type,
+            importance=importance,
+            last_access_days=last_access_days,
+            access_count=access_count,
+            decay_score=decay_score,
+            reason=reason,
+        )
+
+    def get_decay_candidates(self) -> List[DecayCandidate]:
+        """Get list of memories that would be archived (dry-run)."""
+        candidates = []
+        if not self.current_dir.exists():
+            return candidates
+
+        for subdir in MEMORY_TYPES:
+            dir_path = self.current_dir / subdir
+            if not dir_path.exists():
+                continue
+            for f in dir_path.rglob("*"):
+                if not f.is_file() or f.suffix.lower() not in (".md", ".txt"):
+                    continue
+                try:
+                    rel_path = str(f.relative_to(self.current_dir))
+                    content = f.read_text(encoding="utf-8", errors="replace")
+                except Exception:
+                    continue
+                cand = self.compute_decay_score(rel_path, content, subdir)
+                if cand.decay_score > 0.5:
+                    candidates.append(cand)
+
+        candidates.sort(key=lambda x: x.decay_score, reverse=True)
+        return candidates
+
+    def apply_decay(self, candidates: Optional[List[DecayCandidate]] = None) -> int:
+        """
+        Archive low-importance memories to .mem/forgetting/.
+
+        Returns count of files archived.
+        """
+        if candidates is None:
+            candidates = self.get_decay_candidates()
+        self.forgetting_dir.mkdir(parents=True, exist_ok=True)
+        ts = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
+        archive_sub = self.forgetting_dir / ts
+        archive_sub.mkdir(exist_ok=True)
+        count = 0
+        for cand in candidates:
+            if cand.decay_score <= 0.5:
+                continue
+            src = self.current_dir / cand.path
+            if not src.exists():
+                continue
+            try:
+                safe_name = cand.path.replace("/", "_").replace("..", "_")
+                dest = (archive_sub / safe_name).resolve()
+                dest.relative_to(self.forgetting_dir.resolve())
+                shutil.move(str(src), str(dest))
+                count += 1
+            except (ValueError, Exception):
+                continue
+        return count
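The episodic branch of compute_decay_score reduces to a single curve; a standalone sketch of the arithmetic with the DecayConfig defaults inlined (30-day half-life, +0.2 penalty under 2 accesses, archive when the score exceeds 0.5):

import math

def episodic_decay(importance: float, days_since_access: float, access_count: int) -> float:
    # 1 - importance * 0.5^(days / half_life), per compute_decay_score above
    score = 1.0 - importance * math.pow(0.5, days_since_access / 30)
    if access_count < 2:
        score += 0.2  # rarely-retrieved memories fade faster
    return score

episodic_decay(0.5, 0, 5)   # 0.50 -> kept (apply_decay requires score > 0.5)
episodic_decay(0.5, 30, 5)  # 0.75 -> archived after one half-life
episodic_decay(0.9, 30, 1)  # 0.75 -> even important memories fade if never recalled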