agmem 0.1.1-py3-none-any.whl → 0.1.2-py3-none-any.whl
This diff shows the content changes between publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/METADATA +20 -3
- agmem-0.1.2.dist-info/RECORD +86 -0
- memvcs/__init__.py +1 -1
- memvcs/cli.py +35 -31
- memvcs/commands/__init__.py +9 -9
- memvcs/commands/add.py +77 -76
- memvcs/commands/blame.py +46 -53
- memvcs/commands/branch.py +13 -33
- memvcs/commands/checkout.py +27 -32
- memvcs/commands/clean.py +18 -23
- memvcs/commands/clone.py +4 -1
- memvcs/commands/commit.py +40 -39
- memvcs/commands/daemon.py +81 -76
- memvcs/commands/decay.py +77 -0
- memvcs/commands/diff.py +56 -57
- memvcs/commands/distill.py +74 -0
- memvcs/commands/fsck.py +55 -61
- memvcs/commands/garden.py +28 -37
- memvcs/commands/graph.py +41 -48
- memvcs/commands/init.py +16 -24
- memvcs/commands/log.py +25 -40
- memvcs/commands/merge.py +16 -28
- memvcs/commands/pack.py +129 -0
- memvcs/commands/pull.py +4 -1
- memvcs/commands/push.py +4 -2
- memvcs/commands/recall.py +145 -0
- memvcs/commands/reflog.py +13 -22
- memvcs/commands/remote.py +1 -0
- memvcs/commands/repair.py +66 -0
- memvcs/commands/reset.py +23 -33
- memvcs/commands/resurrect.py +82 -0
- memvcs/commands/search.py +3 -4
- memvcs/commands/serve.py +2 -1
- memvcs/commands/show.py +66 -36
- memvcs/commands/stash.py +34 -34
- memvcs/commands/status.py +27 -35
- memvcs/commands/tag.py +23 -47
- memvcs/commands/test.py +30 -44
- memvcs/commands/timeline.py +111 -0
- memvcs/commands/tree.py +26 -27
- memvcs/commands/verify.py +59 -0
- memvcs/commands/when.py +115 -0
- memvcs/core/access_index.py +167 -0
- memvcs/core/config_loader.py +3 -1
- memvcs/core/consistency.py +214 -0
- memvcs/core/decay.py +185 -0
- memvcs/core/diff.py +158 -143
- memvcs/core/distiller.py +277 -0
- memvcs/core/gardener.py +164 -132
- memvcs/core/hooks.py +48 -14
- memvcs/core/knowledge_graph.py +134 -138
- memvcs/core/merge.py +248 -171
- memvcs/core/objects.py +95 -96
- memvcs/core/pii_scanner.py +147 -146
- memvcs/core/refs.py +132 -115
- memvcs/core/repository.py +174 -164
- memvcs/core/schema.py +155 -113
- memvcs/core/staging.py +60 -65
- memvcs/core/storage/__init__.py +20 -18
- memvcs/core/storage/base.py +74 -70
- memvcs/core/storage/gcs.py +70 -68
- memvcs/core/storage/local.py +42 -40
- memvcs/core/storage/s3.py +105 -110
- memvcs/core/temporal_index.py +112 -0
- memvcs/core/test_runner.py +101 -93
- memvcs/core/vector_store.py +41 -35
- memvcs/integrations/mcp_server.py +1 -3
- memvcs/integrations/web_ui/server.py +25 -26
- memvcs/retrieval/__init__.py +22 -0
- memvcs/retrieval/base.py +54 -0
- memvcs/retrieval/pack.py +128 -0
- memvcs/retrieval/recaller.py +105 -0
- memvcs/retrieval/strategies.py +314 -0
- memvcs/utils/__init__.py +3 -3
- memvcs/utils/helpers.py +52 -52
- agmem-0.1.1.dist-info/RECORD +0 -67
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/WHEEL +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/top_level.txt +0 -0
memvcs/core/distiller.py
ADDED
@@ -0,0 +1,277 @@

```python
"""
Distiller - Episodic-to-semantic distillation pipeline for agmem.

Converts session logs into compact facts (like memory consolidation during sleep).
Extends Gardener with factual extraction and safety branches.
"""

import shutil
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass, field
from datetime import datetime
from collections import defaultdict

try:
    import yaml

    YAML_AVAILABLE = True
except ImportError:
    YAML_AVAILABLE = False

from .gardener import Gardener, GardenerConfig, EpisodeCluster


@dataclass
class DistillerConfig:
    """Configuration for the Distiller."""

    source_dir: str = "episodic"
    target_dir: str = "semantic/consolidated"
    archive_dir: str = "archive"
    min_cluster_size: int = 3
    extraction_confidence_threshold: float = 0.7
    safety_branch_prefix: str = "auto-distill/"
    llm_provider: Optional[str] = None
    llm_model: Optional[str] = None
    create_safety_branch: bool = True


@dataclass
class DistillerResult:
    """Result of a distillation run."""

    success: bool
    clusters_processed: int
    facts_extracted: int
    episodes_archived: int
    branch_created: Optional[str] = None
    commit_hash: Optional[str] = None
    message: str = ""


class Distiller:
    """
    Distills episodic memory into semantic facts.

    Pipeline: cluster episodes -> extract facts via LLM -> merge with semantic -> archive.
    Creates safety branch for human review before merging to main.
    """

    def __init__(self, repo: Any, config: Optional[DistillerConfig] = None):
        self.repo = repo
        self.config = config or DistillerConfig()
        self.source_dir = repo.root / "current" / self.config.source_dir
        self.target_dir = repo.root / "current" / self.config.target_dir.rstrip("/")
        archive_candidate = repo.current_dir / self.config.archive_dir
        try:
            archive_candidate.resolve().relative_to(repo.current_dir.resolve())
            self.archive_dir = archive_candidate
        except (ValueError, RuntimeError):
            self.archive_dir = repo.current_dir / "archive"
        self.gardener = Gardener(
            repo,
            GardenerConfig(
                threshold=1,
                archive_dir=self.config.archive_dir,
                min_cluster_size=self.config.min_cluster_size,
                llm_provider=self.config.llm_provider,
                llm_model=self.config.llm_model,
            ),
        )

    def load_episodes_from(self, source_path: Path) -> List[Tuple[Path, str]]:
        """Load episodes from source directory."""
        episodes = []
        if not source_path.exists():
            return episodes
        for f in source_path.glob("**/*.md"):
            if f.is_file():
                try:
                    episodes.append((f, f.read_text(encoding="utf-8", errors="replace")))
                except Exception:
                    continue
        return episodes

    def cluster_episodes(self, episodes: List[Tuple[Path, str]]) -> List[EpisodeCluster]:
        """Cluster episodes using Gardener's logic."""
        try:
            return self.gardener.cluster_episodes_with_embeddings(episodes)
        except Exception:
            return self.gardener.cluster_episodes(episodes)

    def extract_facts(self, cluster: EpisodeCluster) -> List[str]:
        """Extract factual statements from cluster via LLM or heuristics."""
        contents = []
        for ep_path in cluster.episodes[:10]:
            try:
                contents.append(ep_path.read_text()[:1000])
            except Exception:
                continue
        combined = "\n---\n".join(contents)

        if self.config.llm_provider == "openai" and self.config.llm_model:
            try:
                return self._extract_with_openai(combined, cluster.topic)
            except Exception:
                pass

        # Fallback: simple extraction
        facts = []
        for line in combined.splitlines():
            line = line.strip()
            if len(line) > 20 and not line.startswith("#") and not line.startswith("-"):
                if any(w in line.lower() for w in ["prefers", "likes", "uses", "learned", "user"]):
                    facts.append(f"- {line[:200]}")
        return facts[:10] if facts else [f"- Learned about {cluster.topic}"]

    def _extract_with_openai(self, content: str, topic: str) -> List[str]:
        """Extract facts using OpenAI API."""
        import openai

        response = openai.chat.completions.create(
            model=self.config.llm_model or "gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": "Extract factual statements from the text. "
                    "Output as bullet points (one fact per line). "
                    "Focus on: user preferences, learned facts, key decisions.",
                },
                {
                    "role": "user",
                    "content": f"Topic: {topic}\n\n{content[:4000]}",
                },
            ],
            max_tokens=500,
        )
        text = response.choices[0].message.content
        return [line.strip() for line in text.splitlines() if line.strip().startswith("-")][:15]

    def write_consolidated(self, cluster: EpisodeCluster, facts: List[str]) -> Path:
        """Write consolidated semantic file."""
        self.target_dir.mkdir(parents=True, exist_ok=True)
        safe_topic = cluster.topic.replace(" ", "-").lower().replace("/", "_")[:30]
        ts = datetime.utcnow().strftime("%Y%m%d")
        filename = f"consolidated-{safe_topic}-{ts}.md"
        out_path = (self.target_dir / filename).resolve()
        try:
            out_path.relative_to(self.repo.current_dir.resolve())
        except ValueError:
            out_path = self.target_dir / f"consolidated-{ts}.md"

        frontmatter = {
            "schema_version": "1.0",
            "last_updated": datetime.utcnow().isoformat() + "Z",
            "source_agent_id": "distiller",
            "memory_type": "semantic",
            "tags": cluster.tags + ["auto-generated", "consolidated"],
            "confidence_score": self.config.extraction_confidence_threshold,
        }
        body = f"# Consolidated: {cluster.topic}\n\n" + "\n".join(facts)
        if YAML_AVAILABLE:
            import yaml

            content = f"---\n{yaml.dump(frontmatter, default_flow_style=False)}---\n\n{body}"
        else:
            content = body
        out_path.write_text(content)
        return out_path

    def archive_episodes(self, episodes: List[Path]) -> int:
        """Archive processed episodes to .mem/archive/."""
        archive_base = self.repo.mem_dir / "archive"
        archive_base.mkdir(parents=True, exist_ok=True)
        ts = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
        archive_sub = archive_base / ts
        archive_sub.mkdir(exist_ok=True)
        count = 0
        for ep in episodes:
            try:
                safe_name = ep.name.replace("..", "_").replace("/", "_")
                dest = (archive_sub / safe_name).resolve()
                dest.relative_to(archive_base.resolve())
                shutil.move(str(ep), str(dest))
                count += 1
            except (ValueError, Exception):
                continue
        return count

    def run(
        self,
        source: Optional[str] = None,
        target: Optional[str] = None,
        model: Optional[str] = None,
    ) -> DistillerResult:
        """Run distillation pipeline."""
        source_path = Path(source) if source else self.source_dir
        if not source_path.is_absolute():
            source_path = self.repo.root / "current" / source_path
        target_path = Path(target) if target else self.target_dir
        if not target_path.is_absolute():
            target_path = self.repo.root / "current" / target_path
        self.target_dir = target_path
        if model:
            self.config.llm_model = model

        episodes = self.load_episodes_from(source_path)
        if not episodes:
            return DistillerResult(
                success=True,
                clusters_processed=0,
                facts_extracted=0,
                episodes_archived=0,
                message="No episodes to process",
            )

        clusters = self.cluster_episodes(episodes)
        if not clusters:
            return DistillerResult(
                success=True,
                clusters_processed=0,
                facts_extracted=0,
                episodes_archived=0,
                message="No clusters formed",
            )

        # Create safety branch if configured
        branch_name = None
        if self.config.create_safety_branch:
            ts = datetime.utcnow().strftime("%Y-%m-%d")
            branch_name = f"{self.config.safety_branch_prefix}{ts}"
            if not self.repo.refs.branch_exists(branch_name):
                self.repo.refs.create_branch(branch_name)
            self.repo.checkout(branch_name, force=True)

        facts_count = 0
        all_archived = []
        for cluster in clusters:
            facts = self.extract_facts(cluster)
            self.write_consolidated(cluster, facts)
            facts_count += len(facts)
            all_archived.extend(cluster.episodes)

        archived = self.archive_episodes(all_archived)

        commit_hash = None
        if facts_count > 0:
            try:
                for f in self.target_dir.glob("consolidated-*.md"):
                    rel = str(f.relative_to(self.repo.root / "current"))
                    self.repo.stage_file(rel)
                commit_hash = self.repo.commit(
                    f"distiller: consolidated {facts_count} facts from {len(episodes)} episodes",
                    {"distiller": True, "clusters": len(clusters)},
                )
            except Exception:
                pass

        return DistillerResult(
            success=True,
            clusters_processed=len(clusters),
            facts_extracted=facts_count,
            episodes_archived=archived,
            branch_created=branch_name,
            commit_hash=commit_hash,
            message=f"Processed {len(clusters)} clusters, extracted {facts_count} facts",
        )
```
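For context, a minimal usage sketch of the new `Distiller`. The diff does not show how callers construct the repository object; the file list includes `memvcs/core/repository.py`, but the `Repository` name and its constructor signature below are assumptions, as is the path passed to it. The class only requires an object exposing `root`, `current_dir`, `mem_dir`, `refs`, `checkout`, `stage_file`, and `commit` as used above.

```python
# Hypothetical usage sketch, not part of the diff. Assumes a Repository class
# in memvcs/core/repository.py with the attributes Distiller relies on.
from pathlib import Path

from memvcs.core.distiller import Distiller, DistillerConfig
from memvcs.core.repository import Repository  # assumed class name

repo = Repository(Path.home() / ".mem")  # hypothetical constructor
config = DistillerConfig(llm_provider="openai", llm_model="gpt-3.5-turbo")

# Clusters episodic notes, extracts facts, writes consolidated files on an
# auto-distill/<date> safety branch, archives the source episodes, and commits.
result = Distiller(repo, config).run()

# If no LLM is configured (or the API call fails), extract_facts() falls back
# to the keyword heuristics shown in the file above.
print(result.message, result.branch_created)
```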