agmem 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/METADATA +20 -3
  2. agmem-0.1.2.dist-info/RECORD +86 -0
  3. memvcs/__init__.py +1 -1
  4. memvcs/cli.py +35 -31
  5. memvcs/commands/__init__.py +9 -9
  6. memvcs/commands/add.py +77 -76
  7. memvcs/commands/blame.py +46 -53
  8. memvcs/commands/branch.py +13 -33
  9. memvcs/commands/checkout.py +27 -32
  10. memvcs/commands/clean.py +18 -23
  11. memvcs/commands/clone.py +4 -1
  12. memvcs/commands/commit.py +40 -39
  13. memvcs/commands/daemon.py +81 -76
  14. memvcs/commands/decay.py +77 -0
  15. memvcs/commands/diff.py +56 -57
  16. memvcs/commands/distill.py +74 -0
  17. memvcs/commands/fsck.py +55 -61
  18. memvcs/commands/garden.py +28 -37
  19. memvcs/commands/graph.py +41 -48
  20. memvcs/commands/init.py +16 -24
  21. memvcs/commands/log.py +25 -40
  22. memvcs/commands/merge.py +16 -28
  23. memvcs/commands/pack.py +129 -0
  24. memvcs/commands/pull.py +4 -1
  25. memvcs/commands/push.py +4 -2
  26. memvcs/commands/recall.py +145 -0
  27. memvcs/commands/reflog.py +13 -22
  28. memvcs/commands/remote.py +1 -0
  29. memvcs/commands/repair.py +66 -0
  30. memvcs/commands/reset.py +23 -33
  31. memvcs/commands/resurrect.py +82 -0
  32. memvcs/commands/search.py +3 -4
  33. memvcs/commands/serve.py +2 -1
  34. memvcs/commands/show.py +66 -36
  35. memvcs/commands/stash.py +34 -34
  36. memvcs/commands/status.py +27 -35
  37. memvcs/commands/tag.py +23 -47
  38. memvcs/commands/test.py +30 -44
  39. memvcs/commands/timeline.py +111 -0
  40. memvcs/commands/tree.py +26 -27
  41. memvcs/commands/verify.py +59 -0
  42. memvcs/commands/when.py +115 -0
  43. memvcs/core/access_index.py +167 -0
  44. memvcs/core/config_loader.py +3 -1
  45. memvcs/core/consistency.py +214 -0
  46. memvcs/core/decay.py +185 -0
  47. memvcs/core/diff.py +158 -143
  48. memvcs/core/distiller.py +277 -0
  49. memvcs/core/gardener.py +164 -132
  50. memvcs/core/hooks.py +48 -14
  51. memvcs/core/knowledge_graph.py +134 -138
  52. memvcs/core/merge.py +248 -171
  53. memvcs/core/objects.py +95 -96
  54. memvcs/core/pii_scanner.py +147 -146
  55. memvcs/core/refs.py +132 -115
  56. memvcs/core/repository.py +174 -164
  57. memvcs/core/schema.py +155 -113
  58. memvcs/core/staging.py +60 -65
  59. memvcs/core/storage/__init__.py +20 -18
  60. memvcs/core/storage/base.py +74 -70
  61. memvcs/core/storage/gcs.py +70 -68
  62. memvcs/core/storage/local.py +42 -40
  63. memvcs/core/storage/s3.py +105 -110
  64. memvcs/core/temporal_index.py +112 -0
  65. memvcs/core/test_runner.py +101 -93
  66. memvcs/core/vector_store.py +41 -35
  67. memvcs/integrations/mcp_server.py +1 -3
  68. memvcs/integrations/web_ui/server.py +25 -26
  69. memvcs/retrieval/__init__.py +22 -0
  70. memvcs/retrieval/base.py +54 -0
  71. memvcs/retrieval/pack.py +128 -0
  72. memvcs/retrieval/recaller.py +105 -0
  73. memvcs/retrieval/strategies.py +314 -0
  74. memvcs/utils/__init__.py +3 -3
  75. memvcs/utils/helpers.py +52 -52
  76. agmem-0.1.1.dist-info/RECORD +0 -67
  77. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/WHEEL +0 -0
  78. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/entry_points.txt +0 -0
  79. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/licenses/LICENSE +0 -0
  80. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,277 @@
1
+ """
2
+ Distiller - Episodic-to-semantic distillation pipeline for agmem.
3
+
4
+ Converts session logs into compact facts (like memory consolidation during sleep).
5
+ Extends Gardener with factual extraction and safety branches.
6
+ """
7
+
8
import shutil
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple

try:
    import yaml

    YAML_AVAILABLE = True
except ImportError:
    YAML_AVAILABLE = False

from .gardener import Gardener, GardenerConfig, EpisodeCluster
23
+
24
+
25
@dataclass
class DistillerConfig:
    """Configuration for the Distiller."""

    # Directory (under <repo>/current/) holding raw episodic session logs.
    source_dir: str = "episodic"
    # Directory (under <repo>/current/) where consolidated fact files are written.
    target_dir: str = "semantic/consolidated"
    # Directory name used for archiving; validated against current_dir in Distiller.__init__.
    archive_dir: str = "archive"
    # Passed through to GardenerConfig to control clustering.
    min_cluster_size: int = 3
    # Stamped into generated frontmatter as "confidence_score".
    extraction_confidence_threshold: float = 0.7
    # Prefix of the dated safety branch created by Distiller.run().
    safety_branch_prefix: str = "auto-distill/"
    # "openai" enables LLM-based fact extraction; anything else falls back to heuristics.
    llm_provider: Optional[str] = None
    # Model name for the LLM provider (e.g. an OpenAI chat model).
    llm_model: Optional[str] = None
    # When True, run() distills on a safety branch for human review before merge.
    create_safety_branch: bool = True
38
+
39
+
40
@dataclass
class DistillerResult:
    """Result of a distillation run."""

    # False is never produced by the current pipeline; reserved for hard failures.
    success: bool
    # Number of episode clusters that were distilled.
    clusters_processed: int
    # Total bullet-point facts written across all clusters.
    facts_extracted: int
    # Number of episode files moved into the archive.
    episodes_archived: int
    # Name of the safety branch used, if one was configured.
    branch_created: Optional[str] = None
    # Hash of the auto-commit, if staging/committing succeeded.
    commit_hash: Optional[str] = None
    # Human-readable summary of the run.
    message: str = ""
51
+
52
+
53
class Distiller:
    """
    Distills episodic memory into semantic facts.

    Pipeline: cluster episodes -> extract facts via LLM -> merge with semantic -> archive.
    Creates safety branch for human review before merging to main.
    """

    def __init__(self, repo: Any, config: Optional["DistillerConfig"] = None):
        """Bind the distiller to a repository and resolve its working directories.

        Args:
            repo: agmem repository object. The code relies on ``repo.root``,
                ``repo.current_dir``, ``repo.mem_dir``, ``repo.refs``,
                ``repo.checkout``, ``repo.stage_file`` and ``repo.commit``.
            config: Optional configuration; a default DistillerConfig is used
                when omitted.
        """
        self.repo = repo
        self.config = config or DistillerConfig()
        self.source_dir = repo.root / "current" / self.config.source_dir
        self.target_dir = repo.root / "current" / self.config.target_dir.rstrip("/")
        # Reject archive paths that would escape current_dir (e.g. "../evil");
        # fall back to the default "archive" subdirectory in that case.
        archive_candidate = repo.current_dir / self.config.archive_dir
        try:
            archive_candidate.resolve().relative_to(repo.current_dir.resolve())
            self.archive_dir = archive_candidate
        except (ValueError, RuntimeError):
            self.archive_dir = repo.current_dir / "archive"
        # Reuse Gardener for clustering; threshold=1 makes every topic eligible.
        self.gardener = Gardener(
            repo,
            GardenerConfig(
                threshold=1,
                archive_dir=self.config.archive_dir,
                min_cluster_size=self.config.min_cluster_size,
                llm_provider=self.config.llm_provider,
                llm_model=self.config.llm_model,
            ),
        )

    @staticmethod
    def _utcnow() -> datetime:
        """Return the current UTC time as a naive datetime.

        Drop-in replacement for the deprecated ``datetime.utcnow()``:
        formatting output (strftime/isoformat) is byte-identical.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None)

    def load_episodes_from(self, source_path: Path) -> List[Tuple[Path, str]]:
        """Load episodes from source directory.

        Returns a list of ``(path, text)`` pairs for every ``*.md`` file found
        recursively under *source_path*. Unreadable files are skipped; a
        missing directory yields an empty list.
        """
        episodes: List[Tuple[Path, str]] = []
        if not source_path.exists():
            return episodes
        for f in source_path.glob("**/*.md"):
            if f.is_file():
                try:
                    episodes.append((f, f.read_text(encoding="utf-8", errors="replace")))
                except Exception:
                    # Best-effort: one unreadable file must not abort the scan.
                    continue
        return episodes

    def cluster_episodes(self, episodes: List[Tuple[Path, str]]) -> List["EpisodeCluster"]:
        """Cluster episodes using Gardener's logic.

        Prefers embedding-based clustering; falls back to Gardener's plain
        clustering if the embedding path raises (e.g. missing vector backend).
        """
        try:
            return self.gardener.cluster_episodes_with_embeddings(episodes)
        except Exception:
            return self.gardener.cluster_episodes(episodes)

    def extract_facts(self, cluster: "EpisodeCluster") -> List[str]:
        """Extract factual statements from cluster via LLM or heuristics.

        Samples at most 10 episodes (1000 chars each). Uses OpenAI when
        configured; otherwise applies a keyword heuristic. Always returns at
        least one bullet line.
        """
        contents = []
        for ep_path in cluster.episodes[:10]:
            try:
                # Same tolerant decoding as load_episodes_from, so stray bytes
                # in an episode file cannot crash extraction.
                contents.append(ep_path.read_text(encoding="utf-8", errors="replace")[:1000])
            except Exception:
                continue
        combined = "\n---\n".join(contents)

        if self.config.llm_provider == "openai" and self.config.llm_model:
            try:
                return self._extract_with_openai(combined, cluster.topic)
            except Exception:
                # LLM failure degrades gracefully to the heuristic below.
                pass

        # Fallback: simple extraction — keep prose lines mentioning key verbs.
        facts = []
        for line in combined.splitlines():
            line = line.strip()
            if len(line) > 20 and not line.startswith("#") and not line.startswith("-"):
                if any(w in line.lower() for w in ["prefers", "likes", "uses", "learned", "user"]):
                    facts.append(f"- {line[:200]}")
        return facts[:10] if facts else [f"- Learned about {cluster.topic}"]

    def _extract_with_openai(self, content: str, topic: str) -> List[str]:
        """Extract facts using OpenAI API.

        Sends up to 4000 chars of *content*; returns at most 15 bullet lines
        from the model's reply. Raises if the openai package is missing or the
        request fails (callers catch and fall back).
        """
        import openai

        response = openai.chat.completions.create(
            model=self.config.llm_model or "gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": "Extract factual statements from the text. "
                    "Output as bullet points (one fact per line). "
                    "Focus on: user preferences, learned facts, key decisions.",
                },
                {
                    "role": "user",
                    "content": f"Topic: {topic}\n\n{content[:4000]}",
                },
            ],
            max_tokens=500,
        )
        text = response.choices[0].message.content
        return [line.strip() for line in text.splitlines() if line.strip().startswith("-")][:15]

    def write_consolidated(self, cluster: "EpisodeCluster", facts: List[str]) -> Path:
        """Write consolidated semantic file.

        Builds a slugged, dated filename under target_dir, refuses paths that
        resolve outside the repo's current/ tree, and prepends YAML
        frontmatter when PyYAML is available. Returns the written path.
        """
        self.target_dir.mkdir(parents=True, exist_ok=True)
        safe_topic = cluster.topic.replace(" ", "-").lower().replace("/", "_")[:30]
        ts = self._utcnow().strftime("%Y%m%d")
        filename = f"consolidated-{safe_topic}-{ts}.md"
        out_path = (self.target_dir / filename).resolve()
        try:
            out_path.relative_to(self.repo.current_dir.resolve())
        except ValueError:
            # Topic slug escaped current_dir; drop it from the filename.
            out_path = self.target_dir / f"consolidated-{ts}.md"

        frontmatter = {
            "schema_version": "1.0",
            "last_updated": self._utcnow().isoformat() + "Z",
            "source_agent_id": "distiller",
            "memory_type": "semantic",
            "tags": cluster.tags + ["auto-generated", "consolidated"],
            "confidence_score": self.config.extraction_confidence_threshold,
        }
        body = f"# Consolidated: {cluster.topic}\n\n" + "\n".join(facts)
        if YAML_AVAILABLE:
            # yaml is bound at module level when YAML_AVAILABLE is True.
            content = f"---\n{yaml.dump(frontmatter, default_flow_style=False)}---\n\n{body}"
        else:
            content = body
        out_path.write_text(content, encoding="utf-8")
        return out_path

    def archive_episodes(self, episodes: List[Path]) -> int:
        """Archive processed episodes to .mem/archive/.

        Moves each file into a timestamped subdirectory, sanitizing names and
        verifying the destination stays inside the archive root. Returns the
        number of files actually moved; failures are skipped silently.
        """
        archive_base = self.repo.mem_dir / "archive"
        archive_base.mkdir(parents=True, exist_ok=True)
        ts = self._utcnow().strftime("%Y%m%d-%H%M%S")
        archive_sub = archive_base / ts
        archive_sub.mkdir(exist_ok=True)
        count = 0
        for ep in episodes:
            try:
                safe_name = ep.name.replace("..", "_").replace("/", "_")
                dest = (archive_sub / safe_name).resolve()
                # Raises ValueError if the sanitized name still escapes the archive.
                dest.relative_to(archive_base.resolve())
                shutil.move(str(ep), str(dest))
                count += 1
            except Exception:
                # Best-effort archiving: skip any file that cannot be moved.
                continue
        return count

    def run(
        self,
        source: Optional[str] = None,
        target: Optional[str] = None,
        model: Optional[str] = None,
    ) -> "DistillerResult":
        """Run distillation pipeline.

        Args:
            source: Optional episodic source directory (relative paths are
                resolved under <repo>/current/).
            target: Optional output directory, resolved the same way.
            model: Optional LLM model name overriding the configured one.

        Returns:
            DistillerResult summarizing clusters, facts, archived episodes,
            the safety branch (if any) and the auto-commit hash (if any).
        """
        source_path = Path(source) if source else self.source_dir
        if not source_path.is_absolute():
            source_path = self.repo.root / "current" / source_path
        target_path = Path(target) if target else self.target_dir
        if not target_path.is_absolute():
            target_path = self.repo.root / "current" / target_path
        self.target_dir = target_path
        if model:
            self.config.llm_model = model

        episodes = self.load_episodes_from(source_path)
        if not episodes:
            return DistillerResult(
                success=True,
                clusters_processed=0,
                facts_extracted=0,
                episodes_archived=0,
                message="No episodes to process",
            )

        clusters = self.cluster_episodes(episodes)
        if not clusters:
            return DistillerResult(
                success=True,
                clusters_processed=0,
                facts_extracted=0,
                episodes_archived=0,
                message="No clusters formed",
            )

        # Create safety branch if configured, so auto-generated facts land on
        # a reviewable branch rather than directly on the current one.
        branch_name = None
        if self.config.create_safety_branch:
            ts = self._utcnow().strftime("%Y-%m-%d")
            branch_name = f"{self.config.safety_branch_prefix}{ts}"
            if not self.repo.refs.branch_exists(branch_name):
                self.repo.refs.create_branch(branch_name)
            self.repo.checkout(branch_name, force=True)

        facts_count = 0
        all_archived = []
        for cluster in clusters:
            facts = self.extract_facts(cluster)
            self.write_consolidated(cluster, facts)
            facts_count += len(facts)
            all_archived.extend(cluster.episodes)

        archived = self.archive_episodes(all_archived)

        commit_hash = None
        if facts_count > 0:
            try:
                # Stage every consolidated file in target_dir and auto-commit.
                for f in self.target_dir.glob("consolidated-*.md"):
                    rel = str(f.relative_to(self.repo.root / "current"))
                    self.repo.stage_file(rel)
                commit_hash = self.repo.commit(
                    f"distiller: consolidated {facts_count} facts from {len(episodes)} episodes",
                    {"distiller": True, "clusters": len(clusters)},
                )
            except Exception:
                # Commit is best-effort; distillation itself already succeeded.
                pass

        return DistillerResult(
            success=True,
            clusters_processed=len(clusters),
            facts_extracted=facts_count,
            episodes_archived=archived,
            branch_created=branch_name,
            commit_hash=commit_hash,
            message=f"Processed {len(clusters)} clusters, extracted {facts_count} facts",
        )