agmem-0.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agmem-0.1.1.dist-info/METADATA +656 -0
- agmem-0.1.1.dist-info/RECORD +67 -0
- agmem-0.1.1.dist-info/WHEEL +5 -0
- agmem-0.1.1.dist-info/entry_points.txt +2 -0
- agmem-0.1.1.dist-info/licenses/LICENSE +21 -0
- agmem-0.1.1.dist-info/top_level.txt +1 -0
- memvcs/__init__.py +9 -0
- memvcs/cli.py +178 -0
- memvcs/commands/__init__.py +23 -0
- memvcs/commands/add.py +258 -0
- memvcs/commands/base.py +23 -0
- memvcs/commands/blame.py +169 -0
- memvcs/commands/branch.py +110 -0
- memvcs/commands/checkout.py +101 -0
- memvcs/commands/clean.py +76 -0
- memvcs/commands/clone.py +91 -0
- memvcs/commands/commit.py +174 -0
- memvcs/commands/daemon.py +267 -0
- memvcs/commands/diff.py +157 -0
- memvcs/commands/fsck.py +203 -0
- memvcs/commands/garden.py +107 -0
- memvcs/commands/graph.py +151 -0
- memvcs/commands/init.py +61 -0
- memvcs/commands/log.py +103 -0
- memvcs/commands/mcp.py +59 -0
- memvcs/commands/merge.py +88 -0
- memvcs/commands/pull.py +65 -0
- memvcs/commands/push.py +143 -0
- memvcs/commands/reflog.py +52 -0
- memvcs/commands/remote.py +51 -0
- memvcs/commands/reset.py +98 -0
- memvcs/commands/search.py +163 -0
- memvcs/commands/serve.py +54 -0
- memvcs/commands/show.py +125 -0
- memvcs/commands/stash.py +97 -0
- memvcs/commands/status.py +112 -0
- memvcs/commands/tag.py +117 -0
- memvcs/commands/test.py +132 -0
- memvcs/commands/tree.py +156 -0
- memvcs/core/__init__.py +21 -0
- memvcs/core/config_loader.py +245 -0
- memvcs/core/constants.py +12 -0
- memvcs/core/diff.py +380 -0
- memvcs/core/gardener.py +466 -0
- memvcs/core/hooks.py +151 -0
- memvcs/core/knowledge_graph.py +381 -0
- memvcs/core/merge.py +474 -0
- memvcs/core/objects.py +323 -0
- memvcs/core/pii_scanner.py +343 -0
- memvcs/core/refs.py +447 -0
- memvcs/core/remote.py +278 -0
- memvcs/core/repository.py +522 -0
- memvcs/core/schema.py +414 -0
- memvcs/core/staging.py +227 -0
- memvcs/core/storage/__init__.py +72 -0
- memvcs/core/storage/base.py +359 -0
- memvcs/core/storage/gcs.py +308 -0
- memvcs/core/storage/local.py +182 -0
- memvcs/core/storage/s3.py +369 -0
- memvcs/core/test_runner.py +371 -0
- memvcs/core/vector_store.py +313 -0
- memvcs/integrations/__init__.py +5 -0
- memvcs/integrations/mcp_server.py +267 -0
- memvcs/integrations/web_ui/__init__.py +1 -0
- memvcs/integrations/web_ui/server.py +352 -0
- memvcs/utils/__init__.py +9 -0
- memvcs/utils/helpers.py +178 -0
memvcs/core/gardener.py
ADDED
@@ -0,0 +1,466 @@

```python
"""
Gardener - The "Hindsight" reflection loop for agmem.

A background process that synthesizes raw episodic logs into semantic insights,
turning noise into wisdom over time.
"""

import os
import json
import shutil
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass, field
from datetime import datetime
from collections import defaultdict

try:
    import yaml
    YAML_AVAILABLE = True
except ImportError:
    YAML_AVAILABLE = False


@dataclass
class EpisodeCluster:
    """A cluster of related episodes."""
    topic: str
    episodes: List[Path]
    summary: Optional[str] = None
    tags: List[str] = field(default_factory=list)


@dataclass
class GardenerConfig:
    """Configuration for the Gardener."""
    threshold: int = 50  # Number of episodic files before triggering
    archive_dir: str = "archive"
    min_cluster_size: int = 3
    max_clusters: int = 10
    llm_provider: Optional[str] = None  # "openai", "anthropic", etc.
    llm_model: Optional[str] = None
    auto_commit: bool = True


@dataclass
class GardenerResult:
    """Result of a gardener run."""
    success: bool
    clusters_found: int
    insights_generated: int
    episodes_archived: int
    commit_hash: Optional[str] = None
    message: str = ""


class Gardener:
    """
    The Gardener agent that refines memory over time.

    Wakes up when episodic/ files exceed a threshold, clusters them by topic,
    generates summaries, and archives the raw episodes.
    """

    def __init__(self, repo, config: Optional[GardenerConfig] = None):
        """
        Initialize the Gardener.

        Args:
            repo: Repository instance
            config: Optional configuration
        """
        self.repo = repo
        self.config = config or GardenerConfig()
        self.episodic_dir = repo.root / 'current' / 'episodic'
        self.semantic_dir = repo.root / 'current' / 'semantic'
        # Ensure archive_dir stays under current/ (path safety)
        try:
            archive_candidate = (repo.current_dir / self.config.archive_dir).resolve()
            archive_candidate.relative_to(repo.current_dir.resolve())
            self.archive_dir = archive_candidate
        except (ValueError, RuntimeError):
            self.archive_dir = repo.current_dir / 'archive'

    def should_run(self) -> bool:
        """Check if the Gardener should run based on threshold."""
        if not self.episodic_dir.exists():
            return False

        episode_count = len(list(self.episodic_dir.glob('**/*.md')))
        return episode_count >= self.config.threshold

    def get_episode_count(self) -> int:
        """Get the current number of episodic files."""
        if not self.episodic_dir.exists():
            return 0
        return len(list(self.episodic_dir.glob('**/*.md')))

    def load_episodes(self) -> List[Tuple[Path, str]]:
        """
        Load all episodic files.

        Returns:
            List of (path, content) tuples
        """
        episodes = []

        if not self.episodic_dir.exists():
            return episodes

        for episode_file in self.episodic_dir.glob('**/*.md'):
            try:
                content = episode_file.read_text()
                episodes.append((episode_file, content))
            except Exception:
                continue

        return episodes

    def cluster_episodes(self, episodes: List[Tuple[Path, str]]) -> List[EpisodeCluster]:
        """
        Cluster episodes by topic using keyword analysis.

        For more sophisticated clustering, this could use embeddings with k-means.

        Args:
            episodes: List of (path, content) tuples

        Returns:
            List of EpisodeCluster objects
        """
        # Simple keyword-based clustering
        keyword_to_episodes: Dict[str, List[Path]] = defaultdict(list)

        # Common programming/tech keywords to look for
        keywords = [
            'python', 'javascript', 'typescript', 'rust', 'go',
            'error', 'bug', 'fix', 'debug', 'issue',
            'api', 'database', 'server', 'client', 'frontend', 'backend',
            'test', 'testing', 'deploy', 'deployment',
            'config', 'setup', 'install', 'environment',
            'performance', 'optimization', 'memory', 'cache',
            'security', 'auth', 'authentication', 'permission',
            'user', 'preference', 'setting', 'option',
        ]

        for path, content in episodes:
            content_lower = content.lower()
            found_keywords = []

            for keyword in keywords:
                if keyword in content_lower:
                    found_keywords.append(keyword)
                    keyword_to_episodes[keyword].append(path)

        # Create clusters from keywords with enough episodes
        clusters = []
        used_episodes = set()

        # Sort by number of episodes (descending)
        sorted_keywords = sorted(
            keyword_to_episodes.items(),
            key=lambda x: len(x[1]),
            reverse=True
        )

        for keyword, episode_paths in sorted_keywords:
            if len(clusters) >= self.config.max_clusters:
                break

            # Filter out already-used episodes
            unused_paths = [p for p in episode_paths if p not in used_episodes]

            if len(unused_paths) >= self.config.min_cluster_size:
                clusters.append(EpisodeCluster(
                    topic=keyword,
                    episodes=unused_paths,
                    tags=[keyword]
                ))
                used_episodes.update(unused_paths)

        return clusters

    def cluster_episodes_with_embeddings(self, episodes: List[Tuple[Path, str]]) -> List[EpisodeCluster]:
        """
        Cluster episodes using embeddings and k-means.

        Requires scikit-learn and sentence-transformers.
        """
        try:
            from sklearn.cluster import KMeans
            from sentence_transformers import SentenceTransformer
        except ImportError:
            # Fall back to keyword clustering
            return self.cluster_episodes(episodes)

        if len(episodes) < self.config.min_cluster_size:
            return []

        # Generate embeddings
        model = SentenceTransformer('all-MiniLM-L6-v2')
        texts = [content[:2000] for _, content in episodes]  # Truncate long texts
        embeddings = model.encode(texts)

        # Determine number of clusters
        n_clusters = min(self.config.max_clusters, len(episodes) // self.config.min_cluster_size)
        n_clusters = max(1, n_clusters)

        # Cluster
        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        labels = kmeans.fit_predict(embeddings)

        # Group episodes by cluster
        cluster_episodes: Dict[int, List[Tuple[Path, str]]] = defaultdict(list)
        for i, (path, content) in enumerate(episodes):
            cluster_episodes[labels[i]].append((path, content))

        # Create cluster objects
        clusters = []
        for cluster_id, eps in cluster_episodes.items():
            if len(eps) >= self.config.min_cluster_size:
                # Extract topic from first few words of first episode
                first_content = eps[0][1]
                topic = self._extract_topic(first_content)

                clusters.append(EpisodeCluster(
                    topic=topic,
                    episodes=[p for p, _ in eps]
                ))

        return clusters

    def _extract_topic(self, content: str) -> str:
        """Extract a topic label from content."""
        # Take first line or first 50 chars
        lines = content.strip().split('\n')
        first_line = lines[0] if lines else content[:50]

        # Clean up
        topic = first_line.strip('#').strip()
        if len(topic) > 50:
            topic = topic[:47] + '...'

        return topic or "general"

    def generate_summary(self, cluster: EpisodeCluster) -> str:
        """
        Generate a summary for a cluster of episodes.

        Uses LLM if configured, otherwise generates a simple summary.
        """
        # Collect content from episodes
        contents = []
        for episode_path in cluster.episodes[:10]:  # Limit to 10 episodes
            try:
                content = episode_path.read_text()
                contents.append(content[:1000])  # Truncate
            except Exception:
                continue

        combined = '\n---\n'.join(contents)

        # Try LLM summarization
        if self.config.llm_provider == 'openai' and self.config.llm_model:
            try:
                return self._summarize_with_openai(combined, cluster.topic)
            except Exception:
                pass

        # Fall back to simple summary
        return self._simple_summary(cluster, contents)

    def _summarize_with_openai(self, content: str, topic: str) -> str:
        """Summarize using OpenAI API."""
        import openai

        response = openai.chat.completions.create(
            model=self.config.llm_model or 'gpt-3.5-turbo',
            messages=[
                {
                    'role': 'system',
                    'content': 'You are a helpful assistant that summarizes conversation logs into actionable insights.'
                },
                {
                    'role': 'user',
                    'content': f"Summarize these conversation logs about '{topic}' into 2-3 key insights:\n\n{content[:4000]}"
                }
            ],
            max_tokens=500
        )

        return response.choices[0].message.content

    def _simple_summary(self, cluster: EpisodeCluster, contents: List[str]) -> str:
        """Generate a simple summary without LLM."""
        return f"""# Insights: {cluster.topic.title()}

**Summary**: The user had {len(cluster.episodes)} conversations related to {cluster.topic}.

**Common themes observed**:
- Multiple discussions about {cluster.topic}
- Recurring questions and patterns detected

**Generated**: {datetime.utcnow().isoformat()}Z

---
*This summary was auto-generated by the Gardener. Review and edit as needed.*
"""

    def write_insight(self, cluster: EpisodeCluster) -> Path:
        """
        Write cluster summary to semantic memory.

        Returns:
            Path to the written insight file
        """
        self.semantic_dir.mkdir(parents=True, exist_ok=True)

        # Generate filename (sanitize topic to avoid path traversal)
        timestamp = datetime.utcnow().strftime('%Y%m%d')
        safe_topic = cluster.topic.replace(' ', '-').lower().replace('/', '_').replace('\\', '_')[:30]
        filename = f"insight-{safe_topic}-{timestamp}.md"
        insight_path = (self.semantic_dir / filename).resolve()
        try:
            insight_path.relative_to(self.repo.current_dir.resolve())
        except ValueError:
            insight_path = self.semantic_dir / f"insight-{timestamp}.md"

        # Generate frontmatter
        frontmatter = {
            'schema_version': '1.0',
            'last_updated': datetime.utcnow().isoformat() + 'Z',
            'source_agent_id': 'gardener',
            'memory_type': 'semantic',
            'tags': cluster.tags + ['auto-generated', 'insight'],
            'source_episodes': len(cluster.episodes)
        }

        # Write file
        if YAML_AVAILABLE:
            import yaml
            content = f"---\n{yaml.dump(frontmatter, default_flow_style=False)}---\n\n{cluster.summary}"
        else:
            content = cluster.summary

        insight_path.write_text(content)
        return insight_path

    def archive_episodes(self, episodes: List[Path]) -> int:
        """
        Archive processed episodes.

        Moves files to archive directory with timestamp prefix.

        Returns:
            Number of files archived
        """
        self.archive_dir.mkdir(parents=True, exist_ok=True)

        timestamp = datetime.utcnow().strftime('%Y%m%d-%H%M%S')
        archive_subdir = self.archive_dir / timestamp
        archive_subdir.mkdir(exist_ok=True)

        count = 0
        for episode_path in episodes:
            try:
                safe_name = episode_path.name.replace('..', '_').replace('/', '_').replace('\\', '_')
                dest = (archive_subdir / safe_name).resolve()
                dest.relative_to(self.archive_dir.resolve())
                shutil.move(str(episode_path), str(dest))
                count += 1
            except (ValueError, Exception):
                continue

        return count

    def run(self, force: bool = False) -> GardenerResult:
        """
        Run the Gardener process.

        Args:
            force: Run even if threshold not met

        Returns:
            GardenerResult with operation details
        """
        if not force and not self.should_run():
            return GardenerResult(
                success=True,
                clusters_found=0,
                insights_generated=0,
                episodes_archived=0,
                message=f"Threshold not met ({self.get_episode_count()}/{self.config.threshold} episodes)"
            )

        # Load episodes
        episodes = self.load_episodes()
        if not episodes:
            return GardenerResult(
                success=True,
                clusters_found=0,
                insights_generated=0,
                episodes_archived=0,
                message="No episodes to process"
            )

        # Cluster episodes
        try:
            clusters = self.cluster_episodes_with_embeddings(episodes)
        except Exception:
            clusters = self.cluster_episodes(episodes)

        if not clusters:
            return GardenerResult(
                success=True,
                clusters_found=0,
                insights_generated=0,
                episodes_archived=0,
                message="No clusters could be formed"
            )

        # Generate summaries and write insights
        insights_written = 0
        all_archived_episodes = []

        for cluster in clusters:
            try:
                # Generate summary
                cluster.summary = self.generate_summary(cluster)

                # Write insight
                self.write_insight(cluster)
                insights_written += 1

                # Track episodes to archive
                all_archived_episodes.extend(cluster.episodes)
            except Exception as e:
                print(f"Warning: Failed to process cluster '{cluster.topic}': {e}")

        # Archive processed episodes
        archived_count = self.archive_episodes(all_archived_episodes)

        # Auto-commit if configured
        commit_hash = None
        if self.config.auto_commit and insights_written > 0:
            try:
                # Stage new insights
                for insight_file in self.semantic_dir.glob('insight-*.md'):
                    rel_path = str(insight_file.relative_to(self.repo.root / 'current'))
                    self.repo.stage_file(f"current/{rel_path}")

                # Commit
                commit_hash = self.repo.commit(
                    f"gardener: synthesized {insights_written} insights from {archived_count} episodes",
                    {'gardener': True, 'clusters': len(clusters)}
                )
            except Exception as e:
                print(f"Warning: Auto-commit failed: {e}")

        return GardenerResult(
            success=True,
            clusters_found=len(clusters),
            insights_generated=insights_written,
            episodes_archived=archived_count,
            commit_hash=commit_hash,
            message=f"Processed {len(clusters)} clusters, generated {insights_written} insights"
        )
```
memvcs/core/hooks.py
ADDED
@@ -0,0 +1,151 @@

```python
"""
Pre-commit hooks for agmem.

Provides hook infrastructure for validation before commits.
PII scanning can be disabled or allowlisted via agmem config (pii.enabled, pii.allowlist).
"""

import fnmatch
from dataclasses import dataclass, field
from typing import List, Dict, Any, Callable, Optional
from pathlib import Path

PII_SEVERITY_HIGH = "high"


@dataclass
class HookResult:
    """Result of running hooks."""
    success: bool
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)

    def add_error(self, message: str):
        """Add an error and mark as failed."""
        self.errors.append(message)
        self.success = False

    def add_warning(self, message: str):
        """Add a warning (doesn't affect success)."""
        self.warnings.append(message)


def _is_path_allowlisted(filepath: str, patterns: List[str]) -> bool:
    """Return True if filepath matches any allowlist glob pattern."""
    return any(fnmatch.fnmatch(filepath, pat) for pat in patterns)


def _pii_staged_files_to_scan(repo, staged_files: Dict[str, Any]) -> Dict[str, Any]:
    """Return staged files to scan for PII (excludes allowlisted paths)."""
    try:
        from .config_loader import load_agmem_config, pii_enabled, pii_allowlist
        config = load_agmem_config(getattr(repo, "root", None))
    except ImportError:
        return staged_files
    if not pii_enabled(config):
        return {}
    patterns = pii_allowlist(config)
    if not patterns:
        return staged_files
    return {
        filepath: info
        for filepath, info in staged_files.items()
        if not _is_path_allowlisted(filepath, patterns)
    }


def _run_pii_hook(repo, staged_files: Dict[str, Any], result: HookResult) -> None:
    """Run PII scanner on staged files; high severity → error, else → warning."""
    try:
        from .pii_scanner import PIIScanner
        to_scan = _pii_staged_files_to_scan(repo, staged_files)
        pii_result = PIIScanner.scan_staged_files(repo, to_scan)
        if not pii_result.has_issues:
            return
        for issue in pii_result.issues:
            msg = f"PII detected in {issue.filepath}: {issue.description}"
            if issue.severity == PII_SEVERITY_HIGH:
                result.add_error(msg)
            else:
                result.add_warning(msg)
    except ImportError:
        pass
    except Exception as e:
        result.add_warning(f"PII scanner failed: {e}")


def run_pre_commit_hooks(repo, staged_files: Dict[str, Any]) -> HookResult:
    """
    Run all pre-commit hooks on staged files.

    Args:
        repo: Repository instance
        staged_files: Dict of staged files with their info

    Returns:
        HookResult with success status and any errors/warnings
    """
    result = HookResult(success=True)
    _run_pii_hook(repo, staged_files, result)
    file_type_result = validate_file_types(repo, staged_files)
    if not file_type_result.success:
        for error in file_type_result.errors:
            result.add_error(error)
    for warning in file_type_result.warnings:
        result.add_warning(warning)

    return result


def validate_file_types(repo, staged_files: Dict[str, Any]) -> HookResult:
    """
    Validate that staged files are allowed types.

    Args:
        repo: Repository instance
        staged_files: Dict of staged files

    Returns:
        HookResult with validation status
    """
    result = HookResult(success=True)

    # Get config for allowed extensions
    config = repo.get_config()
    allowed_extensions = config.get('allowed_extensions', ['.md', '.txt', '.json', '.yaml', '.yml'])

    for filepath in staged_files.keys():
        path = Path(filepath)
        ext = path.suffix.lower()

        # Skip files without extensions (might be valid)
        if not ext:
            continue

        # Check if extension is allowed
        if ext not in allowed_extensions:
            result.add_warning(
                f"File '{filepath}' has extension '{ext}' which may not be optimal for memory storage. "
                f"Recommended: {', '.join(allowed_extensions)}"
            )

    return result


# Hook registry for custom hooks
_registered_hooks: List[Callable] = []


def register_hook(hook_fn: Callable):
    """
    Register a custom pre-commit hook.

    Args:
        hook_fn: Function that takes (repo, staged_files) and returns HookResult
    """
    _registered_hooks.append(hook_fn)


def get_registered_hooks() -> List[Callable]:
    """Get all registered hooks."""
    return _registered_hooks.copy()
```