agmem 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/METADATA +157 -16
- agmem-0.1.3.dist-info/RECORD +105 -0
- memvcs/__init__.py +1 -1
- memvcs/cli.py +45 -31
- memvcs/commands/__init__.py +9 -9
- memvcs/commands/add.py +83 -76
- memvcs/commands/audit.py +59 -0
- memvcs/commands/blame.py +46 -53
- memvcs/commands/branch.py +13 -33
- memvcs/commands/checkout.py +27 -32
- memvcs/commands/clean.py +18 -23
- memvcs/commands/clone.py +11 -1
- memvcs/commands/commit.py +40 -39
- memvcs/commands/daemon.py +109 -76
- memvcs/commands/decay.py +77 -0
- memvcs/commands/diff.py +56 -57
- memvcs/commands/distill.py +90 -0
- memvcs/commands/federated.py +53 -0
- memvcs/commands/fsck.py +86 -61
- memvcs/commands/garden.py +40 -35
- memvcs/commands/gc.py +51 -0
- memvcs/commands/graph.py +41 -48
- memvcs/commands/init.py +16 -24
- memvcs/commands/log.py +25 -40
- memvcs/commands/merge.py +69 -27
- memvcs/commands/pack.py +129 -0
- memvcs/commands/prove.py +66 -0
- memvcs/commands/pull.py +31 -1
- memvcs/commands/push.py +4 -2
- memvcs/commands/recall.py +145 -0
- memvcs/commands/reflog.py +13 -22
- memvcs/commands/remote.py +1 -0
- memvcs/commands/repair.py +66 -0
- memvcs/commands/reset.py +23 -33
- memvcs/commands/resolve.py +130 -0
- memvcs/commands/resurrect.py +82 -0
- memvcs/commands/search.py +3 -4
- memvcs/commands/serve.py +2 -1
- memvcs/commands/show.py +66 -36
- memvcs/commands/stash.py +34 -34
- memvcs/commands/status.py +27 -35
- memvcs/commands/tag.py +23 -47
- memvcs/commands/test.py +30 -44
- memvcs/commands/timeline.py +111 -0
- memvcs/commands/tree.py +26 -27
- memvcs/commands/verify.py +110 -0
- memvcs/commands/when.py +115 -0
- memvcs/core/access_index.py +167 -0
- memvcs/core/audit.py +124 -0
- memvcs/core/config_loader.py +3 -1
- memvcs/core/consistency.py +214 -0
- memvcs/core/crypto_verify.py +280 -0
- memvcs/core/decay.py +185 -0
- memvcs/core/diff.py +158 -143
- memvcs/core/distiller.py +277 -0
- memvcs/core/encryption.py +169 -0
- memvcs/core/federated.py +86 -0
- memvcs/core/gardener.py +176 -145
- memvcs/core/hooks.py +48 -14
- memvcs/core/ipfs_remote.py +39 -0
- memvcs/core/knowledge_graph.py +135 -138
- memvcs/core/llm/__init__.py +10 -0
- memvcs/core/llm/anthropic_provider.py +50 -0
- memvcs/core/llm/base.py +27 -0
- memvcs/core/llm/factory.py +30 -0
- memvcs/core/llm/openai_provider.py +36 -0
- memvcs/core/merge.py +260 -170
- memvcs/core/objects.py +110 -101
- memvcs/core/pack.py +92 -0
- memvcs/core/pii_scanner.py +147 -146
- memvcs/core/privacy_budget.py +63 -0
- memvcs/core/refs.py +132 -115
- memvcs/core/remote.py +38 -0
- memvcs/core/repository.py +254 -164
- memvcs/core/schema.py +155 -113
- memvcs/core/staging.py +60 -65
- memvcs/core/storage/__init__.py +20 -18
- memvcs/core/storage/base.py +74 -70
- memvcs/core/storage/gcs.py +70 -68
- memvcs/core/storage/local.py +42 -40
- memvcs/core/storage/s3.py +105 -110
- memvcs/core/temporal_index.py +121 -0
- memvcs/core/test_runner.py +101 -93
- memvcs/core/trust.py +103 -0
- memvcs/core/vector_store.py +56 -36
- memvcs/core/zk_proofs.py +26 -0
- memvcs/integrations/mcp_server.py +1 -3
- memvcs/integrations/web_ui/server.py +25 -26
- memvcs/retrieval/__init__.py +22 -0
- memvcs/retrieval/base.py +54 -0
- memvcs/retrieval/pack.py +128 -0
- memvcs/retrieval/recaller.py +105 -0
- memvcs/retrieval/strategies.py +314 -0
- memvcs/utils/__init__.py +3 -3
- memvcs/utils/helpers.py +52 -52
- agmem-0.1.1.dist-info/RECORD +0 -67
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/WHEEL +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/top_level.txt +0 -0
memvcs/core/gardener.py
CHANGED
In the reconstructed hunks below, removed (`-`) lines appear exactly as the registry renders them; several are truncated mid-line because the viewer drops the changed portion of the old text.

```diff
@@ -16,6 +16,7 @@ from collections import defaultdict
 
 try:
     import yaml
+
     YAML_AVAILABLE = True
 except ImportError:
     YAML_AVAILABLE = False
```
```diff
@@ -24,6 +25,7 @@ except ImportError:
 @dataclass
 class EpisodeCluster:
     """A cluster of related episodes."""
+
     topic: str
     episodes: List[Path]
     summary: Optional[str] = None
```
```diff
@@ -33,6 +35,7 @@ class EpisodeCluster:
 @dataclass
 class GardenerConfig:
     """Configuration for the Gardener."""
+
     threshold: int = 50  # Number of episodic files before triggering
     archive_dir: str = "archive"
     min_cluster_size: int = 3
```
```diff
@@ -45,6 +48,7 @@ class GardenerConfig:
 @dataclass
 class GardenerResult:
     """Result of a gardener run."""
+
     success: bool
     clusters_found: int
     insights_generated: int
```
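Taken together, these three dataclasses are the Gardener's public surface: `GardenerConfig` tunes it, `GardenerResult` reports on a run. A minimal usage sketch, assuming an existing memvcs `Repository` instance `repo`; the field names come from the hunks above and below, while the values are illustrative:

```python
from memvcs.core.gardener import Gardener, GardenerConfig

# Illustrative values; only the field names are taken from the diff.
config = GardenerConfig(
    threshold=25,           # wake up after 25 episodic files instead of 50
    archive_dir="archive",  # must resolve under current/ (checked in __init__)
    min_cluster_size=3,     # smaller keyword groups never become clusters
)

gardener = Gardener(repo, config)
if gardener.should_run():
    result = gardener.run()
    print(result.message)
```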
```diff
@@ -56,134 +60,158 @@ class GardenerResult:
 class Gardener:
     """
     The Gardener agent that refines memory over time.
-
+
     Wakes up when episodic/ files exceed a threshold, clusters them by topic,
     generates summaries, and archives the raw episodes.
     """
-
+
     def __init__(self, repo, config: Optional[GardenerConfig] = None):
         """
         Initialize the Gardener.
-
+
         Args:
             repo: Repository instance
             config: Optional configuration
         """
         self.repo = repo
         self.config = config or GardenerConfig()
-        self.episodic_dir = repo.root /
-        self.semantic_dir = repo.root /
+        self.episodic_dir = repo.root / "current" / "episodic"
+        self.semantic_dir = repo.root / "current" / "semantic"
         # Ensure archive_dir stays under current/ (path safety)
         try:
             archive_candidate = (repo.current_dir / self.config.archive_dir).resolve()
             archive_candidate.relative_to(repo.current_dir.resolve())
             self.archive_dir = archive_candidate
         except (ValueError, RuntimeError):
-            self.archive_dir = repo.current_dir /
-
+            self.archive_dir = repo.current_dir / "archive"
+
     def should_run(self) -> bool:
         """Check if the Gardener should run based on threshold."""
         if not self.episodic_dir.exists():
             return False
-
-        episode_count = len(list(self.episodic_dir.glob(
+
+        episode_count = len(list(self.episodic_dir.glob("**/*.md")))
         return episode_count >= self.config.threshold
-
+
     def get_episode_count(self) -> int:
         """Get the current number of episodic files."""
         if not self.episodic_dir.exists():
             return 0
-        return len(list(self.episodic_dir.glob(
-
+        return len(list(self.episodic_dir.glob("**/*.md")))
+
     def load_episodes(self) -> List[Tuple[Path, str]]:
         """
         Load all episodic files.
-
+
         Returns:
             List of (path, content) tuples
         """
         episodes = []
-
+
         if not self.episodic_dir.exists():
             return episodes
-
-        for episode_file in self.episodic_dir.glob(
+
+        for episode_file in self.episodic_dir.glob("**/*.md"):
             try:
                 content = episode_file.read_text()
                 episodes.append((episode_file, content))
             except Exception:
                 continue
-
+
         return episodes
-
+
     def cluster_episodes(self, episodes: List[Tuple[Path, str]]) -> List[EpisodeCluster]:
         """
         Cluster episodes by topic using keyword analysis.
-
+
         For more sophisticated clustering, this could use embeddings with k-means.
-
+
         Args:
             episodes: List of (path, content) tuples
-
+
         Returns:
             List of EpisodeCluster objects
         """
         # Simple keyword-based clustering
         keyword_to_episodes: Dict[str, List[Path]] = defaultdict(list)
-
+
         # Common programming/tech keywords to look for
         keywords = [
-
-
-
-
-
-
-
-
+            "python",
+            "javascript",
+            "typescript",
+            "rust",
+            "go",
+            "error",
+            "bug",
+            "fix",
+            "debug",
+            "issue",
+            "api",
+            "database",
+            "server",
+            "client",
+            "frontend",
+            "backend",
+            "test",
+            "testing",
+            "deploy",
+            "deployment",
+            "config",
+            "setup",
+            "install",
+            "environment",
+            "performance",
+            "optimization",
+            "memory",
+            "cache",
+            "security",
+            "auth",
+            "authentication",
+            "permission",
+            "user",
+            "preference",
+            "setting",
+            "option",
         ]
-
+
         for path, content in episodes:
             content_lower = content.lower()
             found_keywords = []
-
+
             for keyword in keywords:
                 if keyword in content_lower:
                     found_keywords.append(keyword)
                     keyword_to_episodes[keyword].append(path)
-
+
         # Create clusters from keywords with enough episodes
         clusters = []
         used_episodes = set()
-
+
         # Sort by number of episodes (descending)
-        sorted_keywords = sorted(
-
-            key=lambda x: len(x[1]),
-            reverse=True
-        )
-
+        sorted_keywords = sorted(keyword_to_episodes.items(), key=lambda x: len(x[1]), reverse=True)
+
         for keyword, episode_paths in sorted_keywords:
             if len(clusters) >= self.config.max_clusters:
                 break
-
+
             # Filter out already-used episodes
             unused_paths = [p for p in episode_paths if p not in used_episodes]
-
+
             if len(unused_paths) >= self.config.min_cluster_size:
-                clusters.append(
-                    topic=keyword,
-
-                    tags=[keyword]
-                ))
+                clusters.append(
+                    EpisodeCluster(topic=keyword, episodes=unused_paths, tags=[keyword])
+                )
                 used_episodes.update(unused_paths)
-
+
         return clusters
-
-    def cluster_episodes_with_embeddings(
+
+    def cluster_episodes_with_embeddings(
+        self, episodes: List[Tuple[Path, str]]
+    ) -> List[EpisodeCluster]:
         """
         Cluster episodes using embeddings and k-means.
-
+
         Requires scikit-learn and sentence-transformers.
         """
         try:
```
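Two spots in this hunk (`archive_dir` in `__init__`, and again in `write_insight` and `archive_episodes` below) rely on the same resolve-then-`relative_to` containment idiom. A standalone sketch of that pattern; the function name and fallback here are hypothetical, only the `pathlib` calls mirror the diff:

```python
from pathlib import Path

def confine(base: Path, candidate: str) -> Path:
    """Resolve `candidate` against `base`; fall back to a safe default
    if the result escapes `base` (e.g. via `..` segments)."""
    resolved = (base / candidate).resolve()
    try:
        resolved.relative_to(base.resolve())  # raises ValueError when outside base
        return resolved
    except ValueError:
        return base.resolve() / "archive"

base = Path("current")
print(confine(base, "notes/archive"))  # kept: .../current/notes/archive
print(confine(base, "../../etc"))      # escapes: falls back to .../current/archive
```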
```diff
@@ -192,28 +220,28 @@ class Gardener:
         except ImportError:
             # Fall back to keyword clustering
             return self.cluster_episodes(episodes)
-
+
         if len(episodes) < self.config.min_cluster_size:
             return []
-
+
         # Generate embeddings
-        model = SentenceTransformer(
+        model = SentenceTransformer("all-MiniLM-L6-v2")
         texts = [content[:2000] for _, content in episodes]  # Truncate long texts
         embeddings = model.encode(texts)
-
+
         # Determine number of clusters
         n_clusters = min(self.config.max_clusters, len(episodes) // self.config.min_cluster_size)
         n_clusters = max(1, n_clusters)
-
+
         # Cluster
         kmeans = KMeans(n_clusters=n_clusters, random_state=42)
         labels = kmeans.fit_predict(embeddings)
-
+
         # Group episodes by cluster
         cluster_episodes: Dict[int, List[Tuple[Path, str]]] = defaultdict(list)
         for i, (path, content) in enumerate(episodes):
             cluster_episodes[labels[i]].append((path, content))
-
+
         # Create cluster objects
         clusters = []
         for cluster_id, eps in cluster_episodes.items():
```
|
|
|
221
249
|
# Extract topic from first few words of first episode
|
|
222
250
|
first_content = eps[0][1]
|
|
223
251
|
topic = self._extract_topic(first_content)
|
|
224
|
-
|
|
225
|
-
clusters.append(EpisodeCluster(
|
|
226
|
-
|
|
227
|
-
episodes=[p for p, _ in eps]
|
|
228
|
-
))
|
|
229
|
-
|
|
252
|
+
|
|
253
|
+
clusters.append(EpisodeCluster(topic=topic, episodes=[p for p, _ in eps]))
|
|
254
|
+
|
|
230
255
|
return clusters
|
|
231
|
-
|
|
256
|
+
|
|
232
257
|
def _extract_topic(self, content: str) -> str:
|
|
233
258
|
"""Extract a topic label from content."""
|
|
234
259
|
# Take first line or first 50 chars
|
|
235
|
-
lines = content.strip().split(
|
|
260
|
+
lines = content.strip().split("\n")
|
|
236
261
|
first_line = lines[0] if lines else content[:50]
|
|
237
|
-
|
|
262
|
+
|
|
238
263
|
# Clean up
|
|
239
|
-
topic = first_line.strip(
|
|
264
|
+
topic = first_line.strip("#").strip()
|
|
240
265
|
if len(topic) > 50:
|
|
241
|
-
topic = topic[:47] +
|
|
242
|
-
|
|
266
|
+
topic = topic[:47] + "..."
|
|
267
|
+
|
|
243
268
|
return topic or "general"
|
|
244
|
-
|
|
269
|
+
|
|
245
270
|
def generate_summary(self, cluster: EpisodeCluster) -> str:
|
|
246
271
|
"""
|
|
247
272
|
Generate a summary for a cluster of episodes.
|
|
248
|
-
|
|
273
|
+
|
|
249
274
|
Uses LLM if configured, otherwise generates a simple summary.
|
|
250
275
|
"""
|
|
251
276
|
# Collect content from episodes
|
|
```diff
@@ -256,40 +281,39 @@ class Gardener:
                 contents.append(content[:1000])  # Truncate
             except Exception:
                 continue
-
-        combined =
-
-        # Try LLM summarization
-        if self.config.llm_provider
+
+        combined = "\n---\n".join(contents)
+
+        # Try LLM summarization (multi-provider)
+        if self.config.llm_provider and self.config.llm_model:
             try:
-
+                from .llm import get_provider
+
+                config = {
+                    "llm_provider": self.config.llm_provider,
+                    "llm_model": self.config.llm_model,
+                }
+                provider = get_provider(config=config)
+                if provider:
+                    return provider.complete(
+                        [
+                            {
+                                "role": "system",
+                                "content": "You are a helpful assistant that summarizes conversation logs into actionable insights.",
+                            },
+                            {
+                                "role": "user",
+                                "content": f"Summarize these conversation logs about '{cluster.topic}' into 2-3 key insights:\n\n{combined[:4000]}",
+                            },
+                        ],
+                        max_tokens=500,
+                    )
             except Exception:
                 pass
-
+
         # Fall back to simple summary
         return self._simple_summary(cluster, contents)
-
-    def _summarize_with_openai(self, content: str, topic: str) -> str:
-        """Summarize using OpenAI API."""
-        import openai
-
-        response = openai.chat.completions.create(
-            model=self.config.llm_model or 'gpt-3.5-turbo',
-            messages=[
-                {
-                    'role': 'system',
-                    'content': 'You are a helpful assistant that summarizes conversation logs into actionable insights.'
-                },
-                {
-                    'role': 'user',
-                    'content': f"Summarize these conversation logs about '{topic}' into 2-3 key insights:\n\n{content[:4000]}"
-                }
-            ],
-            max_tokens=500
-        )
-
-        return response.choices[0].message.content
-
+
     def _simple_summary(self, cluster: EpisodeCluster, contents: List[str]) -> str:
         """Generate a simple summary without LLM."""
         return f"""# Insights: {cluster.topic.title()}
```
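The noteworthy change in this hunk is architectural: the hard-coded `_summarize_with_openai` helper is deleted in favor of the new `memvcs.core.llm` factory, so the Anthropic and OpenAI backends (both added in this release, per the file list above) share one call site. A hedged sketch of that interface, using only the two calls visible in the hunk, `get_provider(config=...)` and `provider.complete(messages, max_tokens=...)`; the provider and model values are examples, not defaults from the package:

```python
from memvcs.core.llm import get_provider

# Keys mirror the hunk above; the values are illustrative.
provider = get_provider(config={"llm_provider": "openai", "llm_model": "gpt-4o-mini"})

if provider:  # the diff guards on a falsy return when nothing is configured
    summary = provider.complete(
        [
            {"role": "system", "content": "Summarize conversation logs into insights."},
            {"role": "user", "content": "…combined episode contents…"},
        ],
        max_tokens=500,
    )
    print(summary)
```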
```diff
@@ -305,81 +329,88 @@ class Gardener:
 ---
 *This summary was auto-generated by the Gardener. Review and edit as needed.*
 """
-
+
     def write_insight(self, cluster: EpisodeCluster) -> Path:
         """
         Write cluster summary to semantic memory.
-
+
         Returns:
             Path to the written insight file
         """
         self.semantic_dir.mkdir(parents=True, exist_ok=True)
-
+
         # Generate filename (sanitize topic to avoid path traversal)
-        timestamp = datetime.utcnow().strftime(
-        safe_topic =
+        timestamp = datetime.utcnow().strftime("%Y%m%d")
+        safe_topic = (
+            cluster.topic.replace(" ", "-").lower().replace("/", "_").replace("\\", "_")[:30]
+        )
         filename = f"insight-{safe_topic}-{timestamp}.md"
         insight_path = (self.semantic_dir / filename).resolve()
         try:
             insight_path.relative_to(self.repo.current_dir.resolve())
         except ValueError:
             insight_path = self.semantic_dir / f"insight-{timestamp}.md"
-
+
         # Generate frontmatter
         frontmatter = {
-
-
-
-
-
-
+            "schema_version": "1.0",
+            "last_updated": datetime.utcnow().isoformat() + "Z",
+            "source_agent_id": "gardener",
+            "memory_type": "semantic",
+            "tags": cluster.tags + ["auto-generated", "insight"],
+            "source_episodes": len(cluster.episodes),
         }
-
+
         # Write file
         if YAML_AVAILABLE:
             import yaml
-
+
+            content = (
+                f"---\n{yaml.dump(frontmatter, default_flow_style=False)}---\n\n{cluster.summary}"
+            )
         else:
             content = cluster.summary
-
+
         insight_path.write_text(content)
         return insight_path
-
+
     def archive_episodes(self, episodes: List[Path]) -> int:
         """
         Archive processed episodes.
-
+
         Moves files to archive directory with timestamp prefix.
-
+
         Returns:
             Number of files archived
         """
         self.archive_dir.mkdir(parents=True, exist_ok=True)
-
-        timestamp = datetime.utcnow().strftime(
+
+        timestamp = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
         archive_subdir = self.archive_dir / timestamp
         archive_subdir.mkdir(exist_ok=True)
-
+
         count = 0
         for episode_path in episodes:
             try:
-                safe_name =
+                safe_name = (
+                    episode_path.name.replace("..", "_").replace("/", "_").replace("\\", "_")
+                )
                 dest = (archive_subdir / safe_name).resolve()
                 dest.relative_to(self.archive_dir.resolve())
                 shutil.move(str(episode_path), str(dest))
                 count += 1
             except (ValueError, Exception):
                 continue
-
+
         return count
-
+
     def run(self, force: bool = False) -> GardenerResult:
         """
         Run the Gardener process.
-
+
         Args:
             force: Run even if threshold not met
-
+
         Returns:
             GardenerResult with operation details
         """
```
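For orientation, a file written by `write_insight` when PyYAML is available would look roughly like the following. The frontmatter keys match the dict above (PyYAML's default `sort_keys=True` orders them alphabetically); the filename, values, and body are illustrative:

```markdown
<!-- current/semantic/insight-debug-20240115.md (illustrative) -->
---
last_updated: '2024-01-15T09:30:00.000000Z'
memory_type: semantic
schema_version: '1.0'
source_agent_id: gardener
source_episodes: 4
tags:
- debug
- auto-generated
- insight
---

# Insights: Debug
...
```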
```diff
@@ -389,9 +420,9 @@ class Gardener:
                 clusters_found=0,
                 insights_generated=0,
                 episodes_archived=0,
-                message=f"Threshold not met ({self.get_episode_count()}/{self.config.threshold} episodes)"
+                message=f"Threshold not met ({self.get_episode_count()}/{self.config.threshold} episodes)",
             )
-
+
         # Load episodes
         episodes = self.load_episodes()
         if not episodes:
```
```diff
@@ -400,67 +431,67 @@ class Gardener:
                 clusters_found=0,
                 insights_generated=0,
                 episodes_archived=0,
-                message="No episodes to process"
+                message="No episodes to process",
             )
-
+
         # Cluster episodes
         try:
             clusters = self.cluster_episodes_with_embeddings(episodes)
         except Exception:
             clusters = self.cluster_episodes(episodes)
-
+
         if not clusters:
             return GardenerResult(
                 success=True,
                 clusters_found=0,
                 insights_generated=0,
                 episodes_archived=0,
-                message="No clusters could be formed"
+                message="No clusters could be formed",
             )
-
+
         # Generate summaries and write insights
         insights_written = 0
         all_archived_episodes = []
-
+
         for cluster in clusters:
             try:
                 # Generate summary
                 cluster.summary = self.generate_summary(cluster)
-
+
                 # Write insight
                 self.write_insight(cluster)
                 insights_written += 1
-
+
                 # Track episodes to archive
                 all_archived_episodes.extend(cluster.episodes)
             except Exception as e:
                 print(f"Warning: Failed to process cluster '{cluster.topic}': {e}")
-
+
         # Archive processed episodes
         archived_count = self.archive_episodes(all_archived_episodes)
-
+
         # Auto-commit if configured
         commit_hash = None
         if self.config.auto_commit and insights_written > 0:
             try:
                 # Stage new insights
-                for insight_file in self.semantic_dir.glob(
-                    rel_path = str(insight_file.relative_to(self.repo.root /
+                for insight_file in self.semantic_dir.glob("insight-*.md"):
+                    rel_path = str(insight_file.relative_to(self.repo.root / "current"))
                     self.repo.stage_file(f"current/{rel_path}")
-
+
                 # Commit
                 commit_hash = self.repo.commit(
                     f"gardener: synthesized {insights_written} insights from {archived_count} episodes",
-                    {
+                    {"gardener": True, "clusters": len(clusters)},
                 )
             except Exception as e:
                 print(f"Warning: Auto-commit failed: {e}")
-
+
         return GardenerResult(
             success=True,
             clusters_found=len(clusters),
             insights_generated=insights_written,
             episodes_archived=archived_count,
             commit_hash=commit_hash,
-            message=f"Processed {len(clusters)} clusters, generated {insights_written} insights"
+            message=f"Processed {len(clusters)} clusters, generated {insights_written} insights",
         )
```
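End to end, `run()` hands back a `GardenerResult` whose fields all appear in this hunk. A usage sketch continuing the earlier one (still assuming an existing `repo` and the `gardener` built from it):

```python
result = gardener.run(force=True)  # bypass the episode-count threshold

if result.success:
    print(
        f"{result.clusters_found} clusters, "
        f"{result.insights_generated} insights, "
        f"{result.episodes_archived} episodes archived"
    )
    if result.commit_hash:  # set only when auto_commit is on and insights were written
        print(f"auto-committed as {result.commit_hash}")
else:
    print(result.message)
```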