agmem-0.1.1-py3-none-any.whl → agmem-0.1.2-py3-none-any.whl
This diff compares publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/METADATA +20 -3
- agmem-0.1.2.dist-info/RECORD +86 -0
- memvcs/__init__.py +1 -1
- memvcs/cli.py +35 -31
- memvcs/commands/__init__.py +9 -9
- memvcs/commands/add.py +77 -76
- memvcs/commands/blame.py +46 -53
- memvcs/commands/branch.py +13 -33
- memvcs/commands/checkout.py +27 -32
- memvcs/commands/clean.py +18 -23
- memvcs/commands/clone.py +4 -1
- memvcs/commands/commit.py +40 -39
- memvcs/commands/daemon.py +81 -76
- memvcs/commands/decay.py +77 -0
- memvcs/commands/diff.py +56 -57
- memvcs/commands/distill.py +74 -0
- memvcs/commands/fsck.py +55 -61
- memvcs/commands/garden.py +28 -37
- memvcs/commands/graph.py +41 -48
- memvcs/commands/init.py +16 -24
- memvcs/commands/log.py +25 -40
- memvcs/commands/merge.py +16 -28
- memvcs/commands/pack.py +129 -0
- memvcs/commands/pull.py +4 -1
- memvcs/commands/push.py +4 -2
- memvcs/commands/recall.py +145 -0
- memvcs/commands/reflog.py +13 -22
- memvcs/commands/remote.py +1 -0
- memvcs/commands/repair.py +66 -0
- memvcs/commands/reset.py +23 -33
- memvcs/commands/resurrect.py +82 -0
- memvcs/commands/search.py +3 -4
- memvcs/commands/serve.py +2 -1
- memvcs/commands/show.py +66 -36
- memvcs/commands/stash.py +34 -34
- memvcs/commands/status.py +27 -35
- memvcs/commands/tag.py +23 -47
- memvcs/commands/test.py +30 -44
- memvcs/commands/timeline.py +111 -0
- memvcs/commands/tree.py +26 -27
- memvcs/commands/verify.py +59 -0
- memvcs/commands/when.py +115 -0
- memvcs/core/access_index.py +167 -0
- memvcs/core/config_loader.py +3 -1
- memvcs/core/consistency.py +214 -0
- memvcs/core/decay.py +185 -0
- memvcs/core/diff.py +158 -143
- memvcs/core/distiller.py +277 -0
- memvcs/core/gardener.py +164 -132
- memvcs/core/hooks.py +48 -14
- memvcs/core/knowledge_graph.py +134 -138
- memvcs/core/merge.py +248 -171
- memvcs/core/objects.py +95 -96
- memvcs/core/pii_scanner.py +147 -146
- memvcs/core/refs.py +132 -115
- memvcs/core/repository.py +174 -164
- memvcs/core/schema.py +155 -113
- memvcs/core/staging.py +60 -65
- memvcs/core/storage/__init__.py +20 -18
- memvcs/core/storage/base.py +74 -70
- memvcs/core/storage/gcs.py +70 -68
- memvcs/core/storage/local.py +42 -40
- memvcs/core/storage/s3.py +105 -110
- memvcs/core/temporal_index.py +112 -0
- memvcs/core/test_runner.py +101 -93
- memvcs/core/vector_store.py +41 -35
- memvcs/integrations/mcp_server.py +1 -3
- memvcs/integrations/web_ui/server.py +25 -26
- memvcs/retrieval/__init__.py +22 -0
- memvcs/retrieval/base.py +54 -0
- memvcs/retrieval/pack.py +128 -0
- memvcs/retrieval/recaller.py +105 -0
- memvcs/retrieval/strategies.py +314 -0
- memvcs/utils/__init__.py +3 -3
- memvcs/utils/helpers.py +52 -52
- agmem-0.1.1.dist-info/RECORD +0 -67
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/WHEEL +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/top_level.txt +0 -0
memvcs/core/gardener.py
CHANGED
```diff
@@ -16,6 +16,7 @@ from collections import defaultdict

 try:
     import yaml
+
     YAML_AVAILABLE = True
 except ImportError:
     YAML_AVAILABLE = False
@@ -24,6 +25,7 @@ except ImportError:
 @dataclass
 class EpisodeCluster:
     """A cluster of related episodes."""
+
     topic: str
     episodes: List[Path]
     summary: Optional[str] = None
@@ -33,6 +35,7 @@ class EpisodeCluster:
 @dataclass
 class GardenerConfig:
     """Configuration for the Gardener."""
+
     threshold: int = 50  # Number of episodic files before triggering
     archive_dir: str = "archive"
     min_cluster_size: int = 3
@@ -45,6 +48,7 @@ class GardenerConfig:
 @dataclass
 class GardenerResult:
     """Result of a gardener run."""
+
     success: bool
     clusters_found: int
     insights_generated: int
@@ -56,134 +60,158 @@ class GardenerResult:
 class Gardener:
     """
     The Gardener agent that refines memory over time.
-
+
     Wakes up when episodic/ files exceed a threshold, clusters them by topic,
     generates summaries, and archives the raw episodes.
     """
-
+
     def __init__(self, repo, config: Optional[GardenerConfig] = None):
         """
         Initialize the Gardener.
-
+
         Args:
             repo: Repository instance
             config: Optional configuration
         """
         self.repo = repo
         self.config = config or GardenerConfig()
-        self.episodic_dir = repo.root / 'current' / 'episodic'
-        self.semantic_dir = repo.root / 'current' / 'semantic'
+        self.episodic_dir = repo.root / "current" / "episodic"
+        self.semantic_dir = repo.root / "current" / "semantic"
         # Ensure archive_dir stays under current/ (path safety)
         try:
             archive_candidate = (repo.current_dir / self.config.archive_dir).resolve()
             archive_candidate.relative_to(repo.current_dir.resolve())
             self.archive_dir = archive_candidate
         except (ValueError, RuntimeError):
-            self.archive_dir = repo.current_dir / 'archive'
-
+            self.archive_dir = repo.current_dir / "archive"
+
     def should_run(self) -> bool:
         """Check if the Gardener should run based on threshold."""
         if not self.episodic_dir.exists():
             return False
-
-        episode_count = len(list(self.episodic_dir.glob('**/*.md')))
+
+        episode_count = len(list(self.episodic_dir.glob("**/*.md")))
         return episode_count >= self.config.threshold
-
+
     def get_episode_count(self) -> int:
         """Get the current number of episodic files."""
         if not self.episodic_dir.exists():
             return 0
-        return len(list(self.episodic_dir.glob('**/*.md')))
-
+        return len(list(self.episodic_dir.glob("**/*.md")))
+
     def load_episodes(self) -> List[Tuple[Path, str]]:
         """
         Load all episodic files.
-
+
         Returns:
             List of (path, content) tuples
         """
         episodes = []
-
+
         if not self.episodic_dir.exists():
             return episodes
-
-        for episode_file in self.episodic_dir.glob('**/*.md'):
+
+        for episode_file in self.episodic_dir.glob("**/*.md"):
             try:
                 content = episode_file.read_text()
                 episodes.append((episode_file, content))
             except Exception:
                 continue
-
+
         return episodes
-
+
     def cluster_episodes(self, episodes: List[Tuple[Path, str]]) -> List[EpisodeCluster]:
         """
         Cluster episodes by topic using keyword analysis.
-
+
         For more sophisticated clustering, this could use embeddings with k-means.
-
+
         Args:
             episodes: List of (path, content) tuples
-
+
         Returns:
             List of EpisodeCluster objects
         """
         # Simple keyword-based clustering
         keyword_to_episodes: Dict[str, List[Path]] = defaultdict(list)
-
+
         # Common programming/tech keywords to look for
         keywords = [
-            'python', 'javascript', 'typescript', 'rust', 'go',
-            'error', 'bug', 'fix', 'debug', 'issue', 'api',
-            'database', 'server', 'client', 'frontend', 'backend',
-            'test', 'testing', 'deploy', 'deployment', 'config',
-            'setup', 'install', 'environment', 'performance',
-            'optimization', 'memory', 'cache', 'security', 'auth',
-            'authentication', 'permission', 'user', 'preference',
-            'setting', 'option',
+            "python",
+            "javascript",
+            "typescript",
+            "rust",
+            "go",
+            "error",
+            "bug",
+            "fix",
+            "debug",
+            "issue",
+            "api",
+            "database",
+            "server",
+            "client",
+            "frontend",
+            "backend",
+            "test",
+            "testing",
+            "deploy",
+            "deployment",
+            "config",
+            "setup",
+            "install",
+            "environment",
+            "performance",
+            "optimization",
+            "memory",
+            "cache",
+            "security",
+            "auth",
+            "authentication",
+            "permission",
+            "user",
+            "preference",
+            "setting",
+            "option",
         ]
-
+
         for path, content in episodes:
             content_lower = content.lower()
             found_keywords = []
-
+
             for keyword in keywords:
                 if keyword in content_lower:
                     found_keywords.append(keyword)
                     keyword_to_episodes[keyword].append(path)
-
+
         # Create clusters from keywords with enough episodes
         clusters = []
         used_episodes = set()
-
+
         # Sort by number of episodes (descending)
-        sorted_keywords = sorted(
-            keyword_to_episodes.items(),
-            key=lambda x: len(x[1]),
-            reverse=True
-        )
-
+        sorted_keywords = sorted(keyword_to_episodes.items(), key=lambda x: len(x[1]), reverse=True)
+
         for keyword, episode_paths in sorted_keywords:
             if len(clusters) >= self.config.max_clusters:
                 break
-
+
             # Filter out already-used episodes
             unused_paths = [p for p in episode_paths if p not in used_episodes]
-
+
             if len(unused_paths) >= self.config.min_cluster_size:
-                clusters.append(EpisodeCluster(
-                    topic=keyword,
-                    episodes=unused_paths,
-                    tags=[keyword]
-                ))
+                clusters.append(
+                    EpisodeCluster(topic=keyword, episodes=unused_paths, tags=[keyword])
+                )
                 used_episodes.update(unused_paths)
-
+
         return clusters
-
-    def cluster_episodes_with_embeddings(self, episodes: List[Tuple[Path, str]]) -> List[EpisodeCluster]:
+
+    def cluster_episodes_with_embeddings(
+        self, episodes: List[Tuple[Path, str]]
+    ) -> List[EpisodeCluster]:
         """
         Cluster episodes using embeddings and k-means.
-
+
         Requires scikit-learn and sentence-transformers.
         """
         try:
@@ -192,28 +220,28 @@ class Gardener:
         except ImportError:
             # Fall back to keyword clustering
             return self.cluster_episodes(episodes)
-
+
         if len(episodes) < self.config.min_cluster_size:
             return []
-
+
         # Generate embeddings
-        model = SentenceTransformer('all-MiniLM-L6-v2')
+        model = SentenceTransformer("all-MiniLM-L6-v2")
         texts = [content[:2000] for _, content in episodes]  # Truncate long texts
         embeddings = model.encode(texts)
-
+
         # Determine number of clusters
         n_clusters = min(self.config.max_clusters, len(episodes) // self.config.min_cluster_size)
         n_clusters = max(1, n_clusters)
-
+
         # Cluster
         kmeans = KMeans(n_clusters=n_clusters, random_state=42)
         labels = kmeans.fit_predict(embeddings)
-
+
         # Group episodes by cluster
         cluster_episodes: Dict[int, List[Tuple[Path, str]]] = defaultdict(list)
         for i, (path, content) in enumerate(episodes):
             cluster_episodes[labels[i]].append((path, content))
-
+
         # Create cluster objects
         clusters = []
         for cluster_id, eps in cluster_episodes.items():
@@ -221,31 +249,28 @@ class Gardener:
             # Extract topic from first few words of first episode
             first_content = eps[0][1]
             topic = self._extract_topic(first_content)
-
-            clusters.append(EpisodeCluster(
-                topic=topic,
-                episodes=[p for p, _ in eps]
-            ))
-
+
+            clusters.append(EpisodeCluster(topic=topic, episodes=[p for p, _ in eps]))
+
         return clusters
-
+
     def _extract_topic(self, content: str) -> str:
         """Extract a topic label from content."""
         # Take first line or first 50 chars
-        lines = content.strip().split('\n')
+        lines = content.strip().split("\n")
         first_line = lines[0] if lines else content[:50]
-
+
         # Clean up
-        topic = first_line.strip('#').strip()
+        topic = first_line.strip("#").strip()
         if len(topic) > 50:
-            topic = topic[:47] + '...'
-
+            topic = topic[:47] + "..."
+
         return topic or "general"
-
+
     def generate_summary(self, cluster: EpisodeCluster) -> str:
         """
         Generate a summary for a cluster of episodes.
-
+
         Uses LLM if configured, otherwise generates a simple summary.
         """
         # Collect content from episodes
@@ -256,40 +281,40 @@ class Gardener:
                 contents.append(content[:1000])  # Truncate
             except Exception:
                 continue
-
-        combined = '\n---\n'.join(contents)
-
+
+        combined = "\n---\n".join(contents)
+
         # Try LLM summarization
-        if self.config.llm_provider == 'openai' and self.config.llm_model:
+        if self.config.llm_provider == "openai" and self.config.llm_model:
             try:
                 return self._summarize_with_openai(combined, cluster.topic)
             except Exception:
                 pass
-
+
         # Fall back to simple summary
         return self._simple_summary(cluster, contents)
-
+
     def _summarize_with_openai(self, content: str, topic: str) -> str:
         """Summarize using OpenAI API."""
         import openai
-
+
         response = openai.chat.completions.create(
-            model=self.config.llm_model or 'gpt-3.5-turbo',
+            model=self.config.llm_model or "gpt-3.5-turbo",
             messages=[
                 {
-                    'role': 'system',
-                    'content': 'You are a helpful assistant that summarizes conversation logs into actionable insights.',
+                    "role": "system",
+                    "content": "You are a helpful assistant that summarizes conversation logs into actionable insights.",
                 },
                 {
-                    'role': 'user',
-                    'content': f"Summarize these conversation logs about '{topic}' into 2-3 key insights:\n\n{content[:4000]}",
-                }
+                    "role": "user",
+                    "content": f"Summarize these conversation logs about '{topic}' into 2-3 key insights:\n\n{content[:4000]}",
+                },
             ],
-            max_tokens=500
+            max_tokens=500,
         )
-
+
         return response.choices[0].message.content
-
+
     def _simple_summary(self, cluster: EpisodeCluster, contents: List[str]) -> str:
         """Generate a simple summary without LLM."""
         return f"""# Insights: {cluster.topic.title()}
@@ -305,81 +330,88 @@ class Gardener:
 ---
 *This summary was auto-generated by the Gardener. Review and edit as needed.*
 """
-
+
     def write_insight(self, cluster: EpisodeCluster) -> Path:
         """
         Write cluster summary to semantic memory.
-
+
         Returns:
             Path to the written insight file
         """
         self.semantic_dir.mkdir(parents=True, exist_ok=True)
-
+
         # Generate filename (sanitize topic to avoid path traversal)
-        timestamp = datetime.utcnow().strftime('%Y%m%d')
-        safe_topic = cluster.topic.replace(' ', '-').lower().replace('/', '_').replace('\\', '_')[:30]
+        timestamp = datetime.utcnow().strftime("%Y%m%d")
+        safe_topic = (
+            cluster.topic.replace(" ", "-").lower().replace("/", "_").replace("\\", "_")[:30]
+        )
         filename = f"insight-{safe_topic}-{timestamp}.md"
         insight_path = (self.semantic_dir / filename).resolve()
         try:
             insight_path.relative_to(self.repo.current_dir.resolve())
         except ValueError:
             insight_path = self.semantic_dir / f"insight-{timestamp}.md"
-
+
         # Generate frontmatter
         frontmatter = {
-            'schema_version': '1.0',
-            'last_updated': datetime.utcnow().isoformat() + 'Z',
-            'source_agent_id': 'gardener',
-            'memory_type': 'semantic',
-            'tags': cluster.tags + ['auto-generated', 'insight'],
-            'source_episodes': len(cluster.episodes),
+            "schema_version": "1.0",
+            "last_updated": datetime.utcnow().isoformat() + "Z",
+            "source_agent_id": "gardener",
+            "memory_type": "semantic",
+            "tags": cluster.tags + ["auto-generated", "insight"],
+            "source_episodes": len(cluster.episodes),
         }
-
+
         # Write file
         if YAML_AVAILABLE:
             import yaml
-            content = f"---\n{yaml.dump(frontmatter, default_flow_style=False)}---\n\n{cluster.summary}"
+
+            content = (
+                f"---\n{yaml.dump(frontmatter, default_flow_style=False)}---\n\n{cluster.summary}"
+            )
         else:
             content = cluster.summary
-
+
         insight_path.write_text(content)
         return insight_path
-
+
     def archive_episodes(self, episodes: List[Path]) -> int:
         """
         Archive processed episodes.
-
+
         Moves files to archive directory with timestamp prefix.
-
+
         Returns:
             Number of files archived
         """
         self.archive_dir.mkdir(parents=True, exist_ok=True)
-
-        timestamp = datetime.utcnow().strftime('%Y%m%d-%H%M%S')
+
+        timestamp = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
         archive_subdir = self.archive_dir / timestamp
         archive_subdir.mkdir(exist_ok=True)
-
+
         count = 0
         for episode_path in episodes:
             try:
-                safe_name = episode_path.name.replace('..', '_').replace('/', '_').replace('\\', '_')
+                safe_name = (
+                    episode_path.name.replace("..", "_").replace("/", "_").replace("\\", "_")
+                )
                 dest = (archive_subdir / safe_name).resolve()
                 dest.relative_to(self.archive_dir.resolve())
                 shutil.move(str(episode_path), str(dest))
                 count += 1
             except (ValueError, Exception):
                 continue
-
+
         return count
-
+
     def run(self, force: bool = False) -> GardenerResult:
         """
         Run the Gardener process.
-
+
         Args:
             force: Run even if threshold not met
-
+
         Returns:
             GardenerResult with operation details
         """
@@ -389,9 +421,9 @@ class Gardener:
                 clusters_found=0,
                 insights_generated=0,
                 episodes_archived=0,
-                message=f"Threshold not met ({self.get_episode_count()}/{self.config.threshold} episodes)"
+                message=f"Threshold not met ({self.get_episode_count()}/{self.config.threshold} episodes)",
             )
-
+
         # Load episodes
         episodes = self.load_episodes()
         if not episodes:
@@ -400,67 +432,67 @@ class Gardener:
                 clusters_found=0,
                 insights_generated=0,
                 episodes_archived=0,
-                message="No episodes to process"
+                message="No episodes to process",
             )
-
+
         # Cluster episodes
         try:
             clusters = self.cluster_episodes_with_embeddings(episodes)
         except Exception:
             clusters = self.cluster_episodes(episodes)
-
+
         if not clusters:
             return GardenerResult(
                 success=True,
                 clusters_found=0,
                 insights_generated=0,
                 episodes_archived=0,
-                message="No clusters could be formed"
+                message="No clusters could be formed",
             )
-
+
         # Generate summaries and write insights
         insights_written = 0
         all_archived_episodes = []
-
+
         for cluster in clusters:
             try:
                 # Generate summary
                 cluster.summary = self.generate_summary(cluster)
-
+
                 # Write insight
                 self.write_insight(cluster)
                 insights_written += 1
-
+
                 # Track episodes to archive
                 all_archived_episodes.extend(cluster.episodes)
             except Exception as e:
                 print(f"Warning: Failed to process cluster '{cluster.topic}': {e}")
-
+
         # Archive processed episodes
        archived_count = self.archive_episodes(all_archived_episodes)
-
+
         # Auto-commit if configured
         commit_hash = None
         if self.config.auto_commit and insights_written > 0:
             try:
                 # Stage new insights
-                for insight_file in self.semantic_dir.glob('insight-*.md'):
-                    rel_path = str(insight_file.relative_to(self.repo.root / 'current'))
+                for insight_file in self.semantic_dir.glob("insight-*.md"):
+                    rel_path = str(insight_file.relative_to(self.repo.root / "current"))
                     self.repo.stage_file(f"current/{rel_path}")
-
+
                 # Commit
                 commit_hash = self.repo.commit(
                     f"gardener: synthesized {insights_written} insights from {archived_count} episodes",
-                    {'gardener': True, 'clusters': len(clusters)}
+                    {"gardener": True, "clusters": len(clusters)},
                 )
             except Exception as e:
                 print(f"Warning: Auto-commit failed: {e}")
-
+
         return GardenerResult(
             success=True,
             clusters_found=len(clusters),
             insights_generated=insights_written,
            episodes_archived=archived_count,
            commit_hash=commit_hash,
-            message=f"Processed {len(clusters)} clusters, generated {insights_written} insights"
+            message=f"Processed {len(clusters)} clusters, generated {insights_written} insights",
        )
```