agmem 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,452 @@
+ """
+ Health monitoring for agmem daemon.
+
+ Periodic checks for repository health:
+ - Storage metrics (size, growth rate)
+ - Semantic redundancy detection
+ - Stale memory detection
+ - Knowledge graph consistency
+ """
+
+ import hashlib
+ import json
+ from dataclasses import dataclass
+ from datetime import datetime, timezone, timedelta
+ from pathlib import Path
+ from typing import Dict, List, Optional, Any, Tuple
+
+
+ @dataclass
+ class StorageMetrics:
+     """Repository storage metrics."""
+
+     total_size_bytes: int
+     objects_size_bytes: int
+     pack_size_bytes: int
+     loose_objects_count: int
+     packed_objects_count: int
+     growth_rate_per_hour: float  # bytes per hour
+     warning: Optional[str] = None
+
+
+ @dataclass
+ class RedundancyReport:
+     """Semantic redundancy analysis."""
+
+     total_files: int
+     total_size_bytes: int
+     duplicate_hashes: Dict[str, List[str]]  # hash -> [file paths]
+     redundancy_percentage: float
+     similar_files: List[Tuple[str, str, float]]  # (file1, file2, similarity 0-1)
+     warning: Optional[str] = None
+
+
+ @dataclass
+ class StaleMemoryReport:
+     """Stale/unused memory detection."""
+
+     total_files: int
+     stale_files: List[Dict[str, Any]]  # {path, days_unaccessed, size_bytes}
+     stale_percentage: float
+     warning: Optional[str] = None
+
+
+ @dataclass
+ class GraphConsistencyReport:
+     """Knowledge graph integrity check."""
+
+     total_nodes: int
+     total_edges: int
+     orphaned_nodes: List[str]
+     dangling_edges: List[Tuple[str, str]]  # (source, target)
+     contradictions: List[Dict[str, Any]]
+     warning: Optional[str] = None
+
+
+ class StorageMonitor:
+     """Monitor repository storage growth and usage."""
+
+     def __init__(self, mem_dir: Path):
+         self.mem_dir = mem_dir
+         self.objects_dir = mem_dir / "objects"
+         self.pack_dir = self.objects_dir / "pack"
+         self.metrics_file = mem_dir / ".health" / "storage_metrics.json"
+
+     def get_metrics(self) -> StorageMetrics:
+         """Compute current storage metrics."""
+         self.metrics_file.parent.mkdir(parents=True, exist_ok=True)
+
+         # Calculate object sizes
+         objects_size = self._dir_size(self.objects_dir)
+         pack_size = self._dir_size(self.pack_dir) if self.pack_dir.exists() else 0
+         loose_size = objects_size - pack_size
+
+         # Count objects
+         loose_count = self._count_objects(self.objects_dir, "loose")
+         pack_count = self._count_objects(self.objects_dir, "packed")
+
+         # Calculate growth rate
+         growth_rate = self._calculate_growth_rate(objects_size)
+
+         warning = None
+         if objects_size > 5 * 1024 * 1024 * 1024:  # 5GB
+             warning = "Repository exceeds 5GB - consider archival or splitting"
+
+         return StorageMetrics(
+             total_size_bytes=objects_size,
+             objects_size_bytes=objects_size,
+             pack_size_bytes=pack_size,
+             loose_objects_count=loose_count,
+             packed_objects_count=pack_count,
+             growth_rate_per_hour=growth_rate,
+             warning=warning,
+         )
+
+     def _dir_size(self, path: Path) -> int:
+         """Recursively sum directory size."""
+         if not path.exists():
+             return 0
+         total = 0
+         for item in path.rglob("*"):
+             if item.is_file():
+                 try:
+                     total += item.stat().st_size
+                 except OSError:
+                     pass
+         return total
+
+     def _count_objects(self, obj_dir: Path, obj_type: str) -> int:
+         """Count objects of the given type ("loose" or "packed")."""
+         if not obj_dir.exists():
+             return 0
+         if obj_type == "packed":
+             # Count files under objects/pack, the same directory used for
+             # pack_size_bytes above.
+             pack_path = obj_dir / "pack"
+             if not pack_path.exists():
+                 return 0
+             return sum(1 for item in pack_path.rglob("*") if item.is_file())
+         # Loose objects are stored under the per-type directories.
+         count = 0
+         for type_dir in ["blob", "tree", "commit", "tag"]:
+             type_path = obj_dir / type_dir
+             if type_path.exists():
+                 for item in type_path.rglob("*"):
+                     if item.is_file():
+                         count += 1
+         return count
+
+     def _calculate_growth_rate(self, current_size: int) -> float:
+         """Calculate bytes/hour growth rate from historical data."""
+         try:
+             metrics_data = json.loads(self.metrics_file.read_text())
+             prev_size = metrics_data.get("total_size_bytes", current_size)
+             prev_time = metrics_data.get("timestamp")
+             if prev_time:
+                 hours_elapsed = (datetime.now(timezone.utc).timestamp() - prev_time) / 3600
+                 if hours_elapsed > 0:
+                     rate = (current_size - prev_size) / hours_elapsed
+                     return max(0, rate)
+         except Exception:
+             pass
+
+         # Store current metrics for next check
+         try:
+             self.metrics_file.write_text(
+                 json.dumps(
+                     {
+                         "total_size_bytes": current_size,
+                         "timestamp": datetime.now(timezone.utc).timestamp(),
+                     }
+                 )
+             )
+         except Exception:
+             pass
+
+         return 0.0
+
+
+ class SemanticRedundancyChecker:
+     """Detect duplicate and similar semantic memories."""
+
+     def __init__(self, current_dir: Path):
+         self.current_dir = current_dir
+         self.semantic_dir = current_dir / "semantic"
+
+     def check_redundancy(self) -> RedundancyReport:
+         """Check for content and semantic redundancy."""
+         if not self.semantic_dir.exists():
+             return RedundancyReport(
+                 total_files=0,
+                 total_size_bytes=0,
+                 duplicate_hashes={},
+                 redundancy_percentage=0.0,
+                 similar_files=[],
+             )
+
+         files = list(self.semantic_dir.rglob("*.md"))
+         if not files:
+             return RedundancyReport(
+                 total_files=0,
+                 total_size_bytes=0,
+                 duplicate_hashes={},
+                 redundancy_percentage=0.0,
+                 similar_files=[],
+             )
+
+         # Hash-based deduplication
+         hash_map: Dict[str, List[str]] = {}
+         total_size = 0
+
+         for fpath in files:
+             try:
+                 content = fpath.read_text(encoding="utf-8", errors="replace")
+                 content_hash = hashlib.sha256(content.encode()).hexdigest()
+                 hash_map.setdefault(content_hash, []).append(
+                     str(fpath.relative_to(self.current_dir))
+                 )
+                 total_size += len(content.encode())
+             except Exception:
+                 pass
+
+         # Find duplicates
+         duplicates = {h: paths for h, paths in hash_map.items() if len(paths) > 1}
+
+         # Calculate redundancy: measure wasted space from duplicates
+         # For each duplicate set, count all but the first as redundant
+         duplicate_waste_size = 0
+         file_sizes = {}
+
+         for fpath in files:
+             try:
+                 rel_path = str(fpath.relative_to(self.current_dir))
+                 file_sizes[rel_path] = fpath.stat().st_size
+             except (OSError, TypeError):
+                 pass
+
+         for hash_val, paths in duplicates.items():
+             if len(paths) > 1:
+                 # All copies except the first are redundant
+                 for dup_path in paths[1:]:
+                     duplicate_waste_size += file_sizes.get(dup_path, 0)
+
+         redundancy_pct = (duplicate_waste_size / total_size * 100) if total_size > 0 else 0
+
+         warning = None
+         if redundancy_pct > 20:
+             warning = f"High semantic redundancy ({redundancy_pct:.1f}%) - consolidate memories"
+
+         return RedundancyReport(
+             total_files=len(files),
+             total_size_bytes=total_size,
+             duplicate_hashes=duplicates,
+             redundancy_percentage=redundancy_pct,
+             similar_files=[],
+             warning=warning,
+         )
+
+
+ class StaleMemoryDetector:
+     """Detect unused/stale memories."""
+
+     def __init__(self, current_dir: Path):
+         self.current_dir = current_dir
+         self.stale_threshold_days = 90
+
+     def detect_stale(self) -> StaleMemoryReport:
+         """Find memories not accessed in threshold period."""
+         files = list(self.current_dir.rglob("*.md"))
+         if not files:
+             return StaleMemoryReport(
+                 total_files=0,
+                 stale_files=[],
+                 stale_percentage=0.0,
+             )
+
+         now = datetime.now(timezone.utc).timestamp()
+         stale_list = []
+         total_size = 0
+
+         for fpath in files:
+             try:
+                 stat = fpath.stat()
+                 age_days = (now - stat.st_atime) / 86400
+                 total_size += stat.st_size
+
+                 if age_days > self.stale_threshold_days:
+                     stale_list.append(
+                         {
+                             "path": str(fpath.relative_to(self.current_dir)),
+                             "days_unaccessed": int(age_days),
+                             "size_bytes": stat.st_size,
+                         }
+                     )
+             except Exception:
+                 pass
+
+         stale_pct = (len(stale_list) / len(files) * 100) if files else 0
+
+         warning = None
+         if stale_pct > 30:
+             warning = f"High stale memory percentage ({stale_pct:.1f}%) - consider archival"
+
+         return StaleMemoryReport(
+             total_files=len(files),
+             stale_files=sorted(stale_list, key=lambda x: x["days_unaccessed"], reverse=True),
+             stale_percentage=stale_pct,
+             warning=warning,
+         )
+
+
+ class GraphConsistencyValidator:
+     """Validate knowledge graph integrity."""
+
+     def __init__(self, current_dir: Path):
+         self.current_dir = current_dir
+
+     def validate_graph(self) -> GraphConsistencyReport:
+         """Check graph for orphaned nodes, dangling edges, contradictions."""
+         import re  # stdlib module, used below for wikilink extraction
+
+         semantic_dir = self.current_dir / "semantic"
+         if not semantic_dir.exists():
+             return GraphConsistencyReport(
+                 total_nodes=0,
+                 total_edges=0,
+                 orphaned_nodes=[],
+                 dangling_edges=[],
+                 contradictions=[],
+             )
+
+         # Extract all nodes (files) and edges (wikilinks)
+         nodes = set()
+         edges = []
+         contradictions = []
+
+         for fpath in semantic_dir.rglob("*.md"):
+             try:
+                 content = fpath.read_text(encoding="utf-8", errors="replace")
+                 node_name = fpath.stem
+
+                 nodes.add(node_name)
+
+                 # Find wikilinks: [[target]], [[target|label]]
+                 wikilinks = re.findall(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]", content)
+                 for target in wikilinks:
+                     edges.append((node_name, target.strip()))
+
+                 # Find conflict markers (potential contradictions)
+                 if "<<<<<" in content or "=====" in content or ">>>>>" in content:
+                     contradictions.append(
+                         {
+                             "file": str(fpath.relative_to(self.current_dir)),
+                             "type": "unresolved_merge_conflict",
+                         }
+                     )
+             except Exception:
+                 pass
+
+         # Find dangling edges (edges to non-existent nodes)
+         dangling = [(src, tgt) for src, tgt in edges if tgt not in nodes]
+
+         # Find orphaned nodes (nodes with no edges)
+         nodes_with_edges = set(src for src, _ in edges) | set(tgt for _, tgt in edges)
+         orphaned = list(nodes - nodes_with_edges)
+
+         warning = None
+         if dangling:
+             warning = f"Graph has {len(dangling)} dangling edge(s) - fix broken links"
+         if orphaned:
+             warning = (warning or "") + f" {len(orphaned)} orphaned node(s) - no connections"
+         if contradictions:
+             warning = (warning or "") + f" {len(contradictions)} conflict marker(s)"
+
+         return GraphConsistencyReport(
+             total_nodes=len(nodes),
+             total_edges=len(edges),
+             orphaned_nodes=orphaned,
+             dangling_edges=dangling,
+             contradictions=contradictions,
+             warning=warning.strip() if warning else None,
+         )
+
+
+ class HealthMonitor:
+     """Orchestrate all health checks."""
+
+     def __init__(self, repo_path: Path):
+         self.repo_path = repo_path
+         self.mem_dir = repo_path / ".mem"
+         self.current_dir = repo_path / "current"
+
+     def perform_all_checks(self) -> Dict[str, Any]:
+         """Run all health checks and return comprehensive report."""
+         report = {
+             "timestamp": datetime.now(timezone.utc).isoformat(),
+             "storage": None,
+             "redundancy": None,
+             "stale_memory": None,
+             "graph_consistency": None,
+             "warnings": [],
+         }
+
+         # Storage check
+         try:
+             storage_monitor = StorageMonitor(self.mem_dir)
+             metrics = storage_monitor.get_metrics()
+             report["storage"] = {
+                 "total_size_mb": metrics.total_size_bytes / 1024 / 1024,
+                 "loose_objects": metrics.loose_objects_count,
+                 "packed_objects": metrics.packed_objects_count,
+                 "growth_rate_mb_per_hour": metrics.growth_rate_per_hour / 1024 / 1024,
+             }
+             if metrics.warning:
+                 report["warnings"].append(metrics.warning)
+         except Exception as e:
+             report["storage"] = {"error": str(e)}
+
+         # Redundancy check
+         try:
+             redundancy = SemanticRedundancyChecker(self.current_dir)
+             red_report = redundancy.check_redundancy()
+             report["redundancy"] = {
+                 "total_files": red_report.total_files,
+                 "duplicates_found": len(red_report.duplicate_hashes),
+                 "redundancy_percentage": red_report.redundancy_percentage,
+             }
+             if red_report.warning:
+                 report["warnings"].append(red_report.warning)
+         except Exception as e:
+             report["redundancy"] = {"error": str(e)}
+
+         # Stale memory check
+         try:
+             stale_detector = StaleMemoryDetector(self.current_dir)
+             stale_report = stale_detector.detect_stale()
+             report["stale_memory"] = {
+                 "total_files": stale_report.total_files,
+                 "stale_files": len(stale_report.stale_files),
+                 "stale_percentage": stale_report.stale_percentage,
+             }
+             if stale_report.warning:
+                 report["warnings"].append(stale_report.warning)
+         except Exception as e:
+             report["stale_memory"] = {"error": str(e)}
+
+         # Graph consistency check
+         try:
+             graph_validator = GraphConsistencyValidator(self.current_dir)
+             graph_report = graph_validator.validate_graph()
+             report["graph_consistency"] = {
+                 "total_nodes": graph_report.total_nodes,
+                 "total_edges": graph_report.total_edges,
+                 "orphaned_nodes": len(graph_report.orphaned_nodes),
+                 "dangling_edges": len(graph_report.dangling_edges),
+                 "contradictions": len(graph_report.contradictions),
+             }
+             if graph_report.warning:
+                 report["warnings"].append(graph_report.warning)
+         except Exception as e:
+             report["graph_consistency"] = {"error": str(e)}
+
+         return report
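
The new module's entry point is HealthMonitor.perform_all_checks(), which runs the storage, redundancy, stale-memory, and graph-consistency checks listed in the module docstring and collects their warnings into one dict. Below is a minimal sketch of how a caller might consume it; the import path agmem.daemon.health and the repository location are assumptions, since the diff does not show where the file lives inside the package.

    import json
    from pathlib import Path

    # Assumed import path - not confirmed by this diff.
    from agmem.daemon.health import HealthMonitor

    # Hypothetical repository root containing the .mem/ and current/
    # directories that HealthMonitor expects.
    repo = Path("/var/lib/agmem/repo")

    monitor = HealthMonitor(repo)
    report = monitor.perform_all_checks()

    # Each sub-report is a plain dict (or {"error": ...} if that check failed),
    # so the whole report can be serialized directly, e.g. into the daemon's log.
    print(json.dumps(report, indent=2))
    for warning in report["warnings"]:
        print("WARNING:", warning)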