claude-mpm 4.3.19__py3-none-any.whl → 4.3.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/agents/agent_loader.py +2 -2
  3. claude_mpm/agents/agent_loader_integration.py +2 -2
  4. claude_mpm/agents/async_agent_loader.py +2 -2
  5. claude_mpm/agents/base_agent_loader.py +2 -2
  6. claude_mpm/agents/frontmatter_validator.py +2 -2
  7. claude_mpm/agents/system_agent_config.py +2 -2
  8. claude_mpm/agents/templates/clerk-ops.json +6 -4
  9. claude_mpm/agents/templates/data_engineer.json +1 -2
  10. claude_mpm/cli/commands/doctor.py +2 -2
  11. claude_mpm/cli/commands/mpm_init.py +560 -47
  12. claude_mpm/cli/commands/mpm_init_handler.py +6 -0
  13. claude_mpm/cli/parsers/mpm_init_parser.py +39 -1
  14. claude_mpm/cli/startup_logging.py +11 -9
  15. claude_mpm/commands/mpm-init.md +76 -12
  16. claude_mpm/config/agent_config.py +2 -2
  17. claude_mpm/config/paths.py +2 -2
  18. claude_mpm/core/agent_name_normalizer.py +2 -2
  19. claude_mpm/core/config.py +2 -1
  20. claude_mpm/core/config_aliases.py +2 -2
  21. claude_mpm/core/file_utils.py +1 -0
  22. claude_mpm/core/log_manager.py +2 -2
  23. claude_mpm/core/tool_access_control.py +2 -2
  24. claude_mpm/core/unified_agent_registry.py +2 -2
  25. claude_mpm/core/unified_paths.py +2 -2
  26. claude_mpm/experimental/cli_enhancements.py +3 -2
  27. claude_mpm/hooks/base_hook.py +2 -2
  28. claude_mpm/hooks/instruction_reinforcement.py +2 -2
  29. claude_mpm/hooks/validation_hooks.py +2 -2
  30. claude_mpm/scripts/mpm_doctor.py +2 -2
  31. claude_mpm/services/agents/loading/agent_profile_loader.py +2 -2
  32. claude_mpm/services/agents/loading/base_agent_manager.py +2 -2
  33. claude_mpm/services/agents/loading/framework_agent_loader.py +2 -2
  34. claude_mpm/services/agents/management/agent_capabilities_generator.py +2 -2
  35. claude_mpm/services/agents/management/agent_management_service.py +2 -2
  36. claude_mpm/services/agents/memory/memory_categorization_service.py +5 -2
  37. claude_mpm/services/agents/memory/memory_file_service.py +27 -6
  38. claude_mpm/services/agents/memory/memory_format_service.py +5 -2
  39. claude_mpm/services/agents/memory/memory_limits_service.py +3 -2
  40. claude_mpm/services/agents/registry/deployed_agent_discovery.py +2 -2
  41. claude_mpm/services/agents/registry/modification_tracker.py +4 -4
  42. claude_mpm/services/async_session_logger.py +2 -1
  43. claude_mpm/services/claude_session_logger.py +2 -2
  44. claude_mpm/services/core/path_resolver.py +3 -2
  45. claude_mpm/services/diagnostics/diagnostic_runner.py +4 -3
  46. claude_mpm/services/event_bus/direct_relay.py +2 -1
  47. claude_mpm/services/event_bus/event_bus.py +2 -1
  48. claude_mpm/services/event_bus/relay.py +2 -2
  49. claude_mpm/services/framework_claude_md_generator/content_assembler.py +2 -2
  50. claude_mpm/services/infrastructure/daemon_manager.py +2 -2
  51. claude_mpm/services/memory/cache/simple_cache.py +2 -2
  52. claude_mpm/services/project/archive_manager.py +981 -0
  53. claude_mpm/services/project/documentation_manager.py +536 -0
  54. claude_mpm/services/project/enhanced_analyzer.py +491 -0
  55. claude_mpm/services/project/project_organizer.py +904 -0
  56. claude_mpm/services/response_tracker.py +2 -2
  57. claude_mpm/services/socketio/handlers/connection.py +14 -33
  58. claude_mpm/services/socketio/server/eventbus_integration.py +2 -2
  59. claude_mpm/services/version_control/version_parser.py +5 -4
  60. claude_mpm/storage/state_storage.py +2 -2
  61. claude_mpm/utils/agent_dependency_loader.py +49 -0
  62. claude_mpm/utils/common.py +542 -0
  63. claude_mpm/utils/database_connector.py +298 -0
  64. claude_mpm/utils/error_handler.py +2 -1
  65. claude_mpm/utils/log_cleanup.py +2 -2
  66. claude_mpm/utils/path_operations.py +2 -2
  67. claude_mpm/utils/robust_installer.py +56 -0
  68. claude_mpm/utils/session_logging.py +2 -2
  69. claude_mpm/utils/subprocess_utils.py +2 -2
  70. claude_mpm/validation/agent_validator.py +2 -2
  71. {claude_mpm-4.3.19.dist-info → claude_mpm-4.3.22.dist-info}/METADATA +1 -1
  72. {claude_mpm-4.3.19.dist-info → claude_mpm-4.3.22.dist-info}/RECORD +76 -70
  73. {claude_mpm-4.3.19.dist-info → claude_mpm-4.3.22.dist-info}/WHEEL +0 -0
  74. {claude_mpm-4.3.19.dist-info → claude_mpm-4.3.22.dist-info}/entry_points.txt +0 -0
  75. {claude_mpm-4.3.19.dist-info → claude_mpm-4.3.22.dist-info}/licenses/LICENSE +0 -0
  76. {claude_mpm-4.3.19.dist-info → claude_mpm-4.3.22.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,981 @@
+"""
+Archive Manager Service for Claude MPM Documentation Versioning
+==============================================================
+
+This service manages archival and versioning of project documentation,
+particularly CLAUDE.md and related files during updates.
+
+Key Features:
+- Automatic backup before updates
+- Timestamped archive files
+- Version comparison and diff generation
+- Archive cleanup and rotation
+- Restoration capabilities
+
+Author: Claude MPM Development Team
+Created: 2025-01-26
+"""
+
+import difflib
+import gzip
+import hashlib
+import json
+import re
+import shutil
+import subprocess
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Set, Tuple
+
+from rich.console import Console
+from rich.table import Table
+
+from claude_mpm.core.logging_utils import get_logger
+logger = get_logger(__name__)
+console = Console()
+
+
+class ArchiveManager:
+    """Manages documentation archival and versioning."""
+
+    # Archive settings
+    ARCHIVE_DIR = "docs/_archive"
+    MAX_ARCHIVES = 10  # Maximum number of archives to keep per file
+    COMPRESS_AFTER_DAYS = 7  # Compress archives older than this
+    DELETE_AFTER_DAYS = 90  # Delete archives older than this
+
+    def __init__(self, project_path: Path):
+        """Initialize the archive manager."""
+        self.project_path = project_path
+        self.archive_path = project_path / self.ARCHIVE_DIR
+        self.is_git_repo = (project_path / ".git").exists()
+        self._ensure_archive_directory()
+
+        # Documentation patterns
+        self.version_patterns = {
+            "semantic": re.compile(r"v?(\d+)\.(\d+)\.(\d+)"),
+            "date": re.compile(r"(\d{4})-(\d{2})-(\d{2})"),
+            "build": re.compile(r"build[.\-](\d+)"),
+        }
+
+        # Key documentation files to track
+        self.key_docs = {
+            "CLAUDE.md": "Project instructions and guidelines",
+            "README.md": "Project overview and setup",
+            "CHANGELOG.md": "Version history and changes",
+            "docs/README.md": "Documentation index",
+        }
+
+    def _ensure_archive_directory(self) -> None:
+        """Ensure archive directory exists."""
+        self.archive_path.mkdir(parents=True, exist_ok=True)
+
+        # Create README if not exists
+        readme_path = self.archive_path / "README.md"
+        if not readme_path.exists():
+            readme_content = """# Documentation Archives
+
+This directory contains archived versions of project documentation files.
+
+## Archive Naming Convention
+
+Files are archived with timestamps:
+- `CLAUDE.md.2024-01-15T10-30-45.md` - Regular archive
+- `CLAUDE.md.2024-01-15T10-30-45.md.gz` - Compressed archive (older than 7 days)
+
+## Archive Management
+
+- Archives are created automatically when documentation is updated
+- Older archives are compressed after 7 days
+- Archives older than 90 days are automatically deleted
+- Maximum of 10 archives kept per file
+
+## Restoration
+
+To restore an archived file:
+1. Find the desired version by timestamp
+2. Copy it back to the project root
+3. Rename to remove the timestamp
+
+Generated by Claude MPM Archive Manager
+"""
+            readme_path.write_text(readme_content)
+
+    def archive_file(
+        self,
+        file_path: Path,
+        reason: Optional[str] = None,
+        metadata: Optional[Dict] = None,
+    ) -> Optional[Path]:
+        """Archive a file with timestamp and optional metadata."""
+        if not file_path.exists():
+            logger.warning(f"File {file_path} does not exist, cannot archive")
+            return None
+
+        try:
+            # Generate archive filename
+            timestamp = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
+            archive_name = f"{file_path.name}.{timestamp}{file_path.suffix}"
+            archive_file_path = self.archive_path / archive_name
+
+            # Copy file to archive
+            shutil.copy2(file_path, archive_file_path)
+            logger.info(f"Archived {file_path.name} to {archive_file_path}")
+
+            # Create metadata file if provided
+            if metadata or reason:
+                meta_data = metadata or {}
+                meta_data.update({
+                    "original_path": str(file_path),
+                    "archived_at": datetime.now().isoformat(),
+                    "reason": reason or "Manual archive",
+                    "file_size": file_path.stat().st_size,
+                    "file_hash": self._calculate_file_hash(file_path),
+                })
+
+                meta_path = self.archive_path / f"{archive_name}.meta.json"
+                meta_path.write_text(json.dumps(meta_data, indent=2))
+
+            # Cleanup old archives
+            self._cleanup_archives(file_path.name)
+
+            return archive_file_path
+
+        except Exception as e:
+            logger.error(f"Failed to archive {file_path}: {e}")
+            return None
+
+    def _calculate_file_hash(self, file_path: Path) -> str:
+        """Calculate MD5 hash of file."""
+        hasher = hashlib.md5()
+        with open(file_path, "rb") as f:
+            for chunk in iter(lambda: f.read(4096), b""):
+                hasher.update(chunk)
+        return hasher.hexdigest()
+
+    def _cleanup_archives(self, original_filename: str) -> None:
+        """Clean up old archives for a specific file."""
+        # Find all archives for this file
+        archives = self._find_archives(original_filename)
+
+        # Sort by modification time
+        archives.sort(key=lambda p: p.stat().st_mtime)
+
+        # Remove oldest if exceeding max count
+        if len(archives) > self.MAX_ARCHIVES:
+            for archive in archives[: -self.MAX_ARCHIVES]:
+                self._remove_archive(archive)
+                logger.info(f"Removed old archive: {archive.name}")
+
+        # Compress old archives
+        cutoff_compress = datetime.now() - timedelta(days=self.COMPRESS_AFTER_DAYS)
+        for archive in archives:
+            if not archive.suffix == ".gz":
+                mtime = datetime.fromtimestamp(archive.stat().st_mtime)
+                if mtime < cutoff_compress:
+                    self._compress_archive(archive)
+
+        # Delete very old archives
+        cutoff_delete = datetime.now() - timedelta(days=self.DELETE_AFTER_DAYS)
+        for archive in archives:
+            mtime = datetime.fromtimestamp(archive.stat().st_mtime)
+            if mtime < cutoff_delete:
+                self._remove_archive(archive)
+                logger.info(f"Deleted old archive: {archive.name}")
+
+    def _find_archives(self, original_filename: str) -> List[Path]:
+        """Find all archives for a specific file."""
+        archives = []
+        pattern = f"{original_filename}.*"
+
+        for file in self.archive_path.glob(pattern):
+            # Skip metadata files
+            if not file.name.endswith(".meta.json"):
+                archives.append(file)
+
+        return archives
+
+    def _compress_archive(self, archive_path: Path) -> None:
+        """Compress an archive file using gzip."""
+        try:
+            compressed_path = archive_path.with_suffix(archive_path.suffix + ".gz")
+
+            with open(archive_path, "rb") as f_in:
+                with gzip.open(compressed_path, "wb") as f_out:
+                    shutil.copyfileobj(f_in, f_out)
+
+            # Remove original after successful compression
+            archive_path.unlink()
+            logger.debug(f"Compressed archive: {archive_path.name}")
+
+            # Update metadata file if exists
+            meta_path = self.archive_path / f"{archive_path.name}.meta.json"
+            if meta_path.exists():
+                new_meta_path = self.archive_path / f"{compressed_path.name}.meta.json"
+                meta_path.rename(new_meta_path)
+
+        except Exception as e:
+            logger.error(f"Failed to compress {archive_path}: {e}")
+
+    def _remove_archive(self, archive_path: Path) -> None:
+        """Remove an archive and its metadata."""
+        try:
+            # Remove main file
+            if archive_path.exists():
+                archive_path.unlink()
+
+            # Remove metadata if exists
+            meta_path = self.archive_path / f"{archive_path.name}.meta.json"
+            if meta_path.exists():
+                meta_path.unlink()
+
+        except Exception as e:
+            logger.error(f"Failed to remove archive {archive_path}: {e}")
+
+    def list_archives(
+        self, filename: Optional[str] = None, include_metadata: bool = False
+    ) -> List[Dict]:
+        """List all archives or archives for a specific file."""
+        archives = []
+
+        if filename:
+            archive_files = self._find_archives(filename)
+        else:
+            archive_files = [
+                f
+                for f in self.archive_path.glob("*")
+                if not f.name.endswith(".meta.json") and not f.name == "README.md"
+            ]
+
+        for archive_file in archive_files:
+            info = {
+                "name": archive_file.name,
+                "path": str(archive_file),
+                "size": archive_file.stat().st_size,
+                "modified": datetime.fromtimestamp(archive_file.stat().st_mtime).isoformat(),
+                "compressed": archive_file.suffix == ".gz",
+            }
+
+            # Add metadata if requested and available
+            if include_metadata:
+                meta_path = self.archive_path / f"{archive_file.name}.meta.json"
+                if meta_path.exists():
+                    try:
+                        info["metadata"] = json.loads(meta_path.read_text())
+                    except Exception:
+                        pass
+
+            archives.append(info)
+
+        # Sort by modification time, newest first
+        archives.sort(key=lambda x: x["modified"], reverse=True)
+
+        return archives
+
+    def get_latest_archive(self, filename: str) -> Optional[Path]:
+        """Get the most recent archive for a file."""
+        archives = self._find_archives(filename)
+        if not archives:
+            return None
+
+        # Sort by modification time and return latest
+        archives.sort(key=lambda p: p.stat().st_mtime, reverse=True)
+        return archives[0]
+
+    def restore_archive(
+        self, archive_name: str, target_path: Optional[Path] = None
+    ) -> Tuple[bool, str]:
+        """Restore an archived file to its original location or specified path."""
+        archive_file = self.archive_path / archive_name
+
+        if not archive_file.exists():
+            return False, f"Archive {archive_name} not found"
+
+        try:
+            # Determine target path
+            if target_path is None:
+                # Extract original filename from archive name
+                # Format: original.ext.timestamp.ext[.gz]
+                parts = archive_name.split(".")
+                if archive_name.endswith(".gz"):
+                    # Remove .gz and timestamp
+                    original_name = ".".join(parts[:-3])
+                else:
+                    # Remove timestamp
+                    original_name = ".".join(parts[:-2])
+                target_path = self.project_path / original_name
+
+            # Backup current file if it exists
+            if target_path.exists():
+                self.archive_file(
+                    target_path, reason="Backup before restoration", metadata={"restoration_from": archive_name}
+                )
+
+            # Restore file
+            if archive_file.suffix == ".gz":
+                # Decompress first
+                with gzip.open(archive_file, "rb") as f_in:
+                    with open(target_path, "wb") as f_out:
+                        shutil.copyfileobj(f_in, f_out)
+            else:
+                shutil.copy2(archive_file, target_path)
+
+            logger.info(f"Restored {archive_name} to {target_path}")
+            return True, f"Successfully restored to {target_path}"
+
+        except Exception as e:
+            logger.error(f"Failed to restore {archive_name}: {e}")
+            return False, f"Restoration failed: {str(e)}"
+
+    def compare_with_archive(self, current_file: Path, archive_name: str) -> Dict:
+        """Compare current file with an archived version."""
+        archive_file = self.archive_path / archive_name
+
+        if not archive_file.exists():
+            return {"error": f"Archive {archive_name} not found"}
+
+        if not current_file.exists():
+            return {"error": f"Current file {current_file} not found"}
+
+        try:
+            # Read archived content
+            if archive_file.suffix == ".gz":
+                with gzip.open(archive_file, "rt", encoding="utf-8") as f:
+                    archive_content = f.read()
+            else:
+                archive_content = archive_file.read_text()
+
+            # Read current content
+            current_content = current_file.read_text()
+
+            # Calculate differences
+            current_lines = current_content.splitlines()
+            archive_lines = archive_content.splitlines()
+
+            return {
+                "current_file": str(current_file),
+                "archive_file": archive_name,
+                "current_lines": len(current_lines),
+                "archive_lines": len(archive_lines),
+                "lines_added": len(current_lines) - len(archive_lines),
+                "current_size": len(current_content),
+                "archive_size": len(archive_content),
+                "size_change": len(current_content) - len(archive_content),
+                "current_hash": self._calculate_file_hash(current_file),
+                "identical": current_content == archive_content,
+            }
+
+        except Exception as e:
+            return {"error": f"Comparison failed: {str(e)}"}
+
+    def create_archive_report(self) -> Dict:
+        """Generate a report of all archives."""
+        report = {
+            "archive_directory": str(self.archive_path),
+            "total_archives": 0,
+            "total_size": 0,
+            "compressed_count": 0,
+            "files_tracked": {},
+            "oldest_archive": None,
+            "newest_archive": None,
+        }
+
+        archives = self.list_archives(include_metadata=True)
+        report["total_archives"] = len(archives)
+
+        for archive in archives:
+            report["total_size"] += archive["size"]
+            if archive["compressed"]:
+                report["compressed_count"] += 1
+
+            # Track by original file
+            original = archive["name"].split(".")[0]
+            if original not in report["files_tracked"]:
+                report["files_tracked"][original] = {
+                    "count": 0,
+                    "total_size": 0,
+                    "versions": [],
+                }
+
+            report["files_tracked"][original]["count"] += 1
+            report["files_tracked"][original]["total_size"] += archive["size"]
+            report["files_tracked"][original]["versions"].append(archive["name"])
+
+        # Find oldest and newest
+        if archives:
+            report["oldest_archive"] = archives[-1]["name"]
+            report["newest_archive"] = archives[0]["name"]
+
+        return report
+
+    def auto_archive_before_update(
+        self, file_path: Path, update_reason: str = "Before update"
+    ) -> bool:
+        """Automatically archive a file before updating it."""
+        if not file_path.exists():
+            logger.debug(f"File {file_path} does not exist, skipping archive")
+            return True
+
+        # Check if content has changed since last archive
+        latest = self.get_latest_archive(file_path.name)
+        if latest:
+            comparison = self.compare_with_archive(file_path, latest.name)
+            if comparison.get("identical"):
+                logger.debug(f"File {file_path.name} unchanged, skipping archive")
+                return True
+
+        # Archive the file
+        result = self.archive_file(
+            file_path,
+            reason=update_reason,
+            metadata={
+                "update_type": "mpm-init",
+                "auto_archive": True,
+            },
+        )
+
+        return result is not None
+
+    # ========== Git Integration Methods ==========
+
+    def _run_git_command(self, args: List[str]) -> Optional[str]:
+        """Run a git command and return output."""
+        if not self.is_git_repo:
+            return None
+
+        try:
+            result = subprocess.run(
+                ["git"] + args,
+                cwd=self.project_path,
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            return result.stdout.strip()
+        except subprocess.CalledProcessError as e:
+            logger.debug(f"Git command failed: {e}")
+            return None
+
+    def get_file_git_history(self, file_path: Path, limit: int = 10) -> List[Dict]:
+        """Get git commit history for a specific file."""
+        if not self.is_git_repo or not file_path.exists():
+            return []
+
+        relative_path = file_path.relative_to(self.project_path)
+        output = self._run_git_command([
+            "log",
+            f"-{limit}",
+            "--pretty=format:%H|%an|%at|%s",
+            "--follow",
+            str(relative_path),
+        ])
+
+        if not output:
+            return []
+
+        commits = []
+        for line in output.splitlines():
+            parts = line.split("|", 3)
+            if len(parts) == 4:
+                commits.append({
+                    "hash": parts[0][:8],
+                    "author": parts[1],
+                    "date": datetime.fromtimestamp(int(parts[2])).isoformat(),
+                    "message": parts[3],
+                })
+        return commits
+
+    def get_file_last_modified(self, file_path: Path) -> Optional[datetime]:
+        """Get the last git modification date for a file."""
+        if not self.is_git_repo or not file_path.exists():
+            return None
+
+        relative_path = file_path.relative_to(self.project_path)
+        output = self._run_git_command([
+            "log",
+            "-1",
+            "--format=%at",
+            str(relative_path),
+        ])
+
+        if output:
+            return datetime.fromtimestamp(int(output))
+        return None
+
+    # ========== Documentation Review Methods ==========
+
+    def review_documentation(self, check_git: bool = True) -> Dict:
+        """Comprehensive documentation review with outdated detection."""
+        report = {
+            "timestamp": datetime.now().isoformat(),
+            "files_reviewed": {},
+            "outdated_sections": [],
+            "synchronization_issues": [],
+            "recommendations": [],
+        }
+
+        # Review each key documentation file
+        for doc_file, description in self.key_docs.items():
+            file_path = self.project_path / doc_file
+            if file_path.exists():
+                file_report = self._review_single_doc(file_path, check_git)
+                report["files_reviewed"][doc_file] = file_report
+
+                # Check for outdated content
+                if file_report.get("outdated_indicators"):
+                    report["outdated_sections"].append({
+                        "file": doc_file,
+                        "indicators": file_report["outdated_indicators"],
+                    })
+
+        # Check synchronization between docs
+        sync_issues = self._check_documentation_sync()
+        if sync_issues:
+            report["synchronization_issues"] = sync_issues
+
+        # Generate recommendations
+        report["recommendations"] = self._generate_recommendations(report)
+
+        return report
+
+    def _review_single_doc(self, file_path: Path, check_git: bool) -> Dict:
+        """Review a single documentation file for outdated content."""
+        content = file_path.read_text()
+        report = {
+            "exists": True,
+            "size": len(content),
+            "lines": len(content.splitlines()),
+            "last_modified": file_path.stat().st_mtime,
+            "outdated_indicators": [],
+            "version_references": [],
+        }
+
+        # Check git history if available
+        if check_git and self.is_git_repo:
+            git_history = self.get_file_git_history(file_path, limit=5)
+            if git_history:
+                report["last_git_update"] = git_history[0]["date"]
+                report["recent_changes"] = len(git_history)
+
+        # Find version references
+        for pattern_name, pattern in self.version_patterns.items():
+            matches = pattern.findall(content)
+            if matches:
+                report["version_references"].append({
+                    "type": pattern_name,
+                    "versions": matches[:5],  # First 5 matches
+                })
+
+        # Detect outdated indicators
+        outdated_indicators = self._detect_outdated_content(content, file_path.name)
+        if outdated_indicators:
+            report["outdated_indicators"] = outdated_indicators
+
+        return report
+
+    def _detect_outdated_content(self, content: str, filename: str) -> List[Dict]:
+        """Detect potentially outdated content in documentation."""
+        indicators = []
+        lines = content.splitlines()
+
+        # Pattern-based outdated detection
+        outdated_patterns = [
+            (r"TODO|FIXME|XXX", "Unresolved TODOs"),
+            (r"deprecated|obsolete|legacy", "Deprecated references"),
+            (r"coming soon|upcoming|future release", "Future tense content"),
+            (r"alpha|beta|experimental", "Pre-release indicators"),
+            (r"temporary|workaround|hack", "Temporary solutions"),
+        ]
+
+        for pattern, description in outdated_patterns:
+            regex = re.compile(pattern, re.IGNORECASE)
+            for i, line in enumerate(lines, 1):
+                if regex.search(line):
+                    indicators.append({
+                        "line": i,
+                        "type": description,
+                        "content": line.strip()[:100],  # First 100 chars
+                    })
+
+        # Check for old version numbers if VERSION file exists
+        version_file = self.project_path / "VERSION"
+        if version_file.exists():
+            current_version = version_file.read_text().strip()
+            old_version_pattern = re.compile(r"v?(\d+)\.(\d+)\.(\d+)")
+
+            for match in old_version_pattern.finditer(content):
+                found_version = match.group(0)
+                if found_version != current_version and self._is_older_version(found_version, current_version):
+                    pos = content[:match.start()].count('\n') + 1
+                    indicators.append({
+                        "line": pos,
+                        "type": "Old version reference",
+                        "content": f"Found {found_version} (current: {current_version})",
+                    })
+
+        return indicators[:20]  # Limit to 20 most relevant
+
+    def _is_older_version(self, version1: str, version2: str) -> bool:
+        """Compare two version strings."""
+        try:
+            # Parse semantic versions
+            v1_match = self.version_patterns["semantic"].search(version1)
+            v2_match = self.version_patterns["semantic"].search(version2)
+
+            if v1_match and v2_match:
+                v1 = tuple(map(int, v1_match.groups()))
+                v2 = tuple(map(int, v2_match.groups()))
+                return v1 < v2
+        except Exception:
+            pass
+        return False
+
+    def _check_documentation_sync(self) -> List[Dict]:
+        """Check synchronization between key documentation files."""
+        issues = []
+
+        # Check CLAUDE.md vs README.md
+        claude_path = self.project_path / "CLAUDE.md"
+        readme_path = self.project_path / "README.md"
+
+        if claude_path.exists() and readme_path.exists():
+            claude_content = claude_path.read_text()
+            readme_content = readme_path.read_text()
+
+            # Check for version discrepancies
+            claude_versions = self.version_patterns["semantic"].findall(claude_content)
+            readme_versions = self.version_patterns["semantic"].findall(readme_content)
+
+            if claude_versions and readme_versions:
+                if claude_versions[0] != readme_versions[0]:
+                    issues.append({
+                        "type": "Version mismatch",
+                        "files": ["CLAUDE.md", "README.md"],
+                        "details": f"CLAUDE.md: {claude_versions[0]}, README.md: {readme_versions[0]}",
+                    })
+
+            # Check for project name consistency
+            project_names = re.findall(r"Claude MPM|claude-mpm", readme_content, re.IGNORECASE)
+            if project_names:
+                unique_names = set(project_names)
+                if len(unique_names) > 1:
+                    issues.append({
+                        "type": "Inconsistent project naming",
+                        "files": ["README.md"],
+                        "details": f"Found variations: {', '.join(unique_names)}",
+                    })
+
+        # Check CHANGELOG.md exists and is recent
+        changelog_path = self.project_path / "CHANGELOG.md"
+        if changelog_path.exists():
+            last_modified = self.get_file_last_modified(changelog_path)
+            if last_modified:
+                days_old = (datetime.now() - last_modified).days
+                if days_old > 30:
+                    issues.append({
+                        "type": "Stale changelog",
+                        "files": ["CHANGELOG.md"],
+                        "details": f"Last updated {days_old} days ago",
+                    })
+        else:
+            issues.append({
+                "type": "Missing file",
+                "files": ["CHANGELOG.md"],
+                "details": "CHANGELOG.md does not exist",
+            })
+
+        return issues
+
+    def _generate_recommendations(self, report: Dict) -> List[str]:
+        """Generate actionable recommendations from review report."""
+        recommendations = []
+
+        # Check for outdated content
+        if report["outdated_sections"]:
+            recommendations.append(
+                f"📝 Review and update {len(report['outdated_sections'])} files with outdated content"
+            )
+
+        # Check for sync issues
+        if report["synchronization_issues"]:
+            for issue in report["synchronization_issues"]:
+                if issue["type"] == "Version mismatch":
+                    recommendations.append("🔄 Synchronize version numbers across documentation files")
+                elif issue["type"] == "Stale changelog":
+                    recommendations.append("📅 Update CHANGELOG.md with recent changes")
+                elif issue["type"] == "Missing file":
+                    recommendations.append(f"➕ Create missing {issue['files'][0]}")
+
+        # Check for TODO items
+        total_todos = sum(
+            len([i for i in file_report.get("outdated_indicators", [])
+                 if i["type"] == "Unresolved TODOs"])
+            for file_report in report["files_reviewed"].values()
+        )
+        if total_todos > 0:
+            recommendations.append(f"✅ Resolve {total_todos} TODO items in documentation")
+
+        # Check for deprecated references
+        deprecated_count = sum(
+            len([i for i in file_report.get("outdated_indicators", [])
+                 if "deprecated" in i["type"].lower()])
+            for file_report in report["files_reviewed"].values()
+        )
+        if deprecated_count > 0:
+            recommendations.append(f"⚠️ Update {deprecated_count} deprecated references")
+
+        return recommendations
+
+    def auto_detect_and_archive_outdated(self, dry_run: bool = False) -> Dict:
+        """Automatically detect and archive outdated documentation."""
+        result = {
+            "reviewed_files": [],
+            "archived_files": [],
+            "skipped_files": [],
+        }
+
+        # Review documentation
+        review = self.review_documentation()
+
+        for filename, file_report in review["files_reviewed"].items():
+            file_path = self.project_path / filename
+            result["reviewed_files"].append(filename)
+
+            # Determine if file should be archived
+            should_archive = False
+            archive_reason = []
+
+            # Check for significant outdated content
+            outdated_count = len(file_report.get("outdated_indicators", []))
+            if outdated_count > 10:
+                should_archive = True
+                archive_reason.append(f"{outdated_count} outdated indicators")
+
+            # Check if file hasn't been updated in git for long time
+            if file_report.get("last_git_update"):
+                last_update = datetime.fromisoformat(file_report["last_git_update"])
+                days_old = (datetime.now() - last_update).days
+                if days_old > 90:
+                    should_archive = True
+                    archive_reason.append(f"No updates for {days_old} days")
+
+            # Archive if needed
+            if should_archive and not dry_run:
+                archive_result = self.archive_file(
+                    file_path,
+                    reason=f"Auto-archived: {', '.join(archive_reason)}",
+                    metadata={
+                        "auto_detection": True,
+                        "indicators": file_report.get("outdated_indicators", [])[:5],
+                        "review_timestamp": review["timestamp"],
+                    }
+                )
+                if archive_result:
+                    result["archived_files"].append({
+                        "file": filename,
+                        "reason": archive_reason,
+                        "archive_path": str(archive_result),
+                    })
+            elif should_archive:
+                result["skipped_files"].append({
+                    "file": filename,
+                    "reason": archive_reason,
+                    "action": "Would archive (dry run)",
+                })
+
+        return result
+
+    def generate_documentation_diff_report(
+        self, file1: Path, file2: Optional[Path] = None
+    ) -> str:
+        """Generate a diff report between two documentation files or versions."""
+        if not file1.exists():
+            return f"Error: {file1} does not exist"
+
+        # If file2 not specified, compare with latest archive
+        if file2 is None:
+            latest = self.get_latest_archive(file1.name)
+            if not latest:
+                return f"No archive found for {file1.name}"
+            file2 = latest
+
+        # Read contents
+        content1 = file1.read_text()
+
+        if file2.suffix == ".gz":
+            with gzip.open(file2, "rt", encoding="utf-8") as f:
+                content2 = f.read()
+        else:
+            content2 = file2.read_text() if isinstance(file2, Path) else file2
+
+        # Generate diff
+        diff = difflib.unified_diff(
+            content2.splitlines(keepends=True),
+            content1.splitlines(keepends=True),
+            fromfile=str(file2) if isinstance(file2, Path) else "Archive",
+            tofile=str(file1),
+            n=3,
+        )
+
+        return "".join(diff)
+
+    def sync_with_readme_and_changelog(self) -> Dict:
+        """Sync key information between CLAUDE.md, README.md, and CHANGELOG.md."""
+        result = {
+            "synced": False,
+            "changes": [],
+            "errors": [],
+        }
+
+        claude_path = self.project_path / "CLAUDE.md"
+        readme_path = self.project_path / "README.md"
+        changelog_path = self.project_path / "CHANGELOG.md"
+
+        if not claude_path.exists():
+            result["errors"].append("CLAUDE.md not found")
+            return result
+
+        try:
+            # Extract current version
+            version_file = self.project_path / "VERSION"
+            if version_file.exists():
+                current_version = version_file.read_text().strip()
+            else:
+                current_version = None
+
+            # Update README.md if exists
+            if readme_path.exists() and current_version:
+                readme_content = readme_path.read_text()
+                # Update version badges or references
+                updated_readme = self._update_version_references(
+                    readme_content, current_version
+                )
+                if updated_readme != readme_content:
+                    # Archive before update
+                    self.archive_file(readme_path, reason="Before version sync")
+                    readme_path.write_text(updated_readme)
+                    result["changes"].append(f"Updated README.md to version {current_version}")
+
+            # Update CHANGELOG.md header if exists
+            if changelog_path.exists() and current_version:
+                changelog_content = changelog_path.read_text()
+                if f"## [{current_version}]" not in changelog_content:
+                    # Add new version section
+                    today = datetime.now().strftime("%Y-%m-%d")
+                    new_section = f"\n## [{current_version}] - {today}\n\n### Added\n\n### Changed\n\n### Fixed\n\n"
+
+                    # Insert after header
+                    lines = changelog_content.splitlines()
+                    insert_pos = 0
+                    for i, line in enumerate(lines):
+                        if line.startswith("## "):
+                            insert_pos = i
+                            break
+
+                    if insert_pos > 0:
+                        lines.insert(insert_pos, new_section)
+                        updated_changelog = "\n".join(lines)
+
+                        # Archive before update
+                        self.archive_file(changelog_path, reason="Before adding new version")
+                        changelog_path.write_text(updated_changelog)
+                        result["changes"].append(f"Added {current_version} section to CHANGELOG.md")
+
+            result["synced"] = len(result["changes"]) > 0
+
+        except Exception as e:
+            result["errors"].append(f"Sync failed: {str(e)}")
+            logger.error(f"Documentation sync failed: {e}")
+
+        return result
+
+    def _update_version_references(self, content: str, new_version: str) -> str:
+        """Update version references in documentation content."""
+        # Update version badges
+        content = re.sub(
+            r"badge/version-v?\d+\.\d+\.\d+",
+            f"badge/version-v{new_version}",
+            content,
+        )
+
+        # Update explicit version mentions
+        content = re.sub(
+            r"(?:Version|version|v)\s*\d+\.\d+\.\d+",
+            f"v{new_version}",
+            content,
+        )
+
+        return content
+
+    def display_review_summary(self, review: Dict) -> None:
+        """Display a formatted summary of the documentation review."""
+        console.print("\n[bold cyan]📚 Documentation Review Summary[/bold cyan]\n")
+
+        # Create summary table
+        table = Table(title="Documentation Files Status")
+        table.add_column("File", style="cyan")
+        table.add_column("Status", style="green")
+        table.add_column("Issues", style="yellow")
+        table.add_column("Last Updated", style="magenta")
+
+        for filename, report in review["files_reviewed"].items():
+            status = "✅ OK" if not report.get("outdated_indicators") else "⚠️ Needs Review"
+            issues = len(report.get("outdated_indicators", []))
+
+            last_updated = "Unknown"
+            if report.get("last_git_update"):
+                last_date = datetime.fromisoformat(report["last_git_update"])
+                days_ago = (datetime.now() - last_date).days
+                last_updated = f"{days_ago} days ago"
+
+            table.add_row(filename, status, str(issues), last_updated)
+
+        console.print(table)
+
+        # Print synchronization issues
+        if review["synchronization_issues"]:
+            console.print("\n[bold yellow]⚠️ Synchronization Issues:[/bold yellow]")
+            for issue in review["synchronization_issues"]:
+                console.print(f"  • {issue['type']}: {issue['details']}")
+
+        # Print recommendations
+        if review["recommendations"]:
+            console.print("\n[bold green]💡 Recommendations:[/bold green]")
+            for rec in review["recommendations"]:
+                console.print(f"  {rec}")
+
+    def restore_from_archive_with_review(
+        self, archive_name: str, review_changes: bool = True
+    ) -> Tuple[bool, str]:
+        """Restore an archive with optional change review."""
+        archive_file = self.archive_path / archive_name
+
+        if not archive_file.exists():
+            return False, f"Archive {archive_name} not found"
+
+        # Extract target file name
+        parts = archive_name.split(".")
+        if archive_name.endswith(".gz"):
+            original_name = ".".join(parts[:-3])
+        else:
+            original_name = ".".join(parts[:-2])
+
+        target_path = self.project_path / original_name
+
+        # Generate diff if current file exists and review requested
+        if target_path.exists() and review_changes:
+            diff_report = self.generate_documentation_diff_report(target_path, archive_file)
+
+            console.print("\n[bold cyan]📝 Changes to be applied:[/bold cyan]")
+            console.print(diff_report)
+
+            # Ask for confirmation
+            console.print("\n[bold yellow]Proceed with restoration? (y/n): [/bold yellow]", end="")
+            # In automated context, assume yes
+            confirm = True
+
+            if not confirm:
+                return False, "Restoration cancelled by user"
+
+        # Proceed with restoration
+        return self.restore_archive(archive_name, target_path)
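
A minimal usage sketch of the new `ArchiveManager` (not part of the published diff): the import path follows the file listed above (`claude_mpm/services/project/archive_manager.py`), while the working directory, reason string, and metadata are illustrative assumptions.

```python
from pathlib import Path

from claude_mpm.services.project.archive_manager import ArchiveManager

project = Path(".").resolve()  # assumes we run from the project root
manager = ArchiveManager(project)

# Archive CLAUDE.md with a reason; returns the archive Path, or None on failure.
archived = manager.archive_file(
    project / "CLAUDE.md",
    reason="Before mpm-init update",            # illustrative reason
    metadata={"triggered_by": "usage-sketch"},  # illustrative metadata
)

if archived is not None:
    # Newest-first listing; metadata comes from the .meta.json sidecar files.
    for entry in manager.list_archives("CLAUDE.md", include_metadata=True):
        print(entry["name"], entry["size"], entry["compressed"])

    # Restore by archive name; the current file is archived first as a backup.
    ok, message = manager.restore_archive(archived.name)
    print(ok, message)
```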
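The documentation-review methods compose the same way; a sketch under the same assumptions, using the dry-run flag so no files are modified:

```python
from pathlib import Path

from claude_mpm.services.project.archive_manager import ArchiveManager

manager = ArchiveManager(Path(".").resolve())

# Scan CLAUDE.md, README.md, CHANGELOG.md, and docs/README.md for TODOs,
# deprecated references, stale versions, and sync issues, then render the table.
review = manager.review_documentation(check_git=True)
manager.display_review_summary(review)

# Dry run: report what would be archived without touching anything.
plan = manager.auto_detect_and_archive_outdated(dry_run=True)
for item in plan["skipped_files"]:
    print(f"{item['file']}: {', '.join(item['reason'])} ({item['action']})")
```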
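Comparison helpers take archive names rather than paths; a sketch, again assuming the project root as the working directory:

```python
from pathlib import Path

from claude_mpm.services.project.archive_manager import ArchiveManager

manager = ArchiveManager(Path(".").resolve())
claude_md = Path("CLAUDE.md")

latest = manager.get_latest_archive("CLAUDE.md")
if latest is not None:
    # Structural comparison: line and size deltas plus an "identical" flag.
    stats = manager.compare_with_archive(claude_md, latest.name)
    print(stats.get("identical"), stats.get("size_change"))

    # Unified diff from archive to working copy; passing file2=None
    # selects the latest archive automatically.
    print(manager.generate_documentation_diff_report(claude_md))
```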