claude-mpm 3.3.0__py3-none-any.whl → 3.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. claude_mpm/agents/templates/data_engineer.json +1 -1
  2. claude_mpm/agents/templates/documentation.json +1 -1
  3. claude_mpm/agents/templates/engineer.json +1 -1
  4. claude_mpm/agents/templates/ops.json +1 -1
  5. claude_mpm/agents/templates/pm.json +1 -1
  6. claude_mpm/agents/templates/qa.json +1 -1
  7. claude_mpm/agents/templates/research.json +1 -1
  8. claude_mpm/agents/templates/security.json +1 -1
  9. claude_mpm/agents/templates/test_integration.json +112 -0
  10. claude_mpm/agents/templates/version_control.json +1 -1
  11. claude_mpm/cli/commands/memory.py +749 -26
  12. claude_mpm/cli/commands/run.py +115 -14
  13. claude_mpm/cli/parser.py +89 -1
  14. claude_mpm/constants.py +6 -0
  15. claude_mpm/core/claude_runner.py +74 -11
  16. claude_mpm/core/config.py +1 -1
  17. claude_mpm/core/session_manager.py +46 -0
  18. claude_mpm/core/simple_runner.py +74 -11
  19. claude_mpm/hooks/builtin/mpm_command_hook.py +5 -5
  20. claude_mpm/hooks/claude_hooks/hook_handler.py +213 -30
  21. claude_mpm/hooks/claude_hooks/hook_wrapper.sh +9 -2
  22. claude_mpm/hooks/memory_integration_hook.py +51 -5
  23. claude_mpm/services/__init__.py +23 -5
  24. claude_mpm/services/agent_memory_manager.py +800 -71
  25. claude_mpm/services/memory_builder.py +823 -0
  26. claude_mpm/services/memory_optimizer.py +619 -0
  27. claude_mpm/services/memory_router.py +445 -0
  28. claude_mpm/services/project_analyzer.py +771 -0
  29. claude_mpm/services/socketio_server.py +649 -45
  30. claude_mpm/services/version_control/git_operations.py +26 -0
  31. claude_mpm-3.4.0.dist-info/METADATA +183 -0
  32. {claude_mpm-3.3.0.dist-info → claude_mpm-3.4.0.dist-info}/RECORD +36 -52
  33. claude_mpm/agents/agent-template.yaml +0 -83
  34. claude_mpm/agents/templates/test-integration-agent.md +0 -34
  35. claude_mpm/agents/test_fix_deployment/.claude-pm/config/project.json +0 -6
  36. claude_mpm/cli/README.md +0 -109
  37. claude_mpm/cli_module/refactoring_guide.md +0 -253
  38. claude_mpm/core/agent_registry.py.bak +0 -312
  39. claude_mpm/core/base_service.py.bak +0 -406
  40. claude_mpm/core/websocket_handler.py +0 -233
  41. claude_mpm/hooks/README.md +0 -97
  42. claude_mpm/orchestration/SUBPROCESS_DESIGN.md +0 -66
  43. claude_mpm/schemas/README_SECURITY.md +0 -92
  44. claude_mpm/schemas/agent_schema.json +0 -395
  45. claude_mpm/schemas/agent_schema_documentation.md +0 -181
  46. claude_mpm/schemas/agent_schema_security_notes.md +0 -165
  47. claude_mpm/schemas/examples/standard_workflow.json +0 -505
  48. claude_mpm/schemas/ticket_workflow_documentation.md +0 -482
  49. claude_mpm/schemas/ticket_workflow_schema.json +0 -590
  50. claude_mpm/services/framework_claude_md_generator/README.md +0 -92
  51. claude_mpm/services/parent_directory_manager/README.md +0 -83
  52. claude_mpm/services/version_control/VERSION +0 -1
  53. claude_mpm/services/websocket_server.py +0 -376
  54. claude_mpm-3.3.0.dist-info/METADATA +0 -432
  55. {claude_mpm-3.3.0.dist-info → claude_mpm-3.4.0.dist-info}/WHEEL +0 -0
  56. {claude_mpm-3.3.0.dist-info → claude_mpm-3.4.0.dist-info}/entry_points.txt +0 -0
  57. {claude_mpm-3.3.0.dist-info → claude_mpm-3.4.0.dist-info}/licenses/LICENSE +0 -0
  58. {claude_mpm-3.3.0.dist-info → claude_mpm-3.4.0.dist-info}/top_level.txt +0 -0
claude_mpm/services/memory_builder.py (new file)
@@ -0,0 +1,823 @@
+ #!/usr/bin/env python3
+ """
+ Memory Builder Service
+ ======================
+
+ Builds agent memories from project documentation by parsing and extracting
+ memory-worthy content for appropriate agents.
+
+ This service provides:
+ - Documentation parsing (CLAUDE.md, QA.md, STRUCTURE.md, etc.)
+ - Content extraction and categorization
+ - Agent assignment based on content type
+ - Concise memory entry creation (< 100 chars)
+ - Batch building from multiple docs
+
+ WHY: Project documentation contains valuable patterns, guidelines, and knowledge
+ that agents should be aware of. This service automatically extracts and assigns
+ relevant information to appropriate agents.
+
+ DESIGN DECISION: Focuses on extracting actionable insights rather than copying
+ documentation verbatim. Creates concise learnings that fit memory constraints
+ while preserving essential information.
+ """
+
+ import re
+ from pathlib import Path
+ from typing import Dict, List, Optional, Any, Tuple
+ from datetime import datetime
+
+ from claude_mpm.core import LoggerMixin
+ from claude_mpm.core.config import Config
+ from claude_mpm.utils.paths import PathResolver
+ from claude_mpm.services.memory_router import MemoryRouter
+ from claude_mpm.services.project_analyzer import ProjectAnalyzer
+
+
+ class MemoryBuilder(LoggerMixin):
+     """Builds agent memories from project documentation.
+
+     WHY: Documentation contains patterns and guidelines that agents should know
+     about. Manual memory creation is time-consuming and prone to inconsistency.
+     This service automates the extraction and assignment process.
+
+     DESIGN DECISION: Uses pattern matching and content analysis to extract
+     actionable insights rather than copying raw documentation. Focuses on
+     creating learnings that will actually be useful to agents.
+     """
+
+     # Documentation files to process
+     DOC_FILES = {
+         'CLAUDE.md': {
+             'priority': 'high',
+             'sections': ['development guidelines', 'key components', 'common issues'],
+             'agents': ['pm', 'engineer']
+         },
+         'docs/STRUCTURE.md': {
+             'priority': 'high',
+             'sections': ['file placement', 'design patterns', 'directory structure'],
+             'agents': ['engineer', 'documentation']
+         },
+         'docs/QA.md': {
+             'priority': 'high',
+             'sections': ['testing', 'quality assurance', 'validation'],
+             'agents': ['qa', 'engineer']
+         },
+         'docs/DEPLOY.md': {
+             'priority': 'medium',
+             'sections': ['deployment', 'versioning', 'release'],
+             'agents': ['engineer', 'pm']
+         },
+         'docs/VERSIONING.md': {
+             'priority': 'medium',
+             'sections': ['version management', 'semantic versioning'],
+             'agents': ['engineer', 'pm']
+         }
+     }
+
+     # Patterns for extracting actionable content
+     EXTRACTION_PATTERNS = {
+         'guidelines': [
+             r'(?:must|should|always|never|avoid|ensure|remember to)\s+(.+?)(?:\.|$)',
+             r'(?:important|note|warning|tip):\s*(.+?)(?:\.|$)',
+             r'(?:do not|don\'t)\s+(.+?)(?:\.|$)'
+         ],
+         'patterns': [
+             r'(?:pattern|approach|strategy|method):\s*(.+?)(?:\.|$)',
+             r'(?:use|implement|follow)\s+(.+?)\s+(?:pattern|approach|for)',
+             r'(?:follows|uses|implements)\s+(.+?)\s+(?:pattern|architecture)'
+         ],
+         'mistakes': [
+             r'(?:common\s+)?(?:mistake|error|issue|problem):\s*(.+?)(?:\.|$)',
+             r'(?:avoid|never|don\'t)\s+(.+?)(?:\.|$)',
+             r'(?:pitfall|gotcha|warning):\s*(.+?)(?:\.|$)'
+         ],
+         'architecture': [
+             r'(?:architecture|structure|design):\s*(.+?)(?:\.|$)',
+             r'(?:component|service|module)\s+(.+?)\s+(?:provides|handles|manages)',
+             r'(?:uses|implements|follows)\s+(.+?)\s+(?:architecture|pattern)'
+         ]
+     }
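To make the patterns concrete, here is a minimal sketch (not part of the package) of how the first 'guidelines' regex above pulls an actionable clause out of a documentation sentence; the sample text is invented:

    import re

    sample = "You should always run the test suite before releasing."
    pattern = r'(?:must|should|always|never|avoid|ensure|remember to)\s+(.+?)(?:\.|$)'
    match = re.search(pattern, sample, re.IGNORECASE | re.MULTILINE)
    if match:
        print(match.group(1))  # -> "always run the test suite before releasing"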
+
+     def __init__(self, config: Optional[Config] = None, working_directory: Optional[Path] = None):
+         """Initialize the memory builder.
+
+         Args:
+             config: Optional Config object
+             working_directory: Optional working directory for project-specific analysis
+         """
+         super().__init__()
+         self.config = config or Config()
+         self.project_root = PathResolver.get_project_root()
+         self.working_directory = working_directory or self.project_root
+         self.memories_dir = self.project_root / ".claude-mpm" / "memories"
+         self.router = MemoryRouter(config)
+         self.project_analyzer = ProjectAnalyzer(config, self.working_directory)
+
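A rough usage sketch (assuming it runs from the root of a claude-mpm project so PathResolver can locate CLAUDE.md and docs/; the import paths mirror the ones at the top of this module):

    from claude_mpm.core.config import Config
    from claude_mpm.services.memory_builder import MemoryBuilder

    builder = MemoryBuilder(Config())
    results = builder.build_from_documentation(force_rebuild=True)
    print(results["files_processed"], results["memories_created"])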
+     def _get_dynamic_doc_files(self) -> Dict[str, Dict[str, Any]]:
+         """Get documentation files to process based on project analysis.
+
+         WHY: Instead of hardcoded file list, dynamically discover important files
+         based on actual project structure and characteristics.
+
+         Returns:
+             Dict mapping file paths to processing configuration
+         """
+         dynamic_files = {}
+
+         # Start with static important files
+         static_files = self.DOC_FILES.copy()
+
+         # Get project-specific important files
+         try:
+             important_files = self.project_analyzer.get_important_files_for_context()
+             project_characteristics = self.project_analyzer.analyze_project()
+
+             # Add configuration files
+             for config_file in project_characteristics.important_configs:
+                 if config_file not in static_files:
+                     file_ext = Path(config_file).suffix.lower()
+
+                     if file_ext in ['.json', '.toml', '.yaml', '.yml']:
+                         dynamic_files[config_file] = {
+                             'priority': 'medium',
+                             'sections': ['configuration', 'setup', 'dependencies'],
+                             'agents': ['engineer', 'pm'],
+                             'file_type': 'config'
+                         }
+
+             # Add project-specific documentation
+             for doc_file in important_files:
+                 if doc_file not in static_files and doc_file not in dynamic_files:
+                     file_path = Path(doc_file)
+
+                     # Determine processing config based on file name/path
+                     if 'api' in doc_file.lower() or 'endpoint' in doc_file.lower():
+                         dynamic_files[doc_file] = {
+                             'priority': 'high',
+                             'sections': ['api', 'endpoints', 'integration'],
+                             'agents': ['engineer', 'integration'],
+                             'file_type': 'api_doc'
+                         }
+                     elif 'architecture' in doc_file.lower() or 'design' in doc_file.lower():
+                         dynamic_files[doc_file] = {
+                             'priority': 'high',
+                             'sections': ['architecture', 'design', 'patterns'],
+                             'agents': ['engineer', 'architect'],
+                             'file_type': 'architecture'
+                         }
+                     elif 'test' in doc_file.lower():
+                         dynamic_files[doc_file] = {
+                             'priority': 'medium',
+                             'sections': ['testing', 'quality'],
+                             'agents': ['qa', 'engineer'],
+                             'file_type': 'test_doc'
+                         }
+                     elif file_path.suffix.lower() == '.md':
+                         # Generic markdown file
+                         dynamic_files[doc_file] = {
+                             'priority': 'low',
+                             'sections': ['documentation', 'guidelines'],
+                             'agents': ['pm', 'engineer'],
+                             'file_type': 'markdown'
+                         }
+
+             # Add key source files for pattern analysis (limited selection)
+             if project_characteristics.entry_points:
+                 for entry_point in project_characteristics.entry_points[:2]:  # Only first 2
+                     if entry_point not in dynamic_files:
+                         dynamic_files[entry_point] = {
+                             'priority': 'low',
+                             'sections': ['patterns', 'implementation'],
+                             'agents': ['engineer'],
+                             'file_type': 'source',
+                             'extract_patterns_only': True  # Only extract patterns, not full content
+                         }
+
+         except Exception as e:
+             self.logger.warning(f"Error getting dynamic doc files: {e}")
+
+         # Merge static and dynamic files
+         all_files = {**static_files, **dynamic_files}
+
+         self.logger.debug(f"Processing {len(all_files)} documentation files ({len(static_files)} static, {len(dynamic_files)} dynamic)")
+         return all_files
+
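For example, if the project analyzer surfaced a hypothetical docs/API.md, the 'api' branch above would register it roughly as:

    {'docs/API.md': {
        'priority': 'high',
        'sections': ['api', 'endpoints', 'integration'],
        'agents': ['engineer', 'integration'],
        'file_type': 'api_doc'
    }}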
+     def build_from_documentation(self, force_rebuild: bool = False) -> Dict[str, Any]:
+         """Build agent memories from project documentation.
+
+         WHY: Documentation contains project-specific knowledge that agents need.
+         This method extracts and assigns relevant information to appropriate agents.
+
+         Args:
+             force_rebuild: If True, rebuilds even if docs haven't changed
+
+         Returns:
+             Dict containing build results and statistics
+         """
+         try:
+             results = {
+                 "success": True,
+                 "timestamp": datetime.now().isoformat(),
+                 "files_processed": 0,
+                 "memories_created": 0,
+                 "memories_updated": 0,
+                 "agents_affected": set(),
+                 "files": {},
+                 "errors": []
+             }
+
+             # Get dynamic list of files to process
+             doc_files = self._get_dynamic_doc_files()
+
+             # Process each documentation file
+             for doc_path, doc_config in doc_files.items():
+                 file_path = self.project_root / doc_path
+
+                 if not file_path.exists():
+                     self.logger.debug(f"Documentation file not found: {doc_path}")
+                     continue
+
+                 # Check if rebuild is needed
+                 if not force_rebuild and not self._needs_rebuild(file_path):
+                     self.logger.debug(f"Skipping {doc_path} - no changes detected")
+                     continue
+
+                 file_result = self._process_documentation_file(file_path, doc_config)
+                 results["files"][doc_path] = file_result
+
+                 # Aggregate results
+                 if file_result.get("success"):
+                     results["files_processed"] += 1
+                     results["memories_created"] += file_result.get("memories_created", 0)
+                     results["memories_updated"] += file_result.get("memories_updated", 0)
+                     results["agents_affected"].update(file_result.get("agents_affected", []))
+                 else:
+                     results["errors"].append(f"{doc_path}: {file_result.get('error', 'Unknown error')}")
+
+             # Convert set to list for JSON serialization
+             results["agents_affected"] = list(results["agents_affected"])
+             results["total_agents_affected"] = len(results["agents_affected"])
+
+             self.logger.info(f"Built memories from documentation: {results['files_processed']} files, {results['memories_created']} memories created")
+             return results
+
+         except Exception as e:
+             self.logger.error(f"Error building memories from documentation: {e}")
+             return {
+                 "success": False,
+                 "error": str(e),
+                 "timestamp": datetime.now().isoformat()
+             }
+
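A small sketch of how a caller might report the outcome, using the key names defined above (builder is the MemoryBuilder instance from the earlier sketch):

    results = builder.build_from_documentation()
    if results["success"]:
        for doc_path, file_result in results["files"].items():
            print(f"{doc_path}: {file_result.get('memories_created', 0)} memories")
        for error in results["errors"]:
            print(f"error: {error}")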
+     def extract_from_text(self, text: str, source: str) -> List[Dict[str, Any]]:
+         """Extract memory-worthy content from text.
+
+         WHY: Provides reusable text extraction logic that can be used for
+         custom documentation or other text sources beyond standard files.
+
+         Args:
+             text: Text content to analyze
+             source: Source identifier for context
+
+         Returns:
+             List of extracted memory items with metadata
+         """
+         try:
+             extracted_items = []
+
+             # Process each extraction pattern type
+             for pattern_type, patterns in self.EXTRACTION_PATTERNS.items():
+                 for pattern in patterns:
+                     matches = re.finditer(pattern, text, re.IGNORECASE | re.MULTILINE)
+
+                     for match in matches:
+                         content = match.group(1).strip()
+
+                         # Clean and validate content
+                         content = self._clean_extracted_content(content)
+                         if not self._is_valid_memory_content(content):
+                             continue
+
+                         # Route to appropriate agent
+                         routing_result = self.router.analyze_and_route(content)
+
+                         extracted_item = {
+                             "content": content,
+                             "type": pattern_type,
+                             "source": source,
+                             "target_agent": routing_result.get("target_agent", "pm"),
+                             "section": routing_result.get("section", "Recent Learnings"),
+                             "confidence": routing_result.get("confidence", 0.5),
+                             "pattern_matched": pattern
+                         }
+
+                         extracted_items.append(extracted_item)
+
+             # Remove near-duplicates
+             unique_items = self._deduplicate_extracted_items(extracted_items)
+
+             self.logger.debug(f"Extracted {len(unique_items)} unique items from {source}")
+             return unique_items
+
+         except Exception as e:
+             self.logger.error(f"Error extracting content from text: {e}")
+             return []
+
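A quick sketch of running ad-hoc text through extract_from_text (the sample text and source label are made up; which agent each item routes to depends on MemoryRouter, and builder is the instance from the earlier sketch):

    text = "Always use pathlib.Path for file operations. Avoid hardcoding absolute paths."
    for item in builder.extract_from_text(text, source="adhoc-notes"):
        print(item["target_agent"], item["type"], item["content"])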
+     def build_agent_memory_from_items(self, agent_id: str, items: List[Dict[str, Any]]) -> Dict[str, Any]:
+         """Build or update agent memory from extracted items.
+
+         WHY: Extracted items need to be properly integrated into agent memory
+         files while respecting existing content and size limits.
+
+         Args:
+             agent_id: Target agent identifier
+             items: List of extracted memory items
+
+         Returns:
+             Dict containing update results
+         """
+         try:
+             from claude_mpm.services.agent_memory_manager import get_memory_manager
+             memory_manager = get_memory_manager(self.config)
+
+             result = {
+                 "success": True,
+                 "agent_id": agent_id,
+                 "items_processed": 0,
+                 "items_added": 0,
+                 "items_skipped": 0,
+                 "sections_updated": set(),
+                 "errors": []
+             }
+
+             # Filter items for this agent
+             agent_items = [item for item in items if item.get("target_agent") == agent_id]
+
+             for item in agent_items:
+                 result["items_processed"] += 1
+
+                 try:
+                     # Add to memory
+                     section = item.get("section", "Recent Learnings")
+                     content = item.get("content", "")
+
+                     success = memory_manager.update_agent_memory(agent_id, section, content)
+
+                     if success:
+                         result["items_added"] += 1
+                         result["sections_updated"].add(section)
+                     else:
+                         result["items_skipped"] += 1
+                         result["errors"].append(f"Failed to add: {content[:50]}...")
+
+                 except Exception as e:
+                     result["items_skipped"] += 1
+                     result["errors"].append(f"Error processing item: {str(e)}")
+
+             # Convert set to list
+             result["sections_updated"] = list(result["sections_updated"])
+
+             return result
+
+         except Exception as e:
+             self.logger.error(f"Error building memory for {agent_id}: {e}")
+             return {
+                 "success": False,
+                 "agent_id": agent_id,
+                 "error": str(e)
+             }
+
+     def _extract_from_config_file(self, content: str, file_path: Path, doc_config: Dict[str, Any]) -> List[Dict[str, Any]]:
+         """Extract memory-worthy information from configuration files.
+
+         WHY: Configuration files contain important setup patterns, dependencies,
+         and architectural decisions that agents should understand.
+
+         Args:
+             content: File content
+             file_path: Path to the file
+             doc_config: Processing configuration
+
+         Returns:
+             List of extracted memory items
+         """
+         extracted_items = []
+         source = str(file_path.relative_to(self.project_root))
+
+         try:
+             file_ext = file_path.suffix.lower()
+
+             if file_ext == '.json':
+                 # Parse JSON configuration
+                 import json
+                 config_data = json.loads(content)
+                 items = self._extract_from_json_config(config_data, source)
+                 extracted_items.extend(items)
+
+             elif file_ext in ['.toml']:
+                 # Parse TOML configuration
+                 try:
+                     try:
+                         import tomllib
+                     except ImportError:
+                         import tomli as tomllib
+                     with open(file_path, 'rb') as f:
+                         config_data = tomllib.load(f)
+                     items = self._extract_from_toml_config(config_data, source)
+                     extracted_items.extend(items)
+                 except ImportError:
+                     self.logger.warning(f"TOML parsing not available for {source}")
+
+             elif file_ext in ['.yaml', '.yml']:
+                 # For YAML, rely on the text-based extraction below for now
+                 pass
+
+             # Also extract text patterns for comments and documentation
+             text_items = self.extract_from_text(content, source)
+             extracted_items.extend(text_items)
+
+         except Exception as e:
+             self.logger.warning(f"Error parsing config file {source}: {e}")
+             # Fall back to text extraction
+             extracted_items = self.extract_from_text(content, source)
+
+         return extracted_items
+
+     def _extract_from_json_config(self, config_data: dict, source: str) -> List[Dict[str, Any]]:
+         """Extract patterns from JSON configuration."""
+         items = []
+
+         # Extract dependencies information
+         if 'dependencies' in config_data:
+             deps = config_data['dependencies']
+             if isinstance(deps, dict) and deps:
+                 dep_names = list(deps.keys())[:5]  # Limit to prevent overwhelming
+                 deps_str = ", ".join(dep_names)
+                 items.append({
+                     "content": f"Key dependencies: {deps_str}",
+                     "type": "dependency_info",
+                     "source": source,
+                     "target_agent": "engineer",
+                     "section": "Current Technical Context",
+                     "confidence": 0.8
+                 })
+
+         # Extract scripts (for package.json)
+         if 'scripts' in config_data:
+             scripts = config_data['scripts']
+             if isinstance(scripts, dict):
+                 for script_name, script_cmd in list(scripts.items())[:3]:  # Limit to first 3
+                     items.append({
+                         "content": f"Build script '{script_name}': {script_cmd[:50]}{'...' if len(script_cmd) > 50 else ''}",
+                         "type": "build_pattern",
+                         "source": source,
+                         "target_agent": "engineer",
+                         "section": "Implementation Guidelines",
+                         "confidence": 0.7
+                     })
+
+         return items
+
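As an illustration, feeding the helper above a package.json-style dict (values hypothetical; builder as in the earlier sketch) yields one dependency summary plus one entry per script, capped as in the code:

    config = {
        "dependencies": {"react": "^18.0.0", "express": "^4.18.0"},
        "scripts": {"build": "tsc -p .", "test": "jest"},
    }
    items = builder._extract_from_json_config(config, source="package.json")
    # -> "Key dependencies: react, express" routed to the engineer agent,
    #    plus "Build script 'build': tsc -p ." and "Build script 'test': jest"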
+     def _extract_from_toml_config(self, config_data: dict, source: str) -> List[Dict[str, Any]]:
+         """Extract patterns from TOML configuration."""
+         items = []
+
+         # Extract project metadata (for pyproject.toml)
+         if 'project' in config_data:
+             project_info = config_data['project']
+             if 'dependencies' in project_info:
+                 deps = project_info['dependencies']
+                 if deps:
+                     items.append({
+                         "content": f"Python dependencies: {', '.join(deps[:5])}",
+                         "type": "dependency_info",
+                         "source": source,
+                         "target_agent": "engineer",
+                         "section": "Current Technical Context",
+                         "confidence": 0.8
+                     })
+
+         # Extract Rust dependencies (for Cargo.toml)
+         if 'dependencies' in config_data:
+             deps = config_data['dependencies']
+             if isinstance(deps, dict) and deps:
+                 dep_names = list(deps.keys())[:5]
+                 items.append({
+                     "content": f"Rust dependencies: {', '.join(dep_names)}",
+                     "type": "dependency_info",
+                     "source": source,
+                     "target_agent": "engineer",
+                     "section": "Current Technical Context",
+                     "confidence": 0.8
+                 })
+
+         return items
+
+     def _extract_from_source_file(self, content: str, file_path: Path, doc_config: Dict[str, Any]) -> List[Dict[str, Any]]:
+         """Extract patterns from source code files.
+
+         WHY: Source files contain implementation patterns and architectural
+         decisions that agents should be aware of, but we only extract high-level
+         patterns rather than detailed code analysis.
+
+         Args:
+             content: File content
+             file_path: Path to the file
+             doc_config: Processing configuration
+
+         Returns:
+             List of extracted memory items
+         """
+         extracted_items = []
+         source = str(file_path.relative_to(self.project_root))
+
+         # Only extract patterns if specified
+         if not doc_config.get('extract_patterns_only', False):
+             return []
+
+         file_ext = file_path.suffix.lower()
+
+         # Language-specific pattern extraction
+         if file_ext == '.py':
+             items = self._extract_python_patterns(content, source)
+             extracted_items.extend(items)
+         elif file_ext in ['.js', '.ts']:
+             items = self._extract_javascript_patterns(content, source)
+             extracted_items.extend(items)
+
+         return extracted_items[:3]  # Limit to prevent overwhelming
+
+     def _extract_python_patterns(self, content: str, source: str) -> List[Dict[str, Any]]:
+         """Extract high-level patterns from Python source."""
+         items = []
+
+         # Check for common patterns
+         if 'if __name__ == "__main__"' in content:
+             items.append({
+                 "content": "Uses if __name__ == '__main__' pattern for script execution",
+                 "type": "pattern",
+                 "source": source,
+                 "target_agent": "engineer",
+                 "section": "Coding Patterns Learned",
+                 "confidence": 0.8
+             })
+
+         if 'from pathlib import Path' in content:
+             items.append({
+                 "content": "Uses pathlib.Path for file operations (recommended pattern)",
+                 "type": "pattern",
+                 "source": source,
+                 "target_agent": "engineer",
+                 "section": "Coding Patterns Learned",
+                 "confidence": 0.7
+             })
+
+         # Check for class definitions
+         class_matches = re.findall(r'class\s+(\w+)', content)
+         if class_matches:
+             items.append({
+                 "content": f"Defines classes: {', '.join(class_matches[:3])}",
+                 "type": "architecture",
+                 "source": source,
+                 "target_agent": "engineer",
+                 "section": "Project Architecture",
+                 "confidence": 0.6
+             })
+
+         return items
+
+     def _extract_javascript_patterns(self, content: str, source: str) -> List[Dict[str, Any]]:
+         """Extract high-level patterns from JavaScript/TypeScript source."""
+         items = []
+
+         # Check for async patterns
+         if 'async function' in content or 'async ' in content:
+             items.append({
+                 "content": "Uses async/await patterns for asynchronous operations",
+                 "type": "pattern",
+                 "source": source,
+                 "target_agent": "engineer",
+                 "section": "Coding Patterns Learned",
+                 "confidence": 0.8
+             })
+
+         # Check for module patterns
+         if 'export ' in content:
+             items.append({
+                 "content": "Uses ES6 module export patterns",
+                 "type": "pattern",
+                 "source": source,
+                 "target_agent": "engineer",
+                 "section": "Coding Patterns Learned",
+                 "confidence": 0.7
+             })
+
+         return items
+
+     def _process_documentation_file(self, file_path: Path, doc_config: Dict[str, Any]) -> Dict[str, Any]:
+         """Process a single documentation file with enhanced file type support.
+
+         Args:
+             file_path: Path to documentation file
+             doc_config: Configuration for this file type
+
+         Returns:
+             Processing results
+         """
+         try:
+             # Read file content
+             content = file_path.read_text(encoding='utf-8', errors='ignore')
+
+             # Handle different file types
+             file_type = doc_config.get('file_type', 'markdown')
+
+             if file_type == 'config':
+                 extracted_items = self._extract_from_config_file(content, file_path, doc_config)
+             elif file_type == 'source':
+                 extracted_items = self._extract_from_source_file(content, file_path, doc_config)
+             else:
+                 # Default markdown/text processing
+                 extracted_items = self.extract_from_text(content, str(file_path.relative_to(self.project_root)))
+
+             result = {
+                 "success": True,
+                 "file_path": str(file_path),
+                 "content_length": len(content),
+                 "items_extracted": len(extracted_items),
+                 "memories_created": 0,
+                 "memories_updated": 0,
+                 "agents_affected": [],
+                 "agent_results": {}
+             }
+
+             # Group items by target agent
+             agent_items = {}
+             for item in extracted_items:
+                 agent = item.get("target_agent", "pm")
+                 if agent not in agent_items:
+                     agent_items[agent] = []
+                 agent_items[agent].append(item)
+
+             # Update each agent's memory
+             for agent_id, items in agent_items.items():
+                 agent_result = self.build_agent_memory_from_items(agent_id, items)
+                 result["agent_results"][agent_id] = agent_result
+
+                 if agent_result.get("success"):
+                     result["agents_affected"].append(agent_id)
+                     result["memories_created"] += agent_result.get("items_added", 0)
+
+             # Update last processed timestamp
+             self._update_last_processed(file_path)
+
+             return result
+
+         except Exception as e:
+             self.logger.error(f"Error processing documentation file {file_path}: {e}")
+             return {
+                 "success": False,
+                 "file_path": str(file_path),
+                 "error": str(e)
+             }
+
+     def _needs_rebuild(self, file_path: Path) -> bool:
+         """Check if documentation file needs to be processed.
+
+         Args:
+             file_path: Path to documentation file
+
+         Returns:
+             True if file needs processing
+         """
+         # Check if file was modified since last processing
+         try:
+             last_processed_file = self.memories_dir / ".last_processed.json"
+
+             if not last_processed_file.exists():
+                 return True
+
+             import json
+             last_processed = json.loads(last_processed_file.read_text())
+
+             file_key = str(file_path.relative_to(self.project_root))
+             if file_key not in last_processed:
+                 return True
+
+             last_processed_time = datetime.fromisoformat(last_processed[file_key])
+             file_modified_time = datetime.fromtimestamp(file_path.stat().st_mtime)
+
+             return file_modified_time > last_processed_time
+
+         except Exception as e:
+             self.logger.debug(f"Error checking rebuild status for {file_path}: {e}")
+             return True  # Default to rebuilding if we can't determine
+
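The freshness check reduces to comparing the file's mtime against the ISO timestamp recorded in .last_processed.json, roughly (the stored value here is hypothetical):

    from datetime import datetime
    from pathlib import Path

    doc = Path("CLAUDE.md")
    stored = "2024-01-01T00:00:00"  # value previously written to .last_processed.json
    needs_rebuild = datetime.fromtimestamp(doc.stat().st_mtime) > datetime.fromisoformat(stored)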
+     def _update_last_processed(self, file_path: Path):
+         """Update last processed timestamp for file.
+
+         Args:
+             file_path: Path to documentation file
+         """
+         try:
+             self.memories_dir.mkdir(parents=True, exist_ok=True)
+             last_processed_file = self.memories_dir / ".last_processed.json"
+
+             # Load existing data
+             if last_processed_file.exists():
+                 import json
+                 last_processed = json.loads(last_processed_file.read_text())
+             else:
+                 last_processed = {}
+
+             # Update timestamp
+             file_key = str(file_path.relative_to(self.project_root))
+             last_processed[file_key] = datetime.now().isoformat()
+
+             # Save back
+             import json
+             last_processed_file.write_text(json.dumps(last_processed, indent=2))
+
+         except Exception as e:
+             self.logger.warning(f"Error updating last processed timestamp: {e}")
+
+     def _clean_extracted_content(self, content: str) -> str:
+         """Clean and normalize extracted content.
+
+         Args:
+             content: Raw extracted content
+
+         Returns:
+             Cleaned content string
+         """
+         # Remove markdown formatting
+         content = re.sub(r'[*_`#]+', '', content)
+
+         # Remove extra whitespace
+         content = re.sub(r'\s+', ' ', content).strip()
+
+         # Remove common prefixes that don't add value
+         content = re.sub(r'^(?:note:|tip:|important:|warning:)\s*', '', content, flags=re.IGNORECASE)
+
+         # Truncate to memory limit (with ellipsis if needed)
+         if len(content) > 95:  # Leave room for ellipsis
+             content = content[:95] + "..."
+
+         return content
+
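For example, the cleaning steps above turn a markdown-flavored note into a plain entry under the ~100-character cap (sample input invented; builder as in the earlier sketch):

    raw = "**Important:** Always run `make test` before pushing to main."
    cleaned = builder._clean_extracted_content(raw)
    # -> "Always run make test before pushing to main."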
+     def _is_valid_memory_content(self, content: str) -> bool:
+         """Validate if content is suitable for memory storage.
+
+         Args:
+             content: Content to validate
+
+         Returns:
+             True if content is valid for memory
+         """
+         # Must have minimum length
+         if len(content) < 10:
+             return False
+
+         # Must contain actionable information
+         actionable_words = ['use', 'avoid', 'ensure', 'follow', 'implement', 'check', 'must', 'should', 'never', 'always']
+         if not any(word in content.lower() for word in actionable_words):
+             return False
+
+         # Avoid overly generic content
+         generic_phrases = ['this is', 'this document', 'see above', 'as mentioned', 'for more info']
+         if any(phrase in content.lower() for phrase in generic_phrases):
+             return False
+
+         return True
+
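A couple of examples of what this validator accepts and rejects (samples invented; builder as in the earlier sketch):

    builder._is_valid_memory_content("Use dependency injection for service wiring")  # True
    builder._is_valid_memory_content("This document describes the project layout")   # False: generic, no actionable verb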
+     def _deduplicate_extracted_items(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """Remove near-duplicate extracted items.
+
+         Args:
+             items: List of extracted items
+
+         Returns:
+             Deduplicated list
+         """
+         from difflib import SequenceMatcher
+
+         unique_items = []
+
+         for item in items:
+             content = item.get("content", "")
+             is_duplicate = False
+
+             # Check against existing unique items
+             for unique_item in unique_items:
+                 unique_content = unique_item.get("content", "")
+                 similarity = SequenceMatcher(None, content.lower(), unique_content.lower()).ratio()
+
+                 if similarity > 0.8:  # 80% similarity threshold
+                     is_duplicate = True
+                     break
+
+             if not is_duplicate:
+                 unique_items.append(item)
+
+         return unique_items
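The 0.8 cutoff above uses difflib's ratio; as a rough illustration of what counts as a near-duplicate (sentences invented):

    from difflib import SequenceMatcher

    a = "always run the test suite before releasing"
    b = "Always run the test suite before a release"
    SequenceMatcher(None, a.lower(), b.lower()).ratio()  # roughly 0.9, so the second item would be dropped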