claude-mpm 2.1.1__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. claude_mpm/_version.py +2 -2
  2. claude_mpm/agents/agent_loader.py +682 -102
  3. claude_mpm/agents/base_agent_loader.py +23 -8
  4. claude_mpm/agents/schema/agent_schema.json +237 -83
  5. claude_mpm/agents/templates/data_engineer.json +6 -3
  6. claude_mpm/agents/templates/documentation.json +6 -3
  7. claude_mpm/agents/templates/engineer.json +7 -4
  8. claude_mpm/agents/templates/ops.json +6 -3
  9. claude_mpm/agents/templates/qa.json +10 -5
  10. claude_mpm/agents/templates/research.json +31 -42
  11. claude_mpm/agents/templates/security.json +14 -6
  12. claude_mpm/agents/templates/version_control.json +9 -5
  13. claude_mpm/core/base_service.py +61 -1
  14. claude_mpm/hooks/claude_hooks/hook_handler.py +224 -20
  15. claude_mpm/schemas/README_SECURITY.md +92 -0
  16. claude_mpm/schemas/agent_schema.json +130 -51
  17. claude_mpm/schemas/agent_schema_security_notes.md +165 -0
  18. claude_mpm/services/agent_capabilities_generator.py +0 -1
  19. claude_mpm/services/agent_deployment.py +479 -91
  20. claude_mpm/services/agent_lifecycle_manager.py +62 -4
  21. claude_mpm/services/deployed_agent_discovery.py +0 -1
  22. claude_mpm/services/version_control/semantic_versioning.py +165 -16
  23. claude_mpm/validation/agent_validator.py +147 -13
  24. {claude_mpm-2.1.1.dist-info → claude_mpm-3.0.1.dist-info}/METADATA +2 -2
  25. {claude_mpm-2.1.1.dist-info → claude_mpm-3.0.1.dist-info}/RECORD +29 -29
  26. claude_mpm/cli_old/__init__.py +0 -1
  27. claude_mpm/cli_old/ticket_cli.py +0 -102
  28. {claude_mpm-2.1.1.dist-info → claude_mpm-3.0.1.dist-info}/WHEEL +0 -0
  29. {claude_mpm-2.1.1.dist-info → claude_mpm-3.0.1.dist-info}/entry_points.txt +0 -0
  30. {claude_mpm-2.1.1.dist-info → claude_mpm-3.0.1.dist-info}/licenses/LICENSE +0 -0
  31. {claude_mpm-2.1.1.dist-info → claude_mpm-3.0.1.dist-info}/top_level.txt +0 -0
@@ -3,26 +3,62 @@
  Unified Agent Loader System
  ==========================

- Provides unified loading of agent prompts from JSON template files using
- the new standardized schema format.
+ This module provides a unified system for loading and managing AI agent configurations
+ from JSON template files. It serves as the central registry for all agent types in the
+ Claude MPM system, handling discovery, validation, caching, and dynamic model selection.
+
+ Architecture Overview:
+ ----------------------
+ The agent loader follows a plugin-like architecture where agents are discovered from
+ JSON template files in a designated directory. Each agent is validated against a
+ standardized schema before being registered for use.

  Key Features:
- - Loads agent prompts from src/claude_mpm/agents/templates/*.json files
- - Handles base_agent.md prepending
- - Provides backward-compatible get_*_agent_prompt() functions
- - Uses SharedPromptCache for performance
- - Validates agents against schema before loading
+ -------------
+ - Automatic agent discovery from src/claude_mpm/agents/templates/*.json files
+ - Schema validation ensures all agents conform to the expected structure
+ - Intelligent caching using SharedPromptCache for performance optimization
+ - Dynamic model selection based on task complexity analysis
+ - Backward compatibility with legacy get_*_agent_prompt() functions
+ - Prepends base instructions to maintain consistency across all agents
+
+ Design Decisions:
+ -----------------
+ 1. JSON-based Configuration: We chose JSON over YAML or Python files for:
+ - Schema validation support
+ - Language-agnostic configuration
+ - Easy parsing and generation by tools
+
+ 2. Lazy Loading with Caching: Agents are loaded on-demand and cached to:
+ - Reduce startup time
+ - Minimize memory usage for unused agents
+ - Allow hot-reloading during development

- Usage:
- from claude_pm.agents.agent_loader import get_documentation_agent_prompt
+ 3. Dynamic Model Selection: The system can analyze task complexity to:
+ - Optimize cost by using appropriate model tiers
+ - Improve performance for simple tasks
+ - Ensure complex tasks get sufficient model capabilities
+
+ Usage Examples:
+ --------------
+ from claude_mpm.agents.agent_loader import get_documentation_agent_prompt

- # Get agent prompt from JSON template
+ # Get agent prompt using backward-compatible function
  prompt = get_documentation_agent_prompt()
+
+ # Get agent with model selection info
+ prompt, model, config = get_agent_prompt("research_agent",
+ return_model_info=True,
+ task_description="Analyze codebase")
+
+ # List all available agents
+ agents = list_available_agents()
  """

  import json
  import logging
  import os
+ import yaml
  from pathlib import Path
  from typing import Optional, Dict, Any, Tuple, Union, List

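The enlarged module docstring above documents the new generic interface alongside the legacy helpers. A minimal sketch of that usage, assuming claude-mpm 3.0.1 is installed; the agent ID and task text are example values only:

    # Illustrative use of the loader API documented in the hunk above.
    from claude_mpm.agents.agent_loader import get_agent_prompt, list_available_agents

    # Plain prompt (string return, same shape as the legacy get_*_agent_prompt helpers)
    prompt = get_agent_prompt("research_agent")

    # Prompt plus model-selection details (tuple return when return_model_info=True)
    prompt, model, config = get_agent_prompt(
        "research_agent",
        return_model_info=True,
        task_description="Analyze codebase",
    )
    print(model, config.get("selection_method"))

    # Discover what is registered
    for agent_id, info in list_available_agents().items():
        print(agent_id, info.get("model"))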
@@ -32,61 +68,169 @@ from ..validation.agent_validator import AgentValidator, ValidationResult
  from ..utils.paths import PathResolver

  # Temporary placeholders for missing module
+ # WHY: These classes would normally come from a task_complexity module, but
+ # we've included them here temporarily to avoid breaking dependencies.
+ # This allows the agent loader to function independently while the full
+ # complexity analysis system is being developed.
  class ComplexityLevel:
- LOW = "LOW"
- MEDIUM = "MEDIUM"
- HIGH = "HIGH"
+ """Represents the complexity level of a task for model selection."""
+ LOW = "LOW" # Simple tasks suitable for fast, economical models
+ MEDIUM = "MEDIUM" # Standard tasks requiring balanced capabilities
+ HIGH = "HIGH" # Complex tasks needing advanced reasoning

  class ModelType:
- HAIKU = "haiku"
- SONNET = "sonnet"
- OPUS = "opus"
+ """Claude model tiers used for dynamic selection based on task complexity."""
+ HAIKU = "haiku" # Fast, economical model for simple tasks
+ SONNET = "sonnet" # Balanced model for general-purpose tasks
+ OPUS = "opus" # Most capable model for complex reasoning

  # Module-level logger
  logger = logging.getLogger(__name__)


  def _get_agent_templates_dir() -> Path:
- """Get the agent templates directory."""
+ """
+ Get the directory containing agent template JSON files.
+
+ WHY: We use a function instead of a direct constant to ensure the path
+ is always resolved relative to this module's location, making the code
+ portable across different installation methods (pip install, development mode, etc.).
+
+ Returns:
+ Path: Absolute path to the templates directory
+ """
  return Path(__file__).parent / "templates"


- # Agent templates directory
+ # Agent templates directory - where all agent JSON files are stored
  AGENT_TEMPLATES_DIR = _get_agent_templates_dir()

- # Cache prefix for agent prompts
+ # Cache prefix for agent prompts - versioned to allow cache invalidation on schema changes
+ # WHY: The "v2:" suffix allows us to invalidate all cached prompts when we make
+ # breaking changes to the agent schema format
  AGENT_CACHE_PREFIX = "agent_prompt:v2:"

- # Model configuration thresholds
+ # Model configuration thresholds for dynamic selection
+ # WHY: These thresholds define complexity score ranges (0-100) that map to
+ # appropriate Claude models. The ranges are based on empirical testing of
+ # task performance across different model tiers.
  MODEL_THRESHOLDS = {
  ModelType.HAIKU: {"min_complexity": 0, "max_complexity": 30},
  ModelType.SONNET: {"min_complexity": 31, "max_complexity": 70},
  ModelType.OPUS: {"min_complexity": 71, "max_complexity": 100}
  }

- # Model name mappings for Claude API (updated for new schema)
+ # Model name mappings for Claude API
+ # WHY: These map our internal model types to the actual API model identifiers.
+ # The specific versions are chosen for their stability and feature completeness.
  MODEL_NAME_MAPPINGS = {
- ModelType.HAIKU: "claude-3-haiku-20240307",
- ModelType.SONNET: "claude-sonnet-4-20250514",
- ModelType.OPUS: "claude-opus-4-20250514"
+ ModelType.HAIKU: "claude-3-haiku-20240307", # Fast, cost-effective
+ ModelType.SONNET: "claude-sonnet-4-20250514", # Balanced performance
+ ModelType.OPUS: "claude-opus-4-20250514" # Maximum capability
  }


  class AgentLoader:
- """Loads and manages agent templates with schema validation."""
+ """
+ Central registry for loading and managing agent configurations.
+
+ This class implements the core agent discovery and management system. It:
+ 1. Discovers agent JSON files from the templates directory
+ 2. Validates each agent against the standardized schema
+ 3. Maintains an in-memory registry of valid agents
+ 4. Provides caching for performance optimization
+ 5. Supports dynamic agent reloading
+
+ METRICS COLLECTION OPPORTUNITIES:
+ - Agent load times and cache hit rates
+ - Validation performance by agent type
+ - Agent usage frequency and patterns
+ - Model selection distribution
+ - Task complexity analysis results
+ - Memory usage for agent templates
+ - Error rates during loading/validation
+ - Agent prompt size distributions
+
+ The loader follows a singleton-like pattern through the module-level
+ _loader instance to ensure consistent state across the application.
+
+ Attributes:
+ validator: AgentValidator instance for schema validation
+ cache: SharedPromptCache instance for performance optimization
+ _agent_registry: Internal dictionary mapping agent IDs to their configurations
+ """

  def __init__(self):
- """Initialize the agent loader."""
+ """
+ Initialize the agent loader and discover available agents.
+
+ The initialization process:
+ 1. Creates validator for schema checking
+ 2. Gets shared cache instance for performance
+ 3. Initializes empty agent registry
+ 4. Triggers agent discovery and loading
+
+ METRICS OPPORTUNITIES:
+ - Track initialization time
+ - Monitor agent discovery performance
+ - Count total agents loaded vs validation failures
+ - Measure memory footprint of loaded agents
+
+ WHY: We load agents eagerly during initialization to:
+ - Detect configuration errors early
+ - Build the registry once for efficient access
+ - Validate all agents before the system starts using them
+ """
  self.validator = AgentValidator()
  self.cache = SharedPromptCache.get_instance()
  self._agent_registry: Dict[str, Dict[str, Any]] = {}
+
+ # METRICS: Initialize performance tracking
+ # This structure collects valuable telemetry for AI agent performance
+ self._metrics = {
+ 'agents_loaded': 0,
+ 'validation_failures': 0,
+ 'cache_hits': 0,
+ 'cache_misses': 0,
+ 'load_times': {}, # agent_id -> load time ms
+ 'usage_counts': {}, # agent_id -> usage count
+ 'model_selections': {}, # model -> count
+ 'complexity_scores': [], # Distribution of task complexity
+ 'prompt_sizes': {}, # agent_id -> prompt size in chars
+ 'error_types': {}, # error_type -> count
+ 'initialization_time_ms': 0
+ }
+
+ # METRICS: Track initialization performance
+ start_time = time.time()
  self._load_agents()
+ self._metrics['initialization_time_ms'] = (time.time() - start_time) * 1000
+ logger.debug(f"Agent loader initialized in {self._metrics['initialization_time_ms']:.2f}ms")

  def _load_agents(self) -> None:
- """Load all valid agents from the templates directory."""
+ """
+ Discover and load all valid agents from the templates directory.
+
+ This method implements the agent discovery mechanism:
+ 1. Scans the templates directory for JSON files
+ 2. Skips the schema definition file
+ 3. Loads and validates each potential agent file
+ 4. Registers valid agents in the internal registry
+
+ WHY: We use a file-based discovery mechanism because:
+ - It allows easy addition of new agents without code changes
+ - Agents can be distributed as simple JSON files
+ - The system remains extensible and maintainable
+
+ Error Handling:
+ - Invalid JSON files are logged but don't stop the loading process
+ - Schema validation failures are logged with details
+ - The system continues to function with whatever valid agents it finds
+ """
  logger.info(f"Loading agents from {AGENT_TEMPLATES_DIR}")

  for json_file in AGENT_TEMPLATES_DIR.glob("*.json"):
+ # Skip the schema definition file itself
  if json_file.name == "agent_schema.json":
  continue

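The MODEL_THRESHOLDS and MODEL_NAME_MAPPINGS constants added in this hunk define how a 0-100 complexity score maps to an API model identifier. A small sketch of that mapping using the module-level constants; pick_model_for_score is a hypothetical helper, not part of the package:

    # Sketch only: maps a complexity score to a model name via the constants above.
    from claude_mpm.agents.agent_loader import MODEL_THRESHOLDS, MODEL_NAME_MAPPINGS, ModelType

    def pick_model_for_score(score: int) -> str:
        for model_type, bounds in MODEL_THRESHOLDS.items():
            if bounds["min_complexity"] <= score <= bounds["max_complexity"]:
                return MODEL_NAME_MAPPINGS[model_type]
        return MODEL_NAME_MAPPINGS[ModelType.SONNET]  # fallback for out-of-range scores

    print(pick_model_for_score(25))   # haiku tier (0-30)
    print(pick_model_for_score(85))   # opus tier (71-100)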
@@ -94,28 +238,58 @@ class AgentLoader:
  with open(json_file, 'r') as f:
  agent_data = json.load(f)

- # Validate against schema
+ # Validate against schema to ensure consistency
  validation_result = self.validator.validate_agent(agent_data)

  if validation_result.is_valid:
- agent_id = agent_data.get("id")
+ agent_id = agent_data.get("agent_id")
  if agent_id:
  self._agent_registry[agent_id] = agent_data
+ # METRICS: Track successful agent load
+ self._metrics['agents_loaded'] += 1
  logger.debug(f"Loaded agent: {agent_id}")
  else:
+ # Log validation errors but continue loading other agents
+ # METRICS: Track validation failure
+ self._metrics['validation_failures'] += 1
  logger.warning(f"Invalid agent in {json_file.name}: {validation_result.errors}")

  except Exception as e:
+ # Log loading errors but don't crash - system should be resilient
  logger.error(f"Failed to load {json_file.name}: {e}")

  def get_agent(self, agent_id: str) -> Optional[Dict[str, Any]]:
- """Get agent data by ID."""
+ """
+ Retrieve agent configuration by ID.
+
+ Args:
+ agent_id: Unique identifier for the agent (e.g., "research_agent")
+
+ Returns:
+ Dict containing the full agent configuration, or None if not found
+
+ WHY: Direct dictionary lookup for O(1) performance, essential for
+ frequently accessed agents during runtime.
+ """
  return self._agent_registry.get(agent_id)

  def list_agents(self) -> List[Dict[str, Any]]:
- """List all available agents."""
+ """
+ Get a summary list of all available agents.
+
+ Returns:
+ List of agent summaries containing key metadata fields
+
+ WHY: We return a summary instead of full configurations to:
+ - Reduce memory usage when listing many agents
+ - Provide only the information needed for agent selection
+ - Keep the API response size manageable
+
+ The returned list is sorted by ID for consistent ordering across calls.
+ """
  agents = []
  for agent_id, agent_data in self._agent_registry.items():
+ # Extract key fields from nested structure for easy consumption
  agents.append({
  "id": agent_id,
  "name": agent_data.get("metadata", {}).get("name", agent_id),
@@ -127,36 +301,151 @@ class AgentLoader:
  return sorted(agents, key=lambda x: x["id"])

  def get_agent_prompt(self, agent_id: str, force_reload: bool = False) -> Optional[str]:
- """Get agent instructions by ID."""
+ """
+ Retrieve agent instructions/prompt by ID with caching support.
+
+ Args:
+ agent_id: Unique identifier for the agent
+ force_reload: If True, bypass cache and reload from registry
+
+ Returns:
+ The agent's instruction prompt, or None if not found
+
+ Caching Strategy:
+ - Prompts are cached for 1 hour (3600 seconds) by default
+ - Cache keys are versioned (v2:) to allow bulk invalidation
+ - Force reload bypasses cache for development/debugging
+
+ METRICS TRACKED:
+ - Cache hit/miss rates for optimization
+ - Agent usage frequency for popular agents
+ - Prompt loading times for performance
+ - Prompt sizes for memory analysis
+
+ WHY: Caching is critical here because:
+ - Agent prompts can be large (several KB)
+ - They're accessed frequently during agent execution
+ - They rarely change in production
+ - The 1-hour TTL balances freshness with performance
+ """
  cache_key = f"{AGENT_CACHE_PREFIX}{agent_id}"

- # Check cache first
+ # METRICS: Track usage count for this agent
+ self._metrics['usage_counts'][agent_id] = self._metrics['usage_counts'].get(agent_id, 0) + 1
+
+ # METRICS: Track load time
+ load_start = time.time()
+
+ # Check cache first unless force reload is requested
  if not force_reload:
  cached_content = self.cache.get(cache_key)
  if cached_content is not None:
+ # METRICS: Track cache hit
+ self._metrics['cache_hits'] += 1
  logger.debug(f"Agent prompt for '{agent_id}' loaded from cache")
  return str(cached_content)

- # Get agent data
+ # METRICS: Track cache miss
+ self._metrics['cache_misses'] += 1
+
+ # Get agent data from registry
  agent_data = self.get_agent(agent_id)
  if not agent_data:
  logger.warning(f"Agent not found: {agent_id}")
  return None

- # Extract instructions
+ # Extract instructions from the agent configuration
  instructions = agent_data.get("instructions", "")
  if not instructions:
  logger.warning(f"No instructions found for agent: {agent_id}")
  return None

- # Cache the content with 1 hour TTL
+ # METRICS: Track prompt size for memory analysis
+ self._metrics['prompt_sizes'][agent_id] = len(instructions)
+
+ # METRICS: Record load time
+ load_time_ms = (time.time() - load_start) * 1000
+ self._metrics['load_times'][agent_id] = load_time_ms
+
+ # Cache the content with 1 hour TTL for performance
  self.cache.set(cache_key, instructions, ttl=3600)
  logger.debug(f"Agent prompt for '{agent_id}' cached successfully")

  return instructions

+ def get_metrics(self) -> Dict[str, Any]:
+ """
+ Get collected performance metrics.
+
+ Returns:
+ Dictionary containing:
+ - Cache performance (hit rate, miss rate)
+ - Agent usage statistics
+ - Load time analysis
+ - Memory usage patterns
+ - Error tracking
+
+ This data could be:
+ - Exposed via monitoring endpoints
+ - Logged periodically for analysis
+ - Used for capacity planning
+ - Fed to AI operations platforms
+ """
+ cache_total = self._metrics['cache_hits'] + self._metrics['cache_misses']
+ cache_hit_rate = 0.0
+ if cache_total > 0:
+ cache_hit_rate = (self._metrics['cache_hits'] / cache_total) * 100
+
+ # Calculate average load times
+ avg_load_time = 0.0
+ if self._metrics['load_times']:
+ avg_load_time = sum(self._metrics['load_times'].values()) / len(self._metrics['load_times'])
+
+ # Find most used agents
+ top_agents = sorted(
+ self._metrics['usage_counts'].items(),
+ key=lambda x: x[1],
+ reverse=True
+ )[:5]
+
+ return {
+ 'initialization_time_ms': self._metrics['initialization_time_ms'],
+ 'agents_loaded': self._metrics['agents_loaded'],
+ 'validation_failures': self._metrics['validation_failures'],
+ 'cache_hit_rate_percent': cache_hit_rate,
+ 'cache_hits': self._metrics['cache_hits'],
+ 'cache_misses': self._metrics['cache_misses'],
+ 'average_load_time_ms': avg_load_time,
+ 'top_agents_by_usage': dict(top_agents),
+ 'model_selection_distribution': self._metrics['model_selections'].copy(),
+ 'prompt_size_stats': {
+ 'total_agents': len(self._metrics['prompt_sizes']),
+ 'average_size': sum(self._metrics['prompt_sizes'].values()) / len(self._metrics['prompt_sizes']) if self._metrics['prompt_sizes'] else 0,
+ 'max_size': max(self._metrics['prompt_sizes'].values()) if self._metrics['prompt_sizes'] else 0,
+ 'min_size': min(self._metrics['prompt_sizes'].values()) if self._metrics['prompt_sizes'] else 0
+ },
+ 'error_types': self._metrics['error_types'].copy()
+ }
+
  def get_agent_metadata(self, agent_id: str) -> Optional[Dict[str, Any]]:
- """Get agent metadata including capabilities and configuration."""
+ """
+ Get comprehensive agent metadata including capabilities and configuration.
+
+ Args:
+ agent_id: Unique identifier for the agent
+
+ Returns:
+ Dictionary containing all agent metadata except instructions,
+ or None if agent not found
+
+ WHY: This method provides access to agent configuration without
+ including the potentially large instruction text. This is useful for:
+ - UI displays showing agent capabilities
+ - Programmatic agent selection based on features
+ - Debugging and introspection
+
+ The returned structure mirrors the JSON schema sections for consistency.
+ """
  agent_data = self.get_agent(agent_id)
  if not agent_data:
  return None
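The new get_metrics() method added above exposes the telemetry gathered by the loader. A short sketch of reading it; _get_loader() is module-private and is used here only to reach the shared AgentLoader instance for inspection:

    # Illustration of the metrics added in this hunk.
    from claude_mpm.agents.agent_loader import get_agent_prompt, _get_loader

    get_agent_prompt("qa_agent")   # first call: cache miss
    get_agent_prompt("qa_agent")   # second call: served from SharedPromptCache

    metrics = _get_loader().get_metrics()
    print(f"hit rate: {metrics['cache_hit_rate_percent']:.1f}%")
    print("top agents:", metrics["top_agents_by_usage"])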
@@ -164,19 +453,37 @@ class AgentLoader:
  return {
  "id": agent_id,
  "version": agent_data.get("version", "1.0.0"),
- "metadata": agent_data.get("metadata", {}),
- "capabilities": agent_data.get("capabilities", {}),
- "knowledge": agent_data.get("knowledge", {}),
- "interactions": agent_data.get("interactions", {})
+ "metadata": agent_data.get("metadata", {}), # Name, description, category
+ "capabilities": agent_data.get("capabilities", {}), # Model, tools, features
+ "knowledge": agent_data.get("knowledge", {}), # Domain expertise
+ "interactions": agent_data.get("interactions", {}) # User interaction patterns
  }


- # Global loader instance
+ # Global loader instance - singleton pattern for consistent state
+ # WHY: We use a module-level singleton because:
+ # - Agent configurations should be consistent across the application
+ # - Loading and validation only needs to happen once
+ # - Multiple loaders would lead to cache inconsistencies
  _loader: Optional[AgentLoader] = None


  def _get_loader() -> AgentLoader:
- """Get or create the global agent loader instance."""
+ """
+ Get or create the global agent loader instance (singleton pattern).
+
+ Returns:
+ AgentLoader: The single global instance
+
+ WHY: The singleton pattern ensures:
+ - Agents are loaded and validated only once
+ - All parts of the application see the same agent registry
+ - Cache state remains consistent
+ - Memory usage is minimized
+
+ Thread Safety: Python's GIL makes this simple implementation thread-safe
+ for the single assignment operation.
+ """
  global _loader
  if _loader is None:
  _loader = AgentLoader()
@@ -185,7 +492,7 @@ def _get_loader() -> AgentLoader:

  def load_agent_prompt_from_md(agent_name: str, force_reload: bool = False) -> Optional[str]:
  """
- Load agent prompt from new schema JSON template.
+ Load agent prompt from JSON template (legacy function name).

  Args:
  agent_name: Agent name (matches agent ID in new schema)
@@ -193,6 +500,13 @@ def load_agent_prompt_from_md(agent_name: str, force_reload: bool = False) -> Op

  Returns:
  str: Agent instructions from JSON template, or None if not found
+
+ NOTE: Despite the "md" in the function name, this loads from JSON files.
+ The name is kept for backward compatibility with existing code that
+ expects this interface. New code should use get_agent_prompt() directly.
+
+ WHY: This wrapper exists to maintain backward compatibility during the
+ migration from markdown-based agents to JSON-based agents.
  """
  loader = _get_loader()
  return loader.get_agent_prompt(agent_name, force_reload)
@@ -200,15 +514,33 @@ def load_agent_prompt_from_md(agent_name: str, force_reload: bool = False) -> Op

  def _analyze_task_complexity(task_description: str, context_size: int = 0, **kwargs: Any) -> Dict[str, Any]:
  """
- Analyze task complexity using TaskComplexityAnalyzer.
+ Analyze task complexity to determine optimal model selection.

  Args:
- task_description: Description of the task
- context_size: Size of context in characters
- **kwargs: Additional parameters for complexity analysis
+ task_description: Description of the task to analyze
+ context_size: Size of context in characters (affects complexity)
+ **kwargs: Additional parameters for complexity analysis such as:
+ - code_analysis: Whether code analysis is required
+ - multi_step: Whether the task involves multiple steps
+ - domain_expertise: Required domain knowledge level

  Returns:
- Dictionary containing complexity analysis results
+ Dictionary containing:
+ - complexity_score: Numeric score 0-100
+ - complexity_level: LOW, MEDIUM, or HIGH
+ - recommended_model: Suggested Claude model tier
+ - optimal_prompt_size: Recommended prompt size range
+ - error: Error message if analysis fails
+
+ WHY: This is a placeholder implementation that returns sensible defaults.
+ The actual TaskComplexityAnalyzer module would use NLP techniques to:
+ - Analyze task description for complexity indicators
+ - Consider context size and memory requirements
+ - Factor in domain-specific requirements
+ - Optimize for cost vs capability trade-offs
+
+ Current Implementation: Returns medium complexity as a safe default that
+ works well for most tasks while the full analyzer is being developed.
  """
  # Temporary implementation until TaskComplexityAnalyzer is available
  logger.warning("TaskComplexityAnalyzer not available, using default values")
@@ -223,29 +555,52 @@ def _analyze_task_complexity(task_description: str, context_size: int = 0, **kwa

  def _get_model_config(agent_name: str, complexity_analysis: Optional[Dict[str, Any]] = None) -> Tuple[str, Dict[str, Any]]:
  """
- Get model configuration based on agent type and task complexity.
+ Determine optimal model configuration based on agent type and task complexity.
+
+ METRICS TRACKED:
+ - Model selection distribution
+ - Complexity score distribution
+ - Dynamic vs static selection rates

  Args:
- agent_name: Name of the agent
- complexity_analysis: Results from task complexity analysis
+ agent_name: Name of the agent requesting model selection
+ complexity_analysis: Results from task complexity analysis (if available)

  Returns:
- Tuple of (selected_model, model_config)
+ Tuple of (selected_model, model_config) where:
+ - selected_model: Claude API model identifier
+ - model_config: Dictionary with selection metadata
+
+ Model Selection Strategy:
+ 1. Each agent has a default model defined in its capabilities
+ 2. Dynamic selection can override based on task complexity
+ 3. Environment variables can control selection behavior
+
+ Environment Variables:
+ - ENABLE_DYNAMIC_MODEL_SELECTION: Global toggle (default: true)
+ - CLAUDE_PM_{AGENT}_MODEL_SELECTION: Per-agent override
+
+ WHY: This flexible approach allows:
+ - Cost optimization by using cheaper models for simple tasks
+ - Performance optimization by using powerful models only when needed
+ - Easy override for testing or production constraints
+ - Gradual rollout of dynamic selection features
  """
  loader = _get_loader()
  agent_data = loader.get_agent(agent_name)

  if not agent_data:
- # Fallback for unknown agents
+ # Fallback for unknown agents - use Sonnet as safe default
  return "claude-sonnet-4-20250514", {"selection_method": "default"}

- # Get model from agent capabilities
+ # Get model from agent capabilities (agent's preferred model)
  default_model = agent_data.get("capabilities", {}).get("model", "claude-sonnet-4-20250514")

- # Check if dynamic model selection is enabled
+ # Check if dynamic model selection is enabled globally
  enable_dynamic_selection = os.getenv('ENABLE_DYNAMIC_MODEL_SELECTION', 'true').lower() == 'true'

  # Check for per-agent override in environment
+ # This allows fine-grained control over specific agents
  agent_override_key = f"CLAUDE_PM_{agent_name.upper()}_MODEL_SELECTION"
  agent_override = os.getenv(agent_override_key, '').lower()

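The environment variables documented in the docstring above control whether dynamic selection applies. A sketch of using them, assuming claude-mpm 3.0.1 is installed; the variable names come from this hunk, the agent ID is an example:

    # Example of the environment toggles documented above.
    import os
    from claude_mpm.agents.agent_loader import get_agent_prompt_with_model_info

    # Disable dynamic selection globally, then re-enable it for one agent only.
    os.environ["ENABLE_DYNAMIC_MODEL_SELECTION"] = "false"
    os.environ["CLAUDE_PM_RESEARCH_AGENT_MODEL_SELECTION"] = "true"

    prompt, model, config = get_agent_prompt_with_model_info(
        "research_agent", task_description="Summarize the module layout"
    )
    print(model, config["selection_method"])  # dynamic for research_agent, agent default elsewhere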
@@ -254,19 +609,28 @@ def _get_model_config(agent_name: str, complexity_analysis: Optional[Dict[str, A
  elif agent_override == 'false':
  enable_dynamic_selection = False

- # Dynamic model selection based on complexity
+ # Apply dynamic model selection based on task complexity
  if enable_dynamic_selection and complexity_analysis:
  recommended_model = complexity_analysis.get('recommended_model', ModelType.SONNET)
  selected_model = MODEL_NAME_MAPPINGS.get(recommended_model, default_model)

+ # METRICS: Track complexity scores for distribution analysis
+ complexity_score = complexity_analysis.get('complexity_score', 50)
+ if hasattr(loader, '_metrics'):
+ loader._metrics['complexity_scores'].append(complexity_score)
+ # Keep only last 1000 scores for memory efficiency
+ if len(loader._metrics['complexity_scores']) > 1000:
+ loader._metrics['complexity_scores'] = loader._metrics['complexity_scores'][-1000:]
+
  model_config = {
  "selection_method": "dynamic_complexity_based",
- "complexity_score": complexity_analysis.get('complexity_score', 50),
+ "complexity_score": complexity_score,
  "complexity_level": complexity_analysis.get('complexity_level', ComplexityLevel.MEDIUM).value,
  "optimal_prompt_size": complexity_analysis.get('optimal_prompt_size', (700, 1000)),
  "default_model": default_model
  }
  else:
+ # Use agent's default model when dynamic selection is disabled
  selected_model = default_model
  model_config = {
  "selection_method": "agent_default",
@@ -274,27 +638,57 @@ def _get_model_config(agent_name: str, complexity_analysis: Optional[Dict[str, A
  "default_model": default_model
  }

+ # METRICS: Track model selection distribution
+ # This helps understand model usage patterns and costs
+ if hasattr(loader, '_metrics'):
+ loader._metrics['model_selections'][selected_model] = \
+ loader._metrics['model_selections'].get(selected_model, 0) + 1
+
  return selected_model, model_config


  def get_agent_prompt(agent_name: str, force_reload: bool = False, return_model_info: bool = False, **kwargs: Any) -> Union[str, Tuple[str, str, Dict[str, Any]]]:
  """
- Get agent prompt from JSON template with optional dynamic model selection.
+ Get agent prompt with optional dynamic model selection and base instructions.
+
+ This is the primary interface for retrieving agent prompts. It handles:
+ 1. Loading the agent's instructions from the registry
+ 2. Optionally analyzing task complexity for model selection
+ 3. Prepending base instructions for consistency
+ 4. Adding metadata about model selection decisions

  Args:
- agent_name: Agent name (agent ID in new schema)
+ agent_name: Agent ID (e.g., "research_agent", "qa_agent")
  force_reload: Force reload from source, bypassing cache
- return_model_info: If True, returns tuple (prompt, model, config)
- **kwargs: Additional arguments including:
- - task_description: Description of the task for complexity analysis
- - context_size: Size of context for complexity analysis
- - enable_complexity_analysis: Override for complexity analysis
+ return_model_info: If True, returns extended info tuple
+ **kwargs: Additional arguments:
+ - task_description: Description for complexity analysis
+ - context_size: Size of context in characters
+ - enable_complexity_analysis: Toggle complexity analysis (default: True)
+ - Additional task-specific parameters

  Returns:
- str or tuple: Complete agent prompt with base instructions prepended,
- or tuple of (prompt, selected_model, model_config) if return_model_info=True
+ If return_model_info=False: Complete agent prompt string
+ If return_model_info=True: Tuple of (prompt, selected_model, model_config)
+
+ Raises:
+ ValueError: If the requested agent is not found
+
+ Processing Flow:
+ 1. Load agent instructions (with caching)
+ 2. Analyze task complexity (if enabled and task_description provided)
+ 3. Determine optimal model based on complexity
+ 4. Add model selection metadata to prompt
+ 5. Prepend base instructions
+ 6. Return appropriate format based on return_model_info
+
+ WHY: This comprehensive approach ensures:
+ - Consistent prompt structure across all agents
+ - Optimal model selection for cost/performance
+ - Transparency in model selection decisions
+ - Flexibility for different use cases
  """
- # Load from new schema JSON template
+ # Load from JSON template via the loader
  prompt = load_agent_prompt_from_md(agent_name, force_reload)

  if prompt is None:
@@ -306,25 +700,30 @@ def get_agent_prompt(agent_name: str, force_reload: bool = False, return_model_i
  enable_analysis = kwargs.get('enable_complexity_analysis', True)

  if task_description and enable_analysis:
+ # Extract relevant kwargs for complexity analysis
+ complexity_kwargs = {k: v for k, v in kwargs.items()
+ if k not in ['task_description', 'context_size', 'enable_complexity_analysis']}
  complexity_analysis = _analyze_task_complexity(
  task_description=task_description,
  context_size=kwargs.get('context_size', 0),
- **{k: v for k, v in kwargs.items() if k not in ['task_description', 'context_size']}
+ **complexity_kwargs
  )

- # Get model configuration
+ # Get model configuration based on agent and complexity
  selected_model, model_config = _get_model_config(agent_name, complexity_analysis)

- # Add model selection metadata to prompt if dynamic selection is enabled
+ # Add model selection metadata to prompt for transparency
+ # This helps with debugging and understanding model choices
  if selected_model and model_config.get('selection_method') == 'dynamic_complexity_based':
  model_metadata = f"\n<!-- Model Selection: {selected_model} (Complexity: {model_config.get('complexity_level', 'UNKNOWN')}) -->\n"
  prompt = model_metadata + prompt

  # Prepend base instructions with dynamic template based on complexity
+ # The base instructions provide common guidelines all agents should follow
  complexity_score = model_config.get('complexity_score', 50) if model_config else 50
  final_prompt = prepend_base_instructions(prompt, complexity_score=complexity_score)

- # Return model info if requested
+ # Return format based on caller's needs
  if return_model_info:
  return final_prompt, selected_model, model_config
  else:
@@ -332,81 +731,161 @@ def get_agent_prompt(agent_name: str, force_reload: bool = False, return_model_i


  # Backward-compatible functions
+ # WHY: These functions exist to maintain backward compatibility with existing code
+ # that expects agent-specific getter functions. New code should use get_agent_prompt()
+ # directly with the agent_id parameter for more flexibility.
+ #
+ # DEPRECATION NOTE: These functions may be removed in a future major version.
+ # They add maintenance overhead and limit extensibility compared to the generic interface.
+
  def get_documentation_agent_prompt() -> str:
- """Get the complete Documentation Agent prompt with base instructions."""
- prompt = get_agent_prompt("documentation", return_model_info=False)
+ """
+ Get the complete Documentation Agent prompt with base instructions.
+
+ Returns:
+ Complete prompt string ready for use with Claude API
+
+ DEPRECATED: Use get_agent_prompt("documentation_agent") instead
+ """
+ prompt = get_agent_prompt("documentation_agent", return_model_info=False)
  assert isinstance(prompt, str), "Expected string when return_model_info=False"
  return prompt


  def get_version_control_agent_prompt() -> str:
- """Get the complete Version Control Agent prompt with base instructions."""
- prompt = get_agent_prompt("version_control", return_model_info=False)
+ """
+ Get the complete Version Control Agent prompt with base instructions.
+
+ Returns:
+ Complete prompt string ready for use with Claude API
+
+ DEPRECATED: Use get_agent_prompt("version_control_agent") instead
+ """
+ prompt = get_agent_prompt("version_control_agent", return_model_info=False)
  assert isinstance(prompt, str), "Expected string when return_model_info=False"
  return prompt


  def get_qa_agent_prompt() -> str:
- """Get the complete QA Agent prompt with base instructions."""
- prompt = get_agent_prompt("qa", return_model_info=False)
+ """
+ Get the complete QA Agent prompt with base instructions.
+
+ Returns:
+ Complete prompt string ready for use with Claude API
+
+ DEPRECATED: Use get_agent_prompt("qa_agent") instead
+ """
+ prompt = get_agent_prompt("qa_agent", return_model_info=False)
  assert isinstance(prompt, str), "Expected string when return_model_info=False"
  return prompt


  def get_research_agent_prompt() -> str:
- """Get the complete Research Agent prompt with base instructions."""
- prompt = get_agent_prompt("research", return_model_info=False)
+ """
+ Get the complete Research Agent prompt with base instructions.
+
+ Returns:
+ Complete prompt string ready for use with Claude API
+
+ DEPRECATED: Use get_agent_prompt("research_agent") instead
+ """
+ prompt = get_agent_prompt("research_agent", return_model_info=False)
  assert isinstance(prompt, str), "Expected string when return_model_info=False"
  return prompt


  def get_ops_agent_prompt() -> str:
- """Get the complete Ops Agent prompt with base instructions."""
- prompt = get_agent_prompt("ops", return_model_info=False)
+ """
+ Get the complete Ops Agent prompt with base instructions.
+
+ Returns:
+ Complete prompt string ready for use with Claude API
+
+ DEPRECATED: Use get_agent_prompt("ops_agent") instead
+ """
+ prompt = get_agent_prompt("ops_agent", return_model_info=False)
  assert isinstance(prompt, str), "Expected string when return_model_info=False"
  return prompt


  def get_security_agent_prompt() -> str:
- """Get the complete Security Agent prompt with base instructions."""
- prompt = get_agent_prompt("security", return_model_info=False)
+ """
+ Get the complete Security Agent prompt with base instructions.
+
+ Returns:
+ Complete prompt string ready for use with Claude API
+
+ DEPRECATED: Use get_agent_prompt("security_agent") instead
+ """
+ prompt = get_agent_prompt("security_agent", return_model_info=False)
  assert isinstance(prompt, str), "Expected string when return_model_info=False"
  return prompt


  def get_engineer_agent_prompt() -> str:
- """Get the complete Engineer Agent prompt with base instructions."""
- prompt = get_agent_prompt("engineer", return_model_info=False)
+ """
+ Get the complete Engineer Agent prompt with base instructions.
+
+ Returns:
+ Complete prompt string ready for use with Claude API
+
+ DEPRECATED: Use get_agent_prompt("engineer_agent") instead
+ """
+ prompt = get_agent_prompt("engineer_agent", return_model_info=False)
  assert isinstance(prompt, str), "Expected string when return_model_info=False"
  return prompt


  def get_data_engineer_agent_prompt() -> str:
- """Get the complete Data Engineer Agent prompt with base instructions."""
- prompt = get_agent_prompt("data_engineer", return_model_info=False)
+ """
+ Get the complete Data Engineer Agent prompt with base instructions.
+
+ Returns:
+ Complete prompt string ready for use with Claude API
+
+ DEPRECATED: Use get_agent_prompt("data_engineer_agent") instead
+ """
+ prompt = get_agent_prompt("data_engineer_agent", return_model_info=False)
  assert isinstance(prompt, str), "Expected string when return_model_info=False"
  return prompt


  def get_agent_prompt_with_model_info(agent_name: str, force_reload: bool = False, **kwargs: Any) -> Tuple[str, str, Dict[str, Any]]:
  """
- Get agent prompt with model selection information.
+ Convenience wrapper to always get agent prompt with model selection information.

  Args:
- agent_name: Agent name (agent ID)
+ agent_name: Agent ID (e.g., "research_agent")
  force_reload: Force reload from source, bypassing cache
  **kwargs: Additional arguments for prompt generation and model selection
+ - task_description: For complexity analysis
+ - context_size: For complexity scoring
+ - Other task-specific parameters

  Returns:
- Tuple of (prompt, selected_model, model_config)
+ Tuple of (prompt, selected_model, model_config) where:
+ - prompt: Complete agent prompt with base instructions
+ - selected_model: Claude API model identifier
+ - model_config: Dictionary with selection metadata
+
+ WHY: This dedicated function ensures type safety for callers that always
+ need model information, avoiding the need to handle Union types.
+
+ Example:
+ prompt, model, config = get_agent_prompt_with_model_info(
+ "research_agent",
+ task_description="Analyze Python codebase architecture"
+ )
+ print(f"Using model: {model} (method: {config['selection_method']})")
  """
  result = get_agent_prompt(agent_name, force_reload, return_model_info=True, **kwargs)

- # Ensure we have a tuple
+ # Type narrowing - we know this returns a tuple when return_model_info=True
  if isinstance(result, tuple):
  return result

- # Fallback (shouldn't happen)
+ # Fallback (shouldn't happen with current implementation)
+ # This defensive code ensures we always return the expected tuple format
  loader = _get_loader()
  agent_data = loader.get_agent(agent_name)
  default_model = "claude-sonnet-4-20250514"
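This hunk both deprecates the per-agent wrappers and retargets them at the "_agent"-suffixed IDs. A migration sketch from the deprecated wrapper to the generic interface, using the IDs shown above; the agent chosen is an example:

    # Migration sketch for the deprecation note above.
    from claude_mpm.agents.agent_loader import (
        get_documentation_agent_prompt,  # deprecated wrapper
        get_agent_prompt,                # preferred generic interface
    )

    old_style = get_documentation_agent_prompt()
    new_style = get_agent_prompt("documentation_agent")
    # Both paths resolve to the same template and prepend the same base instructions.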
@@ -416,13 +895,40 @@ def get_agent_prompt_with_model_info(agent_name: str, force_reload: bool = False
  return result, default_model, {"selection_method": "default"}


- # Utility functions
+ # Utility functions for agent management
+
  def list_available_agents() -> Dict[str, Dict[str, Any]]:
  """
- List all available agents with their metadata.
+ List all available agents with their key metadata.

  Returns:
- dict: Agent information including capabilities and metadata
+ Dictionary mapping agent IDs to their metadata summaries
+
+ The returned dictionary provides a comprehensive view of all registered
+ agents, useful for:
+ - UI agent selection interfaces
+ - Documentation generation
+ - System introspection and debugging
+ - Programmatic agent discovery
+
+ Example Return Value:
+ {
+ "research_agent": {
+ "name": "Research Agent",
+ "description": "Analyzes codebases...",
+ "category": "analysis",
+ "version": "1.0.0",
+ "model": "claude-opus-4-20250514",
+ "resource_tier": "standard",
+ "tools": ["code_analysis", "search"]
+ },
+ ...
+ }
+
+ WHY: This aggregated view is more useful than raw agent data because:
+ - It provides a consistent interface regardless of schema changes
+ - It includes only the fields relevant for agent selection
+ - It's optimized for UI display and decision making
  """
  loader = _get_loader()
  agents = {}
@@ -432,6 +938,7 @@ def list_available_agents() -> Dict[str, Dict[str, Any]]:
  metadata = loader.get_agent_metadata(agent_id)

  if metadata:
+ # Extract and flatten key information for easy consumption
  agents[agent_id] = {
  "name": metadata["metadata"].get("name", agent_id),
  "description": metadata["metadata"].get("description", ""),
@@ -447,20 +954,42 @@ def list_available_agents() -> Dict[str, Dict[str, Any]]:

  def clear_agent_cache(agent_name: Optional[str] = None) -> None:
  """
- Clear cached agent prompts.
+ Clear cached agent prompts for development or after updates.

  Args:
- agent_name: Specific agent to clear, or None to clear all
+ agent_name: Specific agent ID to clear, or None to clear all agents
+
+ This function is useful for:
+ - Development when modifying agent prompts
+ - Forcing reload after agent template updates
+ - Troubleshooting caching issues
+ - Memory management in long-running processes
+
+ Examples:
+ # Clear specific agent cache
+ clear_agent_cache("research_agent")
+
+ # Clear all agent caches
+ clear_agent_cache()
+
+ WHY: Manual cache management is important because:
+ - Agent prompts have a 1-hour TTL but may need immediate refresh
+ - Development requires seeing changes without waiting for TTL
+ - System administrators need cache control for troubleshooting
+
+ Error Handling: Failures are logged but don't raise exceptions to ensure
+ the system remains operational even if cache clearing fails.
  """
  try:
  cache = SharedPromptCache.get_instance()

  if agent_name:
+ # Clear specific agent's cache entry
  cache_key = f"{AGENT_CACHE_PREFIX}{agent_name}"
  cache.invalidate(cache_key)
  logger.debug(f"Cache cleared for agent: {agent_name}")
  else:
- # Clear all agent caches
+ # Clear all agent caches by iterating through registry
  loader = _get_loader()
  for agent_id in loader._agent_registry.keys():
  cache_key = f"{AGENT_CACHE_PREFIX}{agent_id}"
@@ -468,20 +997,51 @@ def clear_agent_cache(agent_name: Optional[str] = None) -> None:
  logger.debug("All agent caches cleared")

  except Exception as e:
+ # Log but don't raise - cache clearing shouldn't break the system
  logger.error(f"Error clearing agent cache: {e}")


  def validate_agent_files() -> Dict[str, Dict[str, Any]]:
  """
- Validate all agent files in the templates directory.
+ Validate all agent template files against the schema.

  Returns:
- dict: Validation results for each agent
+ Dictionary mapping agent names to validation results
+
+ This function performs comprehensive validation of all agent files,
+ checking for:
+ - JSON syntax errors
+ - Schema compliance
+ - Required fields presence
+ - Data type correctness
+ - Constraint violations
+
+ Return Format:
+ {
+ "agent_name": {
+ "valid": bool,
+ "errors": [list of error messages],
+ "warnings": [list of warning messages],
+ "file_path": "/full/path/to/file.json"
+ },
+ ...
+ }
+
+ Use Cases:
+ - Pre-deployment validation in CI/CD
+ - Development-time agent verification
+ - Troubleshooting agent loading issues
+ - Automated testing of agent configurations
+
+ WHY: Separate validation allows checking agents without loading them,
+ useful for CI/CD pipelines and development workflows where we want to
+ catch errors before runtime.
  """
  validator = AgentValidator()
  results = {}

  for json_file in AGENT_TEMPLATES_DIR.glob("*.json"):
+ # Skip the schema definition file itself
  if json_file.name == "agent_schema.json":
  continue

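The docstring above positions validate_agent_files() as a pre-deployment check. A sketch of a CI gate built on it, using only the keys documented in the return format ("valid", "errors", "file_path"):

    # CI sketch based on the documented return format of validate_agent_files().
    import sys
    from claude_mpm.agents.agent_loader import validate_agent_files

    failed = False
    for agent_name, result in validate_agent_files().items():
        if not result.get("valid", False):
            failed = True
            print(f"INVALID {agent_name} ({result.get('file_path', '?')}):")
            for error in result.get("errors", []):
                print(f"  - {error}")

    sys.exit(1 if failed else 0)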
@@ -497,7 +1057,27 @@ def validate_agent_files() -> Dict[str, Dict[str, Any]]:


  def reload_agents() -> None:
- """Force reload all agents from disk."""
+ """
+ Force reload all agents from disk, clearing the registry and cache.
+
+ This function completely resets the agent loader state, causing:
+ 1. The global loader instance to be destroyed
+ 2. All cached agent prompts to be invalidated
+ 3. Fresh agent discovery on next access
+
+ Use Cases:
+ - Hot-reloading during development
+ - Picking up new agent files without restart
+ - Recovering from corrupted state
+ - Testing agent loading logic
+
+ WHY: Hot-reloading is essential for development productivity and
+ allows dynamic agent updates in production without service restart.
+
+ Implementation Note: We simply clear the global loader reference.
+ The next call to _get_loader() will create a fresh instance that
+ re-discovers and re-validates all agents.
+ """
  global _loader
  _loader = None
  logger.info("Agent registry cleared, will reload on next access")
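A hot-reload sketch matching the reload_agents() docstring above; clear_agent_cache() is called explicitly here so cached prompts are dropped along with the registry, since reload_agents() itself only resets the global loader reference:

    # Hot-reload sketch for development, assuming claude-mpm 3.0.1 is installed.
    from claude_mpm.agents.agent_loader import reload_agents, clear_agent_cache, list_available_agents

    # ... edit or add a template under src/claude_mpm/agents/templates/ ...
    clear_agent_cache()          # invalidate cached prompts explicitly
    reload_agents()              # discard the registry; next access re-discovers agents
    print(sorted(list_available_agents()))  # fresh registry built on this call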