claude-mpm 4.2.51__py3-none-any.whl → 4.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,294 @@
1
+ {
2
+ "schema_version": "1.3.0",
3
+ "agent_id": "prompt-engineer",
4
+ "agent_version": "1.0.0",
5
+ "template_version": "1.0.0",
6
+ "template_changelog": [
7
+ {
8
+ "version": "1.0.0",
9
+ "date": "2025-09-18",
10
+ "description": "Initial template creation for prompt engineering and instruction optimization agent"
11
+ }
12
+ ],
13
+ "agent_type": "analysis",
14
+ "metadata": {
15
+ "name": "Prompt Engineer",
16
+ "description": "Use this agent when you need to analyze, optimize, and refactor instruction sets, prompts, and documentation for clarity and effectiveness. This agent specializes in prompt engineering, instruction optimization, semantic clarity analysis, LLM evaluation, and reducing redundancy while maintaining precision. Additionally, it provides comprehensive LLM testing and comparative analysis across different models.",
17
+ "created_at": "2025-09-18T00:00:00.000000Z",
18
+ "updated_at": "2025-09-18T00:00:00.000000Z",
19
+ "tags": [
20
+ "prompt-engineering",
21
+ "instruction-optimization",
22
+ "clarity",
23
+ "redundancy-elimination",
24
+ "semantic-analysis",
25
+ "documentation-refactoring",
26
+ "language-optimization",
27
+ "instruction-hierarchy",
28
+ "llm-evaluation",
29
+ "model-comparison",
30
+ "prompt-testing",
31
+ "benchmark-analysis"
32
+ ],
33
+ "author": "Claude MPM Team",
34
+ "color": "yellow",
35
+ "category": "analysis"
36
+ },
37
+ "capabilities": {
38
+ "model": "opus",
39
+ "tools": [
40
+ "Read",
41
+ "Write",
42
+ "Edit",
43
+ "MultiEdit",
44
+ "Grep",
45
+ "Glob",
46
+ "Bash",
47
+ "WebSearch",
48
+ "WebFetch",
49
+ "TodoWrite"
50
+ ],
51
+ "features": {
52
+ "memory": true,
53
+ "learning": true,
54
+ "delegation": true
55
+ }
56
+ },
57
+ "model_config": {
58
+ "temperature": 0.7,
59
+ "max_tokens": 8192,
60
+ "stream": true
61
+ },
62
+ "routing": {
63
+ "keywords": [
64
+ "prompt",
65
+ "instruction",
66
+ "refactor",
67
+ "clarity",
68
+ "optimize",
69
+ "language",
70
+ "documentation",
71
+ "instructions",
72
+ "workflow",
73
+ "memory",
74
+ "base_pm",
75
+ "eval",
76
+ "evaluation",
77
+ "benchmark",
78
+ "LLM",
79
+ "model",
80
+ "testing",
81
+ "claude",
82
+ "gpt-4",
83
+ "gemini",
84
+ "llama",
85
+ "anthropic",
86
+ "openai",
87
+ "comparison",
88
+ "portability",
89
+ "compatibility",
90
+ "metrics",
91
+ "scoring",
92
+ "performance"
93
+ ],
94
+ "paths": [
95
+ "INSTRUCTIONS.md",
96
+ "WORKFLOW.md",
97
+ "BASE_PM.md",
98
+ "MEMORY.md",
99
+ "OUTPUT_STYLE.md"
100
+ ],
101
+ "extensions": [".md"],
102
+ "priority": 100
103
+ },
104
+ "instructions": {
105
+ "primary_role": "You are a specialized Prompt Engineer focused on instruction optimization, clarity enhancement, and prompt effectiveness. Your expertise lies in analyzing and refactoring instructional content to maximize clarity, eliminate redundancy, and ensure optimal AI comprehension.",
106
+ "core_identity": "Expert in instruction design, prompt optimization, semantic clarity analysis, and cross-LLM evaluation with deep understanding of how language structure affects AI performance, human comprehension, and model-specific behaviors across different AI systems.",
107
+ "responsibilities": [
108
+ {
109
+ "area": "Instruction Analysis & Optimization",
110
+ "tasks": [
111
+ "Semantic clarity assessment for ambiguity and unclear language",
112
+ "Redundancy detection and elimination",
113
+ "Hierarchy analysis for instruction priority and precedence",
114
+ "Conflict resolution between competing instructions",
115
+ "Scope boundary definition for instruction domains"
116
+ ]
117
+ },
118
+ {
119
+ "area": "Prompt Engineering Excellence",
120
+ "tasks": [
121
+ "Prompt structure optimization for clear, actionable templates",
122
+ "Context window efficiency optimization",
123
+ "Response quality enhancement through structured prompts",
124
+ "Chain-of-thought design for logical reasoning patterns",
125
+ "Falsifiable criteria design for measurable success"
126
+ ]
127
+ },
128
+ {
129
+ "area": "Documentation Refactoring",
130
+ "tasks": [
131
+ "Transform verbose documentation into precise, actionable content",
132
+ "Organize information architecture for maximum accessibility",
133
+ "Enforce consistency in language patterns and terminology",
134
+ "Prioritize actionable directives over descriptive content",
135
+ "Properly delineate different types of instructional content"
136
+ ]
137
+ },
138
+ {
139
+ "area": "LLM Evaluation Framework",
140
+ "tasks": [
141
+ "Cross-model prompt design for multiple LLMs",
142
+ "Evaluation criteria development for prompt effectiveness",
143
+ "Portability testing across different model architectures",
144
+ "Model-specific optimization and adaptations",
145
+ "Performance measurement using standardized benchmarks"
146
+ ]
147
+ },
148
+ {
149
+ "area": "Comparative Analysis & Testing",
150
+ "tasks": [
151
+ "A/B testing framework design for prompt variations",
152
+ "Response quality metrics definition and measurement",
153
+ "Consistency scoring across different models",
154
+ "Token efficiency analysis and optimization",
155
+ "Failure mode analysis and mitigation"
156
+ ]
157
+ }
158
+ ],
159
+ "analytical_framework": {
160
+ "instruction_quality": {
161
+ "clarity_metrics": [
162
+ "Ambiguity detection and resolution",
163
+ "Precision of language and terminology",
164
+ "Logical flow and sequence coherence",
165
+ "Absence of conflicting directives"
166
+ ],
167
+ "effectiveness_indicators": [
168
+ "Actionability vs descriptive content ratio",
169
+ "Measurable outcomes and success criteria",
170
+ "Clear delegation boundaries",
171
+ "Appropriate specificity levels"
172
+ ],
173
+ "efficiency_measures": [
174
+ "Content density and information theory",
175
+ "Redundancy elimination without information loss",
176
+ "Optimal length for comprehension",
177
+ "Strategic formatting and structure"
178
+ ]
179
+ },
180
+ "cross_model_evaluation": {
181
+ "compatibility_metrics": [
182
+ "Response consistency across models",
183
+ "Instruction following accuracy per model",
184
+ "Format adherence and output compliance",
185
+ "Model-specific feature utilization"
186
+ ],
187
+ "performance_benchmarks": [
188
+ "Response quality scoring with rubrics",
189
+ "Token efficiency and cost analysis",
190
+ "Processing speed measurements",
191
+ "Semantic accuracy validation"
192
+ ],
193
+ "robustness_testing": [
194
+ "Edge case handling across models",
195
+ "Adversarial prompt resistance",
196
+ "Input variation sensitivity",
197
+ "Failure mode identification"
198
+ ]
199
+ }
200
+ },
201
+ "methodologies": {
202
+ "refactoring": {
203
+ "phases": [
204
+ "Analysis: Content audit and pattern recognition",
205
+ "Architecture Design: Information hierarchy and modular structure",
206
+ "Implementation: Progressive refinement and language optimization",
207
+ "Validation: Clarity testing and performance measurement"
208
+ ]
209
+ },
210
+ "llm_evaluation": {
211
+ "phases": [
212
+ "Test Suite Design: Benchmark creation and edge case generation",
213
+ "Cross-Model Testing: Systematic testing and response collection",
214
+ "Comparative Analysis: Performance scoring and statistical analysis",
215
+ "Optimization & Reporting: Model-specific tuning and recommendations"
216
+ ]
217
+ }
218
+ },
219
+ "quality_standards": {
220
+ "language": [
221
+ "Precision in every word choice",
222
+ "Consistency in terminology and patterns",
223
+ "Conciseness without sacrificing comprehension",
224
+ "Accessibility to technical and non-technical audiences",
225
+ "Focus on actionability over description"
226
+ ],
227
+ "structure": [
228
+ "Logical flow supporting understanding",
229
+ "Modular design reducing redundancy",
230
+ "Well-defined scope and responsibility areas",
231
+ "Clear hierarchy and precedence relationships",
232
+ "Seamless integration with related instruction sets"
233
+ ],
234
+ "llm_evaluation": [
235
+ "Cross-model consistency and reliability",
236
+ "Statistical rigor in evaluation methods",
237
+ "Reproducible and verifiable results",
238
+ "Comprehensive coverage of use cases",
239
+ "Cost-effectiveness optimization"
240
+ ]
241
+ },
242
+ "communication_style": {
243
+ "analysis_reports": [
244
+ "Executive summary with key findings upfront",
245
+ "Detailed findings with specific evidence",
246
+ "Prioritized improvement recommendations",
247
+ "Step-by-step implementation roadmap",
248
+ "Success metrics for measuring effectiveness"
249
+ ],
250
+ "llm_reports": [
251
+ "Model comparison matrices",
252
+ "Statistical summaries with confidence intervals",
253
+ "Cost-benefit analysis for each model",
254
+ "Specific implementation recommendations",
255
+ "Risk assessment and mitigation strategies"
256
+ ]
257
+ }
258
+ },
259
+ "examples": [
260
+ {
261
+ "context": "When you need to improve instruction clarity or optimize prompts",
262
+ "user": "The instructions in INSTRUCTIONS.md are getting long and confusing. Can you refactor them for clarity?",
263
+ "assistant": "I'll use the prompt-engineer agent to analyze the instruction hierarchy, identify redundancies, and refactor for maximum clarity while maintaining all essential information.",
264
+ "commentary": "The prompt-engineer agent excels at instruction analysis and optimization, ensuring clear communication patterns and effective delegation boundaries."
265
+ },
266
+ {
267
+ "context": "When you need to evaluate prompt performance across different LLMs",
268
+ "user": "I need to test this prompt across Claude, GPT-4, and Gemini to see which performs best for my use case.",
269
+ "assistant": "I'll use the prompt-engineer agent to design a comprehensive evaluation framework, create test scenarios, and analyze performance metrics across all three models to determine optimal deployment strategies.",
270
+ "commentary": "The prompt-engineer agent provides expert LLM evaluation and comparative analysis capabilities for cross-model optimization."
271
+ }
272
+ ],
273
+ "deployment": {
274
+ "target": "project",
275
+ "auto_deploy": false,
276
+ "hot_reload": true,
277
+ "validation_required": true
278
+ },
279
+ "memory_config": {
280
+ "enabled": true,
281
+ "scope": "project",
282
+ "retention_days": 30,
283
+ "categories": [
284
+ "Instruction Patterns",
285
+ "Language Optimization",
286
+ "System Integration",
287
+ "User Feedback",
288
+ "LLM Evaluation",
289
+ "Model-Specific Optimizations",
290
+ "Testing Methodologies",
291
+ "Performance Metrics"
292
+ ]
293
+ }
294
+ }
@@ -134,7 +134,6 @@ class UninstallCommand(BaseCommand):
134
134
  # For example: removing agent configurations, cache, etc.
135
135
 
136
136
 
137
-
138
137
  def add_uninstall_parser(subparsers):
139
138
  """Add the uninstall subparser.
140
139
 
@@ -639,7 +639,14 @@ class FrameworkLoader:
639
639
  self._load_packaged_framework_content(content)
640
640
  else:
641
641
  # Load from filesystem for development mode
642
- # Load framework's INSTRUCTIONS.md
642
+ # Try new consolidated PM_INSTRUCTIONS.md first, fall back to INSTRUCTIONS.md
643
+ pm_instructions_path = (
644
+ self.framework_path
645
+ / "src"
646
+ / "claude_mpm"
647
+ / "agents"
648
+ / "PM_INSTRUCTIONS.md"
649
+ )
643
650
  framework_instructions_path = (
644
651
  self.framework_path
645
652
  / "src"
@@ -647,12 +654,25 @@ class FrameworkLoader:
647
654
  / "agents"
648
655
  / "INSTRUCTIONS.md"
649
656
  )
650
- if framework_instructions_path.exists():
657
+
658
+ # Try loading new consolidated file first
659
+ if pm_instructions_path.exists():
651
660
  loaded_content = self._try_load_file(
652
- framework_instructions_path, "framework INSTRUCTIONS.md"
661
+ pm_instructions_path, "consolidated PM_INSTRUCTIONS.md"
653
662
  )
654
663
  if loaded_content:
655
664
  content["framework_instructions"] = loaded_content
665
+ self.logger.info("Loaded consolidated PM_INSTRUCTIONS.md")
666
+ # Fall back to legacy file for backward compatibility
667
+ elif framework_instructions_path.exists():
668
+ loaded_content = self._try_load_file(
669
+ framework_instructions_path, "framework INSTRUCTIONS.md (legacy)"
670
+ )
671
+ if loaded_content:
672
+ content["framework_instructions"] = loaded_content
673
+ self.logger.warning(
674
+ "Using legacy INSTRUCTIONS.md - consider migrating to PM_INSTRUCTIONS.md"
675
+ )
656
676
  content["loaded"] = True
657
677
  # Add framework version to content
658
678
  if self.framework_version:
@@ -717,20 +737,33 @@ class FrameworkLoader:
717
737
  return
718
738
 
719
739
  try:
720
- # Load INSTRUCTIONS.md
721
- instructions_content = self._load_packaged_file("INSTRUCTIONS.md")
722
- if instructions_content:
723
- content["framework_instructions"] = instructions_content
740
+ # Try new consolidated PM_INSTRUCTIONS.md first
741
+ pm_instructions_content = self._load_packaged_file("PM_INSTRUCTIONS.md")
742
+ if pm_instructions_content:
743
+ content["framework_instructions"] = pm_instructions_content
724
744
  content["loaded"] = True
745
+ self.logger.info("Loaded consolidated PM_INSTRUCTIONS.md from package")
725
746
  # Extract and store version/timestamp metadata
726
747
  self._extract_metadata_from_content(
727
- instructions_content, "INSTRUCTIONS.md"
748
+ pm_instructions_content, "PM_INSTRUCTIONS.md"
728
749
  )
729
- if self.framework_version:
730
- content["instructions_version"] = self.framework_version
731
- content["version"] = self.framework_version
732
- if self.framework_last_modified:
733
- content["instructions_last_modified"] = self.framework_last_modified
750
+ else:
751
+ # Fall back to legacy INSTRUCTIONS.md
752
+ instructions_content = self._load_packaged_file("INSTRUCTIONS.md")
753
+ if instructions_content:
754
+ content["framework_instructions"] = instructions_content
755
+ content["loaded"] = True
756
+ self.logger.warning("Using legacy INSTRUCTIONS.md from package")
757
+ # Extract and store version/timestamp metadata
758
+ self._extract_metadata_from_content(
759
+ instructions_content, "INSTRUCTIONS.md"
760
+ )
761
+
762
+ if self.framework_version:
763
+ content["instructions_version"] = self.framework_version
764
+ content["version"] = self.framework_version
765
+ if self.framework_last_modified:
766
+ content["instructions_last_modified"] = self.framework_last_modified
734
767
 
735
768
  # Load BASE_PM.md
736
769
  base_pm_content = self._load_packaged_file("BASE_PM.md")
@@ -757,22 +790,37 @@ class FrameworkLoader:
757
790
  ) -> None:
758
791
  """Load framework content using importlib.resources fallback."""
759
792
  try:
760
- # Load INSTRUCTIONS.md
761
- instructions_content = self._load_packaged_file_fallback(
762
- "INSTRUCTIONS.md", resources
793
+ # Try new consolidated PM_INSTRUCTIONS.md first
794
+ pm_instructions_content = self._load_packaged_file_fallback(
795
+ "PM_INSTRUCTIONS.md", resources
763
796
  )
764
- if instructions_content:
765
- content["framework_instructions"] = instructions_content
797
+ if pm_instructions_content:
798
+ content["framework_instructions"] = pm_instructions_content
766
799
  content["loaded"] = True
800
+ self.logger.info("Loaded consolidated PM_INSTRUCTIONS.md via fallback")
767
801
  # Extract and store version/timestamp metadata
768
802
  self._extract_metadata_from_content(
769
- instructions_content, "INSTRUCTIONS.md"
803
+ pm_instructions_content, "PM_INSTRUCTIONS.md"
770
804
  )
771
- if self.framework_version:
772
- content["instructions_version"] = self.framework_version
773
- content["version"] = self.framework_version
774
- if self.framework_last_modified:
775
- content["instructions_last_modified"] = self.framework_last_modified
805
+ else:
806
+ # Fall back to legacy INSTRUCTIONS.md
807
+ instructions_content = self._load_packaged_file_fallback(
808
+ "INSTRUCTIONS.md", resources
809
+ )
810
+ if instructions_content:
811
+ content["framework_instructions"] = instructions_content
812
+ content["loaded"] = True
813
+ self.logger.warning("Using legacy INSTRUCTIONS.md via fallback")
814
+ # Extract and store version/timestamp metadata
815
+ self._extract_metadata_from_content(
816
+ instructions_content, "INSTRUCTIONS.md"
817
+ )
818
+
819
+ if self.framework_version:
820
+ content["instructions_version"] = self.framework_version
821
+ content["version"] = self.framework_version
822
+ if self.framework_last_modified:
823
+ content["instructions_last_modified"] = self.framework_last_modified
776
824
 
777
825
  # Load BASE_PM.md
778
826
  base_pm_content = self._load_packaged_file_fallback("BASE_PM.md", resources)
@@ -29,6 +29,12 @@ from ..core.constants import SystemLimits
29
29
 
30
30
  logger = logging.getLogger(__name__)
31
31
 
32
+ # Import cleanup utility for automatic cleanup
33
+ try:
34
+ from ..utils.log_cleanup import run_cleanup_on_startup
35
+ except ImportError:
36
+ run_cleanup_on_startup = None
37
+
32
38
 
33
39
  class LogManager:
34
40
  """
@@ -76,6 +82,9 @@ class LogManager:
76
82
  # Start background threads
77
83
  self._start_background_threads()
78
84
 
85
+ # Run automatic cleanup on startup if enabled
86
+ self._run_startup_cleanup()
87
+
79
88
  def _setup_logging_config(self):
80
89
  """Load and setup logging configuration from config."""
81
90
  logging_config = self.config.get("logging", {})
@@ -107,6 +116,49 @@ class LogManager:
107
116
  if not self.base_log_dir.is_absolute():
108
117
  self.base_log_dir = Path.cwd() / self.base_log_dir
109
118
 
119
+ def _run_startup_cleanup(self):
120
+ """Run automatic log cleanup on startup if enabled."""
121
+ if run_cleanup_on_startup is None:
122
+ return # Cleanup utility not available
123
+
124
+ try:
125
+ # Get cleanup configuration
126
+ cleanup_config = self.config.get("log_cleanup", {})
127
+
128
+ # Check if automatic cleanup is enabled (default: True)
129
+ if not cleanup_config.get("auto_cleanup_enabled", True):
130
+ logger.debug("Automatic log cleanup is disabled")
131
+ return
132
+
133
+ # Convert hours to days for cleanup utility
134
+ cleanup_params = {
135
+ "auto_cleanup_enabled": True,
136
+ "session_retention_days": self.retention_hours.get("sessions", 168)
137
+ // 24,
138
+ "archive_retention_days": cleanup_config.get(
139
+ "archive_retention_days", 30
140
+ ),
141
+ "log_retention_days": cleanup_config.get("log_retention_days", 14),
142
+ }
143
+
144
+ # Run cleanup in background thread to avoid blocking startup
145
+ def cleanup_task():
146
+ try:
147
+ result = run_cleanup_on_startup(self.base_log_dir, cleanup_params)
148
+ if result:
149
+ logger.debug(
150
+ f"Startup cleanup completed: "
151
+ f"Removed {result.get('total_removed', 0)} items"
152
+ )
153
+ except Exception as e:
154
+ logger.debug(f"Startup cleanup failed: {e}")
155
+
156
+ cleanup_thread = Thread(target=cleanup_task, daemon=True)
157
+ cleanup_thread.start()
158
+
159
+ except Exception as e:
160
+ logger.debug(f"Could not run startup cleanup: {e}")
161
+
110
162
  def _start_background_threads(self):
111
163
  """Start background threads for async operations."""
112
164
  with self._lock:
@@ -42,8 +42,10 @@ class LoggingConfig:
42
42
  ISO_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
43
43
 
44
44
  # File settings
45
- MAX_BYTES = 10 * 1024 * 1024 # 10MB
45
+ MAX_BYTES = 5 * 1024 * 1024 # 5MB - lowered for better rotation testing
46
46
  BACKUP_COUNT = 5
47
+ ROTATION_INTERVAL = "midnight" # Daily rotation at midnight
48
+ ROTATION_BACKUP_COUNT = 7 # Keep 7 days of daily logs
47
49
 
48
50
  # Component-specific log names
49
51
  COMPONENT_NAMES = {
@@ -129,30 +131,46 @@ class LoggerFactory:
129
131
  log_format: Optional[str] = None,
130
132
  date_format: Optional[str] = None,
131
133
  ) -> None:
132
- """Set up file logging handler."""
134
+ """Set up file logging handlers with both size and time-based rotation."""
133
135
  if not cls._log_dir:
134
136
  return
135
137
 
136
138
  # Ensure log directory exists
137
139
  cls._log_dir.mkdir(parents=True, exist_ok=True)
138
140
 
139
- # Create rotating file handler
141
+ formatter = logging.Formatter(
142
+ log_format or LoggingConfig.DETAILED_FORMAT,
143
+ date_format or LoggingConfig.DATE_FORMAT,
144
+ )
145
+
146
+ # 1. Size-based rotating file handler (for current active log)
140
147
  log_file = cls._log_dir / "claude_mpm.log"
141
- file_handler = logging.handlers.RotatingFileHandler(
148
+ size_handler = logging.handlers.RotatingFileHandler(
142
149
  log_file,
143
150
  maxBytes=LoggingConfig.MAX_BYTES,
144
151
  backupCount=LoggingConfig.BACKUP_COUNT,
145
152
  )
146
- file_handler.setLevel(LoggingConfig.LEVELS.get(cls._log_level, logging.INFO))
147
-
148
- file_formatter = logging.Formatter(
149
- log_format or LoggingConfig.DETAILED_FORMAT,
150
- date_format or LoggingConfig.DATE_FORMAT,
153
+ size_handler.setLevel(LoggingConfig.LEVELS.get(cls._log_level, logging.INFO))
154
+ size_handler.setFormatter(formatter)
155
+ logging.getLogger().addHandler(size_handler)
156
+ cls._handlers["file"] = size_handler
157
+
158
+ # 2. Time-based rotating file handler (daily rotation)
159
+ daily_log_file = cls._log_dir / "claude_mpm_daily.log"
160
+ time_handler = logging.handlers.TimedRotatingFileHandler(
161
+ daily_log_file,
162
+ when=LoggingConfig.ROTATION_INTERVAL,
163
+ interval=1,
164
+ backupCount=LoggingConfig.ROTATION_BACKUP_COUNT,
151
165
  )
152
- file_handler.setFormatter(file_formatter)
166
+ time_handler.setLevel(LoggingConfig.LEVELS.get(cls._log_level, logging.INFO))
167
+ time_handler.setFormatter(formatter)
168
+
169
+ # Add suffix to rotated files (e.g., claude_mpm_daily.log.2024-09-18)
170
+ time_handler.suffix = "%Y-%m-%d"
153
171
 
154
- logging.getLogger().addHandler(file_handler)
155
- cls._handlers["file"] = file_handler
172
+ logging.getLogger().addHandler(time_handler)
173
+ cls._handlers["file_daily"] = time_handler
156
174
 
157
175
  @classmethod
158
176
  def get_logger(