claude-mpm 5.1.8-py3-none-any.whl → 5.4.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of claude-mpm might be problematic.

Files changed (191)
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/__init__.py +4 -0
  3. claude_mpm/agents/{PM_INSTRUCTIONS_TEACH.md → CLAUDE_MPM_TEACHER_OUTPUT_STYLE.md} +721 -41
  4. claude_mpm/agents/PM_INSTRUCTIONS.md +290 -34
  5. claude_mpm/agents/agent_loader.py +13 -44
  6. claude_mpm/agents/frontmatter_validator.py +68 -0
  7. claude_mpm/agents/templates/circuit-breakers.md +138 -1
  8. claude_mpm/cli/__main__.py +4 -0
  9. claude_mpm/cli/chrome_devtools_installer.py +175 -0
  10. claude_mpm/cli/commands/agent_state_manager.py +8 -17
  11. claude_mpm/cli/commands/agents.py +169 -31
  12. claude_mpm/cli/commands/auto_configure.py +210 -25
  13. claude_mpm/cli/commands/config.py +88 -2
  14. claude_mpm/cli/commands/configure.py +1111 -161
  15. claude_mpm/cli/commands/configure_agent_display.py +15 -6
  16. claude_mpm/cli/commands/mpm_init/core.py +160 -46
  17. claude_mpm/cli/commands/mpm_init/knowledge_extractor.py +481 -0
  18. claude_mpm/cli/commands/mpm_init/prompts.py +280 -0
  19. claude_mpm/cli/commands/skills.py +214 -189
  20. claude_mpm/cli/commands/summarize.py +413 -0
  21. claude_mpm/cli/executor.py +11 -3
  22. claude_mpm/cli/parsers/agents_parser.py +54 -9
  23. claude_mpm/cli/parsers/auto_configure_parser.py +0 -138
  24. claude_mpm/cli/parsers/base_parser.py +5 -0
  25. claude_mpm/cli/parsers/config_parser.py +153 -83
  26. claude_mpm/cli/parsers/skills_parser.py +3 -2
  27. claude_mpm/cli/startup.py +550 -94
  28. claude_mpm/commands/mpm-config.md +265 -0
  29. claude_mpm/commands/mpm-help.md +14 -95
  30. claude_mpm/commands/mpm-organize.md +500 -0
  31. claude_mpm/config/agent_sources.py +27 -0
  32. claude_mpm/core/framework/formatters/content_formatter.py +3 -13
  33. claude_mpm/core/framework/loaders/agent_loader.py +8 -5
  34. claude_mpm/core/framework_loader.py +4 -2
  35. claude_mpm/core/logger.py +13 -0
  36. claude_mpm/core/output_style_manager.py +173 -43
  37. claude_mpm/core/socketio_pool.py +3 -3
  38. claude_mpm/core/unified_agent_registry.py +134 -16
  39. claude_mpm/hooks/claude_hooks/correlation_manager.py +60 -0
  40. claude_mpm/hooks/claude_hooks/event_handlers.py +211 -78
  41. claude_mpm/hooks/claude_hooks/hook_handler.py +6 -0
  42. claude_mpm/hooks/claude_hooks/installer.py +33 -10
  43. claude_mpm/hooks/claude_hooks/memory_integration.py +26 -9
  44. claude_mpm/hooks/claude_hooks/response_tracking.py +2 -3
  45. claude_mpm/hooks/claude_hooks/services/connection_manager.py +4 -0
  46. claude_mpm/hooks/memory_integration_hook.py +46 -1
  47. claude_mpm/init.py +0 -19
  48. claude_mpm/models/agent_definition.py +7 -0
  49. claude_mpm/scripts/claude-hook-handler.sh +58 -18
  50. claude_mpm/scripts/launch_monitor.py +93 -13
  51. claude_mpm/scripts/start_activity_logging.py +0 -0
  52. claude_mpm/services/agents/agent_recommendation_service.py +278 -0
  53. claude_mpm/services/agents/agent_review_service.py +280 -0
  54. claude_mpm/services/agents/deployment/agent_discovery_service.py +2 -3
  55. claude_mpm/services/agents/deployment/agent_template_builder.py +4 -2
  56. claude_mpm/services/agents/deployment/multi_source_deployment_service.py +188 -12
  57. claude_mpm/services/agents/deployment/remote_agent_discovery_service.py +531 -55
  58. claude_mpm/services/agents/git_source_manager.py +34 -0
  59. claude_mpm/services/agents/loading/base_agent_manager.py +1 -13
  60. claude_mpm/services/agents/sources/git_source_sync_service.py +8 -1
  61. claude_mpm/services/agents/toolchain_detector.py +10 -6
  62. claude_mpm/services/analysis/__init__.py +11 -1
  63. claude_mpm/services/analysis/clone_detector.py +1030 -0
  64. claude_mpm/services/command_deployment_service.py +81 -10
  65. claude_mpm/services/event_bus/config.py +3 -1
  66. claude_mpm/services/git/git_operations_service.py +93 -8
  67. claude_mpm/services/monitor/daemon.py +9 -2
  68. claude_mpm/services/monitor/daemon_manager.py +39 -3
  69. claude_mpm/services/monitor/server.py +225 -19
  70. claude_mpm/services/self_upgrade_service.py +120 -12
  71. claude_mpm/services/skills/__init__.py +3 -0
  72. claude_mpm/services/skills/git_skill_source_manager.py +32 -2
  73. claude_mpm/services/skills/selective_skill_deployer.py +704 -0
  74. claude_mpm/services/skills/skill_to_agent_mapper.py +406 -0
  75. claude_mpm/services/skills_deployer.py +126 -9
  76. claude_mpm/services/socketio/event_normalizer.py +15 -1
  77. claude_mpm/services/socketio/server/core.py +160 -21
  78. claude_mpm/services/version_control/git_operations.py +103 -0
  79. claude_mpm/utils/agent_filters.py +17 -44
  80. {claude_mpm-5.1.8.dist-info → claude_mpm-5.4.22.dist-info}/METADATA +47 -84
  81. {claude_mpm-5.1.8.dist-info → claude_mpm-5.4.22.dist-info}/RECORD +86 -176
  82. claude_mpm-5.4.22.dist-info/entry_points.txt +5 -0
  83. claude_mpm-5.4.22.dist-info/licenses/LICENSE +94 -0
  84. claude_mpm-5.4.22.dist-info/licenses/LICENSE-FAQ.md +153 -0
  85. claude_mpm/agents/BASE_AGENT_TEMPLATE.md +0 -292
  86. claude_mpm/agents/BASE_DOCUMENTATION.md +0 -53
  87. claude_mpm/agents/BASE_ENGINEER.md +0 -658
  88. claude_mpm/agents/BASE_OPS.md +0 -219
  89. claude_mpm/agents/BASE_PM.md +0 -480
  90. claude_mpm/agents/BASE_PROMPT_ENGINEER.md +0 -787
  91. claude_mpm/agents/BASE_QA.md +0 -167
  92. claude_mpm/agents/BASE_RESEARCH.md +0 -53
  93. claude_mpm/agents/base_agent.json +0 -31
  94. claude_mpm/agents/base_agent_loader.py +0 -601
  95. claude_mpm/cli/commands/agents_detect.py +0 -380
  96. claude_mpm/cli/commands/agents_recommend.py +0 -309
  97. claude_mpm/cli/ticket_cli.py +0 -35
  98. claude_mpm/commands/mpm-agents-auto-configure.md +0 -278
  99. claude_mpm/commands/mpm-agents-detect.md +0 -177
  100. claude_mpm/commands/mpm-agents-list.md +0 -131
  101. claude_mpm/commands/mpm-agents-recommend.md +0 -223
  102. claude_mpm/commands/mpm-config-view.md +0 -150
  103. claude_mpm/commands/mpm-ticket-organize.md +0 -304
  104. claude_mpm/dashboard/analysis_runner.py +0 -455
  105. claude_mpm/dashboard/index.html +0 -13
  106. claude_mpm/dashboard/open_dashboard.py +0 -66
  107. claude_mpm/dashboard/static/css/activity.css +0 -1958
  108. claude_mpm/dashboard/static/css/connection-status.css +0 -370
  109. claude_mpm/dashboard/static/css/dashboard.css +0 -4701
  110. claude_mpm/dashboard/static/js/components/activity-tree.js +0 -1871
  111. claude_mpm/dashboard/static/js/components/agent-hierarchy.js +0 -777
  112. claude_mpm/dashboard/static/js/components/agent-inference.js +0 -956
  113. claude_mpm/dashboard/static/js/components/build-tracker.js +0 -333
  114. claude_mpm/dashboard/static/js/components/code-simple.js +0 -857
  115. claude_mpm/dashboard/static/js/components/connection-debug.js +0 -654
  116. claude_mpm/dashboard/static/js/components/diff-viewer.js +0 -891
  117. claude_mpm/dashboard/static/js/components/event-processor.js +0 -542
  118. claude_mpm/dashboard/static/js/components/event-viewer.js +0 -1155
  119. claude_mpm/dashboard/static/js/components/export-manager.js +0 -368
  120. claude_mpm/dashboard/static/js/components/file-change-tracker.js +0 -443
  121. claude_mpm/dashboard/static/js/components/file-change-viewer.js +0 -690
  122. claude_mpm/dashboard/static/js/components/file-tool-tracker.js +0 -724
  123. claude_mpm/dashboard/static/js/components/file-viewer.js +0 -580
  124. claude_mpm/dashboard/static/js/components/hud-library-loader.js +0 -211
  125. claude_mpm/dashboard/static/js/components/hud-manager.js +0 -671
  126. claude_mpm/dashboard/static/js/components/hud-visualizer.js +0 -1718
  127. claude_mpm/dashboard/static/js/components/module-viewer.js +0 -2764
  128. claude_mpm/dashboard/static/js/components/session-manager.js +0 -579
  129. claude_mpm/dashboard/static/js/components/socket-manager.js +0 -368
  130. claude_mpm/dashboard/static/js/components/ui-state-manager.js +0 -749
  131. claude_mpm/dashboard/static/js/components/unified-data-viewer.js +0 -1824
  132. claude_mpm/dashboard/static/js/components/working-directory.js +0 -920
  133. claude_mpm/dashboard/static/js/connection-manager.js +0 -536
  134. claude_mpm/dashboard/static/js/dashboard.js +0 -1914
  135. claude_mpm/dashboard/static/js/extension-error-handler.js +0 -164
  136. claude_mpm/dashboard/static/js/socket-client.js +0 -1474
  137. claude_mpm/dashboard/static/js/tab-isolation-fix.js +0 -185
  138. claude_mpm/dashboard/static/socket.io.min.js +0 -7
  139. claude_mpm/dashboard/static/socket.io.v4.8.1.backup.js +0 -7
  140. claude_mpm/dashboard/templates/code_simple.html +0 -153
  141. claude_mpm/dashboard/templates/index.html +0 -606
  142. claude_mpm/dashboard/test_dashboard.html +0 -372
  143. claude_mpm/hooks/claude_hooks/__pycache__/__init__.cpython-313.pyc +0 -0
  144. claude_mpm/hooks/claude_hooks/__pycache__/event_handlers.cpython-313.pyc +0 -0
  145. claude_mpm/hooks/claude_hooks/__pycache__/hook_handler.cpython-313.pyc +0 -0
  146. claude_mpm/hooks/claude_hooks/__pycache__/memory_integration.cpython-313.pyc +0 -0
  147. claude_mpm/hooks/claude_hooks/__pycache__/response_tracking.cpython-313.pyc +0 -0
  148. claude_mpm/hooks/claude_hooks/__pycache__/tool_analysis.cpython-313.pyc +0 -0
  149. claude_mpm/hooks/claude_hooks/services/__pycache__/__init__.cpython-313.pyc +0 -0
  150. claude_mpm/hooks/claude_hooks/services/__pycache__/connection_manager_http.cpython-313.pyc +0 -0
  151. claude_mpm/hooks/claude_hooks/services/__pycache__/duplicate_detector.cpython-313.pyc +0 -0
  152. claude_mpm/hooks/claude_hooks/services/__pycache__/state_manager.cpython-313.pyc +0 -0
  153. claude_mpm/hooks/claude_hooks/services/__pycache__/subagent_processor.cpython-313.pyc +0 -0
  154. claude_mpm/scripts/mcp_server.py +0 -75
  155. claude_mpm/scripts/mcp_wrapper.py +0 -39
  156. claude_mpm/services/mcp_gateway/__init__.py +0 -159
  157. claude_mpm/services/mcp_gateway/auto_configure.py +0 -369
  158. claude_mpm/services/mcp_gateway/config/__init__.py +0 -17
  159. claude_mpm/services/mcp_gateway/config/config_loader.py +0 -296
  160. claude_mpm/services/mcp_gateway/config/config_schema.py +0 -243
  161. claude_mpm/services/mcp_gateway/config/configuration.py +0 -429
  162. claude_mpm/services/mcp_gateway/core/__init__.py +0 -43
  163. claude_mpm/services/mcp_gateway/core/base.py +0 -312
  164. claude_mpm/services/mcp_gateway/core/exceptions.py +0 -253
  165. claude_mpm/services/mcp_gateway/core/interfaces.py +0 -443
  166. claude_mpm/services/mcp_gateway/core/process_pool.py +0 -977
  167. claude_mpm/services/mcp_gateway/core/singleton_manager.py +0 -315
  168. claude_mpm/services/mcp_gateway/core/startup_verification.py +0 -316
  169. claude_mpm/services/mcp_gateway/main.py +0 -589
  170. claude_mpm/services/mcp_gateway/registry/__init__.py +0 -12
  171. claude_mpm/services/mcp_gateway/registry/service_registry.py +0 -412
  172. claude_mpm/services/mcp_gateway/registry/tool_registry.py +0 -489
  173. claude_mpm/services/mcp_gateway/server/__init__.py +0 -15
  174. claude_mpm/services/mcp_gateway/server/mcp_gateway.py +0 -414
  175. claude_mpm/services/mcp_gateway/server/stdio_handler.py +0 -372
  176. claude_mpm/services/mcp_gateway/server/stdio_server.py +0 -712
  177. claude_mpm/services/mcp_gateway/tools/__init__.py +0 -36
  178. claude_mpm/services/mcp_gateway/tools/base_adapter.py +0 -485
  179. claude_mpm/services/mcp_gateway/tools/document_summarizer.py +0 -789
  180. claude_mpm/services/mcp_gateway/tools/external_mcp_services.py +0 -654
  181. claude_mpm/services/mcp_gateway/tools/health_check_tool.py +0 -456
  182. claude_mpm/services/mcp_gateway/tools/hello_world.py +0 -551
  183. claude_mpm/services/mcp_gateway/tools/kuzu_memory_service.py +0 -555
  184. claude_mpm/services/mcp_gateway/utils/__init__.py +0 -14
  185. claude_mpm/services/mcp_gateway/utils/package_version_checker.py +0 -160
  186. claude_mpm/services/mcp_gateway/utils/update_preferences.py +0 -170
  187. claude_mpm-5.1.8.dist-info/entry_points.txt +0 -10
  188. claude_mpm-5.1.8.dist-info/licenses/LICENSE +0 -21
  189. claude_mpm/agents/{OUTPUT_STYLE.md → CLAUDE_MPM_OUTPUT_STYLE.md} +0 -0
  190. {claude_mpm-5.1.8.dist-info → claude_mpm-5.4.22.dist-info}/WHEEL +0 -0
  191. {claude_mpm-5.1.8.dist-info → claude_mpm-5.4.22.dist-info}/top_level.txt +0 -0
claude_mpm/services/mcp_gateway/tools/document_summarizer.py
@@ -1,789 +0,0 @@
-"""
-Document Summarizer Tool
-========================
-
-Intelligent document processing tool for solving Claude Code memory issues.
-Supports multiple file formats and summarization strategies.
-
-Part of ISS-0037: Document Summarizer Tool - Intelligent Document Processing
-"""
-
-import hashlib
-import mimetypes
-import re
-from collections import OrderedDict
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
-
-from claude_mpm.services.mcp_gateway.core.interfaces import (
-    MCPToolDefinition,
-    MCPToolInvocation,
-    MCPToolResult,
-)
-from claude_mpm.services.mcp_gateway.tools.base_adapter import BaseToolAdapter
-
-
-class LRUCache:
-    """
-    Simple LRU cache implementation for document summaries.
-
-    WHY: We need a memory-efficient cache to avoid re-processing documents
-    that are accessed repeatedly, which is common in Claude Code sessions.
-    """
-
-    def __init__(self, max_size: int = 100, max_memory_mb: int = 100):
-        """
-        Initialize LRU cache.
-
-        Args:
-            max_size: Maximum number of entries
-            max_memory_mb: Maximum memory usage in MB
-        """
-        self.cache = OrderedDict()
-        self.max_size = max_size
-        self.max_memory_bytes = max_memory_mb * 1024 * 1024
-        self.current_memory = 0
-        self.hits = 0
-        self.misses = 0
-
-    def get(self, key: str) -> Optional[Dict[str, Any]]:
-        """Get item from cache, updating LRU order."""
-        if key in self.cache:
-            self.hits += 1
-            # Move to end (most recently used)
-            self.cache.move_to_end(key)
-            return self.cache[key]
-        self.misses += 1
-        return None
-
-    def put(self, key: str, value: Dict[str, Any], size_bytes: int) -> None:
-        """Add item to cache, evicting LRU items if necessary."""
-        # Remove item if it already exists
-        if key in self.cache:
-            old_size = self.cache[key].get("size_bytes", 0)
-            self.current_memory -= old_size
-            del self.cache[key]
-
-        # Evict items if necessary
-        while (
-            len(self.cache) >= self.max_size
-            or self.current_memory + size_bytes > self.max_memory_bytes
-        ):
-            if not self.cache:
-                break
-            # Remove least recently used item
-            _removed_key, removed_value = self.cache.popitem(last=False)
-            self.current_memory -= removed_value.get("size_bytes", 0)
-
-        # Add new item
-        value["size_bytes"] = size_bytes
-        self.cache[key] = value
-        self.current_memory += size_bytes
-
-    def get_stats(self) -> Dict[str, Any]:
-        """Get cache statistics."""
-        hit_rate = (
-            self.hits / (self.hits + self.misses)
-            if (self.hits + self.misses) > 0
-            else 0
-        )
-        return {
-            "size": len(self.cache),
-            "memory_mb": self.current_memory / (1024 * 1024),
-            "hits": self.hits,
-            "misses": self.misses,
-            "hit_rate": hit_rate,
-        }
-
-
-class DocumentSummarizerTool(BaseToolAdapter):
-    """
-    Document summarizer tool for intelligent document processing.
-
-    WHY: Claude Code accumulates massive memory from reading full files,
-    leading to context overflow. This tool reduces document size by 60%+
-    while preserving essential information through intelligent summarization.
-
-    DESIGN DECISIONS:
-    - Use sentence boundary detection to preserve readability
-    - Implement multiple summarization modes for different use cases
-    - Cache summaries to avoid re-processing frequently accessed files
-    - Support common file formats used in development
-    """
-
-    # File size limits (in bytes)
-    MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
-    CHUNK_SIZE = 50000  # Characters per chunk for large files
-
-    # Token estimation (rough approximation)
-    CHARS_PER_TOKEN = 4  # Approximate for Claude's tokenizer
-
-    def __init__(self):
-        """Initialize the document summarizer tool."""
-        definition = MCPToolDefinition(
-            name="document_summarizer",
-            description="Intelligently summarizes documents to reduce memory usage while preserving key information",
-            input_schema={
-                "type": "object",
-                "properties": {
-                    "file_path": {
-                        "type": "string",
-                        "description": "Path to the document file",
-                    },
-                    "mode": {
-                        "type": "string",
-                        "enum": ["brief", "detailed", "key_points", "technical"],
-                        "description": "Summarization mode",
-                        "default": "brief",
-                    },
-                    "max_tokens": {
-                        "type": "integer",
-                        "description": "Maximum tokens in summary (optional)",
-                        "minimum": 100,
-                        "maximum": 10000,
-                    },
-                    "max_percentage": {
-                        "type": "integer",
-                        "description": "Maximum percentage of original to keep (1-100)",
-                        "minimum": 1,
-                        "maximum": 100,
-                        "default": 40,
-                    },
-                    "preserve_code": {
-                        "type": "boolean",
-                        "description": "Whether to preserve code blocks intact",
-                        "default": True,
-                    },
-                    "use_cache": {
-                        "type": "boolean",
-                        "description": "Whether to use cached summaries",
-                        "default": True,
-                    },
-                },
-                "required": ["file_path"],
-            },
-            output_schema={
-                "type": "object",
-                "properties": {
-                    "summary": {
-                        "type": "string",
-                        "description": "The summarized content",
-                    },
-                    "original_size": {
-                        "type": "integer",
-                        "description": "Original document size in bytes",
-                    },
-                    "summary_size": {
-                        "type": "integer",
-                        "description": "Summary size in bytes",
-                    },
-                    "reduction_percentage": {
-                        "type": "number",
-                        "description": "Percentage reduction achieved",
-                    },
-                    "token_estimate": {
-                        "type": "object",
-                        "properties": {
-                            "original": {"type": "integer"},
-                            "summary": {"type": "integer"},
-                            "saved": {"type": "integer"},
-                        },
-                    },
-                    "chunks_processed": {
-                        "type": "integer",
-                        "description": "Number of chunks processed for large files",
-                    },
-                    "cache_hit": {
-                        "type": "boolean",
-                        "description": "Whether summary was retrieved from cache",
-                    },
-                },
-            },
-            version="1.0.0",
-            metadata={
-                "category": "document_processing",
-                "supported_formats": [
-                    "txt",
-                    "md",
-                    "pdf",
-                    "docx",
-                    "json",
-                    "yaml",
-                    "csv",
-                    "py",
-                    "js",
-                    "ts",
-                    "java",
-                    "cpp",
-                    "c",
-                    "h",
-                    "hpp",
-                ],
-            },
-        )
-        super().__init__(definition)
-
-        # Initialize cache
-        self._cache = LRUCache(max_size=100, max_memory_mb=50)
-
-        # Sentence boundary patterns
-        self._sentence_endings = re.compile(r"[.!?]\s+")
-
-        # Code block patterns for different formats
-        self._code_patterns = {
-            "markdown": re.compile(r"```[\s\S]*?```", re.MULTILINE),
-            "inline": re.compile(r"`[^`]+`"),
-            "indent": re.compile(r"^(    |\t).*$", re.MULTILINE),
-        }
-
-    def _get_file_hash(self, file_path: str) -> str:
-        """Generate hash for file caching."""
-        stat = Path(file_path).stat()
-        hash_input = f"{file_path}:{stat.st_size}:{stat.st_mtime}"
-        return hashlib.md5(hash_input.encode()).hexdigest()
-
-    def _estimate_tokens(self, text: str) -> int:
-        """Estimate token count for text."""
-        return len(text) // self.CHARS_PER_TOKEN
-
-    def _validate_file(self, file_path: str) -> Tuple[bool, Optional[str]]:
-        """
-        Validate file for processing.
-
-        Returns:
-            Tuple of (is_valid, error_message)
-        """
-        path = Path(file_path)
-
-        # Check if file exists
-        if not path.exists():
-            return False, f"File not found: {file_path}"
-
-        # Check if it's a file (not directory)
-        if not path.is_file():
-            return False, f"Path is not a file: {file_path}"
-
-        # Check file size
-        file_size = path.stat().st_size
-        if file_size > self.MAX_FILE_SIZE:
-            return (
-                False,
-                f"File too large: {file_size} bytes (max: {self.MAX_FILE_SIZE})",
-            )
-
-        # Check file extension
-        extension = path.suffix.lower().lstrip(".")
-        supported = self._definition.metadata.get("supported_formats", [])
-        if extension and extension not in supported:
-            # Try to detect by mime type
-            mime_type, _ = mimetypes.guess_type(file_path)
-            if not mime_type or not mime_type.startswith("text/"):
-                return False, f"Unsupported file format: {extension}"
-
-        return True, None
-
-    def _read_file(self, file_path: str) -> str:
-        """
-        Read file content with appropriate encoding.
-
-        Args:
-            file_path: Path to file
-
-        Returns:
-            File content as string
-        """
-        path = Path(file_path)
-
-        # Try different encodings
-        encodings = ["utf-8", "latin-1", "cp1252"]
-
-        for encoding in encodings:
-            try:
-                return path.read_text(encoding=encoding)
-            except UnicodeDecodeError:
-                continue
-
-        # If all fail, read as binary and decode with errors='ignore'
-        with file_path.open("rb") as f:
-            content = f.read()
-            return content.decode("utf-8", errors="ignore")
-
-    def _extract_code_blocks(self, text: str) -> Tuple[List[str], str]:
-        """
-        Extract code blocks from text for preservation.
-
-        Returns:
-            Tuple of (code_blocks, text_without_code)
-        """
-        code_blocks = []
-        placeholder_template = "[[CODE_BLOCK_{}]]"
-
-        # Extract markdown code blocks
-        for match in self._code_patterns["markdown"].finditer(text):
-            code_blocks.append(match.group(0))
-            text = text.replace(
-                match.group(0), placeholder_template.format(len(code_blocks) - 1)
-            )
-
-        return code_blocks, text
-
-    def _restore_code_blocks(self, text: str, code_blocks: List[str]) -> str:
-        """Restore code blocks to summarized text."""
-        for i, block in enumerate(code_blocks):
-            placeholder = f"[[CODE_BLOCK_{i}]]"
-            text = text.replace(placeholder, block)
-        return text
-
-    def _truncate_at_sentence(self, text: str, max_chars: int) -> str:
-        """
-        Truncate text at sentence boundary.
-
-        WHY: Truncating mid-sentence makes summaries harder to read and
-        can lose important context. Sentence boundaries preserve meaning.
-        """
-        if len(text) <= max_chars:
-            return text
-
-        # Find sentence boundaries
-        sentences = self._sentence_endings.split(text)
-
-        result = []
-        current_length = 0
-
-        for i, sentence in enumerate(sentences):
-            # Add sentence ending back if not last sentence
-            if i < len(sentences) - 1:
-                sentence += ". "
-
-            if current_length + len(sentence) <= max_chars:
-                result.append(sentence)
-                current_length += len(sentence)
-            else:
-                # Add partial sentence if we haven't added anything yet
-                if not result and sentence:
-                    result.append(sentence[: max_chars - 3] + "...")
-                break
-
-        return "".join(result)

-    def _summarize_brief(self, text: str, max_chars: int) -> str:
-        """
-        Brief summarization - first and last portions.
-
-        WHY: For quick overview, showing beginning and end gives context
-        about what the document covers and its conclusions.
-        """
-        if len(text) <= max_chars:
-            return text
-
-        # Split available space between beginning and end
-        half_chars = max_chars // 2 - 20  # Reserve space for separator
-
-        beginning = self._truncate_at_sentence(text, half_chars)
-        ending = self._truncate_at_sentence(text[-half_chars * 2 :], half_chars)
-
-        return f"{beginning}\n\n[... content omitted for brevity ...]\n\n{ending}"
-
-    def _summarize_detailed(self, text: str, max_chars: int) -> str:
-        """
-        Detailed summarization - extract key paragraphs.
-
-        WHY: For technical documents, we want to preserve more structure
-        and include middle sections that might contain important details.
-        """
-        if len(text) <= max_chars:
-            return text
-
-        # Split into paragraphs
-        paragraphs = text.split("\n\n")
-
-        # Calculate importance scores (based on length and position)
-        scored_paragraphs = []
-        for i, para in enumerate(paragraphs):
-            # Skip empty paragraphs
-            if not para.strip():
-                continue
-
-            # Score based on position (beginning and end are important)
-            position_score = 1.0
-            if i < 3:  # First 3 paragraphs
-                position_score = 2.0
-            elif i >= len(paragraphs) - 3:  # Last 3 paragraphs
-                position_score = 1.5
-
-            # Score based on content indicators
-            content_score = 1.0
-            if any(
-                keyword in para.lower()
-                for keyword in ["summary", "conclusion", "important", "note", "warning"]
-            ):
-                content_score = 1.5
-
-            score = position_score * content_score * (1 + len(para) / 1000)
-            scored_paragraphs.append((score, i, para))
-
-        # Sort by score and select top paragraphs
-        scored_paragraphs.sort(reverse=True)
-
-        selected = []
-        current_length = 0
-
-        for score, original_index, para in scored_paragraphs:
-            truncated_para = self._truncate_at_sentence(
-                para, max_chars - current_length
-            )
-            if current_length + len(truncated_para) <= max_chars:
-                selected.append((original_index, truncated_para))
-                current_length += len(truncated_para) + 2  # Account for newlines
-
-            if current_length >= max_chars * 0.9:  # Stop at 90% to leave some buffer
-                break
-
-        # Sort selected paragraphs by original order
-        selected.sort()
-
-        return "\n\n".join(para for _, para in selected)
-
-    def _summarize_key_points(self, text: str, max_chars: int) -> str:
-        """
-        Extract key points and bullet points.
-
-        WHY: Many documents have lists, bullet points, or numbered items
-        that contain the most important information in condensed form.
-        """
-        if len(text) <= max_chars:
-            return text
-
-        lines = text.split("\n")
-        key_lines = []
-
-        # Patterns for identifying key points
-        list_patterns = [
-            re.compile(r"^\s*[-*•]\s+"),  # Bullet points
-            re.compile(r"^\s*\d+[.)]\s+"),  # Numbered lists
-            re.compile(r"^\s*[A-Z][.)]\s+"),  # Letter lists
-            re.compile(r"^#+\s+"),  # Markdown headers
-            re.compile(r"^[A-Z][^.!?]*:"),  # Definition lists
-        ]
-
-        # Extract lines that match key point patterns
-        for line in lines:
-            if any(pattern.match(line) for pattern in list_patterns):
-                key_lines.append(line)
-
-        # If we found key points, use them
-        if key_lines:
-            result = "\n".join(key_lines)
-            if len(result) <= max_chars:
-                return result
-            return self._truncate_at_sentence(result, max_chars)
-
-        # Fallback to brief summary if no key points found
-        return self._summarize_brief(text, max_chars)
-
-    def _summarize_technical(
-        self, text: str, max_chars: int, preserve_code: bool
-    ) -> str:
-        """
-        Technical summarization - preserve code and technical details.
-
-        WHY: For code files and technical documentation, we need to
-        preserve function signatures, class definitions, and important code.
-        """
-        if len(text) <= max_chars:
-            return text
-
-        # Extract and preserve code blocks if requested
-        code_blocks = []
-        text_without_code = text
-
-        if preserve_code:
-            code_blocks, text_without_code = self._extract_code_blocks(text)
-
-        # Extract technical patterns
-        tech_patterns = [
-            re.compile(
-                r"^(class|def|function|interface|struct)\s+\w+.*$", re.MULTILINE
-            ),  # Definitions
-            re.compile(
-                r"^(import|from|require|include|using)\s+.*$", re.MULTILINE
-            ),  # Imports
-            re.compile(r"^\s*@\w+.*$", re.MULTILINE),  # Decorators/Annotations
-            re.compile(
-                r"^(public|private|protected|static).*\{?$", re.MULTILINE
-            ),  # Method signatures
-        ]
-
-        important_lines = []
-        for pattern in tech_patterns:
-            important_lines.extend(pattern.findall(text_without_code))
-
-        # Build technical summary
-        result_parts = []
-
-        # Add imports/includes first
-        imports = [
-            line
-            for line in important_lines
-            if any(
-                keyword in line
-                for keyword in ["import", "from", "require", "include", "using"]
-            )
-        ]
-        if imports:
-            result_parts.append("# Imports/Dependencies\n" + "\n".join(imports[:10]))
-
-        # Add class/function definitions
-        definitions = [
-            line
-            for line in important_lines
-            if any(
-                keyword in line
-                for keyword in ["class", "def", "function", "interface", "struct"]
-            )
-        ]
-        if definitions:
-            result_parts.append("# Key Definitions\n" + "\n".join(definitions[:20]))
-
-        # Add some code blocks if space allows
-        if preserve_code and code_blocks:
-            result_parts.append("# Code Samples")
-            for _i, block in enumerate(code_blocks[:3]):  # Limit to first 3 blocks
-                if len("\n".join(result_parts)) + len(block) < max_chars * 0.8:
-                    result_parts.append(block)
-
-        result = "\n\n".join(result_parts)
-
-        # If still too long, truncate
-        if len(result) > max_chars:
-            result = self._truncate_at_sentence(result, max_chars)
-
-        return result
-
-    def _process_chunks(
-        self, text: str, mode: str, max_chars_per_chunk: int, preserve_code: bool
-    ) -> str:
-        """
-        Process large documents in chunks.
-
-        WHY: Very large documents need to be processed in chunks to
-        avoid memory issues and maintain performance.
-        """
-        chunks = []
-        chunk_size = self.CHUNK_SIZE
-
-        for i in range(0, len(text), chunk_size):
-            chunk = text[i : i + chunk_size]
-
-            # Summarize chunk based on mode
-            if mode == "brief":
-                summarized = self._summarize_brief(chunk, max_chars_per_chunk)
-            elif mode == "detailed":
-                summarized = self._summarize_detailed(chunk, max_chars_per_chunk)
-            elif mode == "key_points":
-                summarized = self._summarize_key_points(chunk, max_chars_per_chunk)
-            elif mode == "technical":
-                summarized = self._summarize_technical(
-                    chunk, max_chars_per_chunk, preserve_code
-                )
-            else:
-                summarized = self._summarize_brief(chunk, max_chars_per_chunk)
-
-            chunks.append(summarized)
-
-        return "\n\n[--- Next Section ---]\n\n".join(chunks)
-
-    async def invoke(self, invocation: MCPToolInvocation) -> MCPToolResult:
-        """
-        Invoke the document summarizer tool.
-
-        Args:
-            invocation: Tool invocation request
-
-        Returns:
-            Tool execution result with summary
-        """
-        start_time = datetime.now(timezone.utc)
-
-        try:
-            # Get parameters
-            file_path = invocation.parameters["file_path"]
-            mode = invocation.parameters.get("mode", "brief")
-            max_tokens = invocation.parameters.get("max_tokens")
-            max_percentage = invocation.parameters.get("max_percentage", 40)
-            preserve_code = invocation.parameters.get("preserve_code", True)
-            use_cache = invocation.parameters.get("use_cache", True)
-
-            # Validate file
-            is_valid, error_msg = self._validate_file(file_path)
-            if not is_valid:
-                raise ValueError(error_msg)
-
-            # Check cache if enabled
-            cache_hit = False
-            if use_cache:
-                cache_key = f"{self._get_file_hash(file_path)}:{mode}:{max_percentage}"
-                cached_result = self._cache.get(cache_key)
-                if cached_result:
-                    cache_hit = True
-                    execution_time = (
-                        datetime.now(timezone.utc) - start_time
-                    ).total_seconds()
-                    self._update_metrics(True, execution_time)
-
-                    return MCPToolResult(
-                        success=True,
-                        data={
-                            **cached_result,
-                            "cache_hit": True,
-                            "cache_stats": self._cache.get_stats(),
-                        },
-                        execution_time=execution_time,
-                        metadata={
-                            "tool": "document_summarizer",
-                            "mode": mode,
-                            "cached": True,
-                        },
-                    )
-
-            # Read file content
-            content = self._read_file(file_path)
-            original_size = len(content)
-
-            # Calculate target size
-            if max_tokens:
-                max_chars = max_tokens * self.CHARS_PER_TOKEN
-            else:
-                max_chars = int(original_size * (max_percentage / 100))
-
-            # Process based on file size
-            chunks_processed = 1
-            if original_size > self.CHUNK_SIZE:
-                # Process in chunks for large files
-                chunks_processed = (original_size // self.CHUNK_SIZE) + 1
-                max_chars_per_chunk = max_chars // chunks_processed
-                summary = self._process_chunks(
-                    content, mode, max_chars_per_chunk, preserve_code
-                )
-            # Process entire file
-            elif mode == "brief":
-                summary = self._summarize_brief(content, max_chars)
-            elif mode == "detailed":
-                summary = self._summarize_detailed(content, max_chars)
-            elif mode == "key_points":
-                summary = self._summarize_key_points(content, max_chars)
-            elif mode == "technical":
-                summary = self._summarize_technical(content, max_chars, preserve_code)
-            else:
-                summary = self._summarize_brief(content, max_chars)
-
-            # Calculate metrics
-            summary_size = len(summary)
-            reduction_percentage = (
-                (original_size - summary_size) / original_size
-            ) * 100
-
-            # Token estimates
-            original_tokens = self._estimate_tokens(content)
-            summary_tokens = self._estimate_tokens(summary)
-            saved_tokens = original_tokens - summary_tokens
-
-            # Prepare result
-            result = {
-                "summary": summary,
-                "original_size": original_size,
-                "summary_size": summary_size,
-                "reduction_percentage": round(reduction_percentage, 2),
-                "token_estimate": {
-                    "original": original_tokens,
-                    "summary": summary_tokens,
-                    "saved": saved_tokens,
-                },
-                "chunks_processed": chunks_processed,
-                "cache_hit": cache_hit,
-            }
-
-            # Cache result if enabled
-            if use_cache and not cache_hit:
-                cache_key = f"{self._get_file_hash(file_path)}:{mode}:{max_percentage}"
-                self._cache.put(cache_key, result.copy(), summary_size)
-
-            # Calculate execution time
-            execution_time = (datetime.now(timezone.utc) - start_time).total_seconds()
-
-            # Update metrics
-            self._update_metrics(True, execution_time)
-
-            # Add cache stats to result
-            result["cache_stats"] = self._cache.get_stats()
-
-            return MCPToolResult(
-                success=True,
-                data=result,
-                execution_time=execution_time,
-                metadata={
-                    "tool": "document_summarizer",
-                    "mode": mode,
-                    "file_path": file_path,
-                    "reduction_achieved": reduction_percentage >= 60,
-                },
-            )
-
-        except Exception as e:
-            execution_time = (datetime.now(timezone.utc) - start_time).total_seconds()
-            self._update_metrics(False, execution_time)
-            self._metrics["last_error"] = str(e)
-
-            self.log_error(f"Document summarizer failed: {e}")
-
-            return MCPToolResult(
-                success=False,
-                error=f"Document summarizer failed: {e!s}",
-                execution_time=execution_time,
-                metadata={
-                    "tool": "document_summarizer",
-                    "error_type": type(e).__name__,
-                },
-            )
-
-    async def initialize(self) -> bool:
-        """
-        Initialize the document summarizer tool.
-
-        Returns:
-            True if initialization successful
-        """
-        try:
-            self.log_info("Initializing document summarizer tool")
-
-            # Clear cache on initialization
-            self._cache = LRUCache(max_size=100, max_memory_mb=50)
-
-            self._initialized = True
-            self.log_info("Document summarizer tool initialized successfully")
-            return True
-
-        except Exception as e:
-            self.log_error(f"Failed to initialize document summarizer: {e}")
-            return False
-
-    async def shutdown(self) -> None:
-        """
-        Shutdown the document summarizer tool and clean up resources.
-        """
-        try:
-            self.log_info("Shutting down document summarizer tool")
-
-            # Log final cache stats
-            cache_stats = self._cache.get_stats()
-            self.log_info(f"Final cache stats: {cache_stats}")
-
-            # Clear cache
-            self._cache = None
-
-            self._initialized = False
-            self.log_info("Document summarizer tool shutdown complete")
-
-        except Exception as e:
-            self.log_error(f"Error during document summarizer shutdown: {e}")