claude-mpm 5.1.9__py3-none-any.whl → 5.4.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of claude-mpm might be problematic. Click here for more details.

Files changed (176):
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/__init__.py +4 -0
  3. claude_mpm/agents/CLAUDE_MPM_TEACHER_OUTPUT_STYLE.md +1 -1
  4. claude_mpm/agents/PM_INSTRUCTIONS.md +290 -34
  5. claude_mpm/agents/agent_loader.py +13 -44
  6. claude_mpm/agents/templates/circuit-breakers.md +138 -1
  7. claude_mpm/cli/__main__.py +4 -0
  8. claude_mpm/cli/chrome_devtools_installer.py +175 -0
  9. claude_mpm/cli/commands/agent_state_manager.py +8 -17
  10. claude_mpm/cli/commands/agents.py +0 -31
  11. claude_mpm/cli/commands/auto_configure.py +210 -25
  12. claude_mpm/cli/commands/config.py +88 -2
  13. claude_mpm/cli/commands/configure.py +1097 -158
  14. claude_mpm/cli/commands/configure_agent_display.py +15 -6
  15. claude_mpm/cli/commands/mpm_init/core.py +160 -46
  16. claude_mpm/cli/commands/mpm_init/knowledge_extractor.py +481 -0
  17. claude_mpm/cli/commands/mpm_init/prompts.py +280 -0
  18. claude_mpm/cli/commands/skills.py +214 -189
  19. claude_mpm/cli/commands/summarize.py +413 -0
  20. claude_mpm/cli/executor.py +11 -3
  21. claude_mpm/cli/parsers/agents_parser.py +0 -9
  22. claude_mpm/cli/parsers/auto_configure_parser.py +0 -138
  23. claude_mpm/cli/parsers/base_parser.py +5 -0
  24. claude_mpm/cli/parsers/config_parser.py +153 -83
  25. claude_mpm/cli/parsers/skills_parser.py +3 -2
  26. claude_mpm/cli/startup.py +550 -94
  27. claude_mpm/commands/mpm-config.md +265 -0
  28. claude_mpm/commands/mpm-help.md +14 -95
  29. claude_mpm/commands/mpm-organize.md +500 -0
  30. claude_mpm/config/agent_sources.py +27 -0
  31. claude_mpm/core/framework/formatters/content_formatter.py +3 -13
  32. claude_mpm/core/framework/loaders/agent_loader.py +8 -5
  33. claude_mpm/core/framework_loader.py +4 -2
  34. claude_mpm/core/logger.py +13 -0
  35. claude_mpm/core/socketio_pool.py +3 -3
  36. claude_mpm/core/unified_agent_registry.py +5 -15
  37. claude_mpm/hooks/claude_hooks/correlation_manager.py +60 -0
  38. claude_mpm/hooks/claude_hooks/event_handlers.py +211 -78
  39. claude_mpm/hooks/claude_hooks/hook_handler.py +6 -0
  40. claude_mpm/hooks/claude_hooks/installer.py +33 -10
  41. claude_mpm/hooks/claude_hooks/memory_integration.py +26 -9
  42. claude_mpm/hooks/claude_hooks/response_tracking.py +2 -3
  43. claude_mpm/hooks/claude_hooks/services/connection_manager.py +4 -0
  44. claude_mpm/hooks/memory_integration_hook.py +46 -1
  45. claude_mpm/init.py +0 -19
  46. claude_mpm/scripts/claude-hook-handler.sh +58 -18
  47. claude_mpm/scripts/launch_monitor.py +93 -13
  48. claude_mpm/scripts/start_activity_logging.py +0 -0
  49. claude_mpm/services/agents/agent_recommendation_service.py +278 -0
  50. claude_mpm/services/agents/agent_review_service.py +280 -0
  51. claude_mpm/services/agents/deployment/agent_discovery_service.py +2 -3
  52. claude_mpm/services/agents/deployment/agent_template_builder.py +4 -2
  53. claude_mpm/services/agents/deployment/multi_source_deployment_service.py +78 -9
  54. claude_mpm/services/agents/deployment/remote_agent_discovery_service.py +335 -53
  55. claude_mpm/services/agents/git_source_manager.py +34 -0
  56. claude_mpm/services/agents/loading/base_agent_manager.py +1 -13
  57. claude_mpm/services/agents/sources/git_source_sync_service.py +8 -1
  58. claude_mpm/services/agents/toolchain_detector.py +10 -6
  59. claude_mpm/services/analysis/__init__.py +11 -1
  60. claude_mpm/services/analysis/clone_detector.py +1030 -0
  61. claude_mpm/services/command_deployment_service.py +81 -10
  62. claude_mpm/services/event_bus/config.py +3 -1
  63. claude_mpm/services/git/git_operations_service.py +93 -8
  64. claude_mpm/services/monitor/daemon.py +9 -2
  65. claude_mpm/services/monitor/daemon_manager.py +39 -3
  66. claude_mpm/services/monitor/server.py +225 -19
  67. claude_mpm/services/self_upgrade_service.py +120 -12
  68. claude_mpm/services/skills/__init__.py +3 -0
  69. claude_mpm/services/skills/git_skill_source_manager.py +32 -2
  70. claude_mpm/services/skills/selective_skill_deployer.py +704 -0
  71. claude_mpm/services/skills/skill_to_agent_mapper.py +406 -0
  72. claude_mpm/services/skills_deployer.py +126 -9
  73. claude_mpm/services/socketio/event_normalizer.py +15 -1
  74. claude_mpm/services/socketio/server/core.py +160 -21
  75. claude_mpm/services/version_control/git_operations.py +103 -0
  76. claude_mpm/utils/agent_filters.py +17 -44
  77. {claude_mpm-5.1.9.dist-info → claude_mpm-5.4.22.dist-info}/METADATA +47 -84
  78. {claude_mpm-5.1.9.dist-info → claude_mpm-5.4.22.dist-info}/RECORD +82 -161
  79. claude_mpm-5.4.22.dist-info/entry_points.txt +5 -0
  80. claude_mpm-5.4.22.dist-info/licenses/LICENSE +94 -0
  81. claude_mpm-5.4.22.dist-info/licenses/LICENSE-FAQ.md +153 -0
  82. claude_mpm/agents/BASE_AGENT_TEMPLATE.md +0 -292
  83. claude_mpm/agents/BASE_DOCUMENTATION.md +0 -53
  84. claude_mpm/agents/BASE_ENGINEER.md +0 -658
  85. claude_mpm/agents/BASE_OPS.md +0 -219
  86. claude_mpm/agents/BASE_PM.md +0 -480
  87. claude_mpm/agents/BASE_PROMPT_ENGINEER.md +0 -787
  88. claude_mpm/agents/BASE_QA.md +0 -167
  89. claude_mpm/agents/BASE_RESEARCH.md +0 -53
  90. claude_mpm/agents/base_agent.json +0 -31
  91. claude_mpm/agents/base_agent_loader.py +0 -601
  92. claude_mpm/cli/commands/agents_detect.py +0 -380
  93. claude_mpm/cli/commands/agents_recommend.py +0 -309
  94. claude_mpm/cli/ticket_cli.py +0 -35
  95. claude_mpm/commands/mpm-agents-auto-configure.md +0 -278
  96. claude_mpm/commands/mpm-agents-detect.md +0 -177
  97. claude_mpm/commands/mpm-agents-list.md +0 -131
  98. claude_mpm/commands/mpm-agents-recommend.md +0 -223
  99. claude_mpm/commands/mpm-config-view.md +0 -150
  100. claude_mpm/commands/mpm-ticket-organize.md +0 -304
  101. claude_mpm/dashboard/analysis_runner.py +0 -455
  102. claude_mpm/dashboard/index.html +0 -13
  103. claude_mpm/dashboard/open_dashboard.py +0 -66
  104. claude_mpm/dashboard/static/css/activity.css +0 -1958
  105. claude_mpm/dashboard/static/css/connection-status.css +0 -370
  106. claude_mpm/dashboard/static/css/dashboard.css +0 -4701
  107. claude_mpm/dashboard/static/js/components/activity-tree.js +0 -1871
  108. claude_mpm/dashboard/static/js/components/agent-hierarchy.js +0 -777
  109. claude_mpm/dashboard/static/js/components/agent-inference.js +0 -956
  110. claude_mpm/dashboard/static/js/components/build-tracker.js +0 -333
  111. claude_mpm/dashboard/static/js/components/code-simple.js +0 -857
  112. claude_mpm/dashboard/static/js/components/connection-debug.js +0 -654
  113. claude_mpm/dashboard/static/js/components/diff-viewer.js +0 -891
  114. claude_mpm/dashboard/static/js/components/event-processor.js +0 -542
  115. claude_mpm/dashboard/static/js/components/event-viewer.js +0 -1155
  116. claude_mpm/dashboard/static/js/components/export-manager.js +0 -368
  117. claude_mpm/dashboard/static/js/components/file-change-tracker.js +0 -443
  118. claude_mpm/dashboard/static/js/components/file-change-viewer.js +0 -690
  119. claude_mpm/dashboard/static/js/components/file-tool-tracker.js +0 -724
  120. claude_mpm/dashboard/static/js/components/file-viewer.js +0 -580
  121. claude_mpm/dashboard/static/js/components/hud-library-loader.js +0 -211
  122. claude_mpm/dashboard/static/js/components/hud-manager.js +0 -671
  123. claude_mpm/dashboard/static/js/components/hud-visualizer.js +0 -1718
  124. claude_mpm/dashboard/static/js/components/module-viewer.js +0 -2764
  125. claude_mpm/dashboard/static/js/components/session-manager.js +0 -579
  126. claude_mpm/dashboard/static/js/components/socket-manager.js +0 -368
  127. claude_mpm/dashboard/static/js/components/ui-state-manager.js +0 -749
  128. claude_mpm/dashboard/static/js/components/unified-data-viewer.js +0 -1824
  129. claude_mpm/dashboard/static/js/components/working-directory.js +0 -920
  130. claude_mpm/dashboard/static/js/connection-manager.js +0 -536
  131. claude_mpm/dashboard/static/js/dashboard.js +0 -1914
  132. claude_mpm/dashboard/static/js/extension-error-handler.js +0 -164
  133. claude_mpm/dashboard/static/js/socket-client.js +0 -1474
  134. claude_mpm/dashboard/static/js/tab-isolation-fix.js +0 -185
  135. claude_mpm/dashboard/static/socket.io.min.js +0 -7
  136. claude_mpm/dashboard/static/socket.io.v4.8.1.backup.js +0 -7
  137. claude_mpm/dashboard/templates/code_simple.html +0 -153
  138. claude_mpm/dashboard/templates/index.html +0 -606
  139. claude_mpm/dashboard/test_dashboard.html +0 -372
  140. claude_mpm/scripts/mcp_server.py +0 -75
  141. claude_mpm/scripts/mcp_wrapper.py +0 -39
  142. claude_mpm/services/mcp_gateway/__init__.py +0 -159
  143. claude_mpm/services/mcp_gateway/auto_configure.py +0 -369
  144. claude_mpm/services/mcp_gateway/config/__init__.py +0 -17
  145. claude_mpm/services/mcp_gateway/config/config_loader.py +0 -296
  146. claude_mpm/services/mcp_gateway/config/config_schema.py +0 -243
  147. claude_mpm/services/mcp_gateway/config/configuration.py +0 -429
  148. claude_mpm/services/mcp_gateway/core/__init__.py +0 -43
  149. claude_mpm/services/mcp_gateway/core/base.py +0 -312
  150. claude_mpm/services/mcp_gateway/core/exceptions.py +0 -253
  151. claude_mpm/services/mcp_gateway/core/interfaces.py +0 -443
  152. claude_mpm/services/mcp_gateway/core/process_pool.py +0 -977
  153. claude_mpm/services/mcp_gateway/core/singleton_manager.py +0 -315
  154. claude_mpm/services/mcp_gateway/core/startup_verification.py +0 -316
  155. claude_mpm/services/mcp_gateway/main.py +0 -589
  156. claude_mpm/services/mcp_gateway/registry/__init__.py +0 -12
  157. claude_mpm/services/mcp_gateway/registry/service_registry.py +0 -412
  158. claude_mpm/services/mcp_gateway/registry/tool_registry.py +0 -489
  159. claude_mpm/services/mcp_gateway/server/__init__.py +0 -15
  160. claude_mpm/services/mcp_gateway/server/mcp_gateway.py +0 -414
  161. claude_mpm/services/mcp_gateway/server/stdio_handler.py +0 -372
  162. claude_mpm/services/mcp_gateway/server/stdio_server.py +0 -712
  163. claude_mpm/services/mcp_gateway/tools/__init__.py +0 -36
  164. claude_mpm/services/mcp_gateway/tools/base_adapter.py +0 -485
  165. claude_mpm/services/mcp_gateway/tools/document_summarizer.py +0 -789
  166. claude_mpm/services/mcp_gateway/tools/external_mcp_services.py +0 -654
  167. claude_mpm/services/mcp_gateway/tools/health_check_tool.py +0 -456
  168. claude_mpm/services/mcp_gateway/tools/hello_world.py +0 -551
  169. claude_mpm/services/mcp_gateway/tools/kuzu_memory_service.py +0 -555
  170. claude_mpm/services/mcp_gateway/utils/__init__.py +0 -14
  171. claude_mpm/services/mcp_gateway/utils/package_version_checker.py +0 -160
  172. claude_mpm/services/mcp_gateway/utils/update_preferences.py +0 -170
  173. claude_mpm-5.1.9.dist-info/entry_points.txt +0 -10
  174. claude_mpm-5.1.9.dist-info/licenses/LICENSE +0 -21
  175. {claude_mpm-5.1.9.dist-info → claude_mpm-5.4.22.dist-info}/WHEEL +0 -0
  176. {claude_mpm-5.1.9.dist-info → claude_mpm-5.4.22.dist-info}/top_level.txt +0 -0
@@ -1,789 +0,0 @@
1
- """
2
- Document Summarizer Tool
3
- ========================
4
-
5
- Intelligent document processing tool for solving Claude Code memory issues.
6
- Supports multiple file formats and summarization strategies.
7
-
8
- Part of ISS-0037: Document Summarizer Tool - Intelligent Document Processing
9
- """
10
-
11
- import hashlib
12
- import mimetypes
13
- import re
14
- from collections import OrderedDict
15
- from datetime import datetime, timezone
16
- from pathlib import Path
17
- from typing import Any, Dict, List, Optional, Tuple
18
-
19
- from claude_mpm.services.mcp_gateway.core.interfaces import (
20
- MCPToolDefinition,
21
- MCPToolInvocation,
22
- MCPToolResult,
23
- )
24
- from claude_mpm.services.mcp_gateway.tools.base_adapter import BaseToolAdapter
25
-
26
-
27
class LRUCache:
    """
    Simple LRU cache implementation for document summaries.

    WHY: We need a memory-efficient cache to avoid re-processing documents
    that are accessed repeatedly, which is common in Claude Code sessions.
    """

    def __init__(self, max_size: int = 100, max_memory_mb: int = 100):
        """
        Initialize LRU cache.

        Args:
            max_size: Maximum number of entries.
            max_memory_mb: Maximum memory usage in MB.
        """
        self.cache = OrderedDict()
        self.max_size = max_size
        self.max_memory_bytes = max_memory_mb * 1024 * 1024
        self.current_memory = 0  # running total of cached entry sizes, bytes
        self.hits = 0
        self.misses = 0

    def get(self, key: str) -> Optional[Dict[str, Any]]:
        """Get item from cache, updating LRU order; returns None on a miss."""
        if key in self.cache:
            self.hits += 1
            # Move to end (most recently used)
            self.cache.move_to_end(key)
            return self.cache[key]
        self.misses += 1
        return None

    def put(self, key: str, value: Dict[str, Any], size_bytes: int) -> None:
        """
        Add item to cache, evicting LRU items if necessary.

        FIX: an entry larger than the whole memory budget is now rejected
        outright. Previously such an entry evicted the entire cache and was
        stored anyway, violating the configured memory cap.
        """
        # Remove an existing entry first so its size is not double-counted.
        if key in self.cache:
            old_size = self.cache[key].get("size_bytes", 0)
            self.current_memory -= old_size
            del self.cache[key]

        # Refuse to cache anything that could never fit within the budget.
        if size_bytes > self.max_memory_bytes:
            return

        # Evict least-recently-used entries until there is room.
        while (
            len(self.cache) >= self.max_size
            or self.current_memory + size_bytes > self.max_memory_bytes
        ):
            if not self.cache:
                break
            _removed_key, removed_value = self.cache.popitem(last=False)
            self.current_memory -= removed_value.get("size_bytes", 0)

        # Add new item; record its size inside the value for later eviction.
        value["size_bytes"] = size_bytes
        self.cache[key] = value
        self.current_memory += size_bytes

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics: size, memory, hit/miss counts, hit rate."""
        total = self.hits + self.misses
        hit_rate = self.hits / total if total > 0 else 0
        return {
            "size": len(self.cache),
            "memory_mb": self.current_memory / (1024 * 1024),
            "hits": self.hits,
            "misses": self.misses,
            "hit_rate": hit_rate,
        }
98
-
99
-
100
- class DocumentSummarizerTool(BaseToolAdapter):
101
- """
102
- Document summarizer tool for intelligent document processing.
103
-
104
- WHY: Claude Code accumulates massive memory from reading full files,
105
- leading to context overflow. This tool reduces document size by 60%+
106
- while preserving essential information through intelligent summarization.
107
-
108
- DESIGN DECISIONS:
109
- - Use sentence boundary detection to preserve readability
110
- - Implement multiple summarization modes for different use cases
111
- - Cache summaries to avoid re-processing frequently accessed files
112
- - Support common file formats used in development
113
- """
114
-
115
- # File size limits (in bytes)
116
- MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
117
- CHUNK_SIZE = 50000 # Characters per chunk for large files
118
-
119
- # Token estimation (rough approximation)
120
- CHARS_PER_TOKEN = 4 # Approximate for Claude's tokenizer
121
-
122
    def __init__(self):
        """Initialize the document summarizer tool.

        Builds the declarative MCP tool definition (input/output JSON
        schemas plus supported-format metadata consulted by
        _validate_file()), then sets up the summary cache and the regex
        patterns used by the summarization helpers.
        """
        definition = MCPToolDefinition(
            name="document_summarizer",
            description="Intelligently summarizes documents to reduce memory usage while preserving key information",
            # JSON schema for invocation parameters; only file_path is required.
            input_schema={
                "type": "object",
                "properties": {
                    "file_path": {
                        "type": "string",
                        "description": "Path to the document file",
                    },
                    "mode": {
                        "type": "string",
                        "enum": ["brief", "detailed", "key_points", "technical"],
                        "description": "Summarization mode",
                        "default": "brief",
                    },
                    "max_tokens": {
                        "type": "integer",
                        "description": "Maximum tokens in summary (optional)",
                        "minimum": 100,
                        "maximum": 10000,
                    },
                    "max_percentage": {
                        "type": "integer",
                        "description": "Maximum percentage of original to keep (1-100)",
                        "minimum": 1,
                        "maximum": 100,
                        "default": 40,
                    },
                    "preserve_code": {
                        "type": "boolean",
                        "description": "Whether to preserve code blocks intact",
                        "default": True,
                    },
                    "use_cache": {
                        "type": "boolean",
                        "description": "Whether to use cached summaries",
                        "default": True,
                    },
                },
                "required": ["file_path"],
            },
            # JSON schema for the result payload returned by invoke().
            output_schema={
                "type": "object",
                "properties": {
                    "summary": {
                        "type": "string",
                        "description": "The summarized content",
                    },
                    "original_size": {
                        "type": "integer",
                        "description": "Original document size in bytes",
                    },
                    "summary_size": {
                        "type": "integer",
                        "description": "Summary size in bytes",
                    },
                    "reduction_percentage": {
                        "type": "number",
                        "description": "Percentage reduction achieved",
                    },
                    "token_estimate": {
                        "type": "object",
                        "properties": {
                            "original": {"type": "integer"},
                            "summary": {"type": "integer"},
                            "saved": {"type": "integer"},
                        },
                    },
                    "chunks_processed": {
                        "type": "integer",
                        "description": "Number of chunks processed for large files",
                    },
                    "cache_hit": {
                        "type": "boolean",
                        "description": "Whether summary was retrieved from cache",
                    },
                },
            },
            version="1.0.0",
            # supported_formats is read back by _validate_file() via
            # self._definition.metadata.
            metadata={
                "category": "document_processing",
                "supported_formats": [
                    "txt",
                    "md",
                    "pdf",
                    "docx",
                    "json",
                    "yaml",
                    "csv",
                    "py",
                    "js",
                    "ts",
                    "java",
                    "cpp",
                    "c",
                    "h",
                    "hpp",
                ],
            },
        )
        super().__init__(definition)

        # Initialize cache (smaller memory cap than the LRUCache default)
        self._cache = LRUCache(max_size=100, max_memory_mb=50)

        # Sentence boundary patterns used by _truncate_at_sentence()
        self._sentence_endings = re.compile(r"[.!?]\s+")

        # Code block patterns for different formats; only "markdown" is used
        # by _extract_code_blocks() in the visible code.
        self._code_patterns = {
            "markdown": re.compile(r"```[\s\S]*?```", re.MULTILINE),
            "inline": re.compile(r"`[^`]+`"),
            "indent": re.compile(r"^( |\t).*$", re.MULTILINE),
        }
239
-
240
- def _get_file_hash(self, file_path: str) -> str:
241
- """Generate hash for file caching."""
242
- stat = Path(file_path).stat()
243
- hash_input = f"{file_path}:{stat.st_size}:{stat.st_mtime}"
244
- return hashlib.md5(hash_input.encode()).hexdigest()
245
-
246
- def _estimate_tokens(self, text: str) -> int:
247
- """Estimate token count for text."""
248
- return len(text) // self.CHARS_PER_TOKEN
249
-
250
- def _validate_file(self, file_path: str) -> Tuple[bool, Optional[str]]:
251
- """
252
- Validate file for processing.
253
-
254
- Returns:
255
- Tuple of (is_valid, error_message)
256
- """
257
- path = Path(file_path)
258
-
259
- # Check if file exists
260
- if not path.exists():
261
- return False, f"File not found: {file_path}"
262
-
263
- # Check if it's a file (not directory)
264
- if not path.is_file():
265
- return False, f"Path is not a file: {file_path}"
266
-
267
- # Check file size
268
- file_size = path.stat().st_size
269
- if file_size > self.MAX_FILE_SIZE:
270
- return (
271
- False,
272
- f"File too large: {file_size} bytes (max: {self.MAX_FILE_SIZE})",
273
- )
274
-
275
- # Check file extension
276
- extension = path.suffix.lower().lstrip(".")
277
- supported = self._definition.metadata.get("supported_formats", [])
278
- if extension and extension not in supported:
279
- # Try to detect by mime type
280
- mime_type, _ = mimetypes.guess_type(file_path)
281
- if not mime_type or not mime_type.startswith("text/"):
282
- return False, f"Unsupported file format: {extension}"
283
-
284
- return True, None
285
-
286
- def _read_file(self, file_path: str) -> str:
287
- """
288
- Read file content with appropriate encoding.
289
-
290
- Args:
291
- file_path: Path to file
292
-
293
- Returns:
294
- File content as string
295
- """
296
- path = Path(file_path)
297
-
298
- # Try different encodings
299
- encodings = ["utf-8", "latin-1", "cp1252"]
300
-
301
- for encoding in encodings:
302
- try:
303
- return path.read_text(encoding=encoding)
304
- except UnicodeDecodeError:
305
- continue
306
-
307
- # If all fail, read as binary and decode with errors='ignore'
308
- with file_path.open("rb") as f:
309
- content = f.read()
310
- return content.decode("utf-8", errors="ignore")
311
-
312
- def _extract_code_blocks(self, text: str) -> Tuple[List[str], str]:
313
- """
314
- Extract code blocks from text for preservation.
315
-
316
- Returns:
317
- Tuple of (code_blocks, text_without_code)
318
- """
319
- code_blocks = []
320
- placeholder_template = "[[CODE_BLOCK_{}]]"
321
-
322
- # Extract markdown code blocks
323
- for match in self._code_patterns["markdown"].finditer(text):
324
- code_blocks.append(match.group(0))
325
- text = text.replace(
326
- match.group(0), placeholder_template.format(len(code_blocks) - 1)
327
- )
328
-
329
- return code_blocks, text
330
-
331
- def _restore_code_blocks(self, text: str, code_blocks: List[str]) -> str:
332
- """Restore code blocks to summarized text."""
333
- for i, block in enumerate(code_blocks):
334
- placeholder = f"[[CODE_BLOCK_{i}]]"
335
- text = text.replace(placeholder, block)
336
- return text
337
-
338
    def _truncate_at_sentence(self, text: str, max_chars: int) -> str:
        """
        Truncate text at a sentence boundary so it fits in max_chars.

        WHY: Truncating mid-sentence makes summaries harder to read and
        can lose important context. Sentence boundaries preserve meaning.

        NOTE(review): splitting on the sentence-ending pattern discards the
        original punctuation, and ". " is re-appended unconditionally — so
        "!" / "?" endings are normalized to periods in the output. Confirm
        this is acceptable before changing.
        """
        # Fast path: already within budget.
        if len(text) <= max_chars:
            return text

        # Find sentence boundaries
        sentences = self._sentence_endings.split(text)

        result = []
        current_length = 0

        for i, sentence in enumerate(sentences):
            # Add sentence ending back if not last sentence
            if i < len(sentences) - 1:
                sentence += ". "

            if current_length + len(sentence) <= max_chars:
                result.append(sentence)
                current_length += len(sentence)
            else:
                # Add partial sentence if we haven't added anything yet
                # (guarantees non-empty output even when the first sentence
                # alone exceeds the budget).
                if not result and sentence:
                    result.append(sentence[: max_chars - 3] + "...")
                break

        return "".join(result)
369
-
370
- def _summarize_brief(self, text: str, max_chars: int) -> str:
371
- """
372
- Brief summarization - first and last portions.
373
-
374
- WHY: For quick overview, showing beginning and end gives context
375
- about what the document covers and its conclusions.
376
- """
377
- if len(text) <= max_chars:
378
- return text
379
-
380
- # Split available space between beginning and end
381
- half_chars = max_chars // 2 - 20 # Reserve space for separator
382
-
383
- beginning = self._truncate_at_sentence(text, half_chars)
384
- ending = self._truncate_at_sentence(text[-half_chars * 2 :], half_chars)
385
-
386
- return f"{beginning}\n\n[... content omitted for brevity ...]\n\n{ending}"
387
-
388
    def _summarize_detailed(self, text: str, max_chars: int) -> str:
        """
        Detailed summarization - extract key paragraphs.

        WHY: For technical documents, we want to preserve more structure
        and include middle sections that might contain important details.

        Paragraphs are scored by position (start/end weighted higher),
        keyword hints, and length, then re-emitted in original order.
        """
        if len(text) <= max_chars:
            return text

        # Split into paragraphs
        paragraphs = text.split("\n\n")

        # Calculate importance scores (based on length and position)
        scored_paragraphs = []
        for i, para in enumerate(paragraphs):
            # Skip empty paragraphs
            if not para.strip():
                continue

            # Score based on position (beginning and end are important)
            position_score = 1.0
            if i < 3:  # First 3 paragraphs
                position_score = 2.0
            elif i >= len(paragraphs) - 3:  # Last 3 paragraphs
                position_score = 1.5

            # Score based on content indicators
            content_score = 1.0
            if any(
                keyword in para.lower()
                for keyword in ["summary", "conclusion", "important", "note", "warning"]
            ):
                content_score = 1.5

            # Longer paragraphs get a mild boost (len/1000 term).
            score = position_score * content_score * (1 + len(para) / 1000)
            scored_paragraphs.append((score, i, para))

        # Sort by score (descending) and select top paragraphs
        scored_paragraphs.sort(reverse=True)

        selected = []
        current_length = 0

        # NOTE(review): `score` is unused below, and once current_length
        # nears max_chars the remaining budget passed to
        # _truncate_at_sentence can go negative — verify intended.
        for score, original_index, para in scored_paragraphs:
            truncated_para = self._truncate_at_sentence(
                para, max_chars - current_length
            )
            if current_length + len(truncated_para) <= max_chars:
                selected.append((original_index, truncated_para))
                current_length += len(truncated_para) + 2  # Account for newlines

            if current_length >= max_chars * 0.9:  # Stop at 90% to leave some buffer
                break

        # Sort selected paragraphs by original order
        selected.sort()

        return "\n\n".join(para for _, para in selected)
447
-
448
- def _summarize_key_points(self, text: str, max_chars: int) -> str:
449
- """
450
- Extract key points and bullet points.
451
-
452
- WHY: Many documents have lists, bullet points, or numbered items
453
- that contain the most important information in condensed form.
454
- """
455
- if len(text) <= max_chars:
456
- return text
457
-
458
- lines = text.split("\n")
459
- key_lines = []
460
-
461
- # Patterns for identifying key points
462
- list_patterns = [
463
- re.compile(r"^\s*[-*•]\s+"), # Bullet points
464
- re.compile(r"^\s*\d+[.)]\s+"), # Numbered lists
465
- re.compile(r"^\s*[A-Z][.)]\s+"), # Letter lists
466
- re.compile(r"^#+\s+"), # Markdown headers
467
- re.compile(r"^[A-Z][^.!?]*:"), # Definition lists
468
- ]
469
-
470
- # Extract lines that match key point patterns
471
- for line in lines:
472
- if any(pattern.match(line) for pattern in list_patterns):
473
- key_lines.append(line)
474
-
475
- # If we found key points, use them
476
- if key_lines:
477
- result = "\n".join(key_lines)
478
- if len(result) <= max_chars:
479
- return result
480
- return self._truncate_at_sentence(result, max_chars)
481
-
482
- # Fallback to brief summary if no key points found
483
- return self._summarize_brief(text, max_chars)
484
-
485
- def _summarize_technical(
486
- self, text: str, max_chars: int, preserve_code: bool
487
- ) -> str:
488
- """
489
- Technical summarization - preserve code and technical details.
490
-
491
- WHY: For code files and technical documentation, we need to
492
- preserve function signatures, class definitions, and important code.
493
- """
494
- if len(text) <= max_chars:
495
- return text
496
-
497
- # Extract and preserve code blocks if requested
498
- code_blocks = []
499
- text_without_code = text
500
-
501
- if preserve_code:
502
- code_blocks, text_without_code = self._extract_code_blocks(text)
503
-
504
- # Extract technical patterns
505
- tech_patterns = [
506
- re.compile(
507
- r"^(class|def|function|interface|struct)\s+\w+.*$", re.MULTILINE
508
- ), # Definitions
509
- re.compile(
510
- r"^(import|from|require|include|using)\s+.*$", re.MULTILINE
511
- ), # Imports
512
- re.compile(r"^\s*@\w+.*$", re.MULTILINE), # Decorators/Annotations
513
- re.compile(
514
- r"^(public|private|protected|static).*\{?$", re.MULTILINE
515
- ), # Method signatures
516
- ]
517
-
518
- important_lines = []
519
- for pattern in tech_patterns:
520
- important_lines.extend(pattern.findall(text_without_code))
521
-
522
- # Build technical summary
523
- result_parts = []
524
-
525
- # Add imports/includes first
526
- imports = [
527
- line
528
- for line in important_lines
529
- if any(
530
- keyword in line
531
- for keyword in ["import", "from", "require", "include", "using"]
532
- )
533
- ]
534
- if imports:
535
- result_parts.append("# Imports/Dependencies\n" + "\n".join(imports[:10]))
536
-
537
- # Add class/function definitions
538
- definitions = [
539
- line
540
- for line in important_lines
541
- if any(
542
- keyword in line
543
- for keyword in ["class", "def", "function", "interface", "struct"]
544
- )
545
- ]
546
- if definitions:
547
- result_parts.append("# Key Definitions\n" + "\n".join(definitions[:20]))
548
-
549
- # Add some code blocks if space allows
550
- if preserve_code and code_blocks:
551
- result_parts.append("# Code Samples")
552
- for _i, block in enumerate(code_blocks[:3]): # Limit to first 3 blocks
553
- if len("\n".join(result_parts)) + len(block) < max_chars * 0.8:
554
- result_parts.append(block)
555
-
556
- result = "\n\n".join(result_parts)
557
-
558
- # If still too long, truncate
559
- if len(result) > max_chars:
560
- result = self._truncate_at_sentence(result, max_chars)
561
-
562
- return result
563
-
564
- def _process_chunks(
565
- self, text: str, mode: str, max_chars_per_chunk: int, preserve_code: bool
566
- ) -> str:
567
- """
568
- Process large documents in chunks.
569
-
570
- WHY: Very large documents need to be processed in chunks to
571
- avoid memory issues and maintain performance.
572
- """
573
- chunks = []
574
- chunk_size = self.CHUNK_SIZE
575
-
576
- for i in range(0, len(text), chunk_size):
577
- chunk = text[i : i + chunk_size]
578
-
579
- # Summarize chunk based on mode
580
- if mode == "brief":
581
- summarized = self._summarize_brief(chunk, max_chars_per_chunk)
582
- elif mode == "detailed":
583
- summarized = self._summarize_detailed(chunk, max_chars_per_chunk)
584
- elif mode == "key_points":
585
- summarized = self._summarize_key_points(chunk, max_chars_per_chunk)
586
- elif mode == "technical":
587
- summarized = self._summarize_technical(
588
- chunk, max_chars_per_chunk, preserve_code
589
- )
590
- else:
591
- summarized = self._summarize_brief(chunk, max_chars_per_chunk)
592
-
593
- chunks.append(summarized)
594
-
595
- return "\n\n[--- Next Section ---]\n\n".join(chunks)
596
-
597
- async def invoke(self, invocation: MCPToolInvocation) -> MCPToolResult:
598
- """
599
- Invoke the document summarizer tool.
600
-
601
- Args:
602
- invocation: Tool invocation request
603
-
604
- Returns:
605
- Tool execution result with summary
606
- """
607
- start_time = datetime.now(timezone.utc)
608
-
609
- try:
610
- # Get parameters
611
- file_path = invocation.parameters["file_path"]
612
- mode = invocation.parameters.get("mode", "brief")
613
- max_tokens = invocation.parameters.get("max_tokens")
614
- max_percentage = invocation.parameters.get("max_percentage", 40)
615
- preserve_code = invocation.parameters.get("preserve_code", True)
616
- use_cache = invocation.parameters.get("use_cache", True)
617
-
618
- # Validate file
619
- is_valid, error_msg = self._validate_file(file_path)
620
- if not is_valid:
621
- raise ValueError(error_msg)
622
-
623
- # Check cache if enabled
624
- cache_hit = False
625
- if use_cache:
626
- cache_key = f"{self._get_file_hash(file_path)}:{mode}:{max_percentage}"
627
- cached_result = self._cache.get(cache_key)
628
- if cached_result:
629
- cache_hit = True
630
- execution_time = (
631
- datetime.now(timezone.utc) - start_time
632
- ).total_seconds()
633
- self._update_metrics(True, execution_time)
634
-
635
- return MCPToolResult(
636
- success=True,
637
- data={
638
- **cached_result,
639
- "cache_hit": True,
640
- "cache_stats": self._cache.get_stats(),
641
- },
642
- execution_time=execution_time,
643
- metadata={
644
- "tool": "document_summarizer",
645
- "mode": mode,
646
- "cached": True,
647
- },
648
- )
649
-
650
- # Read file content
651
- content = self._read_file(file_path)
652
- original_size = len(content)
653
-
654
- # Calculate target size
655
- if max_tokens:
656
- max_chars = max_tokens * self.CHARS_PER_TOKEN
657
- else:
658
- max_chars = int(original_size * (max_percentage / 100))
659
-
660
- # Process based on file size
661
- chunks_processed = 1
662
- if original_size > self.CHUNK_SIZE:
663
- # Process in chunks for large files
664
- chunks_processed = (original_size // self.CHUNK_SIZE) + 1
665
- max_chars_per_chunk = max_chars // chunks_processed
666
- summary = self._process_chunks(
667
- content, mode, max_chars_per_chunk, preserve_code
668
- )
669
- # Process entire file
670
- elif mode == "brief":
671
- summary = self._summarize_brief(content, max_chars)
672
- elif mode == "detailed":
673
- summary = self._summarize_detailed(content, max_chars)
674
- elif mode == "key_points":
675
- summary = self._summarize_key_points(content, max_chars)
676
- elif mode == "technical":
677
- summary = self._summarize_technical(content, max_chars, preserve_code)
678
- else:
679
- summary = self._summarize_brief(content, max_chars)
680
-
681
- # Calculate metrics
682
- summary_size = len(summary)
683
- reduction_percentage = (
684
- (original_size - summary_size) / original_size
685
- ) * 100
686
-
687
- # Token estimates
688
- original_tokens = self._estimate_tokens(content)
689
- summary_tokens = self._estimate_tokens(summary)
690
- saved_tokens = original_tokens - summary_tokens
691
-
692
- # Prepare result
693
- result = {
694
- "summary": summary,
695
- "original_size": original_size,
696
- "summary_size": summary_size,
697
- "reduction_percentage": round(reduction_percentage, 2),
698
- "token_estimate": {
699
- "original": original_tokens,
700
- "summary": summary_tokens,
701
- "saved": saved_tokens,
702
- },
703
- "chunks_processed": chunks_processed,
704
- "cache_hit": cache_hit,
705
- }
706
-
707
- # Cache result if enabled
708
- if use_cache and not cache_hit:
709
- cache_key = f"{self._get_file_hash(file_path)}:{mode}:{max_percentage}"
710
- self._cache.put(cache_key, result.copy(), summary_size)
711
-
712
- # Calculate execution time
713
- execution_time = (datetime.now(timezone.utc) - start_time).total_seconds()
714
-
715
- # Update metrics
716
- self._update_metrics(True, execution_time)
717
-
718
- # Add cache stats to result
719
- result["cache_stats"] = self._cache.get_stats()
720
-
721
- return MCPToolResult(
722
- success=True,
723
- data=result,
724
- execution_time=execution_time,
725
- metadata={
726
- "tool": "document_summarizer",
727
- "mode": mode,
728
- "file_path": file_path,
729
- "reduction_achieved": reduction_percentage >= 60,
730
- },
731
- )
732
-
733
- except Exception as e:
734
- execution_time = (datetime.now(timezone.utc) - start_time).total_seconds()
735
- self._update_metrics(False, execution_time)
736
- self._metrics["last_error"] = str(e)
737
-
738
- self.log_error(f"Document summarizer failed: {e}")
739
-
740
- return MCPToolResult(
741
- success=False,
742
- error=f"Document summarizer failed: {e!s}",
743
- execution_time=execution_time,
744
- metadata={
745
- "tool": "document_summarizer",
746
- "error_type": type(e).__name__,
747
- },
748
- )
749
-
750
- async def initialize(self) -> bool:
751
- """
752
- Initialize the document summarizer tool.
753
-
754
- Returns:
755
- True if initialization successful
756
- """
757
- try:
758
- self.log_info("Initializing document summarizer tool")
759
-
760
- # Clear cache on initialization
761
- self._cache = LRUCache(max_size=100, max_memory_mb=50)
762
-
763
- self._initialized = True
764
- self.log_info("Document summarizer tool initialized successfully")
765
- return True
766
-
767
- except Exception as e:
768
- self.log_error(f"Failed to initialize document summarizer: {e}")
769
- return False
770
-
771
- async def shutdown(self) -> None:
772
- """
773
- Shutdown the document summarizer tool and clean up resources.
774
- """
775
- try:
776
- self.log_info("Shutting down document summarizer tool")
777
-
778
- # Log final cache stats
779
- cache_stats = self._cache.get_stats()
780
- self.log_info(f"Final cache stats: {cache_stats}")
781
-
782
- # Clear cache
783
- self._cache = None
784
-
785
- self._initialized = False
786
- self.log_info("Document summarizer tool shutdown complete")
787
-
788
- except Exception as e:
789
- self.log_error(f"Error during document summarizer shutdown: {e}")