claude-mpm 3.9.8__py3-none-any.whl → 3.9.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/agents/base_agent.json +1 -1
  3. claude_mpm/cli/__init__.py +3 -1
  4. claude_mpm/cli/commands/__init__.py +3 -1
  5. claude_mpm/cli/commands/cleanup.py +21 -1
  6. claude_mpm/cli/commands/mcp.py +821 -0
  7. claude_mpm/cli/parser.py +148 -1
  8. claude_mpm/config/memory_guardian_config.py +325 -0
  9. claude_mpm/constants.py +13 -0
  10. claude_mpm/hooks/claude_hooks/hook_handler.py +76 -19
  11. claude_mpm/models/state_models.py +433 -0
  12. claude_mpm/services/communication/__init__.py +2 -2
  13. claude_mpm/services/communication/socketio.py +18 -16
  14. claude_mpm/services/infrastructure/__init__.py +4 -1
  15. claude_mpm/services/infrastructure/logging.py +3 -3
  16. claude_mpm/services/infrastructure/memory_guardian.py +770 -0
  17. claude_mpm/services/mcp_gateway/__init__.py +28 -12
  18. claude_mpm/services/mcp_gateway/main.py +326 -0
  19. claude_mpm/services/mcp_gateway/registry/__init__.py +6 -3
  20. claude_mpm/services/mcp_gateway/registry/service_registry.py +397 -0
  21. claude_mpm/services/mcp_gateway/registry/tool_registry.py +477 -0
  22. claude_mpm/services/mcp_gateway/server/__init__.py +9 -3
  23. claude_mpm/services/mcp_gateway/server/mcp_server.py +430 -0
  24. claude_mpm/services/mcp_gateway/server/mcp_server_simple.py +444 -0
  25. claude_mpm/services/mcp_gateway/server/stdio_handler.py +373 -0
  26. claude_mpm/services/mcp_gateway/tools/__init__.py +16 -3
  27. claude_mpm/services/mcp_gateway/tools/base_adapter.py +497 -0
  28. claude_mpm/services/mcp_gateway/tools/document_summarizer.py +729 -0
  29. claude_mpm/services/mcp_gateway/tools/hello_world.py +551 -0
  30. claude_mpm/utils/file_utils.py +293 -0
  31. claude_mpm/utils/platform_memory.py +524 -0
  32. claude_mpm/utils/subprocess_utils.py +305 -0
  33. {claude_mpm-3.9.8.dist-info → claude_mpm-3.9.9.dist-info}/METADATA +3 -1
  34. {claude_mpm-3.9.8.dist-info → claude_mpm-3.9.9.dist-info}/RECORD +39 -28
  35. claude_mpm/agents/templates/.claude-mpm/memories/README.md +0 -36
  36. claude_mpm/agents/templates/.claude-mpm/memories/engineer_agent.md +0 -39
  37. claude_mpm/agents/templates/.claude-mpm/memories/qa_agent.md +0 -38
  38. claude_mpm/agents/templates/.claude-mpm/memories/research_agent.md +0 -39
  39. claude_mpm/agents/templates/.claude-mpm/memories/version_control_agent.md +0 -38
  40. /claude_mpm/agents/templates/{research_memory_efficient.json → backup/research_memory_efficient.json} +0 -0
  41. {claude_mpm-3.9.8.dist-info → claude_mpm-3.9.9.dist-info}/WHEEL +0 -0
  42. {claude_mpm-3.9.8.dist-info → claude_mpm-3.9.9.dist-info}/entry_points.txt +0 -0
  43. {claude_mpm-3.9.8.dist-info → claude_mpm-3.9.9.dist-info}/licenses/LICENSE +0 -0
  44. {claude_mpm-3.9.8.dist-info → claude_mpm-3.9.9.dist-info}/top_level.txt +0 -0
claude_mpm/services/mcp_gateway/tools/document_summarizer.py
@@ -0,0 +1,729 @@
+ """
+ Document Summarizer Tool
+ ========================
+
+ Intelligent document processing tool for solving Claude Code memory issues.
+ Supports multiple file formats and summarization strategies.
+
+ Part of ISS-0037: Document Summarizer Tool - Intelligent Document Processing
+ """
+
+ import os
+ import re
+ import json
+ import yaml
+ import csv
+ import mimetypes
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Tuple
+ from datetime import datetime
+ from functools import lru_cache
+ from collections import OrderedDict
+ import hashlib
+
+ from claude_mpm.services.mcp_gateway.core.interfaces import (
+     MCPToolDefinition,
+     MCPToolInvocation,
+     MCPToolResult,
+ )
+ from claude_mpm.services.mcp_gateway.tools.base_adapter import BaseToolAdapter
+
+
+ class LRUCache:
+     """
+     Simple LRU cache implementation for document summaries.
+
+     WHY: We need a memory-efficient cache to avoid re-processing documents
+     that are accessed repeatedly, which is common in Claude Code sessions.
+     """
+
+     def __init__(self, max_size: int = 100, max_memory_mb: int = 100):
+         """
+         Initialize LRU cache.
+
+         Args:
+             max_size: Maximum number of entries
+             max_memory_mb: Maximum memory usage in MB
+         """
+         self.cache = OrderedDict()
+         self.max_size = max_size
+         self.max_memory_bytes = max_memory_mb * 1024 * 1024
+         self.current_memory = 0
+         self.hits = 0
+         self.misses = 0
+
+     def get(self, key: str) -> Optional[Dict[str, Any]]:
+         """Get item from cache, updating LRU order."""
+         if key in self.cache:
+             self.hits += 1
+             # Move to end (most recently used)
+             self.cache.move_to_end(key)
+             return self.cache[key]
+         self.misses += 1
+         return None
+
+     def put(self, key: str, value: Dict[str, Any], size_bytes: int) -> None:
+         """Add item to cache, evicting LRU items if necessary."""
+         # Remove item if it already exists
+         if key in self.cache:
+             old_size = self.cache[key].get('size_bytes', 0)
+             self.current_memory -= old_size
+             del self.cache[key]
+
+         # Evict items if necessary
+         while (len(self.cache) >= self.max_size or
+                self.current_memory + size_bytes > self.max_memory_bytes):
+             if not self.cache:
+                 break
+             # Remove least recently used item
+             removed_key, removed_value = self.cache.popitem(last=False)
+             self.current_memory -= removed_value.get('size_bytes', 0)
+
+         # Add new item
+         value['size_bytes'] = size_bytes
+         self.cache[key] = value
+         self.current_memory += size_bytes
+
+     def get_stats(self) -> Dict[str, Any]:
+         """Get cache statistics."""
+         hit_rate = self.hits / (self.hits + self.misses) if (self.hits + self.misses) > 0 else 0
+         return {
+             'size': len(self.cache),
+             'memory_mb': self.current_memory / (1024 * 1024),
+             'hits': self.hits,
+             'misses': self.misses,
+             'hit_rate': hit_rate
+         }
+
+
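For orientation, here is a minimal standalone sketch of how this cache behaves; the keys, values, and byte sizes are illustrative, not taken from the package:

cache = LRUCache(max_size=2, max_memory_mb=1)
cache.put("a", {"summary": "doc A"}, size_bytes=100)
cache.put("b", {"summary": "doc B"}, size_bytes=100)
cache.get("a")                                         # hit; "a" becomes most recently used
cache.put("c", {"summary": "doc C"}, size_bytes=100)   # evicts "b", the least recently used entry
assert cache.get("b") is None                          # miss
print(cache.get_stats())                               # size=2, hits=1, misses=1, hit_rate=0.5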
+ class DocumentSummarizerTool(BaseToolAdapter):
+     """
+     Document summarizer tool for intelligent document processing.
+
+     WHY: Claude Code accumulates massive memory from reading full files,
+     leading to context overflow. This tool reduces document size by 60%+
+     while preserving essential information through intelligent summarization.
+
+     DESIGN DECISIONS:
+     - Use sentence boundary detection to preserve readability
+     - Implement multiple summarization modes for different use cases
+     - Cache summaries to avoid re-processing frequently accessed files
+     - Support common file formats used in development
+     """
+
+     # File size limits (in bytes)
+     MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
+     CHUNK_SIZE = 50000  # Characters per chunk for large files
+
+     # Token estimation (rough approximation)
+     CHARS_PER_TOKEN = 4  # Approximate for Claude's tokenizer
+
+     def __init__(self):
+         """Initialize the document summarizer tool."""
+         definition = MCPToolDefinition(
+             name="document_summarizer",
+             description="Intelligently summarizes documents to reduce memory usage while preserving key information",
+             input_schema={
+                 "type": "object",
+                 "properties": {
+                     "file_path": {
+                         "type": "string",
+                         "description": "Path to the document file"
+                     },
+                     "mode": {
+                         "type": "string",
+                         "enum": ["brief", "detailed", "key_points", "technical"],
+                         "description": "Summarization mode",
+                         "default": "brief"
+                     },
+                     "max_tokens": {
+                         "type": "integer",
+                         "description": "Maximum tokens in summary (optional)",
+                         "minimum": 100,
+                         "maximum": 10000
+                     },
+                     "max_percentage": {
+                         "type": "integer",
+                         "description": "Maximum percentage of original to keep (1-100)",
+                         "minimum": 1,
+                         "maximum": 100,
+                         "default": 40
+                     },
+                     "preserve_code": {
+                         "type": "boolean",
+                         "description": "Whether to preserve code blocks intact",
+                         "default": True
+                     },
+                     "use_cache": {
+                         "type": "boolean",
+                         "description": "Whether to use cached summaries",
+                         "default": True
+                     }
+                 },
+                 "required": ["file_path"]
+             },
+             output_schema={
+                 "type": "object",
+                 "properties": {
+                     "summary": {
+                         "type": "string",
+                         "description": "The summarized content"
+                     },
+                     "original_size": {
+                         "type": "integer",
+                         "description": "Original document size in bytes"
+                     },
+                     "summary_size": {
+                         "type": "integer",
+                         "description": "Summary size in bytes"
+                     },
+                     "reduction_percentage": {
+                         "type": "number",
+                         "description": "Percentage reduction achieved"
+                     },
+                     "token_estimate": {
+                         "type": "object",
+                         "properties": {
+                             "original": {"type": "integer"},
+                             "summary": {"type": "integer"},
+                             "saved": {"type": "integer"}
+                         }
+                     },
+                     "chunks_processed": {
+                         "type": "integer",
+                         "description": "Number of chunks processed for large files"
+                     },
+                     "cache_hit": {
+                         "type": "boolean",
+                         "description": "Whether summary was retrieved from cache"
+                     }
+                 }
+             },
+             version="1.0.0",
+             metadata={
+                 "category": "document_processing",
+                 "supported_formats": ["txt", "md", "pdf", "docx", "json", "yaml", "csv", "py", "js", "ts", "java", "cpp", "c", "h", "hpp"]
+             }
+         )
+         super().__init__(definition)
+
+         # Initialize cache
+         self._cache = LRUCache(max_size=100, max_memory_mb=50)
+
+         # Sentence boundary patterns
+         self._sentence_endings = re.compile(r'[.!?]\s+')
+
+         # Code block patterns for different formats
+         self._code_patterns = {
+             'markdown': re.compile(r'```[\s\S]*?```', re.MULTILINE),
+             'inline': re.compile(r'`[^`]+`'),
+             'indent': re.compile(r'^(    |\t).*$', re.MULTILINE)
+         }
+
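For reference, a parameter set that satisfies the input schema above might look like this; the path and values are illustrative:

params = {
    "file_path": "docs/design_notes.md",  # required
    "mode": "key_points",                 # brief | detailed | key_points | technical
    "max_percentage": 30,                 # keep at most 30% of the original
    "preserve_code": True,
    "use_cache": True,
}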
+     def _get_file_hash(self, file_path: str) -> str:
+         """Generate hash for file caching."""
+         stat = os.stat(file_path)
+         hash_input = f"{file_path}:{stat.st_size}:{stat.st_mtime}"
+         return hashlib.md5(hash_input.encode()).hexdigest()
+
+     def _estimate_tokens(self, text: str) -> int:
+         """Estimate token count for text."""
+         return len(text) // self.CHARS_PER_TOKEN
+
+     def _validate_file(self, file_path: str) -> Tuple[bool, Optional[str]]:
+         """
+         Validate file for processing.
+
+         Returns:
+             Tuple of (is_valid, error_message)
+         """
+         path = Path(file_path)
+
+         # Check if file exists
+         if not path.exists():
+             return False, f"File not found: {file_path}"
+
+         # Check if it's a file (not directory)
+         if not path.is_file():
+             return False, f"Path is not a file: {file_path}"
+
+         # Check file size
+         file_size = path.stat().st_size
+         if file_size > self.MAX_FILE_SIZE:
+             return False, f"File too large: {file_size} bytes (max: {self.MAX_FILE_SIZE})"
+
+         # Check file extension
+         extension = path.suffix.lower().lstrip('.')
+         supported = self._definition.metadata.get('supported_formats', [])
+         if extension and extension not in supported:
+             # Try to detect by mime type
+             mime_type, _ = mimetypes.guess_type(file_path)
+             if not mime_type or not mime_type.startswith('text/'):
+                 return False, f"Unsupported file format: {extension}"
+
+         return True, None
+
+     def _read_file(self, file_path: str) -> str:
+         """
+         Read file content with appropriate encoding.
+
+         Args:
+             file_path: Path to file
+
+         Returns:
+             File content as string
+         """
+         path = Path(file_path)
+
+         # Try different encodings
+         encodings = ['utf-8', 'latin-1', 'cp1252']
+
+         for encoding in encodings:
+             try:
+                 return path.read_text(encoding=encoding)
+             except UnicodeDecodeError:
+                 continue
+
+         # If all fail, read as binary and decode with errors='ignore'
+         with open(file_path, 'rb') as f:
+             content = f.read()
+             return content.decode('utf-8', errors='ignore')
+
+     def _extract_code_blocks(self, text: str) -> Tuple[List[str], str]:
+         """
+         Extract code blocks from text for preservation.
+
+         Returns:
+             Tuple of (code_blocks, text_without_code)
+         """
+         code_blocks = []
+         placeholder_template = "[[CODE_BLOCK_{}]]"
+
+         # Extract markdown code blocks
+         for match in self._code_patterns['markdown'].finditer(text):
+             code_blocks.append(match.group(0))
+             text = text.replace(match.group(0), placeholder_template.format(len(code_blocks) - 1))
+
+         return code_blocks, text
+
+     def _restore_code_blocks(self, text: str, code_blocks: List[str]) -> str:
+         """Restore code blocks to summarized text."""
+         for i, block in enumerate(code_blocks):
+             placeholder = f"[[CODE_BLOCK_{i}]]"
+             text = text.replace(placeholder, block)
+         return text
+
+     def _truncate_at_sentence(self, text: str, max_chars: int) -> str:
+         """
+         Truncate text at sentence boundary.
+
+         WHY: Truncating mid-sentence makes summaries harder to read and
+         can lose important context. Sentence boundaries preserve meaning.
+         """
+         if len(text) <= max_chars:
+             return text
+
+         # Find sentence boundaries
+         sentences = self._sentence_endings.split(text)
+
+         result = []
+         current_length = 0
+
+         for i, sentence in enumerate(sentences):
+             # Add sentence ending back if not last sentence
+             if i < len(sentences) - 1:
+                 sentence += '. '
+
+             if current_length + len(sentence) <= max_chars:
+                 result.append(sentence)
+                 current_length += len(sentence)
+             else:
+                 # Add partial sentence if we haven't added anything yet
+                 if not result and sentence:
+                     result.append(sentence[:max_chars - 3] + '...')
+                 break
+
+         return ''.join(result)
+
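To make the truncation rule concrete, a small illustration with a hypothetical input (the helper is private, so this is an aid for reading the code, not a public API):

text = "First point. Second point. Third point."
# With max_chars=30, only whole sentences that fit are kept:
# tool._truncate_at_sentence(text, 30) -> "First point. Second point. "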
+     def _summarize_brief(self, text: str, max_chars: int) -> str:
+         """
+         Brief summarization - first and last portions.
+
+         WHY: For quick overview, showing beginning and end gives context
+         about what the document covers and its conclusions.
+         """
+         if len(text) <= max_chars:
+             return text
+
+         # Split available space between beginning and end
+         half_chars = max_chars // 2 - 20  # Reserve space for separator
+
+         beginning = self._truncate_at_sentence(text, half_chars)
+         ending = self._truncate_at_sentence(text[-half_chars * 2:], half_chars)
+
+         return f"{beginning}\n\n[... content omitted for brevity ...]\n\n{ending}"
+
+     def _summarize_detailed(self, text: str, max_chars: int) -> str:
+         """
+         Detailed summarization - extract key paragraphs.
+
+         WHY: For technical documents, we want to preserve more structure
+         and include middle sections that might contain important details.
+         """
+         if len(text) <= max_chars:
+             return text
+
+         # Split into paragraphs
+         paragraphs = text.split('\n\n')
+
+         # Calculate importance scores (based on length and position)
+         scored_paragraphs = []
+         for i, para in enumerate(paragraphs):
+             # Skip empty paragraphs
+             if not para.strip():
+                 continue
+
+             # Score based on position (beginning and end are important)
+             position_score = 1.0
+             if i < 3:  # First 3 paragraphs
+                 position_score = 2.0
+             elif i >= len(paragraphs) - 3:  # Last 3 paragraphs
+                 position_score = 1.5
+
+             # Score based on content indicators
+             content_score = 1.0
+             if any(keyword in para.lower() for keyword in ['summary', 'conclusion', 'important', 'note', 'warning']):
+                 content_score = 1.5
+
+             score = position_score * content_score * (1 + len(para) / 1000)
+             scored_paragraphs.append((score, i, para))
+
+         # Sort by score and select top paragraphs
+         scored_paragraphs.sort(reverse=True)
+
+         selected = []
+         current_length = 0
+
+         for score, original_index, para in scored_paragraphs:
+             truncated_para = self._truncate_at_sentence(para, max_chars - current_length)
+             if current_length + len(truncated_para) <= max_chars:
+                 selected.append((original_index, truncated_para))
+                 current_length += len(truncated_para) + 2  # Account for newlines
+
+             if current_length >= max_chars * 0.9:  # Stop at 90% to leave some buffer
+                 break
+
+         # Sort selected paragraphs by original order
+         selected.sort()
+
+         return '\n\n'.join(para for _, para in selected)
+
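The scoring formula is easiest to see with numbers (hypothetical paragraphs):

# A 500-character opening paragraph containing the word "summary":
#   position_score = 2.0 (among the first three paragraphs)
#   content_score  = 1.5 (keyword match)
#   score = 2.0 * 1.5 * (1 + 500 / 1000) = 4.5
# A plain 200-character paragraph in the middle of the document:
#   score = 1.0 * 1.0 * (1 + 200 / 1000) = 1.2
# so the opening paragraph is selected first.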
+     def _summarize_key_points(self, text: str, max_chars: int) -> str:
+         """
+         Extract key points and bullet points.
+
+         WHY: Many documents have lists, bullet points, or numbered items
+         that contain the most important information in condensed form.
+         """
+         if len(text) <= max_chars:
+             return text
+
+         lines = text.split('\n')
+         key_lines = []
+
+         # Patterns for identifying key points
+         list_patterns = [
+             re.compile(r'^\s*[-*•]\s+'),      # Bullet points
+             re.compile(r'^\s*\d+[.)]\s+'),    # Numbered lists
+             re.compile(r'^\s*[A-Z][.)]\s+'),  # Letter lists
+             re.compile(r'^#+\s+'),            # Markdown headers
+             re.compile(r'^[A-Z][^.!?]*:'),    # Definition lists
+         ]
+
+         # Extract lines that match key point patterns
+         for line in lines:
+             if any(pattern.match(line) for pattern in list_patterns):
+                 key_lines.append(line)
+
+         # If we found key points, use them
+         if key_lines:
+             result = '\n'.join(key_lines)
+             if len(result) <= max_chars:
+                 return result
+             else:
+                 return self._truncate_at_sentence(result, max_chars)
+
+         # Fallback to brief summary if no key points found
+         return self._summarize_brief(text, max_chars)
+
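Lines like the following (invented examples) are what these patterns keep:

# "- supports YAML configs"         matches the bullet-point pattern
# "2) run the migration"            matches the numbered-list pattern
# "## Configuration"                matches the markdown-header pattern
# "Warning: requires Python 3.9+"   matches the definition-list pattern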
+     def _summarize_technical(self, text: str, max_chars: int, preserve_code: bool) -> str:
+         """
+         Technical summarization - preserve code and technical details.
+
+         WHY: For code files and technical documentation, we need to
+         preserve function signatures, class definitions, and important code.
+         """
+         if len(text) <= max_chars:
+             return text
+
+         # Extract and preserve code blocks if requested
+         code_blocks = []
+         text_without_code = text
+
+         if preserve_code:
+             code_blocks, text_without_code = self._extract_code_blocks(text)
+
+         # Extract technical patterns. Groups are non-capturing so that
+         # findall() returns the full matched line, not just the keyword.
+         tech_patterns = [
+             re.compile(r'^(?:class|def|function|interface|struct)\s+\w+.*$', re.MULTILINE),  # Definitions
+             re.compile(r'^(?:import|from|require|include|using)\s+.*$', re.MULTILINE),  # Imports
+             re.compile(r'^\s*@\w+.*$', re.MULTILINE),  # Decorators/Annotations
+             re.compile(r'^(?:public|private|protected|static).*\{?$', re.MULTILINE),  # Method signatures
+         ]
+
+         important_lines = []
+         for pattern in tech_patterns:
+             important_lines.extend(pattern.findall(text_without_code))
+
+         # Build technical summary
+         result_parts = []
+
+         # Add imports/includes first
+         imports = [line for line in important_lines if any(
+             keyword in line for keyword in ['import', 'from', 'require', 'include', 'using']
+         )]
+         if imports:
+             result_parts.append("# Imports/Dependencies\n" + '\n'.join(imports[:10]))
+
+         # Add class/function definitions
+         definitions = [line for line in important_lines if any(
+             keyword in line for keyword in ['class', 'def', 'function', 'interface', 'struct']
+         )]
+         if definitions:
+             result_parts.append("# Key Definitions\n" + '\n'.join(definitions[:20]))
+
+         # Add some code blocks if space allows
+         if preserve_code and code_blocks:
+             result_parts.append("# Code Samples")
+             for i, block in enumerate(code_blocks[:3]):  # Limit to first 3 blocks
+                 if len('\n'.join(result_parts)) + len(block) < max_chars * 0.8:
+                     result_parts.append(block)
+
+         result = '\n\n'.join(result_parts)
+
+         # If still too long, truncate
+         if len(result) > max_chars:
+             result = self._truncate_at_sentence(result, max_chars)
+
+         return result
+
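For a Python source file, the assembled summary might look like the following skeleton; the matched lines are invented for illustration:

# Imports/Dependencies
import os
from pathlib import Path

# Key Definitions
class ConfigLoader:
def load(self, path: str) -> dict: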
+     def _process_chunks(self, text: str, mode: str, max_chars_per_chunk: int, preserve_code: bool) -> str:
+         """
+         Process large documents in chunks.
+
+         WHY: Very large documents need to be processed in chunks to
+         avoid memory issues and maintain performance.
+         """
+         chunks = []
+         chunk_size = self.CHUNK_SIZE
+
+         for i in range(0, len(text), chunk_size):
+             chunk = text[i:i + chunk_size]
+
+             # Summarize chunk based on mode
+             if mode == "brief":
+                 summarized = self._summarize_brief(chunk, max_chars_per_chunk)
+             elif mode == "detailed":
+                 summarized = self._summarize_detailed(chunk, max_chars_per_chunk)
+             elif mode == "key_points":
+                 summarized = self._summarize_key_points(chunk, max_chars_per_chunk)
+             elif mode == "technical":
+                 summarized = self._summarize_technical(chunk, max_chars_per_chunk, preserve_code)
+             else:
+                 summarized = self._summarize_brief(chunk, max_chars_per_chunk)
+
+             chunks.append(summarized)
+
+         return '\n\n[--- Next Section ---]\n\n'.join(chunks)
+
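Worked numbers for the chunking path (file size invented): a 120,000-character file with the default max_percentage=40 targets int(120000 * 0.4) = 48,000 characters; invoke() computes 120000 // 50000 + 1 = 3 chunks, so each chunk is summarized to at most 48000 // 3 = 16,000 characters before the pieces are rejoined with section separators.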
+     async def invoke(self, invocation: MCPToolInvocation) -> MCPToolResult:
+         """
+         Invoke the document summarizer tool.
+
+         Args:
+             invocation: Tool invocation request
+
+         Returns:
+             Tool execution result with summary
+         """
+         start_time = datetime.now()
+
+         try:
+             # Get parameters
+             file_path = invocation.parameters["file_path"]
+             mode = invocation.parameters.get("mode", "brief")
+             max_tokens = invocation.parameters.get("max_tokens")
+             max_percentage = invocation.parameters.get("max_percentage", 40)
+             preserve_code = invocation.parameters.get("preserve_code", True)
+             use_cache = invocation.parameters.get("use_cache", True)
+
+             # Validate file
+             is_valid, error_msg = self._validate_file(file_path)
+             if not is_valid:
+                 raise ValueError(error_msg)
+
+             # Check cache if enabled
+             cache_hit = False
+             if use_cache:
+                 cache_key = f"{self._get_file_hash(file_path)}:{mode}:{max_percentage}"
+                 cached_result = self._cache.get(cache_key)
+                 if cached_result:
+                     cache_hit = True
+                     execution_time = (datetime.now() - start_time).total_seconds()
+                     self._update_metrics(True, execution_time)
+
+                     return MCPToolResult(
+                         success=True,
+                         data={
+                             **cached_result,
+                             "cache_hit": True,
+                             "cache_stats": self._cache.get_stats()
+                         },
+                         execution_time=execution_time,
+                         metadata={"tool": "document_summarizer", "mode": mode, "cached": True}
+                     )
+
+             # Read file content
+             content = self._read_file(file_path)
+             original_size = len(content)
+
+             # Calculate target size
+             if max_tokens:
+                 max_chars = max_tokens * self.CHARS_PER_TOKEN
+             else:
+                 max_chars = int(original_size * (max_percentage / 100))
+
+             # Process based on file size
+             chunks_processed = 1
+             if original_size > self.CHUNK_SIZE:
+                 # Process in chunks for large files
+                 chunks_processed = (original_size // self.CHUNK_SIZE) + 1
+                 max_chars_per_chunk = max_chars // chunks_processed
+                 summary = self._process_chunks(content, mode, max_chars_per_chunk, preserve_code)
+             else:
+                 # Process entire file
+                 if mode == "brief":
+                     summary = self._summarize_brief(content, max_chars)
+                 elif mode == "detailed":
+                     summary = self._summarize_detailed(content, max_chars)
+                 elif mode == "key_points":
+                     summary = self._summarize_key_points(content, max_chars)
+                 elif mode == "technical":
+                     summary = self._summarize_technical(content, max_chars, preserve_code)
+                 else:
+                     summary = self._summarize_brief(content, max_chars)
+
+             # Calculate metrics (guard against division by zero on empty files)
+             summary_size = len(summary)
+             reduction_percentage = ((original_size - summary_size) / original_size) * 100 if original_size else 0.0
+
+             # Token estimates
+             original_tokens = self._estimate_tokens(content)
+             summary_tokens = self._estimate_tokens(summary)
+             saved_tokens = original_tokens - summary_tokens
+
+             # Prepare result
+             result = {
+                 "summary": summary,
+                 "original_size": original_size,
+                 "summary_size": summary_size,
+                 "reduction_percentage": round(reduction_percentage, 2),
+                 "token_estimate": {
+                     "original": original_tokens,
+                     "summary": summary_tokens,
+                     "saved": saved_tokens
+                 },
+                 "chunks_processed": chunks_processed,
+                 "cache_hit": cache_hit
+             }
+
+             # Cache result if enabled
+             if use_cache and not cache_hit:
+                 cache_key = f"{self._get_file_hash(file_path)}:{mode}:{max_percentage}"
+                 self._cache.put(cache_key, result.copy(), summary_size)
+
+             # Calculate execution time
+             execution_time = (datetime.now() - start_time).total_seconds()
+
+             # Update metrics
+             self._update_metrics(True, execution_time)
+
+             # Add cache stats to result
+             result["cache_stats"] = self._cache.get_stats()
+
+             return MCPToolResult(
+                 success=True,
+                 data=result,
+                 execution_time=execution_time,
+                 metadata={
+                     "tool": "document_summarizer",
+                     "mode": mode,
+                     "file_path": file_path,
+                     "reduction_achieved": reduction_percentage >= 60
+                 }
+             )
+
+         except Exception as e:
+             execution_time = (datetime.now() - start_time).total_seconds()
+             self._update_metrics(False, execution_time)
+             self._metrics["last_error"] = str(e)
+
+             self.log_error(f"Document summarizer failed: {e}")
+
+             return MCPToolResult(
+                 success=False,
+                 error=f"Document summarizer failed: {str(e)}",
+                 execution_time=execution_time,
+                 metadata={"tool": "document_summarizer", "error_type": type(e).__name__}
+             )
+
+     async def initialize(self) -> bool:
+         """
+         Initialize the document summarizer tool.
+
+         Returns:
+             True if initialization successful
+         """
+         try:
+             self.log_info("Initializing document summarizer tool")
+
+             # Clear cache on initialization
+             self._cache = LRUCache(max_size=100, max_memory_mb=50)
+
+             self._initialized = True
+             self.log_info("Document summarizer tool initialized successfully")
+             return True
+
+         except Exception as e:
+             self.log_error(f"Failed to initialize document summarizer: {e}")
+             return False
+
+     async def shutdown(self) -> None:
+         """
+         Shutdown the document summarizer tool and clean up resources.
+         """
+         try:
+             self.log_info("Shutting down document summarizer tool")
+
+             # Log final cache stats
+             cache_stats = self._cache.get_stats()
+             self.log_info(f"Final cache stats: {cache_stats}")
+
+             # Clear cache
+             self._cache = None
+
+             self._initialized = False
+             self.log_info("Document summarizer tool shutdown complete")
+
+         except Exception as e:
+             self.log_error(f"Error during document summarizer shutdown: {e}")