atlas-chat 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atlas/__init__.py +40 -0
- atlas/application/__init__.py +7 -0
- atlas/application/chat/__init__.py +7 -0
- atlas/application/chat/agent/__init__.py +10 -0
- atlas/application/chat/agent/act_loop.py +179 -0
- atlas/application/chat/agent/factory.py +142 -0
- atlas/application/chat/agent/protocols.py +46 -0
- atlas/application/chat/agent/react_loop.py +338 -0
- atlas/application/chat/agent/think_act_loop.py +171 -0
- atlas/application/chat/approval_manager.py +151 -0
- atlas/application/chat/elicitation_manager.py +191 -0
- atlas/application/chat/events/__init__.py +1 -0
- atlas/application/chat/events/agent_event_relay.py +112 -0
- atlas/application/chat/modes/__init__.py +1 -0
- atlas/application/chat/modes/agent.py +125 -0
- atlas/application/chat/modes/plain.py +74 -0
- atlas/application/chat/modes/rag.py +81 -0
- atlas/application/chat/modes/tools.py +179 -0
- atlas/application/chat/orchestrator.py +213 -0
- atlas/application/chat/policies/__init__.py +1 -0
- atlas/application/chat/policies/tool_authorization.py +99 -0
- atlas/application/chat/preprocessors/__init__.py +1 -0
- atlas/application/chat/preprocessors/message_builder.py +92 -0
- atlas/application/chat/preprocessors/prompt_override_service.py +104 -0
- atlas/application/chat/service.py +454 -0
- atlas/application/chat/utilities/__init__.py +6 -0
- atlas/application/chat/utilities/error_handler.py +367 -0
- atlas/application/chat/utilities/event_notifier.py +546 -0
- atlas/application/chat/utilities/file_processor.py +613 -0
- atlas/application/chat/utilities/tool_executor.py +789 -0
- atlas/atlas_chat_cli.py +347 -0
- atlas/atlas_client.py +238 -0
- atlas/core/__init__.py +0 -0
- atlas/core/auth.py +205 -0
- atlas/core/authorization_manager.py +27 -0
- atlas/core/capabilities.py +123 -0
- atlas/core/compliance.py +215 -0
- atlas/core/domain_whitelist.py +147 -0
- atlas/core/domain_whitelist_middleware.py +82 -0
- atlas/core/http_client.py +28 -0
- atlas/core/log_sanitizer.py +102 -0
- atlas/core/metrics_logger.py +59 -0
- atlas/core/middleware.py +131 -0
- atlas/core/otel_config.py +242 -0
- atlas/core/prompt_risk.py +200 -0
- atlas/core/rate_limit.py +0 -0
- atlas/core/rate_limit_middleware.py +64 -0
- atlas/core/security_headers_middleware.py +51 -0
- atlas/domain/__init__.py +37 -0
- atlas/domain/chat/__init__.py +1 -0
- atlas/domain/chat/dtos.py +85 -0
- atlas/domain/errors.py +96 -0
- atlas/domain/messages/__init__.py +12 -0
- atlas/domain/messages/models.py +160 -0
- atlas/domain/rag_mcp_service.py +664 -0
- atlas/domain/sessions/__init__.py +7 -0
- atlas/domain/sessions/models.py +36 -0
- atlas/domain/unified_rag_service.py +371 -0
- atlas/infrastructure/__init__.py +10 -0
- atlas/infrastructure/app_factory.py +135 -0
- atlas/infrastructure/events/__init__.py +1 -0
- atlas/infrastructure/events/cli_event_publisher.py +140 -0
- atlas/infrastructure/events/websocket_publisher.py +140 -0
- atlas/infrastructure/sessions/in_memory_repository.py +56 -0
- atlas/infrastructure/transport/__init__.py +7 -0
- atlas/infrastructure/transport/websocket_connection_adapter.py +33 -0
- atlas/init_cli.py +226 -0
- atlas/interfaces/__init__.py +15 -0
- atlas/interfaces/events.py +134 -0
- atlas/interfaces/llm.py +54 -0
- atlas/interfaces/rag.py +40 -0
- atlas/interfaces/sessions.py +75 -0
- atlas/interfaces/tools.py +57 -0
- atlas/interfaces/transport.py +24 -0
- atlas/main.py +564 -0
- atlas/mcp/api_key_demo/README.md +76 -0
- atlas/mcp/api_key_demo/main.py +172 -0
- atlas/mcp/api_key_demo/run.sh +56 -0
- atlas/mcp/basictable/main.py +147 -0
- atlas/mcp/calculator/main.py +149 -0
- atlas/mcp/code-executor/execution_engine.py +98 -0
- atlas/mcp/code-executor/execution_environment.py +95 -0
- atlas/mcp/code-executor/main.py +528 -0
- atlas/mcp/code-executor/result_processing.py +276 -0
- atlas/mcp/code-executor/script_generation.py +195 -0
- atlas/mcp/code-executor/security_checker.py +140 -0
- atlas/mcp/corporate_cars/main.py +437 -0
- atlas/mcp/csv_reporter/main.py +545 -0
- atlas/mcp/duckduckgo/main.py +182 -0
- atlas/mcp/elicitation_demo/README.md +171 -0
- atlas/mcp/elicitation_demo/main.py +262 -0
- atlas/mcp/env-demo/README.md +158 -0
- atlas/mcp/env-demo/main.py +199 -0
- atlas/mcp/file_size_test/main.py +284 -0
- atlas/mcp/filesystem/main.py +348 -0
- atlas/mcp/image_demo/main.py +113 -0
- atlas/mcp/image_demo/requirements.txt +4 -0
- atlas/mcp/logging_demo/README.md +72 -0
- atlas/mcp/logging_demo/main.py +103 -0
- atlas/mcp/many_tools_demo/main.py +50 -0
- atlas/mcp/order_database/__init__.py +0 -0
- atlas/mcp/order_database/main.py +369 -0
- atlas/mcp/order_database/signal_data.csv +1001 -0
- atlas/mcp/pdfbasic/main.py +394 -0
- atlas/mcp/pptx_generator/main.py +760 -0
- atlas/mcp/pptx_generator/requirements.txt +13 -0
- atlas/mcp/pptx_generator/run_test.sh +1 -0
- atlas/mcp/pptx_generator/test_pptx_generator_security.py +169 -0
- atlas/mcp/progress_demo/main.py +167 -0
- atlas/mcp/progress_updates_demo/QUICKSTART.md +273 -0
- atlas/mcp/progress_updates_demo/README.md +120 -0
- atlas/mcp/progress_updates_demo/main.py +497 -0
- atlas/mcp/prompts/main.py +222 -0
- atlas/mcp/public_demo/main.py +189 -0
- atlas/mcp/sampling_demo/README.md +169 -0
- atlas/mcp/sampling_demo/main.py +234 -0
- atlas/mcp/thinking/main.py +77 -0
- atlas/mcp/tool_planner/main.py +240 -0
- atlas/mcp/ui-demo/badmesh.png +0 -0
- atlas/mcp/ui-demo/main.py +383 -0
- atlas/mcp/ui-demo/templates/button_demo.html +32 -0
- atlas/mcp/ui-demo/templates/data_visualization.html +32 -0
- atlas/mcp/ui-demo/templates/form_demo.html +28 -0
- atlas/mcp/username-override-demo/README.md +320 -0
- atlas/mcp/username-override-demo/main.py +308 -0
- atlas/modules/__init__.py +0 -0
- atlas/modules/config/__init__.py +34 -0
- atlas/modules/config/cli.py +231 -0
- atlas/modules/config/config_manager.py +1096 -0
- atlas/modules/file_storage/__init__.py +22 -0
- atlas/modules/file_storage/cli.py +330 -0
- atlas/modules/file_storage/content_extractor.py +290 -0
- atlas/modules/file_storage/manager.py +295 -0
- atlas/modules/file_storage/mock_s3_client.py +402 -0
- atlas/modules/file_storage/s3_client.py +417 -0
- atlas/modules/llm/__init__.py +19 -0
- atlas/modules/llm/caller.py +287 -0
- atlas/modules/llm/litellm_caller.py +675 -0
- atlas/modules/llm/models.py +19 -0
- atlas/modules/mcp_tools/__init__.py +17 -0
- atlas/modules/mcp_tools/client.py +2123 -0
- atlas/modules/mcp_tools/token_storage.py +556 -0
- atlas/modules/prompts/prompt_provider.py +130 -0
- atlas/modules/rag/__init__.py +24 -0
- atlas/modules/rag/atlas_rag_client.py +336 -0
- atlas/modules/rag/client.py +129 -0
- atlas/routes/admin_routes.py +865 -0
- atlas/routes/config_routes.py +484 -0
- atlas/routes/feedback_routes.py +361 -0
- atlas/routes/files_routes.py +274 -0
- atlas/routes/health_routes.py +40 -0
- atlas/routes/mcp_auth_routes.py +223 -0
- atlas/server_cli.py +164 -0
- atlas/tests/conftest.py +20 -0
- atlas/tests/integration/test_mcp_auth_integration.py +152 -0
- atlas/tests/manual_test_sampling.py +87 -0
- atlas/tests/modules/mcp_tools/test_client_auth.py +226 -0
- atlas/tests/modules/mcp_tools/test_client_env.py +191 -0
- atlas/tests/test_admin_mcp_server_management_routes.py +141 -0
- atlas/tests/test_agent_roa.py +135 -0
- atlas/tests/test_app_factory_smoke.py +47 -0
- atlas/tests/test_approval_manager.py +439 -0
- atlas/tests/test_atlas_client.py +188 -0
- atlas/tests/test_atlas_rag_client.py +447 -0
- atlas/tests/test_atlas_rag_integration.py +224 -0
- atlas/tests/test_attach_file_flow.py +287 -0
- atlas/tests/test_auth_utils.py +165 -0
- atlas/tests/test_backend_public_url.py +185 -0
- atlas/tests/test_banner_logging.py +287 -0
- atlas/tests/test_capability_tokens_and_injection.py +203 -0
- atlas/tests/test_compliance_level.py +54 -0
- atlas/tests/test_compliance_manager.py +253 -0
- atlas/tests/test_config_manager.py +617 -0
- atlas/tests/test_config_manager_paths.py +12 -0
- atlas/tests/test_core_auth.py +18 -0
- atlas/tests/test_core_utils.py +190 -0
- atlas/tests/test_docker_env_sync.py +202 -0
- atlas/tests/test_domain_errors.py +329 -0
- atlas/tests/test_domain_whitelist.py +359 -0
- atlas/tests/test_elicitation_manager.py +408 -0
- atlas/tests/test_elicitation_routing.py +296 -0
- atlas/tests/test_env_demo_server.py +88 -0
- atlas/tests/test_error_classification.py +113 -0
- atlas/tests/test_error_flow_integration.py +116 -0
- atlas/tests/test_feedback_routes.py +333 -0
- atlas/tests/test_file_content_extraction.py +1134 -0
- atlas/tests/test_file_extraction_routes.py +158 -0
- atlas/tests/test_file_library.py +107 -0
- atlas/tests/test_file_manager_unit.py +18 -0
- atlas/tests/test_health_route.py +49 -0
- atlas/tests/test_http_client_stub.py +8 -0
- atlas/tests/test_imports_smoke.py +30 -0
- atlas/tests/test_interfaces_llm_response.py +9 -0
- atlas/tests/test_issue_access_denied_fix.py +136 -0
- atlas/tests/test_llm_env_expansion.py +836 -0
- atlas/tests/test_log_level_sensitive_data.py +285 -0
- atlas/tests/test_mcp_auth_routes.py +341 -0
- atlas/tests/test_mcp_client_auth.py +331 -0
- atlas/tests/test_mcp_data_injection.py +270 -0
- atlas/tests/test_mcp_get_authorized_servers.py +95 -0
- atlas/tests/test_mcp_hot_reload.py +512 -0
- atlas/tests/test_mcp_image_content.py +424 -0
- atlas/tests/test_mcp_logging.py +172 -0
- atlas/tests/test_mcp_progress_updates.py +313 -0
- atlas/tests/test_mcp_prompt_override_system_prompt.py +102 -0
- atlas/tests/test_mcp_prompts_server.py +39 -0
- atlas/tests/test_mcp_tool_result_parsing.py +296 -0
- atlas/tests/test_metrics_logger.py +56 -0
- atlas/tests/test_middleware_auth.py +379 -0
- atlas/tests/test_prompt_risk_and_acl.py +141 -0
- atlas/tests/test_rag_mcp_aggregator.py +204 -0
- atlas/tests/test_rag_mcp_service.py +224 -0
- atlas/tests/test_rate_limit_middleware.py +45 -0
- atlas/tests/test_routes_config_smoke.py +60 -0
- atlas/tests/test_routes_files_download_token.py +41 -0
- atlas/tests/test_routes_files_health.py +18 -0
- atlas/tests/test_runtime_imports.py +53 -0
- atlas/tests/test_sampling_integration.py +482 -0
- atlas/tests/test_security_admin_routes.py +61 -0
- atlas/tests/test_security_capability_tokens.py +65 -0
- atlas/tests/test_security_file_stats_scope.py +21 -0
- atlas/tests/test_security_header_injection.py +191 -0
- atlas/tests/test_security_headers_and_filename.py +63 -0
- atlas/tests/test_shared_session_repository.py +101 -0
- atlas/tests/test_system_prompt_loading.py +181 -0
- atlas/tests/test_token_storage.py +505 -0
- atlas/tests/test_tool_approval_config.py +93 -0
- atlas/tests/test_tool_approval_utils.py +356 -0
- atlas/tests/test_tool_authorization_group_filtering.py +223 -0
- atlas/tests/test_tool_details_in_config.py +108 -0
- atlas/tests/test_tool_planner.py +300 -0
- atlas/tests/test_unified_rag_service.py +398 -0
- atlas/tests/test_username_override_in_approval.py +258 -0
- atlas/tests/test_websocket_auth_header.py +168 -0
- atlas/version.py +6 -0
- atlas_chat-0.1.0.data/data/.env.example +253 -0
- atlas_chat-0.1.0.data/data/config/defaults/compliance-levels.json +44 -0
- atlas_chat-0.1.0.data/data/config/defaults/domain-whitelist.json +123 -0
- atlas_chat-0.1.0.data/data/config/defaults/file-extractors.json +74 -0
- atlas_chat-0.1.0.data/data/config/defaults/help-config.json +198 -0
- atlas_chat-0.1.0.data/data/config/defaults/llmconfig-buggy.yml +11 -0
- atlas_chat-0.1.0.data/data/config/defaults/llmconfig.yml +19 -0
- atlas_chat-0.1.0.data/data/config/defaults/mcp.json +138 -0
- atlas_chat-0.1.0.data/data/config/defaults/rag-sources.json +17 -0
- atlas_chat-0.1.0.data/data/config/defaults/splash-config.json +16 -0
- atlas_chat-0.1.0.dist-info/METADATA +236 -0
- atlas_chat-0.1.0.dist-info/RECORD +250 -0
- atlas_chat-0.1.0.dist-info/WHEEL +5 -0
- atlas_chat-0.1.0.dist-info/entry_points.txt +4 -0
- atlas_chat-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
PDF Analyzer MCP Server using FastMCP.
|
|
4
|
+
Provides PDF text analysis and report generation through the MCP protocol.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import base64
|
|
8
|
+
import io
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
from collections import Counter
|
|
13
|
+
from typing import Annotated, Any, Dict, Optional
|
|
14
|
+
|
|
15
|
+
import requests
|
|
16
|
+
from fastmcp import FastMCP
|
|
17
|
+
|
|
18
|
+
# This tool requires the PyPDF2 and reportlab libraries.
|
|
19
|
+
# Install them using: pip install PyPDF2 reportlab
|
|
20
|
+
from PyPDF2 import PdfReader
|
|
21
|
+
from reportlab.lib.pagesizes import letter
|
|
22
|
+
from reportlab.lib.units import inch
|
|
23
|
+
from reportlab.pdfgen import canvas
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
mcp = FastMCP("PDF_Analyzer")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _analyze_pdf_content(instructions: str, filename: str, original_filename: Optional[str] = None) -> Dict[str, Any]:
    """
    Core PDF analysis logic that can be reused by multiple tools.

    Despite its name, ``filename`` carries the PDF *source*. It is interpreted as:

    - an absolute URL when it starts with ``http://`` or ``https://``;
    - a server-relative path when it starts with ``/`` (resolved against the
      ``BACKEND_URL`` environment variable, default ``http://localhost:8000``);
    - otherwise, the Base64-encoded bytes of the PDF.

    Args:
        instructions: Instructions for the tool. Only logged; not otherwise used.
        filename: URL, server-relative path, or Base64 payload of the PDF.
        original_filename: Optional display name of the file. Either this or
            ``filename`` must end in '.pdf' (case-insensitive). When provided,
            it is the name reported back in the result.

    Returns:
        A dictionary of the form ``{"results": {...}}``. On success the inner
        dictionary holds ``operation``, ``filename``, ``total_word_count`` and
        ``top_100_words`` (plus ``status`` and ``message`` when the PDF has no
        extractable text). On any failure it holds a single ``error`` message;
        exceptions are never propagated to the caller.
    """
    try:
        logger.info(f"Instructions: {instructions}")

        # 1. Validate that either name identifies a PDF.
        if not (filename.lower().endswith('.pdf') or (original_filename and original_filename.lower().endswith('.pdf'))):
            return {"results": {"error": "Invalid file type. This tool only accepts PDF files."}}

        # 2. Obtain the raw PDF bytes, either by download or by Base64 decoding.
        if filename.startswith(("http://", "https://", "/")):
            if filename.startswith("/"):
                # Relative path: resolve against the backend base URL.
                # Strip a trailing slash so the join never produces "//".
                backend_url = os.getenv("BACKEND_URL", "http://localhost:8000")
                url = f"{backend_url.rstrip('/')}{filename}"
            else:
                url = filename

            # Log the URL without its query string.
            # NOTE(review): assumes download URLs may carry credentials
            # (e.g. access tokens) as query parameters - confirm with backend.
            logger.info(f"Step 9: Downloading file from URL: {url.split('?', 1)[0]}")
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            pdf_stream = io.BytesIO(response.content)
        else:
            # Anything that is not URL-like is treated as Base64-encoded data.
            pdf_stream = io.BytesIO(base64.b64decode(filename))

        reader = PdfReader(pdf_stream)

        # Concatenate the text of every page that yields any, one page per line group.
        page_texts = (page.extract_text() for page in reader.pages)
        full_text = "".join(f"{text}\n" for text in page_texts if text)

        if not full_text.strip():
            # Not an error: the PDF was readable but contained no text layer.
            return {
                "results": {
                    "operation": "pdf_analysis",
                    "filename": original_filename or filename,
                    "status": "Success",
                    "message": "PDF contained no extractable text.",
                    "total_word_count": 0,
                    "top_100_words": {}
                }
            }

        # 3. Tokenize: lower-case the text and collect all word-like sequences.
        words = re.findall(r'\b\w+\b', full_text.lower())

        # 4. Count frequencies; most_common() yields (word, count) pairs in
        #    descending order, which dict() preserves.
        top_100_words_dict = dict(Counter(words).most_common(100))

        # 5. Return the successful result.
        return {
            "results": {
                "operation": "pdf_analysis",
                "filename": original_filename or filename,
                "total_word_count": len(words),
                "top_100_words": top_100_words_dict
            }
        }

    except Exception as e:
        # Deliberate catch-all boundary: tool callers receive failures as an
        # error payload. Record the traceback through the module logger.
        logger.exception("PDF analysis failed")
        return {"results": {"error": f"PDF analysis failed: {str(e)}"}}
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@mcp.tool
def analyze_pdf(
    instructions: Annotated[str, "Instructions for the tool, not used in this implementation"],
    filename: Annotated[str, "The name of the file, which must have a '.pdf' extension"],
    original_filename: Optional[str] = None
) -> Dict[str, Any]:
    """
    Extract and analyze text content from PDF documents with comprehensive word frequency analysis.

    This PDF processing tool provides detailed text analytics for PDF documents:

    **PDF Text Extraction:**
    - Extracts text from all pages in PDF documents
    - Handles various PDF formats and structures
    - Works with PDFs that contain a text layer (no OCR is performed; image-only
      scans are reported as containing no extractable text)

    **Text Analysis Features:**
    - Complete word count across entire document
    - Top 100 most frequently used words identification
    - Case-insensitive word analysis for accurate frequency counting

    **Use Cases:**
    - Academic paper and research document analysis
    - Legal document keyword extraction and analysis
    - Content marketing and SEO keyword research
    - Document classification and categorization
    - Research literature review and summarization
    - Contract and agreement content analysis

    **Output Format:**
    - Structured word frequency data
    - Total document word count statistics
    - Top 100 words with occurrence frequencies

    Args:
        instructions: Processing instructions or requirements (currently not used)
        filename: PDF source. Either this value or original_filename must end
            with a .pdf extension.
        original_filename: The original name of the file.

    Returns:
        Dictionary with a "results" entry containing:
        - operation: Processing type confirmation
        - filename: Source PDF file name
        - total_word_count: Complete document word count
        - top_100_words: Dictionary of most frequent words with counts
        If the PDF has no extractable text, the same fields are returned with
        zero counts plus a status and message. If the PDF cannot be processed,
        "results" contains only an "error" message.
    """
    logger.info("Step 8: Entering analyze_pdf tool")
    # All of the work is shared with generate_report_about_pdf via the helper.
    return _analyze_pdf_content(instructions, filename, original_filename)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@mcp.tool
def generate_report_about_pdf(
    instructions: Annotated[str, "Instructions for the tool, not used in this implementation"],
    filename: Annotated[str, "The name of the file, which must have a '.pdf' extension"],
    original_filename: Optional[str] = None
) -> Dict[str, Any]:
    """
    Create comprehensive PDF analysis reports with professional formatting and detailed word frequency insights.

    This PDF reporting tool combines text analysis with document generation:

    **Complete PDF Analysis Workflow:**
    - Performs full text extraction and word frequency analysis
    - Generates an analysis report in PDF format
    - Returns the report as a downloadable artifact

    **Report Contents:**
    - Source document name
    - Total word count
    - Top 100 most frequent words with occurrence counts
    - Three-column layout for the word frequency data

    **Use Cases:**
    - Academic research document analysis reporting
    - Legal document content analysis for litigation support
    - Content marketing keyword research documentation
    - Business document compliance and review reporting
    - Research literature analysis and summarization
    - Document classification and content audit reports

    Args:
        instructions: Report generation instructions or requirements (currently not used)
        filename: PDF source. Either this value or original_filename must end
            with a .pdf extension.
        original_filename: The original name of the file.

    Returns:
        Dictionary containing:
        - results: Report generation summary and success confirmation
        - artifacts: The generated PDF report, Base64-encoded
        - display: Viewer configuration for report presentation
        - meta_data: Source file information and analysis statistics
        Or a "results" entry holding only an "error" message if the PDF cannot
        be processed or report generation fails
    """
    logger.info("Step 8: Entering generate_report_about_pdf tool")

    # --- 1. Perform the same analysis as analyze_pdf ---
    analysis_result = _analyze_pdf_content(instructions, filename, original_filename)
    if "error" in analysis_result.get("results", {}):
        # Propagate analysis failures unchanged.
        return analysis_result

    # --- 2. Generate the PDF report ---
    try:
        results_data = analysis_result["results"]

        # Render the report entirely in memory.
        pdf_buffer = io.BytesIO()
        c = canvas.Canvas(pdf_buffer, pagesize=letter)
        width, height = letter
        left_margin = 1 * inch

        def draw_labeled_value(y: float, label: str, value: str) -> None:
            """Draw a bold label and place its value just after the label's measured width."""
            c.setFont("Helvetica-Bold", 12)
            c.drawString(left_margin, y, label)
            # Measure the label so the value never overlaps it; 6pt gap.
            value_x = left_margin + c.stringWidth(label, "Helvetica-Bold", 12) + 6
            c.setFont("Helvetica", 10)
            c.drawString(value_x, y, value)

        # Title
        c.setFont("Helvetica-Bold", 16)
        c.drawString(left_margin, height - 1 * inch, "PDF Analysis Report")

        # Document info and total word count
        draw_labeled_value(height - 1.5 * inch, "Document:", results_data.get("filename", "Unknown"))
        draw_labeled_value(height - 2 * inch, "Total Words:", str(results_data.get("total_word_count", 0)))

        # Top 100 words header
        c.setFont("Helvetica-Bold", 12)
        c.drawString(left_margin, height - 2.5 * inch, "Top 100 Most Frequent Words:")

        # Lay the words out row by row across three fixed columns.
        c.setFont("Helvetica", 9)
        column_x = (1 * inch, 3.5 * inch, 6 * inch)
        y_position = height - 3 * inch
        top_100_words = results_data.get("top_100_words", {})

        for idx, (word, count) in enumerate(top_100_words.items()):
            col = idx % len(column_x)

            # A new row starts every time we wrap back to the first column.
            if col == 0 and idx > 0:
                y_position -= 0.2 * inch
                # Start a new page once the row would fall into the bottom margin.
                if y_position < 1 * inch:
                    c.showPage()
                    c.setFont("Helvetica", 9)  # font state is reset by showPage()
                    y_position = height - 1 * inch

            c.drawString(column_x[col], y_position, f"{word}: {count}")

        c.save()

        # Get PDF bytes and encode to base64
        pdf_bytes = pdf_buffer.getvalue()
        pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')

        # --- 3. Return the structured response (v2 MCP compliant) ---
        # Build the artifact name from the last path component only, so a URL
        # or path used as the source name cannot inject separators, and strip
        # just a trailing ".pdf" rather than every occurrence.
        source_name = results_data.get("filename") or "document"
        base_name = os.path.basename(source_name.replace("\\", "/").rstrip("/")) or "document"
        if base_name.lower().endswith(".pdf"):
            base_name = base_name[:-len(".pdf")] or "document"
        report_name = f"analysis_report_{base_name}.pdf"

        return {
            "results": {
                "operation": "pdf_report_generation",
                "status": "Success",
                "message": f"Generated analysis report for {results_data.get('filename', 'document')}",
                "total_word_count": results_data.get("total_word_count", 0),
                "words_analyzed": len(top_100_words)
            },
            "artifacts": [
                {
                    "name": report_name,
                    "b64": pdf_base64,
                    "mime": "application/pdf",
                    "size": len(pdf_bytes),
                    "description": "PDF analysis report with word frequency statistics"
                }
            ],
            "display": {
                "open_canvas": True,
                "primary_file": report_name,
                "mode": "replace",
                "viewer_hint": "pdf"
            },
            "meta_data": {
                "source_file": results_data.get("filename", "Unknown"),
                "total_words": results_data.get("total_word_count", 0)
            }
        }

    except Exception as e:
        # Deliberate catch-all boundary: report failures as an error payload
        # and record the traceback through the module logger.
        logger.exception("Report generation failed")
        return {
            "results": {
                "error": f"Report generation failed: {str(e)}"
            }
        }
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
if __name__ == "__main__":
    # Start the MCP server only when this file is executed as a script,
    # not when it is imported as a module.
    mcp.run()
|