atlas-chat 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. atlas/__init__.py +40 -0
  2. atlas/application/__init__.py +7 -0
  3. atlas/application/chat/__init__.py +7 -0
  4. atlas/application/chat/agent/__init__.py +10 -0
  5. atlas/application/chat/agent/act_loop.py +179 -0
  6. atlas/application/chat/agent/factory.py +142 -0
  7. atlas/application/chat/agent/protocols.py +46 -0
  8. atlas/application/chat/agent/react_loop.py +338 -0
  9. atlas/application/chat/agent/think_act_loop.py +171 -0
  10. atlas/application/chat/approval_manager.py +151 -0
  11. atlas/application/chat/elicitation_manager.py +191 -0
  12. atlas/application/chat/events/__init__.py +1 -0
  13. atlas/application/chat/events/agent_event_relay.py +112 -0
  14. atlas/application/chat/modes/__init__.py +1 -0
  15. atlas/application/chat/modes/agent.py +125 -0
  16. atlas/application/chat/modes/plain.py +74 -0
  17. atlas/application/chat/modes/rag.py +81 -0
  18. atlas/application/chat/modes/tools.py +179 -0
  19. atlas/application/chat/orchestrator.py +213 -0
  20. atlas/application/chat/policies/__init__.py +1 -0
  21. atlas/application/chat/policies/tool_authorization.py +99 -0
  22. atlas/application/chat/preprocessors/__init__.py +1 -0
  23. atlas/application/chat/preprocessors/message_builder.py +92 -0
  24. atlas/application/chat/preprocessors/prompt_override_service.py +104 -0
  25. atlas/application/chat/service.py +454 -0
  26. atlas/application/chat/utilities/__init__.py +6 -0
  27. atlas/application/chat/utilities/error_handler.py +367 -0
  28. atlas/application/chat/utilities/event_notifier.py +546 -0
  29. atlas/application/chat/utilities/file_processor.py +613 -0
  30. atlas/application/chat/utilities/tool_executor.py +789 -0
  31. atlas/atlas_chat_cli.py +347 -0
  32. atlas/atlas_client.py +238 -0
  33. atlas/core/__init__.py +0 -0
  34. atlas/core/auth.py +205 -0
  35. atlas/core/authorization_manager.py +27 -0
  36. atlas/core/capabilities.py +123 -0
  37. atlas/core/compliance.py +215 -0
  38. atlas/core/domain_whitelist.py +147 -0
  39. atlas/core/domain_whitelist_middleware.py +82 -0
  40. atlas/core/http_client.py +28 -0
  41. atlas/core/log_sanitizer.py +102 -0
  42. atlas/core/metrics_logger.py +59 -0
  43. atlas/core/middleware.py +131 -0
  44. atlas/core/otel_config.py +242 -0
  45. atlas/core/prompt_risk.py +200 -0
  46. atlas/core/rate_limit.py +0 -0
  47. atlas/core/rate_limit_middleware.py +64 -0
  48. atlas/core/security_headers_middleware.py +51 -0
  49. atlas/domain/__init__.py +37 -0
  50. atlas/domain/chat/__init__.py +1 -0
  51. atlas/domain/chat/dtos.py +85 -0
  52. atlas/domain/errors.py +96 -0
  53. atlas/domain/messages/__init__.py +12 -0
  54. atlas/domain/messages/models.py +160 -0
  55. atlas/domain/rag_mcp_service.py +664 -0
  56. atlas/domain/sessions/__init__.py +7 -0
  57. atlas/domain/sessions/models.py +36 -0
  58. atlas/domain/unified_rag_service.py +371 -0
  59. atlas/infrastructure/__init__.py +10 -0
  60. atlas/infrastructure/app_factory.py +135 -0
  61. atlas/infrastructure/events/__init__.py +1 -0
  62. atlas/infrastructure/events/cli_event_publisher.py +140 -0
  63. atlas/infrastructure/events/websocket_publisher.py +140 -0
  64. atlas/infrastructure/sessions/in_memory_repository.py +56 -0
  65. atlas/infrastructure/transport/__init__.py +7 -0
  66. atlas/infrastructure/transport/websocket_connection_adapter.py +33 -0
  67. atlas/init_cli.py +226 -0
  68. atlas/interfaces/__init__.py +15 -0
  69. atlas/interfaces/events.py +134 -0
  70. atlas/interfaces/llm.py +54 -0
  71. atlas/interfaces/rag.py +40 -0
  72. atlas/interfaces/sessions.py +75 -0
  73. atlas/interfaces/tools.py +57 -0
  74. atlas/interfaces/transport.py +24 -0
  75. atlas/main.py +564 -0
  76. atlas/mcp/api_key_demo/README.md +76 -0
  77. atlas/mcp/api_key_demo/main.py +172 -0
  78. atlas/mcp/api_key_demo/run.sh +56 -0
  79. atlas/mcp/basictable/main.py +147 -0
  80. atlas/mcp/calculator/main.py +149 -0
  81. atlas/mcp/code-executor/execution_engine.py +98 -0
  82. atlas/mcp/code-executor/execution_environment.py +95 -0
  83. atlas/mcp/code-executor/main.py +528 -0
  84. atlas/mcp/code-executor/result_processing.py +276 -0
  85. atlas/mcp/code-executor/script_generation.py +195 -0
  86. atlas/mcp/code-executor/security_checker.py +140 -0
  87. atlas/mcp/corporate_cars/main.py +437 -0
  88. atlas/mcp/csv_reporter/main.py +545 -0
  89. atlas/mcp/duckduckgo/main.py +182 -0
  90. atlas/mcp/elicitation_demo/README.md +171 -0
  91. atlas/mcp/elicitation_demo/main.py +262 -0
  92. atlas/mcp/env-demo/README.md +158 -0
  93. atlas/mcp/env-demo/main.py +199 -0
  94. atlas/mcp/file_size_test/main.py +284 -0
  95. atlas/mcp/filesystem/main.py +348 -0
  96. atlas/mcp/image_demo/main.py +113 -0
  97. atlas/mcp/image_demo/requirements.txt +4 -0
  98. atlas/mcp/logging_demo/README.md +72 -0
  99. atlas/mcp/logging_demo/main.py +103 -0
  100. atlas/mcp/many_tools_demo/main.py +50 -0
  101. atlas/mcp/order_database/__init__.py +0 -0
  102. atlas/mcp/order_database/main.py +369 -0
  103. atlas/mcp/order_database/signal_data.csv +1001 -0
  104. atlas/mcp/pdfbasic/main.py +394 -0
  105. atlas/mcp/pptx_generator/main.py +760 -0
  106. atlas/mcp/pptx_generator/requirements.txt +13 -0
  107. atlas/mcp/pptx_generator/run_test.sh +1 -0
  108. atlas/mcp/pptx_generator/test_pptx_generator_security.py +169 -0
  109. atlas/mcp/progress_demo/main.py +167 -0
  110. atlas/mcp/progress_updates_demo/QUICKSTART.md +273 -0
  111. atlas/mcp/progress_updates_demo/README.md +120 -0
  112. atlas/mcp/progress_updates_demo/main.py +497 -0
  113. atlas/mcp/prompts/main.py +222 -0
  114. atlas/mcp/public_demo/main.py +189 -0
  115. atlas/mcp/sampling_demo/README.md +169 -0
  116. atlas/mcp/sampling_demo/main.py +234 -0
  117. atlas/mcp/thinking/main.py +77 -0
  118. atlas/mcp/tool_planner/main.py +240 -0
  119. atlas/mcp/ui-demo/badmesh.png +0 -0
  120. atlas/mcp/ui-demo/main.py +383 -0
  121. atlas/mcp/ui-demo/templates/button_demo.html +32 -0
  122. atlas/mcp/ui-demo/templates/data_visualization.html +32 -0
  123. atlas/mcp/ui-demo/templates/form_demo.html +28 -0
  124. atlas/mcp/username-override-demo/README.md +320 -0
  125. atlas/mcp/username-override-demo/main.py +308 -0
  126. atlas/modules/__init__.py +0 -0
  127. atlas/modules/config/__init__.py +34 -0
  128. atlas/modules/config/cli.py +231 -0
  129. atlas/modules/config/config_manager.py +1096 -0
  130. atlas/modules/file_storage/__init__.py +22 -0
  131. atlas/modules/file_storage/cli.py +330 -0
  132. atlas/modules/file_storage/content_extractor.py +290 -0
  133. atlas/modules/file_storage/manager.py +295 -0
  134. atlas/modules/file_storage/mock_s3_client.py +402 -0
  135. atlas/modules/file_storage/s3_client.py +417 -0
  136. atlas/modules/llm/__init__.py +19 -0
  137. atlas/modules/llm/caller.py +287 -0
  138. atlas/modules/llm/litellm_caller.py +675 -0
  139. atlas/modules/llm/models.py +19 -0
  140. atlas/modules/mcp_tools/__init__.py +17 -0
  141. atlas/modules/mcp_tools/client.py +2123 -0
  142. atlas/modules/mcp_tools/token_storage.py +556 -0
  143. atlas/modules/prompts/prompt_provider.py +130 -0
  144. atlas/modules/rag/__init__.py +24 -0
  145. atlas/modules/rag/atlas_rag_client.py +336 -0
  146. atlas/modules/rag/client.py +129 -0
  147. atlas/routes/admin_routes.py +865 -0
  148. atlas/routes/config_routes.py +484 -0
  149. atlas/routes/feedback_routes.py +361 -0
  150. atlas/routes/files_routes.py +274 -0
  151. atlas/routes/health_routes.py +40 -0
  152. atlas/routes/mcp_auth_routes.py +223 -0
  153. atlas/server_cli.py +164 -0
  154. atlas/tests/conftest.py +20 -0
  155. atlas/tests/integration/test_mcp_auth_integration.py +152 -0
  156. atlas/tests/manual_test_sampling.py +87 -0
  157. atlas/tests/modules/mcp_tools/test_client_auth.py +226 -0
  158. atlas/tests/modules/mcp_tools/test_client_env.py +191 -0
  159. atlas/tests/test_admin_mcp_server_management_routes.py +141 -0
  160. atlas/tests/test_agent_roa.py +135 -0
  161. atlas/tests/test_app_factory_smoke.py +47 -0
  162. atlas/tests/test_approval_manager.py +439 -0
  163. atlas/tests/test_atlas_client.py +188 -0
  164. atlas/tests/test_atlas_rag_client.py +447 -0
  165. atlas/tests/test_atlas_rag_integration.py +224 -0
  166. atlas/tests/test_attach_file_flow.py +287 -0
  167. atlas/tests/test_auth_utils.py +165 -0
  168. atlas/tests/test_backend_public_url.py +185 -0
  169. atlas/tests/test_banner_logging.py +287 -0
  170. atlas/tests/test_capability_tokens_and_injection.py +203 -0
  171. atlas/tests/test_compliance_level.py +54 -0
  172. atlas/tests/test_compliance_manager.py +253 -0
  173. atlas/tests/test_config_manager.py +617 -0
  174. atlas/tests/test_config_manager_paths.py +12 -0
  175. atlas/tests/test_core_auth.py +18 -0
  176. atlas/tests/test_core_utils.py +190 -0
  177. atlas/tests/test_docker_env_sync.py +202 -0
  178. atlas/tests/test_domain_errors.py +329 -0
  179. atlas/tests/test_domain_whitelist.py +359 -0
  180. atlas/tests/test_elicitation_manager.py +408 -0
  181. atlas/tests/test_elicitation_routing.py +296 -0
  182. atlas/tests/test_env_demo_server.py +88 -0
  183. atlas/tests/test_error_classification.py +113 -0
  184. atlas/tests/test_error_flow_integration.py +116 -0
  185. atlas/tests/test_feedback_routes.py +333 -0
  186. atlas/tests/test_file_content_extraction.py +1134 -0
  187. atlas/tests/test_file_extraction_routes.py +158 -0
  188. atlas/tests/test_file_library.py +107 -0
  189. atlas/tests/test_file_manager_unit.py +18 -0
  190. atlas/tests/test_health_route.py +49 -0
  191. atlas/tests/test_http_client_stub.py +8 -0
  192. atlas/tests/test_imports_smoke.py +30 -0
  193. atlas/tests/test_interfaces_llm_response.py +9 -0
  194. atlas/tests/test_issue_access_denied_fix.py +136 -0
  195. atlas/tests/test_llm_env_expansion.py +836 -0
  196. atlas/tests/test_log_level_sensitive_data.py +285 -0
  197. atlas/tests/test_mcp_auth_routes.py +341 -0
  198. atlas/tests/test_mcp_client_auth.py +331 -0
  199. atlas/tests/test_mcp_data_injection.py +270 -0
  200. atlas/tests/test_mcp_get_authorized_servers.py +95 -0
  201. atlas/tests/test_mcp_hot_reload.py +512 -0
  202. atlas/tests/test_mcp_image_content.py +424 -0
  203. atlas/tests/test_mcp_logging.py +172 -0
  204. atlas/tests/test_mcp_progress_updates.py +313 -0
  205. atlas/tests/test_mcp_prompt_override_system_prompt.py +102 -0
  206. atlas/tests/test_mcp_prompts_server.py +39 -0
  207. atlas/tests/test_mcp_tool_result_parsing.py +296 -0
  208. atlas/tests/test_metrics_logger.py +56 -0
  209. atlas/tests/test_middleware_auth.py +379 -0
  210. atlas/tests/test_prompt_risk_and_acl.py +141 -0
  211. atlas/tests/test_rag_mcp_aggregator.py +204 -0
  212. atlas/tests/test_rag_mcp_service.py +224 -0
  213. atlas/tests/test_rate_limit_middleware.py +45 -0
  214. atlas/tests/test_routes_config_smoke.py +60 -0
  215. atlas/tests/test_routes_files_download_token.py +41 -0
  216. atlas/tests/test_routes_files_health.py +18 -0
  217. atlas/tests/test_runtime_imports.py +53 -0
  218. atlas/tests/test_sampling_integration.py +482 -0
  219. atlas/tests/test_security_admin_routes.py +61 -0
  220. atlas/tests/test_security_capability_tokens.py +65 -0
  221. atlas/tests/test_security_file_stats_scope.py +21 -0
  222. atlas/tests/test_security_header_injection.py +191 -0
  223. atlas/tests/test_security_headers_and_filename.py +63 -0
  224. atlas/tests/test_shared_session_repository.py +101 -0
  225. atlas/tests/test_system_prompt_loading.py +181 -0
  226. atlas/tests/test_token_storage.py +505 -0
  227. atlas/tests/test_tool_approval_config.py +93 -0
  228. atlas/tests/test_tool_approval_utils.py +356 -0
  229. atlas/tests/test_tool_authorization_group_filtering.py +223 -0
  230. atlas/tests/test_tool_details_in_config.py +108 -0
  231. atlas/tests/test_tool_planner.py +300 -0
  232. atlas/tests/test_unified_rag_service.py +398 -0
  233. atlas/tests/test_username_override_in_approval.py +258 -0
  234. atlas/tests/test_websocket_auth_header.py +168 -0
  235. atlas/version.py +6 -0
  236. atlas_chat-0.1.0.data/data/.env.example +253 -0
  237. atlas_chat-0.1.0.data/data/config/defaults/compliance-levels.json +44 -0
  238. atlas_chat-0.1.0.data/data/config/defaults/domain-whitelist.json +123 -0
  239. atlas_chat-0.1.0.data/data/config/defaults/file-extractors.json +74 -0
  240. atlas_chat-0.1.0.data/data/config/defaults/help-config.json +198 -0
  241. atlas_chat-0.1.0.data/data/config/defaults/llmconfig-buggy.yml +11 -0
  242. atlas_chat-0.1.0.data/data/config/defaults/llmconfig.yml +19 -0
  243. atlas_chat-0.1.0.data/data/config/defaults/mcp.json +138 -0
  244. atlas_chat-0.1.0.data/data/config/defaults/rag-sources.json +17 -0
  245. atlas_chat-0.1.0.data/data/config/defaults/splash-config.json +16 -0
  246. atlas_chat-0.1.0.dist-info/METADATA +236 -0
  247. atlas_chat-0.1.0.dist-info/RECORD +250 -0
  248. atlas_chat-0.1.0.dist-info/WHEEL +5 -0
  249. atlas_chat-0.1.0.dist-info/entry_points.txt +4 -0
  250. atlas_chat-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,394 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ PDF Analyzer MCP Server using FastMCP.
4
+ Provides PDF text analysis and report generation through the MCP protocol.
5
+ """
6
+
7
+ import base64
8
+ import io
9
+ import logging
10
+ import os
11
+ import re
12
+ from collections import Counter
13
+ from typing import Annotated, Any, Dict, Optional
14
+
15
+ import requests
16
+ from fastmcp import FastMCP
17
+
18
+ # This tool requires the PyPDF2 and reportlab libraries.
19
+ # Install them using: pip install PyPDF2 reportlab
20
+ from PyPDF2 import PdfReader
21
+ from reportlab.lib.pagesizes import letter
22
+ from reportlab.lib.units import inch
23
+ from reportlab.pdfgen import canvas
24
+
25
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# FastMCP server instance; the tool functions below register on it via @mcp.tool.
mcp = FastMCP("PDF_Analyzer")
28
+
29
+
30
def _analyze_pdf_content(instructions: str, filename: str, original_filename: Optional[str] = None) -> Dict[str, Any]:
    """
    Core PDF analysis logic shared by the MCP tools in this module.

    Loads a PDF, extracts the text of every page, and counts word frequencies.

    Args:
        instructions: Free-form instructions from the caller. They are only
            logged and do not influence the analysis.
        filename: Locator for the PDF content. One of:
            - an absolute ``http://`` / ``https://`` URL, fetched as-is;
            - a path starting with ``/``, resolved against the ``BACKEND_URL``
              environment variable (default ``http://localhost:8000``);
            - anything else, which is treated as Base64-encoded PDF bytes.
        original_filename: Optional display name. It is reported in the result
            instead of ``filename`` and is accepted in place of ``filename``
            for the ``.pdf`` extension check.

    Returns:
        ``{"results": {...}}``. On success the inner dictionary holds
        ``operation``, ``filename``, ``total_word_count`` and ``top_100_words``
        (plus ``status`` and ``message`` when the PDF has no extractable
        text). On failure it holds a single ``error`` key. Exceptions are
        caught and converted into the error form rather than propagated.
    """
    try:
        logger.info("Instructions: %s", instructions)

        # 1. Validate that either name identifies a PDF (case-insensitive).
        has_pdf_name = filename.lower().endswith('.pdf') or bool(
            original_filename and original_filename.lower().endswith('.pdf')
        )
        if not has_pdf_name:
            return {"results": {"error": "Invalid file type. This tool only accepts PDF files."}}

        # 2. Obtain the raw PDF bytes, either by download or by Base64 decoding.
        # Any leading "/" (which includes "/api/...") marks a backend-relative URL.
        if filename.startswith(("http://", "https://", "/")):
            if filename.startswith("/"):
                # Default to localhost:8000 for local development. Strip a
                # trailing slash so "http://host/" + "/api/x" does not become
                # "http://host//api/x".
                backend_url = os.getenv("BACKEND_URL", "http://localhost:8000").rstrip("/")
                url = f"{backend_url}{filename}"
            else:
                url = filename

            # NOTE(review): the URL comes straight from the tool caller and is
            # fetched without host restrictions -- confirm this is acceptable
            # for the deployment (possible server-side request forgery).
            logger.info("Step 9: Downloading file from URL: %s", url)
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            pdf_stream = io.BytesIO(response.content)
        else:
            # Assume it's base64-encoded data
            pdf_stream = io.BytesIO(base64.b64decode(filename))

        reader = PdfReader(pdf_stream)

        # Collect per-page text and join once; pages without text are skipped.
        page_texts = (page.extract_text() for page in reader.pages)
        full_text = "".join(f"{page_text}\n" for page_text in page_texts if page_text)

        if not full_text.strip():
            return {
                "results": {
                    "operation": "pdf_analysis",
                    "filename": original_filename or filename,
                    "status": "Success",
                    "message": "PDF contained no extractable text.",
                    "total_word_count": 0,
                    "top_100_words": {}
                }
            }

        # 3. Split the lowercased text into word-like sequences.
        words = re.findall(r'\b\w+\b', full_text.lower())

        # 4. Count frequencies; most_common(100) yields (word, count) pairs
        # ordered from most to least frequent.
        top_100_words_dict = dict(Counter(words).most_common(100))

        # 5. Return the successful result
        return {
            "results": {
                "operation": "pdf_analysis",
                "filename": original_filename or filename,
                "total_word_count": len(words),
                "top_100_words": top_100_words_dict
            }
        }

    except Exception as e:
        # 6. Record the traceback through the module logger and report the
        # failure to the caller as a result dictionary instead of raising.
        logger.exception("PDF analysis failed")
        return {"results": {"error": f"PDF analysis failed: {str(e)}"}}
123
+
124
+
125
@mcp.tool
def analyze_pdf(
    instructions: Annotated[str, "Instructions for the tool, not used in this implementation"],
    filename: Annotated[str, "The name of the file, which must have a '.pdf' extension"],
    original_filename: Optional[str] = None
) -> Dict[str, Any]:
    """
    Extract text from a PDF document and compute word frequency statistics.

    This PDF processing tool provides text analytics for PDF documents:

    **PDF Text Extraction:**
    - Extracts text from all pages in the PDF document
    - Text extraction only: no OCR is performed, so scanned PDFs without a
      text layer are reported as containing no extractable text

    **Text Analysis Features:**
    - Complete word count across the entire document
    - Top 100 most frequently used words with occurrence counts
    - Case-insensitive counting (text is lowercased before words are counted)

    **Use Cases:**
    - Academic paper and research document analysis
    - Legal document keyword extraction and analysis
    - Content marketing and SEO keyword research
    - Document classification and categorization
    - Contract and agreement content analysis

    **Output Format:**
    - Structured word frequency data
    - Total document word count
    - Top 100 words with occurrence frequencies

    Args:
        instructions: Processing instructions or requirements (logged only;
            they do not affect the analysis)
        filename: PDF locator: an http(s) URL, a backend-relative path
            starting with "/", or Base64-encoded PDF data. Either this value
            or original_filename must end with ".pdf".
        original_filename: The original name of the file.

    Returns:
        Dictionary with a "results" entry containing:
        - operation: Processing type confirmation
        - filename: Source PDF file name
        - total_word_count: Complete document word count
        - top_100_words: Dictionary of most frequent words with counts
        Or an "error" message if the PDF cannot be processed. A PDF with no
        extractable text yields a zero word count and an explanatory message.
    """
    logger.info("Step 8: Entering analyze_pdf tool")
    return _analyze_pdf_content(instructions, filename, original_filename)
202
+
203
+
204
def _report_stem(source_name: str) -> str:
    """Return a bare document name: no directories, URL query, or ``.pdf`` suffix."""
    name = source_name
    if name.startswith(("http://", "https://", "/")):
        # URL-style locators may carry a query string or fragment.
        name = name.split("?", 1)[0].split("#", 1)[0]
    # Keep only the last path component so the artifact name has no separators.
    name = name.replace("\\", "/").rstrip("/").rsplit("/", 1)[-1]
    # Drop only a trailing extension, matching the case-insensitive validation.
    if name.lower().endswith(".pdf"):
        name = name[:-len(".pdf")]
    return name or "document"


@mcp.tool
def generate_report_about_pdf(
    instructions: Annotated[str, "Instructions for the tool, not used in this implementation"],
    filename: Annotated[str, "The name of the file, which must have a '.pdf' extension"],
    original_filename: Optional[str] = None
) -> Dict[str, Any]:
    """
    Analyze a PDF and return the word frequency results as a generated PDF report.

    **Workflow:**
    - Performs the same text extraction and word frequency analysis as analyze_pdf
    - Renders the results into a new PDF report using ReportLab
    - Returns the report as a Base64-encoded, downloadable artifact

    **Report Contents:**
    - Report title and source document name
    - Total word count
    - Top 100 most frequent words with occurrence counts, laid out in three columns
    - A new page is started when the word list reaches the bottom margin

    **Use Cases:**
    - Academic research document analysis reporting
    - Legal document content analysis
    - Content marketing keyword research documentation
    - Document classification and content audit reports

    Args:
        instructions: Report generation instructions or requirements (passed
            to the analysis step, which only logs them)
        filename: PDF locator: an http(s) URL, a backend-relative path
            starting with "/", or Base64-encoded PDF data. Either this value
            or original_filename must end with ".pdf".
        original_filename: The original name of the file.

    Returns:
        Dictionary containing:
        - results: Report generation summary and success confirmation
        - artifacts: The generated PDF report (Base64 data, MIME type, size)
        - display: Viewer configuration for presenting the report
        - meta_data: Source file name and total word count
        Or a "results" entry with an "error" message if the PDF cannot be
        processed or report generation fails
    """
    logger.info("Step 8: Entering generate_report_about_pdf tool")
    # --- 1. Perform the same analysis as analyze_pdf ---
    analysis_result = _analyze_pdf_content(instructions, filename, original_filename)
    if "error" in analysis_result.get("results", {}):
        return analysis_result

    # --- 2. Generate the PDF report ---
    try:
        results_data = analysis_result["results"]
        source_label = results_data.get("filename", "Unknown")
        total_word_count = results_data.get("total_word_count", 0)
        top_100_words = results_data.get("top_100_words", {})

        # Create PDF report in memory
        pdf_buffer = io.BytesIO()
        c = canvas.Canvas(pdf_buffer, pagesize=letter)
        _, height = letter

        # Title
        c.setFont("Helvetica-Bold", 16)
        c.drawString(1 * inch, height - 1 * inch, "PDF Analysis Report")

        # Document info and total word count, each as "label  value".
        label_font, label_size = "Helvetica-Bold", 12
        summary_rows = (
            ("Document:", source_label, height - 1.5 * inch),
            ("Total Words:", str(total_word_count), height - 2 * inch),
        )
        for label, value, y in summary_rows:
            c.setFont(label_font, label_size)
            c.drawString(1 * inch, y, label)
            # Start the value after the measured label width; a fixed
            # 1.5 inch offset leaves only 0.5 inch for the bold label, so the
            # value text could be drawn on top of it.
            label_end = 1 * inch + c.stringWidth(label, label_font, label_size)
            c.setFont("Helvetica", 10)
            c.drawString(max(1.5 * inch, label_end + 0.1 * inch), y, value)

        # Top 100 words header
        c.setFont(label_font, label_size)
        c.drawString(1 * inch, height - 2.5 * inch, "Top 100 Most Frequent Words:")

        # Display top words in three columns
        c.setFont("Helvetica", 9)
        column_x = (1 * inch, 3.5 * inch, 6 * inch)
        y_position = height - 3 * inch

        for idx, (word, count) in enumerate(top_100_words.items()):
            col = idx % 3

            # Move to next row after every 3 words
            if col == 0 and idx > 0:
                y_position -= 0.2 * inch

            # Check if we need a new page
            if y_position < 1 * inch:
                c.showPage()
                # The font is set again after starting a new page.
                c.setFont("Helvetica", 9)
                y_position = height - 1 * inch

            c.drawString(column_x[col], y_position, f"{word}: {count}")

        c.save()

        # Get PDF bytes and encode to base64
        pdf_bytes = pdf_buffer.getvalue()
        pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')

        # --- 3. Return the structured response (v2 MCP compliant) ---
        # Build the artifact name from the bare document name so it carries no
        # path segments, query string, or duplicated ".pdf" extension.
        report_name = f"analysis_report_{_report_stem(results_data.get('filename', 'document'))}.pdf"

        return {
            "results": {
                "operation": "pdf_report_generation",
                "status": "Success",
                "message": f"Generated analysis report for {results_data.get('filename', 'document')}",
                "total_word_count": total_word_count,
                "words_analyzed": len(top_100_words)
            },
            "artifacts": [
                {
                    "name": report_name,
                    "b64": pdf_base64,
                    "mime": "application/pdf",
                    "size": len(pdf_bytes),
                    "description": "PDF analysis report with word frequency statistics"
                }
            ],
            "display": {
                "open_canvas": True,
                "primary_file": report_name,
                "mode": "replace",
                "viewer_hint": "pdf"
            },
            "meta_data": {
                "source_file": source_label,
                "total_words": total_word_count
            }
        }

    except Exception as e:
        # Record the traceback through the module logger and report the
        # failure to the caller as a result dictionary instead of raising.
        logger.exception("Report generation failed")
        return {
            "results": {
                "error": f"Report generation failed: {str(e)}"
            }
        }
390
+
391
+
392
+
393
# Run the FastMCP server only when this file is executed directly, not on import.
if __name__ == "__main__":
    mcp.run()