atlas-chat 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. atlas/__init__.py +40 -0
  2. atlas/application/__init__.py +7 -0
  3. atlas/application/chat/__init__.py +7 -0
  4. atlas/application/chat/agent/__init__.py +10 -0
  5. atlas/application/chat/agent/act_loop.py +179 -0
  6. atlas/application/chat/agent/factory.py +142 -0
  7. atlas/application/chat/agent/protocols.py +46 -0
  8. atlas/application/chat/agent/react_loop.py +338 -0
  9. atlas/application/chat/agent/think_act_loop.py +171 -0
  10. atlas/application/chat/approval_manager.py +151 -0
  11. atlas/application/chat/elicitation_manager.py +191 -0
  12. atlas/application/chat/events/__init__.py +1 -0
  13. atlas/application/chat/events/agent_event_relay.py +112 -0
  14. atlas/application/chat/modes/__init__.py +1 -0
  15. atlas/application/chat/modes/agent.py +125 -0
  16. atlas/application/chat/modes/plain.py +74 -0
  17. atlas/application/chat/modes/rag.py +81 -0
  18. atlas/application/chat/modes/tools.py +179 -0
  19. atlas/application/chat/orchestrator.py +213 -0
  20. atlas/application/chat/policies/__init__.py +1 -0
  21. atlas/application/chat/policies/tool_authorization.py +99 -0
  22. atlas/application/chat/preprocessors/__init__.py +1 -0
  23. atlas/application/chat/preprocessors/message_builder.py +92 -0
  24. atlas/application/chat/preprocessors/prompt_override_service.py +104 -0
  25. atlas/application/chat/service.py +454 -0
  26. atlas/application/chat/utilities/__init__.py +6 -0
  27. atlas/application/chat/utilities/error_handler.py +367 -0
  28. atlas/application/chat/utilities/event_notifier.py +546 -0
  29. atlas/application/chat/utilities/file_processor.py +613 -0
  30. atlas/application/chat/utilities/tool_executor.py +789 -0
  31. atlas/atlas_chat_cli.py +347 -0
  32. atlas/atlas_client.py +238 -0
  33. atlas/core/__init__.py +0 -0
  34. atlas/core/auth.py +205 -0
  35. atlas/core/authorization_manager.py +27 -0
  36. atlas/core/capabilities.py +123 -0
  37. atlas/core/compliance.py +215 -0
  38. atlas/core/domain_whitelist.py +147 -0
  39. atlas/core/domain_whitelist_middleware.py +82 -0
  40. atlas/core/http_client.py +28 -0
  41. atlas/core/log_sanitizer.py +102 -0
  42. atlas/core/metrics_logger.py +59 -0
  43. atlas/core/middleware.py +131 -0
  44. atlas/core/otel_config.py +242 -0
  45. atlas/core/prompt_risk.py +200 -0
  46. atlas/core/rate_limit.py +0 -0
  47. atlas/core/rate_limit_middleware.py +64 -0
  48. atlas/core/security_headers_middleware.py +51 -0
  49. atlas/domain/__init__.py +37 -0
  50. atlas/domain/chat/__init__.py +1 -0
  51. atlas/domain/chat/dtos.py +85 -0
  52. atlas/domain/errors.py +96 -0
  53. atlas/domain/messages/__init__.py +12 -0
  54. atlas/domain/messages/models.py +160 -0
  55. atlas/domain/rag_mcp_service.py +664 -0
  56. atlas/domain/sessions/__init__.py +7 -0
  57. atlas/domain/sessions/models.py +36 -0
  58. atlas/domain/unified_rag_service.py +371 -0
  59. atlas/infrastructure/__init__.py +10 -0
  60. atlas/infrastructure/app_factory.py +135 -0
  61. atlas/infrastructure/events/__init__.py +1 -0
  62. atlas/infrastructure/events/cli_event_publisher.py +140 -0
  63. atlas/infrastructure/events/websocket_publisher.py +140 -0
  64. atlas/infrastructure/sessions/in_memory_repository.py +56 -0
  65. atlas/infrastructure/transport/__init__.py +7 -0
  66. atlas/infrastructure/transport/websocket_connection_adapter.py +33 -0
  67. atlas/init_cli.py +226 -0
  68. atlas/interfaces/__init__.py +15 -0
  69. atlas/interfaces/events.py +134 -0
  70. atlas/interfaces/llm.py +54 -0
  71. atlas/interfaces/rag.py +40 -0
  72. atlas/interfaces/sessions.py +75 -0
  73. atlas/interfaces/tools.py +57 -0
  74. atlas/interfaces/transport.py +24 -0
  75. atlas/main.py +564 -0
  76. atlas/mcp/api_key_demo/README.md +76 -0
  77. atlas/mcp/api_key_demo/main.py +172 -0
  78. atlas/mcp/api_key_demo/run.sh +56 -0
  79. atlas/mcp/basictable/main.py +147 -0
  80. atlas/mcp/calculator/main.py +149 -0
  81. atlas/mcp/code-executor/execution_engine.py +98 -0
  82. atlas/mcp/code-executor/execution_environment.py +95 -0
  83. atlas/mcp/code-executor/main.py +528 -0
  84. atlas/mcp/code-executor/result_processing.py +276 -0
  85. atlas/mcp/code-executor/script_generation.py +195 -0
  86. atlas/mcp/code-executor/security_checker.py +140 -0
  87. atlas/mcp/corporate_cars/main.py +437 -0
  88. atlas/mcp/csv_reporter/main.py +545 -0
  89. atlas/mcp/duckduckgo/main.py +182 -0
  90. atlas/mcp/elicitation_demo/README.md +171 -0
  91. atlas/mcp/elicitation_demo/main.py +262 -0
  92. atlas/mcp/env-demo/README.md +158 -0
  93. atlas/mcp/env-demo/main.py +199 -0
  94. atlas/mcp/file_size_test/main.py +284 -0
  95. atlas/mcp/filesystem/main.py +348 -0
  96. atlas/mcp/image_demo/main.py +113 -0
  97. atlas/mcp/image_demo/requirements.txt +4 -0
  98. atlas/mcp/logging_demo/README.md +72 -0
  99. atlas/mcp/logging_demo/main.py +103 -0
  100. atlas/mcp/many_tools_demo/main.py +50 -0
  101. atlas/mcp/order_database/__init__.py +0 -0
  102. atlas/mcp/order_database/main.py +369 -0
  103. atlas/mcp/order_database/signal_data.csv +1001 -0
  104. atlas/mcp/pdfbasic/main.py +394 -0
  105. atlas/mcp/pptx_generator/main.py +760 -0
  106. atlas/mcp/pptx_generator/requirements.txt +13 -0
  107. atlas/mcp/pptx_generator/run_test.sh +1 -0
  108. atlas/mcp/pptx_generator/test_pptx_generator_security.py +169 -0
  109. atlas/mcp/progress_demo/main.py +167 -0
  110. atlas/mcp/progress_updates_demo/QUICKSTART.md +273 -0
  111. atlas/mcp/progress_updates_demo/README.md +120 -0
  112. atlas/mcp/progress_updates_demo/main.py +497 -0
  113. atlas/mcp/prompts/main.py +222 -0
  114. atlas/mcp/public_demo/main.py +189 -0
  115. atlas/mcp/sampling_demo/README.md +169 -0
  116. atlas/mcp/sampling_demo/main.py +234 -0
  117. atlas/mcp/thinking/main.py +77 -0
  118. atlas/mcp/tool_planner/main.py +240 -0
  119. atlas/mcp/ui-demo/badmesh.png +0 -0
  120. atlas/mcp/ui-demo/main.py +383 -0
  121. atlas/mcp/ui-demo/templates/button_demo.html +32 -0
  122. atlas/mcp/ui-demo/templates/data_visualization.html +32 -0
  123. atlas/mcp/ui-demo/templates/form_demo.html +28 -0
  124. atlas/mcp/username-override-demo/README.md +320 -0
  125. atlas/mcp/username-override-demo/main.py +308 -0
  126. atlas/modules/__init__.py +0 -0
  127. atlas/modules/config/__init__.py +34 -0
  128. atlas/modules/config/cli.py +231 -0
  129. atlas/modules/config/config_manager.py +1096 -0
  130. atlas/modules/file_storage/__init__.py +22 -0
  131. atlas/modules/file_storage/cli.py +330 -0
  132. atlas/modules/file_storage/content_extractor.py +290 -0
  133. atlas/modules/file_storage/manager.py +295 -0
  134. atlas/modules/file_storage/mock_s3_client.py +402 -0
  135. atlas/modules/file_storage/s3_client.py +417 -0
  136. atlas/modules/llm/__init__.py +19 -0
  137. atlas/modules/llm/caller.py +287 -0
  138. atlas/modules/llm/litellm_caller.py +675 -0
  139. atlas/modules/llm/models.py +19 -0
  140. atlas/modules/mcp_tools/__init__.py +17 -0
  141. atlas/modules/mcp_tools/client.py +2123 -0
  142. atlas/modules/mcp_tools/token_storage.py +556 -0
  143. atlas/modules/prompts/prompt_provider.py +130 -0
  144. atlas/modules/rag/__init__.py +24 -0
  145. atlas/modules/rag/atlas_rag_client.py +336 -0
  146. atlas/modules/rag/client.py +129 -0
  147. atlas/routes/admin_routes.py +865 -0
  148. atlas/routes/config_routes.py +484 -0
  149. atlas/routes/feedback_routes.py +361 -0
  150. atlas/routes/files_routes.py +274 -0
  151. atlas/routes/health_routes.py +40 -0
  152. atlas/routes/mcp_auth_routes.py +223 -0
  153. atlas/server_cli.py +164 -0
  154. atlas/tests/conftest.py +20 -0
  155. atlas/tests/integration/test_mcp_auth_integration.py +152 -0
  156. atlas/tests/manual_test_sampling.py +87 -0
  157. atlas/tests/modules/mcp_tools/test_client_auth.py +226 -0
  158. atlas/tests/modules/mcp_tools/test_client_env.py +191 -0
  159. atlas/tests/test_admin_mcp_server_management_routes.py +141 -0
  160. atlas/tests/test_agent_roa.py +135 -0
  161. atlas/tests/test_app_factory_smoke.py +47 -0
  162. atlas/tests/test_approval_manager.py +439 -0
  163. atlas/tests/test_atlas_client.py +188 -0
  164. atlas/tests/test_atlas_rag_client.py +447 -0
  165. atlas/tests/test_atlas_rag_integration.py +224 -0
  166. atlas/tests/test_attach_file_flow.py +287 -0
  167. atlas/tests/test_auth_utils.py +165 -0
  168. atlas/tests/test_backend_public_url.py +185 -0
  169. atlas/tests/test_banner_logging.py +287 -0
  170. atlas/tests/test_capability_tokens_and_injection.py +203 -0
  171. atlas/tests/test_compliance_level.py +54 -0
  172. atlas/tests/test_compliance_manager.py +253 -0
  173. atlas/tests/test_config_manager.py +617 -0
  174. atlas/tests/test_config_manager_paths.py +12 -0
  175. atlas/tests/test_core_auth.py +18 -0
  176. atlas/tests/test_core_utils.py +190 -0
  177. atlas/tests/test_docker_env_sync.py +202 -0
  178. atlas/tests/test_domain_errors.py +329 -0
  179. atlas/tests/test_domain_whitelist.py +359 -0
  180. atlas/tests/test_elicitation_manager.py +408 -0
  181. atlas/tests/test_elicitation_routing.py +296 -0
  182. atlas/tests/test_env_demo_server.py +88 -0
  183. atlas/tests/test_error_classification.py +113 -0
  184. atlas/tests/test_error_flow_integration.py +116 -0
  185. atlas/tests/test_feedback_routes.py +333 -0
  186. atlas/tests/test_file_content_extraction.py +1134 -0
  187. atlas/tests/test_file_extraction_routes.py +158 -0
  188. atlas/tests/test_file_library.py +107 -0
  189. atlas/tests/test_file_manager_unit.py +18 -0
  190. atlas/tests/test_health_route.py +49 -0
  191. atlas/tests/test_http_client_stub.py +8 -0
  192. atlas/tests/test_imports_smoke.py +30 -0
  193. atlas/tests/test_interfaces_llm_response.py +9 -0
  194. atlas/tests/test_issue_access_denied_fix.py +136 -0
  195. atlas/tests/test_llm_env_expansion.py +836 -0
  196. atlas/tests/test_log_level_sensitive_data.py +285 -0
  197. atlas/tests/test_mcp_auth_routes.py +341 -0
  198. atlas/tests/test_mcp_client_auth.py +331 -0
  199. atlas/tests/test_mcp_data_injection.py +270 -0
  200. atlas/tests/test_mcp_get_authorized_servers.py +95 -0
  201. atlas/tests/test_mcp_hot_reload.py +512 -0
  202. atlas/tests/test_mcp_image_content.py +424 -0
  203. atlas/tests/test_mcp_logging.py +172 -0
  204. atlas/tests/test_mcp_progress_updates.py +313 -0
  205. atlas/tests/test_mcp_prompt_override_system_prompt.py +102 -0
  206. atlas/tests/test_mcp_prompts_server.py +39 -0
  207. atlas/tests/test_mcp_tool_result_parsing.py +296 -0
  208. atlas/tests/test_metrics_logger.py +56 -0
  209. atlas/tests/test_middleware_auth.py +379 -0
  210. atlas/tests/test_prompt_risk_and_acl.py +141 -0
  211. atlas/tests/test_rag_mcp_aggregator.py +204 -0
  212. atlas/tests/test_rag_mcp_service.py +224 -0
  213. atlas/tests/test_rate_limit_middleware.py +45 -0
  214. atlas/tests/test_routes_config_smoke.py +60 -0
  215. atlas/tests/test_routes_files_download_token.py +41 -0
  216. atlas/tests/test_routes_files_health.py +18 -0
  217. atlas/tests/test_runtime_imports.py +53 -0
  218. atlas/tests/test_sampling_integration.py +482 -0
  219. atlas/tests/test_security_admin_routes.py +61 -0
  220. atlas/tests/test_security_capability_tokens.py +65 -0
  221. atlas/tests/test_security_file_stats_scope.py +21 -0
  222. atlas/tests/test_security_header_injection.py +191 -0
  223. atlas/tests/test_security_headers_and_filename.py +63 -0
  224. atlas/tests/test_shared_session_repository.py +101 -0
  225. atlas/tests/test_system_prompt_loading.py +181 -0
  226. atlas/tests/test_token_storage.py +505 -0
  227. atlas/tests/test_tool_approval_config.py +93 -0
  228. atlas/tests/test_tool_approval_utils.py +356 -0
  229. atlas/tests/test_tool_authorization_group_filtering.py +223 -0
  230. atlas/tests/test_tool_details_in_config.py +108 -0
  231. atlas/tests/test_tool_planner.py +300 -0
  232. atlas/tests/test_unified_rag_service.py +398 -0
  233. atlas/tests/test_username_override_in_approval.py +258 -0
  234. atlas/tests/test_websocket_auth_header.py +168 -0
  235. atlas/version.py +6 -0
  236. atlas_chat-0.1.0.data/data/.env.example +253 -0
  237. atlas_chat-0.1.0.data/data/config/defaults/compliance-levels.json +44 -0
  238. atlas_chat-0.1.0.data/data/config/defaults/domain-whitelist.json +123 -0
  239. atlas_chat-0.1.0.data/data/config/defaults/file-extractors.json +74 -0
  240. atlas_chat-0.1.0.data/data/config/defaults/help-config.json +198 -0
  241. atlas_chat-0.1.0.data/data/config/defaults/llmconfig-buggy.yml +11 -0
  242. atlas_chat-0.1.0.data/data/config/defaults/llmconfig.yml +19 -0
  243. atlas_chat-0.1.0.data/data/config/defaults/mcp.json +138 -0
  244. atlas_chat-0.1.0.data/data/config/defaults/rag-sources.json +17 -0
  245. atlas_chat-0.1.0.data/data/config/defaults/splash-config.json +16 -0
  246. atlas_chat-0.1.0.dist-info/METADATA +236 -0
  247. atlas_chat-0.1.0.dist-info/RECORD +250 -0
  248. atlas_chat-0.1.0.dist-info/WHEEL +5 -0
  249. atlas_chat-0.1.0.dist-info/entry_points.txt +4 -0
  250. atlas_chat-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,545 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CSV Reporter MCP Server using FastMCP.
4
+
5
+ Demonstrates two v2 behaviors described in v2_mcp_note.md:
6
+ 1) filename(s) to downloadable URLs: If the backend rewrites filename/file_names
7
+ to /api/files/download/... URLs, this server will fetch and process them.
8
+ It also accepts file_data_base64 as a fallback for content delivery.
9
+ 2) username injection: If a `username` parameter is defined in the tool schema,
10
+ the backend can inject the authenticated user's email/username. This server
11
+ trusts the provided username value and echoes it in outputs.
12
+
13
+ Tools:
14
+ - generate_csv_report: Build a summary report for a single CSV.
15
+ - summarize_multiple_csvs: Summarize multiple CSVs (using file_names[]).
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import base64
21
+ import io
22
+ import os
23
+ from typing import Annotated, Any, Dict, List, Optional
24
+
25
+ import matplotlib.pyplot as plt
26
+ import numpy as np
27
+ import pandas as pd
28
+ import requests
29
+ import seaborn as sns
30
+ from fastmcp import FastMCP
31
+
32
# FastMCP server instance named "CSV_Reporter"; the tool functions in this
# file are registered on it through the @mcp.tool decorator.
mcp = FastMCP("CSV_Reporter")
33
+
34
+
35
# Absolute path of the directory three levels above the one holding this file.
_PROJECT_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir)
)
# Directory searched for locally stored CSV files. The CHATUI_RUNTIME_UPLOADS
# environment variable wins; otherwise <project root>/runtime/uploads is used.
RUNTIME_UPLOADS = os.environ.get(
    "CHATUI_RUNTIME_UPLOADS",
    os.path.join(_PROJECT_ROOT, "runtime", "uploads"),
)
39
+
40
+
41
def _is_http_url(s: str) -> bool:
    """Return True when *s* is a string starting with ``http://`` or ``https://``.

    Non-string values (for example ``None``) yield False instead of raising,
    which mirrors the type guard in ``_is_backend_download_path``.
    """
    # str.startswith accepts a tuple, so both schemes are checked in one call.
    return isinstance(s, str) and s.startswith(("http://", "https://"))
43
+
44
+
45
def _is_backend_download_path(s: str) -> bool:
    """Tell whether *s* is a backend-relative download path.

    Such a path is a string that begins with ``/api/files/download/``. Any
    non-string value is reported as False.
    """
    if not isinstance(s, str):
        return False
    return s.startswith("/api/files/download/")
48
+
49
+
50
def _backend_base_url() -> str:
    """Return the backend base URL used to resolve relative download paths.

    The ``CHATUI_BACKEND_BASE_URL`` environment variable is returned when it
    is set (even to an empty string); otherwise the default
    ``http://127.0.0.1:8000`` is returned.
    """
    try:
        return os.environ["CHATUI_BACKEND_BASE_URL"]
    except KeyError:
        return "http://127.0.0.1:8000"
56
+
57
+
58
+
59
+
60
def _load_csv_bytes(filename: str, file_data_base64: str = "") -> bytes:
    """Return raw CSV bytes from base64 content, a URL, or a local file.

    Sources are tried in this order:
      1) ``file_data_base64`` if provided (decoded and returned).
      2) ``filename`` starting with ``/api/files/download/``: fetched with a
         GET request against the backend base URL.
      3) ``filename`` that is an http(s) URL: fetched with a GET request.
      4) Otherwise ``filename`` is read from disk. Relative names are looked
         up under ``RUNTIME_UPLOADS`` and must resolve inside that directory.

    Args:
        filename: Download path, URL, or local file name.
        file_data_base64: Optional base64-encoded file content.

    Returns:
        The file content as bytes.

    Raises:
        FileNotFoundError: No source was given, the local path is missing or
            is not a regular file, or a relative name resolves outside
            ``RUNTIME_UPLOADS``.
        requests.HTTPError: A download answered with an error status.
    """
    if file_data_base64:
        return base64.b64decode(file_data_base64)

    if not filename:
        raise FileNotFoundError("No filename or file data provided")

    # Support backend-injected relative download URLs by resolving with a base URL
    if _is_backend_download_path(filename):
        url = _backend_base_url().rstrip("/") + filename
        r = requests.get(url, timeout=20)
        r.raise_for_status()
        return r.content

    if _is_http_url(filename):
        r = requests.get(filename, timeout=20)
        r.raise_for_status()
        return r.content

    # Fallback: treat filename as a key under runtime uploads
    if os.path.isabs(filename):
        # NOTE(review): absolute paths are still read exactly as given to stay
        # backward-compatible. With untrusted tool input this allows reading
        # any file the process can access - confirm whether callers need it.
        local_path = filename
    else:
        local_path = os.path.join(RUNTIME_UPLOADS, filename)
        # Resolve ".." segments and symlinks, then require that the result is
        # still inside the uploads directory (blocks "../" traversal).
        uploads_root = os.path.normcase(os.path.realpath(RUNTIME_UPLOADS))
        resolved = os.path.normcase(os.path.realpath(local_path))
        inside_prefix = os.path.join(uploads_root, "")
        if resolved != uploads_root and not resolved.startswith(inside_prefix):
            raise FileNotFoundError(f"CSV not found: {local_path}")

    # isfile (not exists) so that a directory is reported as "not found"
    # instead of failing later inside open().
    if not os.path.isfile(local_path):
        raise FileNotFoundError(f"CSV not found: {local_path}")
    with open(local_path, "rb") as f:
        return f.read()
96
+
97
+
98
def _dataframe_report(df: pd.DataFrame, *, username: str, source_name: str) -> str:
    """Create a human-readable plain-text report for a DataFrame.

    Sections, in order: title and requester, shape, column dtypes,
    missing-value counts (only when at least one column has missing values),
    ``describe()`` summary of numeric columns (only when there are any), and
    the first five rows.

    Args:
        df: The data to describe.
        username: Name printed on the "Requested by" line.
        source_name: Name printed on the title line.

    Returns:
        The report lines joined with newlines.
    """
    lines: List[str] = [
        f"CSV Report for: {source_name}",
        f"Requested by: {username}",
        "",
        f"Shape: {df.shape[0]} rows x {df.shape[1]} columns",
        "",
        # Column dtypes
        "Column types:",
        df.dtypes.to_string(),
        "",
    ]
    # Missing values
    na_counts = df.isna().sum()
    if (na_counts > 0).any():
        lines.extend(["Missing values per column:", na_counts.to_string(), ""])
    # Numeric summary
    num_df = df.select_dtypes(include=["number"])  # type: ignore[arg-type]
    if not num_df.empty:
        desc = num_df.describe().transpose()
        lines.extend(["Numeric columns summary:", desc.to_string(), ""])
    # Sample rows are best-effort. Render them BEFORE adding the header so a
    # display failure omits the whole section instead of leaving a dangling
    # "Sample (first 5 rows):" line with nothing under it.
    try:
        sample_text = df.head(5).to_string(index=False)
    except Exception:  # noqa: BLE001 - display errors must not fail the report
        sample_text = None
    if sample_text is not None:
        lines.extend(["Sample (first 5 rows):", sample_text])
    return "\n".join(lines)
132
+
133
+
134
@mcp.tool
def generate_csv_report(
    instructions: Annotated[str, "Instructions for the tool, not used for logic"],
    filename: Annotated[str, "CSV filename. Backend may rewrite to a downloadable URL."],
    username: Annotated[str, "Injected by backend. Trust this value."] = "",
    file_data_base64: Annotated[str, "Framework may supply Base64 content as fallback."] = "",
) -> Dict[str, Any]:
    """Generate comprehensive statistical analysis and summary report for CSV data files.

    This tool performs in-depth analysis of CSV files to provide actionable insights:

    **Data Analysis Features:**
    - Complete dataset overview (rows, columns, data types)
    - Statistical summaries for all numeric columns (mean, median, std, min, max, quartiles)
    - Missing value analysis and data quality assessment
    - Column type detection and classification
    - Sample data preview for context understanding

    **Report Contents:**
    - Dataset dimensions and structure
    - Data type distribution across columns
    - Missing value patterns and percentages
    - Descriptive statistics for numeric data
    - Sample rows for data format verification
    - Data quality indicators and potential issues

    **File Input Support:**
    - Direct CSV file upload via file browser
    - Base64 encoded CSV content
    - Backend-generated downloadable URLs
    - UTF-8 and common CSV encoding formats

    **Output Format:**
    - Structured text report with clear sections
    - Easy-to-read tabular summaries
    - Professional formatting suitable for sharing
    - Downloadable report file for future reference

    **Use Cases:**
    - Initial data exploration and quality assessment
    - Dataset documentation and profiling
    - Data validation before analysis or modeling
    - Quick statistical overview for stakeholder reports
    - Data preprocessing planning and strategy

    **Examples:**
    - Sales data: Revenue distribution, transaction patterns, missing customer info
    - Survey data: Response rates, demographic breakdowns, incomplete answers
    - Financial data: Account balances, transaction volumes, data completeness

    Args:
        instructions: Optional analysis instructions (currently not used in processing logic)
        filename: Name/path of CSV file to analyze (supports various input methods)
        username: User identity for report attribution (automatically injected by backend)
        file_data_base64: Base64-encoded CSV content (alternative input method)

    Returns:
        Dictionary containing:
        - results: Analysis summary and status message
        - artifacts: Downloadable text report with complete analysis
        - display: Viewer configuration for optimal report presentation
        - meta_data: Dataset metrics (rows, columns, generator info)
        Or error message if file cannot be processed
    """
    # NOTE(review): the docstring above is left word-for-word because the tool
    # framework presumably exposes it as the tool description - confirm before
    # editing it.
    artifact_name = "report.txt"
    try:
        frame = pd.read_csv(io.BytesIO(_load_csv_bytes(filename, file_data_base64)))
        if frame.empty:
            return {"results": {"error": "CSV is empty."}}

        # Use the raw filename; let the chat UI handle any sanitization
        requester = username if username else "unknown"
        report_text = _dataframe_report(frame, username=requester, source_name=filename)
        encoded_report = base64.b64encode(report_text.encode("utf-8")).decode("utf-8")
        row_count, column_count = frame.shape

        results = {
            "operation": "csv_report",
            "filename": filename,
            "message": "CSV report generated.",
        }
        artifact = {"name": artifact_name, "b64": encoded_report, "mime": "text/plain"}
        display = {
            "open_canvas": True,
            "primary_file": artifact_name,
            "mode": "replace",
            "viewer_hint": "code",
        }
        # meta_data carries the username exactly as received (may be empty),
        # while the report text falls back to "unknown".
        meta_data = {
            "generated_by": username,
            "rows": int(row_count),
            "columns": int(column_count),
        }
        return {
            "results": results,
            "artifacts": [artifact],
            "display": display,
            "meta_data": meta_data,
        }
    # Every failure is reported as an error payload instead of being raised.
    except FileNotFoundError as exc:
        error_message = str(exc)
    except pd.errors.EmptyDataError:
        error_message = "CSV file is empty or unreadable."
    except pd.errors.ParserError as exc:
        error_message = f"CSV parsing error: {exc}"
    except requests.HTTPError as exc:
        error_message = f"Download failed: {exc}"
    except Exception as exc:  # noqa: BLE001
        error_message = f"Unexpected error: {exc}"
    return {"results": {"error": error_message}}
243
+
244
+
245
@mcp.tool
def summarize_multiple_csvs(
    instructions: Annotated[str, "Instructions for the tool, not used for logic"],
    file_names: Annotated[List[str], "Array of CSV filenames. Backend may rewrite to downloadable URLs."],
    username: Annotated[str, "Injected by backend. Trust this value."] = "",
) -> Dict[str, Any]:
    """Create comparative analysis and consolidated summary across multiple CSV datasets.

    This advanced tool processes multiple CSV files simultaneously to provide:

    **Cross-Dataset Analysis:**
    - Comparative dataset metrics (rows, columns, sizes)
    - Column name consistency analysis across files
    - Data type compatibility assessment
    - Missing value patterns comparison
    - Overall data quality evaluation across all files

    **Consolidated Reporting:**
    - Unified summary of all datasets
    - Total record counts and column inventories
    - Data structure compatibility matrix
    - Common and unique column identification
    - Quality metrics aggregation

    **Batch Processing Features:**
    - Processes all files in a single operation
    - Error handling for individual file failures
    - Continues processing even if some files fail
    - Detailed error reporting for problematic files
    - Success rate and processing statistics

    **Multi-File Insights:**
    - Dataset size distribution across files
    - Schema consistency validation
    - Potential data merging opportunities
    - Data integration readiness assessment
    - Standardization recommendations

    **Use Cases:**
    - Data integration planning and validation
    - Multi-source data quality assessment
    - Database migration preparation
    - Data warehouse loading validation
    - Cross-system data consistency checks
    - Batch data processing workflows

    **Examples:**
    - Multiple monthly sales reports → Consolidated annual analysis
    - Regional customer databases → Cross-region data consistency check
    - Survey results from different periods → Longitudinal study preparation

    Args:
        instructions: Optional processing instructions (currently not used in logic)
        file_names: List of CSV file names/paths to analyze (supports various input methods)
        username: User identity for report attribution (automatically injected by backend)

    Returns:
        Dictionary containing:
        - results: Consolidated analysis summary with cross-file insights
        - artifacts: Downloadable comprehensive report with all file analyses
        - display: Viewer configuration for optimal multi-file report presentation
        - meta_data: Aggregated statistics (total files, success rate, combined metrics)
        Or error summary if multiple files cannot be processed
    """
    # NOTE(review): the docstring above is left word-for-word because the tool
    # framework presumably exposes it as the tool description - confirm before
    # editing it.
    artifact_name = "multi_csv_summary.txt"
    per_file_lines: List[str] = []
    errors: List[str] = []
    seen_columns = set()
    total_rows = 0
    processed = 0

    for name in file_names:
        try:
            frame = pd.read_csv(io.BytesIO(_load_csv_bytes(name)))
            row_count, column_count = frame.shape
            processed += 1
            total_rows += int(row_count)
            seen_columns.update(frame.columns.tolist())
            per_file_lines.append(f"{name}: {row_count} rows x {column_count} cols")
        except Exception as exc:  # collect per-file error, continue
            errors.append(f"{name}: {exc}")

    # Header, then one line per file (or a placeholder), then an optional
    # error section separated by a blank line.
    report_lines = [f"Multi-CSV summary for {username or 'unknown'}:"]
    report_lines += per_file_lines if per_file_lines else ["No files processed."]
    if errors:
        report_lines += ["", "Errors:", *errors]

    encoded_report = base64.b64encode("\n".join(report_lines).encode("utf-8")).decode("utf-8")

    results = {
        "operation": "multi_csv_summary",
        "processed_files": processed,
        "message": "Summary generated.",
    }
    display = {
        "open_canvas": True,
        "primary_file": artifact_name,
        "mode": "replace",
        "viewer_hint": "code",
    }
    meta_data = {
        "generated_by": username,
        "total_rows": total_rows,
        "unique_columns": sorted(seen_columns),
        "errors": errors,
    }
    return {
        "results": results,
        "artifacts": [
            {"name": artifact_name, "b64": encoded_report, "mime": "text/plain"}
        ],
        "display": display,
        "meta_data": meta_data,
    }
358
+
359
+
360
@mcp.tool
def plot_correlation_matrix(
    instructions: Annotated[str, "Instructions for the tool, not used for logic"],
    filename: Annotated[str, "CSV filename. Backend may rewrite to a downloadable URL."],
    columns: Annotated[Optional[List[str]], "Specific columns to plot. If None, plots all numeric columns."] = None,
    username: Annotated[str, "Injected by backend. Trust this value."] = "",
    file_data_base64: Annotated[str, "Framework may supply Base64 content as fallback."] = "",
) -> Dict[str, Any]:
    """Generate an N by N correlation matrix plot for numeric columns in a CSV file.

    Creates a heatmap showing linear correlations between specified columns or all numeric columns.

    Args:
        instructions: Instructions for the tool (not used in processing logic)
        filename: Name/path of CSV file to analyze
        columns: Columns to include; names that are missing or non-numeric are skipped.
            When empty or None, all numeric columns are used.
        username: User identity recorded in meta_data
        file_data_base64: Base64-encoded CSV content (alternative input method)

    Returns:
        Dictionary containing:
        - results: Operation name, filename, plotted columns and status message
        - artifacts: The heatmap as a base64-encoded PNG (correlation_matrix.png)
        - display: Viewer configuration for showing the image
        - meta_data: Requesting user, correlation matrix shape and columns used
        Or a results dictionary with an error message if the file cannot be processed
    """
    try:
        # Load and parse CSV
        raw = _load_csv_bytes(filename, file_data_base64)
        df = pd.read_csv(io.BytesIO(raw))
        if df.empty:
            return {"results": {"error": "CSV is empty."}}

        # Select numeric columns
        numeric_df = df.select_dtypes(include=[np.number])
        if numeric_df.empty:
            return {"results": {"error": "No numeric columns found in the CSV."}}

        # Filter to specified columns if provided
        if columns:
            available_cols = [col for col in columns if col in numeric_df.columns]
            if not available_cols:
                return {"results": {"error": f"None of the specified columns {columns} are numeric or exist in the CSV."}}
            numeric_df = numeric_df[available_cols]

        # Calculate correlation matrix
        corr_matrix = numeric_df.corr()

        # Create the plot. The figure is closed in ``finally`` so that a
        # failure while drawing or saving does not leave it open in pyplot's
        # global figure registry.
        fig = plt.figure(figsize=(10, 8))
        try:
            sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0,
                        square=True, fmt='.2f', cbar_kws={'shrink': 0.8})
            plt.title('Correlation Matrix')
            plt.tight_layout()

            # Save plot to bytes
            img_buffer = io.BytesIO()
            fig.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
        finally:
            plt.close(fig)
        # getvalue() returns the whole buffer regardless of stream position.
        img_b64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')

        return {
            "results": {
                "operation": "correlation_matrix_plot",
                "filename": filename,
                "columns_plotted": list(numeric_df.columns),
                "message": "Correlation matrix plot generated.",
            },
            "artifacts": [
                {
                    "name": "correlation_matrix.png",
                    "b64": img_b64,
                    "mime": "image/png",
                }
            ],
            "display": {
                "open_canvas": True,
                "primary_file": "correlation_matrix.png",
                "mode": "replace",
                "viewer_hint": "image",
            },
            "meta_data": {
                "generated_by": username,
                "correlation_shape": corr_matrix.shape,
                "columns_used": list(numeric_df.columns),
            },
        }
    # Every failure is reported as an error payload instead of being raised.
    except FileNotFoundError as e:
        return {"results": {"error": str(e)}}
    except pd.errors.EmptyDataError:
        return {"results": {"error": "CSV file is empty or unreadable."}}
    except pd.errors.ParserError as e:
        return {"results": {"error": f"CSV parsing error: {e}"}}
    except requests.HTTPError as e:
        return {"results": {"error": f"Download failed: {e}"}}
    except Exception as e:  # noqa: BLE001
        return {"results": {"error": f"Unexpected error: {e}"}}
444
+
445
+
446
@mcp.tool
def plot_time_series(
    instructions: Annotated[str, "Instructions for the tool, not used for logic"],
    filename: Annotated[str, "CSV filename. Backend may rewrite to a downloadable URL."],
    columns: Annotated[List[str], "Columns to plot as time series with index as x-axis."],
    username: Annotated[str, "Injected by backend. Trust this value."] = "",
    file_data_base64: Annotated[str, "Framework may supply Base64 content as fallback."] = "",
) -> Dict[str, Any]:
    """Generate connected scatter plots for specified columns with index as x-axis.

    Creates a time series style plot where each specified column is plotted against the row index.

    Args:
        instructions: Instructions for the tool (not used in processing logic)
        filename: Name/path of CSV file to plot
        columns: Columns to plot. When empty or None, every column of the CSV is plotted.
        username: User identity recorded in meta_data
        file_data_base64: Base64-encoded CSV content (alternative input method)

    Returns:
        Dictionary containing:
        - results: Operation name, filename, plotted columns and status message
        - artifacts: The plot as a base64-encoded PNG (time_series.png)
        - display: Viewer configuration for showing the image
        - meta_data: Requesting user, number of data points and plotted columns
        Or a results dictionary with an error message if the file or columns cannot be processed
    """
    try:
        # Load and parse CSV
        raw = _load_csv_bytes(filename, file_data_base64)
        df = pd.read_csv(io.BytesIO(raw))
        if df.empty:
            return {"results": {"error": "CSV is empty."}}

        # Handle cases where columns is None or empty
        requested_explicitly = bool(columns)
        if not requested_explicitly:
            columns = df.columns.tolist()
        else:
            # Check if specified columns exist
            missing_cols = [col for col in columns if col not in df.columns]
            if missing_cols:
                return {"results": {"error": f"Columns not found in CSV: {missing_cols}"}}

        # Work on an explicit copy: the loop below assigns converted columns,
        # which on a plain selection triggers pandas' SettingWithCopyWarning.
        plot_df = df[columns].copy()

        # Check if columns are numeric (convert if possible)
        for col in columns:
            if pd.api.types.is_numeric_dtype(plot_df[col]):
                continue
            try:
                converted = pd.to_numeric(plot_df[col], errors='coerce')
            except Exception:  # noqa: BLE001
                return {"results": {"error": f"Column '{col}' cannot be converted to numeric values."}}
            # errors='coerce' turns unparseable values into NaN instead of
            # raising. For a column the caller asked for by name, report it
            # when it held values but none could be converted. Columns picked
            # by default keep the lenient behaviour and are plotted as NaN.
            if requested_explicitly and plot_df[col].notna().any() and converted.isna().all():
                return {"results": {"error": f"Column '{col}' cannot be converted to numeric values."}}
            plot_df[col] = converted

        # Create the plot. The figure is closed in ``finally`` so that a
        # failure while drawing or saving does not leave it open in pyplot's
        # global figure registry.
        fig = plt.figure(figsize=(12, 8))
        try:
            for col in columns:
                plt.plot(plot_df.index, plot_df[col], marker='o', markersize=3,
                         linewidth=1.5, label=col, alpha=0.8)

            plt.xlabel('Index')
            plt.ylabel('Values')
            plt.title('Time Series Plot')
            plt.legend()
            plt.grid(True, alpha=0.3)
            plt.tight_layout()

            # Save plot to bytes
            img_buffer = io.BytesIO()
            fig.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
        finally:
            plt.close(fig)
        # getvalue() returns the whole buffer regardless of stream position.
        img_b64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')

        return {
            "results": {
                "operation": "time_series_plot",
                "filename": filename,
                "columns_plotted": columns,
                "message": "Time series plot generated.",
            },
            "artifacts": [
                {
                    "name": "time_series.png",
                    "b64": img_b64,
                    "mime": "image/png",
                }
            ],
            "display": {
                "open_canvas": True,
                "primary_file": "time_series.png",
                "mode": "replace",
                "viewer_hint": "image",
            },
            "meta_data": {
                "generated_by": username,
                "data_points": len(plot_df),
                "columns_plotted": columns,
            },
        }
    # Every failure is reported as an error payload instead of being raised.
    except FileNotFoundError as e:
        return {"results": {"error": str(e)}}
    except pd.errors.EmptyDataError:
        return {"results": {"error": "CSV file is empty or unreadable."}}
    except pd.errors.ParserError as e:
        return {"results": {"error": f"CSV parsing error: {e}"}}
    except requests.HTTPError as e:
        return {"results": {"error": f"Download failed: {e}"}}
    except Exception as e:  # noqa: BLE001
        return {"results": {"error": f"Unexpected error: {e}"}}
542
+
543
+
544
# Run the `mcp` server object only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    mcp.run()