atlas-chat 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atlas/__init__.py +40 -0
- atlas/application/__init__.py +7 -0
- atlas/application/chat/__init__.py +7 -0
- atlas/application/chat/agent/__init__.py +10 -0
- atlas/application/chat/agent/act_loop.py +179 -0
- atlas/application/chat/agent/factory.py +142 -0
- atlas/application/chat/agent/protocols.py +46 -0
- atlas/application/chat/agent/react_loop.py +338 -0
- atlas/application/chat/agent/think_act_loop.py +171 -0
- atlas/application/chat/approval_manager.py +151 -0
- atlas/application/chat/elicitation_manager.py +191 -0
- atlas/application/chat/events/__init__.py +1 -0
- atlas/application/chat/events/agent_event_relay.py +112 -0
- atlas/application/chat/modes/__init__.py +1 -0
- atlas/application/chat/modes/agent.py +125 -0
- atlas/application/chat/modes/plain.py +74 -0
- atlas/application/chat/modes/rag.py +81 -0
- atlas/application/chat/modes/tools.py +179 -0
- atlas/application/chat/orchestrator.py +213 -0
- atlas/application/chat/policies/__init__.py +1 -0
- atlas/application/chat/policies/tool_authorization.py +99 -0
- atlas/application/chat/preprocessors/__init__.py +1 -0
- atlas/application/chat/preprocessors/message_builder.py +92 -0
- atlas/application/chat/preprocessors/prompt_override_service.py +104 -0
- atlas/application/chat/service.py +454 -0
- atlas/application/chat/utilities/__init__.py +6 -0
- atlas/application/chat/utilities/error_handler.py +367 -0
- atlas/application/chat/utilities/event_notifier.py +546 -0
- atlas/application/chat/utilities/file_processor.py +613 -0
- atlas/application/chat/utilities/tool_executor.py +789 -0
- atlas/atlas_chat_cli.py +347 -0
- atlas/atlas_client.py +238 -0
- atlas/core/__init__.py +0 -0
- atlas/core/auth.py +205 -0
- atlas/core/authorization_manager.py +27 -0
- atlas/core/capabilities.py +123 -0
- atlas/core/compliance.py +215 -0
- atlas/core/domain_whitelist.py +147 -0
- atlas/core/domain_whitelist_middleware.py +82 -0
- atlas/core/http_client.py +28 -0
- atlas/core/log_sanitizer.py +102 -0
- atlas/core/metrics_logger.py +59 -0
- atlas/core/middleware.py +131 -0
- atlas/core/otel_config.py +242 -0
- atlas/core/prompt_risk.py +200 -0
- atlas/core/rate_limit.py +0 -0
- atlas/core/rate_limit_middleware.py +64 -0
- atlas/core/security_headers_middleware.py +51 -0
- atlas/domain/__init__.py +37 -0
- atlas/domain/chat/__init__.py +1 -0
- atlas/domain/chat/dtos.py +85 -0
- atlas/domain/errors.py +96 -0
- atlas/domain/messages/__init__.py +12 -0
- atlas/domain/messages/models.py +160 -0
- atlas/domain/rag_mcp_service.py +664 -0
- atlas/domain/sessions/__init__.py +7 -0
- atlas/domain/sessions/models.py +36 -0
- atlas/domain/unified_rag_service.py +371 -0
- atlas/infrastructure/__init__.py +10 -0
- atlas/infrastructure/app_factory.py +135 -0
- atlas/infrastructure/events/__init__.py +1 -0
- atlas/infrastructure/events/cli_event_publisher.py +140 -0
- atlas/infrastructure/events/websocket_publisher.py +140 -0
- atlas/infrastructure/sessions/in_memory_repository.py +56 -0
- atlas/infrastructure/transport/__init__.py +7 -0
- atlas/infrastructure/transport/websocket_connection_adapter.py +33 -0
- atlas/init_cli.py +226 -0
- atlas/interfaces/__init__.py +15 -0
- atlas/interfaces/events.py +134 -0
- atlas/interfaces/llm.py +54 -0
- atlas/interfaces/rag.py +40 -0
- atlas/interfaces/sessions.py +75 -0
- atlas/interfaces/tools.py +57 -0
- atlas/interfaces/transport.py +24 -0
- atlas/main.py +564 -0
- atlas/mcp/api_key_demo/README.md +76 -0
- atlas/mcp/api_key_demo/main.py +172 -0
- atlas/mcp/api_key_demo/run.sh +56 -0
- atlas/mcp/basictable/main.py +147 -0
- atlas/mcp/calculator/main.py +149 -0
- atlas/mcp/code-executor/execution_engine.py +98 -0
- atlas/mcp/code-executor/execution_environment.py +95 -0
- atlas/mcp/code-executor/main.py +528 -0
- atlas/mcp/code-executor/result_processing.py +276 -0
- atlas/mcp/code-executor/script_generation.py +195 -0
- atlas/mcp/code-executor/security_checker.py +140 -0
- atlas/mcp/corporate_cars/main.py +437 -0
- atlas/mcp/csv_reporter/main.py +545 -0
- atlas/mcp/duckduckgo/main.py +182 -0
- atlas/mcp/elicitation_demo/README.md +171 -0
- atlas/mcp/elicitation_demo/main.py +262 -0
- atlas/mcp/env-demo/README.md +158 -0
- atlas/mcp/env-demo/main.py +199 -0
- atlas/mcp/file_size_test/main.py +284 -0
- atlas/mcp/filesystem/main.py +348 -0
- atlas/mcp/image_demo/main.py +113 -0
- atlas/mcp/image_demo/requirements.txt +4 -0
- atlas/mcp/logging_demo/README.md +72 -0
- atlas/mcp/logging_demo/main.py +103 -0
- atlas/mcp/many_tools_demo/main.py +50 -0
- atlas/mcp/order_database/__init__.py +0 -0
- atlas/mcp/order_database/main.py +369 -0
- atlas/mcp/order_database/signal_data.csv +1001 -0
- atlas/mcp/pdfbasic/main.py +394 -0
- atlas/mcp/pptx_generator/main.py +760 -0
- atlas/mcp/pptx_generator/requirements.txt +13 -0
- atlas/mcp/pptx_generator/run_test.sh +1 -0
- atlas/mcp/pptx_generator/test_pptx_generator_security.py +169 -0
- atlas/mcp/progress_demo/main.py +167 -0
- atlas/mcp/progress_updates_demo/QUICKSTART.md +273 -0
- atlas/mcp/progress_updates_demo/README.md +120 -0
- atlas/mcp/progress_updates_demo/main.py +497 -0
- atlas/mcp/prompts/main.py +222 -0
- atlas/mcp/public_demo/main.py +189 -0
- atlas/mcp/sampling_demo/README.md +169 -0
- atlas/mcp/sampling_demo/main.py +234 -0
- atlas/mcp/thinking/main.py +77 -0
- atlas/mcp/tool_planner/main.py +240 -0
- atlas/mcp/ui-demo/badmesh.png +0 -0
- atlas/mcp/ui-demo/main.py +383 -0
- atlas/mcp/ui-demo/templates/button_demo.html +32 -0
- atlas/mcp/ui-demo/templates/data_visualization.html +32 -0
- atlas/mcp/ui-demo/templates/form_demo.html +28 -0
- atlas/mcp/username-override-demo/README.md +320 -0
- atlas/mcp/username-override-demo/main.py +308 -0
- atlas/modules/__init__.py +0 -0
- atlas/modules/config/__init__.py +34 -0
- atlas/modules/config/cli.py +231 -0
- atlas/modules/config/config_manager.py +1096 -0
- atlas/modules/file_storage/__init__.py +22 -0
- atlas/modules/file_storage/cli.py +330 -0
- atlas/modules/file_storage/content_extractor.py +290 -0
- atlas/modules/file_storage/manager.py +295 -0
- atlas/modules/file_storage/mock_s3_client.py +402 -0
- atlas/modules/file_storage/s3_client.py +417 -0
- atlas/modules/llm/__init__.py +19 -0
- atlas/modules/llm/caller.py +287 -0
- atlas/modules/llm/litellm_caller.py +675 -0
- atlas/modules/llm/models.py +19 -0
- atlas/modules/mcp_tools/__init__.py +17 -0
- atlas/modules/mcp_tools/client.py +2123 -0
- atlas/modules/mcp_tools/token_storage.py +556 -0
- atlas/modules/prompts/prompt_provider.py +130 -0
- atlas/modules/rag/__init__.py +24 -0
- atlas/modules/rag/atlas_rag_client.py +336 -0
- atlas/modules/rag/client.py +129 -0
- atlas/routes/admin_routes.py +865 -0
- atlas/routes/config_routes.py +484 -0
- atlas/routes/feedback_routes.py +361 -0
- atlas/routes/files_routes.py +274 -0
- atlas/routes/health_routes.py +40 -0
- atlas/routes/mcp_auth_routes.py +223 -0
- atlas/server_cli.py +164 -0
- atlas/tests/conftest.py +20 -0
- atlas/tests/integration/test_mcp_auth_integration.py +152 -0
- atlas/tests/manual_test_sampling.py +87 -0
- atlas/tests/modules/mcp_tools/test_client_auth.py +226 -0
- atlas/tests/modules/mcp_tools/test_client_env.py +191 -0
- atlas/tests/test_admin_mcp_server_management_routes.py +141 -0
- atlas/tests/test_agent_roa.py +135 -0
- atlas/tests/test_app_factory_smoke.py +47 -0
- atlas/tests/test_approval_manager.py +439 -0
- atlas/tests/test_atlas_client.py +188 -0
- atlas/tests/test_atlas_rag_client.py +447 -0
- atlas/tests/test_atlas_rag_integration.py +224 -0
- atlas/tests/test_attach_file_flow.py +287 -0
- atlas/tests/test_auth_utils.py +165 -0
- atlas/tests/test_backend_public_url.py +185 -0
- atlas/tests/test_banner_logging.py +287 -0
- atlas/tests/test_capability_tokens_and_injection.py +203 -0
- atlas/tests/test_compliance_level.py +54 -0
- atlas/tests/test_compliance_manager.py +253 -0
- atlas/tests/test_config_manager.py +617 -0
- atlas/tests/test_config_manager_paths.py +12 -0
- atlas/tests/test_core_auth.py +18 -0
- atlas/tests/test_core_utils.py +190 -0
- atlas/tests/test_docker_env_sync.py +202 -0
- atlas/tests/test_domain_errors.py +329 -0
- atlas/tests/test_domain_whitelist.py +359 -0
- atlas/tests/test_elicitation_manager.py +408 -0
- atlas/tests/test_elicitation_routing.py +296 -0
- atlas/tests/test_env_demo_server.py +88 -0
- atlas/tests/test_error_classification.py +113 -0
- atlas/tests/test_error_flow_integration.py +116 -0
- atlas/tests/test_feedback_routes.py +333 -0
- atlas/tests/test_file_content_extraction.py +1134 -0
- atlas/tests/test_file_extraction_routes.py +158 -0
- atlas/tests/test_file_library.py +107 -0
- atlas/tests/test_file_manager_unit.py +18 -0
- atlas/tests/test_health_route.py +49 -0
- atlas/tests/test_http_client_stub.py +8 -0
- atlas/tests/test_imports_smoke.py +30 -0
- atlas/tests/test_interfaces_llm_response.py +9 -0
- atlas/tests/test_issue_access_denied_fix.py +136 -0
- atlas/tests/test_llm_env_expansion.py +836 -0
- atlas/tests/test_log_level_sensitive_data.py +285 -0
- atlas/tests/test_mcp_auth_routes.py +341 -0
- atlas/tests/test_mcp_client_auth.py +331 -0
- atlas/tests/test_mcp_data_injection.py +270 -0
- atlas/tests/test_mcp_get_authorized_servers.py +95 -0
- atlas/tests/test_mcp_hot_reload.py +512 -0
- atlas/tests/test_mcp_image_content.py +424 -0
- atlas/tests/test_mcp_logging.py +172 -0
- atlas/tests/test_mcp_progress_updates.py +313 -0
- atlas/tests/test_mcp_prompt_override_system_prompt.py +102 -0
- atlas/tests/test_mcp_prompts_server.py +39 -0
- atlas/tests/test_mcp_tool_result_parsing.py +296 -0
- atlas/tests/test_metrics_logger.py +56 -0
- atlas/tests/test_middleware_auth.py +379 -0
- atlas/tests/test_prompt_risk_and_acl.py +141 -0
- atlas/tests/test_rag_mcp_aggregator.py +204 -0
- atlas/tests/test_rag_mcp_service.py +224 -0
- atlas/tests/test_rate_limit_middleware.py +45 -0
- atlas/tests/test_routes_config_smoke.py +60 -0
- atlas/tests/test_routes_files_download_token.py +41 -0
- atlas/tests/test_routes_files_health.py +18 -0
- atlas/tests/test_runtime_imports.py +53 -0
- atlas/tests/test_sampling_integration.py +482 -0
- atlas/tests/test_security_admin_routes.py +61 -0
- atlas/tests/test_security_capability_tokens.py +65 -0
- atlas/tests/test_security_file_stats_scope.py +21 -0
- atlas/tests/test_security_header_injection.py +191 -0
- atlas/tests/test_security_headers_and_filename.py +63 -0
- atlas/tests/test_shared_session_repository.py +101 -0
- atlas/tests/test_system_prompt_loading.py +181 -0
- atlas/tests/test_token_storage.py +505 -0
- atlas/tests/test_tool_approval_config.py +93 -0
- atlas/tests/test_tool_approval_utils.py +356 -0
- atlas/tests/test_tool_authorization_group_filtering.py +223 -0
- atlas/tests/test_tool_details_in_config.py +108 -0
- atlas/tests/test_tool_planner.py +300 -0
- atlas/tests/test_unified_rag_service.py +398 -0
- atlas/tests/test_username_override_in_approval.py +258 -0
- atlas/tests/test_websocket_auth_header.py +168 -0
- atlas/version.py +6 -0
- atlas_chat-0.1.0.data/data/.env.example +253 -0
- atlas_chat-0.1.0.data/data/config/defaults/compliance-levels.json +44 -0
- atlas_chat-0.1.0.data/data/config/defaults/domain-whitelist.json +123 -0
- atlas_chat-0.1.0.data/data/config/defaults/file-extractors.json +74 -0
- atlas_chat-0.1.0.data/data/config/defaults/help-config.json +198 -0
- atlas_chat-0.1.0.data/data/config/defaults/llmconfig-buggy.yml +11 -0
- atlas_chat-0.1.0.data/data/config/defaults/llmconfig.yml +19 -0
- atlas_chat-0.1.0.data/data/config/defaults/mcp.json +138 -0
- atlas_chat-0.1.0.data/data/config/defaults/rag-sources.json +17 -0
- atlas_chat-0.1.0.data/data/config/defaults/splash-config.json +16 -0
- atlas_chat-0.1.0.dist-info/METADATA +236 -0
- atlas_chat-0.1.0.dist-info/RECORD +250 -0
- atlas_chat-0.1.0.dist-info/WHEEL +5 -0
- atlas_chat-0.1.0.dist-info/entry_points.txt +4 -0
- atlas_chat-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,545 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
CSV Reporter MCP Server using FastMCP.
|
|
4
|
+
|
|
5
|
+
Demonstrates two v2 behaviors described in v2_mcp_note.md:
|
|
6
|
+
1) filename(s) to downloadable URLs: If the backend rewrites filename/file_names
|
|
7
|
+
to /api/files/download/... URLs, this server will fetch and process them.
|
|
8
|
+
It also accepts file_data_base64 as a fallback for content delivery.
|
|
9
|
+
2) username injection: If a `username` parameter is defined in the tool schema,
|
|
10
|
+
the backend can inject the authenticated user's email/username. This server
|
|
11
|
+
trusts the provided username value and echoes it in outputs.
|
|
12
|
+
|
|
13
|
+
Tools:
|
|
14
|
+
- generate_csv_report: Build a summary report for a single CSV.
|
|
15
|
+
- summarize_multiple_csvs: Summarize multiple CSVs (using file_names[]).
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import base64
|
|
21
|
+
import io
|
|
22
|
+
import os
|
|
23
|
+
from typing import Annotated, Any, Dict, List, Optional
|
|
24
|
+
|
|
25
|
+
import matplotlib.pyplot as plt
|
|
26
|
+
import numpy as np
|
|
27
|
+
import pandas as pd
|
|
28
|
+
import requests
|
|
29
|
+
import seaborn as sns
|
|
30
|
+
from fastmcp import FastMCP
|
|
31
|
+
|
|
32
|
+
# FastMCP instance named "CSV_Reporter"; the @mcp.tool decorators further down
# in this file and the __main__ guard's mcp.run() call both use this object.
mcp = FastMCP("CSV_Reporter")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Absolute path three directory levels above the directory holding this file.
_PROJECT_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir)
)

# Directory searched for locally stored CSV files. The CHATUI_RUNTIME_UPLOADS
# environment variable overrides the default of <_PROJECT_ROOT>/runtime/uploads.
RUNTIME_UPLOADS = os.getenv(
    "CHATUI_RUNTIME_UPLOADS",
    os.path.join(_PROJECT_ROOT, "runtime", "uploads"),
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _is_http_url(s: str) -> bool:
    """Return True when *s* is a string starting with ``http://`` or ``https://``.

    Like ``_is_backend_download_path``, non-string input (for example
    ``None``) is reported as ``False`` instead of raising
    ``AttributeError``. The prefix comparison is case-sensitive.
    """
    # str.startswith accepts a tuple, so both schemes are checked in one call.
    return isinstance(s, str) and s.startswith(("http://", "https://"))
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _is_backend_download_path(s: str) -> bool:
    """Tell whether *s* is a backend-relative download path.

    A match is any string that begins with ``/api/files/download/``;
    every non-string value yields ``False``.
    """
    if not isinstance(s, str):
        return False
    return s.startswith("/api/files/download/")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _backend_base_url() -> str:
    """Return the base URL used to resolve backend-relative download paths.

    The value comes from the ``CHATUI_BACKEND_BASE_URL`` environment
    variable; when that variable is not set, ``http://127.0.0.1:8000`` is
    returned.
    """
    fallback = "http://127.0.0.1:8000"
    return os.getenv("CHATUI_BACKEND_BASE_URL", fallback)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _http_get_bytes(url: str) -> bytes:
    """GET *url* (20 s timeout) and return the body; raise on an HTTP error status."""
    response = requests.get(url, timeout=20)
    response.raise_for_status()
    return response.content


def _resolve_upload_path(filename: str, uploads_root: str) -> str:
    """Return the real path of *filename*, which must lie inside *uploads_root*.

    Relative names are joined onto *uploads_root*; absolute names are kept
    (``os.path.join`` drops the root in that case). Symlinks and ``..``
    segments are resolved before the containment check.

    Raises:
        FileNotFoundError: if the resolved path is outside *uploads_root*.
    """
    root = os.path.realpath(uploads_root)
    candidate = os.path.realpath(os.path.join(root, filename))
    try:
        inside = os.path.commonpath([root, candidate]) == root
    except ValueError:
        # commonpath refuses to compare e.g. paths on different Windows drives.
        inside = False
    if not inside:
        raise FileNotFoundError(f"CSV not found in uploads directory: {filename}")
    return candidate


def _load_csv_bytes(filename: str, file_data_base64: str = "") -> bytes:
    """Return raw CSV bytes from base64 content, a URL, or the uploads directory.

    Sources are tried in this order:
      1) ``file_data_base64`` if provided (``filename`` is then ignored)
      2) ``filename`` is a backend-relative ``/api/files/download/...`` path:
         GET it relative to ``_backend_base_url()``
      3) ``filename`` is an http(s) URL: GET it
      4) ``filename`` names a file inside ``RUNTIME_UPLOADS``

    Args:
        filename: File name, backend download path, or http(s) URL.
        file_data_base64: Base64-encoded file content; wins over ``filename``.

    Returns:
        The undecoded file content.

    Raises:
        FileNotFoundError: nothing was supplied, the local file does not
            exist, or the path resolves outside ``RUNTIME_UPLOADS``.
        requests.HTTPError: a download returned an HTTP error status. Other
            ``requests`` exceptions (timeouts, connection errors) propagate
            unchanged.
    """
    if file_data_base64:
        return base64.b64decode(file_data_base64)

    if not filename:
        raise FileNotFoundError("No filename or file data provided")

    # Backend-injected relative download URLs are resolved against the base URL.
    if _is_backend_download_path(filename):
        return _http_get_bytes(_backend_base_url().rstrip("/") + filename)

    if _is_http_url(filename):
        # NOTE(review): this fetches whatever URL the caller supplies. If the
        # filename can come from untrusted input this is an SSRF vector;
        # consider restricting to known hosts.
        return _http_get_bytes(filename)

    # Fallback: treat filename as a key under runtime uploads. The path is
    # confined to that directory so "../" segments or absolute paths cannot
    # be used to read arbitrary files from the host.
    local_path = _resolve_upload_path(filename, RUNTIME_UPLOADS)
    if not os.path.isfile(local_path):
        raise FileNotFoundError(f"CSV not found: {local_path}")
    with open(local_path, "rb") as f:
        return f.read()
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _dataframe_report(df: pd.DataFrame, *, username: str, source_name: str) -> str:
    """Create a human-readable plain-text report for a DataFrame.

    Sections, in order: source and requester header, shape, column dtypes,
    per-column missing-value counts (only when something is missing),
    ``describe()`` statistics for numeric columns (only when there are
    any), and the first five rows.

    Args:
        df: Data to summarise.
        username: Name printed on the "Requested by" line.
        source_name: Label printed on the "CSV Report for" line.

    Returns:
        The report as one newline-joined string. If the sample rows cannot
        be rendered, a one-line note replaces that section instead of the
        failure being swallowed silently.
    """
    lines: List[str] = [
        f"CSV Report for: {source_name}",
        f"Requested by: {username}",
        "",
        f"Shape: {df.shape[0]} rows x {df.shape[1]} columns",
        "",
        "Column types:",
        df.dtypes.to_string(),
        "",
    ]

    # Missing values: only reported when at least one column has any.
    na_counts = df.isna().sum()
    if (na_counts > 0).any():
        lines += ["Missing values per column:", na_counts.to_string(), ""]

    # Numeric summary, one row per numeric column.
    num_df = df.select_dtypes(include=["number"])  # type: ignore[arg-type]
    if not num_df.empty:
        lines += [
            "Numeric columns summary:",
            num_df.describe().transpose().to_string(),
            "",
        ]

    # Sample rows are best-effort. Render first and append afterwards so a
    # failure cannot leave a heading with nothing under it.
    try:
        sample_text = df.head(5).to_string(index=False)
    except Exception:  # noqa: BLE001 - display problems must not abort the report
        lines.append("Sample rows unavailable (could not be rendered).")
    else:
        lines += ["Sample (first 5 rows):", sample_text]

    return "\n".join(lines)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@mcp.tool
def generate_csv_report(
    instructions: Annotated[str, "Instructions for the tool, not used for logic"],
    filename: Annotated[str, "CSV filename. Backend may rewrite to a downloadable URL."],
    username: Annotated[str, "Injected by backend. Trust this value."] = "",
    file_data_base64: Annotated[str, "Framework may supply Base64 content as fallback."] = "",
) -> Dict[str, Any]:
    """Generate comprehensive statistical analysis and summary report for CSV data files.

    This tool performs in-depth analysis of CSV files to provide actionable insights:

    **Data Analysis Features:**
    - Complete dataset overview (rows, columns, data types)
    - Statistical summaries for all numeric columns (mean, median, std, min, max, quartiles)
    - Missing value analysis and data quality assessment
    - Column type detection and classification
    - Sample data preview for context understanding

    **Report Contents:**
    - Dataset dimensions and structure
    - Data type distribution across columns
    - Missing value patterns and percentages
    - Descriptive statistics for numeric data
    - Sample rows for data format verification
    - Data quality indicators and potential issues

    **File Input Support:**
    - Direct CSV file upload via file browser
    - Base64 encoded CSV content
    - Backend-generated downloadable URLs
    - UTF-8 and common CSV encoding formats

    **Output Format:**
    - Structured text report with clear sections
    - Easy-to-read tabular summaries
    - Professional formatting suitable for sharing
    - Downloadable report file for future reference

    **Use Cases:**
    - Initial data exploration and quality assessment
    - Dataset documentation and profiling
    - Data validation before analysis or modeling
    - Quick statistical overview for stakeholder reports
    - Data preprocessing planning and strategy

    **Examples:**
    - Sales data: Revenue distribution, transaction patterns, missing customer info
    - Survey data: Response rates, demographic breakdowns, incomplete answers
    - Financial data: Account balances, transaction volumes, data completeness

    Args:
        instructions: Optional analysis instructions (currently not used in processing logic)
        filename: Name/path of CSV file to analyze (supports various input methods)
        username: User identity for report attribution (automatically injected by backend)
        file_data_base64: Base64-encoded CSV content (alternative input method)

    Returns:
        Dictionary containing:
        - results: Analysis summary and status message
        - artifacts: Downloadable text report with complete analysis
        - display: Viewer configuration for optimal report presentation
        - meta_data: Dataset metrics (rows, columns, generator info)
        Or error message if file cannot be processed
    """
    # NOTE(review): the docstring above is left word-for-word because the tool
    # decorator presumably publishes it as the tool description - confirm
    # before editing it.
    try:
        raw = _load_csv_bytes(filename, file_data_base64)
        df = pd.read_csv(io.BytesIO(raw))
        if df.empty:
            return {"results": {"error": "CSV is empty."}}

        # Use the raw filename; let the chat UI handle any sanitization
        report_text = _dataframe_report(df, username=username or "unknown", source_name=filename)
        # The report travels as a base64 text artifact.
        report_b64 = base64.b64encode(report_text.encode("utf-8")).decode("utf-8")

        return {
            "results": {
                "operation": "csv_report",
                "filename": filename,
                "message": "CSV report generated.",
            },
            "artifacts": [
                {
                    "name": "report.txt",
                    "b64": report_b64,
                    "mime": "text/plain",
                }
            ],
            "display": {
                "open_canvas": True,
                "primary_file": "report.txt",
                "mode": "replace",
                "viewer_hint": "code",
            },
            "meta_data": {
                "generated_by": username,
                "rows": int(df.shape[0]),
                "columns": int(df.shape[1]),
            },
        }
    except FileNotFoundError as e:
        return {"results": {"error": str(e)}}
    except pd.errors.EmptyDataError:
        return {"results": {"error": "CSV file is empty or unreadable."}}
    except pd.errors.ParserError as e:
        return {"results": {"error": f"CSV parsing error: {e}"}}
    except requests.RequestException as e:
        # RequestException is the base class of HTTPError, so bad status codes
        # are still handled here; timeouts and connection failures are now
        # reported as download failures too instead of "Unexpected error".
        return {"results": {"error": f"Download failed: {e}"}}
    except Exception as e:  # noqa: BLE001
        # Tool boundary: always answer with an error payload, never raise.
        return {"results": {"error": f"Unexpected error: {e}"}}
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@mcp.tool
def summarize_multiple_csvs(
    instructions: Annotated[str, "Instructions for the tool, not used for logic"],
    file_names: Annotated[List[str], "Array of CSV filenames. Backend may rewrite to downloadable URLs."],
    username: Annotated[str, "Injected by backend. Trust this value."] = "",
) -> Dict[str, Any]:
    """Create comparative analysis and consolidated summary across multiple CSV datasets.

    This advanced tool processes multiple CSV files simultaneously to provide:

    **Cross-Dataset Analysis:**
    - Comparative dataset metrics (rows, columns, sizes)
    - Column name consistency analysis across files
    - Data type compatibility assessment
    - Missing value patterns comparison
    - Overall data quality evaluation across all files

    **Consolidated Reporting:**
    - Unified summary of all datasets
    - Total record counts and column inventories
    - Data structure compatibility matrix
    - Common and unique column identification
    - Quality metrics aggregation

    **Batch Processing Features:**
    - Processes all files in a single operation
    - Error handling for individual file failures
    - Continues processing even if some files fail
    - Detailed error reporting for problematic files
    - Success rate and processing statistics

    **Multi-File Insights:**
    - Dataset size distribution across files
    - Schema consistency validation
    - Potential data merging opportunities
    - Data integration readiness assessment
    - Standardization recommendations

    **Use Cases:**
    - Data integration planning and validation
    - Multi-source data quality assessment
    - Database migration preparation
    - Data warehouse loading validation
    - Cross-system data consistency checks
    - Batch data processing workflows

    **Examples:**
    - Multiple monthly sales reports → Consolidated annual analysis
    - Regional customer databases → Cross-region data consistency check
    - Survey results from different periods → Longitudinal study preparation

    Args:
        instructions: Optional processing instructions (currently not used in logic)
        file_names: List of CSV file names/paths to analyze (supports various input methods)
        username: User identity for report attribution (automatically injected by backend)

    Returns:
        Dictionary containing:
        - results: Consolidated analysis summary with cross-file insights
        - artifacts: Downloadable comprehensive report with all file analyses
        - display: Viewer configuration for optimal multi-file report presentation
        - meta_data: Aggregated statistics (total files, success rate, combined metrics)
        Or error summary if multiple files cannot be processed
    """
    # NOTE(review): docstring kept word-for-word; the tool decorator presumably
    # exposes it as the tool description - confirm before rewording.
    per_file_lines: List[str] = []
    failures: List[str] = []
    seen_columns = set()
    row_total = 0
    ok_count = 0

    for name in file_names:
        # One bad file must not stop the batch: record the failure and move on.
        try:
            frame = pd.read_csv(io.BytesIO(_load_csv_bytes(name)))
            n_rows, n_cols = frame.shape
            ok_count += 1
            row_total += int(n_rows)
            seen_columns.update(frame.columns.tolist())
            per_file_lines.append(f"{name}: {n_rows} rows x {n_cols} cols")
        except Exception as e:  # collect per-file error, continue
            failures.append(f"{name}: {e}")

    # Plain-text report: heading, one line per loaded file, then any errors.
    report_lines = [f"Multi-CSV summary for {username or 'unknown'}:"]
    report_lines += per_file_lines if per_file_lines else ["No files processed."]
    if failures:
        report_lines += ["", "Errors:", *failures]

    encoded_report = base64.b64encode("\n".join(report_lines).encode("utf-8")).decode("utf-8")

    results = {
        "operation": "multi_csv_summary",
        "processed_files": ok_count,
        "message": "Summary generated.",
    }
    artifact = {"name": "multi_csv_summary.txt", "b64": encoded_report, "mime": "text/plain"}
    display = {
        "open_canvas": True,
        "primary_file": "multi_csv_summary.txt",
        "mode": "replace",
        "viewer_hint": "code",
    }
    meta_data = {
        "generated_by": username,
        "total_rows": row_total,
        "unique_columns": sorted(seen_columns),
        "errors": failures,
    }
    return {
        "results": results,
        "artifacts": [artifact],
        "display": display,
        "meta_data": meta_data,
    }
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
@mcp.tool
def plot_correlation_matrix(
    instructions: Annotated[str, "Instructions for the tool, not used for logic"],
    filename: Annotated[str, "CSV filename. Backend may rewrite to a downloadable URL."],
    columns: Annotated[Optional[List[str]], "Specific columns to plot. If None, plots all numeric columns."] = None,
    username: Annotated[str, "Injected by backend. Trust this value."] = "",
    file_data_base64: Annotated[str, "Framework may supply Base64 content as fallback."] = "",
) -> Dict[str, Any]:
    """Generate an N by N correlation matrix plot for numeric columns in a CSV file.

    Creates a heatmap showing linear correlations between specified columns or all numeric columns.
    """
    # NOTE(review): docstring kept word-for-word; the tool decorator presumably
    # exposes it as the tool description - confirm before rewording.
    try:
        # Load and parse CSV
        raw = _load_csv_bytes(filename, file_data_base64)
        df = pd.read_csv(io.BytesIO(raw))
        if df.empty:
            return {"results": {"error": "CSV is empty."}}

        # Select numeric columns
        numeric_df = df.select_dtypes(include=[np.number])
        if numeric_df.empty:
            return {"results": {"error": "No numeric columns found in the CSV."}}

        # Filter to specified columns if provided. Requested names that are
        # missing or non-numeric are dropped; it is an error only if none remain.
        if columns:
            available_cols = [col for col in columns if col in numeric_df.columns]
            if not available_cols:
                return {"results": {"error": f"None of the specified columns {columns} are numeric or exist in the CSV."}}
            numeric_df = numeric_df[available_cols]

        # Calculate correlation matrix
        corr_matrix = numeric_df.corr()

        # Create the plot. pyplot keeps every figure alive in a global
        # registry until it is closed, so close it in `finally`: a failure
        # while drawing or saving must not leak the figure.
        fig = plt.figure(figsize=(10, 8))
        try:
            sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0,
                        square=True, fmt='.2f', cbar_kws={'shrink': 0.8})
            plt.title('Correlation Matrix')
            plt.tight_layout()

            # Save plot to bytes
            img_buffer = io.BytesIO()
            plt.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
        finally:
            plt.close(fig)
        img_b64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')

        return {
            "results": {
                "operation": "correlation_matrix_plot",
                "filename": filename,
                "columns_plotted": list(numeric_df.columns),
                "message": "Correlation matrix plot generated.",
            },
            "artifacts": [
                {
                    "name": "correlation_matrix.png",
                    "b64": img_b64,
                    "mime": "image/png",
                }
            ],
            "display": {
                "open_canvas": True,
                "primary_file": "correlation_matrix.png",
                "mode": "replace",
                "viewer_hint": "image",
            },
            "meta_data": {
                "generated_by": username,
                "correlation_shape": corr_matrix.shape,
                "columns_used": list(numeric_df.columns),
            },
        }
    except FileNotFoundError as e:
        return {"results": {"error": str(e)}}
    except pd.errors.EmptyDataError:
        return {"results": {"error": "CSV file is empty or unreadable."}}
    except pd.errors.ParserError as e:
        return {"results": {"error": f"CSV parsing error: {e}"}}
    except requests.RequestException as e:
        # Base class of HTTPError: also covers timeouts and connection errors,
        # which used to surface as "Unexpected error".
        return {"results": {"error": f"Download failed: {e}"}}
    except Exception as e:  # noqa: BLE001
        # Tool boundary: always answer with an error payload, never raise.
        return {"results": {"error": f"Unexpected error: {e}"}}
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
@mcp.tool
def plot_time_series(
    instructions: Annotated[str, "Instructions for the tool, not used for logic"],
    filename: Annotated[str, "CSV filename. Backend may rewrite to a downloadable URL."],
    columns: Annotated[List[str], "Columns to plot as time series with index as x-axis."],
    username: Annotated[str, "Injected by backend. Trust this value."] = "",
    file_data_base64: Annotated[str, "Framework may supply Base64 content as fallback."] = "",
) -> Dict[str, Any]:
    """Generate connected scatter plots for specified columns with index as x-axis.

    Creates a time series style plot where each specified column is plotted against the row index.
    """
    # NOTE(review): docstring kept word-for-word; the tool decorator presumably
    # exposes it as the tool description - confirm before rewording.
    try:
        # Load and parse CSV
        raw = _load_csv_bytes(filename, file_data_base64)
        df = pd.read_csv(io.BytesIO(raw))
        if df.empty:
            return {"results": {"error": "CSV is empty."}}

        # Handle cases where columns is None or empty: plot every column
        if not columns:
            columns = df.columns.tolist()
        else:
            # Check if specified columns exist
            missing_cols = [col for col in columns if col not in df.columns]
            if missing_cols:
                return {"results": {"error": f"Columns not found in CSV: {missing_cols}"}}

        # Select only the specified columns. Work on a copy: assigning into
        # a slice of `df` triggers pandas' chained-assignment warning and
        # may or may not write through to the parent frame.
        plot_df = df[columns].copy()

        # Check if columns are numeric (convert if possible). With
        # errors='coerce' unparseable values become NaN rather than raising,
        # so the except branch is only a safety net.
        for col in columns:
            if not pd.api.types.is_numeric_dtype(plot_df[col]):
                try:
                    plot_df[col] = pd.to_numeric(plot_df[col], errors='coerce')
                except Exception:
                    return {"results": {"error": f"Column '{col}' cannot be converted to numeric values."}}

        # Create the plot. pyplot keeps every figure alive in a global
        # registry until it is closed, so close it in `finally`: a failure
        # while drawing or saving must not leak the figure.
        fig = plt.figure(figsize=(12, 8))
        try:
            for col in columns:
                plt.plot(plot_df.index, plot_df[col], marker='o', markersize=3,
                         linewidth=1.5, label=col, alpha=0.8)

            plt.xlabel('Index')
            plt.ylabel('Values')
            plt.title('Time Series Plot')
            plt.legend()
            plt.grid(True, alpha=0.3)
            plt.tight_layout()

            # Save plot to bytes
            img_buffer = io.BytesIO()
            plt.savefig(img_buffer, format='png', dpi=300, bbox_inches='tight')
        finally:
            plt.close(fig)
        img_b64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')

        return {
            "results": {
                "operation": "time_series_plot",
                "filename": filename,
                "columns_plotted": columns,
                "message": "Time series plot generated.",
            },
            "artifacts": [
                {
                    "name": "time_series.png",
                    "b64": img_b64,
                    "mime": "image/png",
                }
            ],
            "display": {
                "open_canvas": True,
                "primary_file": "time_series.png",
                "mode": "replace",
                "viewer_hint": "image",
            },
            "meta_data": {
                "generated_by": username,
                "data_points": len(plot_df),
                "columns_plotted": columns,
            },
        }
    except FileNotFoundError as e:
        return {"results": {"error": str(e)}}
    except pd.errors.EmptyDataError:
        return {"results": {"error": "CSV file is empty or unreadable."}}
    except pd.errors.ParserError as e:
        return {"results": {"error": f"CSV parsing error: {e}"}}
    except requests.RequestException as e:
        # Base class of HTTPError: also covers timeouts and connection errors,
        # which used to surface as "Unexpected error".
        return {"results": {"error": f"Download failed: {e}"}}
    except Exception as e:  # noqa: BLE001
        # Tool boundary: always answer with an error payload, never raise.
        return {"results": {"error": f"Unexpected error: {e}"}}
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
# Call the FastMCP instance's run() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    mcp.run()
|