aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,732 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
import csv
|
|
4
|
+
import tempfile
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Dict, Any, List, Optional, Union, Tuple
|
|
7
|
+
from enum import Enum
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field, field_validator, ConfigDict
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
import matplotlib.pyplot as plt
|
|
13
|
+
import seaborn as sns
|
|
14
|
+
|
|
15
|
+
from aiecs.tools import register_tool
|
|
16
|
+
from aiecs.tools.base_tool import BaseTool
|
|
17
|
+
from aiecs.tools.tool_executor import measure_execution_time
|
|
18
|
+
|
|
19
|
+
# Enums for configuration options
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ExportFormat(str, Enum):
    # File formats that results and data can be exported to.
    # The str mixin makes each member compare equal to its plain string value.

    JSON = "json"
    CSV = "csv"
    HTML = "html"
    EXCEL = "excel"
    MARKDOWN = "markdown"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class VisualizationType(str, Enum):
    # Kinds of plot that can be requested.
    # The str mixin makes each member compare equal to its plain string value.

    HISTOGRAM = "histogram"
    BOXPLOT = "boxplot"
    SCATTER = "scatter"
    BAR = "bar"
    LINE = "line"
    HEATMAP = "heatmap"
    PAIR = "pair"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@register_tool("chart")
|
|
41
|
+
class ChartTool(BaseTool):
|
|
42
|
+
"""Chart and visualization tool: creates charts and exports data in various formats."""
|
|
43
|
+
|
|
44
|
+
# Configuration schema
|
|
45
|
+
class Config(BaseModel):
    """Configuration for the chart tool"""

    # NOTE(review): env_prefix is normally a pydantic-settings option; confirm
    # that it has any effect on a plain BaseModel.
    model_config = ConfigDict(env_prefix="CHART_TOOL_")

    # By default exports go to a "chart_exports" folder under the system temp dir.
    export_dir: str = Field(
        description="Directory to export files to",
        default=os.path.join(tempfile.gettempdir(), "chart_exports"),
    )

    # Rendering defaults used when a call does not supply its own values.
    plot_dpi: int = Field(description="DPI for plot exports", default=100)
    plot_figsize: Tuple[int, int] = Field(
        description="Default figure size (width, height) in inches",
        default=(10, 6),
    )

    # Extensions of input files, each including the leading dot.
    allowed_extensions: List[str] = Field(
        description="Allowed file extensions",
        default=[
            ".csv",
            ".xlsx",
            ".xls",
            ".json",
            ".parquet",
            ".feather",
            ".sav",
            ".sas7bdat",
            ".por",
        ],
    )
|
|
73
|
+
|
|
74
|
+
# Input schemas for operations
|
|
75
|
+
class ReadDataSchema(BaseModel):
    """Schema for reading data files"""

    # Source file; it must already exist when the schema is validated.
    file_path: str = Field(description="Path to the data file")
    nrows: Optional[int] = Field(default=None, description="Number of rows to read")
    sheet_name: Optional[Union[str, int]] = Field(
        default=0, description="Sheet name or index for Excel files"
    )
    # export_format is declared before export_path on purpose: the export_path
    # validator reads it from the already-validated data.
    export_format: Optional[ExportFormat] = Field(
        default=None, description="Format to export results in"
    )
    export_path: Optional[str] = Field(default=None, description="Path to export results to")

    @field_validator("file_path")
    @classmethod
    def validate_file_path(cls, v):
        """Reject paths that do not point at an existing regular file."""
        if not os.path.isfile(v):
            raise ValueError(f"File not found: {v}")
        return v

    @field_validator("export_path")
    @classmethod
    def validate_export_path(cls, v, info):
        """Require an export format whenever a non-empty export path is given."""
        # Earlier fields appear in info.data even when they fell back to their
        # default, so a key-presence test never fires for an omitted
        # export_format; compare the value against None instead.
        if v and info.data.get("export_format") is None:
            raise ValueError("export_format must be specified when export_path is provided")
        return v
|
|
101
|
+
|
|
102
|
+
class VisualizationSchema(BaseModel):
    """Schema for data visualization"""

    # Source file; it must already exist when the schema is validated.
    file_path: str = Field(description="Path to the data file")
    plot_type: VisualizationType = Field(description="Type of visualization to create")

    # Column selections; which of them are used depends on the plot type.
    x: Optional[str] = Field(default=None, description="Column to use for x-axis")
    y: Optional[str] = Field(default=None, description="Column to use for y-axis")
    hue: Optional[str] = Field(default=None, description="Column to use for color encoding")
    variables: Optional[List[str]] = Field(
        default=None,
        description="List of variables to include in the visualization",
    )

    # Presentation and output options.
    title: Optional[str] = Field(default=None, description="Title for the visualization")
    figsize: Optional[Tuple[int, int]] = Field(
        default=None, description="Figure size (width, height) in inches"
    )
    output_path: Optional[str] = Field(
        default=None, description="Path to save the visualization"
    )
    dpi: Optional[int] = Field(default=None, description="DPI for the visualization")

    # export_format is declared before export_path on purpose: the export_path
    # validator reads it from the already-validated data.
    export_format: Optional[ExportFormat] = Field(
        default=None, description="Format to export results in"
    )
    export_path: Optional[str] = Field(default=None, description="Path to export results to")

    @field_validator("file_path")
    @classmethod
    def validate_file_path(cls, v):
        """Reject paths that do not point at an existing regular file."""
        if not os.path.isfile(v):
            raise ValueError(f"File not found: {v}")
        return v

    @field_validator("export_path")
    @classmethod
    def validate_export_path(cls, v, info):
        """Require an export format whenever a non-empty export path is given."""
        # Earlier fields appear in info.data even when they fell back to their
        # default, so a key-presence test never fires for an omitted
        # export_format; compare the value against None instead.
        if v and info.data.get("export_format") is None:
            raise ValueError("export_format must be specified when export_path is provided")
        return v
|
|
140
|
+
|
|
141
|
+
class ExportDataSchema(BaseModel):
    """Schema for exporting data"""

    # Source file; it must already exist when the schema is validated.
    file_path: str = Field(description="Path to the data file")
    variables: Optional[List[str]] = Field(
        default=None,
        description="List of variables to include in the export",
    )
    # Required format of the exported data; declared before export_path so the
    # export_path validator can see it.
    format: ExportFormat = Field(description="Format to export data in")
    export_path: Optional[str] = Field(
        default=None, description="Path to save the exported data"
    )
    export_format: Optional[ExportFormat] = Field(
        default=None, description="Format to export results in"
    )

    @field_validator("file_path")
    @classmethod
    def validate_file_path(cls, v):
        """Reject paths that do not point at an existing regular file."""
        if not os.path.isfile(v):
            raise ValueError(f"File not found: {v}")
        return v

    @field_validator("export_path")
    @classmethod
    def validate_export_path(cls, v, info):
        """Require a data format whenever a non-empty export path is given."""
        # Validators only see fields declared earlier. export_format comes
        # after export_path, so testing for it here rejected every non-empty
        # export_path. The path is where the exported data is saved, which is
        # governed by the required `format` field, so check that instead.
        if v and info.data.get("format") is None:
            raise ValueError("format must be specified when export_path is provided")
        return v
|
|
170
|
+
|
|
171
|
+
def __init__(self, config: Optional[Dict[str, Any]] = None):
    """
    Initialize the chart tool

    Parses the configuration, makes sure the export directory exists, sets up
    a module logger and applies a process-wide matplotlib style.

    Args:
        config: Optional configuration for the tool
    """
    super().__init__(config)

    # Parse configuration
    self.config = self.Config(**(config or {}))

    # Create export directory if it doesn't exist
    os.makedirs(self.config.export_dir, exist_ok=True)

    # Set up logger
    self.logger = logging.getLogger(__name__)

    # Set default matplotlib style. The whitegrid style has gone by different
    # names across matplotlib releases, so try the current name first, then
    # the older one, and keep matplotlib's defaults if neither is known.
    for style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
        try:
            plt.style.use(style_name)
        except OSError:
            continue
        break
    else:
        self.logger.warning(
            "No seaborn whitegrid matplotlib style available; using matplotlib defaults"
        )
|
|
191
|
+
|
|
192
|
+
def _load_data(
|
|
193
|
+
self,
|
|
194
|
+
file_path: str,
|
|
195
|
+
nrows: Optional[int] = None,
|
|
196
|
+
sheet_name: Optional[Union[str, int]] = 0,
|
|
197
|
+
) -> pd.DataFrame:
|
|
198
|
+
"""
|
|
199
|
+
Load data from various file formats into a pandas DataFrame
|
|
200
|
+
|
|
201
|
+
Args:
|
|
202
|
+
file_path: Path to the data file
|
|
203
|
+
nrows: Number of rows to read
|
|
204
|
+
sheet_name: Sheet name or index for Excel files
|
|
205
|
+
|
|
206
|
+
Returns:
|
|
207
|
+
Loaded DataFrame
|
|
208
|
+
"""
|
|
209
|
+
# Determine file type and read accordingly
|
|
210
|
+
ext = os.path.splitext(file_path)[1].lower()
|
|
211
|
+
|
|
212
|
+
try:
|
|
213
|
+
if ext == ".sav":
|
|
214
|
+
import pyreadstat
|
|
215
|
+
|
|
216
|
+
df, meta = pyreadstat.read_sav(file_path)
|
|
217
|
+
return df
|
|
218
|
+
elif ext == ".sas7bdat":
|
|
219
|
+
import pyreadstat
|
|
220
|
+
|
|
221
|
+
df, meta = pyreadstat.read_sas7bdat(file_path)
|
|
222
|
+
return df
|
|
223
|
+
elif ext == ".por":
|
|
224
|
+
import pyreadstat
|
|
225
|
+
|
|
226
|
+
df, meta = pyreadstat.read_por(file_path)
|
|
227
|
+
return df
|
|
228
|
+
elif ext == ".csv":
|
|
229
|
+
return pd.read_csv(file_path, nrows=nrows)
|
|
230
|
+
elif ext in [".xlsx", ".xls"]:
|
|
231
|
+
return pd.read_excel(file_path, sheet_name=sheet_name, nrows=nrows)
|
|
232
|
+
elif ext == ".json":
|
|
233
|
+
return pd.read_json(file_path)
|
|
234
|
+
elif ext == ".parquet":
|
|
235
|
+
return pd.read_parquet(file_path)
|
|
236
|
+
elif ext == ".feather":
|
|
237
|
+
return pd.read_feather(file_path)
|
|
238
|
+
else:
|
|
239
|
+
raise ValueError(f"Unsupported file format: {ext}")
|
|
240
|
+
except Exception as e:
|
|
241
|
+
raise ValueError(f"Error reading file {file_path}: {str(e)}")
|
|
242
|
+
|
|
243
|
+
def _export_result(self, result: Dict[str, Any], path: str, format: ExportFormat) -> str:
    """
    Export results to the specified format

    Args:
        result: Mapping of section name to value (DataFrame, dict or other)
        path: Path to save the exported result; missing parent directories
            are created
        format: Format to export in

    Returns:
        The path the result was written to

    Raises:
        ValueError: If the format is not supported or writing fails
    """
    # Local import: only the HTML branch needs it.
    from html import escape

    def frame_from_mapping(mapping: Dict[Any, Any]) -> pd.DataFrame:
        # pandas rejects a dict of plain scalars unless an index is supplied,
        # so fall back to treating the mapping as a single row.
        try:
            return pd.DataFrame(mapping)
        except ValueError:
            return pd.DataFrame([mapping])

    def json_serialize(obj):
        # Convert numpy/pandas objects to JSON-friendly Python types;
        # anything else is stringified.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, pd.DataFrame):
            return obj.to_dict(orient="records")
        return str(obj)

    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)

    try:
        if format == ExportFormat.JSON:
            with open(path, "w", encoding="utf-8") as f:
                json.dump(result, f, default=json_serialize, indent=2)

        elif format == ExportFormat.CSV:
            # Export the first non-empty dict or DataFrame found in the result
            data_to_export = None
            for value in result.values():
                if isinstance(value, dict) and value:
                    data_to_export = frame_from_mapping(value)
                    break
                if isinstance(value, pd.DataFrame):
                    data_to_export = value
                    break

            if data_to_export is not None:
                data_to_export.to_csv(path, index=False)
            else:
                # Fallback: write the scalar entries as one header row and one data row
                flat_data = {
                    k: v
                    for k, v in result.items()
                    if not isinstance(v, (dict, list, pd.DataFrame))
                }
                with open(path, "w", newline="", encoding="utf-8") as f:
                    writer = csv.writer(f)
                    writer.writerow(flat_data.keys())
                    writer.writerow(flat_data.values())

        elif format == ExportFormat.HTML:
            # Keys and values come from data, so escape them before embedding
            # them in markup. DataFrame.to_html does its own escaping.
            parts = ["<html><body><h1>Chart Results</h1>"]
            for key, value in result.items():
                parts.append(f"<h2>{escape(str(key))}</h2>")
                if isinstance(value, pd.DataFrame):
                    parts.append(value.to_html())
                elif isinstance(value, dict):
                    parts.append("<table border='1'><tr><th>Parameter</th><th>Value</th></tr>")
                    parts.extend(
                        f"<tr><td>{escape(str(k))}</td><td>{escape(str(v))}</td></tr>"
                        for k, v in value.items()
                    )
                    parts.append("</table>")
                else:
                    parts.append(f"<p>{escape(str(value))}</p>")
            parts.append("</body></html>")

            with open(path, "w", encoding="utf-8") as f:
                f.write("".join(parts))

        elif format == ExportFormat.EXCEL:
            # Scalars are gathered and written once; writing each one to the
            # same "Summary" sheet would overwrite the previous value.
            summary = {}
            with pd.ExcelWriter(path) as writer:
                for key, value in result.items():
                    # Excel sheet names limited to 31 chars
                    sheet = str(key)[:31]
                    if isinstance(value, pd.DataFrame):
                        value.to_excel(writer, sheet_name=sheet)
                    elif isinstance(value, dict):
                        try:
                            frame = pd.DataFrame(value, index=[0])
                        except ValueError:
                            # e.g. list values longer than one element
                            frame = frame_from_mapping(value)
                        frame.to_excel(writer, sheet_name=sheet)
                    else:
                        summary[key] = [value]
                if summary:
                    pd.DataFrame(summary).to_excel(writer, sheet_name="Summary")

        elif format == ExportFormat.MARKDOWN:
            with open(path, "w", encoding="utf-8") as f:
                f.write("# Chart Results\n\n")
                for key, value in result.items():
                    f.write(f"## {key}\n\n")
                    if isinstance(value, pd.DataFrame):
                        f.write(value.to_markdown())
                        # Blank line so the next heading is not glued to the table
                        f.write("\n\n")
                    elif isinstance(value, dict):
                        f.write("| Parameter | Value |\n|-----------|-------|\n")
                        for k, v in value.items():
                            f.write(f"| {k} | {v} |\n")
                        f.write("\n")
                    else:
                        f.write(f"{value}\n\n")

        else:
            # Previously an unknown format wrote nothing and still reported success.
            raise ValueError(f"Unsupported export format: {format}")

        return path
    except Exception as e:
        # Keep the original exception as the cause for easier debugging.
        raise ValueError(f"Error exporting to {format}: {str(e)}") from e
|
|
345
|
+
|
|
346
|
+
def _create_visualization(
    self,
    df: pd.DataFrame,
    plot_type: VisualizationType,
    x: Optional[str] = None,
    y: Optional[str] = None,
    hue: Optional[str] = None,
    variables: Optional[List[str]] = None,
    title: Optional[str] = None,
    figsize: Optional[Tuple[int, int]] = None,
    output_path: Optional[str] = None,
    dpi: Optional[int] = None,
) -> str:
    """
    Create a visualization based on the parameters and return the path to the saved image

    Args:
        df: DataFrame to visualize
        plot_type: Type of visualization to create
        x: Column to use for x-axis
        y: Column to use for y-axis
        hue: Column to use for color encoding
        variables: List of variables to include in the visualization
            (histogram, heatmap and pair plots only)
        title: Title for the visualization
        figsize: Figure size (width, height) in inches; falls back to
            ``self.config.plot_figsize``. Not applied to pair plots, which
            create their own figure.
        output_path: Path to save the visualization. When empty a random
            ``plot_<hex>.png`` name is generated; relative paths are resolved
            against ``self.config.export_dir``.
        dpi: DPI for the visualization; falls back to ``self.config.plot_dpi``

    Returns:
        Path to the saved visualization

    Raises:
        ValueError: If plotting or saving fails (the original error is chained)
    """
    if not output_path:
        output_path = os.path.join(self.config.export_dir, f"plot_{os.urandom(4).hex()}.png")
    elif not os.path.isabs(output_path):
        output_path = os.path.join(self.config.export_dir, output_path)

    os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)

    # Remember which pyplot figures already exist so the cleanup below only
    # closes figures opened by this call, never the caller's.
    preexisting_figures = set(plt.get_fignums())

    try:
        figsize = figsize or self.config.plot_figsize
        dpi = dpi or self.config.plot_dpi

        if plot_type == VisualizationType.PAIR:
            # pairplot builds its own figure, so no placeholder figure is
            # opened here (it would stay empty and never be closed).
            if variables:
                plot_vars = list(variables)
                # Avoid selecting the hue column twice when it is already
                # one of the requested variables.
                if hue and hue not in plot_vars:
                    plot_vars.append(hue)
                sns.pairplot(df[plot_vars], hue=hue)
            else:
                sns.pairplot(df, hue=hue)
        else:
            plt.figure(figsize=figsize)

            if plot_type == VisualizationType.HISTOGRAM:
                if variables:
                    for var in variables:
                        sns.histplot(data=df, x=var, kde=True, label=var)
                    plt.legend()
                else:
                    sns.histplot(data=df, x=x, hue=hue)

            elif plot_type == VisualizationType.BOXPLOT:
                sns.boxplot(data=df, x=x, y=y, hue=hue)

            elif plot_type == VisualizationType.SCATTER:
                sns.scatterplot(data=df, x=x, y=y, hue=hue)

            elif plot_type == VisualizationType.BAR:
                sns.barplot(data=df, x=x, y=y, hue=hue)

            elif plot_type == VisualizationType.LINE:
                sns.lineplot(data=df, x=x, y=y, hue=hue)

            elif plot_type == VisualizationType.HEATMAP:
                if variables:
                    corr = df[variables].corr()
                else:
                    # Correlate numeric/bool columns only: recent pandas
                    # raises on non-numeric columns instead of dropping them.
                    corr = df.select_dtypes(include=["number", "bool"]).corr()
                sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f")

        if title:
            plt.title(title)

        plt.tight_layout()
        plt.savefig(output_path, dpi=dpi)

        return output_path
    except Exception as e:
        raise ValueError(f"Error creating visualization: {str(e)}") from e
    finally:
        # Close every figure opened during this call, on success and on
        # failure alike, so repeated calls do not accumulate open figures.
        for fig_num in set(plt.get_fignums()) - preexisting_figures:
            plt.close(fig_num)
|
|
434
|
+
|
|
435
|
+
def _validate_variables(self, df: pd.DataFrame, variables: List[str]) -> None:
    """
    Validate that variables exist in the DataFrame

    Args:
        df: DataFrame to check
        variables: List of variables to validate; an empty or None list is
            accepted and validates nothing

    Raises:
        ValueError: If any variables are not found in the DataFrame. The
            message lists the missing names and the available columns in
            DataFrame column order.
    """
    if not variables:
        return

    # The set is only used for fast membership tests; the message below is
    # built from df.columns directly so its column order is deterministic.
    available_columns = set(df.columns)
    # str() keeps the join below from failing on non-string column labels.
    missing = [str(col) for col in variables if col not in available_columns]
    if missing:
        raise ValueError(
            f"Variables not found in dataset: {', '.join(missing)}. Available columns: {list(df.columns)}"
        )
|
|
455
|
+
|
|
456
|
+
def _to_json_serializable(
    self, result: Union[pd.DataFrame, pd.Series, Dict]
) -> Union[List[Dict], Dict]:
    """
    Convert result to JSON serializable format

    Args:
        result: Result to convert. DataFrames become a list of row dicts,
            Series become a dict, and dicts have their values converted
            (recursing into nested dicts and lists). Any other object is
            returned unchanged.

    Returns:
        JSON serializable result. The input object is never modified.
    """
    if isinstance(result, pd.DataFrame):
        # Handle datetime columns (timezone-naive and timezone-aware)
        datetime_cols = result.select_dtypes(include=["datetime64", "datetimetz"]).columns
        if len(datetime_cols) > 0:
            # Work on a copy so the caller's DataFrame keeps its datetime dtype.
            result = result.copy()
            for col in datetime_cols:
                result[col] = result[col].dt.strftime("%Y-%m-%d %H:%M:%S")
        return result.to_dict(orient="records")
    elif isinstance(result, pd.Series):
        if pd.api.types.is_datetime64_any_dtype(result):
            result = result.dt.strftime("%Y-%m-%d %H:%M:%S")
        return result.to_dict()
    elif isinstance(result, dict):
        # Handle numpy types and datetime objects
        def convert_value(v):
            if isinstance(v, dict):
                return {k: convert_value(item) for k, item in v.items()}
            if isinstance(v, list):
                return [convert_value(item) for item in v]
            if isinstance(v, np.ndarray):
                return v.tolist()
            # pd.isna returns an array for list-likes, which cannot be used
            # as a condition; only scalars are tested for missing values.
            # Checked before the numeric branches so that numpy NaN and
            # Python NaN both map to None.
            if pd.api.types.is_scalar(v) and pd.isna(v):
                return None
            if isinstance(v, np.bool_):
                return bool(v)
            if isinstance(v, np.integer):
                # Stay integral: float() would lose precision on large values.
                return int(v)
            if isinstance(v, np.floating):
                return float(v)
            if isinstance(v, (pd.Timestamp, np.datetime64)):
                return str(v)
            return v

        return {k: convert_value(v) for k, v in result.items()}
    return result
|
|
494
|
+
|
|
495
|
+
@measure_execution_time
def read_data(
    self,
    file_path: str,
    nrows: Optional[int] = None,
    sheet_name: Optional[Union[str, int]] = 0,
    export_format: Optional[ExportFormat] = None,
    export_path: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Load a data file and return a summary of its contents

    Args:
        file_path: Path to the data file
        nrows: Number of rows to read
        sheet_name: Sheet name or index for Excel files
        export_format: Format to export results in
        export_path: Path to export results to; a relative path is resolved
            against ``self.config.export_dir``

    Returns:
        Dictionary with the column names, row count, per-column dtypes,
        memory usage in MB and a five-row preview. ``exported_to`` is added
        when both ``export_format`` and ``export_path`` are given.

    Raises:
        ValueError: If the file does not exist or its extension is not in
            ``self.config.allowed_extensions``
    """
    # Reject missing files up front.
    if not os.path.isfile(file_path):
        raise ValueError(f"File not found: {file_path}")

    # Only configured extensions may be loaded (compared in lower case).
    _, raw_suffix = os.path.splitext(file_path)
    ext = raw_suffix.lower()
    if ext not in self.config.allowed_extensions:
        raise ValueError(
            f"Extension '{ext}' not allowed. Supported formats: {', '.join(self.config.allowed_extensions)}"
        )

    frame = self._load_data(file_path, nrows, sheet_name)

    column_types = {}
    for name, dtype in frame.dtypes.items():
        column_types[name] = str(dtype)

    # Deep memory footprint, converted from bytes to MB.
    megabytes = frame.memory_usage(deep=True).sum() / (1024 * 1024)

    result = {
        "variables": frame.columns.tolist(),
        "observations": len(frame),
        "dtypes": column_types,
        "memory_usage": megabytes,
        "preview": frame.head(5).to_dict(orient="records"),
    }

    # Without both a format and a path there is nothing to export.
    if not (export_format and export_path):
        return result

    if os.path.isabs(export_path):
        target = export_path
    else:
        target = os.path.join(self.config.export_dir, export_path)

    self._export_result(result, target, export_format)
    result["exported_to"] = target
    return result
|
|
550
|
+
|
|
551
|
+
@measure_execution_time
def visualize(
    self,
    file_path: str,
    plot_type: VisualizationType,
    x: Optional[str] = None,
    y: Optional[str] = None,
    hue: Optional[str] = None,
    variables: Optional[List[str]] = None,
    title: Optional[str] = None,
    figsize: Optional[Tuple[int, int]] = None,
    output_path: Optional[str] = None,
    dpi: Optional[int] = None,
    export_format: Optional[ExportFormat] = None,
    export_path: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Load a data file, plot it and describe the generated image

    Args:
        file_path: Path to the data file
        plot_type: Type of visualization to create
        x: Column to use for x-axis
        y: Column to use for y-axis
        hue: Column to use for color encoding
        variables: List of variables to include in the visualization
        title: Title for the visualization
        figsize: Figure size (width, height) in inches
        output_path: Path to save the visualization
        dpi: DPI for the visualization
        export_format: Format to export results in
        export_path: Path to export results to; a relative path is resolved
            against ``self.config.export_dir``

    Returns:
        Dictionary with the plot type, image path, variables and title.
        ``exported_to`` is added when both ``export_format`` and
        ``export_path`` are given.

    Raises:
        ValueError: If the file does not exist, its extension is not in
            ``self.config.allowed_extensions``, or a requested column is
            missing from the data
    """
    # Reject missing files up front.
    if not os.path.isfile(file_path):
        raise ValueError(f"File not found: {file_path}")

    # Only configured extensions may be loaded (compared in lower case).
    _, raw_suffix = os.path.splitext(file_path)
    ext = raw_suffix.lower()
    if ext not in self.config.allowed_extensions:
        raise ValueError(
            f"Extension '{ext}' not allowed. Supported formats: {', '.join(self.config.allowed_extensions)}"
        )

    frame = self._load_data(file_path)

    # Every column named by the caller must exist: the explicit variable
    # list first, then whichever of x / y / hue were supplied.
    requested = list(variables) if variables else []
    requested += [column for column in (x, y, hue) if column]
    self._validate_variables(frame, requested)

    output_path = self._create_visualization(
        frame, plot_type, x, y, hue, variables, title, figsize, output_path, dpi
    )

    if variables:
        reported_variables = variables
    else:
        reported_variables = [x, y, hue]

    if title:
        reported_title = title
    else:
        reported_title = f"{plot_type.capitalize()} Plot"

    result = {
        "plot_type": plot_type,
        "output_path": output_path,
        "variables": reported_variables,
        "title": reported_title,
    }

    # Without both a format and a path there is nothing to export.
    if not (export_format and export_path):
        return result

    if os.path.isabs(export_path):
        target = export_path
    else:
        target = os.path.join(self.config.export_dir, export_path)

    self._export_result(result, target, export_format)
    result["exported_to"] = target
    return result
|
|
645
|
+
|
|
646
|
+
@measure_execution_time
def export_data(
    self,
    file_path: str,
    format: ExportFormat,
    variables: Optional[List[str]] = None,
    export_path: Optional[str] = None,
    export_format: Optional[ExportFormat] = None,
) -> Dict[str, Any]:
    """
    Export data to various formats

    Args:
        file_path: Path to the data file
        format: Format to export data in
        variables: List of variables to include in the export
        export_path: Path to save the exported data. When empty a random
            ``export_<hex>`` name is generated; relative paths are resolved
            against ``self.config.export_dir``.
        export_format: Format to export results in. When given, the result
            summary is written next to the data file as
            ``<data file stem>_summary.<ext>`` so it cannot overwrite the
            exported data.

    Returns:
        Dictionary with export details (format, data path, row and column
        counts, column names) plus ``exported_to`` when a result summary
        was written

    Raises:
        ValueError: If the file does not exist, its extension is not in
            ``self.config.allowed_extensions``, a requested variable is
            missing, the format is unsupported, or writing fails
    """
    # Validate file path
    if not os.path.isfile(file_path):
        raise ValueError(f"File not found: {file_path}")

    # Check file extension
    ext = os.path.splitext(file_path)[1].lower()
    if ext not in self.config.allowed_extensions:
        raise ValueError(
            f"Extension '{ext}' not allowed. Supported formats: {', '.join(self.config.allowed_extensions)}"
        )

    # Load data
    df = self._load_data(file_path)

    # Validate variables
    if variables:
        self._validate_variables(df, variables)
        df = df[variables]

    # Determine export path
    if not export_path:
        ext = "." + format.value
        if format == ExportFormat.EXCEL:
            ext = ".xlsx"
        export_path = os.path.join(self.config.export_dir, f"export_{os.urandom(4).hex()}{ext}")
    elif not os.path.isabs(export_path):
        export_path = os.path.join(self.config.export_dir, export_path)

    # Create export directory if it doesn't exist
    os.makedirs(os.path.dirname(os.path.abspath(export_path)), exist_ok=True)

    # Export data
    try:
        if format == ExportFormat.JSON:
            df.to_json(export_path, orient="records", indent=2)
        elif format == ExportFormat.CSV:
            df.to_csv(export_path, index=False)
        elif format == ExportFormat.HTML:
            df.to_html(export_path)
        elif format == ExportFormat.EXCEL:
            df.to_excel(export_path, index=False)
        elif format == ExportFormat.MARKDOWN:
            with open(export_path, "w", encoding="utf-8") as f:
                f.write(df.to_markdown())
        else:
            # Do not report success when nothing was written.
            raise ValueError(f"Unsupported export format: {format}")
    except Exception as e:
        raise ValueError(f"Error exporting to {format}: {str(e)}") from e

    # Create result
    result = {
        "format": format,
        "path": export_path,
        "rows": len(df),
        "columns": len(df.columns),
        "variables": df.columns.tolist(),
    }

    # Handle export of the result summary if requested. export_path already
    # points at the data file written above (and is already resolved against
    # export_dir), so the summary goes to a sibling file instead of
    # overwriting the data or being joined with export_dir a second time.
    if export_format:
        if export_format == ExportFormat.EXCEL:
            summary_ext = ".xlsx"
        else:
            summary_ext = "." + str(getattr(export_format, "value", export_format))
        summary_path = f"{os.path.splitext(export_path)[0]}_summary{summary_ext}"

        self._export_result(result, summary_path, export_format)
        result["exported_to"] = summary_path

    return result
|