aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
aiecs/tools/statistics/ai_data_analysis_orchestrator.py
@@ -0,0 +1,643 @@
"""
AI Data Analysis Orchestrator - AI-powered end-to-end data analysis workflow coordination

This orchestrator coordinates multiple foundation tools to provide:
- Natural language driven analysis
- Automated workflow orchestration
- Multi-tool coordination
- Comprehensive analysis execution
- Support for various analysis modes
"""

import logging
from typing import Dict, Any, List, Optional
from enum import Enum
from datetime import datetime

from pydantic import BaseModel, Field, ConfigDict

from aiecs.tools.base_tool import BaseTool
from aiecs.tools import register_tool

class AnalysisMode(str, Enum):
    """Analysis execution modes"""

    EXPLORATORY = "exploratory"
    DIAGNOSTIC = "diagnostic"
    PREDICTIVE = "predictive"
    PRESCRIPTIVE = "prescriptive"
    COMPARATIVE = "comparative"
    CAUSAL = "causal"


class AIProvider(str, Enum):
    """Supported AI providers for future integration"""

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
    LOCAL = "local"


class OrchestratorError(Exception):
    """Base exception for Orchestrator errors"""


class WorkflowError(OrchestratorError):
    """Raised when workflow execution fails"""

@register_tool("ai_data_analysis_orchestrator")
class AIDataAnalysisOrchestrator(BaseTool):
    """
    AI-powered data analysis orchestrator that can:
    1. Understand analysis requirements
    2. Automatically design analysis workflows
    3. Orchestrate multiple tools to complete analysis
    4. Generate comprehensive analysis reports

    Coordinates foundation tools: data_loader, data_profiler, data_transformer,
    data_visualizer, statistical_analyzer, model_trainer
    """

    # Configuration schema
    class Config(BaseModel):
        """Configuration for the AI data analysis orchestrator tool"""

        model_config = ConfigDict(env_prefix="AI_DATA_ORCHESTRATOR_")

        default_mode: str = Field(default="exploratory", description="Default analysis mode to use")
        max_iterations: int = Field(default=10, description="Maximum number of analysis iterations")
        enable_auto_workflow: bool = Field(
            default=True,
            description="Whether to enable automatic workflow generation",
        )
        default_ai_provider: str = Field(default="openai", description="Default AI provider to use")
        enable_caching: bool = Field(default=True, description="Whether to enable result caching")
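
    # NOTE: env_prefix is honored by pydantic-settings' BaseSettings; on a
    # plain pydantic BaseModel it is not applied automatically, so here it
    # chiefly documents the intended environment variable prefix
    # (e.g. AI_DATA_ORCHESTRATOR_DEFAULT_MODE).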

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialize AI Data Analysis Orchestrator"""
        super().__init__(config)

        # Parse configuration
        self.config = self.Config(**(config or {}))

        self.logger = logging.getLogger(__name__)
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
            self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

        # Initialize foundation tools
        self._init_foundation_tools()

        # Initialize AI providers (placeholder for future implementation)
        self._init_ai_providers()

        # Workflow cache
        self.workflow_cache = {}

    def _init_foundation_tools(self):
        """Initialize foundation data analysis tools"""
        self.foundation_tools = {}

        try:
            from aiecs.tools.statistics.data_loader_tool import DataLoaderTool

            self.foundation_tools["data_loader"] = DataLoaderTool()
            self.logger.info("DataLoaderTool initialized")
        except ImportError:
            self.logger.warning("DataLoaderTool not available")

        try:
            from aiecs.tools.statistics.data_profiler_tool import (
                DataProfilerTool,
            )

            self.foundation_tools["data_profiler"] = DataProfilerTool()
            self.logger.info("DataProfilerTool initialized")
        except ImportError:
            self.logger.warning("DataProfilerTool not available")

        try:
            from aiecs.tools.statistics.data_transformer_tool import (
                DataTransformerTool,
            )

            self.foundation_tools["data_transformer"] = DataTransformerTool()
            self.logger.info("DataTransformerTool initialized")
        except ImportError:
            self.logger.warning("DataTransformerTool not available")

        try:
            from aiecs.tools.statistics.data_visualizer_tool import (
                DataVisualizerTool,
            )

            self.foundation_tools["data_visualizer"] = DataVisualizerTool()
            self.logger.info("DataVisualizerTool initialized")
        except ImportError:
            self.logger.warning("DataVisualizerTool not available")

        try:
            from aiecs.tools.statistics.statistical_analyzer_tool import (
                StatisticalAnalyzerTool,
            )

            self.foundation_tools["statistical_analyzer"] = StatisticalAnalyzerTool()
            self.logger.info("StatisticalAnalyzerTool initialized")
        except ImportError:
            self.logger.warning("StatisticalAnalyzerTool not available")

        try:
            from aiecs.tools.statistics.model_trainer_tool import (
                ModelTrainerTool,
            )

            self.foundation_tools["model_trainer"] = ModelTrainerTool()
            self.logger.info("ModelTrainerTool initialized")
        except ImportError:
            self.logger.warning("ModelTrainerTool not available")
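
    # Each foundation tool is an optional dependency: a failed import is only
    # logged and the tool is simply absent from self.foundation_tools;
    # _execute_workflow() below then skips any workflow step whose tool is
    # missing rather than failing the whole run.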

    def _init_ai_providers(self):
        """Initialize AI providers (placeholder for future implementation)"""
        self.ai_providers = {}
        # Future integration point for AIECS client
        # try:
        #     from aiecs import AIECS
        #     self.aiecs_client = AIECS()
        #     self.ai_providers['aiecs'] = self.aiecs_client
        # except ImportError:
        #     self.logger.warning("AIECS client not available")

    # Schema definitions
    class AnalyzeSchema(BaseModel):
        """Schema for analyze operation"""

        data_source: str = Field(description="Path to data source or data itself")
        question: str = Field(description="Analysis question in natural language")
        mode: AnalysisMode = Field(default=AnalysisMode.EXPLORATORY, description="Analysis mode")
        max_iterations: int = Field(default=10, description="Maximum workflow iterations")

    class AutoAnalyzeDatasetSchema(BaseModel):
        """Schema for auto_analyze_dataset operation"""

        data_source: str = Field(description="Path to data source")
        focus_areas: Optional[List[str]] = Field(default=None, description="Areas to focus on")
        generate_report: bool = Field(default=True, description="Generate analysis report")

    class OrchestrateWorkflowSchema(BaseModel):
        """Schema for orchestrate_workflow operation"""

        workflow_steps: List[Dict[str, Any]] = Field(description="Workflow steps to execute")
        data_source: str = Field(description="Data source")
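
    # These schemas mirror the public method signatures below; in the aiecs
    # tool framework they appear to back parameter validation for operation
    # dispatch (tool.run(operation, **params)), though that wiring lives in
    # BaseTool rather than in this file.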

    def analyze(
        self,
        data_source: str,
        question: str,
        mode: AnalysisMode = AnalysisMode.EXPLORATORY,
        max_iterations: int = 10,
    ) -> Dict[str, Any]:
        """
        Perform AI-driven data analysis based on a natural language question.

        Args:
            data_source: Path to data source file
            question: Analysis question in natural language
            mode: Analysis mode to use
            max_iterations: Maximum workflow iterations

        Returns:
            Dict containing:
            - analysis_plan: Planned analysis steps
            - execution_log: Log of executed steps
            - findings: Analysis findings and insights
            - recommendations: Recommendations based on analysis
            - report: Analysis report
        """
        try:
            self.logger.info(f"Starting analysis: {question}")

            # Design analysis workflow based on question and mode
            workflow = self._design_workflow(question, mode, data_source)

            # Execute workflow
            execution_results = self._execute_workflow(workflow, data_source, max_iterations)

            # Generate findings from results
            findings = self._generate_findings(execution_results)

            # Generate recommendations
            recommendations = self._generate_recommendations(findings)

            # Generate report
            report = self._generate_analysis_report(
                question,
                workflow,
                execution_results,
                findings,
                recommendations,
            )

            return {
                "analysis_plan": workflow,
                "execution_log": execution_results.get("log", []),
                "findings": findings,
                "recommendations": recommendations,
                "report": report,
                "mode": mode.value,
                "timestamp": datetime.now().isoformat(),
            }

        except Exception as e:
            self.logger.error(f"Error in analysis: {e}")
            raise WorkflowError(f"Analysis failed: {e}")
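
    # The "report" entry returned above is plain Markdown text assembled by
    # _generate_analysis_report(); "findings" and "recommendations" are lists
    # of dicts intended for downstream tooling.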

    def auto_analyze_dataset(
        self,
        data_source: str,
        focus_areas: Optional[List[str]] = None,
        generate_report: bool = True,
    ) -> Dict[str, Any]:
        """
        Automatically analyze a dataset without a specific question.

        Args:
            data_source: Path to data source
            focus_areas: Specific areas to focus on
            generate_report: Whether to generate comprehensive report

        Returns:
            Dict containing comprehensive analysis results
        """
        try:
            self.logger.info(f"Auto-analyzing dataset: {data_source}")

            # Load data
            load_result = self.foundation_tools["data_loader"].load_data(source=data_source)
            data = load_result["data"]

            # Profile data
            profile_result = self.foundation_tools["data_profiler"].profile_dataset(
                data=data, level="comprehensive"
            )

            # Auto-transform if needed
            if profile_result.get("quality_issues"):
                transform_result = self.foundation_tools["data_transformer"].auto_transform(
                    data=data
                )
                data = transform_result["transformed_data"]

            # Generate visualizations
            viz_result = self.foundation_tools["data_visualizer"].auto_visualize_dataset(
                data=data,
                focus_areas=focus_areas or ["distributions", "correlations"],
            )

            # Perform statistical analysis
            numeric_cols = data.select_dtypes(include=["number"]).columns.tolist()
            stats_result = {}
            if len(numeric_cols) >= 2:
                stats_result = self.foundation_tools["statistical_analyzer"].analyze_correlation(
                    data=data, variables=numeric_cols
                )

            # Compile results
            results = {
                "data_profile": profile_result,
                "transformations_applied": (
                    transform_result if "transform_result" in locals() else None
                ),
                "visualizations": viz_result,
                "statistical_analysis": stats_result,
                "data_source": data_source,
                "timestamp": datetime.now().isoformat(),
            }

            if generate_report:
                results["report"] = self._generate_auto_analysis_report(results)

            return results

        except Exception as e:
            self.logger.error(f"Error in auto analysis: {e}")
            raise WorkflowError(f"Auto analysis failed: {e}")
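
    # Unlike analyze(), auto_analyze_dataset() indexes the foundation tools
    # directly, so a tool that failed to import surfaces as a WorkflowError
    # instead of being skipped; it also assumes the loaded data is a pandas
    # DataFrame (see select_dtypes above).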

    def orchestrate_workflow(
        self, workflow_steps: List[Dict[str, Any]], data_source: str
    ) -> Dict[str, Any]:
        """
        Orchestrate a custom workflow with specified steps.

        Args:
            workflow_steps: List of workflow steps with tool and operation info
            data_source: Data source path

        Returns:
            Dict containing workflow execution results
        """
        try:
            results = self._execute_workflow(
                {"steps": workflow_steps},
                data_source,
                max_iterations=len(workflow_steps),
            )

            return {
                "workflow_results": results,
                "total_steps": len(workflow_steps),
                "status": "completed",
            }

        except Exception as e:
            self.logger.error(f"Error orchestrating workflow: {e}")
            raise WorkflowError(f"Workflow orchestration failed: {e}")
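
    # Example workflow_steps payload (illustrative; "sales.csv" is a
    # placeholder path, the tool/operation names come from _design_workflow):
    # [
    #     {"tool": "data_loader", "operation": "load_data", "params": {"source": "sales.csv"}},
    #     {"tool": "data_profiler", "operation": "profile_dataset", "params": {"level": "comprehensive"}},
    #     {"tool": "statistical_analyzer", "operation": "analyze_correlation", "params": {}},
    # ]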

    # Internal workflow methods

    def _design_workflow(
        self, question: str, mode: AnalysisMode, data_source: str
    ) -> Dict[str, Any]:
        """Design analysis workflow based on question and mode"""
        workflow = {"question": question, "mode": mode.value, "steps": []}

        # Standard workflow steps based on mode
        if mode == AnalysisMode.EXPLORATORY:
            workflow["steps"] = [
                {
                    "tool": "data_loader",
                    "operation": "load_data",
                    "params": {"source": data_source},
                },
                {
                    "tool": "data_profiler",
                    "operation": "profile_dataset",
                    "params": {"level": "comprehensive"},
                },
                {
                    "tool": "data_visualizer",
                    "operation": "auto_visualize_dataset",
                    "params": {"max_charts": 5},
                },
                {
                    "tool": "statistical_analyzer",
                    "operation": "analyze_correlation",
                    "params": {},
                },
            ]
        elif mode == AnalysisMode.PREDICTIVE:
            workflow["steps"] = [
                {
                    "tool": "data_loader",
                    "operation": "load_data",
                    "params": {"source": data_source},
                },
                {
                    "tool": "data_profiler",
                    "operation": "profile_dataset",
                    "params": {},
                },
                {
                    "tool": "data_transformer",
                    "operation": "auto_transform",
                    "params": {},
                },
                {
                    "tool": "model_trainer",
                    "operation": "auto_select_model",
                    "params": {},
                },
            ]
        elif mode == AnalysisMode.DIAGNOSTIC:
            workflow["steps"] = [
                {
                    "tool": "data_loader",
                    "operation": "load_data",
                    "params": {"source": data_source},
                },
                {
                    "tool": "data_profiler",
                    "operation": "detect_quality_issues",
                    "params": {},
                },
                {
                    "tool": "statistical_analyzer",
                    "operation": "test_hypothesis",
                    "params": {},
                },
            ]
        else:
            # Default exploratory workflow
            workflow["steps"] = [
                {
                    "tool": "data_loader",
                    "operation": "load_data",
                    "params": {"source": data_source},
                },
                {
                    "tool": "data_profiler",
                    "operation": "profile_dataset",
                    "params": {},
                },
            ]

        return workflow
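
    # Only EXPLORATORY, PREDICTIVE, and DIAGNOSTIC have dedicated pipelines;
    # PRESCRIPTIVE, COMPARATIVE, and CAUSAL currently fall through to the
    # minimal load-and-profile workflow in the else branch above.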

    def _execute_workflow(
        self, workflow: Dict[str, Any], data_source: str, max_iterations: int
    ) -> Dict[str, Any]:
        """Execute workflow steps"""
        results = {"log": [], "data": None, "outputs": {}}

        current_data = None

        for i, step in enumerate(workflow["steps"][:max_iterations]):
            try:
                tool_name = step["tool"]
                operation = step["operation"]
                params = step.get("params", {})

                self.logger.info(f"Executing step {i+1}: {tool_name}.{operation}")

                # Get tool
                tool = self.foundation_tools.get(tool_name)
                if not tool:
                    self.logger.warning(f"Tool {tool_name} not available, skipping")
                    continue

                # Prepare parameters
                if current_data is not None and "data" not in params:
                    params["data"] = current_data

                # Execute operation
                result = tool.run(operation, **params)

                # Update current data if result contains data
                if isinstance(result, dict) and "data" in result:
                    current_data = result["data"]
                elif isinstance(result, dict) and "transformed_data" in result:
                    current_data = result["transformed_data"]

                # Log execution
                results["log"].append(
                    {
                        "step": i + 1,
                        "tool": tool_name,
                        "operation": operation,
                        "status": "success",
                        "summary": self._summarize_result(result),
                    }
                )

                results["outputs"][f"{tool_name}_{operation}"] = result

            except Exception as e:
                self.logger.error(f"Error in step {i+1}: {e}")
                results["log"].append(
                    {
                        "step": i + 1,
                        "tool": step["tool"],
                        "operation": step["operation"],
                        "status": "failed",
                        "error": str(e),
                    }
                )

        results["data"] = current_data
        return results
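
    # Data flows between steps implicitly: when a step returns a dict with a
    # "data" (or "transformed_data") key, that value becomes current_data and
    # is injected as params["data"] into every later step that does not set
    # its own "data" parameter.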

    def _generate_findings(self, execution_results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate findings from execution results"""
        findings = []

        outputs = execution_results.get("outputs", {})

        # Extract insights from profiling
        if "data_profiler_profile_dataset" in outputs:
            profile = outputs["data_profiler_profile_dataset"]
            summary = profile.get("summary", {})
            findings.append(
                {
                    "type": "data_profile",
                    "title": "Dataset Overview",
                    "description": f"Dataset contains {summary.get('rows', 0)} rows and {summary.get('columns', 0)} columns",
                    "confidence": "high",
                    "evidence": summary,
                }
            )

        # Extract insights from statistical analysis
        if "statistical_analyzer_analyze_correlation" in outputs:
            corr = outputs["statistical_analyzer_analyze_correlation"]
            high_corr = corr.get("high_correlations", [])
            if high_corr:
                findings.append(
                    {
                        "type": "correlation",
                        "title": "Significant Correlations Found",
                        "description": f"Found {len(high_corr)} significant correlations",
                        "confidence": "high",
                        "evidence": high_corr,
                    }
                )

        return findings

    def _generate_recommendations(self, findings: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Generate recommendations based on findings"""
        recommendations = []

        for finding in findings:
            if finding["type"] == "data_profile":
                recommendations.append(
                    {
                        "action": "data_quality_check",
                        "reason": "Perform comprehensive data quality assessment",
                        "priority": "high",
                    }
                )
            elif finding["type"] == "correlation":
                recommendations.append(
                    {
                        "action": "investigate_relationships",
                        "reason": "Investigate significant correlations for potential insights",
                        "priority": "medium",
                    }
                )

        return recommendations

    def _generate_analysis_report(
        self,
        question: str,
        workflow: Dict[str, Any],
        execution_results: Dict[str, Any],
        findings: List[Dict[str, Any]],
        recommendations: List[Dict[str, Any]],
    ) -> str:
        """Generate comprehensive analysis report"""
        report_lines = [
            "# Data Analysis Report",
            "",
            f"**Question:** {question}",
            f"**Analysis Mode:** {workflow.get('mode', 'N/A')}",
            f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            "",
            "## Analysis Workflow",
            "",
        ]

        for i, step in enumerate(workflow.get("steps", []), 1):
            report_lines.append(f"{i}. {step['tool']}.{step['operation']}")

        report_lines.extend(["", "## Key Findings", ""])

        for i, finding in enumerate(findings, 1):
            report_lines.append(f"{i}. **{finding['title']}**: {finding['description']}")

        report_lines.extend(["", "## Recommendations", ""])

        for i, rec in enumerate(recommendations, 1):
            report_lines.append(f"{i}. {rec['action']}: {rec['reason']}")

        return "\n".join(report_lines)

    def _generate_auto_analysis_report(self, results: Dict[str, Any]) -> str:
        """Generate report for auto analysis"""
        profile = results.get("data_profile", {})
        summary = profile.get("summary", {})

        report_lines = [
            "# Automatic Data Analysis Report",
            "",
            f"**Data Source:** {results.get('data_source', 'N/A')}",
            f"**Generated:** {results.get('timestamp', 'N/A')}",
            "",
            "## Dataset Summary",
            "",
            f"- Rows: {summary.get('rows', 0)}",
            f"- Columns: {summary.get('columns', 0)}",
            f"- Missing Data: {summary.get('missing_percentage', 0):.2f}%",
            f"- Duplicate Rows: {summary.get('duplicate_rows', 0)}",
            "",
            "## Analysis Completed",
            "",
            "- Data profiling",
            "- Quality assessment",
            "- Statistical analysis",
            "- Visualization generation",
        ]

        return "\n".join(report_lines)

    def _summarize_result(self, result: Any) -> str:
        """Create summary of result"""
        if isinstance(result, dict):
            if "summary" in result:
                return f"Summary available with {len(result)} keys"
            return f"Result with {len(result)} keys"
        return "Result generated"
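
A minimal usage sketch of the orchestrator above, assuming the statistics
foundation tools import cleanly; "sales.csv" is a placeholder path. Class,
method, and return-key names are taken directly from the listing:

    from aiecs.tools.statistics.ai_data_analysis_orchestrator import (
        AIDataAnalysisOrchestrator,
        AnalysisMode,
    )

    orchestrator = AIDataAnalysisOrchestrator()

    # Question-driven analysis: designs a mode-specific workflow, executes it,
    # and returns findings, recommendations, and a Markdown report.
    result = orchestrator.analyze(
        data_source="sales.csv",
        question="Which variables move together?",
        mode=AnalysisMode.EXPLORATORY,
    )
    print(result["report"])

    # Question-free profiling of an entire dataset.
    auto = orchestrator.auto_analyze_dataset("sales.csv", focus_areas=["correlations"])
    print(auto["report"])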