aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,643 @@
1
+ """
2
+ AI Data Analysis Orchestrator - AI-powered end-to-end data analysis workflow coordination
3
+
4
+ This orchestrator coordinates multiple foundation tools to provide:
5
+ - Natural language driven analysis
6
+ - Automated workflow orchestration
7
+ - Multi-tool coordination
8
+ - Comprehensive analysis execution
9
+ - Support for various analysis modes
10
+ """
11
+
12
+ import logging
13
+ from typing import Dict, Any, List, Optional
14
+ from enum import Enum
15
+ from datetime import datetime
16
+
17
+ from pydantic import BaseModel, Field, ConfigDict
18
+
19
+ from aiecs.tools.base_tool import BaseTool
20
+ from aiecs.tools import register_tool
21
+
22
+
23
class AnalysisMode(str, Enum):
    """Kinds of analysis the orchestrator can be asked to perform.

    Every member is also a ``str``, so it compares equal to its raw value.
    """

    EXPLORATORY = "exploratory"
    DIAGNOSTIC = "diagnostic"
    PREDICTIVE = "predictive"
    PRESCRIPTIVE = "prescriptive"
    COMPARATIVE = "comparative"
    CAUSAL = "causal"
32
+
33
+
34
class AIProvider(str, Enum):
    """AI back-ends reserved for future integration.

    Every member is also a ``str``, so it compares equal to its raw value.
    """

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
    LOCAL = "local"
41
+
42
+
43
class OrchestratorError(Exception):
    """Common base class for errors raised by the orchestrator."""
45
+
46
+
47
class WorkflowError(OrchestratorError):
    """Signals that executing an analysis workflow failed."""
49
+
50
+
51
+ @register_tool("ai_data_analysis_orchestrator")
52
+ class AIDataAnalysisOrchestrator(BaseTool):
53
+ """
54
+ AI-powered data analysis orchestrator that can:
55
+ 1. Understand analysis requirements
56
+ 2. Automatically design analysis workflows
57
+ 3. Orchestrate multiple tools to complete analysis
58
+ 4. Generate comprehensive analysis reports
59
+
60
+ Coordinates foundation tools: data_loader, data_profiler, data_transformer,
61
+ data_visualizer, statistical_analyzer, model_trainer
62
+ """
63
+
64
+ # Configuration schema
65
class Config(BaseModel):
    """Settings model for the AI data analysis orchestrator tool."""

    # NOTE(review): ``env_prefix`` is a pydantic-settings option; confirm that a
    # plain ``BaseModel`` really picks up AI_DATA_ORCHESTRATOR_* variables.
    model_config = ConfigDict(env_prefix="AI_DATA_ORCHESTRATOR_")

    default_mode: str = Field(
        default="exploratory",
        description="Default analysis mode to use",
    )
    max_iterations: int = Field(
        default=10,
        description="Maximum number of analysis iterations",
    )
    enable_auto_workflow: bool = Field(
        default=True,
        description="Whether to enable automatic workflow generation",
    )
    default_ai_provider: str = Field(
        default="openai",
        description="Default AI provider to use",
    )
    enable_caching: bool = Field(
        default=True,
        description="Whether to enable result caching",
    )
78
+
79
def __init__(self, config: Optional[Dict[str, Any]] = None):
    """Set up configuration, logging, foundation tools and the workflow cache.

    Args:
        config: Optional mapping of settings; it is forwarded to the base
            class and validated through ``Config``.
    """
    super().__init__(config)

    # Validate the raw mapping (an empty one when nothing was supplied).
    raw_settings = config or {}
    self.config = self.Config(**raw_settings)

    log = logging.getLogger(__name__)
    self.logger = log
    if not log.handlers:
        # First use of this logger: attach a stream handler and set the level.
        # NOTE(review): the level is assumed to be set only in this branch.
        stream = logging.StreamHandler()
        stream.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
        log.addHandler(stream)
        log.setLevel(logging.INFO)

    # Foundation tools first, then the (currently placeholder) AI providers.
    self._init_foundation_tools()
    self._init_ai_providers()

    # Workflow cache, initially empty.
    self.workflow_cache = {}
101
+
102
+ def _init_foundation_tools(self):
103
+ """Initialize foundation data analysis tools"""
104
+ self.foundation_tools = {}
105
+
106
+ try:
107
+ from aiecs.tools.statistics.data_loader_tool import DataLoaderTool
108
+
109
+ self.foundation_tools["data_loader"] = DataLoaderTool()
110
+ self.logger.info("DataLoaderTool initialized")
111
+ except ImportError:
112
+ self.logger.warning("DataLoaderTool not available")
113
+
114
+ try:
115
+ from aiecs.tools.statistics.data_profiler_tool import (
116
+ DataProfilerTool,
117
+ )
118
+
119
+ self.foundation_tools["data_profiler"] = DataProfilerTool()
120
+ self.logger.info("DataProfilerTool initialized")
121
+ except ImportError:
122
+ self.logger.warning("DataProfilerTool not available")
123
+
124
+ try:
125
+ from aiecs.tools.statistics.data_transformer_tool import (
126
+ DataTransformerTool,
127
+ )
128
+
129
+ self.foundation_tools["data_transformer"] = DataTransformerTool()
130
+ self.logger.info("DataTransformerTool initialized")
131
+ except ImportError:
132
+ self.logger.warning("DataTransformerTool not available")
133
+
134
+ try:
135
+ from aiecs.tools.statistics.data_visualizer_tool import (
136
+ DataVisualizerTool,
137
+ )
138
+
139
+ self.foundation_tools["data_visualizer"] = DataVisualizerTool()
140
+ self.logger.info("DataVisualizerTool initialized")
141
+ except ImportError:
142
+ self.logger.warning("DataVisualizerTool not available")
143
+
144
+ try:
145
+ from aiecs.tools.statistics.statistical_analyzer_tool import (
146
+ StatisticalAnalyzerTool,
147
+ )
148
+
149
+ self.foundation_tools["statistical_analyzer"] = StatisticalAnalyzerTool()
150
+ self.logger.info("StatisticalAnalyzerTool initialized")
151
+ except ImportError:
152
+ self.logger.warning("StatisticalAnalyzerTool not available")
153
+
154
+ try:
155
+ from aiecs.tools.statistics.model_trainer_tool import (
156
+ ModelTrainerTool,
157
+ )
158
+
159
+ self.foundation_tools["model_trainer"] = ModelTrainerTool()
160
+ self.logger.info("ModelTrainerTool initialized")
161
+ except ImportError:
162
+ self.logger.warning("ModelTrainerTool not available")
163
+
164
+ def _init_ai_providers(self):
165
+ """Initialize AI providers (placeholder for future implementation)"""
166
+ self.ai_providers = {}
167
+ # Future integration point for AIECS client
168
+ # try:
169
+ # from aiecs import AIECS
170
+ # self.aiecs_client = AIECS()
171
+ # self.ai_providers['aiecs'] = self.aiecs_client
172
+ # except ImportError:
173
+ # self.logger.warning("AIECS client not available")
174
+
175
+ # Schema definitions
176
class AnalyzeSchema(BaseModel):
    """Input model for the ``analyze`` operation."""

    data_source: str = Field(
        description="Path to data source or data itself",
    )
    question: str = Field(
        description="Analysis question in natural language",
    )
    mode: AnalysisMode = Field(
        default=AnalysisMode.EXPLORATORY,
        description="Analysis mode",
    )
    max_iterations: int = Field(
        default=10,
        description="Maximum workflow iterations",
    )
183
+
184
class AutoAnalyzeDatasetSchema(BaseModel):
    """Input model for the ``auto_analyze_dataset`` operation."""

    data_source: str = Field(
        description="Path to data source",
    )
    focus_areas: Optional[List[str]] = Field(
        default=None,
        description="Areas to focus on",
    )
    generate_report: bool = Field(
        default=True,
        description="Generate analysis report",
    )
190
+
191
class OrchestrateWorkflowSchema(BaseModel):
    """Input model for the ``orchestrate_workflow`` operation."""

    workflow_steps: List[Dict[str, Any]] = Field(
        description="Workflow steps to execute",
    )
    data_source: str = Field(
        description="Data source",
    )
196
+
197
def analyze(
    self,
    data_source: str,
    question: str,
    mode: AnalysisMode = AnalysisMode.EXPLORATORY,
    max_iterations: int = 10,
) -> Dict[str, Any]:
    """
    Perform AI-driven data analysis based on a natural language question.

    Args:
        data_source: Path to data source file
        question: Analysis question in natural language
        mode: Analysis mode to use; an ``AnalysisMode`` member or its
            raw string value (e.g. ``"exploratory"``)
        max_iterations: Maximum workflow iterations

    Returns:
        Dict containing:
        - analysis_plan: Planned analysis steps
        - execution_log: Log of executed steps
        - findings: Analysis findings and insights
        - recommendations: Recommendations based on analysis
        - report: Analysis report
        - mode: Value of the analysis mode that was used
        - timestamp: ISO-formatted time at which the result was assembled

    Raises:
        WorkflowError: If any stage fails (including an unknown ``mode``);
            the original exception is attached as ``__cause__``.
    """
    try:
        self.logger.info(f"Starting analysis: {question}")

        # Accept the raw string as well as the enum member; previously a
        # plain string failed later on ``mode.value``.
        mode = AnalysisMode(mode)

        # Design the workflow, run it, then derive findings and advice.
        plan = self._design_workflow(question, mode, data_source)
        run = self._execute_workflow(plan, data_source, max_iterations)
        findings = self._generate_findings(run)
        recommendations = self._generate_recommendations(findings)

        report = self._generate_analysis_report(
            question,
            plan,
            run,
            findings,
            recommendations,
        )

        return {
            "analysis_plan": plan,
            "execution_log": run.get("log", []),
            "findings": findings,
            "recommendations": recommendations,
            "report": report,
            "mode": mode.value,
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        self.logger.error(f"Error in analysis: {e}")
        # Chain explicitly so the root cause stays attached to the error.
        raise WorkflowError(f"Analysis failed: {e}") from e
258
+
259
def auto_analyze_dataset(
    self,
    data_source: str,
    focus_areas: Optional[List[str]] = None,
    generate_report: bool = True,
) -> Dict[str, Any]:
    """
    Automatically analyze a dataset without a specific question.

    Loads the data, profiles it, applies an automatic transformation when
    the profile reports quality issues, generates visualizations and, when
    at least two numeric columns exist, runs a correlation analysis.

    Args:
        data_source: Path to data source
        focus_areas: Specific areas to focus on; defaults to
            ``["distributions", "correlations"]`` when omitted or empty
        generate_report: Whether to generate comprehensive report

    Returns:
        Dict with keys ``data_profile``, ``transformations_applied``
        (``None`` when no transformation ran), ``visualizations``,
        ``statistical_analysis``, ``data_source``, ``timestamp`` and,
        when requested, ``report``.

    Raises:
        WorkflowError: If any stage fails; the original exception is
            attached as ``__cause__``.
    """
    try:
        self.logger.info(f"Auto-analyzing dataset: {data_source}")

        tools = self.foundation_tools

        # Load data
        data = tools["data_loader"].load_data(source=data_source)["data"]

        # Profile data
        profile_result = tools["data_profiler"].profile_dataset(
            data=data, level="comprehensive"
        )

        # Auto-transform only when the profile flagged quality issues.
        # Initialised up front instead of probing ``locals()`` afterwards.
        transform_result = None
        if profile_result.get("quality_issues"):
            transform_result = tools["data_transformer"].auto_transform(data=data)
            data = transform_result["transformed_data"]

        # Generate visualizations
        viz_result = tools["data_visualizer"].auto_visualize_dataset(
            data=data,
            focus_areas=focus_areas or ["distributions", "correlations"],
        )

        # Correlation needs at least two numeric columns.
        numeric_cols = data.select_dtypes(include=["number"]).columns.tolist()
        stats_result = {}
        if len(numeric_cols) >= 2:
            stats_result = tools["statistical_analyzer"].analyze_correlation(
                data=data, variables=numeric_cols
            )

        # Compile results
        results = {
            "data_profile": profile_result,
            "transformations_applied": transform_result,
            "visualizations": viz_result,
            "statistical_analysis": stats_result,
            "data_source": data_source,
            "timestamp": datetime.now().isoformat(),
        }

        if generate_report:
            results["report"] = self._generate_auto_analysis_report(results)

        return results

    except Exception as e:
        self.logger.error(f"Error in auto analysis: {e}")
        # Chain explicitly so the root cause stays attached to the error.
        raise WorkflowError(f"Auto analysis failed: {e}") from e
329
+
330
def orchestrate_workflow(
    self, workflow_steps: List[Dict[str, Any]], data_source: str
) -> Dict[str, Any]:
    """
    Orchestrate a custom workflow with specified steps.

    Args:
        workflow_steps: List of workflow steps with tool and operation info
        data_source: Data source path

    Returns:
        Dict with ``workflow_results`` (whatever ``_execute_workflow``
        returned), ``total_steps`` (number of steps supplied) and ``status``.

    Raises:
        WorkflowError: If executing the workflow raises; the original
            exception is attached as ``__cause__``.
    """
    step_count = len(workflow_steps)
    try:
        # Every supplied step may run: the iteration cap equals the step count.
        outcome = self._execute_workflow(
            {"steps": workflow_steps},
            data_source,
            max_iterations=step_count,
        )
    except Exception as e:
        self.logger.error(f"Error orchestrating workflow: {e}")
        # Chain explicitly so the root cause stays attached to the error.
        raise WorkflowError(f"Workflow orchestration failed: {e}") from e

    # NOTE(review): status is fixed to "completed" whenever no exception
    # escaped; confirm whether skipped or failed steps should change it.
    return {
        "workflow_results": outcome,
        "total_steps": step_count,
        "status": "completed",
    }
359
+
360
+ # Internal workflow methods
361
+
362
def _design_workflow(
    self, question: str, mode: AnalysisMode, data_source: str
) -> Dict[str, Any]:
    """Build the ordered list of tool steps for a question and mode.

    Exploratory, predictive and diagnostic modes each get a dedicated
    pipeline; every other mode falls back to a minimal load-and-profile
    pipeline. Every pipeline starts by loading ``data_source``.

    Returns:
        Dict with ``question``, ``mode`` (the mode's value) and ``steps``,
        where each step holds ``tool``, ``operation`` and ``params``.
    """

    def make_step(tool: str, operation: str, **params: Any) -> Dict[str, Any]:
        # Fresh dicts on every call, so no step shares state with another.
        return {"tool": tool, "operation": operation, "params": params}

    plan = {"question": question, "mode": mode.value, "steps": []}

    # Shared first step: load the data from the given source.
    load_step = make_step("data_loader", "load_data", source=data_source)

    if mode == AnalysisMode.EXPLORATORY:
        follow_up = [
            make_step("data_profiler", "profile_dataset", level="comprehensive"),
            make_step("data_visualizer", "auto_visualize_dataset", max_charts=5),
            make_step("statistical_analyzer", "analyze_correlation"),
        ]
    elif mode == AnalysisMode.PREDICTIVE:
        follow_up = [
            make_step("data_profiler", "profile_dataset"),
            make_step("data_transformer", "auto_transform"),
            make_step("model_trainer", "auto_select_model"),
        ]
    elif mode == AnalysisMode.DIAGNOSTIC:
        follow_up = [
            make_step("data_profiler", "detect_quality_issues"),
            make_step("statistical_analyzer", "test_hypothesis"),
        ]
    else:
        # Default exploratory workflow
        follow_up = [make_step("data_profiler", "profile_dataset")]

    plan["steps"] = [load_step, *follow_up]
    return plan
449
+
450
+ def _execute_workflow(
451
+ self, workflow: Dict[str, Any], data_source: str, max_iterations: int
452
+ ) -> Dict[str, Any]:
453
+ """Execute workflow steps"""
454
+ results = {"log": [], "data": None, "outputs": {}}
455
+
456
+ current_data = None
457
+
458
+ for i, step in enumerate(workflow["steps"][:max_iterations]):
459
+ try:
460
+ tool_name = step["tool"]
461
+ operation = step["operation"]
462
+ params = step.get("params", {})
463
+
464
+ self.logger.info(f"Executing step {i+1}: {tool_name}.{operation}")
465
+
466
+ # Get tool
467
+ tool = self.foundation_tools.get(tool_name)
468
+ if not tool:
469
+ self.logger.warning(f"Tool {tool_name} not available, skipping")
470
+ continue
471
+
472
+ # Prepare parameters
473
+ if current_data is not None and "data" not in params:
474
+ params["data"] = current_data
475
+
476
+ # Execute operation
477
+ result = tool.run(operation, **params)
478
+
479
+ # Update current data if result contains data
480
+ if isinstance(result, dict) and "data" in result:
481
+ current_data = result["data"]
482
+ elif isinstance(result, dict) and "transformed_data" in result:
483
+ current_data = result["transformed_data"]
484
+
485
+ # Log execution
486
+ results["log"].append(
487
+ {
488
+ "step": i + 1,
489
+ "tool": tool_name,
490
+ "operation": operation,
491
+ "status": "success",
492
+ "summary": self._summarize_result(result),
493
+ }
494
+ )
495
+
496
+ results["outputs"][f"{tool_name}_{operation}"] = result
497
+
498
+ except Exception as e:
499
+ self.logger.error(f"Error in step {i+1}: {e}")
500
+ results["log"].append(
501
+ {
502
+ "step": i + 1,
503
+ "tool": step["tool"],
504
+ "operation": step["operation"],
505
+ "status": "failed",
506
+ "error": str(e),
507
+ }
508
+ )
509
+
510
+ results["data"] = current_data
511
+ return results
512
+
513
+ def _generate_findings(self, execution_results: Dict[str, Any]) -> List[Dict[str, Any]]:
514
+ """Generate findings from execution results"""
515
+ findings = []
516
+
517
+ outputs = execution_results.get("outputs", {})
518
+
519
+ # Extract insights from profiling
520
+ if "data_profiler_profile_dataset" in outputs:
521
+ profile = outputs["data_profiler_profile_dataset"]
522
+ summary = profile.get("summary", {})
523
+ findings.append(
524
+ {
525
+ "type": "data_profile",
526
+ "title": "Dataset Overview",
527
+ "description": f"Dataset contains {summary.get('rows', 0)} rows and {summary.get('columns', 0)} columns",
528
+ "confidence": "high",
529
+ "evidence": summary,
530
+ }
531
+ )
532
+
533
+ # Extract insights from statistical analysis
534
+ if "statistical_analyzer_analyze_correlation" in outputs:
535
+ corr = outputs["statistical_analyzer_analyze_correlation"]
536
+ high_corr = corr.get("high_correlations", [])
537
+ if high_corr:
538
+ findings.append(
539
+ {
540
+ "type": "correlation",
541
+ "title": "Significant Correlations Found",
542
+ "description": f"Found {len(high_corr)} significant correlations",
543
+ "confidence": "high",
544
+ "evidence": high_corr,
545
+ }
546
+ )
547
+
548
+ return findings
549
+
550
+ def _generate_recommendations(self, findings: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
551
+ """Generate recommendations based on findings"""
552
+ recommendations = []
553
+
554
+ for finding in findings:
555
+ if finding["type"] == "data_profile":
556
+ recommendations.append(
557
+ {
558
+ "action": "data_quality_check",
559
+ "reason": "Perform comprehensive data quality assessment",
560
+ "priority": "high",
561
+ }
562
+ )
563
+ elif finding["type"] == "correlation":
564
+ recommendations.append(
565
+ {
566
+ "action": "investigate_relationships",
567
+ "reason": "Investigate significant correlations for potential insights",
568
+ "priority": "medium",
569
+ }
570
+ )
571
+
572
+ return recommendations
573
+
574
+ def _generate_analysis_report(
575
+ self,
576
+ question: str,
577
+ workflow: Dict[str, Any],
578
+ execution_results: Dict[str, Any],
579
+ findings: List[Dict[str, Any]],
580
+ recommendations: List[Dict[str, Any]],
581
+ ) -> str:
582
+ """Generate comprehensive analysis report"""
583
+ report_lines = [
584
+ "# Data Analysis Report",
585
+ "",
586
+ f"**Question:** {question}",
587
+ f"**Analysis Mode:** {workflow.get('mode', 'N/A')}",
588
+ f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
589
+ "",
590
+ "## Analysis Workflow",
591
+ "",
592
+ ]
593
+
594
+ for i, step in enumerate(workflow.get("steps", []), 1):
595
+ report_lines.append(f"{i}. {step['tool']}.{step['operation']}")
596
+
597
+ report_lines.extend(["", "## Key Findings", ""])
598
+
599
+ for i, finding in enumerate(findings, 1):
600
+ report_lines.append(f"{i}. **{finding['title']}**: {finding['description']}")
601
+
602
+ report_lines.extend(["", "## Recommendations", ""])
603
+
604
+ for i, rec in enumerate(recommendations, 1):
605
+ report_lines.append(f"{i}. {rec['action']}: {rec['reason']}")
606
+
607
+ return "\n".join(report_lines)
608
+
609
+ def _generate_auto_analysis_report(self, results: Dict[str, Any]) -> str:
610
+ """Generate report for auto analysis"""
611
+ profile = results.get("data_profile", {})
612
+ summary = profile.get("summary", {})
613
+
614
+ report_lines = [
615
+ "# Automatic Data Analysis Report",
616
+ "",
617
+ f"**Data Source:** {results.get('data_source', 'N/A')}",
618
+ f"**Generated:** {results.get('timestamp', 'N/A')}",
619
+ "",
620
+ "## Dataset Summary",
621
+ "",
622
+ f"- Rows: {summary.get('rows', 0)}",
623
+ f"- Columns: {summary.get('columns', 0)}",
624
+ f"- Missing Data: {summary.get('missing_percentage', 0):.2f}%",
625
+ f"- Duplicate Rows: {summary.get('duplicate_rows', 0)}",
626
+ "",
627
+ "## Analysis Completed",
628
+ "",
629
+ "- Data profiling",
630
+ "- Quality assessment",
631
+ "- Statistical analysis",
632
+ "- Visualization generation",
633
+ ]
634
+
635
+ return "\n".join(report_lines)
636
+
637
+ def _summarize_result(self, result: Any) -> str:
638
+ """Create summary of result"""
639
+ if isinstance(result, dict):
640
+ if "summary" in result:
641
+ return f"Summary available with {len(result)} keys"
642
+ return f"Result with {len(result)} keys"
643
+ return "Result generated"