aiecs 1.0.8__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of aiecs might be problematic.
Files changed (81)
  1. aiecs/__init__.py +1 -1
  2. aiecs/aiecs_client.py +159 -1
  3. aiecs/config/config.py +6 -0
  4. aiecs/domain/__init__.py +95 -0
  5. aiecs/domain/community/__init__.py +159 -0
  6. aiecs/domain/community/agent_adapter.py +516 -0
  7. aiecs/domain/community/analytics.py +465 -0
  8. aiecs/domain/community/collaborative_workflow.py +99 -7
  9. aiecs/domain/community/communication_hub.py +649 -0
  10. aiecs/domain/community/community_builder.py +322 -0
  11. aiecs/domain/community/community_integration.py +365 -12
  12. aiecs/domain/community/community_manager.py +481 -5
  13. aiecs/domain/community/decision_engine.py +459 -13
  14. aiecs/domain/community/exceptions.py +238 -0
  15. aiecs/domain/community/models/__init__.py +36 -0
  16. aiecs/domain/community/resource_manager.py +1 -1
  17. aiecs/domain/community/shared_context_manager.py +621 -0
  18. aiecs/domain/context/__init__.py +24 -0
  19. aiecs/domain/context/context_engine.py +37 -33
  20. aiecs/main.py +20 -2
  21. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  22. aiecs/scripts/aid/__init__.py +15 -0
  23. aiecs/scripts/aid/version_manager.py +224 -0
  24. aiecs/scripts/dependance_check/__init__.py +18 -0
  25. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +51 -8
  26. aiecs/scripts/dependance_patch/__init__.py +8 -0
  27. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +12 -0
  28. aiecs/scripts/tools_develop/README.md +340 -0
  29. aiecs/scripts/tools_develop/__init__.py +16 -0
  30. aiecs/scripts/tools_develop/check_type_annotations.py +263 -0
  31. aiecs/scripts/tools_develop/validate_tool_schemas.py +346 -0
  32. aiecs/tools/__init__.py +53 -34
  33. aiecs/tools/docs/__init__.py +106 -0
  34. aiecs/tools/docs/ai_document_orchestrator.py +556 -0
  35. aiecs/tools/docs/ai_document_writer_orchestrator.py +2222 -0
  36. aiecs/tools/docs/content_insertion_tool.py +1234 -0
  37. aiecs/tools/docs/document_creator_tool.py +1179 -0
  38. aiecs/tools/docs/document_layout_tool.py +1105 -0
  39. aiecs/tools/docs/document_parser_tool.py +924 -0
  40. aiecs/tools/docs/document_writer_tool.py +1636 -0
  41. aiecs/tools/langchain_adapter.py +102 -51
  42. aiecs/tools/schema_generator.py +265 -0
  43. aiecs/tools/statistics/__init__.py +82 -0
  44. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +581 -0
  45. aiecs/tools/statistics/ai_insight_generator_tool.py +473 -0
  46. aiecs/tools/statistics/ai_report_orchestrator_tool.py +629 -0
  47. aiecs/tools/statistics/data_loader_tool.py +518 -0
  48. aiecs/tools/statistics/data_profiler_tool.py +599 -0
  49. aiecs/tools/statistics/data_transformer_tool.py +531 -0
  50. aiecs/tools/statistics/data_visualizer_tool.py +460 -0
  51. aiecs/tools/statistics/model_trainer_tool.py +470 -0
  52. aiecs/tools/statistics/statistical_analyzer_tool.py +426 -0
  53. aiecs/tools/task_tools/chart_tool.py +2 -1
  54. aiecs/tools/task_tools/image_tool.py +43 -43
  55. aiecs/tools/task_tools/office_tool.py +48 -36
  56. aiecs/tools/task_tools/pandas_tool.py +37 -33
  57. aiecs/tools/task_tools/report_tool.py +67 -56
  58. aiecs/tools/task_tools/research_tool.py +32 -31
  59. aiecs/tools/task_tools/scraper_tool.py +53 -46
  60. aiecs/tools/task_tools/search_tool.py +1123 -0
  61. aiecs/tools/task_tools/stats_tool.py +20 -15
  62. {aiecs-1.0.8.dist-info → aiecs-1.2.0.dist-info}/METADATA +5 -1
  63. aiecs-1.2.0.dist-info/RECORD +135 -0
  64. aiecs-1.2.0.dist-info/entry_points.txt +10 -0
  65. aiecs/tools/task_tools/search_api.py +0 -7
  66. aiecs-1.0.8.dist-info/RECORD +0 -98
  67. aiecs-1.0.8.dist-info/entry_points.txt +0 -7
  68. /aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +0 -0
  69. /aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +0 -0
  70. /aiecs/scripts/{dependency_checker.py → dependance_check/dependency_checker.py} +0 -0
  71. /aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +0 -0
  72. /aiecs/scripts/{quick_dependency_check.py → dependance_check/quick_dependency_check.py} +0 -0
  73. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  74. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  75. /aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +0 -0
  76. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  77. /aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +0 -0
  78. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  79. {aiecs-1.0.8.dist-info → aiecs-1.2.0.dist-info}/WHEEL +0 -0
  80. {aiecs-1.0.8.dist-info → aiecs-1.2.0.dist-info}/licenses/LICENSE +0 -0
  81. {aiecs-1.0.8.dist-info → aiecs-1.2.0.dist-info}/top_level.txt +0 -0
aiecs/tools/statistics/ai_data_analysis_orchestrator.py
@@ -0,0 +1,581 @@
+"""
+AI Data Analysis Orchestrator - AI-powered end-to-end data analysis workflow coordination
+
+This orchestrator coordinates multiple foundation tools to provide:
+- Natural language driven analysis
+- Automated workflow orchestration
+- Multi-tool coordination
+- Comprehensive analysis execution
+- Support for various analysis modes
+"""
+
+import logging
+import asyncio
+from typing import Dict, Any, List, Optional, Union
+from enum import Enum
+from datetime import datetime
+
+import pandas as pd
+from pydantic import BaseModel, Field, ValidationError, ConfigDict
+
+from aiecs.tools.base_tool import BaseTool
+from aiecs.tools import register_tool
+
+
+class AnalysisMode(str, Enum):
+    """Analysis execution modes"""
+    EXPLORATORY = "exploratory"
+    DIAGNOSTIC = "diagnostic"
+    PREDICTIVE = "predictive"
+    PRESCRIPTIVE = "prescriptive"
+    COMPARATIVE = "comparative"
+    CAUSAL = "causal"
+
+
+class AIProvider(str, Enum):
+    """Supported AI providers for future integration"""
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    GOOGLE = "google"
+    LOCAL = "local"
+
+
+
+
+
+class OrchestratorError(Exception):
+    """Base exception for Orchestrator errors"""
+    pass
+
+
+class WorkflowError(OrchestratorError):
+    """Raised when workflow execution fails"""
+    pass
+
+
+@register_tool('ai_data_analysis_orchestrator')
+class AIDataAnalysisOrchestrator(BaseTool):
+    """
+    AI-powered data analysis orchestrator that can:
+    1. Understand analysis requirements
+    2. Automatically design analysis workflows
+    3. Orchestrate multiple tools to complete analysis
+    4. Generate comprehensive analysis reports
+
+    Coordinates foundation tools: data_loader, data_profiler, data_transformer,
+    data_visualizer, statistical_analyzer, model_trainer
+    """
+
+    # Configuration schema
+    class Config(BaseModel):
+        """Configuration for the AI data analysis orchestrator tool"""
+        model_config = ConfigDict(env_prefix="AI_DATA_ORCHESTRATOR_")
+
+        default_mode: str = Field(
+            default="exploratory",
+            description="Default analysis mode to use"
+        )
+        max_iterations: int = Field(
+            default=10,
+            description="Maximum number of analysis iterations"
+        )
+        enable_auto_workflow: bool = Field(
+            default=True,
+            description="Whether to enable automatic workflow generation"
+        )
+        default_ai_provider: str = Field(
+            default="openai",
+            description="Default AI provider to use"
+        )
+        enable_caching: bool = Field(
+            default=True,
+            description="Whether to enable result caching"
+        )
+
+    def __init__(self, config: Optional[Dict[str, Any]] = None):
+        """Initialize AI Data Analysis Orchestrator"""
+        super().__init__(config)
+
+        # Parse configuration
+        self.config = self.Config(**(config or {}))
+
+        self.logger = logging.getLogger(__name__)
+        if not self.logger.handlers:
+            handler = logging.StreamHandler()
+            handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
+            self.logger.addHandler(handler)
+        self.logger.setLevel(logging.INFO)
+
+        # Initialize foundation tools
+        self._init_foundation_tools()
+
+        # Initialize AI providers (placeholder for future implementation)
+        self._init_ai_providers()
+
+        # Workflow cache
+        self.workflow_cache = {}
+
+    def _init_foundation_tools(self):
+        """Initialize foundation data analysis tools"""
+        self.foundation_tools = {}
+
+        try:
+            from aiecs.tools.statistics.data_loader_tool import DataLoaderTool
+            self.foundation_tools['data_loader'] = DataLoaderTool()
+            self.logger.info("DataLoaderTool initialized")
+        except ImportError:
+            self.logger.warning("DataLoaderTool not available")
+
+        try:
+            from aiecs.tools.statistics.data_profiler_tool import DataProfilerTool
+            self.foundation_tools['data_profiler'] = DataProfilerTool()
+            self.logger.info("DataProfilerTool initialized")
+        except ImportError:
+            self.logger.warning("DataProfilerTool not available")
+
+        try:
+            from aiecs.tools.statistics.data_transformer_tool import DataTransformerTool
+            self.foundation_tools['data_transformer'] = DataTransformerTool()
+            self.logger.info("DataTransformerTool initialized")
+        except ImportError:
+            self.logger.warning("DataTransformerTool not available")
+
+        try:
+            from aiecs.tools.statistics.data_visualizer_tool import DataVisualizerTool
+            self.foundation_tools['data_visualizer'] = DataVisualizerTool()
+            self.logger.info("DataVisualizerTool initialized")
+        except ImportError:
+            self.logger.warning("DataVisualizerTool not available")
+
+        try:
+            from aiecs.tools.statistics.statistical_analyzer_tool import StatisticalAnalyzerTool
+            self.foundation_tools['statistical_analyzer'] = StatisticalAnalyzerTool()
+            self.logger.info("StatisticalAnalyzerTool initialized")
+        except ImportError:
+            self.logger.warning("StatisticalAnalyzerTool not available")
+
+        try:
+            from aiecs.tools.statistics.model_trainer_tool import ModelTrainerTool
+            self.foundation_tools['model_trainer'] = ModelTrainerTool()
+            self.logger.info("ModelTrainerTool initialized")
+        except ImportError:
+            self.logger.warning("ModelTrainerTool not available")
+
+    def _init_ai_providers(self):
+        """Initialize AI providers (placeholder for future implementation)"""
+        self.ai_providers = {}
+        # Future integration point for AIECS client
+        # try:
+        #     from aiecs import AIECS
+        #     self.aiecs_client = AIECS()
+        #     self.ai_providers['aiecs'] = self.aiecs_client
+        # except ImportError:
+        #     self.logger.warning("AIECS client not available")
+
+    # Schema definitions
+    class AnalyzeSchema(BaseModel):
+        """Schema for analyze operation"""
+        data_source: str = Field(description="Path to data source or data itself")
+        question: str = Field(description="Analysis question in natural language")
+        mode: AnalysisMode = Field(default=AnalysisMode.EXPLORATORY, description="Analysis mode")
+        max_iterations: int = Field(default=10, description="Maximum workflow iterations")
+
+    class AutoAnalyzeDatasetSchema(BaseModel):
+        """Schema for auto_analyze_dataset operation"""
+        data_source: str = Field(description="Path to data source")
+        focus_areas: Optional[List[str]] = Field(default=None, description="Areas to focus on")
+        generate_report: bool = Field(default=True, description="Generate analysis report")
+
+    class OrchestrateWorkflowSchema(BaseModel):
+        """Schema for orchestrate_workflow operation"""
+        workflow_steps: List[Dict[str, Any]] = Field(description="Workflow steps to execute")
+        data_source: str = Field(description="Data source")
+
+    def analyze(
+        self,
+        data_source: str,
+        question: str,
+        mode: AnalysisMode = AnalysisMode.EXPLORATORY,
+        max_iterations: int = 10
+    ) -> Dict[str, Any]:
+        """
+        Perform AI-driven data analysis based on natural language question.
+
+        Args:
+            data_source: Path to data source file
+            question: Analysis question in natural language
+            mode: Analysis mode to use
+            max_iterations: Maximum workflow iterations
+
+        Returns:
+            Dict containing:
+                - analysis_plan: Planned analysis steps
+                - execution_log: Log of executed steps
+                - findings: Analysis findings and insights
+                - recommendations: Recommendations based on analysis
+                - report: Analysis report
+        """
+        try:
+            self.logger.info(f"Starting analysis: {question}")
+
+            # Design analysis workflow based on question and mode
+            workflow = self._design_workflow(question, mode, data_source)
+
+            # Execute workflow
+            execution_results = self._execute_workflow(workflow, data_source, max_iterations)
+
+            # Generate findings from results
+            findings = self._generate_findings(execution_results)
+
+            # Generate recommendations
+            recommendations = self._generate_recommendations(findings)
+
+            # Generate report
+            report = self._generate_analysis_report(question, workflow, execution_results, findings, recommendations)
+
+            return {
+                'analysis_plan': workflow,
+                'execution_log': execution_results.get('log', []),
+                'findings': findings,
+                'recommendations': recommendations,
+                'report': report,
+                'mode': mode.value,
+                'timestamp': datetime.now().isoformat()
+            }
+
+        except Exception as e:
+            self.logger.error(f"Error in analysis: {e}")
+            raise WorkflowError(f"Analysis failed: {e}")
+
+    def auto_analyze_dataset(
+        self,
+        data_source: str,
+        focus_areas: Optional[List[str]] = None,
+        generate_report: bool = True
+    ) -> Dict[str, Any]:
+        """
+        Automatically analyze dataset without specific question.
+
+        Args:
+            data_source: Path to data source
+            focus_areas: Specific areas to focus on
+            generate_report: Whether to generate comprehensive report
+
+        Returns:
+            Dict containing comprehensive analysis results
+        """
+        try:
+            self.logger.info(f"Auto-analyzing dataset: {data_source}")
+
+            # Load data
+            load_result = self.foundation_tools['data_loader'].load_data(
+                source=data_source
+            )
+            data = load_result['data']
+
+            # Profile data
+            profile_result = self.foundation_tools['data_profiler'].profile_dataset(
+                data=data,
+                level='comprehensive'
+            )
+
+            # Auto-transform if needed
+            if profile_result.get('quality_issues'):
+                transform_result = self.foundation_tools['data_transformer'].auto_transform(
+                    data=data
+                )
+                data = transform_result['transformed_data']
+
+            # Generate visualizations
+            viz_result = self.foundation_tools['data_visualizer'].auto_visualize_dataset(
+                data=data,
+                focus_areas=focus_areas or ['distributions', 'correlations']
+            )
+
+            # Perform statistical analysis
+            numeric_cols = data.select_dtypes(include=['number']).columns.tolist()
+            stats_result = {}
+            if len(numeric_cols) >= 2:
+                stats_result = self.foundation_tools['statistical_analyzer'].analyze_correlation(
+                    data=data,
+                    variables=numeric_cols
+                )
+
+            # Compile results
+            results = {
+                'data_profile': profile_result,
+                'transformations_applied': transform_result if 'transform_result' in locals() else None,
+                'visualizations': viz_result,
+                'statistical_analysis': stats_result,
+                'data_source': data_source,
+                'timestamp': datetime.now().isoformat()
+            }
+
+            if generate_report:
+                results['report'] = self._generate_auto_analysis_report(results)
+
+            return results
+
+        except Exception as e:
+            self.logger.error(f"Error in auto analysis: {e}")
+            raise WorkflowError(f"Auto analysis failed: {e}")
+
+    def orchestrate_workflow(
+        self,
+        workflow_steps: List[Dict[str, Any]],
+        data_source: str
+    ) -> Dict[str, Any]:
+        """
+        Orchestrate a custom workflow with specified steps.
+
+        Args:
+            workflow_steps: List of workflow steps with tool and operation info
+            data_source: Data source path
+
+        Returns:
+            Dict containing workflow execution results
+        """
+        try:
+            results = self._execute_workflow(
+                {'steps': workflow_steps},
+                data_source,
+                max_iterations=len(workflow_steps)
+            )

+            return {
+                'workflow_results': results,
+                'total_steps': len(workflow_steps),
+                'status': 'completed'
+            }
+
+        except Exception as e:
+            self.logger.error(f"Error orchestrating workflow: {e}")
+            raise WorkflowError(f"Workflow orchestration failed: {e}")
+
+    # Internal workflow methods
+
+    def _design_workflow(self, question: str, mode: AnalysisMode, data_source: str) -> Dict[str, Any]:
+        """Design analysis workflow based on question and mode"""
+        workflow = {
+            'question': question,
+            'mode': mode.value,
+            'steps': []
+        }
+
+        # Standard workflow steps based on mode
+        if mode == AnalysisMode.EXPLORATORY:
+            workflow['steps'] = [
+                {'tool': 'data_loader', 'operation': 'load_data', 'params': {'source': data_source}},
+                {'tool': 'data_profiler', 'operation': 'profile_dataset', 'params': {'level': 'comprehensive'}},
+                {'tool': 'data_visualizer', 'operation': 'auto_visualize_dataset', 'params': {'max_charts': 5}},
+                {'tool': 'statistical_analyzer', 'operation': 'analyze_correlation', 'params': {}}
+            ]
+        elif mode == AnalysisMode.PREDICTIVE:
+            workflow['steps'] = [
+                {'tool': 'data_loader', 'operation': 'load_data', 'params': {'source': data_source}},
+                {'tool': 'data_profiler', 'operation': 'profile_dataset', 'params': {}},
+                {'tool': 'data_transformer', 'operation': 'auto_transform', 'params': {}},
+                {'tool': 'model_trainer', 'operation': 'auto_select_model', 'params': {}}
+            ]
+        elif mode == AnalysisMode.DIAGNOSTIC:
+            workflow['steps'] = [
+                {'tool': 'data_loader', 'operation': 'load_data', 'params': {'source': data_source}},
+                {'tool': 'data_profiler', 'operation': 'detect_quality_issues', 'params': {}},
+                {'tool': 'statistical_analyzer', 'operation': 'test_hypothesis', 'params': {}}
+            ]
+        else:
+            # Default exploratory workflow
+            workflow['steps'] = [
+                {'tool': 'data_loader', 'operation': 'load_data', 'params': {'source': data_source}},
+                {'tool': 'data_profiler', 'operation': 'profile_dataset', 'params': {}}
+            ]
+
+        return workflow
+
+    def _execute_workflow(self, workflow: Dict[str, Any], data_source: str, max_iterations: int) -> Dict[str, Any]:
+        """Execute workflow steps"""
+        results = {
+            'log': [],
+            'data': None,
+            'outputs': {}
+        }
+
+        current_data = None
+
+        for i, step in enumerate(workflow['steps'][:max_iterations]):
+            try:
+                tool_name = step['tool']
+                operation = step['operation']
+                params = step.get('params', {})
+
+                self.logger.info(f"Executing step {i+1}: {tool_name}.{operation}")
+
+                # Get tool
+                tool = self.foundation_tools.get(tool_name)
+                if not tool:
+                    self.logger.warning(f"Tool {tool_name} not available, skipping")
+                    continue
+
+                # Prepare parameters
+                if current_data is not None and 'data' not in params:
+                    params['data'] = current_data
+
+                # Execute operation
+                result = tool.run(operation, **params)
+
+                # Update current data if result contains data
+                if isinstance(result, dict) and 'data' in result:
+                    current_data = result['data']
+                elif isinstance(result, dict) and 'transformed_data' in result:
+                    current_data = result['transformed_data']
+
+                # Log execution
+                results['log'].append({
+                    'step': i + 1,
+                    'tool': tool_name,
+                    'operation': operation,
+                    'status': 'success',
+                    'summary': self._summarize_result(result)
+                })
+
+                results['outputs'][f"{tool_name}_{operation}"] = result
+
+            except Exception as e:
+                self.logger.error(f"Error in step {i+1}: {e}")
+                results['log'].append({
+                    'step': i + 1,
+                    'tool': step['tool'],
+                    'operation': step['operation'],
+                    'status': 'failed',
+                    'error': str(e)
+                })
+
+        results['data'] = current_data
+        return results
+
+    def _generate_findings(self, execution_results: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """Generate findings from execution results"""
+        findings = []
+
+        outputs = execution_results.get('outputs', {})
+
+        # Extract insights from profiling
+        if 'data_profiler_profile_dataset' in outputs:
+            profile = outputs['data_profiler_profile_dataset']
+            summary = profile.get('summary', {})
+            findings.append({
+                'type': 'data_profile',
+                'title': 'Dataset Overview',
+                'description': f"Dataset contains {summary.get('rows', 0)} rows and {summary.get('columns', 0)} columns",
+                'confidence': 'high',
+                'evidence': summary
+            })
+
+        # Extract insights from statistical analysis
+        if 'statistical_analyzer_analyze_correlation' in outputs:
+            corr = outputs['statistical_analyzer_analyze_correlation']
+            high_corr = corr.get('high_correlations', [])
+            if high_corr:
+                findings.append({
+                    'type': 'correlation',
+                    'title': 'Significant Correlations Found',
+                    'description': f"Found {len(high_corr)} significant correlations",
+                    'confidence': 'high',
+                    'evidence': high_corr
+                })
+
+        return findings
+
+    def _generate_recommendations(self, findings: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Generate recommendations based on findings"""
+        recommendations = []
+
+        for finding in findings:
+            if finding['type'] == 'data_profile':
+                recommendations.append({
+                    'action': 'data_quality_check',
+                    'reason': 'Perform comprehensive data quality assessment',
+                    'priority': 'high'
+                })
+            elif finding['type'] == 'correlation':
+                recommendations.append({
+                    'action': 'investigate_relationships',
+                    'reason': 'Investigate significant correlations for potential insights',
+                    'priority': 'medium'
+                })
+
+        return recommendations
+
+    def _generate_analysis_report(self, question: str, workflow: Dict[str, Any],
+                                  execution_results: Dict[str, Any], findings: List[Dict[str, Any]],
+                                  recommendations: List[Dict[str, Any]]) -> str:
+        """Generate comprehensive analysis report"""
+        report_lines = [
+            "# Data Analysis Report",
+            "",
+            f"**Question:** {question}",
+            f"**Analysis Mode:** {workflow.get('mode', 'N/A')}",
+            f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
+            "",
+            "## Analysis Workflow",
+            ""
+        ]
+
+        for i, step in enumerate(workflow.get('steps', []), 1):
+            report_lines.append(f"{i}. {step['tool']}.{step['operation']}")
+
+        report_lines.extend([
+            "",
+            "## Key Findings",
+            ""
+        ])
+
+        for i, finding in enumerate(findings, 1):
+            report_lines.append(f"{i}. **{finding['title']}**: {finding['description']}")
+
+        report_lines.extend([
+            "",
+            "## Recommendations",
+            ""
+        ])
+
+        for i, rec in enumerate(recommendations, 1):
+            report_lines.append(f"{i}. {rec['action']}: {rec['reason']}")
+
+        return "\n".join(report_lines)
+
+    def _generate_auto_analysis_report(self, results: Dict[str, Any]) -> str:
+        """Generate report for auto analysis"""
+        profile = results.get('data_profile', {})
+        summary = profile.get('summary', {})
+
+        report_lines = [
+            "# Automatic Data Analysis Report",
+            "",
+            f"**Data Source:** {results.get('data_source', 'N/A')}",
+            f"**Generated:** {results.get('timestamp', 'N/A')}",
+            "",
+            "## Dataset Summary",
+            "",
+            f"- Rows: {summary.get('rows', 0)}",
+            f"- Columns: {summary.get('columns', 0)}",
+            f"- Missing Data: {summary.get('missing_percentage', 0):.2f}%",
+            f"- Duplicate Rows: {summary.get('duplicate_rows', 0)}",
+            "",
+            "## Analysis Completed",
+            "",
+            "- Data profiling",
+            "- Quality assessment",
+            "- Statistical analysis",
+            "- Visualization generation"
+        ]
+
+        return "\n".join(report_lines)
+
+    def _summarize_result(self, result: Any) -> str:
+        """Create summary of result"""
+        if isinstance(result, dict):
+            if 'summary' in result:
+                return f"Summary available with {len(result)} keys"
+            return f"Result with {len(result)} keys"
+        return "Result generated"
+
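The new orchestrator exposes three public operations: analyze, auto_analyze_dataset, and orchestrate_workflow, all coordinating the foundation statistics tools added in this release. The following is a minimal usage sketch, not part of the diff: the import path is inferred from the file location above, the CSV path and question are placeholders, and constructing the tool directly with a config dict assumes BaseTool subclasses accept that pattern (as the __init__ shown here suggests).

# Hypothetical usage sketch for the new ai_data_analysis_orchestrator tool.
from aiecs.tools.statistics.ai_data_analysis_orchestrator import (
    AIDataAnalysisOrchestrator,
    AnalysisMode,
)

# Config keys mirror the tool's Config schema (e.g. max_iterations).
orchestrator = AIDataAnalysisOrchestrator(config={"max_iterations": 5})

# Natural-language driven analysis; data source and question are illustrative.
result = orchestrator.analyze(
    data_source="sales.csv",
    question="Which variables are most strongly correlated?",
    mode=AnalysisMode.EXPLORATORY,
)
print(result["report"])          # markdown report built by _generate_analysis_report
print(result["execution_log"])   # per-step success/failure log

# Explicit custom workflow over the same foundation tools.
steps = [
    {"tool": "data_loader", "operation": "load_data", "params": {"source": "sales.csv"}},
    {"tool": "data_profiler", "operation": "profile_dataset", "params": {"level": "comprehensive"}},
]
summary = orchestrator.orchestrate_workflow(workflow_steps=steps, data_source="sales.csv")
print(summary["total_steps"], summary["status"])

Note that steps referencing a foundation tool that failed to import are skipped rather than raising, so the execution log is the place to confirm which operations actually ran.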