aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AI Insight Generator Tool - AI-driven insight discovery and pattern detection
|
|
3
|
+
|
|
4
|
+
This tool provides advanced insight generation with:
|
|
5
|
+
- Pattern discovery and anomaly detection
|
|
6
|
+
- Trend analysis and forecasting
|
|
7
|
+
- Actionable insight generation
|
|
8
|
+
- Integration with research_tool reasoning methods
|
|
9
|
+
- AI-powered analysis (placeholder for future enhancement)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
from typing import Dict, Any, List, Optional, Union
|
|
14
|
+
from enum import Enum
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
|
|
17
|
+
import pandas as pd
|
|
18
|
+
import numpy as np
|
|
19
|
+
from scipy import stats as scipy_stats
|
|
20
|
+
from pydantic import BaseModel, Field, ConfigDict
|
|
21
|
+
|
|
22
|
+
from aiecs.tools.base_tool import BaseTool
|
|
23
|
+
from aiecs.tools import register_tool
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class InsightType(str, Enum):
    """Kinds of insight a caller can request from the generator.

    Because the enum mixes in ``str``, each member compares equal to its
    lowercase string value.
    """

    # Distribution / category regularities.
    PATTERN = "pattern"
    # Statistical outliers.
    ANOMALY = "anomaly"
    # Directional movement across the row order.
    TREND = "trend"
    # Relationships between numeric columns.
    CORRELATION = "correlation"
    # NOTE(review): no handler for this member is visible in this part of the
    # file -- confirm whether it is implemented elsewhere.
    SEGMENTATION = "segmentation"
    # Only produced by generate_insights when reasoning is enabled in the config.
    CAUSATION = "causation"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class InsightGeneratorError(Exception):
    """Common parent for every error raised by the insight generator."""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class InsightGenerationError(InsightGeneratorError):
    """Signals that producing insights (or converting their input) failed."""
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@register_tool("ai_insight_generator")
|
|
46
|
+
class AIInsightGeneratorTool(BaseTool):
|
|
47
|
+
"""
|
|
48
|
+
AI-powered insight generation tool that can:
|
|
49
|
+
1. Discover hidden patterns in data
|
|
50
|
+
2. Generate actionable insights
|
|
51
|
+
3. Detect anomalies and outliers
|
|
52
|
+
4. Predict trends and forecast
|
|
53
|
+
5. Apply reasoning methods (Mill's methods, induction, deduction)
|
|
54
|
+
|
|
55
|
+
Integrates with research_tool for reasoning capabilities.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
# Configuration schema
|
|
59
|
+
class Config(BaseModel):
    """Tunable settings for the AI insight generator tool."""

    # NOTE(review): env_prefix is normally a pydantic-settings option; confirm
    # it has any effect on a plain BaseModel.
    model_config = ConfigDict(env_prefix="AI_INSIGHT_GENERATOR_")

    # Insights scoring below this value are meant to be discarded.
    min_confidence: float = Field(
        default=0.7, description="Minimum confidence threshold for insights"
    )
    # Number of standard deviations beyond which a value counts as an outlier.
    anomaly_std_threshold: float = Field(
        default=3.0, description="Standard deviation threshold for anomaly detection"
    )
    # Minimum correlation strength considered significant.
    correlation_threshold: float = Field(
        default=0.5, description="Correlation threshold for significant relationships"
    )
    # Gates the causation analysis in generate_insights.
    enable_reasoning: bool = Field(
        default=True, description="Whether to enable reasoning methods integration"
    )
|
|
80
|
+
|
|
81
|
+
def __init__(self, config: Optional[Dict[str, Any]] = None):
    """Build the tool: validate settings, wire up logging, load helper tools.

    Args:
        config: Optional mapping of overrides for the ``Config`` fields;
            ``None`` means "use every default".
    """
    super().__init__(config)

    # Turn the raw mapping into the typed Config model.
    overrides = config or {}
    self.config = self.Config(**overrides)

    log = logging.getLogger(__name__)
    self.logger = log
    # Attach a stream handler only once so repeated construction does not
    # duplicate log lines.
    if not log.handlers:
        stream = logging.StreamHandler()
        stream.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
        log.addHandler(stream)
    log.setLevel(logging.INFO)

    self._init_external_tools()
|
|
96
|
+
|
|
97
|
+
def _init_external_tools(self):
|
|
98
|
+
"""Initialize external task tools"""
|
|
99
|
+
self.external_tools = {}
|
|
100
|
+
|
|
101
|
+
# Initialize ResearchTool for reasoning methods
|
|
102
|
+
try:
|
|
103
|
+
from aiecs.tools.task_tools.research_tool import ResearchTool
|
|
104
|
+
|
|
105
|
+
self.external_tools["research"] = ResearchTool()
|
|
106
|
+
self.logger.info("ResearchTool initialized successfully")
|
|
107
|
+
except ImportError:
|
|
108
|
+
self.logger.warning("ResearchTool not available")
|
|
109
|
+
self.external_tools["research"] = None
|
|
110
|
+
|
|
111
|
+
# Initialize StatisticalAnalyzerTool
|
|
112
|
+
try:
|
|
113
|
+
from aiecs.tools.statistics.statistical_analyzer_tool import (
|
|
114
|
+
StatisticalAnalyzerTool,
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
self.external_tools["stats_analyzer"] = StatisticalAnalyzerTool()
|
|
118
|
+
self.logger.info("StatisticalAnalyzerTool initialized successfully")
|
|
119
|
+
except ImportError:
|
|
120
|
+
self.logger.warning("StatisticalAnalyzerTool not available")
|
|
121
|
+
self.external_tools["stats_analyzer"] = None
|
|
122
|
+
|
|
123
|
+
# Schema definitions
|
|
124
|
+
class GenerateInsightsSchema(BaseModel):
    """Input schema for the ``generate_insights`` operation."""

    # A single mapping or a list of record mappings.
    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Data to analyze"
    )
    analysis_results: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Previous analysis results",
    )
    # None means "no restriction supplied by the caller".
    insight_types: Optional[List[InsightType]] = Field(
        default=None,
        description="Types of insights to generate",
    )
    min_confidence: float = Field(
        default=0.7,
        description="Minimum confidence threshold",
    )
|
|
135
|
+
|
|
136
|
+
class DiscoverPatternsSchema(BaseModel):
    """Input schema for the ``discover_patterns`` operation."""

    # A single mapping or a list of record mappings.
    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Data for pattern discovery",
    )
    pattern_types: Optional[List[str]] = Field(
        default=None,
        description="Specific pattern types",
    )
|
|
145
|
+
|
|
146
|
+
class DetectAnomaliesSchema(BaseModel):
    """Input schema for the ``detect_anomalies`` operation."""

    # A single mapping or a list of record mappings.
    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Data for anomaly detection",
    )
    columns: Optional[List[str]] = Field(
        default=None,
        description="Columns to check",
    )
    threshold: float = Field(
        default=3.0,
        description="Standard deviation threshold",
    )
|
|
154
|
+
|
|
155
|
+
def generate_insights(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    analysis_results: Optional[Dict[str, Any]] = None,
    insight_types: Optional[List[InsightType]] = None,
    min_confidence: float = 0.7,
) -> Dict[str, Any]:
    """
    Generate insights from data by running the requested analyses.

    Args:
        data: Data to analyze (DataFrame, list of records, or a mapping).
        analysis_results: Previous analysis results. Accepted for interface
            compatibility; this method does not currently read it.
        insight_types: Specific types of insights to generate (all if None).
            SEGMENTATION has no analysis in this method and yields nothing.
            CAUSATION only runs when ``config.enable_reasoning`` is true.
        min_confidence: Insights whose ``confidence`` is below this value
            (or missing) are dropped from the result.

    Returns:
        Dict containing:
            - insights: Insights that passed the confidence filter
            - summary: Overall summary of those insights
            - priority_insights: At most the first five prioritized insights
            - total_insights: Number of insights that passed the filter
            - timestamp: ``datetime.now().isoformat()`` at completion

    Raises:
        InsightGenerationError: If any step fails; the original exception
            is attached as ``__cause__``.
    """
    try:
        df = self._to_dataframe(data)

        self.logger.info(f"Generating insights from data with {len(df)} rows")

        # Default to all insight types
        if insight_types is None:
            insight_types = list(InsightType)

        insights: List[Dict[str, Any]] = []

        if InsightType.PATTERN in insight_types:
            insights.extend(self._discover_patterns_internal(df))

        if InsightType.ANOMALY in insight_types:
            # Honor the configured threshold instead of the helper's
            # hard-coded default; with a default Config this is still 3.0.
            insights.extend(
                self._detect_anomalies_internal(
                    df, threshold=self.config.anomaly_std_threshold
                )
            )

        if InsightType.TREND in insight_types:
            insights.extend(self._analyze_trends_internal(df))

        if InsightType.CORRELATION in insight_types:
            insights.extend(self._analyze_correlations_internal(df))

        if InsightType.CAUSATION in insight_types and self.config.enable_reasoning:
            insights.extend(self._analyze_causation_internal(df))

        # Filter by confidence; entries without a score count as 0.
        filtered_insights = [i for i in insights if i.get("confidence", 0) >= min_confidence]

        priority_insights = self._prioritize_insights(filtered_insights)
        summary = self._generate_insight_summary(filtered_insights)

        return {
            "insights": filtered_insights,
            "summary": summary,
            "priority_insights": priority_insights[:5],
            "total_insights": len(filtered_insights),
            "timestamp": datetime.now().isoformat(),
        }

    except Exception as e:
        self.logger.error(f"Error generating insights: {e}")
        # Chain explicitly so the root cause survives in tracebacks.
        raise InsightGenerationError(f"Insight generation failed: {e}") from e
|
|
229
|
+
|
|
230
|
+
def discover_patterns(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    pattern_types: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Run pattern discovery on the supplied data.

    Args:
        data: Data for pattern discovery
        pattern_types: Specific pattern types to look for. This method does
            not read the argument, so it has no effect on the result.

    Returns:
        Dict with ``patterns`` (the discovered patterns) and
        ``total_patterns`` (how many there are).

    Raises:
        InsightGenerationError: If conversion or discovery fails.
    """
    try:
        frame = self._to_dataframe(data)
        found = self._discover_patterns_internal(frame)
        result = dict(patterns=found, total_patterns=len(found))
        return result
    except Exception as e:
        self.logger.error(f"Error discovering patterns: {e}")
        raise InsightGenerationError(f"Pattern discovery failed: {e}")
|
|
254
|
+
|
|
255
|
+
def detect_anomalies(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    columns: Optional[List[str]] = None,
    threshold: float = 3.0,
) -> Dict[str, Any]:
    """
    Run anomaly detection on the supplied data.

    Args:
        data: Data for anomaly detection
        columns: Columns to check (all numeric if None)
        threshold: Standard deviation threshold

    Returns:
        Dict with ``anomalies`` (one entry per flagged column) and
        ``total_anomalies`` (the number of entries).

    Raises:
        InsightGenerationError: If conversion or detection fails.
    """
    try:
        frame = self._to_dataframe(data)
        flagged = self._detect_anomalies_internal(frame, columns, threshold)
        result = dict(anomalies=flagged, total_anomalies=len(flagged))
        return result
    except Exception as e:
        self.logger.error(f"Error detecting anomalies: {e}")
        raise InsightGenerationError(f"Anomaly detection failed: {e}")
|
|
281
|
+
|
|
282
|
+
# Internal insight generation methods
|
|
283
|
+
|
|
284
|
+
def _to_dataframe(self, data: Union[Dict, List, pd.DataFrame]) -> pd.DataFrame:
|
|
285
|
+
"""Convert data to DataFrame"""
|
|
286
|
+
if isinstance(data, pd.DataFrame):
|
|
287
|
+
return data
|
|
288
|
+
elif isinstance(data, list):
|
|
289
|
+
return pd.DataFrame(data)
|
|
290
|
+
elif isinstance(data, dict):
|
|
291
|
+
return pd.DataFrame([data])
|
|
292
|
+
else:
|
|
293
|
+
raise InsightGenerationError(f"Unsupported data type: {type(data)}")
|
|
294
|
+
|
|
295
|
+
def _discover_patterns_internal(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
|
|
296
|
+
"""Discover patterns in data"""
|
|
297
|
+
patterns = []
|
|
298
|
+
|
|
299
|
+
# Distribution patterns
|
|
300
|
+
numeric_cols = df.select_dtypes(include=[np.number]).columns
|
|
301
|
+
for col in numeric_cols:
|
|
302
|
+
series = df[col].dropna()
|
|
303
|
+
if len(series) > 0:
|
|
304
|
+
skewness = series.skew()
|
|
305
|
+
if abs(skewness) > 1:
|
|
306
|
+
patterns.append(
|
|
307
|
+
{
|
|
308
|
+
"type": InsightType.PATTERN.value,
|
|
309
|
+
"title": f"Skewed Distribution in {col}",
|
|
310
|
+
"description": f"Column {col} shows {'positive' if skewness > 0 else 'negative'} skew ({skewness:.2f})",
|
|
311
|
+
"confidence": 0.85,
|
|
312
|
+
"impact": "medium",
|
|
313
|
+
"evidence": {
|
|
314
|
+
"skewness": float(skewness),
|
|
315
|
+
"column": col,
|
|
316
|
+
},
|
|
317
|
+
}
|
|
318
|
+
)
|
|
319
|
+
|
|
320
|
+
# Categorical patterns
|
|
321
|
+
categorical_cols = df.select_dtypes(include=["object", "category"]).columns
|
|
322
|
+
for col in categorical_cols:
|
|
323
|
+
value_counts = df[col].value_counts()
|
|
324
|
+
if len(value_counts) > 0:
|
|
325
|
+
top_percentage = value_counts.iloc[0] / len(df) * 100
|
|
326
|
+
if top_percentage > 50:
|
|
327
|
+
patterns.append(
|
|
328
|
+
{
|
|
329
|
+
"type": InsightType.PATTERN.value,
|
|
330
|
+
"title": f"Dominant Category in {col}",
|
|
331
|
+
"description": f"'{value_counts.index[0]}' accounts for {top_percentage:.1f}% of {col}",
|
|
332
|
+
"confidence": 0.9,
|
|
333
|
+
"impact": "high",
|
|
334
|
+
"evidence": {
|
|
335
|
+
"dominant_value": str(value_counts.index[0]),
|
|
336
|
+
"percentage": float(top_percentage),
|
|
337
|
+
},
|
|
338
|
+
}
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
return patterns
|
|
342
|
+
|
|
343
|
+
def _detect_anomalies_internal(
|
|
344
|
+
self,
|
|
345
|
+
df: pd.DataFrame,
|
|
346
|
+
columns: Optional[List[str]] = None,
|
|
347
|
+
threshold: float = 3.0,
|
|
348
|
+
) -> List[Dict[str, Any]]:
|
|
349
|
+
"""Detect anomalies using statistical methods"""
|
|
350
|
+
anomalies = []
|
|
351
|
+
|
|
352
|
+
numeric_cols = (
|
|
353
|
+
columns if columns else df.select_dtypes(include=[np.number]).columns.tolist()
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
for col in numeric_cols:
|
|
357
|
+
if col not in df.columns:
|
|
358
|
+
continue
|
|
359
|
+
|
|
360
|
+
series = df[col].dropna()
|
|
361
|
+
if len(series) == 0 or series.std() == 0:
|
|
362
|
+
continue
|
|
363
|
+
|
|
364
|
+
# Z-score method
|
|
365
|
+
z_scores = np.abs((series - series.mean()) / series.std())
|
|
366
|
+
anomaly_count = (z_scores > threshold).sum()
|
|
367
|
+
|
|
368
|
+
if anomaly_count > 0:
|
|
369
|
+
anomaly_percentage = anomaly_count / len(series) * 100
|
|
370
|
+
anomalies.append(
|
|
371
|
+
{
|
|
372
|
+
"type": InsightType.ANOMALY.value,
|
|
373
|
+
"title": f"Anomalies Detected in {col}",
|
|
374
|
+
"description": f"Found {anomaly_count} anomalous values ({anomaly_percentage:.2f}%) in {col}",
|
|
375
|
+
"confidence": 0.8,
|
|
376
|
+
"impact": ("high" if anomaly_percentage > 5 else "medium"),
|
|
377
|
+
"evidence": {
|
|
378
|
+
"column": col,
|
|
379
|
+
"anomaly_count": int(anomaly_count),
|
|
380
|
+
"percentage": float(anomaly_percentage),
|
|
381
|
+
"threshold": threshold,
|
|
382
|
+
},
|
|
383
|
+
"recommendation": "Investigate and consider handling these outliers",
|
|
384
|
+
}
|
|
385
|
+
)
|
|
386
|
+
|
|
387
|
+
return anomalies
|
|
388
|
+
|
|
389
|
+
def _analyze_trends_internal(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
|
|
390
|
+
"""Analyze trends in data"""
|
|
391
|
+
trends = []
|
|
392
|
+
|
|
393
|
+
numeric_cols = df.select_dtypes(include=[np.number]).columns
|
|
394
|
+
|
|
395
|
+
for col in numeric_cols:
|
|
396
|
+
series = df[col].dropna()
|
|
397
|
+
if len(series) < 3:
|
|
398
|
+
continue
|
|
399
|
+
|
|
400
|
+
# Calculate trend using linear regression
|
|
401
|
+
x = np.arange(len(series))
|
|
402
|
+
y = series.values
|
|
403
|
+
|
|
404
|
+
if len(x) > 0 and len(y) > 0:
|
|
405
|
+
slope, intercept, r_value, p_value, std_err = scipy_stats.linregress(x, y)
|
|
406
|
+
|
|
407
|
+
if abs(r_value) > 0.5 and p_value < 0.05:
|
|
408
|
+
trend_direction = "increasing" if slope > 0 else "decreasing"
|
|
409
|
+
trends.append(
|
|
410
|
+
{
|
|
411
|
+
"type": InsightType.TREND.value,
|
|
412
|
+
"title": f"{trend_direction.capitalize()} Trend in {col}",
|
|
413
|
+
"description": f"Column {col} shows a {trend_direction} trend (R²={r_value**2:.3f})",
|
|
414
|
+
"confidence": float(abs(r_value)),
|
|
415
|
+
"impact": ("high" if abs(r_value) > 0.7 else "medium"),
|
|
416
|
+
"evidence": {
|
|
417
|
+
"column": col,
|
|
418
|
+
"slope": float(slope),
|
|
419
|
+
"r_squared": float(r_value**2),
|
|
420
|
+
"p_value": float(p_value),
|
|
421
|
+
},
|
|
422
|
+
}
|
|
423
|
+
)
|
|
424
|
+
|
|
425
|
+
return trends
|
|
426
|
+
|
|
427
|
+
def _analyze_correlations_internal(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
|
|
428
|
+
"""Analyze correlations between variables"""
|
|
429
|
+
correlations = []
|
|
430
|
+
|
|
431
|
+
numeric_df = df.select_dtypes(include=[np.number])
|
|
432
|
+
if numeric_df.shape[1] < 2:
|
|
433
|
+
return correlations
|
|
434
|
+
|
|
435
|
+
corr_matrix = numeric_df.corr()
|
|
436
|
+
|
|
437
|
+
for i in range(len(corr_matrix.columns)):
|
|
438
|
+
for j in range(i + 1, len(corr_matrix.columns)):
|
|
439
|
+
corr_value = corr_matrix.iloc[i, j]
|
|
440
|
+
|
|
441
|
+
if abs(corr_value) > self.config.correlation_threshold:
|
|
442
|
+
col1 = corr_matrix.columns[i]
|
|
443
|
+
col2 = corr_matrix.columns[j]
|
|
444
|
+
|
|
445
|
+
strength = "strong" if abs(corr_value) > 0.7 else "moderate"
|
|
446
|
+
direction = "positive" if corr_value > 0 else "negative"
|
|
447
|
+
|
|
448
|
+
correlations.append(
|
|
449
|
+
{
|
|
450
|
+
"type": InsightType.CORRELATION.value,
|
|
451
|
+
"title": f"{strength.capitalize()} {direction} correlation",
|
|
452
|
+
"description": f"{col1} and {col2} show {strength} {direction} correlation ({corr_value:.3f})",
|
|
453
|
+
"confidence": float(abs(corr_value)),
|
|
454
|
+
"impact": ("high" if abs(corr_value) > 0.7 else "medium"),
|
|
455
|
+
"evidence": {
|
|
456
|
+
"variable1": col1,
|
|
457
|
+
"variable2": col2,
|
|
458
|
+
"correlation": float(corr_value),
|
|
459
|
+
},
|
|
460
|
+
"recommendation": "Consider investigating causal relationship",
|
|
461
|
+
}
|
|
462
|
+
)
|
|
463
|
+
|
|
464
|
+
return correlations
|
|
465
|
+
|
|
466
|
+
def _analyze_causation_internal(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
|
|
467
|
+
"""Analyze potential causal relationships using reasoning methods"""
|
|
468
|
+
causations = []
|
|
469
|
+
|
|
470
|
+
# Use research tool for Mill's methods if available
|
|
471
|
+
if self.external_tools.get("research"):
|
|
472
|
+
# Placeholder for causal analysis using Mill's methods
|
|
473
|
+
# This would require domain knowledge and proper case structures
|
|
474
|
+
self.logger.info(
|
|
475
|
+
"Causal analysis with reasoning methods available but requires domain-specific setup"
|
|
476
|
+
)
|
|
477
|
+
|
|
478
|
+
return causations
|
|
479
|
+
|
|
480
|
+
def _prioritize_insights(self, insights: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
481
|
+
"""Prioritize insights by confidence and impact"""
|
|
482
|
+
impact_scores = {"high": 3, "medium": 2, "low": 1}
|
|
483
|
+
|
|
484
|
+
def priority_score(insight):
|
|
485
|
+
confidence = insight.get("confidence", 0.5)
|
|
486
|
+
impact = impact_scores.get(insight.get("impact", "low"), 1)
|
|
487
|
+
return confidence * impact
|
|
488
|
+
|
|
489
|
+
return sorted(insights, key=priority_score, reverse=True)
|
|
490
|
+
|
|
491
|
+
def _generate_insight_summary(self, insights: List[Dict[str, Any]]) -> str:
|
|
492
|
+
"""Generate summary of insights"""
|
|
493
|
+
if not insights:
|
|
494
|
+
return "No significant insights found in the data."
|
|
495
|
+
|
|
496
|
+
type_counts = {}
|
|
497
|
+
for insight in insights:
|
|
498
|
+
insight_type = insight.get("type", "unknown")
|
|
499
|
+
type_counts[insight_type] = type_counts.get(insight_type, 0) + 1
|
|
500
|
+
|
|
501
|
+
summary_parts = [f"Generated {len(insights)} insights:"]
|
|
502
|
+
for itype, count in type_counts.items():
|
|
503
|
+
summary_parts.append(f"{count} {itype} insights")
|
|
504
|
+
|
|
505
|
+
return "; ".join(summary_parts)
|