aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
Potentially problematic release: this version of aiecs might be problematic.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +435 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3949 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1731 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +894 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +377 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +230 -37
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +328 -0
- aiecs/llm/clients/google_function_calling_mixin.py +415 -0
- aiecs/llm/clients/googleai_client.py +314 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +1186 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1464 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1016 -0
- aiecs/tools/docs/document_writer_tool.py +2008 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +220 -141
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
- aiecs-1.7.17.dist-info/RECORD +337 -0
- aiecs-1.7.17.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
aiecs/tools/statistics/statistical_analyzer_tool.py (new file)
@@ -0,0 +1,472 @@
+"""
+Statistical Analyzer Tool - Advanced statistical analysis and hypothesis testing
+
+This tool provides comprehensive statistical analysis with:
+- Descriptive and inferential statistics
+- Hypothesis testing (t-test, ANOVA, chi-square)
+- Regression analysis
+- Time series analysis
+- Correlation and causality analysis
+"""
+
+import logging
+from typing import Dict, Any, List, Optional, Union
+from enum import Enum
+
+import pandas as pd  # type: ignore[import-untyped]
+import numpy as np
+from scipy import stats as scipy_stats  # type: ignore[import-untyped]
+from pydantic import BaseModel, Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+from aiecs.tools.base_tool import BaseTool
+from aiecs.tools import register_tool
+
+
+class AnalysisType(str, Enum):
+    """Types of statistical analyses"""
+
+    DESCRIPTIVE = "descriptive"
+    T_TEST = "t_test"
+    ANOVA = "anova"
+    CHI_SQUARE = "chi_square"
+    LINEAR_REGRESSION = "linear_regression"
+    LOGISTIC_REGRESSION = "logistic_regression"
+    CORRELATION = "correlation"
+    TIME_SERIES = "time_series"
+
+
+class StatisticalAnalyzerError(Exception):
+    """Base exception for StatisticalAnalyzer errors"""
+
+
+class AnalysisError(StatisticalAnalyzerError):
+    """Raised when analysis fails"""
+
+
+@register_tool("statistical_analyzer")
+class StatisticalAnalyzerTool(BaseTool):
+    """
+    Advanced statistical analysis tool that can:
+    1. Perform hypothesis testing
+    2. Conduct regression analysis
+    3. Analyze time series
+    4. Perform correlation and causal analysis
+
+    Integrates with stats_tool for core statistical operations.
+    """
+
+    # Configuration schema
+    class Config(BaseSettings):
+        """Configuration for the statistical analyzer tool
+
+        Automatically reads from environment variables with STATISTICAL_ANALYZER_ prefix.
+        Example: STATISTICAL_ANALYZER_SIGNIFICANCE_LEVEL -> significance_level
+        """
+
+        model_config = SettingsConfigDict(env_prefix="STATISTICAL_ANALYZER_")
+
+        significance_level: float = Field(
+            default=0.05,
+            description="Significance level for hypothesis testing",
+        )
+        confidence_level: float = Field(
+            default=0.95,
+            description="Confidence level for statistical intervals",
+        )
+        enable_effect_size: bool = Field(
+            default=True,
+            description="Whether to calculate effect sizes in analyses",
+        )
+
+    def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs):
+        """Initialize StatisticalAnalyzerTool with settings
+
+        Configuration is automatically loaded by BaseTool from:
+        1. Explicit config dict (highest priority)
+        2. YAML config files (config/tools/statistical_analyzer.yaml)
+        3. Environment variables (via dotenv from .env files)
+        4. Tool defaults (lowest priority)
+
+        Args:
+            config: Optional configuration overrides
+            **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)
+        """
+        super().__init__(config, **kwargs)
+
+        # Configuration is automatically loaded by BaseTool into self._config_obj
+        # Access config via self._config_obj (BaseSettings instance)
+        self.config = self._config_obj if self._config_obj else self.Config()
+
+        self.logger = logging.getLogger(__name__)
+        if not self.logger.handlers:
+            handler = logging.StreamHandler()
+            handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
+            self.logger.addHandler(handler)
+        self.logger.setLevel(logging.INFO)
+
+        self._init_external_tools()
+
+    def _init_external_tools(self):
+        """Initialize external task tools"""
+        self.external_tools = {}
+
+        try:
+            from aiecs.tools.task_tools.stats_tool import StatsTool
+
+            self.external_tools["stats"] = StatsTool()
+            self.logger.info("StatsTool initialized successfully")
+        except ImportError:
+            self.logger.warning("StatsTool not available")
+            self.external_tools["stats"] = None
+
+    # Schema definitions
+    class AnalyzeSchema(BaseModel):
+        """Schema for analyze operation"""
+
+        data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data to analyze")
+        analysis_type: AnalysisType = Field(description="Type of analysis to perform")
+        variables: Dict[str, Any] = Field(description="Variables specification")
+        params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters")
+
+    class TestHypothesisSchema(BaseModel):
+        """Schema for test_hypothesis operation"""
+
+        data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data for hypothesis testing")
+        test_type: str = Field(description="Type of test: t_test, anova, chi_square")
+        variables: Dict[str, Any] = Field(description="Variables for testing")
+
+    class PerformRegressionSchema(BaseModel):
+        """Schema for perform_regression operation"""
+
+        data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data for regression")
+        dependent_var: str = Field(description="Dependent variable")
+        independent_vars: List[str] = Field(description="Independent variables")
+        regression_type: str = Field(default="linear", description="Type: linear or logistic")
+
+    class AnalyzeCorrelationSchema(BaseModel):
+        """Schema for analyze_correlation operation"""
+
+        data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data for correlation analysis")
+        variables: Optional[List[str]] = Field(default=None, description="Variables to analyze")
+        method: str = Field(default="pearson", description="Correlation method")
+
+    def analyze(
+        self,
+        data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
+        analysis_type: AnalysisType,
+        variables: Dict[str, Any],
+        params: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
+        """
+        Perform statistical analysis.
+
+        Args:
+            data: Data to analyze
+            analysis_type: Type of analysis
+            variables: Variables specification (dependent, independent, etc.)
+            params: Additional parameters
+
+        Returns:
+            Dict containing analysis results with statistics, p-values, interpretations
+        """
+        try:
+            df = self._to_dataframe(data)
+            params = params or {}
+
+            if analysis_type == AnalysisType.DESCRIPTIVE:
+                result = self._descriptive_analysis(df, variables)
+            elif analysis_type == AnalysisType.T_TEST:
+                result = self._t_test_analysis(df, variables, params)
+            elif analysis_type == AnalysisType.ANOVA:
+                result = self._anova_analysis(df, variables, params)
+            elif analysis_type == AnalysisType.CHI_SQUARE:
+                result = self._chi_square_analysis(df, variables, params)
+            elif analysis_type == AnalysisType.LINEAR_REGRESSION:
+                result = self._linear_regression_analysis(df, variables, params)
+            elif analysis_type == AnalysisType.CORRELATION:
+                result = self._correlation_analysis(df, variables, params)
+            else:
+                raise AnalysisError(f"Unsupported analysis type: {analysis_type}")
+
+            result["analysis_type"] = analysis_type.value
+            return result
+
+        except Exception as e:
+            self.logger.error(f"Error in analysis: {e}")
+            raise AnalysisError(f"Analysis failed: {e}")
+
+    def test_hypothesis(
+        self,
+        data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
+        test_type: str,
+        variables: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Perform hypothesis testing"""
+        try:
+            df = self._to_dataframe(data)
+
+            if test_type == "t_test":
+                return self._t_test_analysis(df, variables, {})
+            elif test_type == "anova":
+                return self._anova_analysis(df, variables, {})
+            elif test_type == "chi_square":
+                return self._chi_square_analysis(df, variables, {})
+            else:
+                raise AnalysisError(f"Unsupported test type: {test_type}")
+
+        except Exception as e:
+            self.logger.error(f"Error in hypothesis testing: {e}")
+            raise AnalysisError(f"Hypothesis testing failed: {e}")
+
+    def perform_regression(
+        self,
+        data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
+        dependent_var: str,
+        independent_vars: List[str],
+        regression_type: str = "linear",
+    ) -> Dict[str, Any]:
+        """Perform regression analysis"""
+        try:
+            df = self._to_dataframe(data)
+            variables = {
+                "dependent": dependent_var,
+                "independent": independent_vars,
+            }
+
+            if regression_type == "linear":
+                return self._linear_regression_analysis(df, variables, {})
+            else:
+                raise AnalysisError(f"Unsupported regression type: {regression_type}")
+
+        except Exception as e:
+            self.logger.error(f"Error in regression: {e}")
+            raise AnalysisError(f"Regression failed: {e}")
+
+    def analyze_correlation(
+        self,
+        data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
+        variables: Optional[List[str]] = None,
+        method: str = "pearson",
+    ) -> Dict[str, Any]:
+        """Perform correlation analysis"""
+        try:
+            df = self._to_dataframe(data)
+            var_dict = {"variables": variables} if variables else {}
+            return self._correlation_analysis(df, var_dict, {"method": method})
+
+        except Exception as e:
+            self.logger.error(f"Error in correlation analysis: {e}")
+            raise AnalysisError(f"Correlation analysis failed: {e}")
+
+    # Internal analysis methods
+
+    def _to_dataframe(self, data: Union[Dict, List, pd.DataFrame]) -> pd.DataFrame:
+        """Convert data to DataFrame"""
+        if isinstance(data, pd.DataFrame):
+            return data
+        elif isinstance(data, list):
+            return pd.DataFrame(data)
+        elif isinstance(data, dict):
+            return pd.DataFrame([data])
+        else:
+            raise AnalysisError(f"Unsupported data type: {type(data)}")
+
+    def _descriptive_analysis(self, df: pd.DataFrame, variables: Dict[str, Any]) -> Dict[str, Any]:
+        """Perform descriptive statistics analysis"""
+        cols = variables.get("columns", df.select_dtypes(include=[np.number]).columns.tolist())
+
+        results = {}
+        for col in cols:
+            if col in df.columns:
+                series = df[col].dropna()
+                results[col] = {
+                    "count": int(len(series)),
+                    "mean": float(series.mean()),
+                    "std": float(series.std()),
+                    "min": float(series.min()),
+                    "q25": float(series.quantile(0.25)),
+                    "median": float(series.median()),
+                    "q75": float(series.quantile(0.75)),
+                    "max": float(series.max()),
+                    "skewness": float(series.skew()),
+                    "kurtosis": float(series.kurt()),
+                }
+
+        return {
+            "results": results,
+            "interpretation": "Descriptive statistics computed successfully",
+        }
+
+    def _t_test_analysis(
+        self,
+        df: pd.DataFrame,
+        variables: Dict[str, Any],
+        params: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Perform t-test"""
+        var1_name = variables.get("var1")
+        var2_name = variables.get("var2")
+
+        if not var1_name or not var2_name:
+            raise AnalysisError("T-test requires var1 and var2")
+
+        var1 = df[var1_name].dropna()
+        var2 = df[var2_name].dropna()
+
+        statistic, pvalue = scipy_stats.ttest_ind(var1, var2)
+
+        return {
+            "test_type": "t_test",
+            "statistic": float(statistic),
+            "p_value": float(pvalue),
+            "significant": pvalue < self.config.significance_level,
+            "interpretation": f"{'Significant' if pvalue < self.config.significance_level else 'Not significant'} difference at α={self.config.significance_level}",
+            "variables": [var1_name, var2_name],
+        }
+
+    def _anova_analysis(
+        self,
+        df: pd.DataFrame,
+        variables: Dict[str, Any],
+        params: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Perform ANOVA"""
+        groups = variables.get("groups", [])
+
+        if len(groups) < 2:
+            raise AnalysisError("ANOVA requires at least 2 groups")
+
+        group_data = [df[group].dropna() for group in groups if group in df.columns]
+
+        if len(group_data) < 2:
+            raise AnalysisError("Insufficient valid groups for ANOVA")
+
+        statistic, pvalue = scipy_stats.f_oneway(*group_data)
+
+        return {
+            "test_type": "anova",
+            "statistic": float(statistic),
+            "p_value": float(pvalue),
+            "significant": pvalue < self.config.significance_level,
+            "interpretation": f"{'Significant' if pvalue < self.config.significance_level else 'Not significant'} difference between groups",
+            "groups": groups,
+        }
+
+    def _chi_square_analysis(
+        self,
+        df: pd.DataFrame,
+        variables: Dict[str, Any],
+        params: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Perform chi-square test"""
+        var1_name = variables.get("var1")
+        var2_name = variables.get("var2")
+
+        if not var1_name or not var2_name:
+            raise AnalysisError("Chi-square test requires var1 and var2")
+
+        contingency_table = pd.crosstab(df[var1_name], df[var2_name])
+        statistic, pvalue, dof, expected = scipy_stats.chi2_contingency(contingency_table)
+
+        return {
+            "test_type": "chi_square",
+            "statistic": float(statistic),
+            "p_value": float(pvalue),
+            "degrees_of_freedom": int(dof),
+            "significant": pvalue < self.config.significance_level,
+            "interpretation": f"{'Significant' if pvalue < self.config.significance_level else 'Not significant'} association",
+            "variables": [var1_name, var2_name],
+        }
+
+    def _linear_regression_analysis(
+        self,
+        df: pd.DataFrame,
+        variables: Dict[str, Any],
+        params: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Perform linear regression"""
+        from sklearn.linear_model import LinearRegression  # type: ignore[import-untyped]
+        from sklearn.metrics import r2_score, mean_squared_error  # type: ignore[import-untyped]
+
+        dependent = variables.get("dependent")
+        independent = variables.get("independent", [])
+
+        if not dependent or not independent:
+            raise AnalysisError("Regression requires dependent and independent variables")
+
+        X = df[independent].dropna()
+        y = df[dependent].loc[X.index]
+
+        model = LinearRegression()
+        model.fit(X, y)
+
+        y_pred = model.predict(X)
+        r2 = r2_score(y, y_pred)
+        mse = mean_squared_error(y, y_pred)
+
+        coefficients = {var: float(coef) for var, coef in zip(independent, model.coef_)}
+
+        return {
+            "model_type": "linear_regression",
+            "intercept": float(model.intercept_),
+            "coefficients": coefficients,
+            "r_squared": float(r2),
+            "mse": float(mse),
+            "rmse": float(np.sqrt(mse)),
+            "interpretation": f"Model explains {r2*100:.2f}% of variance",
+            "dependent_variable": dependent,
+            "independent_variables": independent,
+        }
+
+    def _correlation_analysis(
+        self,
+        df: pd.DataFrame,
+        variables: Dict[str, Any],
+        params: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Perform correlation analysis"""
+        method = params.get("method", "pearson")
+        cols = variables.get("variables")
+
+        if cols:
+            numeric_df = df[cols].select_dtypes(include=[np.number])
+        else:
+            numeric_df = df.select_dtypes(include=[np.number])
+
+        if numeric_df.shape[1] < 2:
+            raise AnalysisError("Correlation requires at least 2 numeric variables")
+
+        corr_matrix = numeric_df.corr(method=method)
+
+        # Find significant correlations
+        significant_pairs = []
+        for i in range(len(corr_matrix.columns)):
+            for j in range(i + 1, len(corr_matrix.columns)):
+                corr_value = corr_matrix.iloc[i, j]
+                if abs(corr_value) > 0.3:  # Threshold for noteworthy correlation
+                    significant_pairs.append(
+                        {
+                            "var1": corr_matrix.columns[i],
+                            "var2": corr_matrix.columns[j],
+                            "correlation": float(corr_value),
+                            "strength": self._interpret_correlation(corr_value),
+                        }
+                    )
+
+        return {
+            "method": method,
+            "correlation_matrix": corr_matrix.to_dict(),
+            "significant_correlations": significant_pairs,
+            "interpretation": f"Found {len(significant_pairs)} significant correlations",
+        }
+
+    def _interpret_correlation(self, corr: float) -> str:
+        """Interpret correlation strength"""
+        abs_corr = abs(corr)
+        if abs_corr < 0.3:
+            return "weak"
+        elif abs_corr < 0.7:
+            return "moderate"
+        else:
+            return "strong"
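
For orientation, a minimal usage sketch for the new tool follows (illustrative caller code, not part of the diff; it assumes the package's pandas/scipy/pydantic dependencies are installed and that BaseTool accepts the default None config; the sample data and column names are made up):

from aiecs.tools.statistics.statistical_analyzer_tool import (
    AnalysisType,
    StatisticalAnalyzerTool,
)

# Illustrative two-column sample; values and names are invented for the example.
records = [
    {"group_a": 1.2, "group_b": 2.9},
    {"group_a": 1.5, "group_b": 3.1},
    {"group_a": 1.1, "group_b": 2.7},
    {"group_a": 1.4, "group_b": 3.3},
]

tool = StatisticalAnalyzerTool()

# Independent-samples t-test between the two columns (dispatches to _t_test_analysis).
result = tool.analyze(
    data=records,
    analysis_type=AnalysisType.T_TEST,
    variables={"var1": "group_a", "var2": "group_b"},
)
print(result["p_value"], result["significant"], result["interpretation"])

The significance threshold comes from the tool's Config (default 0.05, overridable via STATISTICAL_ANALYZER_SIGNIFICANCE_LEVEL or an explicit config dict).
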
aiecs/tools/task_tools/__init__.py
@@ -12,8 +12,11 @@ This module contains specialized tools for various task-oriented operations:
 - report_tool: Report generation and formatting operations
 - research_tool: Research and information gathering operations
 - scraper_tool: Web scraping and data extraction operations
-- search_api: Search API integration operations
 - stats_tool: Statistical analysis and computation operations
+
+Note:
+- apisource_tool is now a standalone package at aiecs.tools.apisource
+- search_tool is now a standalone package at aiecs.tools.search_tool
 """
 
 # Lazy import all task tools to avoid heavy dependencies at import time
@@ -21,62 +24,72 @@ import os
 
 # Define available tools for lazy loading
 _AVAILABLE_TOOLS = [
-
-
-
-
-
-
-
-
-    'stats_tool'
+    "chart_tool",
+    "classfire_tool",
+    "image_tool",
+    "pandas_tool",
+    "report_tool",
+    "research_tool",
+    "scraper_tool",
+    "stats_tool",
 ]
 
 # Add office_tool conditionally
-
-
+# Check environment variable via settings (preferred) or direct check
+try:
+    from aiecs.config.config import get_settings
+    settings = get_settings()
+    skip_office_tool = getattr(settings, "skip_office_tool", False)
+except Exception:
+    # Fallback to direct env check if settings not available
+    skip_office_tool = os.getenv("SKIP_OFFICE_TOOL", "").lower() in ("true", "1", "yes")
+
+if not skip_office_tool:
+    _AVAILABLE_TOOLS.append("office_tool")
 
 # Track which tools have been loaded
 _LOADED_TOOLS = set()
 
+
 def _lazy_load_tool(tool_name: str):
     """Lazy load a specific tool module"""
     if tool_name in _LOADED_TOOLS:
         return
-
+
     try:
-        if tool_name ==
-
-        elif tool_name ==
-
-        elif tool_name ==
-
-        elif tool_name ==
-
-        elif tool_name ==
-
-        elif tool_name ==
-
-        elif tool_name ==
-
-        elif tool_name ==
-
-
-            from . import stats_tool
-
+        if tool_name == "chart_tool":
+            pass
+        elif tool_name == "classfire_tool":
+            pass
+        elif tool_name == "image_tool":
+            pass
+        elif tool_name == "office_tool":
+            pass
+        elif tool_name == "pandas_tool":
+            pass
+        elif tool_name == "report_tool":
+            pass
+        elif tool_name == "research_tool":
+            pass
+        elif tool_name == "scraper_tool":
+            pass
+        elif tool_name == "stats_tool":
+            pass
+
         _LOADED_TOOLS.add(tool_name)
-
+
     except Exception as e:
         import logging
+
        logger = logging.getLogger(__name__)
        logger.warning(f"Failed to load tool {tool_name}: {e}")
 
+
 def load_all_tools():
     """Load all available tools (for backward compatibility)"""
     for tool_name in _AVAILABLE_TOOLS:
         _lazy_load_tool(tool_name)
 
+
 # Export the tool modules for external access
-__all__ = _AVAILABLE_TOOLS + [
+__all__ = _AVAILABLE_TOOLS + ["load_all_tools", "_lazy_load_tool"]