aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,459 @@
1
+ """
2
+ Statistical Analyzer Tool - Advanced statistical analysis and hypothesis testing
3
+
4
+ This tool provides comprehensive statistical analysis with:
5
+ - Descriptive and inferential statistics
6
+ - Hypothesis testing (t-test, ANOVA, chi-square)
7
+ - Regression analysis
8
+ - Time series analysis
9
+ - Correlation and causality analysis
10
+ """
11
+
12
+ import logging
13
+ from typing import Dict, Any, List, Optional, Union
14
+ from enum import Enum
15
+
16
+ import pandas as pd
17
+ import numpy as np
18
+ from scipy import stats as scipy_stats
19
+ from pydantic import BaseModel, Field, ConfigDict
20
+
21
+ from aiecs.tools.base_tool import BaseTool
22
+ from aiecs.tools import register_tool
23
+
24
+
25
class AnalysisType(str, Enum):
    """Names of the statistical analyses known to this module.

    Members mix in ``str``, so each member compares equal to its lowercase
    string value (for example ``AnalysisType.T_TEST == "t_test"``).

    ``StatisticalAnalyzerTool.analyze`` has no dispatch branch for
    ``LOGISTIC_REGRESSION`` or ``TIME_SERIES``; requesting either one ends in
    an "Unsupported analysis type" error there.
    """

    # Summary statistics
    DESCRIPTIVE = "descriptive"

    # Hypothesis tests
    T_TEST = "t_test"
    ANOVA = "anova"
    CHI_SQUARE = "chi_square"

    # Regression models
    LINEAR_REGRESSION = "linear_regression"
    LOGISTIC_REGRESSION = "logistic_regression"

    # Association and temporal analyses
    CORRELATION = "correlation"
    TIME_SERIES = "time_series"
36
+
37
+
38
class StatisticalAnalyzerError(Exception):
    """Root of the exception hierarchy used by the statistical analyzer tool."""
40
+
41
+
42
class AnalysisError(StatisticalAnalyzerError):
    """Signals that a requested analysis could not be completed."""
44
+
45
+
46
+ @register_tool("statistical_analyzer")
47
+ class StatisticalAnalyzerTool(BaseTool):
48
+ """
49
+ Advanced statistical analysis tool that can:
50
+ 1. Perform hypothesis testing
51
+ 2. Conduct regression analysis
52
+ 3. Analyze time series
53
+ 4. Perform correlation and causal analysis
54
+
55
+ Integrates with stats_tool for core statistical operations.
56
+ """
57
+
58
+ # Configuration schema
59
class Config(BaseModel):
    """Typed settings for the statistical analyzer tool.

    ``__init__`` builds an instance from the optional ``config`` mapping, so
    every field carries a default.
    """

    # NOTE(review): ``env_prefix`` is normally a pydantic-settings option;
    # confirm it has any effect on a plain ``BaseModel``.
    model_config = ConfigDict(env_prefix="STATISTICAL_ANALYZER_")

    # p-values from the t-test, ANOVA and chi-square helpers are compared
    # against this threshold to fill the "significant" result field.
    significance_level: float = Field(
        description="Significance level for hypothesis testing",
        default=0.05,
    )

    # Not read by any method in this file.
    confidence_level: float = Field(
        description="Confidence level for statistical intervals",
        default=0.95,
    )

    # Not read by any method in this file.
    enable_effect_size: bool = Field(
        description="Whether to calculate effect sizes in analyses",
        default=True,
    )
76
+
77
def __init__(self, config: Optional[Dict[str, Any]] = None):
    """Create the tool: parse settings, prepare logging, load helper tools.

    Args:
        config: Optional mapping of settings. It is forwarded unchanged to
            the base-class initializer and also validated into
            ``self.Config`` (an empty mapping is used when it is ``None``).
    """
    super().__init__(config)

    # Validate the raw mapping into the typed settings model.
    raw_settings = config or {}
    self.config = self.Config(**raw_settings)

    self.logger = logging.getLogger(__name__)
    # Only attach a handler when the logger has none yet, so creating several
    # tool instances does not duplicate log output.
    # NOTE(review): the level is assumed to be set inside this branch; the
    # original indentation is not recoverable from this view — confirm.
    if not self.logger.handlers:
        stream_handler = logging.StreamHandler()
        log_format = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
        stream_handler.setFormatter(log_format)
        self.logger.addHandler(stream_handler)
        self.logger.setLevel(logging.INFO)

    self._init_external_tools()
92
+
93
def _init_external_tools(self):
    """Fill ``self.external_tools`` with the optional helper tools.

    The only entry is ``"stats"``: a ``StatsTool`` instance when
    ``aiecs.tools.task_tools.stats_tool`` can be imported, otherwise ``None``
    (a warning is logged). Only ``ImportError`` is handled; any other
    exception propagates to the caller.
    """
    registry = {}
    self.external_tools = registry

    try:
        # Imported here rather than at module level so a missing StatsTool
        # does not prevent this module from loading.
        from aiecs.tools.task_tools.stats_tool import StatsTool

        registry["stats"] = StatsTool()
        self.logger.info("StatsTool initialized successfully")
    except ImportError:
        self.logger.warning("StatsTool not available")
        registry["stats"] = None
105
+
106
+ # Schema definitions
107
class AnalyzeSchema(BaseModel):
    """Argument schema for the ``analyze`` operation."""

    # A single mapping or a list of record mappings.
    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Data to analyze",
    )
    analysis_type: AnalysisType = Field(
        description="Type of analysis to perform",
    )
    # Keys depend on the analysis (e.g. "var1"/"var2", "groups", "columns").
    variables: Dict[str, Any] = Field(
        description="Variables specification",
    )
    params: Optional[Dict[str, Any]] = Field(
        description="Additional parameters",
        default=None,
    )
114
+
115
class TestHypothesisSchema(BaseModel):
    """Argument schema for the ``test_hypothesis`` operation."""

    # A single mapping or a list of record mappings.
    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data for hypothesis testing")
    # One of the three names that ``test_hypothesis`` dispatches on.
    test_type: str = Field(
        description="Type of test: t_test, anova, chi_square",
    )
    # "var1"/"var2" for t_test and chi_square, "groups" for anova.
    variables: Dict[str, Any] = Field(
        description="Variables for testing",
    )
123
+
124
class PerformRegressionSchema(BaseModel):
    """Argument schema for the ``perform_regression`` operation."""

    # A single mapping or a list of record mappings.
    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Data for regression",
    )
    dependent_var: str = Field(
        description="Dependent variable",
    )
    independent_vars: List[str] = Field(
        description="Independent variables",
    )
    # ``perform_regression`` in this file only has a branch for "linear".
    regression_type: str = Field(
        description="Type: linear or logistic",
        default="linear",
    )
131
+
132
class AnalyzeCorrelationSchema(BaseModel):
    """Argument schema for the ``analyze_correlation`` operation."""

    # A single mapping or a list of record mappings.
    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data for correlation analysis")
    # When omitted, every numeric column of the data is used.
    variables: Optional[List[str]] = Field(
        description="Variables to analyze",
        default=None,
    )
    # Passed through to ``DataFrame.corr(method=...)``.
    method: str = Field(
        description="Correlation method",
        default="pearson",
    )
140
+
141
def analyze(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    analysis_type: AnalysisType,
    variables: Dict[str, Any],
    params: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Perform statistical analysis.

    Args:
        data: Data to analyze (DataFrame, list of records, or mapping)
        analysis_type: Type of analysis, given either as an ``AnalysisType``
            member or as its string value (e.g. ``"t_test"``)
        variables: Variables specification. The keys read depend on the
            analysis: ``columns`` (descriptive), ``var1``/``var2`` (t-test,
            chi-square), ``groups`` (ANOVA), ``dependent``/``independent``
            (linear regression), ``variables`` (correlation)
        params: Additional parameters

    Returns:
        Dict produced by the matching analysis helper, with an extra
        ``analysis_type`` key holding the string value of the analysis type

    Raises:
        AnalysisError: If the analysis type is unknown or unsupported, or if
            any step of the analysis fails; the original exception is chained
            as ``__cause__``
    """
    try:
        df = self._to_dataframe(data)
        params = params or {}

        # Normalise to an enum member. A plain string compares equal to the
        # str-mixin enum in the dispatch below but has no ``.value``, which
        # previously crashed after the analysis had already run.
        try:
            kind = AnalysisType(analysis_type)
        except ValueError:
            raise AnalysisError(f"Unsupported analysis type: {analysis_type}") from None

        if kind == AnalysisType.DESCRIPTIVE:
            result = self._descriptive_analysis(df, variables)
        elif kind == AnalysisType.T_TEST:
            result = self._t_test_analysis(df, variables, params)
        elif kind == AnalysisType.ANOVA:
            result = self._anova_analysis(df, variables, params)
        elif kind == AnalysisType.CHI_SQUARE:
            result = self._chi_square_analysis(df, variables, params)
        elif kind == AnalysisType.LINEAR_REGRESSION:
            result = self._linear_regression_analysis(df, variables, params)
        elif kind == AnalysisType.CORRELATION:
            result = self._correlation_analysis(df, variables, params)
        else:
            # LOGISTIC_REGRESSION and TIME_SERIES are declared in the enum
            # but have no implementation here.
            raise AnalysisError(f"Unsupported analysis type: {analysis_type}")

        result["analysis_type"] = kind.value
        return result

    except Exception as e:
        self.logger.error(f"Error in analysis: {e}")
        raise AnalysisError(f"Analysis failed: {e}") from e
185
+
186
def test_hypothesis(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    test_type: str,
    variables: Dict[str, Any],
) -> Dict[str, Any]:
    """Run one named hypothesis test on ``data``.

    Args:
        data: Input data; converted with ``_to_dataframe``.
        test_type: ``"t_test"``, ``"anova"`` or ``"chi_square"``.
        variables: Variable specification handed to the selected helper.

    Returns:
        The result dict of the selected helper (called with empty params).

    Raises:
        AnalysisError: For an unknown ``test_type`` or any failure while
            converting the data or running the test.
    """
    try:
        frame = self._to_dataframe(data)
        no_params = {}

        if test_type == "t_test":
            return self._t_test_analysis(frame, variables, no_params)
        if test_type == "anova":
            return self._anova_analysis(frame, variables, no_params)
        if test_type == "chi_square":
            return self._chi_square_analysis(frame, variables, no_params)

        # Raised inside the try on purpose: it is re-wrapped below exactly
        # like every other failure.
        raise AnalysisError(f"Unsupported test type: {test_type}")

    except Exception as exc:
        self.logger.error(f"Error in hypothesis testing: {exc}")
        raise AnalysisError(f"Hypothesis testing failed: {exc}")
208
+
209
def perform_regression(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    dependent_var: str,
    independent_vars: List[str],
    regression_type: str = "linear",
) -> Dict[str, Any]:
    """Fit a regression model on ``data``.

    Args:
        data: Input data; converted with ``_to_dataframe``.
        dependent_var: Name of the dependent (target) column.
        independent_vars: Names of the independent (predictor) columns.
        regression_type: Only ``"linear"`` has a branch here.

    Returns:
        The result dict of ``_linear_regression_analysis``.

    Raises:
        AnalysisError: For any other ``regression_type`` or any failure
            while converting the data or fitting the model.
    """
    try:
        frame = self._to_dataframe(data)
        spec = {}
        spec["dependent"] = dependent_var
        spec["independent"] = independent_vars

        if regression_type == "linear":
            return self._linear_regression_analysis(frame, spec, {})

        # Raised inside the try on purpose so it is wrapped like other errors.
        raise AnalysisError(f"Unsupported regression type: {regression_type}")

    except Exception as exc:
        self.logger.error(f"Error in regression: {exc}")
        raise AnalysisError(f"Regression failed: {exc}")
232
+
233
def analyze_correlation(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    variables: Optional[List[str]] = None,
    method: str = "pearson",
) -> Dict[str, Any]:
    """Compute pairwise correlations between columns of ``data``.

    Args:
        data: Input data; converted with ``_to_dataframe``.
        variables: Columns to restrict the analysis to. ``None`` or an empty
            list leaves the selection to ``_correlation_analysis``.
        method: Correlation method, forwarded as ``params["method"]``.

    Returns:
        The result dict of ``_correlation_analysis``.

    Raises:
        AnalysisError: If conversion or the correlation analysis fails.
    """
    try:
        frame = self._to_dataframe(data)

        # A falsy selection is passed on as "no selection at all".
        if variables:
            selection = {"variables": variables}
        else:
            selection = {}

        options = {"method": method}
        return self._correlation_analysis(frame, selection, options)

    except Exception as exc:
        self.logger.error(f"Error in correlation analysis: {exc}")
        raise AnalysisError(f"Correlation analysis failed: {exc}")
248
+
249
+ # Internal analysis methods
250
+
251
def _to_dataframe(self, data: Union[Dict, List, pd.DataFrame]) -> pd.DataFrame:
    """Convert supported input shapes to a DataFrame.

    Args:
        data: One of
            * a ``DataFrame`` - returned as-is (not copied);
            * a list - passed to the ``DataFrame`` constructor;
            * a dict whose values are all array-like (list, tuple, ndarray,
              Series) - treated as column-oriented data, one column per key;
            * any other dict - treated as a single record (one row).

    Returns:
        The resulting DataFrame.

    Raises:
        AnalysisError: If ``data`` is none of the supported types.
    """
    if isinstance(data, pd.DataFrame):
        return data
    if isinstance(data, list):
        return pd.DataFrame(data)
    if isinstance(data, dict):
        array_like = (list, tuple, np.ndarray, pd.Series)
        # A mapping of column name -> values used to be wrapped as one record,
        # producing a single row of list cells that no analysis could use.
        if data and all(isinstance(value, array_like) for value in data.values()):
            try:
                return pd.DataFrame(data)
            except ValueError:
                # e.g. columns of unequal length: fall back to the previous
                # single-record interpretation instead of failing.
                pass
        return pd.DataFrame([data])
    raise AnalysisError(f"Unsupported data type: {type(data)}")
261
+
262
def _descriptive_analysis(self, df: pd.DataFrame, variables: Dict[str, Any]) -> Dict[str, Any]:
    """Summarise columns with basic descriptive statistics.

    Args:
        df: Data to summarise.
        variables: May contain ``"columns"``, the column names to summarise.
            Without that key every numeric column is used. Names that are
            not columns of ``df`` are skipped silently.

    Returns:
        Dict with ``"results"`` (column name -> statistics computed on the
        non-null values) and a fixed ``"interpretation"`` message.
    """
    numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
    if "columns" in variables:
        selected = variables["columns"]
    else:
        selected = numeric_columns

    summary = {}
    for name in selected:
        if name not in df.columns:
            continue

        values = df[name].dropna()
        # Everything is cast to built-in int/float rather than NumPy scalars.
        summary[name] = dict(
            count=int(len(values)),
            mean=float(values.mean()),
            std=float(values.std()),
            min=float(values.min()),
            q25=float(values.quantile(0.25)),
            median=float(values.median()),
            q75=float(values.quantile(0.75)),
            max=float(values.max()),
            skewness=float(values.skew()),
            kurtosis=float(values.kurt()),
        )

    report = {"results": summary}
    report["interpretation"] = "Descriptive statistics computed successfully"
    return report
287
+
288
def _t_test_analysis(
    self,
    df: pd.DataFrame,
    variables: Dict[str, Any],
    params: Dict[str, Any],
) -> Dict[str, Any]:
    """Perform a two-sample t-test between two columns.

    Uses ``scipy.stats.ttest_ind`` with its default arguments on the non-null
    values of each column.

    Args:
        df: Data containing both columns.
        variables: Must provide the column names under ``"var1"`` and
            ``"var2"``.
        params: Accepted for a uniform helper signature; not read here.

    Returns:
        Dict with ``test_type``, ``statistic``, ``p_value``, ``significant``,
        ``interpretation`` and ``variables``. All values are built-in Python
        types so the result can be serialised to JSON.

    Raises:
        AnalysisError: If ``var1`` or ``var2`` is missing from ``variables``.
    """
    var1_name = variables.get("var1")
    var2_name = variables.get("var2")

    if not var1_name or not var2_name:
        raise AnalysisError("T-test requires var1 and var2")

    var1 = df[var1_name].dropna()
    var2 = df[var2_name].dropna()

    statistic, pvalue = scipy_stats.ttest_ind(var1, var2)

    alpha = self.config.significance_level
    # The comparison yields a numpy.bool_, which json cannot serialise; cast
    # it like the float() casts applied to the other fields.
    significant = bool(pvalue < alpha)

    return {
        "test_type": "t_test",
        "statistic": float(statistic),
        "p_value": float(pvalue),
        "significant": significant,
        "interpretation": f"{'Significant' if significant else 'Not significant'} difference at α={alpha}",
        "variables": [var1_name, var2_name],
    }
314
+
315
def _anova_analysis(
    self,
    df: pd.DataFrame,
    variables: Dict[str, Any],
    params: Dict[str, Any],
) -> Dict[str, Any]:
    """Perform a one-way ANOVA across several columns.

    Each name in ``variables["groups"]`` is treated as one group whose
    samples are the non-null values of that column; names that are not
    columns of ``df`` are ignored. The test is ``scipy.stats.f_oneway``.

    Args:
        df: Data containing the group columns.
        variables: Must provide at least two column names under ``"groups"``.
        params: Accepted for a uniform helper signature; not read here.

    Returns:
        Dict with ``test_type``, ``statistic``, ``p_value``, ``significant``,
        ``interpretation`` and ``groups`` (the requested names, unchanged).
        All values are built-in Python types.

    Raises:
        AnalysisError: If fewer than two groups are requested or fewer than
            two of them exist in ``df``.
    """
    groups = variables.get("groups", [])

    if len(groups) < 2:
        raise AnalysisError("ANOVA requires at least 2 groups")

    group_data = [df[group].dropna() for group in groups if group in df.columns]

    if len(group_data) < 2:
        raise AnalysisError("Insufficient valid groups for ANOVA")

    statistic, pvalue = scipy_stats.f_oneway(*group_data)

    # The comparison yields a numpy.bool_, which json cannot serialise; cast
    # it like the float() casts applied to the other fields.
    significant = bool(pvalue < self.config.significance_level)

    return {
        "test_type": "anova",
        "statistic": float(statistic),
        "p_value": float(pvalue),
        "significant": significant,
        "interpretation": f"{'Significant' if significant else 'Not significant'} difference between groups",
        "groups": groups,
    }
342
+
343
def _chi_square_analysis(
    self,
    df: pd.DataFrame,
    variables: Dict[str, Any],
    params: Dict[str, Any],
) -> Dict[str, Any]:
    """Perform a chi-square test of independence between two columns.

    A contingency table is built with ``pandas.crosstab`` and tested with
    ``scipy.stats.chi2_contingency`` using its default arguments.

    Args:
        df: Data containing both columns.
        variables: Must provide the column names under ``"var1"`` and
            ``"var2"``.
        params: Accepted for a uniform helper signature; not read here.

    Returns:
        Dict with ``test_type``, ``statistic``, ``p_value``,
        ``degrees_of_freedom``, ``significant``, ``interpretation`` and
        ``variables``. All values are built-in Python types.

    Raises:
        AnalysisError: If ``var1`` or ``var2`` is missing from ``variables``.
    """
    var1_name = variables.get("var1")
    var2_name = variables.get("var2")

    if not var1_name or not var2_name:
        raise AnalysisError("Chi-square test requires var1 and var2")

    contingency_table = pd.crosstab(df[var1_name], df[var2_name])
    # The fourth element (expected frequencies) is not part of the result.
    statistic, pvalue, dof, _expected = scipy_stats.chi2_contingency(contingency_table)

    # The comparison yields a numpy.bool_, which json cannot serialise; cast
    # it like the float()/int() casts applied to the other fields.
    significant = bool(pvalue < self.config.significance_level)

    return {
        "test_type": "chi_square",
        "statistic": float(statistic),
        "p_value": float(pvalue),
        "degrees_of_freedom": int(dof),
        "significant": significant,
        "interpretation": f"{'Significant' if significant else 'Not significant'} association",
        "variables": [var1_name, var2_name],
    }
368
+
369
def _linear_regression_analysis(
    self,
    df: pd.DataFrame,
    variables: Dict[str, Any],
    params: Dict[str, Any],
) -> Dict[str, Any]:
    """Fit an ordinary least-squares linear regression.

    The model is fitted with scikit-learn's ``LinearRegression`` on the rows
    that have no missing value in the dependent or any independent column,
    and scored on those same rows.

    Args:
        df: Data containing the model columns.
        variables: Must provide ``"dependent"`` (column name) and
            ``"independent"`` (list of column names; a single name given as
            a bare string is also accepted).
        params: Accepted for a uniform helper signature; not read here.

    Returns:
        Dict with ``model_type``, ``intercept``, ``coefficients`` (predictor
        name -> coefficient), ``r_squared``, ``mse``, ``rmse``,
        ``interpretation``, ``dependent_variable`` and
        ``independent_variables``.

    Raises:
        AnalysisError: If the dependent or independent variables are missing,
            or if no row is complete across all model columns.
    """
    # Imported lazily so scikit-learn is only required when a regression is
    # actually requested.
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score, mean_squared_error

    dependent = variables.get("dependent")
    independent = variables.get("independent", [])

    if not dependent or not independent:
        raise AnalysisError("Regression requires dependent and independent variables")

    # A bare string would otherwise select a 1-D Series and be zipped
    # character by character against the coefficients below.
    if isinstance(independent, str):
        independent = [independent]
    else:
        independent = list(independent)

    # Keep only rows complete in BOTH the predictors and the target. Dropping
    # NaNs from the predictors alone let a NaN target reach fit().
    complete = df[independent].notna().all(axis=1) & df[dependent].notna()
    if not complete.any():
        raise AnalysisError("No complete rows available for regression")

    X = df.loc[complete, independent]
    y = df.loc[complete, dependent]

    model = LinearRegression()
    model.fit(X, y)

    y_pred = model.predict(X)
    r2 = r2_score(y, y_pred)
    mse = mean_squared_error(y, y_pred)

    coefficients = {var: float(coef) for var, coef in zip(independent, model.coef_)}

    return {
        "model_type": "linear_regression",
        "intercept": float(model.intercept_),
        "coefficients": coefficients,
        "r_squared": float(r2),
        "mse": float(mse),
        "rmse": float(np.sqrt(mse)),
        "interpretation": f"Model explains {r2*100:.2f}% of variance",
        "dependent_variable": dependent,
        "independent_variables": independent,
    }
408
+
409
def _correlation_analysis(
    self,
    df: pd.DataFrame,
    variables: Dict[str, Any],
    params: Dict[str, Any],
) -> Dict[str, Any]:
    """Compute a correlation matrix and list the noteworthy pairs.

    Args:
        df: Data to analyse; only numeric columns take part.
        variables: May contain ``"variables"``, a list of column names to
            restrict the analysis to. Without it all numeric columns are used.
        params: May contain ``"method"`` (passed to ``DataFrame.corr``,
            default ``"pearson"``) and ``"threshold"`` (a pair is reported
            when the absolute correlation is strictly greater than this
            value, default ``0.3``).

    Returns:
        Dict with ``method``, ``correlation_matrix`` (nested dict),
        ``significant_correlations`` (one entry per reported pair with
        ``var1``, ``var2``, ``correlation`` and ``strength``) and
        ``interpretation``. "Significant" here means "above the threshold";
        no p-value is computed.

    Raises:
        AnalysisError: If fewer than two numeric columns are available.
    """
    method = params.get("method", "pearson")
    threshold = params.get("threshold", 0.3)
    cols = variables.get("variables")

    source = df[cols] if cols else df
    numeric_df = source.select_dtypes(include=[np.number])

    if numeric_df.shape[1] < 2:
        raise AnalysisError("Correlation requires at least 2 numeric variables")

    corr_matrix = numeric_df.corr(method=method)
    names = list(corr_matrix.columns)

    # Walk the upper triangle only, so each unordered pair is visited once and
    # the diagonal (self-correlation) is skipped.
    significant_pairs = []
    for i, first in enumerate(names):
        for j in range(i + 1, len(names)):
            corr_value = corr_matrix.iloc[i, j]
            # A NaN correlation compares False and is therefore never reported.
            if abs(corr_value) > threshold:
                significant_pairs.append(
                    {
                        "var1": first,
                        "var2": names[j],
                        "correlation": float(corr_value),
                        "strength": self._interpret_correlation(corr_value),
                    }
                )

    return {
        "method": method,
        "correlation_matrix": corr_matrix.to_dict(),
        "significant_correlations": significant_pairs,
        "interpretation": f"Found {len(significant_pairs)} significant correlations",
    }
450
+
451
def _interpret_correlation(self, corr: float) -> str:
    """Label a correlation coefficient by its magnitude.

    Args:
        corr: Correlation coefficient; only its absolute value matters.

    Returns:
        ``"weak"`` below 0.3, ``"moderate"`` from 0.3 up to (not including)
        0.7, and ``"strong"`` otherwise.
    """
    magnitude = abs(corr)
    if magnitude < 0.3:
        return "weak"
    if magnitude < 0.7:
        return "moderate"
    return "strong"
@@ -0,0 +1,86 @@
1
+ # python-middleware/app/tools/task_tools/__init__.py
2
+
3
+ """
4
+ Task Tools Module
5
+
6
+ This module contains specialized tools for various task-oriented operations:
7
+ - chart_tool: Chart and visualization operations
8
+ - classfire_tool: Classification and categorization operations
9
+ - image_tool: Image processing and manipulation operations
10
+ - office_tool: Office document processing operations
11
+ - pandas_tool: Data analysis and manipulation operations
12
+ - report_tool: Report generation and formatting operations
13
+ - research_tool: Research and information gathering operations
14
+ - scraper_tool: Web scraping and data extraction operations
15
+ - stats_tool: Statistical analysis and computation operations
16
+
17
+ Note:
18
+ - apisource_tool is now a standalone package at aiecs.tools.apisource
19
+ - search_tool is now a standalone package at aiecs.tools.search_tool
20
+ """
21
+
22
+ # Lazy import all task tools to avoid heavy dependencies at import time
23
+ import os
24
+
25
+ # Define available tools for lazy loading
26
# Names of the task-tool submodules that can be loaded on demand.
_AVAILABLE_TOOLS = [
    "chart_tool",
    "classfire_tool",
    "image_tool",
    "pandas_tool",
    "report_tool",
    "research_tool",
    "scraper_tool",
    "stats_tool",
]

# office_tool is offered unless SKIP_OFFICE_TOOL is set to "true", "1" or
# "yes" (compared case-insensitively).
if os.getenv("SKIP_OFFICE_TOOL", "").lower() not in ("true", "1", "yes"):
    _AVAILABLE_TOOLS.append("office_tool")

# Names of the tools that have already been loaded.
_LOADED_TOOLS = set()
43
+
44
+
45
def _lazy_load_tool(tool_name: str):
    """Import a single task-tool submodule the first time it is requested.

    Previously every branch was a bare ``pass``, so a tool was recorded as
    loaded without its module ever being imported and the failure path
    could never trigger. The submodule is now actually imported.

    Args:
        tool_name: Name of the tool submodule, e.g. ``"chart_tool"``.

    Returns:
        None. On success the name is added to ``_LOADED_TOOLS``; on failure
        a warning is logged and the name is left unrecorded so a later call
        can retry.
    """
    import importlib
    import logging

    if tool_name in _LOADED_TOOLS:
        return

    # Only these names may be imported; this keeps arbitrary strings from
    # being turned into import requests.
    known_tools = (
        "chart_tool",
        "classfire_tool",
        "image_tool",
        "office_tool",
        "pandas_tool",
        "report_tool",
        "research_tool",
        "scraper_tool",
        "stats_tool",
    )
    logger = logging.getLogger(__name__)

    if tool_name not in known_tools:
        logger.warning(f"Failed to load tool {tool_name}: unknown tool name")
        return

    try:
        # Relative import against this package; this file is the package's
        # __init__ module, so __name__ is the package name.
        importlib.import_module(f".{tool_name}", package=__name__)
    except Exception as e:
        # Deliberately broad: a tool's heavy third-party dependencies can
        # fail in many ways, and one broken tool must not break the rest.
        logger.warning(f"Failed to load tool {tool_name}: {e}")
    else:
        _LOADED_TOOLS.add(tool_name)
77
+
78
+
79
def load_all_tools():
    """Request loading of every tool listed in ``_AVAILABLE_TOOLS``.

    Kept for backward compatibility with callers that expect all tools to
    be loaded in one call.
    """
    for name in _AVAILABLE_TOOLS:
        _lazy_load_tool(name)
83
+
84
+
85
# Public names: every available tool module plus the two loader helpers.
__all__ = [*_AVAILABLE_TOOLS, "load_all_tools", "_lazy_load_tool"]