aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry has flagged this version of aiecs as possibly problematic.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +435 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3949 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1731 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +894 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +377 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +230 -37
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +328 -0
- aiecs/llm/clients/google_function_calling_mixin.py +415 -0
- aiecs/llm/clients/googleai_client.py +314 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +1186 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1464 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1016 -0
- aiecs/tools/docs/document_writer_tool.py +2008 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +220 -141
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
- aiecs-1.7.17.dist-info/RECORD +337 -0
- aiecs-1.7.17.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
aiecs/tools/task_tools/stats_tool.py
@@ -5,38 +5,44 @@ from typing import Dict, Any, List, Optional, Union, Tuple
 from enum import Enum
 from dataclasses import dataclass

-import pandas as pd
+import pandas as pd  # type: ignore[import-untyped]
 import numpy as np
-from
-from
+from pydantic import Field, BaseModel
+from pydantic_settings import BaseSettings, SettingsConfigDict

 from aiecs.tools.base_tool import BaseTool
 from aiecs.tools import register_tool

 # Enums for configuration options
+
+
 class ScalerType(str, Enum):
     STANDARD = "standard"
     MINMAX = "minmax"
     ROBUST = "robust"
     NONE = "none"

-class StatsSettings(BaseSettings):
-    """Configuration for StatsTool."""
-    max_file_size_mb: int = 200
-    allowed_extensions: List[str] = ['.sav', '.sas7bdat', '.por', '.csv', '.xlsx', '.xls', '.json', '.parquet', '.feather']
-    env_prefix: str = 'STATS_TOOL_'
-
-    model_config = ConfigDict(env_prefix='STATS_TOOL_')

 # Exceptions
-class StatsToolError(Exception):
-
-
+class StatsToolError(Exception):
+    pass
+
+
+class FileOperationError(StatsToolError):
+    pass
+
+
+class AnalysisError(StatsToolError):
+    pass
+

 # Utility Dataclass for Statistical Results
+
+
 @dataclass
 class StatsResult:
     """Structured statistical result."""
+
     test_type: str
     statistic: float
     pvalue: float
@@ -45,53 +51,186 @@ class StatsResult:

     def to_dict(self) -> Dict[str, Any]:
         return {
-
-
-
-
-            **self.additional_metrics
+            "test_type": self.test_type,
+            "statistic": self.statistic,
+            "pvalue": self.pvalue,
+            "significant": self.significant,
+            **self.additional_metrics,
         }

-
+
+@register_tool("stats")
 class StatsTool(BaseTool):
     """Enhanced statistical analysis tool for various data formats and operations."""
-
-
-
-
-
-
-
-
+
+    # Configuration schema
+    class Config(BaseSettings):
+        """Configuration for the stats tool
+
+        Automatically reads from environment variables with STATS_TOOL_ prefix.
+        Example: STATS_TOOL_MAX_FILE_SIZE_MB -> max_file_size_mb
+        """
+
+        model_config = SettingsConfigDict(env_prefix="STATS_TOOL_")
+
+        max_file_size_mb: int = Field(default=200, description="Maximum file size in megabytes")
+        allowed_extensions: List[str] = Field(
+            default=[
+                ".sav",
+                ".sas7bdat",
+                ".por",
+                ".csv",
+                ".xlsx",
+                ".xls",
+                ".json",
+                ".parquet",
+                ".feather",
+            ],
+            description="Allowed file extensions",
+        )
+
+    # Schema definitions
+    class Read_dataSchema(BaseModel):
+        """Schema for read_data operation"""
+
+        file_path: str = Field(description="Path to the data file to read")
+        nrows: Optional[int] = Field(default=None, description="Optional number of rows to read from the file. If None, reads all rows")
+        sheet_name: Optional[Union[str, int]] = Field(default=0, description="Sheet name or index for Excel files. Can be a string name or integer index (0-based)")
+
+    class DescribeSchema(BaseModel):
+        """Schema for describe operation"""
+
+        file_path: str = Field(description="Path to the data file")
+        variables: Optional[List[str]] = Field(default=None, description="Optional list of variable names to describe. If None, describes all variables")
+        include_percentiles: bool = Field(default=False, description="Whether to include custom percentiles in the descriptive statistics")
+        percentiles: Optional[List[float]] = Field(default=None, description="Optional list of percentile values (0.0 to 1.0) to include. Only used if include_percentiles is True")
+
+    class TtestSchema(BaseModel):
+        """Schema for ttest operation"""
+
+        file_path: str = Field(description="Path to the data file")
+        var1: str = Field(description="Name of the first variable for the t-test")
+        var2: str = Field(description="Name of the second variable for the t-test")
+        equal_var: bool = Field(default=True, description="Whether to assume equal variances. If True, uses standard t-test; if False, uses Welch's t-test")
+        paired: bool = Field(default=False, description="Whether to perform a paired t-test. If True, performs paired t-test; if False, performs independent t-test")
+
+    class CorrelationSchema(BaseModel):
+        """Schema for correlation operation"""
+
+        file_path: str = Field(description="Path to the data file")
+        variables: Optional[List[str]] = Field(default=None, description="Optional list of variable names for correlation matrix. If provided, computes correlation matrix for all pairs")
+        var1: Optional[str] = Field(default=None, description="First variable name for pairwise correlation. Must be used together with var2")
+        var2: Optional[str] = Field(default=None, description="Second variable name for pairwise correlation. Must be used together with var1")
+        method: str = Field(default="pearson", description="Correlation method: 'pearson' (linear), 'spearman' (rank-based), or 'kendall' (tau)")
+
+    class AnovaSchema(BaseModel):
+        """Schema for anova operation"""
+
+        file_path: str = Field(description="Path to the data file")
+        dependent: str = Field(description="Name of the dependent variable (continuous)")
+        factor: str = Field(description="Name of the factor/grouping variable (categorical)")
+        post_hoc: bool = Field(default=False, description="Whether to perform post-hoc tests (Tukey HSD) to identify which groups differ significantly")
+
+    class Chi_squareSchema(BaseModel):
+        """Schema for chi_square operation"""
+
+        file_path: str = Field(description="Path to the data file")
+        var1: str = Field(description="Name of the first categorical variable")
+        var2: str = Field(description="Name of the second categorical variable")
+        correction: bool = Field(default=True, description="Whether to apply Yates' correction for continuity. Recommended for 2x2 contingency tables")
+
+    class Non_parametricSchema(BaseModel):
+        """Schema for non_parametric operation"""
+
+        file_path: str = Field(description="Path to the data file")
+        test_type: str = Field(description="Type of non-parametric test: 'mann_whitney' (2 groups), 'wilcoxon' (paired), 'kruskal' (multiple groups), or 'friedman' (repeated measures)")
+        variables: List[str] = Field(description="List of variable names to test. Number of variables depends on test_type")
+        grouping: Optional[str] = Field(default=None, description="Optional grouping variable name. Required for 'kruskal' test, not used for other tests")
+
+    class RegressionSchema(BaseModel):
+        """Schema for regression operation"""
+
+        file_path: str = Field(description="Path to the data file")
+        formula: str = Field(description="Regression formula string (e.g., 'y ~ x1 + x2'). Uses R-style formula syntax")
+        regression_type: str = Field(default="ols", description="Type of regression model: 'ols' (ordinary least squares), 'logit' (logistic), 'probit', or 'poisson'")
+        robust: bool = Field(default=False, description="Whether to use robust standard errors (HC3 heteroscedasticity-consistent)")
+        structured_output: bool = Field(default=True, description="Whether to return structured output with coefficients, p-values, and confidence intervals. If False, returns summary text only")
+
+    class Time_seriesSchema(BaseModel):
+        """Schema for time_series operation"""
+
+        file_path: str = Field(description="Path to the data file")
+        variable: str = Field(description="Name of the time series variable to analyze")
+        date_variable: Optional[str] = Field(default=None, description="Optional name of the date/time variable. If provided, uses it as the time index")
+        model_type: str = Field(default="arima", description="Type of time series model: 'arima' or 'sarima' (seasonal ARIMA)")
+        order: Optional[Tuple[int, int, int]] = Field(default=(1, 1, 1), description="ARIMA order tuple (p, d, q) where p=autoregressive, d=differencing, q=moving average")
+        seasonal_order: Optional[Tuple[int, int, int, int]] = Field(default=None, description="Optional SARIMA seasonal order tuple (P, D, Q, s). Required for 'sarima' model type")
+        forecast_periods: int = Field(default=10, description="Number of periods to forecast into the future")
+
+    class PreprocessSchema(BaseModel):
+        """Schema for preprocess operation"""
+
+        file_path: str = Field(description="Path to the data file")
+        variables: List[str] = Field(description="List of variable names to preprocess")
+        operation: str = Field(description="Preprocessing operation: 'scale' (normalize) or 'impute' (fill missing values)")
+        scaler_type: ScalerType = Field(default=ScalerType.STANDARD, description="Type of scaler to use for scaling operation: 'standard' (z-score), 'minmax' (0-1), 'robust' (median/IQR), or 'none'")
+        output_path: Optional[str] = Field(default=None, description="Optional path to save the preprocessed data. If None, data is not saved to file")
+
+    def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs):
+        """
+        Initialize StatsTool with settings and resources.
+
+        Args:
+            config (Dict, optional): Configuration overrides for StatsTool.
+            **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)
+
+        Configuration is automatically loaded by BaseTool from:
+        1. Explicit config dict (highest priority)
+        2. YAML config files (config/tools/stats.yaml)
+        3. Environment variables (via dotenv from .env files)
+        4. Tool defaults (lowest priority)
+        """
+        super().__init__(config, **kwargs)
+
+        # Configuration is automatically loaded by BaseTool into self._config_obj
+        # Access config via self._config_obj (BaseSettings instance)
+        self.config = self._config_obj if self._config_obj else self.Config()
+
         self.logger = logging.getLogger(__name__)
         if not self.logger.handlers:
             h = logging.StreamHandler()
-            h.setFormatter(logging.Formatter(
+            h.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
             self.logger.addHandler(h)
             self.logger.setLevel(logging.INFO)

-    def _load_data(
+    def _load_data(
+        self,
+        file_path: str,
+        nrows: Optional[int] = None,
+        sheet_name: Optional[Union[str, int]] = 0,
+    ) -> pd.DataFrame:
         """Load data from various file formats into a pandas DataFrame."""
         try:
             ext = os.path.splitext(file_path)[1].lower()
-            if ext in [
-                import pyreadstat
-
+            if ext in [".sav", ".sas7bdat", ".por"]:
+                import pyreadstat  # type: ignore[import-untyped]
+
+                if ext == ".sav":
                     df, meta = pyreadstat.read_sav(file_path)
-            elif ext ==
+                elif ext == ".sas7bdat":
                     df, meta = pyreadstat.read_sas7bdat(file_path)
                 else:
                     df, meta = pyreadstat.read_por(file_path)
                 return df
-            elif ext ==
+            elif ext == ".csv":
                 return pd.read_csv(file_path, nrows=nrows)
-            elif ext in [
+            elif ext in [".xlsx", ".xls"]:
                 return pd.read_excel(file_path, sheet_name=sheet_name, nrows=nrows)
-            elif ext ==
+            elif ext == ".json":
                 return pd.read_json(file_path)
-            elif ext ==
+            elif ext == ".parquet":
                 return pd.read_parquet(file_path)
-            elif ext ==
+            elif ext == ".feather":
                 return pd.read_feather(file_path)
             else:
                 raise FileOperationError(f"Unsupported file format: {ext}")
@@ -115,18 +254,29 @@ class StatsTool(BaseTool):
                 return label
         return "large"

-    def read_data(
+    def read_data(
+        self,
+        file_path: str,
+        nrows: Optional[int] = None,
+        sheet_name: Optional[Union[str, int]] = 0,
+    ) -> Dict[str, Any]:
         """Read data from various file formats."""
         df = self._load_data(file_path, nrows, sheet_name)
         return {
-
-
-
-
-
+            "variables": df.columns.tolist(),
+            "observations": len(df),
+            "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()},
+            "memory_usage": df.memory_usage(deep=True).sum() / (1024 * 1024),
+            "preview": df.head(5).to_dict(orient="records"),
         }

-    def describe(
+    def describe(
+        self,
+        file_path: str,
+        variables: Optional[List[str]] = None,
+        include_percentiles: bool = False,
+        percentiles: Optional[List[float]] = None,
+    ) -> Dict[str, Any]:
         """Generate descriptive statistics for variables."""
         df = self._load_data(file_path)
         if variables:
@@ -137,21 +287,31 @@ class StatsTool(BaseTool):
            additional_percentiles = [p for p in percentiles if p not in [0.25, 0.5, 0.75]]
            if additional_percentiles:
                additional_desc = df.describe(percentiles=percentiles)
-                desc = pd.concat(
+                desc = pd.concat(
+                    [
+                        desc,
+                        additional_desc.loc[[f"{int(p*100)}%" for p in additional_percentiles]],
+                    ]
+                )
         numeric_cols = df.select_dtypes(include=[np.number]).columns
         if numeric_cols.any():
-            desc.loc[
-            desc.loc[
-        return {
-            'statistics': desc.to_dict(),
-            'summary': desc.to_string()
-        }
+            desc.loc["skew"] = df[numeric_cols].skew()
+            desc.loc["kurtosis"] = df[numeric_cols].kurt()
+        return {"statistics": desc.to_dict(), "summary": desc.to_string()}

-    def ttest(
+    def ttest(
+        self,
+        file_path: str,
+        var1: str,
+        var2: str,
+        equal_var: bool = True,
+        paired: bool = False,
+    ) -> Dict[str, Any]:
         """Perform t-tests (independent or paired). Also handles legacy ttest_ind."""
         df = self._load_data(file_path)
         self._validate_variables(df, [var1, var2])
-        import scipy.stats as stats
+        import scipy.stats as stats  # type: ignore[import-untyped]
+
         a = df[var1].dropna().values
         b = df[var2].dropna().values
         if paired:
@@ -176,64 +336,85 @@ class StatsTool(BaseTool):
             pvalue=float(p),
             significant=p < 0.05,
             additional_metrics={
-
-
-
-
-
-
-
-
-            }
+                "cohens_d": float(cohens_d),
+                "effect_size_interpretation": self._interpret_effect_size(cohens_d),
+                "group1_mean": float(mean_a),
+                "group2_mean": float(mean_b),
+                "group1_std": float(std_a),
+                "group2_std": float(std_b),
+                "group1_n": int(len(a)),
+                "group2_n": int(len(b)),
+            },
         ).to_dict()

     # Legacy method (now an alias)
     ttest_ind = ttest

-    def correlation(
+    def correlation(
+        self,
+        file_path: str,
+        variables: Optional[List[str]] = None,
+        var1: Optional[str] = None,
+        var2: Optional[str] = None,
+        method: str = "pearson",
+    ) -> Dict[str, Any]:
         """Perform correlation analysis."""
         df = self._load_data(file_path)
         if variables:
             self._validate_variables(df, variables)
         if var1 and var2:
             self._validate_variables(df, [var1, var2])
-        import scipy.stats as stats
+        import scipy.stats as stats  # type: ignore[import-untyped]
+
         result = {}
         if variables:
             corr_matrix = df[variables].corr(method=method)
-            result[
+            result["correlation_matrix"] = corr_matrix.to_dict()
             flat_corrs = [
-                {
+                {
+                    "var1": v1,
+                    "var2": v2,
+                    "correlation": corr_matrix.loc[v1, v2],
+                    "abs_correlation": abs(corr_matrix.loc[v1, v2]),
+                }
                 for i, v1 in enumerate(variables)
-                for j, v2 in enumerate(variables)
+                for j, v2 in enumerate(variables)
+                if i < j
             ]
-            flat_corrs.sort(key=lambda x: x[
-            result[
+            flat_corrs.sort(key=lambda x: x["abs_correlation"], reverse=True)
+            result["pairs"] = flat_corrs
         elif var1 and var2:
             x = df[var1].dropna()
             y = df[var2].dropna()
             method_map = {
-
-
-
+                "pearson": (stats.pearsonr, "Pearson's r"),
+                "spearman": (stats.spearmanr, "Spearman's rho"),
+                "kendall": (stats.kendalltau, "Kendall's tau"),
             }
             func, method_name = method_map[method]
             corr, p = func(x, y)
             result = {
-
-
-
-
-
+                "method": method_name,
+                "correlation": float(corr),
+                "pvalue": float(p),
+                "significant": p < 0.05,
+                "n": len(x),
             }
         return result

-    def anova(
+    def anova(
+        self,
+        file_path: str,
+        dependent: str,
+        factor: str,
+        post_hoc: bool = False,
+    ) -> Dict[str, Any]:
         """Perform one-way ANOVA with optional post-hoc tests."""
         df = self._load_data(file_path)
         self._validate_variables(df, [dependent, factor])
-        import scipy.stats as stats
-        from statsmodels.stats.multicomp import pairwise_tukeyhsd
+        import scipy.stats as stats  # type: ignore[import-untyped] # type: ignore[import-untyped]
+        from statsmodels.stats.multicomp import pairwise_tukeyhsd  # type: ignore[import-untyped]
+
         dependent_var = df[dependent].dropna()
         factor_var = df[factor].dropna()
         min_len = min(len(dependent_var), len(factor_var))
@@ -242,42 +423,46 @@ class StatsTool(BaseTool):
         groups = {name: group[dependent].dropna().values for name, group in df.groupby(factor)}
         stat, p = stats.f_oneway(*groups.values())
         result = {
-
-
-
-
-
-
-
+            "F": float(stat),
+            "pvalue": float(p),
+            "significant": p < 0.05,
+            "groups": len(groups),
+            "group_sizes": {name: len(values) for name, values in groups.items()},
+            "group_means": {name: float(np.mean(values)) for name, values in groups.items()},
+            "group_std": {name: float(np.std(values, ddof=1)) for name, values in groups.items()},
         }
         if post_hoc:
-            post_hoc_df = pd.DataFrame({
-            tukey = pairwise_tukeyhsd(post_hoc_df[
+            post_hoc_df = pd.DataFrame({"value": dependent_var, "group": factor_var})
+            tukey = pairwise_tukeyhsd(post_hoc_df["value"], post_hoc_df["group"])
             from itertools import combinations
+
             group_pairs = list(combinations(tukey.groupsunique, 2))
             tukey_results = [
                 {
-
-
-
-
-
-
-
+                    "group1": str(group1),
+                    "group2": str(group2),
+                    "mean_difference": float(mean_diff),
+                    "p_adjusted": float(p_adj),
+                    "significant": bool(reject),
+                    "conf_lower": float(lower),
+                    "conf_upper": float(upper),
                 }
-                for (
+                for (
+                    group1,
+                    group2,
+                ), mean_diff, p_adj, lower, upper, reject in zip(
                     group_pairs,
                     tukey.meandiffs,
                     tukey.pvalues,
-                    tukey.confint[:,0],
-                    tukey.confint[:,1],
-                    tukey.reject
+                    tukey.confint[:, 0],
+                    tukey.confint[:, 1],
+                    tukey.reject,
                 )
             ]
-            result[
-
-
-
+            result["post_hoc"] = {
+                "method": "Tukey HSD",
+                "alpha": 0.05,  # Standard significance level for Tukey HSD
+                "comparisons": tukey_results,
             }
         return result

@@ -285,48 +470,56 @@ class StatsTool(BaseTool):
         """Perform chi-square test of independence."""
         df = self._load_data(file_path)
         self._validate_variables(df, [var1, var2])
-        import scipy.stats as stats
+        import scipy.stats as stats  # type: ignore[import-untyped]
+
         contingency = pd.crosstab(df[var1], df[var2])
         chi2, p, dof, expected = stats.chi2_contingency(contingency, correction=correction)
         n = contingency.sum().sum()
         min_dim = min(contingency.shape) - 1
         cramers_v = np.sqrt(chi2 / (n * min_dim))
         return {
-
-
-
-
-
-
-
-
-
+            "chi2": float(chi2),
+            "pvalue": float(p),
+            "dof": int(dof),
+            "significant": p < 0.05,
+            "cramers_v": float(cramers_v),
+            "effect_size_interpretation": self._interpret_effect_size(cramers_v),
+            "contingency_table": contingency.to_dict(),
+            "expected_frequencies": pd.DataFrame(expected, index=contingency.index, columns=contingency.columns).to_dict(),
+            "test_type": ("Chi-square test with Yates correction" if correction else "Chi-square test"),
         }

-    def non_parametric(
+    def non_parametric(
+        self,
+        file_path: str,
+        test_type: str,
+        variables: List[str],
+        grouping: Optional[str] = None,
+    ) -> Dict[str, Any]:
         """Perform non-parametric statistical tests."""
         df = self._load_data(file_path)
         self._validate_variables(df, variables + ([grouping] if grouping else []))
-        import scipy.stats as stats
-
+        import scipy.stats as stats  # type: ignore[import-untyped]
+
+        if test_type == "mann_whitney":
             if len(variables) != 2:
                 raise AnalysisError("Mann-Whitney U test requires exactly 2 variables")
             x = df[variables[0]].dropna().values
             y = df[variables[1]].dropna().values
             u_stat, p_value = stats.mannwhitneyu(x, y)
             return StatsResult(
-                test_type=
+                test_type="Mann-Whitney U test",
                 statistic=float(u_stat),
                 pvalue=float(p_value),
                 significant=p_value < 0.05,
                 additional_metrics={
-
-
-
-
-                }
+                    "n1": len(x),
+                    "n2": len(y),
+                    "median1": float(np.median(x)),
+                    "median2": float(np.median(y)),
+                },
             ).to_dict()
-            elif test_type ==
+        elif test_type == "wilcoxon":
             if len(variables) != 2:
                 raise AnalysisError("Wilcoxon signed-rank test requires exactly 2 variables")
             x = df[variables[0]].dropna().values
@@ -336,161 +529,202 @@ class StatsTool(BaseTool):
             y = y[:min_len]
             w_stat, p_value = stats.wilcoxon(x, y)
             return StatsResult(
-                test_type=
+                test_type="Wilcoxon signed-rank test",
                 statistic=float(w_stat),
                 pvalue=float(p_value),
                 significant=p_value < 0.05,
                 additional_metrics={
-
-
-                }
+                    "n_pairs": min_len,
+                    "median_difference": float(np.median(x - y)),
+                },
             ).to_dict()
-            elif test_type ==
+        elif test_type == "kruskal":
             if not grouping:
                 raise AnalysisError("Kruskal-Wallis test requires a grouping variable")
             groups = {f"{var}_{name}": group[var].dropna().values for name, group in df.groupby(grouping) for var in variables}
             h_stat, p_value = stats.kruskal(*groups.values())
             return StatsResult(
-                test_type=
+                test_type="Kruskal-Wallis H test",
                 statistic=float(h_stat),
                 pvalue=float(p_value),
                 significant=p_value < 0.05,
                 additional_metrics={
-
-
-
-                }
+                    "groups": len(groups),
+                    "group_sizes": {name: len(values) for name, values in groups.items()},
+                    "group_medians": {name: float(np.median(values)) for name, values in groups.items()},
+                },
             ).to_dict()
-            elif test_type ==
+        elif test_type == "friedman":
             if len(variables) < 2:
                 raise AnalysisError("Friedman test requires at least 2 variables")
             data = df[variables].dropna()
             chi2, p_value = stats.friedmanchisquare(*[data[var].values for var in variables])
             return StatsResult(
-                test_type=
+                test_type="Friedman test",
                 statistic=float(chi2),
                 pvalue=float(p_value),
                 significant=p_value < 0.05,
                 additional_metrics={
-
-
-
-                }
+                    "n_measures": len(variables),
+                    "n_samples": len(data),
+                    "variable_medians": {var: float(np.median(data[var])) for var in variables},
+                },
             ).to_dict()
         else:
             raise AnalysisError(f"Unsupported non-parametric test type: {test_type}. Supported types: mann_whitney, wilcoxon, kruskal, friedman")

-    def regression(
+    def regression(
+        self,
+        file_path: str,
+        formula: str,
+        regression_type: str = "ols",
+        robust: bool = False,
+        structured_output: bool = True,
+    ) -> Dict[str, Any]:
         """Perform regression analysis with various models."""
         df = self._load_data(file_path)
-        import statsmodels.formula.api as smf
+        import statsmodels.formula.api as smf  # type: ignore[import-untyped]
+
         try:
             model_map = {
-
-
-
-
+                "ols": smf.ols,
+                "logit": smf.logit,
+                "probit": smf.probit,
+                "poisson": smf.poisson,
             }
             model = model_map[regression_type](formula=formula, data=df)
-            fit = model.fit(cov_type=
+            fit = model.fit(cov_type="HC3" if robust else "nonrobust")
             if structured_output:
                 result = {
-
-
-
-
-
-
-
-
-
-
-
+                    "model_type": regression_type,
+                    "formula": formula,
+                    "n_observations": int(fit.nobs),
+                    "r_squared": (float(fit.rsquared) if hasattr(fit, "rsquared") else None),
+                    "adj_r_squared": (float(fit.rsquared_adj) if hasattr(fit, "rsquared_adj") else None),
+                    "aic": float(fit.aic) if hasattr(fit, "aic") else None,
+                    "bic": float(fit.bic) if hasattr(fit, "bic") else None,
+                    "f_statistic": (float(fit.fvalue) if hasattr(fit, "fvalue") else None),
+                    "f_pvalue": (float(fit.f_pvalue) if hasattr(fit, "f_pvalue") else None),
+                    "log_likelihood": (float(fit.llf) if hasattr(fit, "llf") else None),
+                    "coefficients": {
                         var: {
-
-
-
-
-
-
-
-                        }
-
+                            "coef": float(fit.params[var]),
+                            "std_err": float(fit.bse[var]),
+                            "t_value": (float(fit.tvalues[var]) if hasattr(fit, "tvalues") else None),
+                            "p_value": float(fit.pvalues[var]),
+                            "significant": fit.pvalues[var] < 0.05,
+                            "conf_lower": float(fit.conf_int().loc[var, 0]),
+                            "conf_upper": float(fit.conf_int().loc[var, 1]),
+                        }
+                        for var in fit.params.index
+                    },
                 }
-                return {
-
+                return {
+                    "summary_text": fit.summary().as_text(),
+                    "structured": result,
+                }
+            return {"summary": fit.summary().as_text()}
         except Exception as e:
             raise AnalysisError(f"Regression error: {str(e)}")

-    def time_series(
+    def time_series(
+        self,
+        file_path: str,
+        variable: str,
+        date_variable: Optional[str] = None,
+        model_type: str = "arima",
+        order: Optional[Tuple[int, int, int]] = (1, 1, 1),
+        seasonal_order: Optional[Tuple[int, int, int, int]] = None,
+        forecast_periods: int = 10,
+    ) -> Dict[str, Any]:
         """Perform time series analysis."""
         df = self._load_data(file_path)
         self._validate_variables(df, [variable] + ([date_variable] if date_variable else []))
-        from statsmodels.tsa.arima.model import ARIMA
-        from statsmodels.tsa.statespace.sarimax import SARIMAX
+        from statsmodels.tsa.arima.model import ARIMA  # type: ignore[import-untyped]
+        from statsmodels.tsa.statespace.sarimax import SARIMAX  # type: ignore[import-untyped]
+
         try:
             ts_data = df[variable].dropna()
             if date_variable and date_variable in df.columns:
                 ts_data.index = df[date_variable]
-            if model_type ==
+            if model_type == "arima":
                 model = ARIMA(ts_data, order=order)
                 fit = model.fit()
-                model_type_name =
-            elif model_type ==
+                model_type_name = "ARIMA"
+            elif model_type == "sarima":
                 if not seasonal_order:
                     raise AnalysisError("seasonal_order must be provided for SARIMA model")
                 model = SARIMAX(ts_data, order=order, seasonal_order=seasonal_order)
                 fit = model.fit(disp=False)
-                model_type_name =
+                model_type_name = "SARIMA"
             else:
                 raise AnalysisError(f"Unsupported time series model: {model_type}")
             forecast = fit.forecast(steps=forecast_periods)
             forecast_index = pd.date_range(
-                start=ts_data.index[-1] if isinstance(ts_data.index, pd.DatetimeIndex) else len(ts_data),
+                start=(ts_data.index[-1] if isinstance(ts_data.index, pd.DatetimeIndex) else len(ts_data)),
                 periods=forecast_periods + 1,
-                freq=
+                freq="D",
             )[1:]
             return {
-
-
-
-
-
-
-
-
+                "model_type": model_type_name,
+                "order": order,
+                "seasonal_order": (seasonal_order if model_type == "sarima" else None),
+                "aic": float(fit.aic),
+                "bic": float(fit.bic),
+                "forecast": {
+                    "values": (forecast.tolist() if isinstance(forecast, np.ndarray) else forecast.values.tolist()),
+                    "index": (forecast_index.strftime("%Y-%m-%d").tolist() if isinstance(forecast_index, pd.DatetimeIndex) else list(range(len(forecast)))),
                 },
-
+                "summary": str(fit.summary()),
             }
         except Exception as e:
             raise AnalysisError(f"Time series analysis error: {str(e)}")

-    def preprocess(
+    def preprocess(
+        self,
+        file_path: str,
+        variables: List[str],
+        operation: str,
+        scaler_type: ScalerType = ScalerType.STANDARD,
+        output_path: Optional[str] = None,
+    ) -> Dict[str, Any]:
         """Preprocess data with various operations."""
         df = self._load_data(file_path)
         self._validate_variables(df, variables)
         data = df[variables].copy()
-        result = {
-        if operation ==
-        from sklearn.preprocessing import
+        result: Dict[str, Any] = {"operation": operation}
+        if operation == "scale":
+            from sklearn.preprocessing import (  # type: ignore[import-untyped]
+                StandardScaler,
+                MinMaxScaler,
+                RobustScaler,
+            )
+
             scaler_map = {
                 ScalerType.STANDARD: (StandardScaler, "StandardScaler"),
                 ScalerType.MINMAX: (MinMaxScaler, "MinMaxScaler"),
-                ScalerType.ROBUST: (RobustScaler, "RobustScaler")
+                ScalerType.ROBUST: (RobustScaler, "RobustScaler"),
             }
             scaler_cls, scaler_name = scaler_map[scaler_type]
             scaler = scaler_cls()
             scaled_data = scaler.fit_transform(data)
-            scaled_df = pd.DataFrame(
-
-
-
-
-
-
+            scaled_df = pd.DataFrame(
+                scaled_data,
+                columns=[f"{col}_scaled" for col in data.columns],
+                index=data.index,
+            )
+            result.update(
+                {
+                    "scaler": scaler_name,
+                    "original_stats": data.describe().to_dict(),
+                    "scaled_stats": scaled_df.describe().to_dict(),
+                    "preview": scaled_df.head(5).to_dict(orient="records"),
+                }
+            )
             processed_df = scaled_df
-        elif operation ==
+        elif operation == "impute":
             import numpy as np
+
             imputed_df = data.copy()
             numeric_cols = data.select_dtypes(include=[np.number]).columns
             for col in numeric_cols:
@@ -498,16 +732,21 @@ class StatsTool(BaseTool):
             cat_cols = data.select_dtypes(exclude=[np.number]).columns
             for col in cat_cols:
                 imputed_df[col] = data[col].fillna(data[col].mode()[0] if not data[col].mode().empty else None)
-            result.update(
-
-
-
-
-
+            result.update(
+                {
+                    "imputation_method": {
+                        "numeric": "mean",
+                        "categorical": "mode",
+                    },
+                    "missing_counts_before": data.isna().sum().to_dict(),
+                    "missing_counts_after": imputed_df.isna().sum().to_dict(),
+                    "preview": imputed_df.head(5).to_dict(orient="records"),
+                }
+            )
             processed_df = imputed_df
         if output_path:
-            output_path = os.path.abspath(output_path) if os.path.isabs(output_path) else os.path.join(tempfile.gettempdir(),
+            output_path = os.path.abspath(output_path) if os.path.isabs(output_path) else os.path.join(tempfile.gettempdir(), "stats_outputs", output_path)
             os.makedirs(os.path.dirname(output_path), exist_ok=True)
             processed_df.to_csv(output_path)
-            result[
+            result["output_file"] = output_path
         return result
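Taken together, these hunks show the stats tool moving from a module-level StatsSettings class to a nested pydantic-settings Config plus per-operation Pydantic schemas, with operations exposed as typed methods. Below is a minimal sketch of how the refactored tool could be exercised. It is not an official usage example from the package: the module import path and the environment-variable override are assumptions inferred from the file layout and the STATS_TOOL_ prefix documented in the Config docstring, and the example data file name is hypothetical.

# Sketch only: exercises StatsTool based on the signatures added in the diff above.
# The module path, env-var override, and "survey.sav" file are assumptions.
import os

# Assumed override, picked up via the STATS_TOOL_ prefix documented in Config.
os.environ["STATS_TOOL_MAX_FILE_SIZE_MB"] = "500"

from aiecs.tools.task_tools.stats_tool import StatsTool  # assumed module path

# Per the new __init__ docstring, configuration resolution is handled by BaseTool.
tool = StatsTool()

# describe() and ttest() follow the keyword signatures shown in the diff.
summary = tool.describe("survey.sav", variables=["age", "income"])
result = tool.ttest("survey.sav", var1="score_pre", var2="score_post", paired=True)

# ttest() returns StatsResult.to_dict(), so "pvalue" and "significant" are present.
print(result["pvalue"], result["significant"])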