aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of aiecs might be problematic.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
aiecs/tools/task_tools/scraper_tool.py

@@ -2,23 +2,24 @@ import os
 import json
 import time
 import logging
-import asyncio
 import tempfile
 import subprocess
-from typing import Dict, Any, List, Optional,
+from typing import Dict, Any, List, Optional, Tuple, Union
+import csv
 from enum import Enum
-from urllib.parse import urlparse, urljoin
 
 import httpx
 from bs4 import BeautifulSoup
 from urllib import request as urllib_request
-from pydantic import BaseModel,
-from pydantic_settings import BaseSettings
+from pydantic import BaseModel, Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
 
 from aiecs.tools.base_tool import BaseTool
 from aiecs.tools import register_tool
 
 # Enums for configuration options
+
+
 class HttpMethod(str, Enum):
     GET = "get"
     POST = "post"
@@ -28,12 +29,14 @@ class HttpMethod(str, Enum):
     OPTIONS = "options"
     PATCH = "patch"
 
+
 class ContentType(str, Enum):
     HTML = "html"
     JSON = "json"
     TEXT = "text"
     BINARY = "binary"
 
+
 class OutputFormat(str, Enum):
     TEXT = "text"
     JSON = "json"
@@ -41,68 +44,44 @@ class OutputFormat(str, Enum):
     MARKDOWN = "markdown"
     CSV = "csv"
 
+
 class RenderEngine(str, Enum):
     NONE = "none"
     PLAYWRIGHT = "playwright"
 
-# Global settings
-class ScraperSettings(BaseSettings):
-    """
-    Configuration for ScraperTool.
-
-    Attributes:
-        user_agent (str): User agent for HTTP requests.
-        max_content_length (int): Maximum content length in bytes.
-        output_dir (str): Directory for output files.
-        scrapy_command (str): Command to run Scrapy.
-        allowed_domains (List[str]): Allowed domains for scraping.
-        blocked_domains (List[str]): Blocked domains for scraping.
-        playwright_available (bool): Whether Playwright is available.
-        env_prefix (str): Environment variable prefix.
-    """
-    user_agent: str = "PythonMiddlewareScraper/2.0"
-    max_content_length: int = 10 * 1024 * 1024  # 10MB
-    output_dir: str = os.path.join(tempfile.gettempdir(), 'scraper_outputs')
-    scrapy_command: str = "scrapy"
-    allowed_domains: List[str] = []
-    blocked_domains: List[str] = []
-    playwright_available: bool = False
-    env_prefix: str = "SCRAPER_TOOL_"
-
-    model_config = ConfigDict(env_prefix="SCRAPER_TOOL_")
 
 # Exceptions
 class ScraperToolError(Exception):
     """Base exception for ScraperTool errors."""
-
+
 
 class HttpError(ScraperToolError):
     """Raised when HTTP requests fail."""
-
+
 
 class TimeoutError(ScraperToolError):
     """Raised when operations time out."""
-
+
 
 class RateLimitError(ScraperToolError):
     """Raised when rate limits are exceeded."""
-
+
 
 class ParsingError(ScraperToolError):
     """Raised when HTML parsing fails."""
-
+
 
 class RenderingError(ScraperToolError):
     """Raised when rendering fails."""
-
+
 
 class ExternalToolError(ScraperToolError):
     """Raised when external tools fail."""
-
+
 
 class FileOperationError(ScraperToolError):
     """Raised when file operations fail."""
-
+
 
 @register_tool("scraper")
 class ScraperTool(BaseTool):
@@ -117,65 +96,128 @@ class ScraperTool(BaseTool):
     - Scrapy integration for advanced crawling
     - Output in various formats: text, JSON, HTML, Markdown, CSV
     """
-
+
+    # Configuration schema
+    class Config(BaseSettings):
+        """Configuration for the scraper tool
+
+        Automatically reads from environment variables with SCRAPER_TOOL_ prefix.
+        Example: SCRAPER_TOOL_USER_AGENT -> user_agent
+        """
+
+        model_config = SettingsConfigDict(env_prefix="SCRAPER_TOOL_")
+
+        user_agent: str = Field(
+            default="PythonMiddlewareScraper/2.0",
+            description="User agent for HTTP requests",
+        )
+        max_content_length: int = Field(
+            default=10 * 1024 * 1024,
+            description="Maximum content length in bytes",
+        )
+        output_dir: str = Field(
+            default=os.path.join(tempfile.gettempdir(), "scraper_outputs"),
+            description="Directory for output files",
+        )
+        scrapy_command: str = Field(default="scrapy", description="Command to run Scrapy")
+        allowed_domains: List[str] = Field(default=[], description="Allowed domains for scraping")
+        blocked_domains: List[str] = Field(default=[], description="Blocked domains for scraping")
+        playwright_available: bool = Field(
+            default=False,
+            description="Whether Playwright is available (auto-detected)",
+        )
+
+    # Schema definitions
+    class Get_httpxSchema(BaseModel):
+        """Schema for get_httpx operation"""
+
+        url: str = Field(description="URL to scrape")
+        method: HttpMethod = Field(default=HttpMethod.GET, description="HTTP method to use: GET, POST, PUT, DELETE, HEAD, OPTIONS, or PATCH")
+        params: Optional[Dict[str, str]] = Field(default=None, description="Optional query parameters as dictionary")
+        data: Optional[Dict[str, Any]] = Field(default=None, description="Optional form data as dictionary. Mutually exclusive with json_data")
+        json_data: Optional[Dict[str, Any]] = Field(default=None, description="Optional JSON data as dictionary. Mutually exclusive with data")
+        cookies: Optional[Dict[str, str]] = Field(default=None, description="Optional cookies as dictionary")
+        auth: Optional[Tuple[str, str]] = Field(default=None, description="Optional authentication credentials as (username, password) tuple")
+        verify_ssl: Optional[bool] = Field(default=None, description="Optional SSL certificate verification. If None, defaults to True")
+        allow_redirects: bool = Field(default=True, description="Whether to allow HTTP redirects")
+        content_type: ContentType = Field(default=ContentType.TEXT, description="Expected content type: TEXT, JSON, HTML, or BINARY")
+        headers: Optional[Dict[str, str]] = Field(default=None, description="Optional custom HTTP headers as dictionary")
+        output_format: Optional[OutputFormat] = Field(default=None, description="Optional output format for saving: TEXT, JSON, HTML, MARKDOWN, or CSV")
+        output_path: Optional[str] = Field(default=None, description="Optional path to save output file. Requires output_format to be specified")
+        async_mode: bool = Field(default=True, description="Whether to use async HTTP client. If False, uses synchronous client")
+
+    class Parse_htmlSchema(BaseModel):
+        """Schema for parse_html operation"""
+
+        html: str = Field(description="HTML content string to parse")
+        selector: str = Field(description="CSS selector or XPath expression to find elements")
+        selector_type: str = Field(default="css", description="Selector type: 'css' for CSS selectors or 'xpath' for XPath expressions")
+        extract_attr: Optional[str] = Field(default=None, description="Optional attribute name to extract from matched elements (e.g., 'href', 'src')")
+        extract_text: bool = Field(default=True, description="Whether to extract text content from matched elements. Ignored if extract_attr is specified")
+
+    def __init__(self, config: Optional[Dict] = None, **kwargs):
         """
         Initialize ScraperTool with settings and resources.
 
         Args:
-            config (Dict, optional): Configuration overrides for
+            config (Dict, optional): Configuration overrides for ScraperTool.
+            **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)
 
         Raises:
             ValueError: If config contains invalid settings.
+
+        Configuration is automatically loaded by BaseTool from:
+        1. Explicit config dict (highest priority)
+        2. YAML config files (config/tools/scraper.yaml)
+        3. Environment variables (via dotenv from .env files)
+        4. Tool defaults (lowest priority)
         """
-        super().__init__(config)
-
-
-
-
-
-            raise ValueError(f"Invalid settings: {e}")
+        super().__init__(config, **kwargs)
+
+        # Configuration is automatically loaded by BaseTool into self._config_obj
+        # Access config via self._config_obj (BaseSettings instance)
+        self.config = self._config_obj if self._config_obj else self.Config()
+
         self.logger = logging.getLogger(__name__)
         if not self.logger.handlers:
             handler = logging.StreamHandler()
-            handler.setFormatter(logging.Formatter(
+            handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
            self.logger.addHandler(handler)
            self.logger.setLevel(logging.INFO)
-        os.makedirs(self.
+        os.makedirs(self.config.output_dir, exist_ok=True)
         self._check_external_tools()
 
     def _check_external_tools(self):
         """Check if external tools are available."""
         try:
-
-            self.settings.playwright_available = True
+            self.config.playwright_available = True
         except ImportError:
-            self.
-
+            self.config.playwright_available = False
 
     async def _save_output(self, content: Any, path: str, format: OutputFormat) -> None:
         """Save content to file in the specified format."""
         try:
             os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
             if format == OutputFormat.TEXT:
-                with open(path,
+                with open(path, "w", encoding="utf-8") as f:
                     if isinstance(content, dict):
                         f.write(json.dumps(content, indent=2))
                     else:
                         f.write(str(content))
             elif format == OutputFormat.JSON:
-                with open(path,
+                with open(path, "w", encoding="utf-8") as f:
                     if isinstance(content, dict):
                         json.dump(content, f, indent=2)
                     else:
                         json.dump({"content": content}, f, indent=2)
             elif format == OutputFormat.HTML:
-                with open(path,
-                    if isinstance(content, dict) and
-                        f.write(content[
+                with open(path, "w", encoding="utf-8") as f:
+                    if isinstance(content, dict) and "html" in content:
+                        f.write(content["html"])
                     else:
                         f.write(str(content))
             elif format == OutputFormat.MARKDOWN:
-                with open(path,
+                with open(path, "w", encoding="utf-8") as f:
                     if isinstance(content, dict):
                         f.write("# Scraper Results\n\n")
                         for key, value in content.items():
@@ -186,7 +228,9 @@ class ScraperTool(BaseTool):
                         f.write(str(content))
             elif format == OutputFormat.CSV:
                 import csv
-
+
+                with open(path, "w", newline="", encoding="utf-8") as f:
+                    writer: Union[Any, Any]  # csv.writer or csv.DictWriter instance
                 if isinstance(content, dict):
                     writer = csv.writer(f)
                     writer.writerow(content.keys())
@@ -203,7 +247,23 @@ class ScraperTool(BaseTool):
         except Exception as e:
             raise FileOperationError(f"Error saving output: {str(e)}")
 
-    async def get_httpx(
+    async def get_httpx(
+        self,
+        url: str,
+        method: HttpMethod = HttpMethod.GET,
+        params: Optional[Dict[str, str]] = None,
+        data: Optional[Dict[str, Any]] = None,
+        json_data: Optional[Dict[str, Any]] = None,
+        cookies: Optional[Dict[str, str]] = None,
+        auth: Optional[Tuple[str, str]] = None,
+        verify_ssl: Optional[bool] = None,
+        allow_redirects: bool = True,
+        content_type: ContentType = ContentType.TEXT,
+        headers: Optional[Dict[str, str]] = None,
+        output_format: Optional[OutputFormat] = None,
+        output_path: Optional[str] = None,
+        async_mode: bool = True,
+    ) -> Any:
         """
         Execute HTTP request using httpx library (supports both sync and async).
 
@@ -231,21 +291,21 @@ class ScraperTool(BaseTool):
         """
         try:
             headers = headers or {}
-            if
-                headers[
-            kwargs = {
-
-
-
+            if "User-Agent" not in headers:
+                headers["User-Agent"] = self.config.user_agent
+            kwargs: Dict[str, Any] = {
+                "params": params,
+                "headers": headers,
+                "follow_redirects": allow_redirects,
             }
             if auth:
-                kwargs[
+                kwargs["auth"] = auth  # httpx accepts Tuple[str, str] for auth
             if cookies:
-                kwargs[
+                kwargs["cookies"] = cookies
             if json_data:
-                kwargs[
+                kwargs["json"] = json_data
             elif data:
-                kwargs[
+                kwargs["data"] = data
 
             if async_mode:
                 async with httpx.AsyncClient(verify=verify_ssl if verify_ssl is not None else True) as client:
@@ -260,30 +320,47 @@ class ScraperTool(BaseTool):
                     resp.raise_for_status()
                 except httpx.HTTPStatusError as e:
                     raise HttpError(f"HTTP {e.response.status_code}: {e.response.reason_phrase} for {url}")
-
-            if len(resp.content) > self.
+
+            if len(resp.content) > self.config.max_content_length:
                 raise HttpError(f"Response content too large: {len(resp.content)} bytes")
 
             if content_type == ContentType.JSON:
                 result = resp.json()
             elif content_type == ContentType.HTML:
-                result = {
+                result = {
+                    "html": resp.text,
+                    "url": str(resp.url),
+                    "status": resp.status_code,
+                }
             elif content_type == ContentType.BINARY:
-                result = {
+                result = {
+                    "content": resp.content,
+                    "url": str(resp.url),
+                    "status": resp.status_code,
+                }
             else:
                 result = resp.text
 
             if output_format and output_path:
                 await self._save_output(result, output_path, output_format)
                 if isinstance(result, dict):
-                    result[
+                    result["saved_to"] = output_path
                 else:
-                    result = {
+                    result = {"content": result, "saved_to": output_path}
             return result
         except httpx.RequestError as e:
             raise HttpError(f"Request failed: {str(e)}")
 
-    async def get_urllib(
+    async def get_urllib(
+        self,
+        url: str,
+        method: HttpMethod = HttpMethod.GET,
+        data: Optional[Dict[str, Any]] = None,
+        content_type: ContentType = ContentType.TEXT,
+        headers: Optional[Dict[str, str]] = None,
+        output_format: Optional[OutputFormat] = None,
+        output_path: Optional[str] = None,
+    ) -> Any:
         """
         Execute HTTP request using urllib.
 
@@ -305,10 +382,10 @@ class ScraperTool(BaseTool):
         try:
             import urllib.parse
             import urllib.error
-
+
             headers = headers or {}
-            if
-                headers[
+            if "User-Agent" not in headers:
+                headers["User-Agent"] = self.config.user_agent
             data_bytes = None
             if data:
                 data_bytes = urllib.parse.urlencode(data).encode()
@@ -316,42 +393,122 @@ class ScraperTool(BaseTool):
                 str(url),
                 data=data_bytes,
                 headers=headers,
-                method=method.value.upper()
+                method=method.value.upper(),
             )
             with urllib_request.urlopen(req) as resp:
-                content_length = resp.getheader(
-                if content_length and int(content_length) > self.
+                content_length = resp.getheader("Content-Length")
+                if content_length and int(content_length) > self.config.max_content_length:
                     raise HttpError(f"Response content too large: {content_length} bytes")
                 content = resp.read()
-                charset = resp.headers.get_content_charset() or
+                charset = resp.headers.get_content_charset() or "utf-8"
                 if content_type == ContentType.JSON:
-                    result = json.loads(content.decode(charset, errors=
+                    result = json.loads(content.decode(charset, errors="ignore"))
                 elif content_type == ContentType.HTML:
-                    result = {
+                    result = {
+                        "html": content.decode(charset, errors="ignore"),
+                        "url": resp.url,
+                        "status": resp.status,
+                    }
                 elif content_type == ContentType.BINARY:
-                    result = {
+                    result = {
+                        "content": content,
+                        "url": resp.url,
+                        "status": resp.status,
+                    }
                 else:
-                    result = content.decode(charset, errors=
+                    result = content.decode(charset, errors="ignore")
                 if output_format and output_path:
                     await self._save_output(result, output_path, output_format)
                     if isinstance(result, dict):
-                        result[
+                        result["saved_to"] = output_path
                     else:
-                        result = {
+                        result = {"content": result, "saved_to": output_path}
                 return result
         except urllib.error.URLError as e:
             raise HttpError(f"Request failed: {str(e)}")
 
     # Legacy method names for backward compatibility
-    async def get_requests(
+    async def get_requests(
+        self,
+        url: str,
+        method: HttpMethod = HttpMethod.GET,
+        params: Optional[Dict[str, str]] = None,
+        data: Optional[Dict[str, Any]] = None,
+        json_data: Optional[Dict[str, Any]] = None,
+        cookies: Optional[Dict[str, str]] = None,
+        auth: Optional[Tuple[str, str]] = None,
+        verify_ssl: Optional[bool] = None,
+        allow_redirects: bool = True,
+        content_type: ContentType = ContentType.TEXT,
+        headers: Optional[Dict[str, str]] = None,
+        output_format: Optional[OutputFormat] = None,
+        output_path: Optional[str] = None,
+    ) -> Any:
         """Legacy method - now uses httpx in sync mode."""
-        return await self.get_httpx(
-
-
+        return await self.get_httpx(
+            url,
+            method,
+            params,
+            data,
+            json_data,
+            cookies,
+            auth,
+            verify_ssl,
+            allow_redirects,
+            content_type,
+            headers,
+            output_format,
+            output_path,
+            async_mode=False,
+        )
+
+    async def get_aiohttp(
+        self,
+        url: str,
+        method: HttpMethod = HttpMethod.GET,
+        params: Optional[Dict[str, str]] = None,
+        data: Optional[Dict[str, Any]] = None,
+        json_data: Optional[Dict[str, Any]] = None,
+        cookies: Optional[Dict[str, str]] = None,
+        auth: Optional[Tuple[str, str]] = None,
+        verify_ssl: Optional[bool] = None,
+        allow_redirects: bool = True,
+        content_type: ContentType = ContentType.TEXT,
+        headers: Optional[Dict[str, str]] = None,
+        output_format: Optional[OutputFormat] = None,
+        output_path: Optional[str] = None,
+    ) -> Any:
         """Legacy method - now uses httpx in async mode."""
-        return await self.get_httpx(
-
-
+        return await self.get_httpx(
+            url,
+            method,
+            params,
+            data,
+            json_data,
+            cookies,
+            auth,
+            verify_ssl,
+            allow_redirects,
+            content_type,
+            headers,
+            output_format,
+            output_path,
+            async_mode=True,
+        )
+
+    async def render(
+        self,
+        url: str,
+        engine: RenderEngine = RenderEngine.PLAYWRIGHT,
+        wait_time: int = 5,
+        wait_selector: Optional[str] = None,
+        scroll_to_bottom: bool = False,
+        screenshot: bool = False,
+        screenshot_path: Optional[str] = None,
+        headers: Optional[Dict[str, str]] = None,
+        output_format: Optional[OutputFormat] = None,
+        output_path: Optional[str] = None,
+    ) -> Dict[str, Any]:
         """
         Render a web page using a headless browser (Playwright).
 
@@ -375,56 +532,85 @@ class ScraperTool(BaseTool):
         """
         try:
             if engine == RenderEngine.PLAYWRIGHT:
-                if not self.
+                if not self.config.playwright_available:
                     raise RenderingError("Playwright is not available. Install with 'pip install playwright'")
-                result = await self._render_with_playwright(
+                result = await self._render_with_playwright(
+                    url,
+                    wait_time,
+                    wait_selector,
+                    scroll_to_bottom,
+                    screenshot,
+                    screenshot_path,
+                )
             else:
                 raise RenderingError(f"Unsupported rendering engine: {engine}. Only PLAYWRIGHT is supported.")
             if output_format and output_path:
                 await self._save_output(result, output_path, output_format)
-                result[
+                result["saved_to"] = output_path
             return result
         except Exception as e:
             raise RenderingError(f"Failed to render page: {str(e)}")
 
-    async def _render_with_playwright(
+    async def _render_with_playwright(
+        self,
+        url: str,
+        wait_time: int,
+        wait_selector: Optional[str],
+        scroll_to_bottom: bool,
+        screenshot: bool,
+        screenshot_path: Optional[str],
+    ) -> Dict[str, Any]:
         """Render a web page using Playwright with async API."""
         from playwright.async_api import async_playwright
+
         async with async_playwright() as p:
             browser = await p.chromium.launch()
             page = await browser.new_page(
-                user_agent=self.
-                viewport={
+                user_agent=self.config.user_agent,
+                viewport={"width": 1280, "height": 800},
             )
             try:
                 await page.goto(url)
                 if wait_selector:
                     await page.wait_for_selector(wait_selector)
                 else:
-                    await page.wait_for_load_state(
+                    await page.wait_for_load_state("networkidle")
                 if scroll_to_bottom:
-                    await page.evaluate(
+                    await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                     await page.wait_for_timeout(1000)
                 screenshot_result = None
                 if screenshot:
-                    screenshot_path = screenshot_path or os.path.join(
-
+                    screenshot_path = screenshot_path or os.path.join(
+                        self.config.output_dir,
+                        f"screenshot_{int(time.time())}.png",
+                    )
+                    os.makedirs(
+                        os.path.dirname(os.path.abspath(screenshot_path)),
+                        exist_ok=True,
+                    )
                     await page.screenshot(path=screenshot_path)
                     screenshot_result = screenshot_path
                 html = await page.content()
                 title = await page.title()
                 result = {
-
-
-
-
+                    "html": html,
+                    "title": title,
+                    "url": page.url,
+                    "screenshot": screenshot_result,
                 }
                 return result
             finally:
                 await browser.close()
 
-
-
+    def crawl_scrapy(
+        self,
+        project_path: str,
+        spider_name: str,
+        output_path: str,
+        spider_args: Optional[Dict[str, str]] = None,
+        headers: Optional[Dict[str, str]] = None,
+        output_format: Optional[OutputFormat] = None,
+    ) -> Dict[str, Any]:
         """
         Execute a Scrapy spider in an existing project and output results to a file.
 
@@ -447,21 +633,25 @@ class ScraperTool(BaseTool):
             start_time = time.time()
             os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
             cmd = [
-                self.
-
-
-
-
+                self.config.scrapy_command,
+                "crawl",
+                spider_name,
+                "-o",
+                output_path,
+                "-s",
+                f"USER_AGENT={self.config.user_agent}",
+                "-s",
+                "LOG_LEVEL=INFO",
             ]
             if spider_args:
                 for k, v in spider_args.items():
-                    cmd += [
+                    cmd += ["-a", f"{k}={v}"]
             process = subprocess.run(
                 cmd,
                 cwd=project_path,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
-                text=True
+                text=True,
             )
             if process.returncode != 0:
                 error_msg = process.stderr.strip()
@@ -470,19 +660,26 @@ class ScraperTool(BaseTool):
                 raise ExternalToolError(f"Scrapy crawl did not create output file: {output_path}")
             file_size = os.path.getsize(output_path)
             result = {
-
-
-
-
-
+                "output_path": output_path,
+                "execution_time": time.time() - start_time,
+                "file_size": file_size,
+                "stdout": process.stdout.strip(),
+                "stderr": process.stderr.strip(),
             }
             return result
         except subprocess.TimeoutExpired:
-            raise TimeoutError(
+            raise TimeoutError("Scrapy crawl timed out")
        except Exception as e:
             raise ExternalToolError(f"Error running Scrapy: {str(e)}")
 
-    def parse_html(
+    def parse_html(
+        self,
+        html: str,
+        selector: str,
+        selector_type: str = "css",
+        extract_attr: Optional[str] = None,
+        extract_text: bool = True,
+    ) -> Dict[str, Any]:
         """
         Parse HTML content using BeautifulSoup.
 
@@ -500,36 +697,37 @@ class ScraperTool(BaseTool):
             ParsingError: If parsing fails.
         """
         try:
-            soup = BeautifulSoup(html,
-            if selector_type ==
+            soup = BeautifulSoup(html, "html.parser")
+            if selector_type == "css":
                 elements = soup.select(selector)
             else:
                 from lxml.html import fromstring
                 from lxml.etree import XPath
+
                 root = fromstring(html)
                 xpath = XPath(selector)
                 elements = xpath(root)
             results = []
             for element in elements:
                 if extract_attr:
-                    value = element.get(extract_attr) if hasattr(element,
+                    value = element.get(extract_attr) if hasattr(element, "get") else element.get(extract_attr)
                     if value is not None:
                         results.append(value)
                 elif extract_text:
-                    if hasattr(element,
+                    if hasattr(element, "text_content") and callable(getattr(element, "text_content")):
                         # lxml element
-                        text = element.text_content()
+                        text = element.text_content()  # type: ignore[misc]
                     else:
                         # BeautifulSoup element
-                        text = element.get_text()
-
-                    if text and text.strip():
-                        results.append(text.strip())
+                        text = element.get_text()  # type: ignore[misc]
+
+                    if text and text.strip():  # type: ignore[misc]
+                        results.append(text.strip())  # type: ignore[misc]
             return {
-
-
-
-
+                "selector": selector,
+                "selector_type": selector_type,
+                "count": len(results),
+                "results": results,
            }
         except Exception as e:
             raise ParsingError(f"Error parsing HTML: {str(e)}")
@@ -542,7 +740,3 @@ class ScraperTool(BaseTool):
     head = get_httpx
     options = get_httpx
     patch = get_httpx
-
-    # Legacy method aliases
-    get_requests = get_httpx
-    get_aiohttp = get_httpx