aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +435 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3949 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1731 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +894 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +377 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +230 -37
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +328 -0
- aiecs/llm/clients/google_function_calling_mixin.py +415 -0
- aiecs/llm/clients/googleai_client.py +314 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +1186 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1464 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1016 -0
- aiecs/tools/docs/document_writer_tool.py +2008 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +220 -141
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
- aiecs-1.7.17.dist-info/RECORD +337 -0
- aiecs-1.7.17.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,778 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core SearchTool Implementation
|
|
3
|
+
|
|
4
|
+
Enhanced Google Custom Search Tool with quality analysis, intent understanding,
|
|
5
|
+
intelligent caching, and comprehensive metrics.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import time
|
|
11
|
+
from typing import Any, Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
from pydantic import Field
|
|
14
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
15
|
+
|
|
16
|
+
from aiecs.tools.base_tool import BaseTool
|
|
17
|
+
from aiecs.tools.tool_executor import cache_result_with_strategy
|
|
18
|
+
|
|
19
|
+
# Import Google API with graceful fallback
|
|
20
|
+
try:
|
|
21
|
+
from googleapiclient.discovery import build # type: ignore[import-untyped]
|
|
22
|
+
from googleapiclient.errors import HttpError # type: ignore[import-untyped]
|
|
23
|
+
from google.auth.exceptions import GoogleAuthError
|
|
24
|
+
from google.oauth2 import service_account
|
|
25
|
+
|
|
26
|
+
GOOGLE_API_AVAILABLE = True
|
|
27
|
+
except ImportError:
|
|
28
|
+
GOOGLE_API_AVAILABLE = False
|
|
29
|
+
HttpError = Exception # type: ignore[assignment,misc]
|
|
30
|
+
GoogleAuthError = Exception # type: ignore[assignment,misc]
|
|
31
|
+
|
|
32
|
+
# Import search tool components
|
|
33
|
+
from .constants import (
|
|
34
|
+
AuthenticationError,
|
|
35
|
+
QuotaExceededError,
|
|
36
|
+
RateLimitError,
|
|
37
|
+
CircuitBreakerOpenError,
|
|
38
|
+
SearchAPIError,
|
|
39
|
+
ValidationError,
|
|
40
|
+
)
|
|
41
|
+
from .rate_limiter import RateLimiter, CircuitBreaker
|
|
42
|
+
from .analyzers import (
|
|
43
|
+
ResultQualityAnalyzer,
|
|
44
|
+
QueryIntentAnalyzer,
|
|
45
|
+
ResultSummarizer,
|
|
46
|
+
)
|
|
47
|
+
from .deduplicator import ResultDeduplicator
|
|
48
|
+
from .context import SearchContext
|
|
49
|
+
from .cache import IntelligentCache
|
|
50
|
+
from .metrics import EnhancedMetrics
|
|
51
|
+
from .error_handler import AgentFriendlyErrorHandler
|
|
52
|
+
from pydantic import BaseModel, Field, field_validator
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class SearchTool(BaseTool):
|
|
56
|
+
"""
|
|
57
|
+
Enhanced web search tool using Google Custom Search API.
|
|
58
|
+
|
|
59
|
+
Provides intelligent search with:
|
|
60
|
+
- Quality scoring and ranking
|
|
61
|
+
- Query intent analysis
|
|
62
|
+
- Result deduplication
|
|
63
|
+
- Context-aware search
|
|
64
|
+
- Intelligent Redis caching
|
|
65
|
+
- Comprehensive metrics
|
|
66
|
+
- Agent-friendly error handling
|
|
67
|
+
"""
|
|
68
|
+
|
|
69
|
+
# Configuration schema
|
|
70
|
+
class Config(BaseSettings):
|
|
71
|
+
"""Configuration for the search tool
|
|
72
|
+
|
|
73
|
+
Automatically reads from environment variables with SEARCH_TOOL_ prefix.
|
|
74
|
+
Example: SEARCH_TOOL_GOOGLE_API_KEY -> google_api_key
|
|
75
|
+
|
|
76
|
+
Sensitive fields (API keys, credentials) are loaded from .env files via dotenv.
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
model_config = SettingsConfigDict(env_prefix="SEARCH_TOOL_")
|
|
80
|
+
|
|
81
|
+
google_api_key: Optional[str] = Field(default=None, description="Google API key for Custom Search")
|
|
82
|
+
google_cse_id: Optional[str] = Field(default=None, description="Custom Search Engine ID")
|
|
83
|
+
google_application_credentials: Optional[str] = Field(default=None, description="Path to service account JSON")
|
|
84
|
+
max_results_per_query: int = Field(default=10, description="Maximum results per single query")
|
|
85
|
+
cache_ttl: int = Field(default=3600, description="Default cache time-to-live in seconds")
|
|
86
|
+
rate_limit_requests: int = Field(default=100, description="Maximum requests per time window")
|
|
87
|
+
rate_limit_window: int = Field(
|
|
88
|
+
default=86400,
|
|
89
|
+
description="Time window for rate limiting in seconds",
|
|
90
|
+
)
|
|
91
|
+
circuit_breaker_threshold: int = Field(default=5, description="Failures before opening circuit")
|
|
92
|
+
circuit_breaker_timeout: int = Field(
|
|
93
|
+
default=60,
|
|
94
|
+
description="Timeout before trying half-open in seconds",
|
|
95
|
+
)
|
|
96
|
+
retry_attempts: int = Field(default=3, description="Number of retry attempts")
|
|
97
|
+
retry_backoff: float = Field(default=2.0, description="Exponential backoff factor")
|
|
98
|
+
timeout: int = Field(default=30, description="API request timeout in seconds")
|
|
99
|
+
user_agent: str = Field(default="AIECS-SearchTool/2.0", description="User agent string")
|
|
100
|
+
|
|
101
|
+
# Enhanced features
|
|
102
|
+
enable_quality_analysis: bool = Field(default=True, description="Enable result quality analysis")
|
|
103
|
+
enable_intent_analysis: bool = Field(default=True, description="Enable query intent analysis")
|
|
104
|
+
enable_deduplication: bool = Field(default=True, description="Enable result deduplication")
|
|
105
|
+
enable_context_tracking: bool = Field(default=True, description="Enable search context tracking")
|
|
106
|
+
enable_intelligent_cache: bool = Field(default=True, description="Enable intelligent Redis caching")
|
|
107
|
+
similarity_threshold: float = Field(default=0.85, description="Similarity threshold for deduplication")
|
|
108
|
+
max_search_history: int = Field(default=10, description="Maximum search history to maintain")
|
|
109
|
+
|
|
110
|
+
# Schema definitions
|
|
111
|
+
class Search_webSchema(BaseModel):
|
|
112
|
+
"""Schema for search_web operation"""
|
|
113
|
+
|
|
114
|
+
query: str = Field(description="Search query string")
|
|
115
|
+
num_results: int = Field(default=10, ge=1, le=100, description="Number of results to return (1-100)")
|
|
116
|
+
start_index: int = Field(default=1, ge=1, le=91, description="Starting index for pagination (1-91)")
|
|
117
|
+
language: str = Field(default="en", description="Language code for results (e.g., 'en', 'zh-CN', 'es')")
|
|
118
|
+
country: str = Field(default="us", description="Country code for geolocation (e.g., 'us', 'cn', 'uk')")
|
|
119
|
+
safe_search: str = Field(default="medium", description="Safe search level: 'off', 'medium', or 'high'")
|
|
120
|
+
date_restrict: Optional[str] = Field(default=None, description="Date restriction (e.g., 'd7' for last 7 days, 'm3' for last 3 months)")
|
|
121
|
+
file_type: Optional[str] = Field(default=None, description="File type filter (e.g., 'pdf', 'doc', 'xls')")
|
|
122
|
+
exclude_terms: Optional[List[str]] = Field(default=None, description="Terms to exclude from search results")
|
|
123
|
+
auto_enhance: bool = Field(default=True, description="Whether to automatically enhance query based on detected intent")
|
|
124
|
+
return_summary: bool = Field(default=False, description="Whether to return a structured summary of results")
|
|
125
|
+
|
|
126
|
+
@field_validator("safe_search")
|
|
127
|
+
@classmethod
|
|
128
|
+
def validate_safe_search(cls, v: str) -> str:
|
|
129
|
+
"""Validate safe search level"""
|
|
130
|
+
allowed = ["off", "medium", "high"]
|
|
131
|
+
if v not in allowed:
|
|
132
|
+
raise ValueError(f"safe_search must be one of {allowed}")
|
|
133
|
+
return v
|
|
134
|
+
|
|
135
|
+
class Search_imagesSchema(BaseModel):
|
|
136
|
+
"""Schema for search_images operation"""
|
|
137
|
+
|
|
138
|
+
query: str = Field(description="Image search query string")
|
|
139
|
+
num_results: int = Field(default=10, ge=1, le=100, description="Number of image results to return (1-100)")
|
|
140
|
+
image_size: Optional[str] = Field(default=None, description="Image size filter: 'icon', 'small', 'medium', 'large', 'xlarge', 'xxlarge', 'huge'")
|
|
141
|
+
image_type: Optional[str] = Field(default=None, description="Image type filter: 'clipart', 'face', 'lineart', 'stock', 'photo', 'animated'")
|
|
142
|
+
image_color_type: Optional[str] = Field(default=None, description="Color type filter: 'color', 'gray', 'mono', 'trans'")
|
|
143
|
+
safe_search: str = Field(default="medium", description="Safe search level: 'off', 'medium', or 'high'")
|
|
144
|
+
|
|
145
|
+
@field_validator("safe_search")
|
|
146
|
+
@classmethod
|
|
147
|
+
def validate_safe_search(cls, v: str) -> str:
|
|
148
|
+
"""Validate safe search level"""
|
|
149
|
+
allowed = ["off", "medium", "high"]
|
|
150
|
+
if v not in allowed:
|
|
151
|
+
raise ValueError(f"safe_search must be one of {allowed}")
|
|
152
|
+
return v
|
|
153
|
+
|
|
154
|
+
class Search_newsSchema(BaseModel):
|
|
155
|
+
"""Schema for search_news operation"""
|
|
156
|
+
|
|
157
|
+
query: str = Field(description="News search query string")
|
|
158
|
+
num_results: int = Field(default=10, ge=1, le=100, description="Number of news results to return (1-100)")
|
|
159
|
+
start_index: int = Field(default=1, ge=1, le=91, description="Starting index for pagination (1-91)")
|
|
160
|
+
language: str = Field(default="en", description="Language code for news articles (e.g., 'en', 'zh-CN', 'es')")
|
|
161
|
+
date_restrict: Optional[str] = Field(default=None, description="Date restriction (e.g., 'd7' for last 7 days, 'm1' for last month)")
|
|
162
|
+
sort_by: str = Field(default="date", description="Sort order: 'date' for newest first, 'relevance' for most relevant")
|
|
163
|
+
|
|
164
|
+
@field_validator("sort_by")
|
|
165
|
+
@classmethod
|
|
166
|
+
def validate_sort_by(cls, v: str) -> str:
|
|
167
|
+
"""Validate sort order"""
|
|
168
|
+
allowed = ["date", "relevance"]
|
|
169
|
+
if v not in allowed:
|
|
170
|
+
raise ValueError(f"sort_by must be one of {allowed}")
|
|
171
|
+
return v
|
|
172
|
+
|
|
173
|
+
class Search_videosSchema(BaseModel):
|
|
174
|
+
"""Schema for search_videos operation"""
|
|
175
|
+
|
|
176
|
+
query: str = Field(description="Video search query string")
|
|
177
|
+
num_results: int = Field(default=10, ge=1, le=100, description="Number of video results to return (1-100)")
|
|
178
|
+
start_index: int = Field(default=1, ge=1, le=91, description="Starting index for pagination (1-91)")
|
|
179
|
+
language: str = Field(default="en", description="Language code for videos (e.g., 'en', 'zh-CN', 'es')")
|
|
180
|
+
safe_search: str = Field(default="medium", description="Safe search level: 'off', 'medium', or 'high'")
|
|
181
|
+
|
|
182
|
+
@field_validator("safe_search")
|
|
183
|
+
@classmethod
|
|
184
|
+
def validate_safe_search(cls, v: str) -> str:
|
|
185
|
+
"""Validate safe search level"""
|
|
186
|
+
allowed = ["off", "medium", "high"]
|
|
187
|
+
if v not in allowed:
|
|
188
|
+
raise ValueError(f"safe_search must be one of {allowed}")
|
|
189
|
+
return v
|
|
190
|
+
|
|
191
|
+
class Get_metricsSchema(BaseModel):
|
|
192
|
+
"""Schema for get_metrics operation (no parameters required)"""
|
|
193
|
+
|
|
194
|
+
pass
|
|
195
|
+
|
|
196
|
+
class Get_metrics_reportSchema(BaseModel):
|
|
197
|
+
"""Schema for get_metrics_report operation (no parameters required)"""
|
|
198
|
+
|
|
199
|
+
pass
|
|
200
|
+
|
|
201
|
+
class Get_health_scoreSchema(BaseModel):
|
|
202
|
+
"""Schema for get_health_score operation (no parameters required)"""
|
|
203
|
+
|
|
204
|
+
pass
|
|
205
|
+
|
|
206
|
+
class Get_quota_statusSchema(BaseModel):
|
|
207
|
+
"""Schema for get_quota_status operation (no parameters required)"""
|
|
208
|
+
|
|
209
|
+
pass
|
|
210
|
+
|
|
211
|
+
class Get_search_contextSchema(BaseModel):
|
|
212
|
+
"""Schema for get_search_context operation (no parameters required)"""
|
|
213
|
+
|
|
214
|
+
pass
|
|
215
|
+
|
|
216
|
+
# Tool metadata
|
|
217
|
+
description = "Comprehensive web search tool using Google Custom Search API."
|
|
218
|
+
category = "task"
|
|
219
|
+
|
|
220
|
+
def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs):
    """
    Initialize SearchTool with enhanced capabilities.

    Args:
        config: Optional configuration overrides
        **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)

    Raises:
        AuthenticationError: If Google API libraries not available
        ValidationError: If configuration is invalid

    Configuration is automatically loaded by BaseTool from:
    1. Explicit config dict (highest priority)
    2. YAML config files (config/tools/search.yaml)
    3. Environment variables (via dotenv from .env files)
    4. Tool defaults (lowest priority)

    Sensitive fields (API keys, credentials) are loaded from .env files.
    """
    super().__init__(config, **kwargs)

    # Fail fast if the google-api-python-client stack was not importable at
    # module load time — nothing else in this tool can work without it.
    if not GOOGLE_API_AVAILABLE:
        raise AuthenticationError("Google API client libraries not available. " "Install with: pip install google-api-python-client google-auth google-auth-httplib2")

    # Configuration is automatically loaded by BaseTool into self._config_obj
    # Access config via self._config_obj (BaseSettings instance); fall back to
    # defaults when BaseTool produced no config object.
    self.config = self._config_obj if self._config_obj else self.Config()

    # Initialize logger. The handlers guard prevents attaching a duplicate
    # StreamHandler when several SearchTool instances share this module logger.
    self.logger = logging.getLogger(__name__)
    if not self.logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s [SearchTool] %(message)s"))
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

    # Initialize API client. _init_credentials() sets _service (and possibly
    # _credentials) or raises AuthenticationError.
    self._service = None
    self._credentials = None
    self._init_credentials()

    # Initialize core components (throttling + failure isolation).
    self.rate_limiter = RateLimiter(self.config.rate_limit_requests, self.config.rate_limit_window)

    self.circuit_breaker = CircuitBreaker(
        self.config.circuit_breaker_threshold,
        self.config.circuit_breaker_timeout,
    )

    # Initialize enhanced components; each is None when its feature flag is
    # off, and downstream code checks for None before use.
    self.quality_analyzer = ResultQualityAnalyzer() if self.config.enable_quality_analysis else None
    self.intent_analyzer = QueryIntentAnalyzer() if self.config.enable_intent_analysis else None
    self.deduplicator = ResultDeduplicator() if self.config.enable_deduplication else None
    self.result_summarizer = ResultSummarizer() if self.config.enable_quality_analysis else None
    self.search_context = SearchContext(self.config.max_search_history) if self.config.enable_context_tracking else None
    self.error_handler = AgentFriendlyErrorHandler()

    # Initialize intelligent cache (Redis). On any failure we degrade to a
    # disabled cache object rather than propagating the error.
    self.intelligent_cache = None
    if self.config.enable_intelligent_cache:
        try:
            from aiecs.infrastructure.persistence import RedisClient

            redis_client = RedisClient()
            # Note: Redis client needs to be initialized asynchronously
            self.intelligent_cache = IntelligentCache(redis_client, enabled=True)
        except Exception as e:
            self.logger.warning(f"Could not initialize Redis cache: {e}")
            self.intelligent_cache = IntelligentCache(None, enabled=False)

    # Initialize enhanced metrics
    self.metrics = EnhancedMetrics()

    self.logger.info("SearchTool initialized with enhanced capabilities")
|
|
295
|
+
|
|
296
|
+
def _create_search_ttl_strategy(self):
|
|
297
|
+
"""
|
|
298
|
+
Create intelligent TTL strategy for search results.
|
|
299
|
+
|
|
300
|
+
This strategy calculates TTL based on:
|
|
301
|
+
1. Query intent type (from result metadata)
|
|
302
|
+
2. Result freshness score
|
|
303
|
+
3. Result quality score
|
|
304
|
+
|
|
305
|
+
Returns:
|
|
306
|
+
Callable: TTL strategy function compatible with cache_result_with_strategy
|
|
307
|
+
"""
|
|
308
|
+
|
|
309
|
+
def calculate_search_ttl(result: Any, args: tuple, kwargs: dict) -> int:
|
|
310
|
+
"""
|
|
311
|
+
Calculate intelligent TTL for search results.
|
|
312
|
+
|
|
313
|
+
Args:
|
|
314
|
+
result: Search result (dict with 'results' and '_metadata')
|
|
315
|
+
args: Positional arguments (not used)
|
|
316
|
+
kwargs: Keyword arguments containing 'query', etc.
|
|
317
|
+
|
|
318
|
+
Returns:
|
|
319
|
+
int: TTL in seconds
|
|
320
|
+
"""
|
|
321
|
+
# Extract metadata from result
|
|
322
|
+
if not isinstance(result, dict):
|
|
323
|
+
return 3600 # Default 1 hour for non-dict results
|
|
324
|
+
|
|
325
|
+
metadata = result.get("_metadata", {})
|
|
326
|
+
intent_type = metadata.get("intent_type", "GENERAL")
|
|
327
|
+
results_list = result.get("results", [])
|
|
328
|
+
query = kwargs.get("query", "")
|
|
329
|
+
|
|
330
|
+
# Use IntelligentCache logic if available
|
|
331
|
+
if hasattr(self, "intelligent_cache") and self.intelligent_cache:
|
|
332
|
+
try:
|
|
333
|
+
return self.intelligent_cache.calculate_ttl(query, intent_type, results_list)
|
|
334
|
+
except Exception as e:
|
|
335
|
+
self.logger.warning(f"Failed to calculate intelligent TTL: {e}")
|
|
336
|
+
|
|
337
|
+
# Fallback: Use intent-based TTL
|
|
338
|
+
from .cache import IntelligentCache
|
|
339
|
+
|
|
340
|
+
ttl_strategies = IntelligentCache.TTL_STRATEGIES
|
|
341
|
+
base_ttl = ttl_strategies.get(intent_type, ttl_strategies.get("GENERAL", 3600))
|
|
342
|
+
|
|
343
|
+
# Adjust based on result count
|
|
344
|
+
if not results_list:
|
|
345
|
+
return base_ttl // 2 # Shorter TTL for empty results
|
|
346
|
+
|
|
347
|
+
return base_ttl
|
|
348
|
+
|
|
349
|
+
return calculate_search_ttl
|
|
350
|
+
|
|
351
|
+
def _init_credentials(self):
    """Set up Google API credentials, preferring an API key over a service account.

    Raises:
        AuthenticationError: when neither credential method succeeds.
    """
    # Strategy 1: plain API key + CSE id from configuration.
    if self.config.google_api_key and self.config.google_cse_id:
        try:
            self._service = build(
                "customsearch",
                "v1",
                developerKey=self.config.google_api_key,
                cache_discovery=False,
            )
            self.logger.info("Initialized with API key")
            return
        except Exception as e:
            self.logger.warning(f"Failed to initialize with API key: {e}")

    # Strategy 2: service-account JSON file on disk.
    creds_path = self.config.google_application_credentials
    if creds_path and os.path.exists(creds_path):
        try:
            sa_credentials = service_account.Credentials.from_service_account_file(
                creds_path,
                scopes=["https://www.googleapis.com/auth/cse"],
            )
            self._credentials = sa_credentials
            self._service = build(
                "customsearch",
                "v1",
                credentials=sa_credentials,
                cache_discovery=False,
            )
            self.logger.info("Initialized with service account")
            return
        except Exception as e:
            self.logger.warning(f"Failed to initialize with service account: {e}")

    raise AuthenticationError("No valid Google API credentials found. Set GOOGLE_API_KEY and GOOGLE_CSE_ID")
|
|
389
|
+
|
|
390
|
+
def _execute_search(self, query: str, num_results: int = 10, start_index: int = 1, **kwargs) -> Dict[str, Any]:
|
|
391
|
+
"""Execute search with rate limiting and circuit breaker"""
|
|
392
|
+
# Check rate limit
|
|
393
|
+
self.rate_limiter.acquire()
|
|
394
|
+
|
|
395
|
+
# Prepare parameters
|
|
396
|
+
search_params = {
|
|
397
|
+
"q": query,
|
|
398
|
+
"cx": self.config.google_cse_id,
|
|
399
|
+
"num": min(num_results, 10),
|
|
400
|
+
"start": start_index,
|
|
401
|
+
**kwargs,
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
# Execute with circuit breaker
|
|
405
|
+
def _do_search():
|
|
406
|
+
try:
|
|
407
|
+
result = self._service.cse().list(**search_params).execute()
|
|
408
|
+
return result
|
|
409
|
+
except HttpError as e:
|
|
410
|
+
if e.resp.status == 429:
|
|
411
|
+
raise QuotaExceededError(f"API quota exceeded: {e}")
|
|
412
|
+
elif e.resp.status == 403:
|
|
413
|
+
raise AuthenticationError(f"Authentication failed: {e}")
|
|
414
|
+
else:
|
|
415
|
+
raise SearchAPIError(f"Search API error: {e}")
|
|
416
|
+
except Exception as e:
|
|
417
|
+
raise SearchAPIError(f"Unexpected error: {e}")
|
|
418
|
+
|
|
419
|
+
return self.circuit_breaker.call(_do_search)
|
|
420
|
+
|
|
421
|
+
def _retry_with_backoff(self, func, *args, **kwargs) -> Any:
|
|
422
|
+
"""Execute with exponential backoff retry"""
|
|
423
|
+
last_exception = None
|
|
424
|
+
|
|
425
|
+
for attempt in range(self.config.retry_attempts):
|
|
426
|
+
try:
|
|
427
|
+
return func(*args, **kwargs)
|
|
428
|
+
except (RateLimitError, CircuitBreakerOpenError) as e:
|
|
429
|
+
# Don't retry these
|
|
430
|
+
raise e
|
|
431
|
+
except Exception as e:
|
|
432
|
+
last_exception = e
|
|
433
|
+
if attempt < self.config.retry_attempts - 1:
|
|
434
|
+
wait_time = self.config.retry_backoff**attempt
|
|
435
|
+
self.logger.warning(f"Attempt {attempt + 1} failed: {e}. Retrying in {wait_time}s...")
|
|
436
|
+
time.sleep(wait_time)
|
|
437
|
+
|
|
438
|
+
if last_exception is None:
|
|
439
|
+
raise RuntimeError("Retry logic failed but no exception was captured")
|
|
440
|
+
raise last_exception
|
|
441
|
+
|
|
442
|
+
def _parse_search_results(
|
|
443
|
+
self,
|
|
444
|
+
raw_results: Dict[str, Any],
|
|
445
|
+
query: str = "",
|
|
446
|
+
enable_quality_analysis: bool = True,
|
|
447
|
+
) -> List[Dict[str, Any]]:
|
|
448
|
+
"""Parse and enhance search results"""
|
|
449
|
+
items = raw_results.get("items", [])
|
|
450
|
+
results = []
|
|
451
|
+
|
|
452
|
+
for position, item in enumerate(items, start=1):
|
|
453
|
+
result = {
|
|
454
|
+
"title": item.get("title", ""),
|
|
455
|
+
"link": item.get("link", ""),
|
|
456
|
+
"snippet": item.get("snippet", ""),
|
|
457
|
+
"displayLink": item.get("displayLink", ""),
|
|
458
|
+
"formattedUrl": item.get("formattedUrl", ""),
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
# Add image metadata
|
|
462
|
+
if "image" in item:
|
|
463
|
+
result["image"] = {
|
|
464
|
+
"contextLink": item["image"].get("contextLink", ""),
|
|
465
|
+
"height": item["image"].get("height", 0),
|
|
466
|
+
"width": item["image"].get("width", 0),
|
|
467
|
+
"byteSize": item["image"].get("byteSize", 0),
|
|
468
|
+
"thumbnailLink": item["image"].get("thumbnailLink", ""),
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
# Add page metadata
|
|
472
|
+
if "pagemap" in item:
|
|
473
|
+
result["metadata"] = item["pagemap"]
|
|
474
|
+
|
|
475
|
+
# Add quality analysis
|
|
476
|
+
if enable_quality_analysis and self.quality_analyzer and query:
|
|
477
|
+
quality_analysis = self.quality_analyzer.analyze_result_quality(result, query, position)
|
|
478
|
+
result["_quality"] = quality_analysis
|
|
479
|
+
|
|
480
|
+
# Add agent-friendly quality summary
|
|
481
|
+
result["_quality_summary"] = {
|
|
482
|
+
"score": quality_analysis["quality_score"],
|
|
483
|
+
"level": quality_analysis["credibility_level"],
|
|
484
|
+
"is_authoritative": quality_analysis["authority_score"] > 0.8,
|
|
485
|
+
"is_relevant": quality_analysis["relevance_score"] > 0.7,
|
|
486
|
+
"is_fresh": quality_analysis["freshness_score"] > 0.7,
|
|
487
|
+
"warnings_count": len(quality_analysis["warnings"]),
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
results.append(result)
|
|
491
|
+
|
|
492
|
+
return results
|
|
493
|
+
|
|
494
|
+
# ========================================================================
|
|
495
|
+
# Core Search Methods
|
|
496
|
+
# ========================================================================
|
|
497
|
+
|
|
498
|
+
@cache_result_with_strategy(ttl_strategy=lambda self, result, args, kwargs: self._create_search_ttl_strategy()(result, args, kwargs))
def search_web(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    language: str = "en",
    country: str = "us",
    safe_search: str = "medium",
    date_restrict: Optional[str] = None,
    file_type: Optional[str] = None,
    exclude_terms: Optional[str] = None,
    auto_enhance: bool = True,
    return_summary: bool = False,
) -> Dict[str, Any]:
    """
    Search the web with enhanced intelligence.

    Args:
        query: Search query string
        num_results: Number of results to return (1-100)
        start_index: Starting index for pagination
        language: Language code
        country: Country code
        safe_search: Safe search level
        date_restrict: Date restriction
        file_type: File type filter
        exclude_terms: Terms to exclude (appended as "-terms")
        auto_enhance: Enable automatic query enhancement
        return_summary: Include a generated summary in the returned dict

    Returns:
        Dict with "results" (list of result dicts), "_metadata"
        (intent/query/timing info used for cache TTL), and — when
        return_summary is set and a summarizer is configured — "summary".

    Raises:
        ValidationError: if the query is empty or num_results out of range.
    """
    start_time = time.time()
    intent_analysis = None

    try:
        if not query or not query.strip():
            raise ValidationError("Query cannot be empty")

        if num_results < 1 or num_results > 100:
            raise ValidationError("num_results must be between 1 and 100")

        # Analyze query intent; the analyzer may rewrite the query and
        # suggest parameters, but explicit caller arguments win.
        enhanced_query = query
        if auto_enhance and self.intent_analyzer:
            intent_analysis = self.intent_analyzer.analyze_query_intent(query)
            enhanced_query = intent_analysis["enhanced_query"]

            # Merge suggested parameters (only fill gaps; never override
            # caller-provided date_restrict/file_type).
            for param, value in intent_analysis["suggested_params"].items():
                if param == "date_restrict" and not date_restrict:
                    date_restrict = value
                elif param == "file_type" and not file_type:
                    file_type = value
                elif param == "num_results":
                    num_results = min(num_results, value)

            self.logger.info(f"Intent: {intent_analysis['intent_type']} " f"(confidence: {intent_analysis['confidence']:.2f})")

        # Note: Cache is now handled by @cache_result_with_strategy decorator
        # No need for manual cache check here

        # Prepare search parameters
        search_params = {
            "lr": f"lang_{language}",
            "cr": f"country{country.upper()}",
            "safe": safe_search,
        }

        if date_restrict:
            search_params["dateRestrict"] = date_restrict

        if file_type:
            search_params["fileType"] = file_type

        # Exclusions use Google's "-term" negative-query syntax.
        if exclude_terms:
            enhanced_query = f"{enhanced_query} -{exclude_terms}"

        # Execute search (with retry/backoff around the rate-limited,
        # circuit-broken API call).
        raw_results = self._retry_with_backoff(
            self._execute_search,
            enhanced_query,
            num_results,
            start_index,
            **search_params,
        )

        # Parse results — note quality analysis scores against the ORIGINAL
        # query, not the enhanced one.
        results = self._parse_search_results(
            raw_results,
            query=query,
            enable_quality_analysis=self.config.enable_quality_analysis,
        )

        # Deduplicate near-identical results when the feature is enabled.
        if self.deduplicator:
            results = self.deduplicator.deduplicate_results(results, self.config.similarity_threshold)

        # Add search metadata so agents can see how the query was rewritten.
        if intent_analysis:
            for result in results:
                result["_search_metadata"] = {
                    "original_query": query,
                    "enhanced_query": enhanced_query,
                    "intent_type": intent_analysis["intent_type"],
                    "intent_confidence": intent_analysis["confidence"],
                    "suggestions": intent_analysis["suggestions"],
                }

        # Update context
        if self.search_context:
            self.search_context.add_search(query, results)

        # Note: Cache is now handled by @cache_result_with_strategy decorator
        # The decorator will call _create_search_ttl_strategy() to
        # calculate TTL

        # Record metrics
        response_time = (time.time() - start_time) * 1000
        self.metrics.record_search(query, "web", results, response_time, cached=False)

        # Prepare result with metadata for TTL calculation
        result_data = {
            "results": results,
            "_metadata": {
                "intent_type": (intent_analysis["intent_type"] if intent_analysis else "GENERAL"),
                "query": query,
                "enhanced_query": enhanced_query,
                "timestamp": time.time(),
                "response_time_ms": response_time,
            },
        }

        # Generate summary if requested
        if return_summary and self.result_summarizer:
            summary = self.result_summarizer.generate_summary(results, query)
            result_data["summary"] = summary

        return result_data

    except Exception as e:
        # Record the failure before re-raising so metrics stay accurate.
        response_time = (time.time() - start_time) * 1000
        self.metrics.record_search(query, "web", [], response_time, error=e)

        # Format error for agent
        error_info = self.error_handler.format_error_for_agent(
            e,
            {"circuit_breaker_timeout": self.config.circuit_breaker_timeout},
        )

        self.logger.error(f"Search failed: {error_info['user_message']}")
        raise
|
|
652
|
+
|
|
653
|
+
def search_images(
    self,
    query: str,
    num_results: int = 10,
    image_size: Optional[str] = None,
    image_type: Optional[str] = None,
    image_color_type: Optional[str] = None,
    safe_search: str = "medium",
) -> List[Dict[str, Any]]:
    """Search Google Images via the Custom Search API (searchType=image)."""
    if not query or not query.strip():
        raise ValidationError("Query cannot be empty")

    # Base parameters; optional image filters are added only when supplied.
    params: Dict[str, Any] = {
        "searchType": "image",
        "safe": safe_search,
    }
    optional_filters = {
        "imgSize": image_size,
        "imgType": image_type,
        "imgColorType": image_color_type,
    }
    params.update({name: value for name, value in optional_filters.items() if value})

    raw = self._retry_with_backoff(self._execute_search, query, num_results, 1, **params)

    return self._parse_search_results(raw, query=query)
|
|
681
|
+
|
|
682
|
+
def search_news(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    language: str = "en",
    date_restrict: Optional[str] = None,
    sort_by: str = "date",
) -> List[Dict[str, Any]]:
    """Search for news articles by biasing the query toward news coverage."""
    if not query or not query.strip():
        raise ValidationError("Query cannot be empty")

    # Append a news hint; CSE has no dedicated news vertical.
    news_query = f"{query} news"

    # Only "date" is a supported sort key; anything else means relevance.
    params: Dict[str, Any] = {
        "lr": f"lang_{language}",
        "sort": "date" if sort_by == "date" else "",
    }
    if date_restrict:
        params["dateRestrict"] = date_restrict

    raw = self._retry_with_backoff(
        self._execute_search,
        news_query,
        num_results,
        start_index,
        **params,
    )

    return self._parse_search_results(raw, query=query)
|
|
714
|
+
|
|
715
|
+
def search_videos(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    language: str = "en",
    safe_search: str = "medium",
) -> List[Dict[str, Any]]:
    """Search for videos by restricting results to common video file types."""
    if not query or not query.strip():
        raise ValidationError("Query cannot be empty")

    # CSE has no video vertical, so emulate one with filetype operators.
    video_query = f"{query} filetype:mp4 OR filetype:webm OR filetype:mov"

    params: Dict[str, Any] = {
        "lr": f"lang_{language}",
        "safe": safe_search,
    }

    raw = self._retry_with_backoff(
        self._execute_search,
        video_query,
        num_results,
        start_index,
        **params,
    )

    return self._parse_search_results(raw, query=query)
|
|
743
|
+
|
|
744
|
+
# ========================================================================
|
|
745
|
+
# Utility Methods
|
|
746
|
+
# ========================================================================
|
|
747
|
+
|
|
748
|
+
def get_metrics(self) -> Dict[str, Any]:
    """Return the full metrics snapshot from the EnhancedMetrics collector."""
    snapshot = self.metrics.get_metrics()
    return snapshot
|
|
751
|
+
|
|
752
|
+
def get_metrics_report(self) -> str:
    """Return a human-readable report rendered by the metrics collector."""
    report = self.metrics.generate_report()
    return report
|
|
755
|
+
|
|
756
|
+
def get_health_score(self) -> float:
    """Return the aggregate system health score in the range 0-1."""
    score = self.metrics.get_health_score()
    return score
|
|
759
|
+
|
|
760
|
+
def get_quota_status(self) -> Dict[str, Any]:
    """Summarize rate-limit quota, circuit-breaker state, and overall health."""
    status: Dict[str, Any] = {}
    status["remaining_quota"] = self.rate_limiter.get_remaining_quota()
    status["max_requests"] = self.config.rate_limit_requests
    status["time_window_seconds"] = self.config.rate_limit_window
    status["circuit_breaker_state"] = self.circuit_breaker.get_state()
    status["health_score"] = self.get_health_score()
    return status
|
|
769
|
+
|
|
770
|
+
def get_search_context(self) -> Optional[Dict[str, Any]]:
    """Return recent search history and preferences, or None when tracking is off."""
    ctx = self.search_context
    if not ctx:
        return None

    return {
        "history": ctx.get_history(5),
        "preferences": ctx.get_preferences(),
    }
|