aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,749 @@
1
+ """
2
+ Core SearchTool Implementation
3
+
4
+ Enhanced Google Custom Search Tool with quality analysis, intent understanding,
5
+ intelligent caching, and comprehensive metrics.
6
+ """
7
+
8
+ import logging
9
+ import os
10
+ import time
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ from pydantic import BaseModel, Field, ConfigDict
14
+
15
+ from aiecs.tools.base_tool import BaseTool
16
+ from aiecs.tools.tool_executor import cache_result_with_strategy
17
+ from aiecs.config.config import get_settings
18
+
19
+ # Import Google API with graceful fallback
20
+ try:
21
+ from googleapiclient.discovery import build
22
+ from googleapiclient.errors import HttpError
23
+ from google.auth.exceptions import GoogleAuthError
24
+ from google.oauth2 import service_account
25
+
26
+ GOOGLE_API_AVAILABLE = True
27
+ except ImportError:
28
+ GOOGLE_API_AVAILABLE = False
29
+ HttpError = Exception
30
+ GoogleAuthError = Exception
31
+
32
+ # Import search tool components
33
+ from .constants import (
34
+ AuthenticationError,
35
+ QuotaExceededError,
36
+ RateLimitError,
37
+ CircuitBreakerOpenError,
38
+ SearchAPIError,
39
+ ValidationError,
40
+ )
41
+ from .rate_limiter import RateLimiter, CircuitBreaker
42
+ from .analyzers import (
43
+ ResultQualityAnalyzer,
44
+ QueryIntentAnalyzer,
45
+ ResultSummarizer,
46
+ )
47
+ from .deduplicator import ResultDeduplicator
48
+ from .context import SearchContext
49
+ from .cache import IntelligentCache
50
+ from .metrics import EnhancedMetrics
51
+ from .error_handler import AgentFriendlyErrorHandler
52
+ from .schemas import (
53
+ SearchWebSchema,
54
+ SearchImagesSchema,
55
+ SearchNewsSchema,
56
+ SearchVideosSchema,
57
+ SearchPaginatedSchema,
58
+ SearchBatchSchema,
59
+ ValidateCredentialsSchema,
60
+ GetQuotaStatusSchema,
61
+ GetMetricsSchema,
62
+ GetMetricsReportSchema,
63
+ GetHealthScoreSchema,
64
+ GetSearchContextSchema,
65
+ )
66
+
67
+
68
+ class SearchTool(BaseTool):
69
+ """
70
+ Enhanced web search tool using Google Custom Search API.
71
+
72
+ Provides intelligent search with:
73
+ - Quality scoring and ranking
74
+ - Query intent analysis
75
+ - Result deduplication
76
+ - Context-aware search
77
+ - Intelligent Redis caching
78
+ - Comprehensive metrics
79
+ - Agent-friendly error handling
80
+ """
81
+
82
+ # Configuration schema
83
class Config(BaseModel):
    """Configuration for the search tool"""

    # Every field has a default, so the model can be built from a partial
    # mapping of overrides.
    #
    # NOTE(review): ``env_prefix`` is an option of pydantic-settings'
    # ``BaseSettings``; on a plain ``BaseModel`` it presumably does not make
    # SEARCH_TOOL_* environment variables populate these fields - confirm.
    model_config = ConfigDict(env_prefix="SEARCH_TOOL_")

    # --- Google credentials -------------------------------------------------
    google_api_key: Optional[str] = Field(
        default=None,
        description="Google API key for Custom Search",
    )
    google_cse_id: Optional[str] = Field(
        default=None,
        description="Custom Search Engine ID",
    )
    google_application_credentials: Optional[str] = Field(
        default=None,
        description="Path to service account JSON",
    )

    # --- Result size and caching --------------------------------------------
    max_results_per_query: int = Field(
        default=10,
        description="Maximum results per single query",
    )
    cache_ttl: int = Field(
        default=3600,
        description="Default cache time-to-live in seconds",
    )

    # --- Rate limiting (default window of 86400 s is one day) ---------------
    rate_limit_requests: int = Field(
        default=100,
        description="Maximum requests per time window",
    )
    rate_limit_window: int = Field(
        default=86400,
        description="Time window for rate limiting in seconds",
    )

    # --- Circuit breaker ----------------------------------------------------
    circuit_breaker_threshold: int = Field(
        default=5,
        description="Failures before opening circuit",
    )
    circuit_breaker_timeout: int = Field(
        default=60,
        description="Timeout before trying half-open in seconds",
    )

    # --- Retry and transport ------------------------------------------------
    retry_attempts: int = Field(
        default=3,
        description="Number of retry attempts",
    )
    retry_backoff: float = Field(
        default=2.0,
        description="Exponential backoff factor",
    )
    timeout: int = Field(
        default=30,
        description="API request timeout in seconds",
    )
    user_agent: str = Field(
        default="AIECS-SearchTool/2.0",
        description="User agent string",
    )

    # --- Enhanced features --------------------------------------------------
    enable_quality_analysis: bool = Field(
        default=True,
        description="Enable result quality analysis",
    )
    enable_intent_analysis: bool = Field(
        default=True,
        description="Enable query intent analysis",
    )
    enable_deduplication: bool = Field(
        default=True,
        description="Enable result deduplication",
    )
    enable_context_tracking: bool = Field(
        default=True,
        description="Enable search context tracking",
    )
    enable_intelligent_cache: bool = Field(
        default=True,
        description="Enable intelligent Redis caching",
    )
    similarity_threshold: float = Field(
        default=0.85,
        description="Similarity threshold for deduplication",
    )
    max_search_history: int = Field(
        default=10,
        description="Maximum search history to maintain",
    )
138
+
139
# Operation schemas for input validation and documentation.
# Each assignment re-binds the schema class imported from .schemas as a class
# attribute of the same name.
# NOTE(review): presumably these are looked up on the tool class by the
# framework - confirm against BaseTool.
SearchWebSchema = SearchWebSchema
SearchImagesSchema = SearchImagesSchema
SearchNewsSchema = SearchNewsSchema
SearchVideosSchema = SearchVideosSchema
SearchPaginatedSchema = SearchPaginatedSchema
SearchBatchSchema = SearchBatchSchema
ValidateCredentialsSchema = ValidateCredentialsSchema
GetQuotaStatusSchema = GetQuotaStatusSchema
GetMetricsSchema = GetMetricsSchema
GetMetricsReportSchema = GetMetricsReportSchema
GetHealthScoreSchema = GetHealthScoreSchema
GetSearchContextSchema = GetSearchContextSchema

# Tool metadata: human-readable description and category label.
description = "Comprehensive web search tool using Google Custom Search API."
category = "task"
156
+
157
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
    """
    Initialize SearchTool with enhanced capabilities.

    Builds the effective configuration (global settings overlaid with
    ``config``), sets up logging, creates the Google API client, and then
    instantiates the rate limiter, circuit breaker, the optional analysis
    components, the cache wrapper and the metrics collector.

    Args:
        config: Optional configuration overrides; keys given here replace
            the credential values read from the global settings.

    Raises:
        AuthenticationError: If the Google API libraries are not available,
            or if ``_init_credentials`` cannot build a client from the
            configured credentials.
        ValidationError: If configuration is invalid.
            NOTE(review): ``self.Config`` is a pydantic model, so this is
            presumably pydantic's validation error rather than the
            ``ValidationError`` imported from ``.constants`` - confirm.
    """
    super().__init__(config)

    # Fail fast: nothing below can work without the Google client libraries.
    if not GOOGLE_API_AVAILABLE:
        raise AuthenticationError(
            "Google API client libraries not available. "
            "Install with: pip install google-api-python-client google-auth google-auth-httplib2"
        )

    # Load settings
    global_settings = get_settings()

    # Merge configuration: global credentials first, caller overrides on top.
    merged_config = {
        "google_api_key": global_settings.google_api_key,
        "google_cse_id": global_settings.google_cse_id,
        "google_application_credentials": global_settings.google_application_credentials,
    }
    if config:
        merged_config.update(config)

    # Parse configuration
    self.config = self.Config(**merged_config)

    # Initialize logger. This must happen before _init_credentials(), which
    # logs through self.logger. A stream handler is attached only when the
    # module logger has none yet.
    self.logger = logging.getLogger(__name__)
    if not self.logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)s [SearchTool] %(message)s")
        )
        self.logger.addHandler(handler)
        # NOTE(review): nesting of this line under the handler check is
        # reconstructed from a view without indentation - confirm.
        self.logger.setLevel(logging.INFO)

    # Initialize API client; _init_credentials() fills these in or raises.
    self._service = None
    self._credentials = None
    self._init_credentials()

    # Initialize core components
    self.rate_limiter = RateLimiter(
        self.config.rate_limit_requests, self.config.rate_limit_window
    )

    self.circuit_breaker = CircuitBreaker(
        self.config.circuit_breaker_threshold,
        self.config.circuit_breaker_timeout,
    )

    # Initialize enhanced components; each is None when its switch is off.
    self.quality_analyzer = (
        ResultQualityAnalyzer() if self.config.enable_quality_analysis else None
    )
    self.intent_analyzer = QueryIntentAnalyzer() if self.config.enable_intent_analysis else None
    self.deduplicator = ResultDeduplicator() if self.config.enable_deduplication else None
    # The summarizer has no switch of its own; it follows enable_quality_analysis.
    self.result_summarizer = ResultSummarizer() if self.config.enable_quality_analysis else None
    self.search_context = (
        SearchContext(self.config.max_search_history)
        if self.config.enable_context_tracking
        else None
    )
    self.error_handler = AgentFriendlyErrorHandler()

    # Initialize intelligent cache (Redis). Stays None when the feature is
    # disabled; falls back to a disabled cache wrapper if setup fails.
    self.intelligent_cache = None
    if self.config.enable_intelligent_cache:
        try:
            # Imported here so a failing import is handled like any other
            # setup failure by the except branch below.
            from aiecs.infrastructure.persistence import RedisClient

            redis_client = RedisClient()
            # Note: Redis client needs to be initialized asynchronously
            self.intelligent_cache = IntelligentCache(redis_client, enabled=True)
        except Exception as e:
            self.logger.warning(f"Could not initialize Redis cache: {e}")
            self.intelligent_cache = IntelligentCache(None, enabled=False)

    # Initialize enhanced metrics
    self.metrics = EnhancedMetrics()

    self.logger.info("SearchTool initialized with enhanced capabilities")
247
+
248
+ def _create_search_ttl_strategy(self):
249
+ """
250
+ Create intelligent TTL strategy for search results.
251
+
252
+ This strategy calculates TTL based on:
253
+ 1. Query intent type (from result metadata)
254
+ 2. Result freshness score
255
+ 3. Result quality score
256
+
257
+ Returns:
258
+ Callable: TTL strategy function compatible with cache_result_with_strategy
259
+ """
260
+
261
+ def calculate_search_ttl(result: Any, args: tuple, kwargs: dict) -> int:
262
+ """
263
+ Calculate intelligent TTL for search results.
264
+
265
+ Args:
266
+ result: Search result (dict with 'results' and '_metadata')
267
+ args: Positional arguments (not used)
268
+ kwargs: Keyword arguments containing 'query', etc.
269
+
270
+ Returns:
271
+ int: TTL in seconds
272
+ """
273
+ # Extract metadata from result
274
+ if not isinstance(result, dict):
275
+ return 3600 # Default 1 hour for non-dict results
276
+
277
+ metadata = result.get("_metadata", {})
278
+ intent_type = metadata.get("intent_type", "GENERAL")
279
+ results_list = result.get("results", [])
280
+ query = kwargs.get("query", "")
281
+
282
+ # Use IntelligentCache logic if available
283
+ if hasattr(self, "intelligent_cache") and self.intelligent_cache:
284
+ try:
285
+ return self.intelligent_cache.calculate_ttl(query, intent_type, results_list)
286
+ except Exception as e:
287
+ self.logger.warning(f"Failed to calculate intelligent TTL: {e}")
288
+
289
+ # Fallback: Use intent-based TTL
290
+ from .cache import IntelligentCache
291
+
292
+ ttl_strategies = IntelligentCache.TTL_STRATEGIES
293
+ base_ttl = ttl_strategies.get(intent_type, ttl_strategies.get("GENERAL", 3600))
294
+
295
+ # Adjust based on result count
296
+ if not results_list:
297
+ return base_ttl // 2 # Shorter TTL for empty results
298
+
299
+ return base_ttl
300
+
301
+ return calculate_search_ttl
302
+
303
def _init_credentials(self):
    """
    Initialize the Google Custom Search API client.

    Two methods are tried in order; the first that succeeds sets
    ``self._service`` and returns:

    1. API key - used only when both ``google_api_key`` and
       ``google_cse_id`` are configured.
    2. Service account - used when ``google_application_credentials``
       points to an existing file; also stores ``self._credentials``.

    Failures of either method are logged as warnings and the next method
    is tried.

    Raises:
        AuthenticationError: If no method produced a client.
    """
    # Method 1: API Key
    if self.config.google_api_key and self.config.google_cse_id:
        try:
            self._service = build(
                "customsearch",
                "v1",
                developerKey=self.config.google_api_key,
                cache_discovery=False,
            )
            self.logger.info("Initialized with API key")
            return
        except Exception as e:
            self.logger.warning(f"Failed to initialize with API key: {e}")

    # Method 2: Service Account
    creds_path = self.config.google_application_credentials
    if creds_path:
        if os.path.exists(creds_path):
            try:
                credentials = service_account.Credentials.from_service_account_file(
                    creds_path,
                    scopes=["https://www.googleapis.com/auth/cse"],
                )
                self._credentials = credentials
                self._service = build(
                    "customsearch",
                    "v1",
                    credentials=credentials,
                    cache_discovery=False,
                )
                self.logger.info("Initialized with service account")
                return
            except Exception as e:
                self.logger.warning(f"Failed to initialize with service account: {e}")
        else:
            # Previously skipped silently, which left the error below
            # without any hint that a path had been configured at all.
            self.logger.warning(
                f"Service account credentials file not found: {creds_path}"
            )

    raise AuthenticationError(
        "No valid Google API credentials found. Set GOOGLE_API_KEY and GOOGLE_CSE_ID"
    )
343
+
344
+ def _execute_search(
345
+ self, query: str, num_results: int = 10, start_index: int = 1, **kwargs
346
+ ) -> Dict[str, Any]:
347
+ """Execute search with rate limiting and circuit breaker"""
348
+ # Check rate limit
349
+ self.rate_limiter.acquire()
350
+
351
+ # Prepare parameters
352
+ search_params = {
353
+ "q": query,
354
+ "cx": self.config.google_cse_id,
355
+ "num": min(num_results, 10),
356
+ "start": start_index,
357
+ **kwargs,
358
+ }
359
+
360
+ # Execute with circuit breaker
361
+ def _do_search():
362
+ try:
363
+ result = self._service.cse().list(**search_params).execute()
364
+ return result
365
+ except HttpError as e:
366
+ if e.resp.status == 429:
367
+ raise QuotaExceededError(f"API quota exceeded: {e}")
368
+ elif e.resp.status == 403:
369
+ raise AuthenticationError(f"Authentication failed: {e}")
370
+ else:
371
+ raise SearchAPIError(f"Search API error: {e}")
372
+ except Exception as e:
373
+ raise SearchAPIError(f"Unexpected error: {e}")
374
+
375
+ return self.circuit_breaker.call(_do_search)
376
+
377
+ def _retry_with_backoff(self, func, *args, **kwargs) -> Any:
378
+ """Execute with exponential backoff retry"""
379
+ last_exception = None
380
+
381
+ for attempt in range(self.config.retry_attempts):
382
+ try:
383
+ return func(*args, **kwargs)
384
+ except (RateLimitError, CircuitBreakerOpenError) as e:
385
+ # Don't retry these
386
+ raise e
387
+ except Exception as e:
388
+ last_exception = e
389
+ if attempt < self.config.retry_attempts - 1:
390
+ wait_time = self.config.retry_backoff**attempt
391
+ self.logger.warning(
392
+ f"Attempt {attempt + 1} failed: {e}. Retrying in {wait_time}s..."
393
+ )
394
+ time.sleep(wait_time)
395
+
396
+ raise last_exception
397
+
398
def _parse_search_results(
    self,
    raw_results: Dict[str, Any],
    query: str = "",
    enable_quality_analysis: bool = True,
) -> List[Dict[str, Any]]:
    """Convert a raw API response into a list of normalized result dicts.

    Args:
        raw_results: Response mapping; its ``"items"`` list is read
            (a missing key is treated as no results).
        query: Query passed to the quality analyzer. When empty, quality
            analysis is skipped.
        enable_quality_analysis: Set to False to skip quality analysis.

    Returns:
        One dict per item with the text fields, plus ``image`` and
        ``metadata`` when the item has them, and ``_quality`` /
        ``_quality_summary`` when quality analysis ran.
    """
    text_fields = ("title", "link", "snippet", "displayLink", "formattedUrl")
    image_defaults = (
        ("contextLink", ""),
        ("height", 0),
        ("width", 0),
        ("byteSize", 0),
        ("thumbnailLink", ""),
    )

    parsed = []
    # Positions are 1-based ranks within this response.
    for rank, entry in enumerate(raw_results.get("items", []), start=1):
        record = {field: entry.get(field, "") for field in text_fields}

        # Image metadata, with defaults for missing attributes
        if "image" in entry:
            image_info = entry["image"]
            record["image"] = {
                key: image_info.get(key, fallback) for key, fallback in image_defaults
            }

        # Page metadata is passed through untouched
        if "pagemap" in entry:
            record["metadata"] = entry["pagemap"]

        # Quality analysis needs the flag, an analyzer and a query
        if enable_quality_analysis and self.quality_analyzer and query:
            analysis = self.quality_analyzer.analyze_result_quality(record, query, rank)
            record["_quality"] = analysis

            # Condensed, agent-friendly view of the analysis
            summary = {}
            summary["score"] = analysis["quality_score"]
            summary["level"] = analysis["credibility_level"]
            summary["is_authoritative"] = analysis["authority_score"] > 0.8
            summary["is_relevant"] = analysis["relevance_score"] > 0.7
            summary["is_fresh"] = analysis["freshness_score"] > 0.7
            summary["warnings_count"] = len(analysis["warnings"])
            record["_quality_summary"] = summary

        parsed.append(record)

    return parsed
451
+
452
+ # ========================================================================
453
+ # Core Search Methods
454
+ # ========================================================================
455
+
456
@cache_result_with_strategy(
    ttl_strategy=lambda self, result, args, kwargs: self._create_search_ttl_strategy()(
        result, args, kwargs
    )
)
def search_web(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    language: str = "en",
    country: str = "us",
    safe_search: str = "medium",
    date_restrict: Optional[str] = None,
    file_type: Optional[str] = None,
    exclude_terms: Optional[str] = None,
    auto_enhance: bool = True,
    return_summary: bool = False,
) -> Dict[str, Any]:
    """
    Search the web with enhanced intelligence.

    Args:
        query: Search query string
        num_results: Number of results to return (validated to 1-100).
            NOTE(review): _execute_search sends at most 10 per request,
            so larger values are accepted but not honoured here.
        start_index: Starting index for pagination
        language: Language code (sent as ``lr=lang_<language>``)
        country: Country code (sent as ``cr=country<COUNTRY>``)
        safe_search: Safe search level
        date_restrict: Date restriction
        file_type: File type filter
        exclude_terms: Whitespace-separated terms to exclude; a
            double-quoted phrase is excluded as one unit
        auto_enhance: Enable automatic query enhancement
        return_summary: Return summary metadata

    Returns:
        Dict with ``"results"`` (list of parsed results) and
        ``"_metadata"`` (intent type, queries, timestamp, response time).
        A ``"summary"`` key is added when ``return_summary`` is true and a
        result summarizer is configured.

    Raises:
        ValidationError: If the query is empty or ``num_results`` is out of
            range.
        Exception: Any error from the search pipeline is recorded in the
            metrics, logged, and re-raised unchanged.
    """
    # Local import: the module's import block is outside this method.
    import re

    start_time = time.time()
    intent_analysis = None

    try:
        if not query or not query.strip():
            raise ValidationError("Query cannot be empty")

        if num_results < 1 or num_results > 100:
            raise ValidationError("num_results must be between 1 and 100")

        # Analyze query intent
        enhanced_query = query
        if auto_enhance and self.intent_analyzer:
            intent_analysis = self.intent_analyzer.analyze_query_intent(query)
            enhanced_query = intent_analysis["enhanced_query"]

            # Merge suggested parameters; explicit caller values win
            for param, value in intent_analysis["suggested_params"].items():
                if param == "date_restrict" and not date_restrict:
                    date_restrict = value
                elif param == "file_type" and not file_type:
                    file_type = value
                elif param == "num_results":
                    num_results = min(num_results, value)

            self.logger.info(
                f"Intent: {intent_analysis['intent_type']} "
                f"(confidence: {intent_analysis['confidence']:.2f})"
            )

        # Note: Cache is now handled by @cache_result_with_strategy decorator
        # No need for manual cache check here

        # Prepare search parameters
        search_params = {
            "lr": f"lang_{language}",
            "cr": f"country{country.upper()}",
            "safe": safe_search,
        }

        if date_restrict:
            search_params["dateRestrict"] = date_restrict

        if file_type:
            search_params["fileType"] = file_type

        if exclude_terms:
            # Negate every term individually: "-foo bar" would exclude only
            # "foo" and turn "bar" into a required keyword. Quoted phrases
            # stay together so '"foo bar"' still excludes the whole phrase.
            exclusions = " ".join(
                f"-{term}" for term in re.findall(r'"[^"]*"|\S+', exclude_terms)
            )
            if exclusions:
                enhanced_query = f"{enhanced_query} {exclusions}"

        # Execute search
        raw_results = self._retry_with_backoff(
            self._execute_search,
            enhanced_query,
            num_results,
            start_index,
            **search_params,
        )

        # Parse results (quality analysis is scored against the original query)
        results = self._parse_search_results(
            raw_results,
            query=query,
            enable_quality_analysis=self.config.enable_quality_analysis,
        )

        # Deduplicate
        if self.deduplicator:
            results = self.deduplicator.deduplicate_results(
                results, self.config.similarity_threshold
            )

        # Add search metadata
        if intent_analysis:
            for result in results:
                result["_search_metadata"] = {
                    "original_query": query,
                    "enhanced_query": enhanced_query,
                    "intent_type": intent_analysis["intent_type"],
                    "intent_confidence": intent_analysis["confidence"],
                    "suggestions": intent_analysis["suggestions"],
                }

        # Update context
        if self.search_context:
            self.search_context.add_search(query, results)

        # Note: Cache is now handled by @cache_result_with_strategy decorator
        # The decorator will call _create_search_ttl_strategy() to
        # calculate TTL

        # Record metrics
        response_time = (time.time() - start_time) * 1000
        self.metrics.record_search(query, "web", results, response_time, cached=False)

        # Prepare result with metadata for TTL calculation
        result_data = {
            "results": results,
            "_metadata": {
                "intent_type": (
                    intent_analysis["intent_type"] if intent_analysis else "GENERAL"
                ),
                "query": query,
                "enhanced_query": enhanced_query,
                "timestamp": time.time(),
                "response_time_ms": response_time,
            },
        }

        # Generate summary if requested
        if return_summary and self.result_summarizer:
            summary = self.result_summarizer.generate_summary(results, query)
            result_data["summary"] = summary

        return result_data

    except Exception as e:
        response_time = (time.time() - start_time) * 1000
        self.metrics.record_search(query, "web", [], response_time, error=e)

        # Format error for agent
        error_info = self.error_handler.format_error_for_agent(
            e,
            {"circuit_breaker_timeout": self.config.circuit_breaker_timeout},
        )

        self.logger.error(f"Search failed: {error_info['user_message']}")
        raise
621
+
622
def search_images(
    self,
    query: str,
    num_results: int = 10,
    image_size: Optional[str] = None,
    image_type: Optional[str] = None,
    image_color_type: Optional[str] = None,
    safe_search: str = "medium",
) -> List[Dict[str, Any]]:
    """Run an image search and return the parsed results.

    Args:
        query: Search query string; must not be empty or blank.
        num_results: Number of results to request.
        image_size: Optional value sent as ``imgSize``.
        image_type: Optional value sent as ``imgType``.
        image_color_type: Optional value sent as ``imgColorType``.
        safe_search: Safe search level, sent as ``safe``.

    Returns:
        Parsed result dicts from ``_parse_search_results``.

    Raises:
        ValidationError: If the query is empty or blank.
    """
    if not (query and query.strip()):
        raise ValidationError("Query cannot be empty")

    # Optional filters are only sent when the caller supplied a value.
    optional_filters = {
        "imgSize": image_size,
        "imgType": image_type,
        "imgColorType": image_color_type,
    }
    search_params = {"searchType": "image", "safe": safe_search}
    search_params.update(
        {name: value for name, value in optional_filters.items() if value}
    )

    # Image searches always start from the first result.
    response = self._retry_with_backoff(
        self._execute_search, query, num_results, 1, **search_params
    )
    return self._parse_search_results(response, query=query)
652
+
653
def search_news(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    language: str = "en",
    date_restrict: Optional[str] = None,
    sort_by: str = "date",
) -> List[Dict[str, Any]]:
    """Search for news articles.

    The request query is the caller's query with `` news`` appended; the
    results are parsed against the original query.

    Args:
        query: Search query string; must not be empty or blank.
        num_results: Number of results to request.
        start_index: Starting index for pagination.
        language: Language code, sent as ``lr=lang_<language>``.
        date_restrict: Optional value sent as ``dateRestrict``.
        sort_by: ``"date"`` sends ``sort=date``; any other value sends no
            ``sort`` parameter.

    Returns:
        Parsed result dicts from ``_parse_search_results``.

    Raises:
        ValidationError: If the query is empty or blank.
    """
    if not query or not query.strip():
        raise ValidationError("Query cannot be empty")

    news_query = f"{query} news"

    search_params = {"lr": f"lang_{language}"}

    # Only send ``sort`` when there is something to sort by; previously an
    # empty ``sort=""`` parameter was sent for every other value.
    if sort_by == "date":
        search_params["sort"] = sort_by

    if date_restrict:
        search_params["dateRestrict"] = date_restrict

    raw_results = self._retry_with_backoff(
        self._execute_search,
        news_query,
        num_results,
        start_index,
        **search_params,
    )

    return self._parse_search_results(raw_results, query=query)
685
+
686
def search_videos(
    self,
    query: str,
    num_results: int = 10,
    start_index: int = 1,
    language: str = "en",
    safe_search: str = "medium",
) -> List[Dict[str, Any]]:
    """Run a video search and return the parsed results.

    The request query is the caller's query with ``filetype:`` filters for
    mp4, webm and mov appended; results are parsed against the original
    query.

    Args:
        query: Search query string; must not be empty or blank.
        num_results: Number of results to request.
        start_index: Starting index for pagination.
        language: Language code, sent as ``lr=lang_<language>``.
        safe_search: Safe search level, sent as ``safe``.

    Returns:
        Parsed result dicts from ``_parse_search_results``.

    Raises:
        ValidationError: If the query is empty or blank.
    """
    if not (query and query.strip()):
        raise ValidationError("Query cannot be empty")

    request_options = {"lr": f"lang_{language}", "safe": safe_search}
    filtered_query = f"{query} filetype:mp4 OR filetype:webm OR filetype:mov"

    response = self._retry_with_backoff(
        self._execute_search,
        filtered_query,
        num_results,
        start_index,
        **request_options,
    )
    return self._parse_search_results(response, query=query)
714
+
715
+ # ========================================================================
716
+ # Utility Methods
717
+ # ========================================================================
718
+
719
def get_metrics(self) -> Dict[str, Any]:
    """Return the metrics mapping produced by ``self.metrics.get_metrics()``."""
    snapshot = self.metrics.get_metrics()
    return snapshot
722
+
723
def get_metrics_report(self) -> str:
    """Return the text report produced by ``self.metrics.generate_report()``."""
    report = self.metrics.generate_report()
    return report
726
+
727
def get_health_score(self) -> float:
    """Return the health score reported by ``self.metrics.get_health_score()``."""
    score = self.metrics.get_health_score()
    return score
730
+
731
def get_quota_status(self) -> Dict[str, Any]:
    """Report rate-limit, circuit-breaker and health information.

    Returns:
        Dict with the remaining quota from the rate limiter, the configured
        request limit and time window, the circuit breaker state, and the
        current health score.
    """
    remaining = self.rate_limiter.get_remaining_quota()
    settings = self.config
    status = {
        "remaining_quota": remaining,
        "max_requests": settings.rate_limit_requests,
        "time_window_seconds": settings.rate_limit_window,
    }
    status["circuit_breaker_state"] = self.circuit_breaker.get_state()
    status["health_score"] = self.get_health_score()
    return status
740
+
741
def get_search_context(self) -> Optional[Dict[str, Any]]:
    """Return recent search history and preferences, if context is enabled.

    Returns:
        ``None`` when ``self.search_context`` is falsy; otherwise a dict
        with ``"history"`` (from ``get_history(5)``) and ``"preferences"``.
    """
    context = self.search_context
    if context:
        return {
            "history": context.get_history(5),
            "preferences": context.get_preferences(),
        }
    return None