realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Web Search Service - Pluggable search provider architecture
|
|
3
|
+
|
|
4
|
+
This module provides a unified interface for web search across multiple providers.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from src.services.search import web_search
|
|
8
|
+
|
|
9
|
+
# Simple usage (uses config/main.yaml or SEARCH_PROVIDER env var)
|
|
10
|
+
result = web_search("What is AI?")
|
|
11
|
+
|
|
12
|
+
# Specify provider
|
|
13
|
+
result = web_search("What is AI?", provider="tavily")
|
|
14
|
+
|
|
15
|
+
# With options
|
|
16
|
+
result = web_search(
|
|
17
|
+
query="What is AI?",
|
|
18
|
+
provider="serper",
|
|
19
|
+
output_dir="./cache",
|
|
20
|
+
verbose=True,
|
|
21
|
+
num=20, # Provider-specific option
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
Available Providers:
|
|
25
|
+
- perplexity: AI-powered search (default)
|
|
26
|
+
- baidu: Baidu AI Search
|
|
27
|
+
- tavily: Research-focused with optional answers
|
|
28
|
+
- exa: Neural/embeddings search with summaries
|
|
29
|
+
- serper: Google SERP results
|
|
30
|
+
- jina: SERP with full content extraction
|
|
31
|
+
|
|
32
|
+
Environment Variables:
|
|
33
|
+
- SEARCH_PROVIDER: Default search provider (default: perplexity)
|
|
34
|
+
- SEARCH_API_KEY: Unified API key for all providers
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from datetime import datetime
|
|
38
|
+
import json
|
|
39
|
+
import os
|
|
40
|
+
from pathlib import Path
|
|
41
|
+
from typing import Any
|
|
42
|
+
|
|
43
|
+
from src.logging import get_logger
|
|
44
|
+
from src.services.config import PROJECT_ROOT, load_config_with_main
|
|
45
|
+
|
|
46
|
+
from .base import SEARCH_API_KEY_ENV, BaseSearchProvider
|
|
47
|
+
from .consolidation import CONSOLIDATION_TYPES, PROVIDER_TEMPLATES, AnswerConsolidator
|
|
48
|
+
from .providers import (
|
|
49
|
+
get_available_providers,
|
|
50
|
+
get_default_provider,
|
|
51
|
+
get_provider,
|
|
52
|
+
get_providers_info,
|
|
53
|
+
list_providers,
|
|
54
|
+
)
|
|
55
|
+
from .types import Citation, SearchResult, WebSearchResponse
|
|
56
|
+
|
|
57
|
+
# Module logger
|
|
58
|
+
_logger = get_logger("Search", level="INFO")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _get_web_search_config() -> dict[str, Any]:
    """
    Load the ``tools.web_search`` section of main.yaml via the standard config loader.

    This is best-effort: any failure to load the configuration, or a section
    that is missing, empty, or not a mapping, yields an empty dict so that
    callers can rely on ``.get(...)`` with their own defaults.

    Returns:
        Mapping with the web search settings, or ``{}`` when unavailable.
    """
    # Local import: only needed for the shape checks below.
    from collections.abc import Mapping

    try:
        config = load_config_with_main("main.yaml", PROJECT_ROOT)
    except Exception as e:
        # Deliberately broad: an unreadable config must not break search.
        _logger.debug(f"Could not load config: {e}")
        return {}

    # ``.get(key, {})`` only covers an absent key. A key that is present but
    # null (presumably what an empty ``web_search:`` entry loads as) would
    # otherwise leak ``None`` to callers, so validate every level explicitly.
    tools_section = config.get("tools") if isinstance(config, Mapping) else None
    search_section = (
        tools_section.get("web_search") if isinstance(tools_section, Mapping) else None
    )
    if not isinstance(search_section, Mapping):
        if search_section is not None:
            _logger.debug(
                f"Ignoring tools.web_search config of type {type(search_section).__name__}"
            )
        return {}
    return search_section
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _save_results(result: dict[str, Any], output_dir: str, provider: str) -> str:
    """Write ``result`` as pretty-printed JSON under ``output_dir`` and return the file path.

    The directory (including missing parents) is created on demand. The file
    is named ``search_<provider>_<YYYYmmdd_HHMMSS>.json`` using the local time
    at the moment of the call.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = target_dir / f"search_{provider}_{stamp}.json"

    # ensure_ascii=False keeps non-ASCII text readable in the saved file.
    with destination.open("w", encoding="utf-8") as handle:
        json.dump(result, handle, indent=2, ensure_ascii=False)

    return str(destination)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def web_search(
    query: str,
    output_dir: str | None = None,
    verbose: bool = False,
    provider: str | None = None,
    # Answer consolidation (applied only when the provider reports supports_answer=False)
    consolidation: str | None = None,
    consolidation_custom_template: str | None = None,
    consolidation_llm_model: str | None = None,
    # Legacy Baidu keyword arguments, kept so older call sites continue to work
    baidu_model: str = "ernie-4.5-turbo-32k",
    baidu_enable_deep_search: bool = False,
    baidu_search_recency_filter: str = "week",
    **provider_kwargs: Any,
) -> dict[str, Any]:
    """
    Run a web search through the selected provider and return the result as a dict.

    Args:
        query: Search query.
        output_dir: If given (and non-empty), the result dict is also written
            to a JSON file in this directory and the file path is added to the
            returned dict under ``"result_file"``.
        verbose: When True, log the query, a preview of the answer (first 200
            characters) and the number of citations.
        provider: Provider name. Resolution order is: this argument, then the
            ``SEARCH_PROVIDER`` environment variable, then the ``provider``
            config value, then ``"perplexity"``. The name is lower-cased.
        consolidation: Consolidation type handed to ``AnswerConsolidator``.
            Falls back to the ``consolidation`` config value when None. It is
            only applied if the provider's ``supports_answer`` is falsy.
        consolidation_custom_template: Custom template for consolidation.
            Falls back to the ``consolidation_template`` config value when None.
        consolidation_llm_model: Model name forwarded to the consolidator as
            ``llm_config={"model": ...}``; omitted when empty.
        baidu_model: Legacy Baidu option, used as the default for ``model``.
        baidu_enable_deep_search: Legacy Baidu option, used as the default for
            ``enable_deep_search``.
        baidu_search_recency_filter: Legacy Baidu option, used as the default
            for ``search_recency_filter``.
        **provider_kwargs: Extra options passed straight to the provider's
            ``search`` method.

    Returns:
        dict: ``response.to_dict()`` of the provider response (optionally with
        ``"result_file"``). If web search is disabled in the config, a
        placeholder dict with ``"provider": "disabled"`` is returned instead.

    Raises:
        Exception: If the provider's search call fails; the original error is
            chained as the cause. Errors raised while resolving the provider
            (``get_provider``) propagate unchanged.

    Example:
        >>> result = web_search("What is machine learning?")
        >>> print(result["answer"])
        Machine learning is a subset of artificial intelligence...
        >>> print(result["citations"])
        [{"id": 1, "url": "https://...", "title": "...", ...}]
    """
    settings = _get_web_search_config()

    # Guard: searching can be switched off in the config (enabled unless stated otherwise).
    if not settings.get("enabled", True):
        _logger.warning("Web search is disabled in config")
        return {
            "timestamp": datetime.now().isoformat(),
            "query": query,
            "answer": "Web search is disabled.",
            "citations": [],
            "search_results": [],
            "provider": "disabled",
        }

    # Provider precedence: explicit argument > environment > config > built-in default.
    requested = (
        provider
        or os.environ.get("SEARCH_PROVIDER")
        or settings.get("provider")
        or "perplexity"
    )
    provider_name = requested.lower()

    # Consolidation settings fall back to the config only when the caller passed None.
    mode = consolidation if consolidation is not None else settings.get("consolidation")
    if consolidation_custom_template is not None:
        template = consolidation_custom_template
    else:
        template = settings.get("consolidation_template") or None

    # Legacy Baidu parameters act as defaults; explicit provider_kwargs win.
    if provider_name == "baidu":
        legacy_defaults = (
            ("model", baidu_model),
            ("enable_deep_search", baidu_enable_deep_search),
            ("search_recency_filter", baidu_search_recency_filter),
        )
        for option, legacy_value in legacy_defaults:
            provider_kwargs.setdefault(option, legacy_value)

    engine = get_provider(provider_name)

    _logger.progress(f"[{engine.name}] Searching: {query[:50]}...")

    try:
        outcome = engine.search(query, **provider_kwargs)
        _logger.success(f"[{engine.name}] Search completed")
    except Exception as exc:
        _logger.error(f"[{engine.name}] Search failed: {exc}")
        raise Exception(f"{engine.name} search failed: {exc}") from exc

    # Providers that do not produce their own answer can have one consolidated.
    if mode and not engine.supports_answer:
        llm_settings = {"model": consolidation_llm_model} if consolidation_llm_model else None
        consolidator = AnswerConsolidator(
            consolidation_type=mode,
            custom_template=template,
            llm_config=llm_settings,
        )
        outcome = consolidator.consolidate(outcome)

    # Callers receive a plain dict rather than the response object.
    result = outcome.to_dict()

    if output_dir:
        saved_to = _save_results(result, output_dir, provider_name)
        # Added after saving, so the file itself does not contain this key.
        result["result_file"] = saved_to
        _logger.debug(f"Search results saved to: {saved_to}")

    if verbose:
        answer = result.get("answer", "")
        _logger.info(f"Query: {query}")
        if answer:
            preview = f"{answer[:200]}..." if len(answer) > 200 else answer
            _logger.info(f"Answer: {preview}")
        _logger.info(f"Citations: {len(result.get('citations', []))}")

    return result
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def get_current_config() -> dict[str, Any]:
    """
    Describe the web search configuration currently in effect.

    Returns:
        dict with the keys, in this order:
            - enabled: config value, True when not set
            - provider: effective provider name, lower-cased
              (``SEARCH_PROVIDER`` env var, else config, else "perplexity")
            - consolidation: config value or None
            - consolidation_template: config value, or None when empty
            - providers: result of ``get_providers_info()``
            - consolidation_types: ``CONSOLIDATION_TYPES``
            - template_providers: keys of ``PROVIDER_TEMPLATES``
            - config_source: "env", "yaml" or "default", i.e. where the
              provider name came from
    """
    settings = _get_web_search_config()

    effective_provider = (
        os.environ.get("SEARCH_PROVIDER") or settings.get("provider") or "perplexity"
    ).lower()

    snapshot: dict[str, Any] = {
        "enabled": settings.get("enabled", True),
        "provider": effective_provider,
        "consolidation": settings.get("consolidation"),
        "consolidation_template": settings.get("consolidation_template") or None,
        # For frontend display
        "providers": get_providers_info(),
        "consolidation_types": CONSOLIDATION_TYPES,
        # Providers that have a consolidation template registered
        "template_providers": list(PROVIDER_TEMPLATES.keys()),
    }

    # Same precedence as the provider lookup above: environment beats YAML beats default.
    if os.environ.get("SEARCH_PROVIDER"):
        origin = "env"
    elif settings.get("provider"):
        origin = "yaml"
    else:
        origin = "default"
    snapshot["config_source"] = origin

    return snapshot
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
# Backwards compatibility alias
|
|
259
|
+
SearchProvider = BaseSearchProvider
|
|
260
|
+
|
|
261
|
+
# Export public API
|
|
262
|
+
__all__ = [
|
|
263
|
+
# Main function
|
|
264
|
+
"web_search",
|
|
265
|
+
"get_current_config",
|
|
266
|
+
# Provider management
|
|
267
|
+
"get_provider",
|
|
268
|
+
"list_providers",
|
|
269
|
+
"get_available_providers",
|
|
270
|
+
"get_default_provider",
|
|
271
|
+
"get_providers_info",
|
|
272
|
+
# Types
|
|
273
|
+
"WebSearchResponse",
|
|
274
|
+
"Citation",
|
|
275
|
+
"SearchResult",
|
|
276
|
+
# Consolidation
|
|
277
|
+
"AnswerConsolidator",
|
|
278
|
+
"CONSOLIDATION_TYPES",
|
|
279
|
+
"PROVIDER_TEMPLATES",
|
|
280
|
+
# Base class
|
|
281
|
+
"BaseSearchProvider",
|
|
282
|
+
"SearchProvider",
|
|
283
|
+
"SEARCH_API_KEY_ENV",
|
|
284
|
+
]
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Web Search Base Provider - Abstract base class for all search providers
|
|
3
|
+
|
|
4
|
+
This module defines the BaseSearchProvider class that all search providers must inherit from.
|
|
5
|
+
All providers use a unified SEARCH_API_KEY environment variable.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
import os
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from src.logging import get_logger
|
|
13
|
+
|
|
14
|
+
from .types import WebSearchResponse
|
|
15
|
+
|
|
16
|
+
# Unified API key environment variable
|
|
17
|
+
SEARCH_API_KEY_ENV = "SEARCH_API_KEY"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BaseSearchProvider(ABC):
    """Common base for web search providers.

    Subclasses implement :meth:`search`. The API key is taken from the
    constructor argument or, failing that, from the unified ``SEARCH_API_KEY``
    environment variable.
    """

    # Provider metadata; the defaults here are placeholders for subclasses to override.
    name: str = "base"
    display_name: str = "Base Provider"
    description: str = ""
    requires_api_key: bool = True
    supports_answer: bool = False  # True when the provider returns its own LLM answer
    BASE_URL: str = ""  # Endpoint, intended to be set by each concrete provider

    def __init__(self, api_key: str | None = None, **kwargs: Any) -> None:
        """
        Set up the logger, API key and extra configuration.

        Args:
            api_key: API key for the provider. When empty or None, the key is
                read from the SEARCH_API_KEY environment variable.
            **kwargs: Additional configuration options, stored as ``self.config``.

        Raises:
            ValueError: If no key is available and the provider requires one.
        """
        logger_name = f"Search.{self.__class__.__name__}"
        self.logger = get_logger(logger_name, level="INFO")
        self.api_key = api_key if api_key else self._get_api_key()
        self.config = kwargs

    def _get_api_key(self) -> str:
        """Return the key from SEARCH_API_KEY, raising ValueError if required but unset."""
        env_key = os.environ.get(SEARCH_API_KEY_ENV, "")
        if not self.requires_api_key or env_key:
            # May be an empty string for providers that do not need a key.
            return env_key
        raise ValueError(
            f"{self.name} requires {SEARCH_API_KEY_ENV} environment variable. "
            "Please set it before using this provider."
        )

    @abstractmethod
    def search(self, query: str, **kwargs: Any) -> WebSearchResponse:
        """
        Run a search and return it in the standardized response format.

        Args:
            query: The search query.
            **kwargs: Provider-specific options.

        Returns:
            WebSearchResponse: Standardized search response.
        """

    def is_available(self) -> bool:
        """
        Report whether the provider can be used.

        The base implementation only checks for an API key (instance attribute
        or SEARCH_API_KEY) when ``requires_api_key`` is set.

        Returns:
            bool: True if no key is required or a key is present, False otherwise.
        """
        try:
            if not self.requires_api_key:
                return True
            return bool(self.api_key or os.environ.get(SEARCH_API_KEY_ENV, ""))
        except (ValueError, ImportError):
            return False
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
__all__ = ["BaseSearchProvider", "SEARCH_API_KEY_ENV"]
|