realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Answer Consolidation - Generate answers from raw search results
|
|
3
|
+
|
|
4
|
+
Supports two strategies:
|
|
5
|
+
1. template: Fast Jinja2 template rendering
|
|
6
|
+
2. llm: LLM-based answer synthesis (uses project's LLM config from env vars)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from jinja2 import BaseLoader, Environment
|
|
12
|
+
|
|
13
|
+
from src.logging import get_logger
|
|
14
|
+
from src.services.llm import get_llm_client
|
|
15
|
+
|
|
16
|
+
from .types import WebSearchResponse
|
|
17
|
+
|
|
18
|
+
# Module logger
|
|
19
|
+
_logger = get_logger("Search.Consolidation", level="INFO")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Available consolidation types
CONSOLIDATION_TYPES = ["none", "template", "llm"]


# =============================================================================
# PROVIDER-SPECIFIC TEMPLATES
# =============================================================================
# Only providers that return raw SERP results (supports_answer=False) need templates.
# AI providers (Perplexity, Tavily, Baidu, Exa) already generate answers.
#
# Every template is a Jinja2 source string producing Markdown. The variables
# each template reads (query, results, max_results, knowledge_graph, links, ...)
# are supplied by AnswerConsolidator._build_provider_context.
PROVIDER_TEMPLATES = {
    # -------------------------------------------------------------------------
    # SERPER TEMPLATE
    # -------------------------------------------------------------------------
    "serper": """{% if knowledge_graph %}
## {{ knowledge_graph.title }}{% if knowledge_graph.type %} ({{ knowledge_graph.type }}){% endif %}

{{ knowledge_graph.description }}
{% if knowledge_graph.attributes %}
{% for key, value in knowledge_graph.attributes.items() %}
- **{{ key }}**: {{ value }}
{% endfor %}
{% endif %}
{% if knowledge_graph.website %}🔗 [{{ knowledge_graph.website }}]({{ knowledge_graph.website }}){% endif %}

---
{% endif %}
{% if answer_box %}
### Direct Answer
{{ answer_box.answer or answer_box.snippet }}
{% if answer_box.title %}*Source: [{{ answer_box.title }}]({{ answer_box.link }})*{% endif %}

---
{% endif %}
### Search Results for "{{ query }}"

{% for result in results[:max_results] %}
**[{{ loop.index }}] {{ result.title }}**
{{ result.snippet }}
{% if result.date %}📅 {{ result.date }}{% endif %}
🔗 {{ result.url }}
{% if result.sitelinks %}
└ Related: {% for link in result.sitelinks[:3] %}[{{ link.title }}]({{ link.link }}){% if not loop.last %} | {% endif %}{% endfor %}
{% endif %}

{% endfor %}
{% if people_also_ask %}
---
### People Also Ask
{% for qa in people_also_ask[:3] %}
**Q: {{ qa.question }}**
{{ qa.snippet }}
*[{{ qa.title }}]({{ qa.link }})*

{% endfor %}
{% endif %}
{% if related_searches %}
---
*Related searches: {% for rs in related_searches[:5] %}{{ rs.query }}{% if not loop.last %}, {% endif %}{% endfor %}*
{% endif %}""",
    # -------------------------------------------------------------------------
    # JINA TEMPLATE
    # -------------------------------------------------------------------------
    # `links` and `images` are mappings. dict.items() yields a view that cannot
    # be sliced, so it is materialised with the `list` filter before the first
    # N entries are taken. Each image is emitted as a Markdown image built from
    # its alt text and URL.
    "jina": """### Search Results for "{{ query }}"

{% for result in results[:max_results] %}
---
## [{{ loop.index }}] {{ result.title }}

{% if result.attributes.date %}📅 *{{ result.attributes.date }}*{% endif %}

{% if result.content %}
{% if result.snippet %}*{{ result.snippet }}*{% endif %}

### Content Preview
{{ result.content[:2000] }}{% if result.content|length > 2000 %}

*[Content truncated - {{ result.attributes.tokens|default('many') }} tokens total]*{% endif %}
{% else %}
*{{ result.snippet }}*
{% endif %}

🔗 [{{ result.url }}]({{ result.url }})

{% endfor %}
---
*{{ results|length }} results via Jina Reader{% if results and results|length > 0 and not results[0].content %} (no-content mode){% endif %}*

{% if links %}
### Extracted Links
{% for name, url in (links.items()|list)[:10] %}
- [{{ name }}]({{ url }})
{% endfor %}
{% endif %}
{% if images %}
### Images Found
{% for alt, src in (images.items()|list)[:5] %}
- ![{{ alt }}]({{ src }})
{% endfor %}
{% endif %}""",
    # -------------------------------------------------------------------------
    # SERPER SCHOLAR TEMPLATE
    # -------------------------------------------------------------------------
    "serper_scholar": """### Academic Results for "{{ query }}"

{% for result in results[:max_results] %}
**[{{ loop.index }}] {{ result.title }}**{% if result.attributes.year %} ({{ result.attributes.year }}){% endif %}

{% if result.attributes.publicationInfo %}*{{ result.attributes.publicationInfo }}*{% endif %}

{{ result.snippet }}

{% if result.attributes.pdfUrl %}📄 [PDF]({{ result.attributes.pdfUrl }}) | {% endif %}🔗 [Link]({{ result.url }})
{% if result.attributes.citedBy %}📚 Cited by: {{ result.attributes.citedBy }}{% endif %}

{% endfor %}
---
*{{ results|length }} academic papers found via Google Scholar*""",
}
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class AnswerConsolidator:
    """
    Consolidate raw SERP results into formatted answers.

    Depending on ``consolidation_type`` the answer is produced by rendering a
    Jinja2 template ("template"), by asking the project's LLM client to
    synthesise one ("llm"), or not at all ("none").

    IMPORTANT: Template consolidation only works for providers that have
    specific templates defined (serper, jina, serper_scholar).

    For other providers, use:
    - consolidation_type="llm" for LLM-based synthesis
    - custom_template for a custom Jinja2 template
    """

    # Map provider names to their specific templates
    # Only these providers support template consolidation
    PROVIDER_TEMPLATE_MAP = {
        "serper": "serper",
        "jina": "jina",
        "serper_scholar": "serper_scholar",
    }

    def __init__(
        self,
        consolidation_type: str = "template",
        custom_template: str | None = None,
        llm_config: dict[str, Any] | None = None,
        max_results: int = 5,
        autoescape: bool = True,
    ):
        """
        Initialize consolidator.

        Args:
            consolidation_type: "none", "template", or "llm". The value is not
                validated here; an unknown value raises in ``consolidate``.
            custom_template: Custom Jinja2 template string. An empty string is
                treated the same as ``None`` (no custom template).
            llm_config: Optional overrides (system_prompt, max_tokens, temperature)
            max_results: Maximum results to include in answer
            autoescape: Whether to enable Jinja2 autoescape for security (default: True)
        """
        self.consolidation_type = consolidation_type
        self.custom_template = custom_template
        self.llm_config = llm_config or {}
        self.max_results = max_results
        # Security: autoescape defaults to True (set in function signature above).
        # When True, Jinja2 auto-escapes HTML to prevent XSS.
        self.jinja_env = Environment(loader=BaseLoader(), autoescape=autoescape)  # nosec B701

        # Use the same truthiness test as _get_template_for_provider so the
        # warning is only emitted for a template that will actually be used.
        if self.custom_template and autoescape:
            _logger.warning(
                "Custom Jinja2 templates are rendered with autoescape=True. "
                "HTML in rendered variables will be escaped by default; use the "
                "'safe' filter in your template if you intentionally need raw HTML."
            )

    def consolidate(self, response: WebSearchResponse) -> WebSearchResponse:
        """
        Consolidate search results into an answer.

        The answer is written onto the ``response`` object that was passed in,
        and that same object is returned.

        Args:
            response: WebSearchResponse with search_results populated

        Returns:
            WebSearchResponse with answer field populated (returned unchanged
            when consolidation_type is "none")

        Raises:
            ValueError: If consolidation_type is not one of the known types, or
                if template consolidation has no template for the provider.
        """
        if self.consolidation_type == "none":
            _logger.debug("Consolidation disabled, returning raw response")
            return response

        results_count = len(response.search_results)
        _logger.info(
            f"Consolidating {results_count} results from {response.provider} using {self.consolidation_type}"
        )

        if self.consolidation_type == "template":
            response.answer = self._consolidate_with_template(response)
            _logger.success(f"Template consolidation completed ({len(response.answer)} chars)")
        elif self.consolidation_type == "llm":
            response.answer = self._consolidate_with_llm(response)
            _logger.success(f"LLM consolidation completed ({len(response.answer)} chars)")
        else:
            _logger.error(f"Unknown consolidation type: {self.consolidation_type}")
            raise ValueError(f"Unknown consolidation type: {self.consolidation_type}")

        return response

    def _get_template_for_provider(self, provider: str) -> str:
        """
        Get the template for a specific provider.

        Only provider-specific templates exist because each provider has
        different response schemas and metadata. No universal templates.

        Args:
            provider: Provider name (e.g., "serper", "jina"); matched
                case-insensitively.

        Returns:
            The custom template when one was supplied, otherwise the template
            string registered for this provider.

        Raises:
            ValueError: If no template exists for this provider
        """
        # 1. Custom template takes highest priority
        if self.custom_template:
            _logger.debug(f"Using custom template ({len(self.custom_template)} chars)")
            return self.custom_template

        # 2. Get provider-specific template
        template_key = self.PROVIDER_TEMPLATE_MAP.get(provider.lower())
        if template_key and template_key in PROVIDER_TEMPLATES:
            _logger.debug(f"Using provider-specific template: {template_key}")
            return PROVIDER_TEMPLATES[template_key]

        # 3. No template exists for this provider - fail explicitly
        available = list(PROVIDER_TEMPLATES.keys())
        _logger.error(f"No template for provider '{provider}'. Available: {available}")
        raise ValueError(
            f"No template consolidation available for provider '{provider}'. "
            f"Template consolidation only works with: {available}. "
            f"Use consolidation='llm' or provide a custom_template for other providers."
        )

    def _build_provider_context(self, response: WebSearchResponse) -> dict[str, Any]:
        """
        Build template context with provider-specific fields.

        Each provider has unique response fields that we extract from metadata.

        Args:
            response: Search response whose fields are copied into the context.

        Returns:
            Dict with the common keys (query, provider, model, max_results,
            results, citations, timestamp) plus, for "serper",
            knowledge_graph / answer_box / people_also_ask / related_searches
            and, for "jina", links / images.
        """
        # Base context (common to all providers)
        context: dict[str, Any] = {
            "query": response.query,
            "provider": response.provider,
            "model": response.model,
            "max_results": self.max_results,
            "results": [
                {
                    "title": r.title,
                    "url": r.url,
                    "snippet": r.snippet,
                    "date": r.date,
                    "source": r.source,
                    "content": r.content,
                    "sitelinks": r.sitelinks,
                    "attributes": r.attributes,
                }
                for r in response.search_results
            ],
            "citations": [
                {
                    "id": c.id,
                    "reference": c.reference,
                    "url": c.url,
                    "title": c.title,
                    "snippet": c.snippet,
                }
                for c in response.citations
            ],
            "timestamp": response.timestamp,
        }

        # Extract provider-specific metadata
        metadata = response.metadata or {}
        provider_lower = response.provider.lower()

        # -----------------------------------------------------------------
        # SERPER-specific context
        # -----------------------------------------------------------------
        if provider_lower == "serper":
            context["knowledge_graph"] = metadata.get("knowledgeGraph")
            context["answer_box"] = metadata.get("answerBox")
            context["people_also_ask"] = metadata.get("peopleAlsoAsk")
            context["related_searches"] = metadata.get("relatedSearches")

        # -----------------------------------------------------------------
        # JINA-specific context
        # -----------------------------------------------------------------
        elif provider_lower == "jina":
            context["links"] = metadata.get("links", {})
            context["images"] = metadata.get("images", {})

        return context

    def _consolidate_with_template(self, response: WebSearchResponse) -> str:
        """
        Render results using Jinja2 template.

        Args:
            response: Search response to render.

        Returns:
            The rendered template text.

        Raises:
            ValueError: If no template is available for the provider.
            Exception: Any rendering error is logged and re-raised unchanged.
        """
        _logger.debug(f"Building template context for {response.provider}")

        # Get template (auto-detect provider-specific if not explicitly set)
        template_str = self._get_template_for_provider(response.provider)
        template = self.jinja_env.from_string(template_str)

        # Build context with provider-specific fields
        context = self._build_provider_context(response)
        _logger.debug(
            f"Context has {len(context.get('results', []))} results, {len(context.get('citations', []))} citations"
        )

        try:
            rendered = template.render(**context)
            _logger.debug("Template rendered successfully")
            return rendered
        except Exception as e:
            # Log for diagnosis, then let the caller see the original error.
            _logger.error(f"Template rendering failed: {e}")
            raise

    def _consolidate_with_llm(self, response: WebSearchResponse) -> str:
        """
        Generate answer using LLM.

        ``max_tokens`` and ``temperature`` come from ``llm_config`` and default
        to 1000 and 0.3 respectively.

        Args:
            response: Search response whose results are summarised.

        Returns:
            Whatever the LLM client's ``complete_sync`` call returns.
        """
        system_prompt, user_prompt = self._build_prompts(response)

        llm = get_llm_client()
        max_tokens = self.llm_config.get("max_tokens", 1000)
        temperature = self.llm_config.get("temperature", 0.3)

        return llm.complete_sync(
            prompt=user_prompt,
            system_prompt=system_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
        )

    def _build_prompts(self, response: WebSearchResponse) -> tuple[str, str]:
        """
        Build system and user prompts for LLM consolidation.

        Only the first ``max_results`` results are included, and each result's
        content is cut to 5000 characters (with a trailing "..." when cut).

        Args:
            response: Search response providing the query and results.

        Returns:
            ``(system_prompt, user_prompt)``; the system prompt can be
            overridden through ``llm_config["system_prompt"]``.
        """
        results_text = []
        for i, r in enumerate(response.search_results[: self.max_results], 1):
            text = f"[{i}] {r.title}\nURL: {r.url}\n"
            if r.snippet:
                text += f"{r.snippet}\n"
            if r.content:
                text += f"{r.content[:5000]}{'...' if len(r.content) > 5000 else ''}"
            results_text.append(text)

        system_prompt = self.llm_config.get(
            "system_prompt",
            """You are a search result consolidator. Your output will be used as grounding context for another LLM.

Task: Extract and structure relevant information from web search results.

Output format:
- Start with a brief factual summary (2-3 sentences)
- List key facts as bullet points with citation numbers [1], [2], etc.
- Include specific data: numbers, dates, names, definitions
- Note any conflicting information between sources
- End with a "Sources:" section listing [n] URL pairs

Be factual and dense. Omit filler words. Prioritize information diversity.""",
        )

        user_prompt = f"""Query: {response.query}

Search Results:
---
{chr(10).join(results_text)}
---

Consolidate these results into structured grounding context."""

        return system_prompt, user_prompt
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
__all__ = ["AnswerConsolidator", "CONSOLIDATION_TYPES", "PROVIDER_TEMPLATES"]
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Web Search Provider Registry
|
|
3
|
+
|
|
4
|
+
This module manages the registration and retrieval of search providers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from typing import Type
|
|
9
|
+
|
|
10
|
+
from ..base import BaseSearchProvider
|
|
11
|
+
|
|
12
|
+
_PROVIDERS: dict[str, Type[BaseSearchProvider]] = {}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def register_provider(name: str):
    """
    Build a class decorator that adds a provider to the registry.

    The decorated class is stored in ``_PROVIDERS`` under the lower-cased
    name, and its ``name`` attribute is set to that same lower-cased value.

    Args:
        name: Name to register the provider under.

    Returns:
        Decorator function that returns the class it is given.
    """

    def decorator(cls: Type[BaseSearchProvider]):
        registry_key = name.lower()
        _PROVIDERS[registry_key] = cls
        cls.name = registry_key
        return cls

    return decorator
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_provider(name: str, **kwargs) -> BaseSearchProvider:
    """
    Instantiate the registered provider with the given name.

    Args:
        name: Provider name (case-insensitive).
        **kwargs: Arguments to pass to provider constructor.

    Returns:
        BaseSearchProvider: Provider instance.

    Raises:
        ValueError: If provider is not found.
    """
    name = name.lower()
    if name in _PROVIDERS:
        return _PROVIDERS[name](**kwargs)

    # Unknown name: report the registered alternatives in a stable order.
    available = ", ".join(sorted(_PROVIDERS.keys()))
    raise ValueError(f"Unknown provider: {name}. Available: {available}")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def list_providers() -> list[str]:
    """
    Return the names of every registered provider.

    Returns:
        list[str]: Sorted list of provider names.
    """
    provider_names = sorted(_PROVIDERS)
    return provider_names
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_available_providers() -> list[str]:
    """
    List providers that are currently available (have API keys set).

    Each registered provider class is instantiated with no arguments and asked
    ``is_available()``. This is best-effort: a provider that raises during
    construction or during the check is treated as unavailable, and the reason
    is logged at debug level instead of being discarded silently.

    Returns:
        list[str]: Sorted list of available provider names.
    """
    import logging

    log = logging.getLogger(__name__)

    available = []
    for name, cls in _PROVIDERS.items():
        try:
            if cls().is_available():
                available.append(name)
        except Exception as exc:
            # Deliberately broad: one misconfigured provider must not hide the rest.
            log.debug("Search provider %r treated as unavailable: %s", name, exc)
    return sorted(available)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def get_providers_info() -> list[dict]:
    """
    Describe every registered provider using its class attributes.

    Entries are ordered by provider id.

    Returns:
        list[dict]: One dict per provider with the keys id, name, description,
        supports_answer and requires_api_key.
    """
    return [
        {
            "id": provider_id,
            "name": _PROVIDERS[provider_id].display_name,
            "description": _PROVIDERS[provider_id].description,
            "supports_answer": _PROVIDERS[provider_id].supports_answer,
            "requires_api_key": _PROVIDERS[provider_id].requires_api_key,
        }
        for provider_id in sorted(_PROVIDERS)
    ]
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def get_default_provider(**kwargs) -> BaseSearchProvider:
    """
    Instantiate the provider selected by the SEARCH_PROVIDER env var.

    Falls back to "perplexity" when the variable is not set.

    Args:
        **kwargs: Arguments to pass to provider constructor.

    Returns:
        BaseSearchProvider: Default provider instance.
    """
    configured = os.environ.get("SEARCH_PROVIDER", "perplexity")
    return get_provider(configured.lower(), **kwargs)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# Auto-import all providers to trigger registration
|
|
119
|
+
from . import baidu, exa, jina, perplexity, serper, tavily
|
|
120
|
+
|
|
121
|
+
__all__ = [
|
|
122
|
+
"register_provider",
|
|
123
|
+
"get_provider",
|
|
124
|
+
"list_providers",
|
|
125
|
+
"get_available_providers",
|
|
126
|
+
"get_providers_info",
|
|
127
|
+
"get_default_provider",
|
|
128
|
+
]
|