realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Serper Google SERP Provider
|
|
3
|
+
|
|
4
|
+
API: https://serper.dev
|
|
5
|
+
Endpoint: https://google.serper.dev/{mode}
|
|
6
|
+
|
|
7
|
+
Features:
|
|
8
|
+
- Real-time Google search results (1-2 seconds)
|
|
9
|
+
- Modes: search, scholar
|
|
10
|
+
- Knowledge graph extraction
|
|
11
|
+
- People Also Ask extraction
|
|
12
|
+
- Related searches
|
|
13
|
+
- Very cheap: $1/1000 queries at scale
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
import json
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
import requests
|
|
21
|
+
|
|
22
|
+
from ..base import BaseSearchProvider
|
|
23
|
+
from ..types import Citation, SearchResult, WebSearchResponse
|
|
24
|
+
from . import register_provider
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SerperAPIError(Exception):
    """Error raised when a Serper API request does not succeed."""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@register_provider("serper")
class SerperProvider(BaseSearchProvider):
    """Serper Google SERP provider.

    Posts the query to ``{BASE_URL}/{mode}`` and maps the JSON reply onto the
    shared ``WebSearchResponse`` / ``SearchResult`` / ``Citation`` types.
    """

    display_name = "Serper"
    description = "Google SERP results"
    supports_answer = False  # Raw SERP results, no LLM answer
    BASE_URL = "https://google.serper.dev"

    def search(
        self,
        query: str,
        mode: str = "search",  # search, scholar
        num: int = 10,
        gl: str = "us",  # Country code
        hl: str = "en",  # Language code
        page: int = 1,
        autocorrect: bool = True,
        timeout: int = 30,
        **kwargs: Any,
    ) -> WebSearchResponse:
        """
        Perform Google SERP search using Serper API.

        Args:
            query: Search query.
            mode: Search mode - "search" or "scholar". Used as the URL path
                segment appended to ``BASE_URL``.
            num: Number of results (default 10, max 100).
            gl: Country code (default "us").
            hl: Language code (default "en").
            page: Page number for pagination.
            autocorrect: Enable autocorrect (default True).
            timeout: Request timeout in seconds.
            **kwargs: Additional options. Accepted but not forwarded to the
                API by this method.

        Returns:
            WebSearchResponse: Standardized search response. ``answer`` is
            taken from the answer box (``answer``, then ``snippet``) or,
            failing that, from the knowledge graph ``description``; it is an
            empty string when neither is present.

        Raises:
            SerperAPIError: If the API responds with a status other than 200.
        """
        # NOTE(review): self.logger and self.api_key are presumably provided by
        # BaseSearchProvider - confirm against the base class.
        self.logger.debug(f"Calling Serper API mode={mode}, num={num}")
        headers = {
            "X-API-KEY": self.api_key,
            "Content-Type": "application/json",
        }

        payload: dict[str, Any] = {
            "q": query,
            "num": num,
            "gl": gl,
            "hl": hl,
            "page": page,
            "autocorrect": autocorrect,
        }

        url = f"{self.BASE_URL}/{mode}"
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)

        if response.status_code != 200:
            try:
                error_data = response.json()
            except (json.JSONDecodeError, ValueError):
                error_data = {"message": response.text}
            self.logger.error(f"Serper API error: {response.status_code} - {error_data}")
            # A JSON error body is not guaranteed to be an object (it may be a
            # bare string or list), so only call .get() on an actual dict.
            if isinstance(error_data, dict):
                message = error_data.get("message", response.text)
            else:
                message = response.text
            raise SerperAPIError(f"Serper API error: {response.status_code} - {message}")

        data = response.json()
        self.logger.debug(f"Serper returned {len(data.get('organic', []))} results")

        # Extract search results
        citations: list[Citation] = []
        search_results: list[SearchResult] = []

        # Both search and scholar return results in "organic" key
        results_key = "organic"

        for i, result in enumerate(data.get(results_key, []), 1):
            # Handle different result formats
            title = result.get("title", "")
            url_val = result.get("link", result.get("url", ""))
            snippet = result.get("snippet", result.get("description", ""))
            date = result.get("date", "")
            source = result.get("source", "")

            # Extract sitelinks if available (a missing or null value yields [])
            sitelinks = [
                {"title": sl.get("title", ""), "link": sl.get("link", "")}
                for sl in result.get("sitelinks") or []
            ]

            # Copy the attributes so the scholar fields added below do not
            # mutate the raw response payload; also tolerates a null value.
            attributes: dict[str, Any] = dict(result.get("attributes") or {})

            # Scholar mode: extract publication info, citations, PDF URL, year
            if mode == "scholar":
                # publicationInfo is a string like
                # "A Vaswani, N Shazeer... - Advances in neural..., 2017"
                if result.get("publicationInfo"):
                    attributes["publicationInfo"] = result["publicationInfo"]
                # citedBy is a number (0 is kept, hence the explicit None check)
                if result.get("citedBy") is not None:
                    attributes["citedBy"] = result["citedBy"]
                # pdfUrl is a direct link to PDF
                if result.get("pdfUrl"):
                    attributes["pdfUrl"] = result["pdfUrl"]
                # year is a number
                if result.get("year") is not None:
                    attributes["year"] = result["year"]
                # paper ID
                if result.get("id"):
                    attributes["paperId"] = result["id"]

            search_results.append(
                SearchResult(
                    title=title,
                    url=url_val,
                    snippet=snippet,
                    date=date,
                    source=source,
                    sitelinks=sitelinks,
                    attributes=attributes,
                )
            )

            citations.append(
                Citation(
                    id=i,
                    reference=f"[{i}]",
                    url=url_val,
                    title=title,
                    snippet=snippet,
                    date=date,
                    source=source,
                )
            )

        # Build metadata with rich SERP data
        metadata: dict[str, Any] = {
            "finish_reason": "stop",
            "mode": mode,
            "searchParameters": data.get("searchParameters", {}),
        }

        # Include the optional rich sections only when present and non-empty
        for section in ("knowledgeGraph", "answerBox", "peopleAlsoAsk", "relatedSearches"):
            if data.get(section):
                metadata[section] = data[section]

        # Build answer from answer box or knowledge graph if available.
        # The `or` chain keeps the result a string even if a field is null.
        answer = ""
        if data.get("answerBox"):
            ab = data["answerBox"]
            answer = ab.get("answer") or ab.get("snippet") or ""
        elif data.get("knowledgeGraph"):
            kg = data["knowledgeGraph"]
            answer = kg.get("description") or ""

        return WebSearchResponse(
            query=query,
            answer=answer,
            provider="serper_scholar" if mode == "scholar" else "serper",
            timestamp=datetime.now().isoformat(),
            model=f"serper-{mode}",
            citations=citations,
            search_results=search_results,
            usage={},
            metadata=metadata,
        )
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tavily Search Provider
|
|
3
|
+
|
|
4
|
+
API Docs: https://docs.tavily.com/documentation/api-reference/endpoint/search
|
|
5
|
+
|
|
6
|
+
Features:
|
|
7
|
+
- Research-focused search with relevance scoring
|
|
8
|
+
- Optional LLM-generated answers (include_answer=true)
|
|
9
|
+
- Full raw content extraction (include_raw_content=true)
|
|
10
|
+
- Topic filtering (general, news, finance)
|
|
11
|
+
- Time range filtering (day, week, month, year)
|
|
12
|
+
- Domain include/exclude lists
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
import json
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
import requests
|
|
20
|
+
|
|
21
|
+
from ..base import BaseSearchProvider
|
|
22
|
+
from ..types import Citation, SearchResult, WebSearchResponse
|
|
23
|
+
from . import register_provider
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@register_provider("tavily")
class TavilyProvider(BaseSearchProvider):
    """Tavily research-focused search provider.

    Posts the query to ``BASE_URL`` and maps the JSON reply onto the shared
    ``WebSearchResponse`` / ``SearchResult`` / ``Citation`` types.
    """

    name = "tavily"
    display_name = "Tavily"
    description = "Research-focused search"
    supports_answer = True
    BASE_URL = "https://api.tavily.com/search"

    def search(
        self,
        query: str,
        search_depth: str = "basic",  # basic, advanced
        topic: str = "general",  # general, news, finance
        max_results: int = 10,
        include_answer: bool = True,  # Get LLM-generated answer
        include_raw_content: bool = False,  # Get full page content
        include_images: bool = False,
        days: int | None = None,  # Time filter (1-365)
        include_domains: list[str] | None = None,
        exclude_domains: list[str] | None = None,
        timeout: int = 60,
        **kwargs: Any,
    ) -> WebSearchResponse:
        """
        Perform research-focused search using Tavily API.

        Args:
            query: Search query.
            search_depth: Search depth - "basic" (faster) or "advanced" (more thorough).
            topic: Topic category - "general", "news", or "finance".
            max_results: Maximum number of results (1-20).
            include_answer: Include LLM-generated answer.
            include_raw_content: Include full raw content of pages.
            include_images: Include images in results.
            days: Filter results to last N days (1-365). Omitted from the
                request when None.
            include_domains: List of domains to include. Omitted when empty.
            exclude_domains: List of domains to exclude. Omitted when empty.
            timeout: Request timeout in seconds.
            **kwargs: Additional options. Accepted but not forwarded to the
                API by this method.

        Returns:
            WebSearchResponse: Standardized search response. String fields
            that the API reports as null (``answer``, ``raw_content``) are
            normalized to empty strings.

        Raises:
            Exception: If the API responds with a status other than 200.
        """
        # NOTE(review): self.logger and self.api_key are presumably provided by
        # BaseSearchProvider - confirm against the base class.
        self.logger.debug(f"Calling Tavily API depth={search_depth}, max_results={max_results}")
        payload: dict[str, Any] = {
            "api_key": self.api_key,
            "query": query,
            "search_depth": search_depth,
            "topic": topic,
            "max_results": max_results,
            "include_answer": include_answer,
            "include_raw_content": include_raw_content,
            "include_images": include_images,
        }

        # Optional filters are only sent when the caller supplied them
        if days is not None:
            payload["days"] = days
        if include_domains:
            payload["include_domains"] = include_domains
        if exclude_domains:
            payload["exclude_domains"] = exclude_domains

        response = requests.post(self.BASE_URL, json=payload, timeout=timeout)

        if response.status_code != 200:
            try:
                error_data = response.json()
            except (json.JSONDecodeError, ValueError):
                error_data = {"error": response.text}
            self.logger.error(f"Tavily API error: {response.status_code} - {error_data}")
            # A JSON error body is not guaranteed to be an object (it may be a
            # bare string or list), so only call .get() on an actual dict.
            if isinstance(error_data, dict):
                message = error_data.get("error", response.text)
            else:
                message = response.text
            raise Exception(f"Tavily API error: {response.status_code} - {message}")

        data = response.json()
        self.logger.debug(f"Tavily returned {len(data.get('results', []))} results")

        # Extract answer; the key may be present with a null value when no
        # answer was generated, so normalize to "" to honor the str contract.
        answer = data.get("answer") or ""

        # Extract search results
        citations: list[Citation] = []
        search_results: list[SearchResult] = []

        for i, result in enumerate(data.get("results", []), 1):
            title = result.get("title", "")
            url_val = result.get("url", "")
            snippet = result.get("content", "")
            published = result.get("published_date", "")
            source = result.get("source", "")
            # raw_content is null unless include_raw_content was requested
            raw_content = result.get("raw_content") or ""

            search_results.append(
                SearchResult(
                    title=title,
                    url=url_val,
                    snippet=snippet,
                    date=published,
                    source=source,
                    content=raw_content,
                    score=result.get("score", 0.0),
                )
            )

            citations.append(
                Citation(
                    id=i,
                    reference=f"[{i}]",
                    url=url_val,
                    title=title,
                    snippet=snippet,
                    # Keep the citation's date in step with its SearchResult
                    date=published,
                    source=source,
                    content=raw_content,
                )
            )

        # Build metadata
        metadata: dict[str, Any] = {
            "finish_reason": "stop",
            "search_depth": search_depth,
            "topic": topic,
        }

        if data.get("images"):
            metadata["images"] = data["images"]
        if data.get("response_time"):
            metadata["response_time"] = data["response_time"]

        return WebSearchResponse(
            query=query,
            answer=answer,
            provider="tavily",
            timestamp=datetime.now().isoformat(),
            model=f"tavily-{search_depth}",
            citations=citations,
            search_results=search_results,
            usage={},  # Tavily doesn't provide token usage
            metadata=metadata,
        )
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Web Search Types - Shared dataclasses and type definitions
|
|
3
|
+
|
|
4
|
+
This module defines the standardized types used across all search providers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class Citation:
    """A single numbered source reference, in the provider-independent shape.

    The first three fields are required; everything else defaults to an
    empty string, except ``type`` which defaults to ``"web"``.
    """

    # --- required ---
    id: int
    reference: str  # bracketed marker, e.g. "[1]"
    url: str

    # --- optional descriptive fields ---
    title: str = ""
    snippet: str = ""
    date: str = ""
    source: str = ""
    content: str = ""  # full content, when available

    # --- kept for backward compatibility with the legacy format ---
    type: str = "web"  # citation type (web, pdf, etc.)
    icon: str = ""  # source icon URL
    website: str = ""  # website name
    web_anchor: str = ""  # web anchor text
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class SearchResult:
    """One item from a provider's result list.

    ``title``, ``url`` and ``snippet`` are required; the remaining fields are
    optional and default to empty values.
    """

    # --- required ---
    title: str
    url: str
    snippet: str

    # --- optional ---
    date: str = ""
    source: str = ""
    content: str = ""  # full content if available (e.g., from Jina)
    score: float = 0.0  # relevance score if available

    # --- rich-result extras; each instance gets its own fresh container ---
    sitelinks: list[dict[str, str]] = field(default_factory=list)
    attributes: dict[str, Any] = field(default_factory=dict)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class WebSearchResponse:
    """Provider-independent container for the outcome of one web search."""

    query: str
    answer: str  # LLM-generated answer, or empty for raw SERP providers
    provider: str
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    model: str = ""
    citations: list[Citation] = field(default_factory=list)
    search_results: list[SearchResult] = field(default_factory=list)
    usage: dict[str, Any] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the response into the backward-compatible dict layout.

        Metadata entries are appended as top-level keys, except
        ``finish_reason`` (which is reported under ``response``) and any key
        that would collide with a standard field.
        """
        citation_dicts = []
        for cite in self.citations:
            citation_dicts.append(
                {
                    "id": cite.id,
                    "reference": cite.reference,
                    "url": cite.url,
                    "title": cite.title,
                    "snippet": cite.snippet,
                    "date": cite.date,
                    "source": cite.source,
                    "content": cite.content,
                    "type": cite.type,
                    "icon": cite.icon,
                    "website": cite.website,
                    "web_anchor": cite.web_anchor,
                }
            )

        result_dicts = []
        for item in self.search_results:
            result_dicts.append(
                {
                    "title": item.title,
                    "url": item.url,
                    "snippet": item.snippet,
                    "date": item.date,
                    "source": item.source,
                    "content": item.content,
                    "score": item.score,
                    "sitelinks": item.sitelinks,
                    "attributes": item.attributes,
                }
            )

        # Legacy envelope: the answer is repeated inside an assistant message.
        message = {
            "content": self.answer,
            "role": "assistant",
            "finish_reason": self.metadata.get("finish_reason", "stop"),
        }

        payload = {
            "timestamp": self.timestamp,
            "query": self.query,
            "model": self.model,
            "provider": self.provider,
            "answer": self.answer,
            "response": message,
            "usage": self.usage,
            "citations": citation_dicts,
            "search_results": result_dicts,
        }

        # Surface the remaining metadata without overwriting standard keys.
        extras = {
            name: val
            for name, val in self.metadata.items()
            if name != "finish_reason" and name not in payload
        }
        payload.update(extras)
        return payload
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
__all__ = ["Citation", "SearchResult", "WebSearchResponse"]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Setup Service
|
|
3
|
+
=============
|
|
4
|
+
|
|
5
|
+
System setup and initialization for DeepTutor.
|
|
6
|
+
|
|
7
|
+
Port configuration is done via .env file:
|
|
8
|
+
BACKEND_PORT=8001 (default: 8001)
|
|
9
|
+
FRONTEND_PORT=3782 (default: 3782)
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
from src.services.setup import init_user_directories, get_backend_port, get_frontend_port
|
|
13
|
+
|
|
14
|
+
# Initialize user directories
|
|
15
|
+
init_user_directories()
|
|
16
|
+
|
|
17
|
+
# Get server ports (from .env)
|
|
18
|
+
backend_port = get_backend_port()
|
|
19
|
+
frontend_port = get_frontend_port()
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from .init import (
|
|
23
|
+
get_backend_port,
|
|
24
|
+
get_frontend_port,
|
|
25
|
+
get_ports,
|
|
26
|
+
init_user_directories,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"init_user_directories",
|
|
31
|
+
"get_backend_port",
|
|
32
|
+
"get_frontend_port",
|
|
33
|
+
"get_ports",
|
|
34
|
+
]
|