realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,209 @@
1
+ """
2
+ Serper Google SERP Provider
3
+
4
+ API: https://serper.dev
5
+ Endpoint: https://google.serper.dev/{mode}
6
+
7
+ Features:
8
+ - Real-time Google search results (1-2 seconds)
9
+ - Modes: search, scholar
10
+ - Knowledge graph extraction
11
+ - People Also Ask extraction
12
+ - Related searches
13
+ - Very cheap: $1/1000 queries at scale
14
+ """
15
+
16
+ from datetime import datetime
17
+ import json
18
+ from typing import Any
19
+
20
+ import requests
21
+
22
+ from ..base import BaseSearchProvider
23
+ from ..types import Citation, SearchResult, WebSearchResponse
24
+ from . import register_provider
25
+
26
+
27
class SerperAPIError(Exception):
    """Error raised for a failed call to the Serper API."""
31
+
32
+
33
@register_provider("serper")
class SerperProvider(BaseSearchProvider):
    """Search provider that queries the Serper Google SERP API.

    Responses are converted into the shared ``WebSearchResponse`` shape. The
    ``answer`` field is filled from the answer box or knowledge graph when the
    API returns one; no LLM-generated answer is produced.
    """

    display_name = "Serper"
    description = "Google SERP results"
    supports_answer = False  # Raw SERP results, no LLM answer
    BASE_URL = "https://google.serper.dev"

    # Scholar-only result keys copied into ``SearchResult.attributes``:
    # (response key, attribute key, keep falsy values such as 0).
    _SCHOLAR_FIELDS = (
        ("publicationInfo", "publicationInfo", False),
        ("citedBy", "citedBy", True),
        ("pdfUrl", "pdfUrl", False),
        ("year", "year", True),
        ("id", "paperId", False),
    )

    # Top-level response sections forwarded verbatim into the metadata.
    _METADATA_SECTIONS = (
        "knowledgeGraph",
        "answerBox",
        "peopleAlsoAsk",
        "relatedSearches",
    )

    def search(
        self,
        query: str,
        mode: str = "search",  # search, scholar
        num: int = 10,
        gl: str = "us",  # Country code
        hl: str = "en",  # Language code
        page: int = 1,
        autocorrect: bool = True,
        timeout: int = 30,
        **kwargs: Any,
    ) -> WebSearchResponse:
        """
        Perform Google SERP search using Serper API.

        Args:
            query: Search query.
            mode: Search mode - "search" or "scholar". Used as the URL path
                segment appended to ``BASE_URL``.
            num: Number of results (default 10, max 100).
            gl: Country code (default "us").
            hl: Language code (default "en").
            page: Page number for pagination.
            autocorrect: Enable autocorrect (default True).
            timeout: Request timeout in seconds.
            **kwargs: Additional options (accepted but not used here).

        Returns:
            WebSearchResponse: Standardized search response.

        Raises:
            SerperAPIError: If the API answers with a non-200 status code.
                Exceptions raised by ``requests`` itself are not caught.
        """
        self.logger.debug(f"Calling Serper API mode={mode}, num={num}")
        headers = {
            "X-API-KEY": self.api_key,
            "Content-Type": "application/json",
        }

        payload: dict[str, Any] = {
            "q": query,
            "num": num,
            "gl": gl,
            "hl": hl,
            "page": page,
            "autocorrect": autocorrect,
        }

        url = f"{self.BASE_URL}/{mode}"
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)

        if response.status_code != 200:
            try:
                error_data = response.json()
            except (json.JSONDecodeError, ValueError):
                error_data = {"message": response.text}
            self.logger.error(f"Serper API error: {response.status_code} - {error_data}")
            # An error body can be valid JSON without being an object (e.g. a
            # bare string); fall back to the raw text instead of crashing on
            # ``.get`` and hiding the real API error.
            if isinstance(error_data, dict):
                message = error_data.get("message", response.text)
            else:
                message = response.text
            raise SerperAPIError(f"Serper API error: {response.status_code} - {message}")

        data = response.json()
        # Both search and scholar return results in "organic" key; tolerate an
        # explicit null there.
        organic = data.get("organic") or []
        self.logger.debug(f"Serper returned {len(organic)} results")

        citations: list[Citation] = []
        search_results: list[SearchResult] = []

        for i, result in enumerate(organic, 1):
            # Handle different result formats
            title = result.get("title", "")
            url_val = result.get("link", result.get("url", ""))
            snippet = result.get("snippet", result.get("description", ""))
            date = result.get("date", "")
            source = result.get("source", "")

            sitelinks = [
                {"title": sl.get("title", ""), "link": sl.get("link", "")}
                for sl in result.get("sitelinks") or []
            ]

            # Copy so the scholar fields below never mutate the parsed
            # response, and so a null "attributes" value is handled.
            attributes: dict[str, Any] = dict(result.get("attributes") or {})

            # Scholar mode: publication info, citation count, PDF URL, year
            # and paper id are lifted into the attributes dict.
            if mode == "scholar":
                for response_key, attribute_key, keep_falsy in self._SCHOLAR_FIELDS:
                    value = result.get(response_key)
                    if value is None or not (keep_falsy or value):
                        continue
                    attributes[attribute_key] = value

            search_results.append(
                SearchResult(
                    title=title,
                    url=url_val,
                    snippet=snippet,
                    date=date,
                    source=source,
                    sitelinks=sitelinks,
                    attributes=attributes,
                )
            )
            citations.append(
                Citation(
                    id=i,
                    reference=f"[{i}]",
                    url=url_val,
                    title=title,
                    snippet=snippet,
                    date=date,
                    source=source,
                )
            )

        # Build metadata with rich SERP data
        metadata: dict[str, Any] = {
            "finish_reason": "stop",
            "mode": mode,
            "searchParameters": data.get("searchParameters", {}),
        }
        for section in self._METADATA_SECTIONS:
            if data.get(section):
                metadata[section] = data[section]

        # Build answer from answer box or knowledge graph if available
        answer = ""
        if data.get("answerBox"):
            answer_box = data["answerBox"]
            answer = answer_box.get("answer", answer_box.get("snippet", ""))
        elif data.get("knowledgeGraph"):
            answer = data["knowledgeGraph"].get("description", "")

        return WebSearchResponse(
            query=query,
            answer=answer,
            provider="serper_scholar" if mode == "scholar" else "serper",
            timestamp=datetime.now().isoformat(),
            model=f"serper-{mode}",
            citations=citations,
            search_results=search_results,
            usage={},
            metadata=metadata,
        )
@@ -0,0 +1,161 @@
1
+ """
2
+ Tavily Search Provider
3
+
4
+ API Docs: https://docs.tavily.com/documentation/api-reference/endpoint/search
5
+
6
+ Features:
7
+ - Research-focused search with relevance scoring
8
+ - Optional LLM-generated answers (include_answer=true)
9
+ - Full raw content extraction (include_raw_content=true)
10
+ - Topic filtering (general, news, finance)
11
+ - Time range filtering (day, week, month, year)
12
+ - Domain include/exclude lists
13
+ """
14
+
15
+ from datetime import datetime
16
+ import json
17
+ from typing import Any
18
+
19
+ import requests
20
+
21
+ from ..base import BaseSearchProvider
22
+ from ..types import Citation, SearchResult, WebSearchResponse
23
+ from . import register_provider
24
+
25
+
26
class TavilyAPIError(Exception):
    """Error raised when the Tavily API answers with a non-200 status code."""


@register_provider("tavily")
class TavilyProvider(BaseSearchProvider):
    """Tavily research-focused search provider"""

    name = "tavily"
    display_name = "Tavily"
    description = "Research-focused search"
    supports_answer = True
    BASE_URL = "https://api.tavily.com/search"

    def search(
        self,
        query: str,
        search_depth: str = "basic",  # basic, advanced
        topic: str = "general",  # general, news, finance
        max_results: int = 10,
        include_answer: bool = True,  # Get LLM-generated answer
        include_raw_content: bool = False,  # Get full page content
        include_images: bool = False,
        days: int | None = None,  # Time filter (1-365)
        include_domains: list[str] | None = None,
        exclude_domains: list[str] | None = None,
        timeout: int = 60,
        **kwargs: Any,
    ) -> WebSearchResponse:
        """
        Perform research-focused search using Tavily API.

        Args:
            query: Search query.
            search_depth: Search depth - "basic" (faster) or "advanced" (more thorough).
            topic: Topic category - "general", "news", or "finance".
            max_results: Maximum number of results (1-20).
            include_answer: Include LLM-generated answer.
            include_raw_content: Include full raw content of pages.
            include_images: Include images in results.
            days: Filter results to last N days (1-365). Only sent when not None.
            include_domains: List of domains to include. Only sent when non-empty.
            exclude_domains: List of domains to exclude. Only sent when non-empty.
            timeout: Request timeout in seconds.
            **kwargs: Additional options (accepted but not used here).

        Returns:
            WebSearchResponse: Standardized search response.

        Raises:
            TavilyAPIError: If the API answers with a non-200 status code.
                Exceptions raised by ``requests`` itself are not caught.
        """
        self.logger.debug(f"Calling Tavily API depth={search_depth}, max_results={max_results}")
        payload: dict[str, Any] = {
            "api_key": self.api_key,
            "query": query,
            "search_depth": search_depth,
            "topic": topic,
            "max_results": max_results,
            "include_answer": include_answer,
            "include_raw_content": include_raw_content,
            "include_images": include_images,
        }

        # Optional filters are only sent when the caller supplied them.
        if days is not None:
            payload["days"] = days
        if include_domains:
            payload["include_domains"] = include_domains
        if exclude_domains:
            payload["exclude_domains"] = exclude_domains

        response = requests.post(self.BASE_URL, json=payload, timeout=timeout)

        if response.status_code != 200:
            try:
                error_data = response.json()
            except (json.JSONDecodeError, ValueError):
                error_data = {"error": response.text}
            self.logger.error(f"Tavily API error: {response.status_code} - {error_data}")
            # An error body can be valid JSON without being an object; fall
            # back to the raw text instead of crashing on ``.get``.
            if isinstance(error_data, dict):
                message = error_data.get("error", response.text)
            else:
                message = response.text
            raise TavilyAPIError(f"Tavily API error: {response.status_code} - {message}")

        data = response.json()
        results = data.get("results") or []
        self.logger.debug(f"Tavily returned {len(results)} results")

        # Fields that were not requested may come back as JSON null, so
        # ``dict.get(key, "")`` would yield None; ``or`` keeps the str-typed
        # response fields actual strings.
        answer = data.get("answer") or ""

        citations: list[Citation] = []
        search_results: list[SearchResult] = []

        for i, result in enumerate(results, 1):
            title = result.get("title") or ""
            url = result.get("url") or ""
            snippet = result.get("content") or ""
            date = result.get("published_date") or ""
            source = result.get("source") or ""
            raw_content = result.get("raw_content") or ""

            search_results.append(
                SearchResult(
                    title=title,
                    url=url,
                    snippet=snippet,
                    date=date,
                    source=source,
                    content=raw_content,
                    score=result.get("score") or 0.0,
                )
            )
            citations.append(
                Citation(
                    id=i,
                    reference=f"[{i}]",
                    url=url,
                    title=title,
                    snippet=snippet,
                    # Carry the publication date into the citation as well,
                    # matching what the search result already records.
                    date=date,
                    source=source,
                    content=raw_content,
                )
            )

        # Build metadata
        metadata: dict[str, Any] = {
            "finish_reason": "stop",
            "search_depth": search_depth,
            "topic": topic,
        }
        if data.get("images"):
            metadata["images"] = data["images"]
        if data.get("response_time"):
            metadata["response_time"] = data["response_time"]

        return WebSearchResponse(
            query=query,
            answer=answer,
            provider="tavily",
            timestamp=datetime.now().isoformat(),
            model=f"tavily-{search_depth}",
            citations=citations,
            search_results=search_results,
            usage={},  # Tavily doesn't provide token usage
            metadata=metadata,
        )
@@ -0,0 +1,114 @@
1
+ """
2
+ Web Search Types - Shared dataclasses and type definitions
3
+
4
+ This module defines the standardized types used across all search providers.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from datetime import datetime
9
+ from typing import Any
10
+
11
+
12
@dataclass
class Citation:
    """A single numbered source reference in the provider-independent format.

    Only ``id``, ``reference`` and ``url`` are required; every other field
    defaults to an empty string, except ``type`` which defaults to ``"web"``.
    """

    # --- required ---
    id: int
    reference: str  # display marker, e.g. "[1]"
    url: str

    # --- optional descriptive fields ---
    title: str = ""
    snippet: str = ""
    date: str = ""
    source: str = ""
    content: str = ""  # full content, when available

    # --- kept for backward compatibility with the legacy format ---
    type: str = "web"  # citation type (web, pdf, etc.)
    icon: str = ""  # source icon URL
    website: str = ""  # website name
    web_anchor: str = ""  # web anchor text
+
30
+
31
@dataclass
class SearchResult:
    """One entry of a search result list in the provider-independent format.

    ``title``, ``url`` and ``snippet`` are required. ``sitelinks`` and
    ``attributes`` use default factories, so each instance gets its own
    empty container.
    """

    # --- required ---
    title: str
    url: str
    snippet: str

    # --- optional ---
    date: str = ""
    source: str = ""
    content: str = ""  # full content if available (e.g., from Jina)
    score: float = 0.0  # relevance score if available

    # --- rich-result extras ---
    sitelinks: list[dict[str, str]] = field(default_factory=list)
    attributes: dict[str, Any] = field(default_factory=dict)
+
46
+
47
@dataclass
class WebSearchResponse:
    """Provider-independent container for the outcome of one web search.

    ``query``, ``answer`` and ``provider`` are required. ``timestamp``
    defaults to the ISO-formatted local time at construction; the list and
    dict fields default to fresh empty containers.
    """

    query: str
    answer: str  # LLM-generated answer or empty for raw SERP providers
    provider: str
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    model: str = ""
    citations: list[Citation] = field(default_factory=list)
    search_results: list[SearchResult] = field(default_factory=list)
    usage: dict[str, Any] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the response into the backward compatible dict format.

        Returns:
            A dict holding the core fields, a ``response`` sub-dict that
            repeats the answer with a role and finish reason, and the
            citations and search results as lists of plain dicts. Metadata
            entries other than ``finish_reason`` are merged in at the top
            level unless a key of that name is already present.
        """
        # Exported attribute names, in output order.
        citation_keys = (
            "id",
            "reference",
            "url",
            "title",
            "snippet",
            "date",
            "source",
            "content",
            "type",
            "icon",
            "website",
            "web_anchor",
        )
        search_result_keys = (
            "title",
            "url",
            "snippet",
            "date",
            "source",
            "content",
            "score",
            "sitelinks",
            "attributes",
        )

        exported_citations = [
            {key: getattr(citation, key) for key in citation_keys}
            for citation in self.citations
        ]
        exported_results = [
            {key: getattr(item, key) for key in search_result_keys}
            for item in self.search_results
        ]

        payload: dict[str, Any] = {
            "timestamp": self.timestamp,
            "query": self.query,
            "model": self.model,
            "provider": self.provider,
            "answer": self.answer,
            "response": {
                "content": self.answer,
                "role": "assistant",
                "finish_reason": self.metadata.get("finish_reason", "stop"),
            },
            "usage": self.usage,
            "citations": exported_citations,
            "search_results": exported_results,
        }

        # Extra metadata never overrides a core key; finish_reason already
        # lives inside the "response" sub-dict.
        extras = {
            key: value
            for key, value in self.metadata.items()
            if key != "finish_reason" and key not in payload
        }
        payload.update(extras)
        return payload
112
+
113
+
114
+ __all__ = ["Citation", "SearchResult", "WebSearchResponse"]
@@ -0,0 +1,34 @@
1
+ """
2
+ Setup Service
3
+ =============
4
+
5
+ System setup and initialization for DeepTutor.
6
+
7
+ Port configuration is done via .env file:
8
+ BACKEND_PORT=8001 (default: 8001)
9
+ FRONTEND_PORT=3782 (default: 3782)
10
+
11
+ Usage:
12
+ from src.services.setup import init_user_directories, get_backend_port, get_frontend_port
13
+
14
+ # Initialize user directories
15
+ init_user_directories()
16
+
17
+ # Get server ports (from .env)
18
+ backend_port = get_backend_port()
19
+ frontend_port = get_frontend_port()
20
+ """
21
+
22
+ from .init import (
23
+ get_backend_port,
24
+ get_frontend_port,
25
+ get_ports,
26
+ init_user_directories,
27
+ )
28
+
29
+ __all__ = [
30
+ "init_user_directories",
31
+ "get_backend_port",
32
+ "get_frontend_port",
33
+ "get_ports",
34
+ ]