realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276):
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,188 @@
1
+ """
2
+ Baidu AI Search Provider
3
+
4
+ API: https://qianfan.baidubce.com/v2/ai_search/chat/completions
5
+
6
+ Features:
7
+ - AI-powered search with ERNIE models
8
+ - Deep search mode for comprehensive results
9
+ - Corner markers for reference citations
10
+ - Follow-up query suggestions
11
+ - Recency filtering
12
+ """
13
+
14
+ from datetime import datetime
15
+ from typing import Any
16
+
17
+ import requests
18
+
19
+ from ..base import BaseSearchProvider
20
+ from ..types import Citation, SearchResult, WebSearchResponse
21
+ from . import register_provider
22
+
23
+
24
@register_provider("baidu")
class BaiduProvider(BaseSearchProvider):
    """Baidu AI Search provider.

    Posts a single-turn chat request to the Baidu AI Search endpoint and maps
    the JSON reply (answer text, references, token usage) onto a
    ``WebSearchResponse``.
    """

    display_name = "Baidu AI"
    description = "百度AI搜索 with ERNIE models"
    supports_answer = True
    BASE_URL = "https://qianfan.baidubce.com/v2/ai_search/chat/completions"

    def search(
        self,
        query: str,
        model: str = "ernie-4.5-turbo-32k",
        search_source: str = "baidu_search_v2",
        enable_deep_search: bool = False,
        enable_corner_markers: bool = True,
        enable_followup_queries: bool = False,
        temperature: float = 0.11,
        top_p: float = 0.55,
        search_mode: str = "auto",
        search_recency_filter: str | None = None,
        instruction: str = "",
        timeout: int = 120,
        **kwargs: Any,
    ) -> WebSearchResponse:
        """
        Perform intelligent search using Baidu AI Search API.

        Args:
            query: Search query.
            model: Model to use for generation (default: ernie-4.5-turbo-32k).
            search_source: Search engine version (baidu_search_v1 or baidu_search_v2).
            enable_deep_search: Enable deep search for more comprehensive results.
            enable_corner_markers: Enable corner markers for reference citations.
            enable_followup_queries: Enable follow-up query suggestions.
            temperature: Model sampling temperature (0, 1].
            top_p: Model sampling top_p (0, 1].
            search_mode: Search mode (auto, required, disabled).
            search_recency_filter: Filter by recency (week, month, semiyear, year).
            instruction: System instruction for response style.
            timeout: Request timeout in seconds.
            **kwargs: Additional options (not used by this provider).

        Returns:
            WebSearchResponse: Standardized search response.

        Raises:
            Exception: If the API answers with a non-200 status, or if the
                body of a 200 response cannot be parsed as JSON.
        """
        self.logger.debug(f"Calling Baidu API with model={model}, deep_search={enable_deep_search}")
        headers = {
            "Content-Type": "application/json",
            # Accept keys that already carry the "Bearer " scheme prefix.
            "Authorization": (
                self.api_key if self.api_key.startswith("Bearer ") else f"Bearer {self.api_key}"
            ),
        }

        payload: dict[str, Any] = {
            "messages": [{"role": "user", "content": query}],
            "model": model,
            "search_source": search_source,
            "stream": False,
            "enable_deep_search": enable_deep_search,
            "enable_corner_markers": enable_corner_markers,
            "enable_followup_queries": enable_followup_queries,
            "temperature": temperature,
            "top_p": top_p,
            "search_mode": search_mode,
        }

        # Optional fields are only sent when the caller supplied a value.
        if search_recency_filter:
            payload["search_recency_filter"] = search_recency_filter
        if instruction:
            payload["instruction"] = instruction

        response = requests.post(self.BASE_URL, headers=headers, json=payload, timeout=timeout)

        if response.status_code != 200:
            try:
                error_data = response.json() if response.text else {}
            except ValueError:
                error_data = {}
            # A JSON error body that is not an object has no "message" key to
            # read; fall back to the raw text instead of failing on ``.get``.
            if isinstance(error_data, dict):
                detail = error_data.get("message", response.text)
            else:
                detail = response.text
            self.logger.error(f"Baidu API error: {response.status_code}")
            raise Exception(f"Baidu AI Search API error: {response.status_code} - {detail}")

        try:
            data = response.json()
        except ValueError as e:
            # Chain the cause so the original decode error stays in the traceback.
            raise Exception(f"Failed to parse Baidu API response: {e}") from e

        # Extract the answer from the first choice, when one is present.
        answer = ""
        finish_reason = ""
        choices = data.get("choices")
        if choices:
            choice = choices[0]
            message = choice.get("message")
            if message:
                answer = message.get("content", "")
            finish_reason = choice.get("finish_reason", "")

        # Extract usage information.
        usage_info: dict[str, Any] = {}
        usage = data.get("usage")
        if usage:
            usage_info = {
                key: usage.get(key, 0)
                for key in ("prompt_tokens", "completion_tokens", "total_tokens")
            }

        # Each reference yields one citation and one search result.
        citations: list[Citation] = []
        search_results: list[SearchResult] = []
        for i, ref in enumerate(data.get("references") or [], 1):
            # Fall back to the 1-based position when the reference has no id.
            ref_id = ref.get("id", i)
            title = ref.get("title", "")
            url = ref.get("url", "")
            snippet = ref.get("content", "")
            date = ref.get("date", "")
            web_anchor = ref.get("web_anchor", "")

            citations.append(
                Citation(
                    id=ref_id,
                    reference=f"[{ref_id}]",
                    url=url,
                    title=title,
                    snippet=snippet,
                    date=date,
                    source=web_anchor,
                    type=ref.get("type", "web"),
                    icon=ref.get("icon", ""),
                    website=ref.get("website", ""),
                    web_anchor=web_anchor,
                )
            )
            search_results.append(
                SearchResult(
                    title=title,
                    url=url,
                    snippet=snippet,
                    date=date,
                    source=web_anchor,
                )
            )

        # Build metadata.
        metadata: dict[str, Any] = {
            "finish_reason": finish_reason,
            "is_safe": data.get("is_safe", True),
            "request_id": data.get("request_id", ""),
        }

        # Add follow-up queries if available.
        if data.get("followup_queries"):
            metadata["followup_queries"] = data["followup_queries"]

        return WebSearchResponse(
            query=query,
            answer=answer,
            provider="baidu",
            timestamp=datetime.now().isoformat(),
            model=model,
            citations=citations,
            search_results=search_results,
            usage=usage_info,
            metadata=metadata,
        )
@@ -0,0 +1,194 @@
1
+ """
2
+ Exa Neural Search Provider
3
+
4
+ API Docs: https://exa.ai/docs/reference/search
5
+ Endpoint: https://api.exa.ai/search
6
+
7
+ Features:
8
+ - Embeddings-based neural search (finds semantically similar content)
9
+ - Multiple search types: auto, neural, keyword
10
+ - Category filtering: research paper, news, company, people, github, tweet, pdf
11
+ - Date filtering (published date and crawl date)
12
+ - Domain include/exclude lists
13
+ - Full text extraction with highlights and summaries
14
+ - Cost tracking in response
15
+
16
+ Pricing:
17
+ - Neural search (1-25 results): $0.005/request
18
+ - Neural search (26-100 results): $0.025/request
19
+ - Content text/highlight/summary: $0.001/page
20
+ """
21
+
22
+ from datetime import datetime
23
+ from typing import Any
24
+
25
+ import requests
26
+
27
+ from ..base import BaseSearchProvider
28
+ from ..types import Citation, SearchResult, WebSearchResponse
29
+ from . import register_provider
30
+
31
+
32
@register_provider("exa")
class ExaProvider(BaseSearchProvider):
    """Exa neural/embeddings-based search provider.

    Posts a search request to the Exa endpoint and maps the returned results
    onto a ``WebSearchResponse``. The answer is the concatenation of the
    per-result summaries.
    """

    display_name = "Exa"
    description = "Neural/embeddings search"
    supports_answer = True  # Provides summaries and context
    BASE_URL = "https://api.exa.ai/search"

    def search(
        self,
        query: str,
        search_type: str = "auto",  # auto, neural, keyword
        num_results: int = 10,
        include_text: bool = True,
        include_highlights: bool = True,
        include_summary: bool = True,
        max_characters: int | None = None,
        category: str | None = None,  # research paper, news, company, etc.
        include_domains: list[str] | None = None,
        exclude_domains: list[str] | None = None,
        start_published_date: str | None = None,  # ISO format
        end_published_date: str | None = None,
        timeout: int = 60,
        **kwargs: Any,
    ) -> WebSearchResponse:
        """
        Perform neural search using Exa API.

        Args:
            query: Search query.
            search_type: Search type - "auto", "neural", or "keyword".
            num_results: Number of results to return.
            include_text: Include full text content.
            include_highlights: Include relevant highlights.
            include_summary: Include AI-generated summaries.
            max_characters: Maximum characters per result.
            category: Filter by category.
            include_domains: List of domains to include.
            exclude_domains: List of domains to exclude.
            start_published_date: Filter by start date (ISO format).
            end_published_date: Filter by end date (ISO format).
            timeout: Request timeout in seconds.
            **kwargs: Additional options (not used by this provider).

        Returns:
            WebSearchResponse: Standardized search response.

        Raises:
            Exception: If the API answers with a non-200 status, or if the
                body of a 200 response cannot be parsed as JSON.
        """
        self.logger.debug(f"Calling Exa API type={search_type}, num_results={num_results}")
        headers = {
            "Content-Type": "application/json",
            "x-api-key": self.api_key,
        }

        # Build contents configuration.
        contents: dict[str, Any] = {}
        if include_text:
            contents["text"] = {"maxCharacters": max_characters} if max_characters else True
        if include_highlights:
            contents["highlights"] = True
        if include_summary:
            contents["summary"] = True

        payload: dict[str, Any] = {
            "query": query,
            "type": search_type,
            "numResults": num_results,
            "contents": contents,
        }

        # Optional filters are only sent when the caller supplied a value.
        optional_filters = (
            ("category", category),
            ("includeDomains", include_domains),
            ("excludeDomains", exclude_domains),
            ("startPublishedDate", start_published_date),
            ("endPublishedDate", end_published_date),
        )
        for api_field, value in optional_filters:
            if value:
                payload[api_field] = value

        response = requests.post(self.BASE_URL, headers=headers, json=payload, timeout=timeout)

        if response.status_code != 200:
            try:
                error_data = response.json() if response.text else {}
            except ValueError:
                error_data = {}
            # A JSON error body that is not an object has no "error" key to
            # read; fall back to the raw text instead of failing on ``.get``.
            if isinstance(error_data, dict):
                detail = error_data.get("error", response.text)
            else:
                detail = response.text
            self.logger.error(f"Exa API error: {response.status_code}")
            raise Exception(f"Exa API error: {response.status_code} - {detail}")

        try:
            data = response.json()
        except ValueError as e:
            # Chain the cause so the original decode error stays in the traceback.
            raise Exception(f"Failed to parse Exa API response: {e}") from e

        # ``or []`` also covers an explicit JSON null for "results".
        results = data.get("results") or []
        self.logger.debug(f"Exa returned {len(results)} results")

        summaries: list[str] = []
        citations: list[Citation] = []
        search_results: list[SearchResult] = []

        for i, result in enumerate(results, 1):
            # Normalise null/missing fields to "" so slicing below is safe.
            summary = result.get("summary") or ""
            text = result.get("text") or ""
            if summary:
                summaries.append(f"[{i}] {summary}")

            # Prefer the summary; otherwise use the first 500 characters of text.
            snippet = summary or text[:500]
            title = result.get("title", "")
            url = result.get("url", "")
            published = result.get("publishedDate", "")
            author = result.get("author", "")

            search_results.append(
                SearchResult(
                    title=title,
                    url=url,
                    snippet=snippet,
                    date=published,
                    source=author,
                    content=text,
                    score=result.get("score", 0.0),
                )
            )
            citations.append(
                Citation(
                    id=i,
                    reference=f"[{i}]",
                    url=url,
                    title=title,
                    snippet=snippet,
                    date=published,
                    source=author,
                    content=text,
                )
            )

        # Combine summaries as the answer (empty string when there are none).
        answer = "\n\n".join(summaries)

        # Build metadata.
        metadata: dict[str, Any] = {
            "finish_reason": "stop",
            "search_type": search_type,
            "autoprompt_string": data.get("autopromptString", ""),
        }

        # Add cost info if available.
        if data.get("costDollars"):
            metadata["cost_dollars"] = data["costDollars"]

        return WebSearchResponse(
            query=query,
            answer=answer,
            provider="exa",
            timestamp=datetime.now().isoformat(),
            model=f"exa-{search_type}",
            citations=citations,
            search_results=search_results,
            usage={},
            metadata=metadata,
        )
@@ -0,0 +1,161 @@
1
+ """
2
+ Jina Reader Search Provider
3
+
4
+ API Docs: https://jina.ai/reader
5
+ Search Endpoint: https://s.jina.ai/{query}
6
+ Reader Endpoint: https://r.jina.ai/{url}
7
+
8
+ Features:
9
+ - Web search with SERP results (s.jina.ai)
10
+ - URL to clean content conversion (r.jina.ai)
11
+ - Returns clean, LLM-friendly text
12
+ - Automatic content extraction
13
+ - Image captioning support
14
+ - PDF support
15
+ - Free tier: 10M tokens
16
+ """
17
+
18
+ from datetime import datetime
19
+ from typing import Any
20
+ import urllib.parse
21
+
22
+ import requests
23
+
24
+ from ..base import BaseSearchProvider
25
+ from ..types import Citation, SearchResult, WebSearchResponse
26
+ from . import register_provider
27
+
28
+
29
@register_provider("jina")
class JinaProvider(BaseSearchProvider):
    """Jina Reader search provider.

    Issues a GET request to the Jina search endpoint with the query in the
    URL path and maps the returned ``data`` array onto a
    ``WebSearchResponse``. No LLM answer is produced.
    """

    display_name = "Jina"
    description = "SERP with content extraction (free tier)"
    supports_answer = False  # Returns raw content, not LLM answers
    requires_api_key = False  # Has free tier without API key
    BASE_URL = "https://s.jina.ai"

    def search(
        self,
        query: str,
        enrich: bool = True,
        timeout: int = 60,
        **kwargs: Any,
    ) -> WebSearchResponse:
        """
        Perform web search using Jina Reader API.

        Args:
            query: Search query.
            enrich: If True, fetch full content + images. If False, basic SERP only.
            timeout: Request timeout in seconds.
            **kwargs: Additional options (not used by this provider).

        Returns:
            WebSearchResponse: Standardized search response.

        Raises:
            Exception: If the API answers with a non-200 status, or if the
                body of a 200 response cannot be parsed as JSON.
        """
        self.logger.debug(f"Calling Jina API enrich={enrich}")
        headers: dict[str, str] = {
            "Accept": "application/json",
        }

        # The key is optional; only send the header when one is configured.
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        if enrich:
            # Enriched mode: full content + images
            headers["X-Engine"] = "direct"
            headers["X-Timeout"] = str(timeout)
            headers["X-With-Images-Summary"] = "true"
        else:
            # Basic mode: SERP only, no content
            headers["X-Respond-With"] = "no-content"

        # The query travels in the URL path, so it must be percent-encoded.
        encoded_query = urllib.parse.quote(query)
        url = f"{self.BASE_URL}/{encoded_query}"

        response = requests.get(url, headers=headers, timeout=timeout)

        if response.status_code != 200:
            self.logger.error(f"Jina API error: {response.status_code}")
            raise Exception(f"Jina API error: {response.status_code} - {response.text}")

        try:
            data = response.json()
        except ValueError as e:
            # Same failure shape as the other providers; keep the cause chained.
            raise Exception(f"Failed to parse Jina API response: {e}") from e

        # ``or []`` also covers an explicit JSON null for "data".
        results = data.get("data") or []
        self.logger.debug(f"Jina returned {len(results)} results")

        citations: list[Citation] = []
        search_results: list[SearchResult] = []

        # Jina Search API returns results in 'data' array
        # Basic fields: title, url, description, date, content, usage
        # Enriched fields (enrich=true): images, publishedTime, metadata, external
        enriched_fields = ("images", "publishedTime", "metadata", "external")
        for i, result in enumerate(results, 1):
            # Carry over only the enriched fields that are present and non-empty.
            attributes: dict[str, Any] = {
                field: result[field] for field in enriched_fields if result.get(field)
            }

            title = result.get("title", "")
            result_url = result.get("url", "")
            description = result.get("description", "")
            date = result.get("date", "")
            content = result.get("content", "")

            search_results.append(
                SearchResult(
                    title=title,
                    url=result_url,
                    snippet=description,
                    date=date,
                    content=content,
                    attributes=attributes,
                )
            )
            citations.append(
                Citation(
                    id=i,
                    reference=f"[{i}]",
                    url=result_url,
                    title=title,
                    snippet=description,
                    date=date,
                    content=content,
                )
            )

        # Build metadata.
        metadata: dict[str, Any] = {
            "finish_reason": "stop",
            "code": data.get("code", 200),
            "status": data.get("status", 20000),
        }

        # Calculate total tokens - prefer meta.usage.tokens if available.
        # ``or {}`` guards against null intermediate objects.
        total_tokens = ((data.get("meta") or {}).get("usage") or {}).get("tokens") or 0
        if not total_tokens:
            # Fallback: sum per-result tokens.
            for result in results:
                total_tokens += (result.get("usage") or {}).get("tokens") or 0

        usage: dict[str, Any] = {}
        if total_tokens > 0:
            usage["total_tokens"] = total_tokens

        return WebSearchResponse(
            query=query,
            answer="",  # Jina doesn't provide LLM answers
            provider="jina",
            timestamp=datetime.now().isoformat(),
            model="jina-reader",
            citations=citations,
            search_results=search_results,
            usage=usage,
            metadata=metadata,
        )
@@ -0,0 +1,153 @@
1
+ """
2
+ Perplexity AI Search Provider
3
+
4
+ API: Uses the perplexityai Python package (imported as `perplexity`)
5
+ Model: sonar (default)
6
+
7
+ Features:
8
+ - AI-powered search with LLM-generated answers
9
+ - Automatic citation extraction
10
+ - Usage tracking with cost information
11
+ """
12
+
13
+ from datetime import datetime
14
+ from typing import Any
15
+
16
+ from ..base import BaseSearchProvider
17
+ from ..types import Citation, SearchResult, WebSearchResponse
18
+ from . import register_provider
19
+
20
+
21
@register_provider("perplexity")
class PerplexityProvider(BaseSearchProvider):
    """Search provider that answers queries through Perplexity chat completions.

    The Perplexity SDK client is created lazily on first access to
    :attr:`client`, so constructing the provider does not require the
    optional ``perplexityai`` dependency to be installed.
    """

    display_name = "Perplexity"
    description = "AI-powered search with answers"
    supports_answer = True
    BASE_URL = "https://api.perplexity.ai"  # Used by the perplexity package internally

    def __init__(self, api_key: str | None = None, **kwargs: Any) -> None:
        """Initialise the provider; the SDK client is not created here.

        Args:
            api_key: API key forwarded to the base provider.
            **kwargs: Extra options forwarded to the base provider.
        """
        super().__init__(api_key, **kwargs)
        self._client = None

    @property
    def client(self):
        """Return the Perplexity SDK client, creating it on first use.

        Raises:
            ImportError: If the ``perplexity`` module cannot be imported.
        """
        if self._client is None:
            # Imported here so the dependency is only needed when this
            # provider is actually used.
            try:
                from perplexity import Perplexity
            except ImportError as e:
                raise ImportError(
                    "perplexityai module is not installed. To use Perplexity search, please install: "
                    "pip install perplexityai"
                ) from e
            self._client = Perplexity(api_key=self.api_key)
        return self._client

    def search(
        self,
        query: str,
        model: str = "sonar",
        system_prompt: str = "You are a helpful AI assistant. Provide detailed and accurate answers based on web search results.",
        **kwargs: Any,
    ) -> WebSearchResponse:
        """Run a query through the Perplexity chat-completions endpoint.

        Args:
            query: Search query, sent as the user message.
            model: Model to use (default: sonar).
            system_prompt: System message sent ahead of the query.
            **kwargs: Accepted for interface compatibility; not forwarded
                to the API call.

        Returns:
            WebSearchResponse: Standardized response holding the answer,
            citations, search results, usage and the finish reason.

        Raises:
            ValueError: If the API response contains no choices.
        """
        self.logger.debug(f"Calling Perplexity API with model={model}")
        completion = self.client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query},
            ],
        )

        if not completion.choices:
            raise ValueError("Perplexity API returned no choices")

        first_choice = completion.choices[0]
        answer = first_choice.message.content

        usage_info = self._build_usage(completion)
        search_results = self._build_search_results(completion)
        citations = self._build_citations(completion, search_results)

        return WebSearchResponse(
            query=query,
            # Ensure the answer is always a string, even if content is empty/None.
            answer=str(answer) if answer else "",
            provider="perplexity",
            timestamp=datetime.now().isoformat(),
            model=completion.model,
            citations=citations,
            search_results=search_results,
            usage=usage_info,
            metadata={
                "finish_reason": first_choice.finish_reason,
            },
        )

    @staticmethod
    def _build_usage(completion: Any) -> dict[str, Any]:
        """Extract token (and, when present, cost) usage from a completion."""
        usage = getattr(completion, "usage", None)
        if usage is None:
            return {}

        usage_info: dict[str, Any] = {
            "prompt_tokens": getattr(usage, "prompt_tokens", 0),
            "completion_tokens": getattr(usage, "completion_tokens", 0),
            "total_tokens": getattr(usage, "total_tokens", 0),
        }
        cost = getattr(usage, "cost", None)
        if cost is not None:
            usage_info["cost"] = {
                "total_cost": getattr(cost, "total_cost", 0),
                "input_tokens_cost": getattr(cost, "input_tokens_cost", 0),
                "output_tokens_cost": getattr(cost, "output_tokens_cost", 0),
            }
        return usage_info

    @staticmethod
    def _build_search_results(completion: Any) -> list[SearchResult]:
        """Convert the completion's search results into SearchResult objects."""
        raw_items = getattr(completion, "search_results", None)
        if not raw_items:
            return []

        results: list[SearchResult] = []
        for item in raw_items:
            source = getattr(item, "source", None)
            results.append(
                SearchResult(
                    # ``or ""`` also covers attributes that exist but are None.
                    title=getattr(item, "title", "") or "",
                    url=getattr(item, "url", "") or "",
                    snippet=getattr(item, "snippet", "") or "",
                    date=getattr(item, "date", "") or "",
                    source=str(source) if source else "",
                )
            )
        return results

    @staticmethod
    def _build_citations(
        completion: Any, search_results: list[SearchResult]
    ) -> list[Citation]:
        """Build numbered citations, enriched from search results with the same URL."""
        citation_urls = getattr(completion, "citations", None)
        if not citation_urls:
            return []

        # Index search results by URL once; setdefault keeps the first
        # occurrence, so duplicate URLs resolve to the earliest result.
        by_url: dict[str, SearchResult] = {}
        for result in search_results:
            by_url.setdefault(result.url, result)

        citations: list[Citation] = []
        for i, citation_url in enumerate(citation_urls, 1):
            match = by_url.get(citation_url)
            citations.append(
                Citation(
                    id=i,
                    reference=f"[{i}]",
                    url=citation_url,
                    title=match.title if match is not None else "",
                    snippet=match.snippet if match is not None else "",
                )
            )
        return citations