realtimex_deeptutor-0.5.0.post1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
src/services/llm/factory.py
@@ -0,0 +1,450 @@
+ """
+ LLM Factory - Central Hub for LLM Calls
+ =======================================
+
+ This module serves as the central hub for all LLM calls in DeepTutor.
+ It provides a unified interface for agents to call LLMs, routing requests
+ to the appropriate provider (cloud or local) based on URL detection.
+
+ Architecture:
+     Agents (ChatAgent, GuideAgent, etc.)
+                   ↓
+     BaseAgent.call_llm() / stream_llm()
+                   ↓
+     LLM Factory (this module)
+                   ↓
+         ┌─────────┴─────────┐
+         ↓                   ↓
+     CloudProvider      LocalProvider
+     (cloud_provider)   (local_provider)
+         ↓                   ↓
+     OpenAI/DeepSeek/etc  LM Studio/Ollama/etc
+
+ Routing:
+     - Automatically routes to local_provider for local URLs (localhost, 127.0.0.1, etc.)
+     - Routes to cloud_provider for all other URLs
+
+ Retry Mechanism:
+     - Automatic retry with exponential backoff for transient errors
+     - Configurable max_retries, retry_delay, and exponential_backoff
+     - Only retries on retriable errors (timeout, rate limit, server errors)
+ """
+
+ import asyncio
+ from typing import Any, AsyncGenerator, Dict, List, Optional
+
+ import tenacity
+
+ from src.logging.logger import get_logger
+
+ from . import cloud_provider, local_provider
+ from .config import get_llm_config
+ from .exceptions import (
+     LLMAPIError,
+     LLMAuthenticationError,
+     LLMRateLimitError,
+     LLMTimeoutError,
+ )
+ from .utils import is_local_llm_server
+
+ # Initialize logger
+ logger = get_logger("LLMFactory")
+
+ # Default retry configuration
+ DEFAULT_MAX_RETRIES = 3
+ DEFAULT_RETRY_DELAY = 1.0  # seconds
+ DEFAULT_EXPONENTIAL_BACKOFF = True
+
+
+ def _is_retriable_error(error: Exception) -> bool:
+     """
+     Check if an error is retriable.
+
+     Retriable errors:
+     - Timeout errors
+     - Rate limit errors (429)
+     - Server errors (5xx)
+     - Network/connection errors
+
+     Non-retriable errors:
+     - Authentication errors (401)
+     - Bad request (400)
+     - Not found (404)
+     - Client errors (4xx except 429)
+     """
+     from aiohttp import ClientError
+     from requests.exceptions import RequestException
+
+     if isinstance(error, (asyncio.TimeoutError, ClientError, RequestException)):
+         return True
+     if isinstance(error, LLMTimeoutError):
+         return True
+     if isinstance(error, LLMRateLimitError):
+         return True
+     if isinstance(error, LLMAuthenticationError):
+         return False  # Don't retry auth errors
+
+     if isinstance(error, LLMAPIError):
+         status_code = error.status_code
+         if status_code:
+             # Retry on server errors (5xx) and rate limits (429)
+             if status_code >= 500 or status_code == 429:
+                 return True
+             # Don't retry on client errors (4xx except 429)
+             if 400 <= status_code < 500:
+                 return False
+         return True  # Retry by default for unknown API errors
+
+     # For other exceptions (network errors, etc.), retry
+     return True
+
+
+ def _should_use_local(base_url: Optional[str]) -> bool:
+     """
+     Determine if we should use the local provider based on URL.
+
+     Args:
+         base_url: The base URL to check
+
+     Returns:
+         True if local provider should be used (localhost, 127.0.0.1, etc.)
+     """
+     return is_local_llm_server(base_url) if base_url else False
+
+
+ async def complete(
+     prompt: str,
+     system_prompt: str = "You are a helpful assistant.",
+     model: Optional[str] = None,
+     api_key: Optional[str] = None,
+     base_url: Optional[str] = None,
+     api_version: Optional[str] = None,
+     binding: Optional[str] = None,
+     messages: Optional[List[Dict[str, str]]] = None,
+     max_retries: int = DEFAULT_MAX_RETRIES,
+     retry_delay: float = DEFAULT_RETRY_DELAY,
+     exponential_backoff: bool = DEFAULT_EXPONENTIAL_BACKOFF,
+     **kwargs,
+ ) -> str:
+     """
+     Unified LLM completion function with automatic retry.
+
+     Routes to cloud_provider or local_provider based on configuration.
+     Includes automatic retry with exponential backoff for transient errors.
+
+     Args:
+         prompt: The user prompt
+         system_prompt: System prompt for context
+         model: Model name (optional, uses effective config if not provided)
+         api_key: API key (optional)
+         base_url: Base URL for the API (optional)
+         api_version: API version for Azure OpenAI (optional)
+         binding: Provider binding type (optional)
+         messages: Pre-built messages array (optional)
+         max_retries: Maximum number of retry attempts (default: 3)
+         retry_delay: Initial delay between retries in seconds (default: 1.0)
+         exponential_backoff: Whether to use exponential backoff (default: True)
+         **kwargs: Additional parameters (temperature, max_tokens, etc.)
+
+     Returns:
+         str: The LLM response
+     """
+     # Get config if parameters not provided
+     if not model or not base_url:
+         config = get_llm_config()
+         model = model or config.model
+         api_key = api_key if api_key is not None else config.api_key
+         base_url = base_url or config.base_url
+         api_version = api_version or config.api_version
+         binding = binding or config.binding or "openai"
+
+     # Determine which provider to use
+     use_local = _should_use_local(base_url)
+
+     # Define helper to determine if a generic LLMAPIError is retriable
+     def _is_retriable_llm_api_error(exc: BaseException) -> bool:
+         """
+         Return True for LLMAPIError instances that represent retriable conditions.
+
+         We only retry on:
+         - HTTP 429 (rate limit), or
+         - HTTP 5xx server errors.
+
+         All other LLMAPIError instances (e.g., 4xx like 400, 401, 403, 404) are treated
+         as non-retriable to avoid unnecessary retries.
+         """
+         if not isinstance(exc, LLMAPIError):
+             return False
+
+         status_code = getattr(exc, "status_code", None)
+         if status_code is None:
+             # Do not retry when status code is unknown to avoid retrying non-transient errors
+             return False
+
+         if status_code == 429:
+             return True
+
+         if 500 <= status_code < 600:
+             return True
+
+         return False
+
+     # Define the actual completion function with tenacity retry
+     @tenacity.retry(
+         retry=(
+             tenacity.retry_if_exception_type(LLMRateLimitError)
+             | tenacity.retry_if_exception_type(LLMTimeoutError)
+             | tenacity.retry_if_exception(_is_retriable_llm_api_error)
+         ),
+         wait=tenacity.wait_exponential(multiplier=retry_delay, min=retry_delay, max=60),
+         stop=tenacity.stop_after_attempt(max_retries + 1),
+         before_sleep=lambda retry_state: logger.warning(
+             f"LLM call failed (attempt {retry_state.attempt_number}/{max_retries + 1}), "
+             f"retrying in {retry_state.upcoming_sleep:.1f}s... Error: {str(retry_state.outcome.exception())}"
+         ),
+     )
+     async def _do_complete(**call_kwargs):
+         try:
+             if use_local:
+                 return await local_provider.complete(**call_kwargs)
+             else:
+                 return await cloud_provider.complete(**call_kwargs)
+         except Exception as e:
+             # Map raw SDK exceptions to unified exceptions for retry logic
+             from .error_mapping import map_error
+
+             mapped_error = map_error(e, provider=call_kwargs.get("binding", "unknown"))
+             raise mapped_error from e
+
+     # Build call kwargs
+     call_kwargs = {
+         "prompt": prompt,
+         "system_prompt": system_prompt,
+         "model": model,
+         "api_key": api_key,
+         "base_url": base_url,
+         "messages": messages,
+         **kwargs,
+     }
+
+     # Add cloud-specific kwargs if not local
+     if not use_local:
+         call_kwargs["api_version"] = api_version
+         call_kwargs["binding"] = binding or "openai"
+
+     # Execute with retry (handled by tenacity decorator)
+     return await _do_complete(**call_kwargs)
+
+
+ async def stream(
+     prompt: str,
+     system_prompt: str = "You are a helpful assistant.",
+     model: Optional[str] = None,
+     api_key: Optional[str] = None,
+     base_url: Optional[str] = None,
+     api_version: Optional[str] = None,
+     binding: Optional[str] = None,
+     messages: Optional[List[Dict[str, str]]] = None,
+     max_retries: int = DEFAULT_MAX_RETRIES,
+     retry_delay: float = DEFAULT_RETRY_DELAY,
+     exponential_backoff: bool = DEFAULT_EXPONENTIAL_BACKOFF,
+     **kwargs,
+ ) -> AsyncGenerator[str, None]:
+     """
+     Unified LLM streaming function with automatic retry.
+
+     Routes to cloud_provider or local_provider based on configuration.
+     Includes automatic retry with exponential backoff for connection errors.
+
+     Note: Retry only applies to initial connection errors. Once streaming
+     starts, errors during streaming will not be automatically retried.
+
+     Args:
+         prompt: The user prompt
+         system_prompt: System prompt for context
+         model: Model name (optional, uses effective config if not provided)
+         api_key: API key (optional)
+         base_url: Base URL for the API (optional)
+         api_version: API version for Azure OpenAI (optional)
+         binding: Provider binding type (optional)
+         messages: Pre-built messages array (optional)
+         max_retries: Maximum number of retry attempts (default: 3)
+         retry_delay: Initial delay between retries in seconds (default: 1.0)
+         exponential_backoff: Whether to use exponential backoff (default: True)
+         **kwargs: Additional parameters (temperature, max_tokens, etc.)
+
+     Yields:
+         str: Response chunks
+     """
+     # Get config if parameters not provided
+     if not model or not base_url:
+         config = get_llm_config()
+         model = model or config.model
+         api_key = api_key if api_key is not None else config.api_key
+         base_url = base_url or config.base_url
+         api_version = api_version or config.api_version
+         binding = binding or config.binding or "openai"
+
+     # Determine which provider to use
+     use_local = _should_use_local(base_url)
+
+     # Build call kwargs
+     call_kwargs = {
+         "prompt": prompt,
+         "system_prompt": system_prompt,
+         "model": model,
+         "api_key": api_key,
+         "base_url": base_url,
+         "messages": messages,
+         **kwargs,
+     }
+
+     # Add cloud-specific kwargs if not local
+     if not use_local:
+         call_kwargs["api_version"] = api_version
+         call_kwargs["binding"] = binding or "openai"
+
+     # Retry logic for streaming (retry on connection errors)
+     last_exception = None
+     delay = retry_delay
+
+     for attempt in range(max_retries + 1):
+         try:
+             # Route to appropriate provider
+             if use_local:
+                 async for chunk in local_provider.stream(**call_kwargs):
+                     yield chunk
+             else:
+                 async for chunk in cloud_provider.stream(**call_kwargs):
+                     yield chunk
+             # If we get here, streaming completed successfully
+             return
+         except Exception as e:
+             last_exception = e
+
+             # Check if we should retry
+             if attempt >= max_retries or not _is_retriable_error(e):
+                 raise
+
+             # Calculate delay for next attempt
+             if exponential_backoff:
+                 current_delay = delay * (2**attempt)
+             else:
+                 current_delay = delay
+
+             # Special handling for rate limit errors with retry_after
+             if isinstance(e, LLMRateLimitError) and e.retry_after:
+                 current_delay = max(current_delay, e.retry_after)
+
+             # Wait before retrying
+             await asyncio.sleep(current_delay)
+
+     # Should not reach here, but just in case
+     if last_exception:
+         raise last_exception
+
+
+ async def fetch_models(
+     binding: str,
+     base_url: str,
+     api_key: Optional[str] = None,
+ ) -> List[str]:
+     """
+     Fetch available models from the provider.
+
+     Routes to cloud_provider or local_provider based on URL.
+
+     Args:
+         binding: Provider type (openai, ollama, etc.)
+         base_url: API endpoint URL
+         api_key: API key (optional for local providers)
+
+     Returns:
+         List of available model names
+     """
+     if is_local_llm_server(base_url):
+         return await local_provider.fetch_models(base_url, api_key)
+     else:
+         return await cloud_provider.fetch_models(base_url, api_key, binding)
+
+
+ # API Provider Presets
+ API_PROVIDER_PRESETS = {
+     "openai": {
+         "name": "OpenAI",
+         "base_url": "https://api.openai.com/v1",
+         "requires_key": True,
+         "models": ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-3.5-turbo"],
+     },
+     "anthropic": {
+         "name": "Anthropic",
+         "base_url": "https://api.anthropic.com/v1",
+         "requires_key": True,
+         "binding": "anthropic",
+         "models": ["claude-3-5-sonnet-20241022", "claude-3-haiku-20240307"],
+     },
+     "deepseek": {
+         "name": "DeepSeek",
+         "base_url": "https://api.deepseek.com",
+         "requires_key": True,
+         "models": ["deepseek-chat", "deepseek-reasoner"],
+     },
+     "openrouter": {
+         "name": "OpenRouter",
+         "base_url": "https://openrouter.ai/api/v1",
+         "requires_key": True,
+         "models": [],  # Dynamic
+     },
+ }
+
+ # Local Provider Presets
+ LOCAL_PROVIDER_PRESETS = {
+     "ollama": {
+         "name": "Ollama",
+         "base_url": "http://localhost:11434/v1",
+         "requires_key": False,
+         "default_key": "ollama",
+     },
+     "lm_studio": {
+         "name": "LM Studio",
+         "base_url": "http://localhost:1234/v1",
+         "requires_key": False,
+         "default_key": "lm-studio",
+     },
+     "vllm": {
+         "name": "vLLM",
+         "base_url": "http://localhost:8000/v1",
+         "requires_key": False,
+         "default_key": "vllm",
+     },
+     "llama_cpp": {
+         "name": "llama.cpp",
+         "base_url": "http://localhost:8080/v1",
+         "requires_key": False,
+         "default_key": "llama-cpp",
+     },
+ }
+
+
+ def get_provider_presets() -> Dict[str, Any]:
+     """
+     Get all provider presets for frontend display.
+     """
+     return {
+         "api": API_PROVIDER_PRESETS,
+         "local": LOCAL_PROVIDER_PRESETS,
+     }
+
+
+ __all__ = [
+     "complete",
+     "stream",
+     "fetch_models",
+     "get_provider_presets",
+     "API_PROVIDER_PRESETS",
+     "LOCAL_PROVIDER_PRESETS",
+     # Retry configuration defaults
+     "DEFAULT_MAX_RETRIES",
+     "DEFAULT_RETRY_DELAY",
+     "DEFAULT_EXPONENTIAL_BACKOFF",
+ ]
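
For orientation, here is a minimal usage sketch of the factory API added in this diff. It is illustrative only: it assumes the wheel is installed and importable under the `src` top-level package shown in the file layout above, and the model names, URLs, and key values are placeholders, not values shipped with the package (omitted parameters would normally be filled from `get_llm_config()`).

# Usage sketch (illustrative; all credentials and model names are placeholders).
import asyncio

from src.services.llm import factory  # assumed import path, mirroring src/services/llm/factory.py


async def main() -> None:
    # A non-local base_url is dispatched to cloud_provider. Failures that map to
    # 429s, timeouts, or 5xx responses are retried by tenacity, up to
    # max_retries + 1 total attempts with exponential backoff.
    answer = await factory.complete(
        prompt="Explain exponential backoff in one sentence.",
        model="gpt-4o-mini",                    # placeholder model
        base_url="https://api.openai.com/v1",
        api_key="sk-placeholder",               # placeholder key
        max_retries=3,
        retry_delay=1.0,
    )
    print(answer)

    # A localhost base_url matches is_local_llm_server() and is dispatched to
    # local_provider instead. Values mirror the "ollama" LOCAL_PROVIDER_PRESETS entry.
    async for chunk in factory.stream(
        prompt="Hello!",
        model="llama3",                         # placeholder local model
        base_url="http://localhost:11434/v1",
        api_key="ollama",
    ):
        print(chunk, end="", flush=True)


asyncio.run(main())

With the defaults above, tenacity's wait_exponential(multiplier=1.0, min=1.0, max=60) sleeps roughly 1 s, 2 s, and 4 s between the four attempts. Note that the exponential_backoff flag only affects the hand-rolled retry loop in stream(); the complete() path always uses tenacity's exponential wait regardless of that flag.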