realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,530 @@
1
+ """
2
+ Cloud LLM Provider
3
+ ==================
4
+
5
+ Handles all cloud API LLM calls (OpenAI, DeepSeek, Anthropic, etc.)
6
+ Provides both complete() and stream() methods.
7
+ """
8
+
9
+ import logging
10
+ import os
11
+ from typing import AsyncGenerator, Dict, List, Optional
12
+
13
+ import aiohttp
14
+ from lightrag.llm.openai import openai_complete_if_cache
15
+
16
+ # Get loggers for suppression during fallback scenarios
17
+ # (lightrag logs errors internally before raising exceptions)
18
+ _lightrag_logger = logging.getLogger("lightrag")
19
+ _openai_logger = logging.getLogger("openai")
20
+
21
+ from .capabilities import supports_response_format
22
+ from .config import get_token_limit_kwargs
23
+ from .exceptions import LLMAPIError, LLMAuthenticationError, LLMConfigError
24
+ from .utils import (
25
+ build_auth_headers,
26
+ build_chat_url,
27
+ clean_thinking_tags,
28
+ extract_response_content,
29
+ sanitize_url,
30
+ )
31
+
32
+
33
async def complete(
    prompt: str,
    system_prompt: str = "You are a helpful assistant.",
    model: Optional[str] = None,
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    api_version: Optional[str] = None,
    binding: str = "openai",
    **kwargs,
) -> str:
    """
    Run a one-shot completion against a cloud LLM provider.

    Dispatches on ``binding``: "anthropic"/"claude" go to the Anthropic
    backend, everything else is treated as OpenAI-compatible.

    Args:
        prompt: The user prompt.
        system_prompt: System prompt providing context.
        model: Model name.
        api_key: API key.
        base_url: Base URL for the API.
        api_version: API version (used for Azure OpenAI).
        binding: Provider binding type (openai, anthropic).
        **kwargs: Extra parameters (temperature, max_tokens, ...).

    Returns:
        str: The LLM response text.
    """
    normalized = (binding or "openai").lower()

    if normalized in ("anthropic", "claude"):
        return await _anthropic_complete(
            model=model,
            prompt=prompt,
            system_prompt=system_prompt,
            api_key=api_key,
            base_url=base_url,
            **kwargs,
        )

    # Anything that is not Anthropic is treated as OpenAI-compatible.
    return await _openai_complete(
        model=model,
        prompt=prompt,
        system_prompt=system_prompt,
        api_key=api_key,
        base_url=base_url,
        api_version=api_version,
        binding=normalized,
        **kwargs,
    )
84
+
85
+
86
async def stream(
    prompt: str,
    system_prompt: str = "You are a helpful assistant.",
    model: Optional[str] = None,
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    api_version: Optional[str] = None,
    binding: str = "openai",
    messages: Optional[List[Dict[str, str]]] = None,
    **kwargs,
) -> AsyncGenerator[str, None]:
    """
    Stream a response from a cloud LLM provider, chunk by chunk.

    Dispatches on ``binding`` exactly like :func:`complete`, but yields
    incremental text pieces instead of returning one string.

    Args:
        prompt: The user prompt (ignored when ``messages`` is given).
        system_prompt: System prompt providing context.
        model: Model name.
        api_key: API key.
        base_url: Base URL for the API.
        api_version: API version (used for Azure OpenAI).
        binding: Provider binding type (openai, anthropic).
        messages: Pre-built messages array; overrides prompt/system_prompt.
        **kwargs: Extra parameters (temperature, max_tokens, ...).

    Yields:
        str: Response chunks.
    """
    normalized = (binding or "openai").lower()

    if normalized in ("anthropic", "claude"):
        chunk_source = _anthropic_stream(
            model=model,
            prompt=prompt,
            system_prompt=system_prompt,
            api_key=api_key,
            base_url=base_url,
            messages=messages,
            **kwargs,
        )
    else:
        chunk_source = _openai_stream(
            model=model,
            prompt=prompt,
            system_prompt=system_prompt,
            api_key=api_key,
            base_url=base_url,
            api_version=api_version,
            binding=normalized,
            messages=messages,
            **kwargs,
        )

    # Forward the provider-specific generator's chunks unchanged.
    async for piece in chunk_source:
        yield piece
140
+
141
+
142
async def _openai_complete(
    model: str,
    prompt: str,
    system_prompt: str,
    api_key: Optional[str],
    base_url: Optional[str],
    api_version: Optional[str] = None,
    binding: str = "openai",
    **kwargs,
) -> str:
    """OpenAI-compatible completion.

    Tries lightrag's ``openai_complete_if_cache`` first (it provides
    caching); on any failure falls back to a direct aiohttp POST against
    ``{base_url}/chat/completions``. The lightrag failure, if any, is
    chained onto the final ``LLMConfigError`` so callers can see the
    root cause instead of a bare "no valid configuration".

    Args:
        model: Model name.
        prompt: The user prompt.
        system_prompt: System prompt providing context.
        api_key: API key.
        base_url: Base URL for the API (may be None; fallback then skipped).
        api_version: API version (passed only when set, for Azure OpenAI).
        binding: Provider binding type, used for capability checks.
        **kwargs: Extra parameters (temperature, max_tokens, response_format, ...).

    Returns:
        str: The response text with provider "thinking" tags stripped.

    Raises:
        LLMAPIError: When the direct HTTP fallback returns a non-200 status.
        LLMConfigError: When no path produced a response (chained to the
            underlying lightrag exception when one occurred).
    """
    # Sanitize URL
    if base_url:
        base_url = sanitize_url(base_url, model)

    # Some providers (e.g. DeepSeek) reject response_format entirely;
    # drop it up front based on the capability table.
    if not supports_response_format(binding, model):
        kwargs.pop("response_format", None)

    content = None
    last_error: Optional[Exception] = None
    try:
        # Only pass api_version when set (Azure OpenAI); the standard
        # OpenAI SDK does not accept an api_version parameter.
        lightrag_kwargs = {
            "system_prompt": system_prompt,
            "history_messages": [],  # Required by lightrag to build messages array
            "api_key": api_key,
            "base_url": base_url,
            **kwargs,
        }
        if api_version:
            lightrag_kwargs["api_version"] = api_version

        # Suppress lightrag's and openai's internal error logging during
        # the call (errors are handled by our fallback mechanism).
        original_lightrag_level = _lightrag_logger.level
        original_openai_level = _openai_logger.level
        _lightrag_logger.setLevel(logging.CRITICAL)
        _openai_logger.setLevel(logging.CRITICAL)
        try:
            # model and prompt must be positional arguments
            content = await openai_complete_if_cache(model, prompt, **lightrag_kwargs)
        finally:
            _lightrag_logger.setLevel(original_lightrag_level)
            _openai_logger.setLevel(original_openai_level)
    except Exception as exc:
        # Remember why lightrag failed; fall through to the direct call.
        last_error = exc

    # Fallback: direct aiohttp call (only possible with a known base_url).
    if not content and base_url:
        # Build URL using unified utility (use binding for Azure detection)
        url = build_chat_url(base_url, api_version, binding)

        # Build headers using unified utility
        headers = build_auth_headers(api_key, binding)

        data = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
            "temperature": kwargs.get("temperature", 0.7),
        }

        # Handle max_tokens / max_completion_tokens based on model
        max_tokens = kwargs.get("max_tokens") or kwargs.get("max_completion_tokens") or 4096
        data.update(get_token_limit_kwargs(model, max_tokens))

        # Include response_format if present in kwargs
        if "response_format" in kwargs:
            data["response_format"] = kwargs["response_format"]

        timeout = aiohttp.ClientTimeout(total=120)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(url, headers=headers, json=data) as resp:
                if resp.status == 200:
                    result = await resp.json()
                    if "choices" in result and result["choices"]:
                        msg = result["choices"][0].get("message", {})
                        # Use unified response extraction
                        content = extract_response_content(msg)
                else:
                    error_text = await resp.text()
                    raise LLMAPIError(
                        f"OpenAI API error: {error_text}",
                        status_code=resp.status,
                        provider=binding or "openai",
                    )

    if content is not None:
        # Clean thinking tags from response using unified utility
        return clean_thinking_tags(content, binding, model)

    # Chain the lightrag failure (if any) so the root cause is visible.
    raise LLMConfigError("Cloud completion failed: no valid configuration") from last_error
239
+
240
+
241
async def _openai_stream(
    model: str,
    prompt: str,
    system_prompt: str,
    api_key: Optional[str],
    base_url: Optional[str],
    api_version: Optional[str] = None,
    binding: str = "openai",
    messages: Optional[List[Dict[str, str]]] = None,
    **kwargs,
) -> AsyncGenerator[str, None]:
    """OpenAI-compatible streaming.

    Posts a ``stream: True`` chat-completions request and yields the text
    of each SSE delta. Provider "thinking" output wrapped in
    ``<think>...</think>`` is buffered until the closing tag, then cleaned
    via ``clean_thinking_tags`` before being yielded.

    Args:
        model: Model name.
        prompt: The user prompt (ignored when ``messages`` is given).
        system_prompt: System prompt providing context.
        api_key: API key.
        base_url: Base URL; defaults to the public OpenAI endpoint.
        api_version: API version (Azure OpenAI).
        binding: Provider binding type, used for capability/URL/header logic.
        messages: Pre-built messages array; overrides prompt/system_prompt.
        **kwargs: Extra parameters (temperature, max_tokens, response_format, ...).

    Yields:
        str: Response text chunks.

    Raises:
        LLMAPIError: When the HTTP response status is not 200.
    """
    import json

    # Sanitize URL
    if base_url:
        base_url = sanitize_url(base_url, model)

    # Handle API Parameter Compatibility using capabilities
    # (some providers reject response_format outright).
    if not supports_response_format(binding, model):
        kwargs.pop("response_format", None)

    # Build URL using unified utility
    effective_base = base_url or "https://api.openai.com/v1"
    url = build_chat_url(effective_base, api_version, binding)

    # Build headers using unified utility
    headers = build_auth_headers(api_key, binding)

    # Build messages: a caller-supplied conversation wins over the
    # simple system+user pair.
    if messages:
        msg_list = messages
    else:
        msg_list = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]

    data = {
        "model": model,
        "messages": msg_list,
        "temperature": kwargs.get("temperature", 0.7),
        "stream": True,
    }

    # Handle max_tokens / max_completion_tokens based on model
    max_tokens = kwargs.get("max_tokens") or kwargs.get("max_completion_tokens")
    if max_tokens:
        data.update(get_token_limit_kwargs(model, max_tokens))

    # Include response_format if present in kwargs
    if "response_format" in kwargs:
        data["response_format"] = kwargs["response_format"]

    timeout = aiohttp.ClientTimeout(total=300)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.post(url, headers=headers, json=data) as resp:
            if resp.status != 200:
                error_text = await resp.text()
                raise LLMAPIError(
                    f"OpenAI stream error: {error_text}",
                    status_code=resp.status,
                    provider=binding or "openai",
                )

            # Track thinking block state for streaming: while inside a
            # <think> block, chunks accumulate in thinking_buffer instead
            # of being yielded.
            in_thinking_block = False
            thinking_buffer = ""

            async for line in resp.content:
                # Each SSE line looks like "data: {...}" or "data: [DONE]".
                line_str = line.decode("utf-8").strip()
                if not line_str or not line_str.startswith("data:"):
                    continue

                data_str = line_str[5:].strip()  # drop the "data:" prefix
                if data_str == "[DONE]":
                    break

                try:
                    chunk_data = json.loads(data_str)
                    if "choices" in chunk_data and chunk_data["choices"]:
                        delta = chunk_data["choices"][0].get("delta", {})
                        content = delta.get("content")
                        if content:
                            # Handle thinking tags in streaming
                            if "<think>" in content:
                                # Start buffering; nothing is yielded until
                                # the closing tag arrives.
                                in_thinking_block = True
                                thinking_buffer = content
                                continue
                            elif in_thinking_block:
                                thinking_buffer += content
                                if "</think>" in thinking_buffer:
                                    # End of thinking block, clean and yield
                                    cleaned = clean_thinking_tags(thinking_buffer, binding, model)
                                    if cleaned:
                                        yield cleaned
                                    in_thinking_block = False
                                    thinking_buffer = ""
                                # NOTE(review): if the stream ends before
                                # </think>, the buffered text is dropped —
                                # confirm this is intended.
                                continue
                            else:
                                yield content
                except json.JSONDecodeError:
                    # Ignore malformed/partial SSE payload lines.
                    continue
344
+
345
+
346
async def _anthropic_complete(
    model: str,
    prompt: str,
    system_prompt: str,
    api_key: Optional[str],
    base_url: Optional[str],
    messages: Optional[List[Dict[str, str]]] = None,
    **kwargs,
) -> str:
    """Non-streaming completion via the Anthropic (Claude) Messages API.

    The system prompt travels as a dedicated top-level ``system`` field,
    so any system message inside a pre-built conversation is hoisted out
    of the message list before posting.
    """
    api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise LLMAuthenticationError("Anthropic API key is missing.", provider="anthropic")

    # Endpoint and headers come from the shared provider utilities.
    url = build_chat_url(base_url or "https://api.anthropic.com/v1", binding="anthropic")
    headers = build_auth_headers(api_key, binding="anthropic")

    if messages:
        # Anthropic rejects system-role turns; lift the first system
        # message into the dedicated field, keep the rest as-is.
        msg_list = [m for m in messages if m.get("role") != "system"]
        system_content = system_prompt
        for m in messages:
            if m.get("role") == "system":
                system_content = m["content"]
                break
    else:
        msg_list = [{"role": "user", "content": prompt}]
        system_content = system_prompt

    data = {
        "model": model,
        "system": system_content,
        "messages": msg_list,
        "max_tokens": kwargs.get("max_tokens", 4096),
        "temperature": kwargs.get("temperature", 0.7),
    }

    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=120)) as session:
        async with session.post(url, headers=headers, json=data) as response:
            if response.status != 200:
                error_text = await response.text()
                raise LLMAPIError(
                    f"Anthropic API error: {error_text}",
                    status_code=response.status,
                    provider="anthropic",
                )

            result = await response.json()
            # Messages API shape: {"content": [{"type": "text", "text": ...}]}
            return result["content"][0]["text"]
400
+
401
+
402
async def _anthropic_stream(
    model: str,
    prompt: str,
    system_prompt: str,
    api_key: Optional[str],
    base_url: Optional[str],
    messages: Optional[List[Dict[str, str]]] = None,
    **kwargs,
) -> AsyncGenerator[str, None]:
    """Streaming completion via the Anthropic (Claude) Messages API.

    Posts a ``stream: True`` request and yields the text of every
    ``content_block_delta`` server-sent event.
    """
    import json

    api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise LLMAuthenticationError("Anthropic API key is missing.", provider="anthropic")

    # Endpoint and headers come from the shared provider utilities.
    url = build_chat_url(base_url or "https://api.anthropic.com/v1", binding="anthropic")
    headers = build_auth_headers(api_key, binding="anthropic")

    if messages:
        # System content travels as a dedicated field; strip system-role
        # turns from the conversation and pick the first one as system.
        msg_list = [m for m in messages if m.get("role") != "system"]
        system_content = next(
            (m["content"] for m in messages if m.get("role") == "system"),
            system_prompt,
        )
    else:
        msg_list = [{"role": "user", "content": prompt}]
        system_content = system_prompt

    data = {
        "model": model,
        "system": system_content,
        "messages": msg_list,
        "max_tokens": kwargs.get("max_tokens", 4096),
        "temperature": kwargs.get("temperature", 0.7),
        "stream": True,
    }

    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
        async with session.post(url, headers=headers, json=data) as response:
            if response.status != 200:
                error_text = await response.text()
                raise LLMAPIError(
                    f"Anthropic stream error: {error_text}",
                    status_code=response.status,
                    provider="anthropic",
                )

            async for raw_line in response.content:
                # SSE lines look like "data: {json event}".
                text_line = raw_line.decode("utf-8").strip()
                if not text_line or not text_line.startswith("data:"):
                    continue

                payload = text_line[5:].strip()
                if not payload:
                    continue

                try:
                    event = json.loads(payload)
                except json.JSONDecodeError:
                    # Skip malformed/partial event lines.
                    continue

                if event.get("type") == "content_block_delta":
                    text = event.get("delta", {}).get("text")
                    if text:
                        yield text
476
+
477
+
478
async def fetch_models(
    base_url: str,
    api_key: Optional[str] = None,
    binding: str = "openai",
) -> List[str]:
    """
    Fetch available models from cloud provider.

    Queries ``{base_url}/models`` and normalizes the two response shapes
    handled here: an OpenAI-style ``{"data": [...]}`` envelope, or a bare
    JSON list of dicts/strings. Any network or parsing failure is logged
    and results in an empty list.

    Args:
        base_url: API endpoint URL (trailing slashes are stripped)
        api_key: API key
        binding: Provider type (openai, anthropic)

    Returns:
        List of available model names; empty on error or non-200 status.
    """
    binding = binding.lower()
    base_url = base_url.rstrip("/")

    # Build headers using unified utility
    headers = build_auth_headers(api_key, binding)
    # Remove Content-Type for GET request
    headers.pop("Content-Type", None)

    timeout = aiohttp.ClientTimeout(total=30)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        try:
            url = f"{base_url}/models"
            async with session.get(url, headers=headers) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    if "data" in data and isinstance(data["data"], list):
                        # OpenAI-style envelope: {"data": [{"id": ...}, ...]}
                        return [
                            m.get("id") or m.get("name")
                            for m in data["data"]
                            if m.get("id") or m.get("name")
                        ]
                    elif isinstance(data, list):
                        # Bare list of model dicts or plain strings.
                        names = [
                            m.get("id") or m.get("name") if isinstance(m, dict) else str(m)
                            for m in data
                        ]
                        # Drop dict entries carrying neither "id" nor "name"
                        # (previously this branch could leak None values).
                        return [n for n in names if n]
                return []
        except Exception as e:
            # Log via the logging framework instead of print() so the
            # failure lands in the application log like other errors.
            logging.getLogger(__name__).warning(
                "Error fetching models from %s: %s", base_url, e
            )
            return []
524
+
525
+
526
# Public API of this module: the high-level completion/streaming entry
# points plus model discovery. Provider-specific helpers stay private.
__all__ = [
    "complete",
    "stream",
    "fetch_models",
]