realtimex-deeptutor 0.5.0.post1__py3-none-any.whl → 0.5.0.post3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/METADATA +24 -17
- {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/RECORD +143 -123
- {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/WHEEL +1 -1
- realtimex_deeptutor-0.5.0.post3.dist-info/entry_points.txt +4 -0
- {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/top_level.txt +1 -0
- scripts/__init__.py +1 -0
- scripts/audit_prompts.py +179 -0
- scripts/check_install.py +460 -0
- scripts/generate_roster.py +327 -0
- scripts/install_all.py +653 -0
- scripts/migrate_kb.py +655 -0
- scripts/start.py +807 -0
- scripts/start_web.py +632 -0
- scripts/sync_prompts_from_en.py +147 -0
- src/__init__.py +2 -2
- src/agents/ideagen/material_organizer_agent.py +2 -0
- src/agents/solve/__init__.py +6 -0
- src/agents/solve/main_solver.py +9 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +9 -7
- src/agents/solve/session_manager.py +345 -0
- src/api/main.py +14 -0
- src/api/routers/chat.py +3 -3
- src/api/routers/co_writer.py +12 -7
- src/api/routers/config.py +1 -0
- src/api/routers/guide.py +3 -1
- src/api/routers/ideagen.py +7 -0
- src/api/routers/knowledge.py +64 -12
- src/api/routers/question.py +2 -0
- src/api/routers/realtimex.py +137 -0
- src/api/routers/research.py +9 -0
- src/api/routers/solve.py +120 -2
- src/cli/__init__.py +13 -0
- src/cli/start.py +209 -0
- src/config/constants.py +11 -9
- src/knowledge/add_documents.py +453 -213
- src/knowledge/extract_numbered_items.py +9 -10
- src/knowledge/initializer.py +102 -101
- src/knowledge/manager.py +251 -74
- src/knowledge/progress_tracker.py +43 -2
- src/knowledge/start_kb.py +11 -2
- src/logging/__init__.py +5 -0
- src/logging/adapters/__init__.py +1 -0
- src/logging/adapters/lightrag.py +25 -18
- src/logging/adapters/llamaindex.py +1 -0
- src/logging/config.py +30 -27
- src/logging/handlers/__init__.py +1 -0
- src/logging/handlers/console.py +7 -50
- src/logging/handlers/file.py +5 -20
- src/logging/handlers/websocket.py +23 -19
- src/logging/logger.py +161 -126
- src/logging/stats/__init__.py +1 -0
- src/logging/stats/llm_stats.py +37 -17
- src/services/__init__.py +17 -1
- src/services/config/__init__.py +1 -0
- src/services/config/knowledge_base_config.py +1 -0
- src/services/config/loader.py +1 -1
- src/services/config/unified_config.py +211 -4
- src/services/embedding/__init__.py +1 -0
- src/services/embedding/adapters/__init__.py +3 -0
- src/services/embedding/adapters/base.py +1 -0
- src/services/embedding/adapters/cohere.py +1 -0
- src/services/embedding/adapters/jina.py +1 -0
- src/services/embedding/adapters/ollama.py +1 -0
- src/services/embedding/adapters/openai_compatible.py +1 -0
- src/services/embedding/adapters/realtimex.py +125 -0
- src/services/embedding/client.py +27 -0
- src/services/embedding/config.py +3 -0
- src/services/embedding/provider.py +1 -0
- src/services/llm/__init__.py +17 -3
- src/services/llm/capabilities.py +47 -0
- src/services/llm/client.py +32 -0
- src/services/llm/cloud_provider.py +21 -4
- src/services/llm/config.py +36 -2
- src/services/llm/error_mapping.py +1 -0
- src/services/llm/exceptions.py +30 -0
- src/services/llm/factory.py +55 -16
- src/services/llm/local_provider.py +1 -0
- src/services/llm/providers/anthropic.py +1 -0
- src/services/llm/providers/base_provider.py +1 -0
- src/services/llm/providers/open_ai.py +1 -0
- src/services/llm/realtimex_provider.py +240 -0
- src/services/llm/registry.py +1 -0
- src/services/llm/telemetry.py +1 -0
- src/services/llm/types.py +1 -0
- src/services/llm/utils.py +1 -0
- src/services/prompt/__init__.py +1 -0
- src/services/prompt/manager.py +3 -2
- src/services/rag/__init__.py +27 -5
- src/services/rag/components/__init__.py +1 -0
- src/services/rag/components/base.py +1 -0
- src/services/rag/components/chunkers/__init__.py +1 -0
- src/services/rag/components/chunkers/base.py +1 -0
- src/services/rag/components/chunkers/fixed.py +1 -0
- src/services/rag/components/chunkers/numbered_item.py +1 -0
- src/services/rag/components/chunkers/semantic.py +1 -0
- src/services/rag/components/embedders/__init__.py +1 -0
- src/services/rag/components/embedders/base.py +1 -0
- src/services/rag/components/embedders/openai.py +1 -0
- src/services/rag/components/indexers/__init__.py +1 -0
- src/services/rag/components/indexers/base.py +1 -0
- src/services/rag/components/indexers/graph.py +5 -44
- src/services/rag/components/indexers/lightrag.py +5 -44
- src/services/rag/components/indexers/vector.py +1 -0
- src/services/rag/components/parsers/__init__.py +1 -0
- src/services/rag/components/parsers/base.py +1 -0
- src/services/rag/components/parsers/markdown.py +1 -0
- src/services/rag/components/parsers/pdf.py +1 -0
- src/services/rag/components/parsers/text.py +1 -0
- src/services/rag/components/retrievers/__init__.py +1 -0
- src/services/rag/components/retrievers/base.py +1 -0
- src/services/rag/components/retrievers/dense.py +1 -0
- src/services/rag/components/retrievers/hybrid.py +5 -44
- src/services/rag/components/retrievers/lightrag.py +5 -44
- src/services/rag/components/routing.py +48 -0
- src/services/rag/factory.py +112 -46
- src/services/rag/pipeline.py +1 -0
- src/services/rag/pipelines/__init__.py +27 -18
- src/services/rag/pipelines/lightrag.py +1 -0
- src/services/rag/pipelines/llamaindex.py +99 -0
- src/services/rag/pipelines/raganything.py +67 -100
- src/services/rag/pipelines/raganything_docling.py +368 -0
- src/services/rag/service.py +5 -12
- src/services/rag/types.py +1 -0
- src/services/rag/utils/__init__.py +17 -0
- src/services/rag/utils/image_migration.py +279 -0
- src/services/search/__init__.py +1 -0
- src/services/search/base.py +1 -0
- src/services/search/consolidation.py +1 -0
- src/services/search/providers/__init__.py +1 -0
- src/services/search/providers/baidu.py +1 -0
- src/services/search/providers/exa.py +1 -0
- src/services/search/providers/jina.py +1 -0
- src/services/search/providers/perplexity.py +1 -0
- src/services/search/providers/serper.py +1 -0
- src/services/search/providers/tavily.py +1 -0
- src/services/search/types.py +1 -0
- src/services/settings/__init__.py +1 -0
- src/services/settings/interface_settings.py +78 -0
- src/services/setup/__init__.py +1 -0
- src/services/tts/__init__.py +1 -0
- src/services/tts/config.py +1 -0
- src/utils/realtimex.py +284 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +0 -2
- src/services/rag/pipelines/academic.py +0 -44
- {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/licenses/LICENSE +0 -0
src/services/llm/capabilities.py
CHANGED
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Provider Capabilities
 =====================
@@ -120,6 +121,14 @@ PROVIDER_CAPABILITIES: dict[str, dict[str, Any]] = {
         "supports_tools": False,
         "system_in_messages": True,
     },
+    # RealTimeX SDK (proxy to configured providers)
+    "realtimex": {
+        "supports_response_format": True,  # Proxied to underlying provider
+        "supports_streaming": True,  # SDK supports chat_stream()
+        "supports_tools": True,  # Depends on underlying provider
+        "system_in_messages": True,  # SDK uses messages array format
+        "is_proxy": True,  # SDK proxies to actual LLM providers
+    },
 }
 
 # Default capabilities for unknown providers (assume OpenAI-compatible)
@@ -129,6 +138,7 @@ DEFAULT_CAPABILITIES: dict[str, Any] = {
     "supports_tools": False,
     "system_in_messages": True,
     "has_thinking_tags": False,
+    "forced_temperature": None,  # None means no forced value, use requested temperature
 }
 
 # Model-specific overrides
@@ -161,6 +171,17 @@ MODEL_OVERRIDES: dict[str, dict[str, Any]] = {
         "supports_response_format": False,
         "system_in_messages": False,
     },
+    # Reasoning models - only support temperature=1.0
+    # See: https://github.com/HKUDS/DeepTutor/issues/141
+    "gpt-5": {
+        "forced_temperature": 1.0,
+    },
+    "o1": {
+        "forced_temperature": 1.0,
+    },
+    "o3": {
+        "forced_temperature": 1.0,
+    },
 }
 
 
@@ -299,6 +320,31 @@ def requires_api_version(binding: str, model: Optional[str] = None) -> bool:
     return get_capability(binding, "requires_api_version", model, default=False)
 
 
+def get_effective_temperature(
+    binding: str,
+    model: Optional[str] = None,
+    requested_temp: float = 0.7,
+) -> float:
+    """
+    Get the effective temperature value for a model.
+
+    Some models (e.g., o1, o3, gpt-5) only support a fixed temperature value (1.0).
+    This function returns the forced temperature if defined, otherwise the requested value.
+
+    Args:
+        binding: Provider binding name
+        model: Optional model name for model-specific overrides
+        requested_temp: The temperature value requested by the caller (default: 0.7)
+
+    Returns:
+        The effective temperature to use for the API call
+    """
+    forced_temp = get_capability(binding, "forced_temperature", model)
+    if forced_temp is not None:
+        return forced_temp
+    return requested_temp
+
+
 __all__ = [
     "PROVIDER_CAPABILITIES",
     "MODEL_OVERRIDES",
@@ -310,4 +356,5 @@ __all__ = [
     "has_thinking_tags",
     "supports_tools",
     "requires_api_version",
+    "get_effective_temperature",
 ]
src/services/llm/client.py
CHANGED
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 LLM Client
 ==========
@@ -31,9 +32,38 @@ class LLMClient:
         Args:
             config: LLM configuration. If None, loads from environment.
         """
+
         self.config = config or get_llm_config()
         self.logger = get_logger("LLMClient")
 
+        # Set environment variables for LightRAG compatibility
+        # LightRAG's internal functions (openai_complete_if_cache, etc.) read from
+        # os.environ["OPENAI_API_KEY"] even when api_key is passed as parameter.
+        # We must set these env vars early to ensure all LightRAG operations work.
+        self._setup_openai_env_vars()
+
+    def _setup_openai_env_vars(self):
+        """
+        Set OpenAI environment variables for LightRAG compatibility.
+
+        LightRAG's internal functions read from os.environ["OPENAI_API_KEY"]
+        even when api_key is passed as parameter. This method ensures the
+        environment variables are set for all LightRAG operations.
+        """
+        import os
+
+        binding = getattr(self.config, "binding", "openai")
+
+        # Only set env vars for OpenAI-compatible bindings
+        if binding in ("openai", "azure_openai", "gemini"):
+            if self.config.api_key:
+                os.environ["OPENAI_API_KEY"] = self.config.api_key
+                self.logger.debug("Set OPENAI_API_KEY env var for LightRAG compatibility")
+
+            if self.config.base_url:
+                os.environ["OPENAI_BASE_URL"] = self.config.base_url
+                self.logger.debug(f"Set OPENAI_BASE_URL env var to {self.config.base_url}")
+
     async def complete(
         self,
         prompt: str,
@@ -128,6 +158,7 @@ class LLMClient:
             return llm_model_func_via_factory
 
         # OpenAI-compatible bindings use lightrag (has caching)
+        # Note: Environment variables are already set in __init__ via _setup_openai_env_vars()
         from lightrag.llm.openai import openai_complete_if_cache
 
         def llm_model_func(
@@ -196,6 +227,7 @@ class LLMClient:
            return vision_model_func_via_factory
 
         # OpenAI-compatible bindings
+        # Note: Environment variables are already set in __init__ via _setup_openai_env_vars()
         from lightrag.llm.openai import openai_complete_if_cache
 
         # Get api_version once for reuse
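
The new _setup_openai_env_vars() step boils down to mirroring the configured credentials into the environment before any LightRAG call, because LightRAG's helpers read os.environ["OPENAI_API_KEY"] even when an api_key is passed explicitly. A standalone sketch of the same idea; the mirror_openai_env name and the example values are illustrative only, not part of the package:

import os
from typing import Optional

def mirror_openai_env(binding: str, api_key: Optional[str], base_url: Optional[str]) -> None:
    # Only OpenAI-compatible bindings rely on these variables.
    if binding not in ("openai", "azure_openai", "gemini"):
        return
    if api_key:
        os.environ["OPENAI_API_KEY"] = api_key
    if base_url:
        os.environ["OPENAI_BASE_URL"] = base_url

mirror_openai_env("openai", api_key="sk-example", base_url="https://api.openai.com/v1")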
src/services/llm/cloud_provider.py
CHANGED
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Cloud LLM Provider
 ==================
@@ -11,14 +12,27 @@ import os
 from typing import AsyncGenerator, Dict, List, Optional
 
 import aiohttp
-from lightrag.llm.openai import openai_complete_if_cache
 
 # Get loggers for suppression during fallback scenarios
 # (lightrag logs errors internally before raising exceptions)
 _lightrag_logger = logging.getLogger("lightrag")
 _openai_logger = logging.getLogger("openai")
 
-
+# Lazy import for lightrag to avoid import errors when not installed
+_openai_complete_if_cache = None
+
+
+def _get_openai_complete_if_cache():
+    """Lazy load openai_complete_if_cache from lightrag."""
+    global _openai_complete_if_cache
+    if _openai_complete_if_cache is None:
+        from lightrag.llm.openai import openai_complete_if_cache
+
+        _openai_complete_if_cache = openai_complete_if_cache
+    return _openai_complete_if_cache
+
+
+from .capabilities import get_effective_temperature, supports_response_format
 from .config import get_token_limit_kwargs
 from .exceptions import LLMAPIError, LLMAuthenticationError, LLMConfigError
 from .utils import (
@@ -182,6 +196,7 @@ async def _openai_complete(
     _openai_logger.setLevel(logging.CRITICAL)
     try:
         # model and prompt must be positional arguments
+        openai_complete_if_cache = _get_openai_complete_if_cache()
         content = await openai_complete_if_cache(model, prompt, **lightrag_kwargs)
     finally:
         _lightrag_logger.setLevel(original_lightrag_level)
@@ -203,7 +218,9 @@ async def _openai_complete(
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": prompt},
         ],
-        "temperature":
+        "temperature": get_effective_temperature(
+            binding, model, kwargs.get("temperature", 0.7)
+        ),
     }
 
     # Handle max_tokens / max_completion_tokens based on model
@@ -279,7 +296,7 @@ async def _openai_stream(
     data = {
         "model": model,
         "messages": msg_list,
-        "temperature": kwargs.get("temperature", 0.7),
+        "temperature": get_effective_temperature(binding, model, kwargs.get("temperature", 0.7)),
         "stream": True,
     }
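
The lazy-import change above follows a common pattern: defer the optional dependency import until first use so the module still imports when lightrag is absent, and cache the result afterwards. A generic, runnable sketch, with json.dumps standing in for lightrag's openai_complete_if_cache:

_cached_fn = None

def _get_lazy_fn():
    global _cached_fn
    if _cached_fn is None:
        from json import dumps as lazy_fn  # stand-in for the optional import
        _cached_fn = lazy_fn
    return _cached_fn

print(_get_lazy_fn()({"status": "imported on first call, cached afterwards"}))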
src/services/llm/config.py
CHANGED
@@ -26,6 +26,37 @@ load_dotenv(PROJECT_ROOT / "DeepTutor.env", override=False)
 load_dotenv(PROJECT_ROOT / ".env", override=False)
 
 
+def _setup_openai_env_vars_early():
+    """
+    Set OPENAI_API_KEY environment variable early for LightRAG compatibility.
+
+    LightRAG's internal functions (e.g., create_openai_async_client) read directly
+    from os.environ["OPENAI_API_KEY"] instead of using the api_key parameter.
+    This function ensures the environment variable is set as soon as this module
+    is imported, before any LightRAG operations can occur.
+
+    This is called at module load time to ensure env vars are set before any
+    RAG operations, including those in worker threads/processes.
+    """
+    binding = os.getenv("LLM_BINDING", "openai")
+    api_key = os.getenv("LLM_API_KEY")
+    base_url = os.getenv("LLM_HOST")
+
+    # Only set env vars for OpenAI-compatible bindings
+    if binding in ("openai", "azure_openai", "gemini"):
+        if api_key and not os.getenv("OPENAI_API_KEY"):
+            os.environ["OPENAI_API_KEY"] = api_key
+            logger.debug("Set OPENAI_API_KEY env var for LightRAG compatibility (early init)")
+
+        if base_url and not os.getenv("OPENAI_BASE_URL"):
+            os.environ["OPENAI_BASE_URL"] = base_url
+            logger.debug(f"Set OPENAI_BASE_URL env var to {base_url} (early init)")
+
+
+# Execute early setup at module import time
+_setup_openai_env_vars_early()
+
+
 @dataclass
 class LLMConfig:
     """LLM configuration dataclass."""
@@ -37,6 +68,7 @@ class LLMConfig:
     api_version: Optional[str] = None
     max_tokens: int = 4096
     temperature: float = 0.7
+    source: Optional[str] = None  # "realtimex" when using RTX SDK
 
 
 def _strip_value(value: Optional[str]) -> Optional[str]:
@@ -95,10 +127,11 @@ def get_llm_config() -> LLMConfig:
         if config:
             return LLMConfig(
                 binding=config.get("provider", "openai"),
-                model=config
+                model=config.get("model", ""),
                 api_key=config.get("api_key", ""),
                 base_url=config.get("base_url"),
                 api_version=config.get("api_version"),
+                source=config.get("source"),  # "realtimex" when using RTX
             )
     except ImportError:
         # Unified config service not yet available, fall back to env
@@ -128,10 +161,11 @@ async def get_llm_config_async() -> LLMConfig:
         if config:
             return LLMConfig(
                 binding=config.get("provider", "openai"),
-                model=config
+                model=config.get("model", ""),
                 api_key=config.get("api_key", ""),
                 base_url=config.get("base_url"),
                 api_version=config.get("api_version"),
+                source=config.get("source"),  # "realtimex" when using RTX
             )
     except ImportError:
         pass
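
A hedged sketch of how a unified-config dict maps onto LLMConfig after this change: the field names mirror the diff, while the dataclass defaults and the dict values below are illustrative assumptions, not the package's actual configuration.

from dataclasses import dataclass
from typing import Optional

@dataclass
class LLMConfig:
    binding: str = "openai"
    model: str = ""
    api_key: str = ""
    base_url: Optional[str] = None
    api_version: Optional[str] = None
    max_tokens: int = 4096
    temperature: float = 0.7
    source: Optional[str] = None  # "realtimex" when the config comes from the RTX SDK

raw = {"provider": "openai", "model": "gpt-4o-mini", "api_key": "sk-example", "source": "realtimex"}
cfg = LLMConfig(
    binding=raw.get("provider", "openai"),
    model=raw.get("model", ""),
    api_key=raw.get("api_key", ""),
    base_url=raw.get("base_url"),
    api_version=raw.get("api_version"),
    source=raw.get("source"),
)
print(cfg.source)  # -> "realtimex"; the factory uses this to route calls to the RTX provider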
src/services/llm/exceptions.py
CHANGED
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 LLM Service Exceptions
 ======================
@@ -137,6 +138,33 @@ class ProviderContextWindowError(LLMAPIError):
     pass
 
 
+# RealTimeX-specific exceptions (for SDK integration)
+class RealTimeXError(LLMAPIError):
+    """
+    Base exception for RealTimeX SDK errors.
+
+    Raised when interacting with RealTimeX SDK proxy fails.
+    """
+
+    def __init__(self, message: str, error_code: Optional[str] = None):
+        super().__init__(message, error_code)
+
+
+class RealTimeXPermissionError(LLMAuthenticationError):
+    """
+    Exception raised when RealTimeX SDK permission is required or denied.
+
+    Args:
+        permission: The permission that was required (e.g., 'llm.chat', 'llm.embed')
+        message: Optional custom error message
+    """
+
+    def __init__(self, permission: str, message: Optional[str] = None):
+        self.permission = permission
+        msg = message or f"RealTimeX permission required: {permission}"
+        super().__init__(msg)
+
+
 __all__ = [
     "LLMError",
     "LLMConfigError",
@@ -149,4 +177,6 @@ __all__ = [
     "LLMParseError",
     "ProviderQuotaExceededError",
     "ProviderContextWindowError",
+    "RealTimeXError",
+    "RealTimeXPermissionError",
 ]
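
A hypothetical usage sketch for the new exceptions, assuming the package is importable as src.services.llm; deny_chat() is made up, and only the exception classes and the permission attribute come from the diff above.

from src.services.llm.exceptions import RealTimeXError, RealTimeXPermissionError

def deny_chat():
    # Stand-in for a RealTimeX SDK call that is refused a permission.
    raise RealTimeXPermissionError("llm.chat")

try:
    deny_chat()
except RealTimeXPermissionError as exc:
    print(f"RealTimeX permission needed: {exc.permission}")
except RealTimeXError as exc:
    print(f"RealTimeX SDK error: {exc}")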
src/services/llm/factory.py
CHANGED
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 LLM Factory - Central Hub for LLM Calls
 =======================================
@@ -20,9 +21,10 @@ CloudProvider LocalProvider
          ↓                      ↓
 OpenAI/DeepSeek/etc     LM Studio/Ollama/etc
 
-Routing:
-
-
+Routing (Priority Order):
+1. RealTimeX SDK (if RTX_APP_ID detected)
+2. Local Provider (for localhost/127.0.0.1 URLs)
+3. Cloud Provider (default)
 
 Retry Mechanism:
 - Automatic retry with exponential backoff for transient errors
@@ -36,6 +38,7 @@ from typing import Any, AsyncGenerator, Dict, List, Optional
 import tenacity
 
 from src.logging.logger import get_logger
+from src.utils.realtimex import should_use_realtimex_sdk
 
 from . import cloud_provider, local_provider
 from .config import get_llm_config
@@ -51,8 +54,8 @@ from .utils import is_local_llm_server
 logger = get_logger("LLMFactory")
 
 # Default retry configuration
-DEFAULT_MAX_RETRIES =
-DEFAULT_RETRY_DELAY =
+DEFAULT_MAX_RETRIES = 5  # Increased for complex agents like Research
+DEFAULT_RETRY_DELAY = 2.0  # seconds
 DEFAULT_EXPONENTIAL_BACKOFF = True
 
 
@@ -141,8 +144,8 @@ async def complete(
         api_version: API version for Azure OpenAI (optional)
         binding: Provider binding type (optional)
         messages: Pre-built messages array (optional)
-        max_retries: Maximum number of retry attempts (default:
-        retry_delay: Initial delay between retries in seconds (default:
+        max_retries: Maximum number of retry attempts (default: 5)
+        retry_delay: Initial delay between retries in seconds (default: 2.0)
         exponential_backoff: Whether to use exponential backoff (default: True)
         **kwargs: Additional parameters (temperature, max_tokens, etc.)
 
@@ -150,6 +153,7 @@ async def complete(
         str: The LLM response
     """
     # Get config if parameters not provided
+    config = None
     if not model or not base_url:
         config = get_llm_config()
         model = model or config.model
@@ -158,7 +162,17 @@ async def complete(
         api_version = api_version or config.api_version
         binding = binding or config.binding or "openai"
 
-    #
+    # ROUTING LOGIC (Priority order):
+
+    # 1. RealTimeX SDK (when active config has source="realtimex")
+    if config and getattr(config, "source", None) == "realtimex" and should_use_realtimex_sdk():
+        from . import realtimex_provider
+
+        return await realtimex_provider.complete(
+            prompt=prompt, system_prompt=system_prompt, model=model, messages=messages, **kwargs
+        )
+
+    # 2. Determine which provider to use (local vs cloud)
     use_local = _should_use_local(base_url)
 
     # Define helper to determine if a generic LLMAPIError is retriable
@@ -189,6 +203,9 @@ async def complete(
 
         return False
 
+    # Calculate total attempts for logging (1 initial + max_retries)
+    total_attempts = max_retries + 1
+
     # Define the actual completion function with tenacity retry
     @tenacity.retry(
         retry=(
@@ -196,10 +213,10 @@ async def complete(
             | tenacity.retry_if_exception_type(LLMTimeoutError)
             | tenacity.retry_if_exception(_is_retriable_llm_api_error)
         ),
-        wait=tenacity.wait_exponential(multiplier=retry_delay, min=retry_delay, max=
-        stop=tenacity.stop_after_attempt(
+        wait=tenacity.wait_exponential(multiplier=retry_delay, min=retry_delay, max=120),
+        stop=tenacity.stop_after_attempt(total_attempts),
         before_sleep=lambda retry_state: logger.warning(
-            f"LLM call failed (attempt {retry_state.attempt_number}/{
+            f"LLM call failed (attempt {retry_state.attempt_number}/{total_attempts}), "
             f"retrying in {retry_state.upcoming_sleep:.1f}s... Error: {str(retry_state.outcome.exception())}"
         ),
     )
@@ -268,8 +285,8 @@ async def stream(
         api_version: API version for Azure OpenAI (optional)
         binding: Provider binding type (optional)
         messages: Pre-built messages array (optional)
-        max_retries: Maximum number of retry attempts (default:
-        retry_delay: Initial delay between retries in seconds (default:
+        max_retries: Maximum number of retry attempts (default: 5)
+        retry_delay: Initial delay between retries in seconds (default: 2.0)
        exponential_backoff: Whether to use exponential backoff (default: True)
         **kwargs: Additional parameters (temperature, max_tokens, etc.)
 
@@ -277,6 +294,7 @@ async def stream(
         str: Response chunks
     """
     # Get config if parameters not provided
+    config = None
     if not model or not base_url:
         config = get_llm_config()
         model = model or config.model
@@ -285,7 +303,19 @@ async def stream(
         api_version = api_version or config.api_version
         binding = binding or config.binding or "openai"
 
-    #
+    # ROUTING LOGIC (Priority order):
+
+    # 1. RealTimeX SDK (when active config has source="realtimex")
+    if config and getattr(config, "source", None) == "realtimex" and should_use_realtimex_sdk():
+        from . import realtimex_provider
+
+        async for chunk in realtimex_provider.stream(
+            prompt=prompt, system_prompt=system_prompt, model=model, messages=messages, **kwargs
+        ):
+            yield chunk
+        return
+
+    # 2. Determine which provider to use (local vs cloud)
     use_local = _should_use_local(base_url)
 
     # Build call kwargs
@@ -305,10 +335,13 @@ async def stream(
     call_kwargs["binding"] = binding or "openai"
 
     # Retry logic for streaming (retry on connection errors)
+    # Total attempts = 1 initial + max_retries
+    total_attempts = max_retries + 1
     last_exception = None
     delay = retry_delay
+    max_delay = 120  # Cap maximum delay at 120 seconds (consistent with complete())
 
-    for attempt in range(
+    for attempt in range(total_attempts):
         try:
             # Route to appropriate provider
             if use_local:
@@ -328,7 +361,7 @@ async def stream(
 
             # Calculate delay for next attempt
             if exponential_backoff:
-                current_delay = delay * (2**attempt)
+                current_delay = min(delay * (2**attempt), max_delay)
             else:
                 current_delay = delay
 
@@ -336,6 +369,12 @@ async def stream(
             if isinstance(e, LLMRateLimitError) and e.retry_after:
                 current_delay = max(current_delay, e.retry_after)
 
+            # Log retry attempt (consistent with complete() function)
+            logger.warning(
+                f"LLM streaming failed (attempt {attempt + 1}/{total_attempts}), "
+                f"retrying in {current_delay:.1f}s... Error: {str(e)}"
+            )
+
            # Wait before retrying
             await asyncio.sleep(current_delay)
 
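
The streaming path now shares complete()'s capped exponential backoff. A minimal sketch of the resulting delay schedule, using the new defaults from the diff (5 retries, 2.0 s initial delay, 120 s cap):

DEFAULT_MAX_RETRIES = 5
DEFAULT_RETRY_DELAY = 2.0
MAX_DELAY = 120

total_attempts = DEFAULT_MAX_RETRIES + 1
for attempt in range(total_attempts):
    # Delay applied if this attempt fails and another retry remains.
    current_delay = min(DEFAULT_RETRY_DELAY * (2 ** attempt), MAX_DELAY)
    print(f"attempt {attempt + 1}/{total_attempts}: on failure, wait {current_delay:.1f}s before retrying")
# -> 2.0, 4.0, 8.0, 16.0, 32.0, 64.0 seconds (all below the 120 s cap with these defaults)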