realtimex-deeptutor 0.5.0.post1__py3-none-any.whl → 0.5.0.post3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (145)
  1. {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/METADATA +24 -17
  2. {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/RECORD +143 -123
  3. {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/WHEEL +1 -1
  4. realtimex_deeptutor-0.5.0.post3.dist-info/entry_points.txt +4 -0
  5. {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/top_level.txt +1 -0
  6. scripts/__init__.py +1 -0
  7. scripts/audit_prompts.py +179 -0
  8. scripts/check_install.py +460 -0
  9. scripts/generate_roster.py +327 -0
  10. scripts/install_all.py +653 -0
  11. scripts/migrate_kb.py +655 -0
  12. scripts/start.py +807 -0
  13. scripts/start_web.py +632 -0
  14. scripts/sync_prompts_from_en.py +147 -0
  15. src/__init__.py +2 -2
  16. src/agents/ideagen/material_organizer_agent.py +2 -0
  17. src/agents/solve/__init__.py +6 -0
  18. src/agents/solve/main_solver.py +9 -0
  19. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +9 -7
  20. src/agents/solve/session_manager.py +345 -0
  21. src/api/main.py +14 -0
  22. src/api/routers/chat.py +3 -3
  23. src/api/routers/co_writer.py +12 -7
  24. src/api/routers/config.py +1 -0
  25. src/api/routers/guide.py +3 -1
  26. src/api/routers/ideagen.py +7 -0
  27. src/api/routers/knowledge.py +64 -12
  28. src/api/routers/question.py +2 -0
  29. src/api/routers/realtimex.py +137 -0
  30. src/api/routers/research.py +9 -0
  31. src/api/routers/solve.py +120 -2
  32. src/cli/__init__.py +13 -0
  33. src/cli/start.py +209 -0
  34. src/config/constants.py +11 -9
  35. src/knowledge/add_documents.py +453 -213
  36. src/knowledge/extract_numbered_items.py +9 -10
  37. src/knowledge/initializer.py +102 -101
  38. src/knowledge/manager.py +251 -74
  39. src/knowledge/progress_tracker.py +43 -2
  40. src/knowledge/start_kb.py +11 -2
  41. src/logging/__init__.py +5 -0
  42. src/logging/adapters/__init__.py +1 -0
  43. src/logging/adapters/lightrag.py +25 -18
  44. src/logging/adapters/llamaindex.py +1 -0
  45. src/logging/config.py +30 -27
  46. src/logging/handlers/__init__.py +1 -0
  47. src/logging/handlers/console.py +7 -50
  48. src/logging/handlers/file.py +5 -20
  49. src/logging/handlers/websocket.py +23 -19
  50. src/logging/logger.py +161 -126
  51. src/logging/stats/__init__.py +1 -0
  52. src/logging/stats/llm_stats.py +37 -17
  53. src/services/__init__.py +17 -1
  54. src/services/config/__init__.py +1 -0
  55. src/services/config/knowledge_base_config.py +1 -0
  56. src/services/config/loader.py +1 -1
  57. src/services/config/unified_config.py +211 -4
  58. src/services/embedding/__init__.py +1 -0
  59. src/services/embedding/adapters/__init__.py +3 -0
  60. src/services/embedding/adapters/base.py +1 -0
  61. src/services/embedding/adapters/cohere.py +1 -0
  62. src/services/embedding/adapters/jina.py +1 -0
  63. src/services/embedding/adapters/ollama.py +1 -0
  64. src/services/embedding/adapters/openai_compatible.py +1 -0
  65. src/services/embedding/adapters/realtimex.py +125 -0
  66. src/services/embedding/client.py +27 -0
  67. src/services/embedding/config.py +3 -0
  68. src/services/embedding/provider.py +1 -0
  69. src/services/llm/__init__.py +17 -3
  70. src/services/llm/capabilities.py +47 -0
  71. src/services/llm/client.py +32 -0
  72. src/services/llm/cloud_provider.py +21 -4
  73. src/services/llm/config.py +36 -2
  74. src/services/llm/error_mapping.py +1 -0
  75. src/services/llm/exceptions.py +30 -0
  76. src/services/llm/factory.py +55 -16
  77. src/services/llm/local_provider.py +1 -0
  78. src/services/llm/providers/anthropic.py +1 -0
  79. src/services/llm/providers/base_provider.py +1 -0
  80. src/services/llm/providers/open_ai.py +1 -0
  81. src/services/llm/realtimex_provider.py +240 -0
  82. src/services/llm/registry.py +1 -0
  83. src/services/llm/telemetry.py +1 -0
  84. src/services/llm/types.py +1 -0
  85. src/services/llm/utils.py +1 -0
  86. src/services/prompt/__init__.py +1 -0
  87. src/services/prompt/manager.py +3 -2
  88. src/services/rag/__init__.py +27 -5
  89. src/services/rag/components/__init__.py +1 -0
  90. src/services/rag/components/base.py +1 -0
  91. src/services/rag/components/chunkers/__init__.py +1 -0
  92. src/services/rag/components/chunkers/base.py +1 -0
  93. src/services/rag/components/chunkers/fixed.py +1 -0
  94. src/services/rag/components/chunkers/numbered_item.py +1 -0
  95. src/services/rag/components/chunkers/semantic.py +1 -0
  96. src/services/rag/components/embedders/__init__.py +1 -0
  97. src/services/rag/components/embedders/base.py +1 -0
  98. src/services/rag/components/embedders/openai.py +1 -0
  99. src/services/rag/components/indexers/__init__.py +1 -0
  100. src/services/rag/components/indexers/base.py +1 -0
  101. src/services/rag/components/indexers/graph.py +5 -44
  102. src/services/rag/components/indexers/lightrag.py +5 -44
  103. src/services/rag/components/indexers/vector.py +1 -0
  104. src/services/rag/components/parsers/__init__.py +1 -0
  105. src/services/rag/components/parsers/base.py +1 -0
  106. src/services/rag/components/parsers/markdown.py +1 -0
  107. src/services/rag/components/parsers/pdf.py +1 -0
  108. src/services/rag/components/parsers/text.py +1 -0
  109. src/services/rag/components/retrievers/__init__.py +1 -0
  110. src/services/rag/components/retrievers/base.py +1 -0
  111. src/services/rag/components/retrievers/dense.py +1 -0
  112. src/services/rag/components/retrievers/hybrid.py +5 -44
  113. src/services/rag/components/retrievers/lightrag.py +5 -44
  114. src/services/rag/components/routing.py +48 -0
  115. src/services/rag/factory.py +112 -46
  116. src/services/rag/pipeline.py +1 -0
  117. src/services/rag/pipelines/__init__.py +27 -18
  118. src/services/rag/pipelines/lightrag.py +1 -0
  119. src/services/rag/pipelines/llamaindex.py +99 -0
  120. src/services/rag/pipelines/raganything.py +67 -100
  121. src/services/rag/pipelines/raganything_docling.py +368 -0
  122. src/services/rag/service.py +5 -12
  123. src/services/rag/types.py +1 -0
  124. src/services/rag/utils/__init__.py +17 -0
  125. src/services/rag/utils/image_migration.py +279 -0
  126. src/services/search/__init__.py +1 -0
  127. src/services/search/base.py +1 -0
  128. src/services/search/consolidation.py +1 -0
  129. src/services/search/providers/__init__.py +1 -0
  130. src/services/search/providers/baidu.py +1 -0
  131. src/services/search/providers/exa.py +1 -0
  132. src/services/search/providers/jina.py +1 -0
  133. src/services/search/providers/perplexity.py +1 -0
  134. src/services/search/providers/serper.py +1 -0
  135. src/services/search/providers/tavily.py +1 -0
  136. src/services/search/types.py +1 -0
  137. src/services/settings/__init__.py +1 -0
  138. src/services/settings/interface_settings.py +78 -0
  139. src/services/setup/__init__.py +1 -0
  140. src/services/tts/__init__.py +1 -0
  141. src/services/tts/config.py +1 -0
  142. src/utils/realtimex.py +284 -0
  143. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +0 -2
  144. src/services/rag/pipelines/academic.py +0 -44
  145. {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/licenses/LICENSE +0 -0
src/services/llm/capabilities.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Provider Capabilities
 =====================
@@ -120,6 +121,14 @@ PROVIDER_CAPABILITIES: dict[str, dict[str, Any]] = {
         "supports_tools": False,
         "system_in_messages": True,
     },
+    # RealTimeX SDK (proxy to configured providers)
+    "realtimex": {
+        "supports_response_format": True,  # Proxied to underlying provider
+        "supports_streaming": True,  # SDK supports chat_stream()
+        "supports_tools": True,  # Depends on underlying provider
+        "system_in_messages": True,  # SDK uses messages array format
+        "is_proxy": True,  # SDK proxies to actual LLM providers
+    },
 }
 
 # Default capabilities for unknown providers (assume OpenAI-compatible)
@@ -129,6 +138,7 @@ DEFAULT_CAPABILITIES: dict[str, Any] = {
     "supports_tools": False,
     "system_in_messages": True,
     "has_thinking_tags": False,
+    "forced_temperature": None,  # None means no forced value, use requested temperature
 }
 
 # Model-specific overrides
@@ -161,6 +171,17 @@ MODEL_OVERRIDES: dict[str, dict[str, Any]] = {
         "supports_response_format": False,
         "system_in_messages": False,
     },
+    # Reasoning models - only support temperature=1.0
+    # See: https://github.com/HKUDS/DeepTutor/issues/141
+    "gpt-5": {
+        "forced_temperature": 1.0,
+    },
+    "o1": {
+        "forced_temperature": 1.0,
+    },
+    "o3": {
+        "forced_temperature": 1.0,
+    },
 }
 
 
@@ -299,6 +320,31 @@ def requires_api_version(binding: str, model: Optional[str] = None) -> bool:
     return get_capability(binding, "requires_api_version", model, default=False)
 
 
+def get_effective_temperature(
+    binding: str,
+    model: Optional[str] = None,
+    requested_temp: float = 0.7,
+) -> float:
+    """
+    Get the effective temperature value for a model.
+
+    Some models (e.g., o1, o3, gpt-5) only support a fixed temperature value (1.0).
+    This function returns the forced temperature if defined, otherwise the requested value.
+
+    Args:
+        binding: Provider binding name
+        model: Optional model name for model-specific overrides
+        requested_temp: The temperature value requested by the caller (default: 0.7)
+
+    Returns:
+        The effective temperature to use for the API call
+    """
+    forced_temp = get_capability(binding, "forced_temperature", model)
+    if forced_temp is not None:
+        return forced_temp
+    return requested_temp
+
+
 __all__ = [
     "PROVIDER_CAPABILITIES",
     "MODEL_OVERRIDES",
@@ -310,4 +356,5 @@ __all__ = [
     "has_thinking_tags",
     "supports_tools",
     "requires_api_version",
+    "get_effective_temperature",
 ]
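The get_effective_temperature() helper added above resolves temperature in two steps: a model-specific forced_temperature override (o1, o3, gpt-5) takes priority, otherwise the caller's requested value is used. Below is a minimal standalone sketch of that resolution order; it is a simplified stand-in for illustration, not the package's get_capability() lookup:

```python
from typing import Optional

# Illustrative subset of the tables defined in the diff above.
MODEL_OVERRIDES = {"o1": {"forced_temperature": 1.0}, "gpt-5": {"forced_temperature": 1.0}}
DEFAULT_CAPABILITIES = {"forced_temperature": None}

def effective_temperature(binding: str, model: Optional[str], requested: float = 0.7) -> float:
    # Model override wins; None means "no forced value, use the requested temperature".
    forced = None
    if model and model in MODEL_OVERRIDES:
        forced = MODEL_OVERRIDES[model].get("forced_temperature")
    if forced is None:
        forced = DEFAULT_CAPABILITIES["forced_temperature"]
    return forced if forced is not None else requested

assert effective_temperature("openai", "o1", 0.2) == 1.0      # reasoning model: forced to 1.0
assert effective_temperature("openai", "gpt-4o", 0.2) == 0.2  # no override: requested value kept
```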
src/services/llm/client.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 LLM Client
 ==========
@@ -31,9 +32,38 @@ class LLMClient:
         Args:
             config: LLM configuration. If None, loads from environment.
         """
+
         self.config = config or get_llm_config()
         self.logger = get_logger("LLMClient")
 
+        # Set environment variables for LightRAG compatibility
+        # LightRAG's internal functions (openai_complete_if_cache, etc.) read from
+        # os.environ["OPENAI_API_KEY"] even when api_key is passed as parameter.
+        # We must set these env vars early to ensure all LightRAG operations work.
+        self._setup_openai_env_vars()
+
+    def _setup_openai_env_vars(self):
+        """
+        Set OpenAI environment variables for LightRAG compatibility.
+
+        LightRAG's internal functions read from os.environ["OPENAI_API_KEY"]
+        even when api_key is passed as parameter. This method ensures the
+        environment variables are set for all LightRAG operations.
+        """
+        import os
+
+        binding = getattr(self.config, "binding", "openai")
+
+        # Only set env vars for OpenAI-compatible bindings
+        if binding in ("openai", "azure_openai", "gemini"):
+            if self.config.api_key:
+                os.environ["OPENAI_API_KEY"] = self.config.api_key
+                self.logger.debug("Set OPENAI_API_KEY env var for LightRAG compatibility")
+
+            if self.config.base_url:
+                os.environ["OPENAI_BASE_URL"] = self.config.base_url
+                self.logger.debug(f"Set OPENAI_BASE_URL env var to {self.config.base_url}")
+
     async def complete(
         self,
         prompt: str,
@@ -128,6 +158,7 @@ class LLMClient:
             return llm_model_func_via_factory
 
         # OpenAI-compatible bindings use lightrag (has caching)
+        # Note: Environment variables are already set in __init__ via _setup_openai_env_vars()
        from lightrag.llm.openai import openai_complete_if_cache
 
        def llm_model_func(
@@ -196,6 +227,7 @@ class LLMClient:
            return vision_model_func_via_factory
 
        # OpenAI-compatible bindings
+        # Note: Environment variables are already set in __init__ via _setup_openai_env_vars()
        from lightrag.llm.openai import openai_complete_if_cache
 
        # Get api_version once for reuse
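The point of _setup_openai_env_vars() above is an environment-variable bridge: credentials held only in an application config object are exported so that libraries reading os.environ["OPENAI_API_KEY"] directly (as LightRAG's internals do) still find them. A standalone sketch of that idea, with a hypothetical FakeConfig standing in for the real config object:

```python
import os
from dataclasses import dataclass

@dataclass
class FakeConfig:  # hypothetical stand-in for the real LLM config object
    binding: str = "openai"
    api_key: str = "sk-example"                    # placeholder, not a real key
    base_url: str = "https://api.example.com/v1"   # placeholder URL

def export_openai_env(config: FakeConfig) -> None:
    # Same guard as in the diff: only OpenAI-compatible bindings export env vars.
    if config.binding in ("openai", "azure_openai", "gemini"):
        if config.api_key:
            os.environ["OPENAI_API_KEY"] = config.api_key
        if config.base_url:
            os.environ["OPENAI_BASE_URL"] = config.base_url

export_openai_env(FakeConfig())
assert os.environ["OPENAI_API_KEY"] == "sk-example"
```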
src/services/llm/cloud_provider.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Cloud LLM Provider
 ==================
@@ -11,14 +12,27 @@ import os
 from typing import AsyncGenerator, Dict, List, Optional
 
 import aiohttp
-from lightrag.llm.openai import openai_complete_if_cache
 
 # Get loggers for suppression during fallback scenarios
 # (lightrag logs errors internally before raising exceptions)
 _lightrag_logger = logging.getLogger("lightrag")
 _openai_logger = logging.getLogger("openai")
 
-from .capabilities import supports_response_format
+# Lazy import for lightrag to avoid import errors when not installed
+_openai_complete_if_cache = None
+
+
+def _get_openai_complete_if_cache():
+    """Lazy load openai_complete_if_cache from lightrag."""
+    global _openai_complete_if_cache
+    if _openai_complete_if_cache is None:
+        from lightrag.llm.openai import openai_complete_if_cache
+
+        _openai_complete_if_cache = openai_complete_if_cache
+    return _openai_complete_if_cache
+
+
+from .capabilities import get_effective_temperature, supports_response_format
 from .config import get_token_limit_kwargs
 from .exceptions import LLMAPIError, LLMAuthenticationError, LLMConfigError
 from .utils import (
@@ -182,6 +196,7 @@ async def _openai_complete(
    _openai_logger.setLevel(logging.CRITICAL)
    try:
        # model and prompt must be positional arguments
+        openai_complete_if_cache = _get_openai_complete_if_cache()
        content = await openai_complete_if_cache(model, prompt, **lightrag_kwargs)
    finally:
        _lightrag_logger.setLevel(original_lightrag_level)
@@ -203,7 +218,9 @@ async def _openai_complete(
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": prompt},
         ],
-        "temperature": kwargs.get("temperature", 0.7),
+        "temperature": get_effective_temperature(
+            binding, model, kwargs.get("temperature", 0.7)
+        ),
     }
 
     # Handle max_tokens / max_completion_tokens based on model
@@ -279,7 +296,7 @@ async def _openai_stream(
     data = {
         "model": model,
         "messages": msg_list,
-        "temperature": kwargs.get("temperature", 0.7),
+        "temperature": get_effective_temperature(binding, model, kwargs.get("temperature", 0.7)),
         "stream": True,
     }
 
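The change above replaces a top-level `from lightrag.llm.openai import ...` with a lazy, memoized getter, so importing the module no longer fails when lightrag is not installed. A generic sketch of that pattern (using `json` as a stand-in for the optional dependency, purely for illustration):

```python
from typing import Callable, Optional

_cached_fn: Optional[Callable] = None

def _get_heavy_fn() -> Callable:
    global _cached_fn
    if _cached_fn is None:
        # Stand-in for `from lightrag.llm.openai import openai_complete_if_cache`;
        # the real import only happens on first use, never at module import time.
        import json

        _cached_fn = json.dumps
    return _cached_fn

assert _get_heavy_fn()({"ok": True}) == '{"ok": true}'
assert _get_heavy_fn() is _get_heavy_fn()  # cached after the first call
```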
src/services/llm/config.py

@@ -26,6 +26,37 @@ load_dotenv(PROJECT_ROOT / "DeepTutor.env", override=False)
 load_dotenv(PROJECT_ROOT / ".env", override=False)
 
 
+def _setup_openai_env_vars_early():
+    """
+    Set OPENAI_API_KEY environment variable early for LightRAG compatibility.
+
+    LightRAG's internal functions (e.g., create_openai_async_client) read directly
+    from os.environ["OPENAI_API_KEY"] instead of using the api_key parameter.
+    This function ensures the environment variable is set as soon as this module
+    is imported, before any LightRAG operations can occur.
+
+    This is called at module load time to ensure env vars are set before any
+    RAG operations, including those in worker threads/processes.
+    """
+    binding = os.getenv("LLM_BINDING", "openai")
+    api_key = os.getenv("LLM_API_KEY")
+    base_url = os.getenv("LLM_HOST")
+
+    # Only set env vars for OpenAI-compatible bindings
+    if binding in ("openai", "azure_openai", "gemini"):
+        if api_key and not os.getenv("OPENAI_API_KEY"):
+            os.environ["OPENAI_API_KEY"] = api_key
+            logger.debug("Set OPENAI_API_KEY env var for LightRAG compatibility (early init)")
+
+        if base_url and not os.getenv("OPENAI_BASE_URL"):
+            os.environ["OPENAI_BASE_URL"] = base_url
+            logger.debug(f"Set OPENAI_BASE_URL env var to {base_url} (early init)")
+
+
+# Execute early setup at module import time
+_setup_openai_env_vars_early()
+
+
 @dataclass
 class LLMConfig:
     """LLM configuration dataclass."""
@@ -37,6 +68,7 @@ class LLMConfig:
     api_version: Optional[str] = None
     max_tokens: int = 4096
     temperature: float = 0.7
+    source: Optional[str] = None  # "realtimex" when using RTX SDK
 
 
 def _strip_value(value: Optional[str]) -> Optional[str]:
@@ -95,10 +127,11 @@ def get_llm_config() -> LLMConfig:
        if config:
            return LLMConfig(
                binding=config.get("provider", "openai"),
-                model=config["model"],
+                model=config.get("model", ""),
                api_key=config.get("api_key", ""),
                base_url=config.get("base_url"),
                api_version=config.get("api_version"),
+                source=config.get("source"),  # "realtimex" when using RTX
            )
    except ImportError:
        # Unified config service not yet available, fall back to env
@@ -128,10 +161,11 @@ async def get_llm_config_async() -> LLMConfig:
        if config:
            return LLMConfig(
                binding=config.get("provider", "openai"),
-                model=config["model"],
+                model=config.get("model", ""),
                api_key=config.get("api_key", ""),
                base_url=config.get("base_url"),
                api_version=config.get("api_version"),
+                source=config.get("source"),  # "realtimex" when using RTX
            )
    except ImportError:
        pass
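Note the guard in _setup_openai_env_vars_early(): it only fills OPENAI_API_KEY / OPENAI_BASE_URL when they are not already set, so an explicitly exported OPENAI_API_KEY always wins over the value derived from LLM_API_KEY. A small self-contained sketch of that precedence (variable values here are placeholders):

```python
import os
from typing import Optional

def fill_if_missing(name: str, value: Optional[str]) -> None:
    # Same guard as above: only fill the variable when it is not already set.
    if value and not os.getenv(name):
        os.environ[name] = value

os.environ.pop("OPENAI_BASE_URL", None)  # start from a clean slate for the demo
os.environ["OPENAI_API_KEY"] = "explicit-user-value"

fill_if_missing("OPENAI_API_KEY", "value-derived-from-LLM_API_KEY")
fill_if_missing("OPENAI_BASE_URL", "https://proxy.example.com/v1")

assert os.environ["OPENAI_API_KEY"] == "explicit-user-value"            # preserved
assert os.environ["OPENAI_BASE_URL"] == "https://proxy.example.com/v1"  # filled in
```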
src/services/llm/error_mapping.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Error Mapping - Map provider-specific errors to unified exceptions.
 """
src/services/llm/exceptions.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 LLM Service Exceptions
 ======================
@@ -137,6 +138,33 @@ class ProviderContextWindowError(LLMAPIError):
     pass
 
 
+# RealTimeX-specific exceptions (for SDK integration)
+class RealTimeXError(LLMAPIError):
+    """
+    Base exception for RealTimeX SDK errors.
+
+    Raised when interacting with RealTimeX SDK proxy fails.
+    """
+
+    def __init__(self, message: str, error_code: Optional[str] = None):
+        super().__init__(message, error_code)
+
+
+class RealTimeXPermissionError(LLMAuthenticationError):
+    """
+    Exception raised when RealTimeX SDK permission is required or denied.
+
+    Args:
+        permission: The permission that was required (e.g., 'llm.chat', 'llm.embed')
+        message: Optional custom error message
+    """
+
+    def __init__(self, permission: str, message: Optional[str] = None):
+        self.permission = permission
+        msg = message or f"RealTimeX permission required: {permission}"
+        super().__init__(msg)
+
+
 __all__ = [
     "LLMError",
     "LLMConfigError",
@@ -149,4 +177,6 @@ __all__ = [
     "LLMParseError",
     "ProviderQuotaExceededError",
     "ProviderContextWindowError",
+    "RealTimeXError",
+    "RealTimeXPermissionError",
 ]
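Because RealTimeXError derives from LLMAPIError and RealTimeXPermissionError from LLMAuthenticationError, existing handlers keep catching the new errors. A hedged usage sketch; the local class definitions below only mirror the hierarchy for illustration and are not the package's classes:

```python
from typing import Optional

class LLMAPIError(Exception): ...
class LLMAuthenticationError(LLMAPIError): ...

class RealTimeXPermissionError(LLMAuthenticationError):
    def __init__(self, permission: str, message: Optional[str] = None):
        self.permission = permission
        super().__init__(message or f"RealTimeX permission required: {permission}")

try:
    raise RealTimeXPermissionError("llm.chat")
except LLMAuthenticationError as exc:  # pre-existing auth handler still matches
    print(f"auth problem: {exc}")
```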
src/services/llm/factory.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 LLM Factory - Central Hub for LLM Calls
 =======================================
@@ -20,9 +21,10 @@ CloudProvider LocalProvider
        ↓                      ↓
 OpenAI/DeepSeek/etc    LM Studio/Ollama/etc
 
-Routing:
-- Automatically routes to local_provider for local URLs (localhost, 127.0.0.1, etc.)
-- Routes to cloud_provider for all other URLs
+Routing (Priority Order):
+1. RealTimeX SDK (if RTX_APP_ID detected)
+2. Local Provider (for localhost/127.0.0.1 URLs)
+3. Cloud Provider (default)
 
 Retry Mechanism:
 - Automatic retry with exponential backoff for transient errors
@@ -36,6 +38,7 @@ from typing import Any, AsyncGenerator, Dict, List, Optional
 import tenacity
 
 from src.logging.logger import get_logger
+from src.utils.realtimex import should_use_realtimex_sdk
 
 from . import cloud_provider, local_provider
 from .config import get_llm_config
@@ -51,8 +54,8 @@ from .utils import is_local_llm_server
 logger = get_logger("LLMFactory")
 
 # Default retry configuration
-DEFAULT_MAX_RETRIES = 3
-DEFAULT_RETRY_DELAY = 1.0  # seconds
+DEFAULT_MAX_RETRIES = 5  # Increased for complex agents like Research
+DEFAULT_RETRY_DELAY = 2.0  # seconds
 DEFAULT_EXPONENTIAL_BACKOFF = True
 
 
@@ -141,8 +144,8 @@ async def complete(
         api_version: API version for Azure OpenAI (optional)
         binding: Provider binding type (optional)
         messages: Pre-built messages array (optional)
-        max_retries: Maximum number of retry attempts (default: 3)
-        retry_delay: Initial delay between retries in seconds (default: 1.0)
+        max_retries: Maximum number of retry attempts (default: 5)
+        retry_delay: Initial delay between retries in seconds (default: 2.0)
         exponential_backoff: Whether to use exponential backoff (default: True)
         **kwargs: Additional parameters (temperature, max_tokens, etc.)
 
@@ -150,6 +153,7 @@ async def complete(
         str: The LLM response
     """
     # Get config if parameters not provided
+    config = None
     if not model or not base_url:
         config = get_llm_config()
         model = model or config.model
@@ -158,7 +162,17 @@ async def complete(
         api_version = api_version or config.api_version
         binding = binding or config.binding or "openai"
 
-    # Determine which provider to use
+    # ROUTING LOGIC (Priority order):
+
+    # 1. RealTimeX SDK (when active config has source="realtimex")
+    if config and getattr(config, "source", None) == "realtimex" and should_use_realtimex_sdk():
+        from . import realtimex_provider
+
+        return await realtimex_provider.complete(
+            prompt=prompt, system_prompt=system_prompt, model=model, messages=messages, **kwargs
+        )
+
+    # 2. Determine which provider to use (local vs cloud)
     use_local = _should_use_local(base_url)
 
     # Define helper to determine if a generic LLMAPIError is retriable
@@ -189,6 +203,9 @@ async def complete(
 
         return False
 
+    # Calculate total attempts for logging (1 initial + max_retries)
+    total_attempts = max_retries + 1
+
     # Define the actual completion function with tenacity retry
     @tenacity.retry(
         retry=(
@@ -196,10 +213,10 @@ async def complete(
             | tenacity.retry_if_exception_type(LLMTimeoutError)
             | tenacity.retry_if_exception(_is_retriable_llm_api_error)
         ),
-        wait=tenacity.wait_exponential(multiplier=retry_delay, min=retry_delay, max=60),
-        stop=tenacity.stop_after_attempt(max_retries + 1),
+        wait=tenacity.wait_exponential(multiplier=retry_delay, min=retry_delay, max=120),
+        stop=tenacity.stop_after_attempt(total_attempts),
         before_sleep=lambda retry_state: logger.warning(
-            f"LLM call failed (attempt {retry_state.attempt_number}/{max_retries + 1}), "
+            f"LLM call failed (attempt {retry_state.attempt_number}/{total_attempts}), "
             f"retrying in {retry_state.upcoming_sleep:.1f}s... Error: {str(retry_state.outcome.exception())}"
         ),
     )
@@ -268,8 +285,8 @@ async def stream(
         api_version: API version for Azure OpenAI (optional)
         binding: Provider binding type (optional)
         messages: Pre-built messages array (optional)
-        max_retries: Maximum number of retry attempts (default: 3)
-        retry_delay: Initial delay between retries in seconds (default: 1.0)
+        max_retries: Maximum number of retry attempts (default: 5)
+        retry_delay: Initial delay between retries in seconds (default: 2.0)
         exponential_backoff: Whether to use exponential backoff (default: True)
         **kwargs: Additional parameters (temperature, max_tokens, etc.)
 
@@ -277,6 +294,7 @@ async def stream(
         str: Response chunks
     """
     # Get config if parameters not provided
+    config = None
     if not model or not base_url:
         config = get_llm_config()
         model = model or config.model
@@ -285,7 +303,19 @@ async def stream(
         api_version = api_version or config.api_version
         binding = binding or config.binding or "openai"
 
-    # Determine which provider to use
+    # ROUTING LOGIC (Priority order):
+
+    # 1. RealTimeX SDK (when active config has source="realtimex")
+    if config and getattr(config, "source", None) == "realtimex" and should_use_realtimex_sdk():
+        from . import realtimex_provider
+
+        async for chunk in realtimex_provider.stream(
+            prompt=prompt, system_prompt=system_prompt, model=model, messages=messages, **kwargs
+        ):
+            yield chunk
+        return
+
+    # 2. Determine which provider to use (local vs cloud)
     use_local = _should_use_local(base_url)
 
     # Build call kwargs
@@ -305,10 +335,13 @@ async def stream(
        call_kwargs["binding"] = binding or "openai"
 
     # Retry logic for streaming (retry on connection errors)
+    # Total attempts = 1 initial + max_retries
+    total_attempts = max_retries + 1
     last_exception = None
     delay = retry_delay
+    max_delay = 120  # Cap maximum delay at 120 seconds (consistent with complete())
 
-    for attempt in range(max_retries + 1):
+    for attempt in range(total_attempts):
         try:
             # Route to appropriate provider
             if use_local:
@@ -328,7 +361,7 @@ async def stream(
 
             # Calculate delay for next attempt
             if exponential_backoff:
-                current_delay = delay * (2**attempt)
+                current_delay = min(delay * (2**attempt), max_delay)
             else:
                 current_delay = delay
 
@@ -336,6 +369,12 @@ async def stream(
 
             if isinstance(e, LLMRateLimitError) and e.retry_after:
                 current_delay = max(current_delay, e.retry_after)
+
+            # Log retry attempt (consistent with complete() function)
+            logger.warning(
+                f"LLM streaming failed (attempt {attempt + 1}/{total_attempts}), "
+                f"retrying in {current_delay:.1f}s... Error: {str(e)}"
+            )
 
             # Wait before retrying
             await asyncio.sleep(current_delay)
 
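With the new defaults (DEFAULT_MAX_RETRIES = 5, DEFAULT_RETRY_DELAY = 2.0) and the 120-second cap, the streaming path makes up to six attempts and sleeps roughly 2, 4, 8, 16, 32 seconds between them; the cap only matters for longer schedules. A small arithmetic sketch of that backoff, assuming the loop sleeps after each failed attempt with the attempt index starting at 0 as in the stream() change above:

```python
DEFAULT_MAX_RETRIES = 5
DEFAULT_RETRY_DELAY = 2.0
MAX_DELAY = 120  # cap shared by complete() and stream() in this release

total_attempts = DEFAULT_MAX_RETRIES + 1  # 1 initial call + 5 retries
delays = [
    min(DEFAULT_RETRY_DELAY * (2 ** attempt), MAX_DELAY)
    for attempt in range(total_attempts - 1)  # no sleep after the final attempt
]
print(delays)  # [2.0, 4.0, 8.0, 16.0, 32.0]
```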
src/services/llm/local_provider.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Local LLM Provider
 ==================
src/services/llm/providers/anthropic.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 import anthropic
 
 from ..registry import register_provider
src/services/llm/providers/base_provider.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Base LLM Provider - Unified interface and configuration.
 """
src/services/llm/providers/open_ai.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 import os
 
 import httpx