realtimex-deeptutor 0.5.0.post1__py3-none-any.whl → 0.5.0.post3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/METADATA +24 -17
  2. {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/RECORD +143 -123
  3. {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/WHEEL +1 -1
  4. realtimex_deeptutor-0.5.0.post3.dist-info/entry_points.txt +4 -0
  5. {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/top_level.txt +1 -0
  6. scripts/__init__.py +1 -0
  7. scripts/audit_prompts.py +179 -0
  8. scripts/check_install.py +460 -0
  9. scripts/generate_roster.py +327 -0
  10. scripts/install_all.py +653 -0
  11. scripts/migrate_kb.py +655 -0
  12. scripts/start.py +807 -0
  13. scripts/start_web.py +632 -0
  14. scripts/sync_prompts_from_en.py +147 -0
  15. src/__init__.py +2 -2
  16. src/agents/ideagen/material_organizer_agent.py +2 -0
  17. src/agents/solve/__init__.py +6 -0
  18. src/agents/solve/main_solver.py +9 -0
  19. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +9 -7
  20. src/agents/solve/session_manager.py +345 -0
  21. src/api/main.py +14 -0
  22. src/api/routers/chat.py +3 -3
  23. src/api/routers/co_writer.py +12 -7
  24. src/api/routers/config.py +1 -0
  25. src/api/routers/guide.py +3 -1
  26. src/api/routers/ideagen.py +7 -0
  27. src/api/routers/knowledge.py +64 -12
  28. src/api/routers/question.py +2 -0
  29. src/api/routers/realtimex.py +137 -0
  30. src/api/routers/research.py +9 -0
  31. src/api/routers/solve.py +120 -2
  32. src/cli/__init__.py +13 -0
  33. src/cli/start.py +209 -0
  34. src/config/constants.py +11 -9
  35. src/knowledge/add_documents.py +453 -213
  36. src/knowledge/extract_numbered_items.py +9 -10
  37. src/knowledge/initializer.py +102 -101
  38. src/knowledge/manager.py +251 -74
  39. src/knowledge/progress_tracker.py +43 -2
  40. src/knowledge/start_kb.py +11 -2
  41. src/logging/__init__.py +5 -0
  42. src/logging/adapters/__init__.py +1 -0
  43. src/logging/adapters/lightrag.py +25 -18
  44. src/logging/adapters/llamaindex.py +1 -0
  45. src/logging/config.py +30 -27
  46. src/logging/handlers/__init__.py +1 -0
  47. src/logging/handlers/console.py +7 -50
  48. src/logging/handlers/file.py +5 -20
  49. src/logging/handlers/websocket.py +23 -19
  50. src/logging/logger.py +161 -126
  51. src/logging/stats/__init__.py +1 -0
  52. src/logging/stats/llm_stats.py +37 -17
  53. src/services/__init__.py +17 -1
  54. src/services/config/__init__.py +1 -0
  55. src/services/config/knowledge_base_config.py +1 -0
  56. src/services/config/loader.py +1 -1
  57. src/services/config/unified_config.py +211 -4
  58. src/services/embedding/__init__.py +1 -0
  59. src/services/embedding/adapters/__init__.py +3 -0
  60. src/services/embedding/adapters/base.py +1 -0
  61. src/services/embedding/adapters/cohere.py +1 -0
  62. src/services/embedding/adapters/jina.py +1 -0
  63. src/services/embedding/adapters/ollama.py +1 -0
  64. src/services/embedding/adapters/openai_compatible.py +1 -0
  65. src/services/embedding/adapters/realtimex.py +125 -0
  66. src/services/embedding/client.py +27 -0
  67. src/services/embedding/config.py +3 -0
  68. src/services/embedding/provider.py +1 -0
  69. src/services/llm/__init__.py +17 -3
  70. src/services/llm/capabilities.py +47 -0
  71. src/services/llm/client.py +32 -0
  72. src/services/llm/cloud_provider.py +21 -4
  73. src/services/llm/config.py +36 -2
  74. src/services/llm/error_mapping.py +1 -0
  75. src/services/llm/exceptions.py +30 -0
  76. src/services/llm/factory.py +55 -16
  77. src/services/llm/local_provider.py +1 -0
  78. src/services/llm/providers/anthropic.py +1 -0
  79. src/services/llm/providers/base_provider.py +1 -0
  80. src/services/llm/providers/open_ai.py +1 -0
  81. src/services/llm/realtimex_provider.py +240 -0
  82. src/services/llm/registry.py +1 -0
  83. src/services/llm/telemetry.py +1 -0
  84. src/services/llm/types.py +1 -0
  85. src/services/llm/utils.py +1 -0
  86. src/services/prompt/__init__.py +1 -0
  87. src/services/prompt/manager.py +3 -2
  88. src/services/rag/__init__.py +27 -5
  89. src/services/rag/components/__init__.py +1 -0
  90. src/services/rag/components/base.py +1 -0
  91. src/services/rag/components/chunkers/__init__.py +1 -0
  92. src/services/rag/components/chunkers/base.py +1 -0
  93. src/services/rag/components/chunkers/fixed.py +1 -0
  94. src/services/rag/components/chunkers/numbered_item.py +1 -0
  95. src/services/rag/components/chunkers/semantic.py +1 -0
  96. src/services/rag/components/embedders/__init__.py +1 -0
  97. src/services/rag/components/embedders/base.py +1 -0
  98. src/services/rag/components/embedders/openai.py +1 -0
  99. src/services/rag/components/indexers/__init__.py +1 -0
  100. src/services/rag/components/indexers/base.py +1 -0
  101. src/services/rag/components/indexers/graph.py +5 -44
  102. src/services/rag/components/indexers/lightrag.py +5 -44
  103. src/services/rag/components/indexers/vector.py +1 -0
  104. src/services/rag/components/parsers/__init__.py +1 -0
  105. src/services/rag/components/parsers/base.py +1 -0
  106. src/services/rag/components/parsers/markdown.py +1 -0
  107. src/services/rag/components/parsers/pdf.py +1 -0
  108. src/services/rag/components/parsers/text.py +1 -0
  109. src/services/rag/components/retrievers/__init__.py +1 -0
  110. src/services/rag/components/retrievers/base.py +1 -0
  111. src/services/rag/components/retrievers/dense.py +1 -0
  112. src/services/rag/components/retrievers/hybrid.py +5 -44
  113. src/services/rag/components/retrievers/lightrag.py +5 -44
  114. src/services/rag/components/routing.py +48 -0
  115. src/services/rag/factory.py +112 -46
  116. src/services/rag/pipeline.py +1 -0
  117. src/services/rag/pipelines/__init__.py +27 -18
  118. src/services/rag/pipelines/lightrag.py +1 -0
  119. src/services/rag/pipelines/llamaindex.py +99 -0
  120. src/services/rag/pipelines/raganything.py +67 -100
  121. src/services/rag/pipelines/raganything_docling.py +368 -0
  122. src/services/rag/service.py +5 -12
  123. src/services/rag/types.py +1 -0
  124. src/services/rag/utils/__init__.py +17 -0
  125. src/services/rag/utils/image_migration.py +279 -0
  126. src/services/search/__init__.py +1 -0
  127. src/services/search/base.py +1 -0
  128. src/services/search/consolidation.py +1 -0
  129. src/services/search/providers/__init__.py +1 -0
  130. src/services/search/providers/baidu.py +1 -0
  131. src/services/search/providers/exa.py +1 -0
  132. src/services/search/providers/jina.py +1 -0
  133. src/services/search/providers/perplexity.py +1 -0
  134. src/services/search/providers/serper.py +1 -0
  135. src/services/search/providers/tavily.py +1 -0
  136. src/services/search/types.py +1 -0
  137. src/services/settings/__init__.py +1 -0
  138. src/services/settings/interface_settings.py +78 -0
  139. src/services/setup/__init__.py +1 -0
  140. src/services/tts/__init__.py +1 -0
  141. src/services/tts/config.py +1 -0
  142. src/utils/realtimex.py +284 -0
  143. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +0 -2
  144. src/services/rag/pipelines/academic.py +0 -44
  145. {realtimex_deeptutor-0.5.0.post1.dist-info → realtimex_deeptutor-0.5.0.post3.dist-info}/licenses/LICENSE +0 -0
src/utils/realtimex.py ADDED
@@ -0,0 +1,284 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ RealTimeX SDK Utilities
4
+ ========================
5
+
6
+ Utilities for RealTimeX SDK integration.
7
+ Provides unified SDK instance management and environment detection.
8
+ """
9
+
10
+ import logging
11
+ from typing import TYPE_CHECKING, Optional
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ if TYPE_CHECKING:
16
+ from realtimex_sdk import RealtimeXSDK
17
+
18
+ # Global SDK instance (lazy-initialized, singleton)
19
+ _sdk_instance: Optional["RealtimeXSDK"] = None
20
+
21
+
22
def get_realtimex_sdk() -> "RealtimeXSDK":
    """
    Return the shared RealTimeX SDK instance, creating it on first use.

    The instance is kept in the module-level ``_sdk_instance`` variable and
    handed back unchanged on every later call. It is constructed with the
    ``llm.chat``, ``llm.providers`` and ``llm.embed`` permissions.

    Returns:
        RealtimeXSDK: The shared SDK instance.

    Raises:
        ImportError: If an ImportError occurs while importing or constructing
            the SDK (typically because realtimex-sdk is not installed).
    """
    global _sdk_instance

    # Already built by an earlier call: reuse it.
    if _sdk_instance is not None:
        return _sdk_instance

    # Every permission DeepTutor asks the SDK for is declared here.
    required_permissions = [
        "llm.chat",  # For LLM completions
        "llm.providers",  # For listing available providers
        "llm.embed",  # For embeddings
    ]

    try:
        # Imported lazily so this module can be loaded without the SDK.
        from realtimex_sdk import RealtimeXSDK, SDKConfig

        sdk_config = SDKConfig(permissions=required_permissions)
        _sdk_instance = RealtimeXSDK(config=sdk_config)
        logger.info("RealTimeX SDK initialized with all required permissions")
    except ImportError as import_error:
        logger.error("RealTimeX SDK not installed. Install with: pip install realtimex-sdk")
        raise ImportError(
            "realtimex-sdk is required for RealTimeX integration. "
            "Install with: pip install realtimex-sdk"
        ) from import_error

    return _sdk_instance
60
+
61
+
62
+ # Cache for detection result
63
+ _detection_cache: Optional[bool] = None
64
+
65
+
66
def _probe_realtimex_environment() -> bool:
    """Run the three RealTimeX detection checks once, without touching the cache."""
    import os

    # Check 1: RTX_APP_ID environment variable
    app_id = os.getenv("RTX_APP_ID")
    if not app_id:
        logger.debug("RealTimeX not detected: RTX_APP_ID not set")
        return False

    # Check 2: SDK installed
    try:
        import realtimex_sdk  # noqa: F401
    except ImportError:
        logger.warning(
            "RealTimeX detected (RTX_APP_ID present) but SDK not installed. "
            "Install with: pip install realtimex-sdk"
        )
        return False

    # Check 3: Main App connectivity via SDK ping
    try:
        ping_result = get_realtimex_sdk().ping_sync()

        if not ping_result.get("success"):
            logger.warning(f"RealTimeX ping failed: {ping_result}")
            return False

        mode = ping_result.get("mode", "unknown")
        logger.info(f"RealTimeX environment detected (app_id: {app_id}, mode: {mode})")
        return True
    except Exception as e:
        logger.warning(f"RealTimeX Main App not accessible: {e}")
        return False


def should_use_realtimex_sdk(force_check: bool = False) -> bool:
    """
    Report whether DeepTutor is running inside a RealTimeX environment.

    Three conditions must all hold:
    1. The RTX_APP_ID environment variable is set to a non-empty value
    2. The realtimex_sdk package can be imported
    3. The SDK ping reports success

    The outcome (True or False) is stored in the module-level
    ``_detection_cache`` and returned directly by later calls.

    Args:
        force_check: When True, ignore the cached outcome and run the checks again.

    Returns:
        True if every check passes; False otherwise, including when any
        check raises an exception.
    """
    global _detection_cache

    if not force_check and _detection_cache is not None:
        return _detection_cache

    try:
        detected = _probe_realtimex_environment()
    except Exception as e:
        logger.error(f"RealTimeX detection error: {e}")
        detected = False

    # Negative results are cached too, so a failed probe is not repeated
    # until a caller passes force_check=True.
    _detection_cache = detected
    return detected
132
+
133
+
134
+ # Cache for providers list
135
+ _providers_cache: Optional[dict] = None
136
+ _providers_cache_time: float = 0
137
+ PROVIDERS_CACHE_TTL = 300 # 5 minutes
138
+
139
+
140
async def get_cached_providers() -> dict:
    """
    Get available providers from the RealTimeX SDK, cached in this process.

    A successful result is kept in the module-level ``_providers_cache`` and
    reused until it is older than ``PROVIDERS_CACHE_TTL`` seconds. Age is
    measured with a monotonic clock, so adjustments to the system wall clock
    can neither expire the cache early nor extend its life.

    Returns:
        Dict with keys ``rtx_enabled``, ``llm`` and ``embedding``.
        - RealTimeX not detected: ``rtx_enabled`` is False and both lists are empty.
        - Fetch failed: ``rtx_enabled`` is True, both lists are empty and an
          ``error`` key holds the exception text. This result is not cached,
          so the next call tries again.
        - Otherwise each list entry has the shape
          ``{"provider": ..., "models": [{"id": ..., "name": ...}, ...]}``.
    """
    global _providers_cache, _providers_cache_time

    import time

    if not should_use_realtimex_sdk():
        return {"rtx_enabled": False, "llm": [], "embedding": []}

    # Check cache validity. time.monotonic() never goes backwards, unlike
    # time.time(), which makes it the right clock for measuring elapsed time.
    if _providers_cache and (time.monotonic() - _providers_cache_time) < PROVIDERS_CACHE_TTL:
        return _providers_cache

    # Fetch fresh data from SDK
    try:
        sdk = get_realtimex_sdk()

        # The two lists are fetched one after the other (not concurrently).
        llm_result = await sdk.llm.chat_providers()
        embed_result = await sdk.llm.embed_providers()

        def serialize_provider(p):
            # Reduce an SDK provider object to plain dicts and lists.
            return {
                "provider": p.provider,
                "models": [{"id": m.id, "name": m.name} for m in p.models],
            }

        _providers_cache = {
            "rtx_enabled": True,
            "llm": [serialize_provider(p) for p in llm_result.providers],
            "embedding": [serialize_provider(p) for p in embed_result.providers],
        }
        _providers_cache_time = time.monotonic()

        return _providers_cache

    except Exception as e:
        logger.warning(f"Failed to fetch RTX providers: {e}")
        # Return empty but enabled structure on error to allow retry
        return {"rtx_enabled": True, "llm": [], "embedding": [], "error": str(e)}
187
+
188
+
189
def invalidate_providers_cache():
    """Drop the cached providers result and reset its timestamp to 0 (e.g. on reconnection)."""
    global _providers_cache, _providers_cache_time
    _providers_cache, _providers_cache_time = None, 0
194
+
195
+
196
+ # =============================================================================
197
+ # RTX Active Config Storage
198
+ # =============================================================================
199
+ # Stores the user's selected provider/model for LLM and Embedding when using RTX.
200
+ # This is persisted to disk so selections survive restarts.
201
+
202
+ import json
203
+ from pathlib import Path
204
+
205
+ # Storage path for RTX active config
206
+ _RTX_CONFIG_DIR = Path(__file__).resolve().parent.parent.parent / "data" / "user" / "settings"
207
+ _RTX_CONFIG_FILE = _RTX_CONFIG_DIR / "rtx_active.json"
208
+
209
+
210
def _load_rtx_active_config() -> dict:
    """
    Load the RTX active config from ``_RTX_CONFIG_FILE``.

    Returns:
        The decoded JSON object. An empty dict is returned when the file does
        not exist, cannot be read or parsed, or holds valid JSON that is not
        an object. Callers index and mutate the result as a dict, so any other
        JSON value (list, null, string, ...) is rejected here.
    """
    try:
        with open(_RTX_CONFIG_FILE, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        # Nothing has been saved yet; this is the normal first-run state.
        return {}
    except Exception as e:
        logger.warning(f"Failed to load RTX active config: {e}")
        return {}

    if not isinstance(loaded, dict):
        logger.warning(
            "Failed to load RTX active config: "
            f"expected a JSON object, got {type(loaded).__name__}"
        )
        return {}

    return loaded
219
+
220
+
221
def _save_rtx_active_config(config: dict) -> bool:
    """
    Save the RTX active config to ``_RTX_CONFIG_FILE``.

    The JSON is first written to a sibling ``.tmp`` file and then moved over
    the real file. Opening the real file for writing would truncate it
    immediately, so a failure part-way through (for example a value that
    cannot be serialized) would wipe the previously saved config.

    Args:
        config: The full config mapping to persist.

    Returns:
        True if the file was written, False if any step failed (the error is logged).
    """
    # Same directory as the target, so the final rename stays on one filesystem.
    tmp_file = _RTX_CONFIG_FILE.with_name(_RTX_CONFIG_FILE.name + ".tmp")
    try:
        _RTX_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2, ensure_ascii=False)
        tmp_file.replace(_RTX_CONFIG_FILE)
        return True
    except Exception as e:
        logger.error(f"Failed to save RTX active config: {e}")
        try:
            tmp_file.unlink()
        except OSError:
            # Best-effort cleanup: the temp file may never have been created.
            pass
        return False
231
+
232
+
233
def get_rtx_active_config(config_type: str) -> Optional[dict]:
    """
    Look up the stored RTX selection for one config type.

    Args:
        config_type: "llm" or "embedding"

    Returns:
        The entry stored under ``config_type``, or None when RealTimeX is not
        detected or nothing is stored for that type.
    """
    rtx_enabled = should_use_realtimex_sdk()
    # The config file is only read when RealTimeX is in use.
    return _load_rtx_active_config().get(config_type) if rtx_enabled else None
248
+
249
+
250
def set_rtx_active_config(config_type: str, provider: str, model: str) -> bool:
    """
    Store the selected provider and model for one config type.

    The existing config is loaded, the entry for ``config_type`` is replaced,
    and the whole config is written back.

    Args:
        config_type: "llm" or "embedding"
        provider: Provider name (e.g., "openai")
        model: Model ID (e.g., "gpt-4o")

    Returns:
        True if saved successfully
    """
    selection = {"provider": provider, "model": model}
    stored = _load_rtx_active_config()
    stored[config_type] = selection
    return _save_rtx_active_config(stored)
268
+
269
+
270
def clear_rtx_active_config(config_type: str) -> bool:
    """
    Remove the stored selection for one config type.

    Args:
        config_type: "llm" or "embedding"

    Returns:
        True if nothing was stored for ``config_type`` or the updated config
        was saved; False if saving failed.
    """
    stored = _load_rtx_active_config()

    # Nothing to remove: report success without rewriting the file.
    if config_type not in stored:
        return True

    stored.pop(config_type)
    return _save_rtx_active_config(stored)
@@ -1,2 +0,0 @@
1
- [console_scripts]
2
- deeptutor = src.api.run_server:main
@@ -1,44 +0,0 @@
1
- """
2
- Academic Pipeline
3
- =================
4
-
5
- Pipeline optimized for academic documents with numbered item extraction.
6
- """
7
-
8
- from typing import Optional
9
-
10
- from ..components.chunkers import NumberedItemExtractor, SemanticChunker
11
- from ..components.embedders import OpenAIEmbedder
12
- from ..components.indexers import GraphIndexer
13
- from ..components.parsers import TextParser
14
- from ..components.retrievers import HybridRetriever
15
- from ..pipeline import RAGPipeline
16
-
17
-
18
def AcademicPipeline(kb_base_dir: Optional[str] = None) -> RAGPipeline:
    """
    Build the "academic" RAG pipeline.

    Components are attached in this order:
    - TextParser as the parser
    - SemanticChunker, then NumberedItemExtractor, as chunkers
    - OpenAIEmbedder as the embedder
    - GraphIndexer as the indexer
    - HybridRetriever as the retriever

    Args:
        kb_base_dir: Base directory for knowledge bases; passed to the
            pipeline, the indexer and the retriever.

    Returns:
        Configured RAGPipeline
    """
    # Each builder call's return value is carried forward, as in a fluent chain.
    pipeline = RAGPipeline("academic", kb_base_dir=kb_base_dir)
    pipeline = pipeline.parser(TextParser())
    pipeline = pipeline.chunker(SemanticChunker())
    pipeline = pipeline.chunker(NumberedItemExtractor())
    pipeline = pipeline.embedder(OpenAIEmbedder())
    pipeline = pipeline.indexer(GraphIndexer(kb_base_dir=kb_base_dir))
    pipeline = pipeline.retriever(HybridRetriever(kb_base_dir=kb_base_dir))
    return pipeline