realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,423 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ NarratorAgent - Note narration agent.
5
+ Inherits from unified BaseAgent with special TTS configuration.
6
+ """
7
+
8
+ from datetime import datetime
9
+ import json
10
+ import os
11
+ from pathlib import Path
12
+ import re
13
+ from typing import Any, Optional
14
+ from urllib.parse import urlparse
15
+ import uuid
16
+
17
+ from openai import AsyncAzureOpenAI, AsyncOpenAI
18
+
19
+ from src.agents.base_agent import BaseAgent
20
+ from src.services.tts import get_tts_config
21
+
22
+ # Import shared stats from edit_agent for legacy compatibility
23
+
24
+ # Define storage path (unified under user/co-writer/ directory)
25
+ USER_DIR = Path(__file__).parent.parent.parent.parent / "data" / "user" / "co-writer" / "audio"
26
+
27
+
28
def ensure_dirs():
    """Create the audio output directory tree if it does not already exist."""
    os.makedirs(USER_DIR, exist_ok=True)
31
+
32
+
33
class NarratorAgent(BaseAgent):
    """Note Narration Agent - Generate narration script and convert to audio.

    Workflow: ``generate_script`` produces a narration script via the
    inherited LLM client; ``generate_audio`` synthesizes it to an MP3
    through the OpenAI (or Azure OpenAI) TTS API and stores it under
    the module-level ``USER_DIR``. ``narrate`` chains both steps.
    """

    def __init__(self, language: str = "en"):
        """
        Initialize NarratorAgent.

        Args:
            language: Language setting ('en' | 'zh'), default 'en'

        Note: LLM configuration (api_key, base_url, model, etc.) is loaded
        automatically from the unified config service. Use refresh_config()
        to pick up configuration changes made in Settings.
        """
        # Use "narrator" as module_name to get independent temperature/max_tokens config
        super().__init__(
            module_name="narrator",
            agent_name="narrator_agent",
            language=language,
        )

        # Override prompts to load from co_writer module
        # (narrator_agent prompts are stored under co_writer/prompts/)
        from src.services.prompt import get_prompt_manager

        self.prompts = get_prompt_manager().load_prompts(
            module_name="co_writer",
            agent_name="narrator_agent",
            language=language,
        )

        # Load TTS-specific configuration
        self._load_tts_config()

    def _load_tts_config(self):
        """Load TTS-specific configuration from unified config service.

        Degrades gracefully on failure: ``tts_config`` becomes ``None``
        (audio generation is then skipped or raises) and the voice falls
        back to "alloy".
        """
        try:
            self.tts_config = get_tts_config()
            # Get voice from unified config (defaults to "alloy")
            self.default_voice = self.tts_config.get("voice", "alloy")
            self.logger.info(f"TTS settings loaded: voice={self.default_voice}")
            # Validate TTS configuration
            self._validate_tts_config()
        except Exception as e:
            self.logger.error(f"Failed to load TTS config: {e}", exc_info=True)
            self.tts_config = None
            self.default_voice = "alloy"

    def _validate_tts_config(self):
        """Validate TTS configuration completeness and format.

        Raises:
            ValueError: if the config is missing, lacks required keys, or
                has a malformed base_url / api_key / model.
        """
        if not self.tts_config:
            raise ValueError("TTS config is None")

        # Check required keys
        required_keys = ["model", "api_key", "base_url"]
        missing_keys = [key for key in required_keys if key not in self.tts_config]
        if missing_keys:
            raise ValueError(f"TTS config missing required keys: {missing_keys}")

        # Validate base_url format
        base_url = self.tts_config["base_url"]
        if not base_url:
            raise ValueError("TTS config 'base_url' is empty")

        if not isinstance(base_url, str):
            raise ValueError(f"TTS config 'base_url' must be a string, got {type(base_url)}")

        # Validate URL format
        if not base_url.startswith(("http://", "https://")):
            raise ValueError(
                f"TTS config 'base_url' must start with http:// or https://, got: {base_url}"
            )

        # Parse outside the netloc check so our own ValueError is not
        # re-wrapped as a "parsing error" (the previous version's except
        # clause swallowed it), and chain the original cause.
        try:
            parsed = urlparse(base_url)
        except Exception as e:
            raise ValueError(f"TTS config 'base_url' parsing error: {e}") from e
        if not parsed.netloc:
            raise ValueError(f"TTS config 'base_url' has invalid format: {base_url}")

        # Validate api_key
        api_key = self.tts_config.get("api_key")
        if not api_key:
            raise ValueError("TTS config 'api_key' is empty")

        if not isinstance(api_key, str) or len(api_key.strip()) == 0:
            raise ValueError("TTS config 'api_key' must be a non-empty string")

        # Validate model
        model = self.tts_config.get("model")
        if not model:
            raise ValueError("TTS config 'model' is empty")

        # Log configuration info (hide sensitive information)
        api_key_preview = f"{api_key[:8]}...{api_key[-4:]}" if len(api_key) > 12 else "*" * 10
        self.logger.info("TTS Configuration Loaded (OpenAI API):")
        self.logger.info(f"  Model: {model}")
        self.logger.info(f"  Base URL: {base_url}")
        self.logger.info(f"  API Key: {api_key_preview}")
        self.logger.info(f"  Default Voice: {self.default_voice}")

    @staticmethod
    def _truncate_to_limit(script: str, limit: int) -> str:
        """Truncate ``script`` to at most ``limit`` characters.

        Prefers cutting at the last sentence-ending punctuation (CJK or
        ASCII) when that keeps most of the budget (> 3500 chars);
        otherwise hard-cuts and appends an ellipsis. Shared by
        ``generate_script`` (4000) and ``generate_audio`` (4096).
        """
        if len(script) <= limit:
            return script
        truncated = script[: limit - 3]  # reserve room for the "..." fallback
        last_period = max(truncated.rfind(ch) for ch in "。！？.!?")
        if last_period > 3500:
            return truncated[: last_period + 1]
        return truncated + "..."

    async def process(
        self,
        content: str,
        style: str = "friendly",
        voice: Optional[str] = None,
        skip_audio: bool = False,
    ) -> dict[str, Any]:
        """
        Main processing method - alias for narrate().

        Args:
            content: Note content
            style: Narration style
            voice: Voice role
            skip_audio: Whether to skip audio generation

        Returns:
            Dict containing script info and optionally audio info
        """
        return await self.narrate(content, style, voice, skip_audio)

    async def generate_script(self, content: str, style: str = "friendly") -> dict[str, Any]:
        """
        Generate narration script

        Args:
            content: Note content (Markdown format)
            style: Narration style (friendly, academic, concise)

        Returns:
            Dict containing:
                - script: Narration script text
                - key_points: List of extracted key points
        """
        # Estimate target length: OpenAI TTS supports up to 4096 characters
        is_long_content = len(content) > 5000

        style_prompts = {
            "friendly": self.get_prompt("style_friendly", ""),
            "academic": self.get_prompt("style_academic", ""),
            "concise": self.get_prompt("style_concise", ""),
        }

        length_instruction = (
            self.get_prompt("length_instruction_long", "")
            if is_long_content
            else self.get_prompt("length_instruction_short", "")
        )

        system_template = self.get_prompt("generate_script_system_template", "")
        system_prompt = system_template.format(
            style_prompt=style_prompts.get(style, style_prompts["friendly"]),
            length_instruction=length_instruction,
        )

        if is_long_content:
            user_template = self.get_prompt("generate_script_user_long", "")
            user_prompt = user_template.format(content=content[:8000] + "...")
        else:
            user_template = self.get_prompt("generate_script_user_short", "")
            user_prompt = user_template.format(content=content)

        self.logger.info(f"Generating narration script with style: {style}")

        # Use inherited call_llm method
        response = await self.call_llm(
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            stage="generate_script",
        )

        # Clean and truncate response, ensure it doesn't exceed 4000 characters
        script = response.strip()
        if len(script) > 4000:
            self.logger.warning(
                f"Generated script length {len(script)} exceeds 4000 limit. Truncating..."
            )
            script = self._truncate_to_limit(script, 4000)

        key_points = await self._extract_key_points(content)

        return {
            "script": script,
            "key_points": key_points,
            "style": style,
            "original_length": len(content),
            "script_length": len(script),
        }

    async def _extract_key_points(self, content: str) -> list:
        """Extract key points from notes.

        Returns an empty list on any LLM or JSON-parsing failure
        (best-effort; key points are non-critical metadata).
        """
        system_prompt = self.get_prompt("extract_key_points_system", "")
        user_template = self.get_prompt(
            "extract_key_points_user",
            "Please extract key points from the following notes:\n\n{content}",
        )
        user_prompt = user_template.format(content=content[:4000])

        try:
            response = await self.call_llm(
                user_prompt=user_prompt,
                system_prompt=system_prompt,
                stage="extract_key_points",
            )

            # Try to parse JSON
            json_match = re.search(r"\[.*\]", response, re.DOTALL)
            if json_match:
                return json.loads(json_match.group())
            return []
        except Exception as e:
            self.logger.warning(f"Failed to extract key points: {e}")
            return []

    async def generate_audio(self, script: str, voice: Optional[str] = None) -> dict[str, Any]:
        """
        Convert narration script to audio using OpenAI TTS API

        Args:
            script: Narration script text
            voice: Voice role (alloy, echo, fable, onyx, nova, shimmer)

        Returns:
            Dict containing:
                - audio_path: Audio file path
                - audio_url: Audio access URL
                - audio_id: Unique audio identifier
                - voice: Voice used

        Raises:
            ValueError: if TTS is unconfigured, the script is empty, or the
                TTS API call fails (original exception chained as __cause__).
        """
        if not self.tts_config:
            raise ValueError(
                "TTS configuration not available. Please configure TTS_MODEL, TTS_API_KEY, and TTS_URL in .env"
            )

        # Use default voice if not specified
        if voice is None:
            voice = self.default_voice

        # Validate input parameters
        if not script or not script.strip():
            raise ValueError("Script cannot be empty")

        ensure_dirs()

        # Truncate overly long scripts (OpenAI TTS supports up to 4096 characters)
        original_script_length = len(script)
        if len(script) > 4096:
            self.logger.warning(f"Script length {len(script)} exceeds 4096 limit. Truncating...")
            script = self._truncate_to_limit(script, 4096)
            self.logger.info(
                f"Script truncated from {original_script_length} to {len(script)} characters"
            )

        audio_id = datetime.now().strftime("%Y%m%d_%H%M%S") + "_" + uuid.uuid4().hex[:6]
        audio_filename = f"narration_{audio_id}.mp3"
        audio_path = USER_DIR / audio_filename

        self.logger.info(f"Starting TTS audio generation - ID: {audio_id}, Voice: {voice}")

        try:
            binding = os.getenv("TTS_BINDING", "openai")
            api_version = self.tts_config.get("api_version")

            # Only use Azure client if binding is explicitly Azure,
            # OR if binding is generic 'openai' but an Azure-specific api_version is present.
            if binding == "azure_openai" or (binding == "openai" and api_version):
                client = AsyncAzureOpenAI(
                    api_key=self.tts_config["api_key"],
                    azure_endpoint=self.tts_config["base_url"],
                    api_version=api_version,
                )
            else:
                # Create OpenAI client with custom base_url
                client = AsyncOpenAI(
                    base_url=self.tts_config["base_url"], api_key=self.tts_config["api_key"]
                )

            # Call OpenAI TTS API
            response = await client.audio.speech.create(
                model=self.tts_config["model"], voice=voice, input=script
            )

            # Save audio to file
            # NOTE(review): stream_to_file is deprecated in newer openai SDKs in
            # favor of client.audio.speech.with_streaming_response — verify the
            # pinned SDK version before migrating.
            await response.stream_to_file(audio_path)

            self.logger.info(f"Audio saved to: {audio_path}")

            # Use correct path: co-writer/audio (matching the actual storage directory)
            relative_path = f"co-writer/audio/{audio_filename}"
            audio_access_url = f"/api/outputs/{relative_path}"

            return {
                "audio_path": str(audio_path),
                "audio_url": audio_access_url,
                "audio_id": audio_id,
                "voice": voice,
            }

        except Exception as e:
            self.logger.error(f"TTS generation failed: {type(e).__name__}: {e}", exc_info=True)
            # Chain the cause so callers can inspect the underlying SDK error
            raise ValueError(f"TTS generation failed: {type(e).__name__}: {e}") from e

    async def narrate(
        self,
        content: str,
        style: str = "friendly",
        voice: Optional[str] = None,
        skip_audio: bool = False,
    ) -> dict[str, Any]:
        """
        Complete narration flow: generate script + generate audio

        Args:
            content: Note content
            style: Narration style
            voice: Voice role (alloy, echo, fable, onyx, nova, shimmer)
            skip_audio: Whether to skip audio generation (only return script)

        Returns:
            Dict containing script info and optionally audio info
        """
        # Refresh TTS config before starting to avoid stale credentials
        try:
            self.tts_config = get_tts_config()
        except Exception as e:
            self.logger.error(f"Failed to refresh TTS config: {e}")

        script_result = await self.generate_script(content, style)

        # Use default voice if not specified
        if voice is None:
            voice = self.default_voice

        result = {
            "script": script_result["script"],
            "key_points": script_result["key_points"],
            "style": style,
            "original_length": script_result["original_length"],
            "script_length": script_result["script_length"],
        }

        if not skip_audio and self.tts_config:
            try:
                audio_result = await self.generate_audio(script_result["script"], voice=voice)
                result.update(
                    {
                        "audio_url": audio_result["audio_url"],
                        "audio_path": audio_result["audio_path"],
                        "audio_id": audio_result["audio_id"],
                        "voice": voice,
                        "has_audio": True,
                    }
                )
            except Exception as e:
                self.logger.error(f"Audio generation failed: {e}")
                result["has_audio"] = False
                result["audio_error"] = str(e)
        else:
            result["has_audio"] = False
            if not self.tts_config:
                result["audio_error"] = "TTS not configured"

        return result
421
+
422
+
423
+ __all__ = ["NarratorAgent"]
@@ -0,0 +1,113 @@
1
+ system: |
2
+ You are an expert editor and writing assistant.
3
+
4
+ action_template: |
5
+ {action_verb} the following text based on the user's instruction.
6
+
7
+ User Instruction: {instruction}
8
+
9
+ context_template: |
10
+ Reference Context:
11
+ {context}
12
+
13
+ user_template: |
14
+ Target Text to Edit:
15
+ {text}
16
+
17
+ Output only the edited text, without quotes or explanations.
18
+
19
+ auto_mark_system: |
20
+ You are a professional academic reading annotation assistant, helping readers quickly grasp the core points of text.
21
+
22
+ ## Task
23
+ Read the input text and **carefully select** the most critical information for annotation. Annotations should help readers quickly locate key points without interfering with reading.
24
+
25
+ ## Available Tags and Precise Usage Scenarios
26
+
27
+ ### 1. Circle - Use Sparingly
28
+ ```html
29
+ <span data-rough-notation="circle">content</span>
30
+ ```
31
+ **Applicable Scenarios**:
32
+ - Core topic words of articles/paragraphs (e.g., key concepts in paper titles)
33
+ - Unique proper nouns, model names (e.g., GPT-4, BERT)
34
+ - Key numerical values/metrics (e.g., 95.7%, p<0.05)
35
+
36
+ **Limitation**: Maximum 1 per 100 characters, content should not exceed 5 words
37
+
38
+ ### 2. Highlight - Moderate Use
39
+ ```html
40
+ <span data-rough-notation="highlight">content</span>
41
+ ```
42
+ **Applicable Scenarios**:
43
+ - Definitional statements (e.g., "XX refers to...")
44
+ - First appearance of core concepts and their explanations
45
+ - Important methodological descriptions
46
+
47
+ **Limitation**: Maximum 2 per paragraph, content 2-15 words
48
+
49
+ ### 3. Box - Minimal Use
50
+ ```html
51
+ <span data-rough-notation="box">content</span>
52
+ ```
53
+ **Applicable Scenarios**:
54
+ - Mathematical formulas, equations
55
+ - Specific data points or statistical values
56
+ - Code snippets, commands
57
+ - Version numbers, dates, and other precise information
58
+
59
+ **Limitation**: Maximum 1 per paragraph, content should not exceed 20 characters
60
+
61
+ ### 4. Underline - Moderate Use
62
+ ```html
63
+ <span data-rough-notation="underline">content</span>
64
+ ```
65
+ **Applicable Scenarios**:
66
+ - Conclusive statements
67
+ - Key expressions of causal relationships
68
+ - Core viewpoints in comparisons or contrasts
69
+ - Author's main arguments
70
+
71
+ **Limitation**: Maximum 1 per paragraph, content 5-30 words
72
+
73
+ ### 5. Bracket - Use Sparingly
74
+ ```html
75
+ <span data-rough-notation="bracket">content</span>
76
+ ```
77
+ **Applicable Scenarios**:
78
+ - Entire paragraphs that are core summaries or conclusions
79
+ - Important quotations or theorem statements
80
+ - Critical warnings or notes
81
+
82
+ **Limitation**: Maximum 1-2 per entire article, for truly indispensable complete sentences
83
+
84
+ ## Core Rules
85
+
86
+ 1. **Exercise Restraint**: Better to annotate less than to over-annotate. Annotation density should not exceed 10% of total text per paragraph.
87
+ 2. **No Modifications**: Absolutely must not modify, delete, or add any text from the original, only insert HTML tags.
88
+ 3. **Tag Placement**: Tags must be placed inside Markdown symbols (e.g., `**`, `*`, `` ` ``).
89
+ 4. **When No Annotation Needed**: If the text has no information worth annotating, return it as-is.
90
+
91
+ ## Examples
92
+
93
+ **Input**:
94
+ Deep learning is a subfield of machine learning, and its core is using neural networks to learn data representations.
95
+
96
+ **Output**:
97
+ <span data-rough-notation="highlight">Deep learning is a subfield of machine learning</span>, and its core is using <span data-rough-notation="circle">neural networks</span> to learn data representations.
98
+
99
+ **Input**:
100
+ The weather is nice today, perfect for going out for a walk.
101
+
102
+ **Output**:
103
+ The weather is nice today, perfect for going out for a walk.
104
+
105
+ **Input**:
106
+ Experimental results show that our proposed method achieved 99.2% accuracy on the MNIST dataset, significantly exceeding the baseline method's 95.1%.
107
+
108
+ **Output**:
109
+ <span data-rough-notation="underline">Experimental results show that our proposed method achieved <span data-rough-notation="box">99.2%</span> accuracy on the MNIST dataset</span>, significantly exceeding the baseline method's 95.1%.
110
+
111
+ auto_mark_user_template: |
112
+ Process the following text:
113
+ {text}
@@ -0,0 +1,88 @@
1
+ style_friendly: |
2
+ You are a friendly and approachable tutor, explaining note content face-to-face to students.
3
+
4
+ **Narration Requirements**:
5
+ 1. **Person**: Use "we", "us", "you" to create closeness
6
+ 2. **Tone**: Relaxed but professional, like chatting with a friend
7
+ 3. **Pacing**: Appropriate pauses, use words like "well", "next", "so" for transitions
8
+ 4. **Emphasis**: Use phrases like "this is important", "note here" to highlight key information
9
+ 5. **Interaction**: Appropriately include phrases like "what do you think", "think about it" to guide thinking
10
+ 6. **Length Control**: The script should be controlled within 4000 characters. If the original content is long, please generate a refined summary version, highlighting main points and key information.
11
+
12
+ style_academic: |
13
+ You are a senior scholar giving an academic lecture.
14
+
15
+ **Narration Requirements**:
16
+ 1. **Person**: Use "we", "this paper" and other academic language
17
+ 2. **Tone**: Rigorous and professional, with clear logic
18
+ 3. **Structure**: Clear introduction-body-conclusion structure
19
+ 4. **Terminology**: Retain professional terms, provide explanations when necessary
20
+ 5. **Citations**: Maintain academic standards when mentioning related theories or research
21
+ 6. **Length Control**: The script should be controlled within 4000 characters. If the original content is long, please generate a refined summary version, highlighting main points and key information.
22
+
23
+ style_concise: |
24
+ You are an efficient knowledge communicator who needs to quickly convey core information.
25
+
26
+ **Narration Requirements**:
27
+ 1. **Person**: Use "we" to maintain friendliness
28
+ 2. **Tone**: Direct and to the point, no beating around the bush
29
+ 3. **Structure**: Overview first, then details; get straight to the point
30
+ 4. **Focus**: Only cover the most core content
31
+ 5. **Transitions**: Use concise "first", "then", "finally" to connect
32
+ 6. **Length Control**: The script should be controlled within 4000 characters. If the original content is long, please generate a refined summary version, highlighting main points and key information.
33
+
34
+ generate_script_system_template: |
35
+ You are a professional note narration script writing expert. Your task is to convert the user's note content into a script suitable for oral narration.
36
+
37
+ {style_prompt}
38
+
39
+ {length_instruction}
40
+
41
+ **Output Format**:
42
+ Output the narration script text directly, without any additional explanations or markers.
43
+ The script should be coherent spoken language, suitable for direct reading aloud.
44
+
45
+ **Notes**:
46
+ 1. Maintain the core information and logical structure of the original text
47
+ 2. Convert Markdown formats (such as **bold**, *italic*, code blocks, etc.) into oral descriptions
48
+ 3. Mathematical formulas need to be described orally, such as "x squared plus y squared equals z squared"
49
+ 4. Remove all HTML tags, retain their text content
50
+ 5. Avoid overly formal, written-style expressions; prefer natural spoken language
51
+ 6. **Control the length within 4000 characters**
52
+
53
+ length_instruction_long: |
54
+ **Important: The script should be controlled within 4000 characters. If the original content is long, please generate a refined summary version, retaining the most core viewpoints and key information.**
55
+
56
+ length_instruction_short: |
57
+ **Important: The script should be controlled within 4000 characters.**
58
+
59
+ generate_script_user_long: |
60
+ The following is a longer note content. Please generate a refined narration script summary, controlled within 4000 characters, containing the most core viewpoints and key information:
61
+
62
+ ---
63
+ {content}
64
+ ---
65
+
66
+ Please generate a narration script suitable for oral reading (within 4000 characters).
67
+
68
+ generate_script_user_short: |
69
+ Please convert the following note content into a narration script (controlled within 4000 characters):
70
+
71
+ ---
72
+ {content}
73
+ ---
74
+
75
+ Please generate a narration script suitable for oral reading.
76
+
77
+ extract_key_points_system: |
78
+ You are a content analysis expert. Please extract 3-5 key points from the given notes.
79
+
80
+ Output format: JSON array, each element is a key point string.
81
+ Example: ["Key point 1", "Key point 2", "Key point 3"]
82
+
83
+ Only output the JSON array, no other content.
84
+
85
+ extract_key_points_user: |
86
+ Please extract key points from the following notes:
87
+
88
+ {content}