realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,423 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ NarratorAgent - Note narration agent.
5
+ Inherits from unified BaseAgent with special TTS configuration.
6
+ """
7
+
8
+ from datetime import datetime
9
+ import json
10
+ import os
11
+ from pathlib import Path
12
+ import re
13
+ from typing import Any, Optional
14
+ from urllib.parse import urlparse
15
+ import uuid
16
+
17
+ from openai import AsyncAzureOpenAI, AsyncOpenAI
18
+
19
+ from src.agents.base_agent import BaseAgent
20
+ from src.services.tts import get_tts_config
21
+
22
+ # Import shared stats from edit_agent for legacy compatibility
23
+
24
+ # Define storage path (unified under user/co-writer/ directory)
25
+ USER_DIR = Path(__file__).parent.parent.parent.parent / "data" / "user" / "co-writer" / "audio"
26
+
27
+
28
def ensure_dirs():
    """Create the audio output directory tree if it does not already exist."""
    os.makedirs(USER_DIR, exist_ok=True)
31
+
32
+
33
class NarratorAgent(BaseAgent):
    """Note Narration Agent - Generate narration script and convert to audio.

    Workflow: ``generate_script`` produces a narration script via the
    inherited LLM client; ``generate_audio`` synthesizes it to an MP3
    through the OpenAI (or Azure OpenAI) TTS API and stores it under
    the module-level ``USER_DIR``. ``narrate`` chains both steps.
    """

    def __init__(self, language: str = "en"):
        """
        Initialize NarratorAgent.

        Args:
            language: Language setting ('en' | 'zh'), default 'en'

        Note: LLM configuration (api_key, base_url, model, etc.) is loaded
        automatically from the unified config service. Use refresh_config()
        to pick up configuration changes made in Settings.
        """
        # Use "narrator" as module_name to get independent temperature/max_tokens config
        super().__init__(
            module_name="narrator",
            agent_name="narrator_agent",
            language=language,
        )

        # Override prompts to load from co_writer module
        # (narrator_agent prompts are stored under co_writer/prompts/)
        from src.services.prompt import get_prompt_manager

        self.prompts = get_prompt_manager().load_prompts(
            module_name="co_writer",
            agent_name="narrator_agent",
            language=language,
        )

        # Load TTS-specific configuration
        self._load_tts_config()

    def _load_tts_config(self):
        """Load TTS-specific configuration from unified config service.

        Degrades gracefully on failure: ``tts_config`` becomes ``None``
        (audio generation is then skipped or raises) and the voice falls
        back to "alloy".
        """
        try:
            self.tts_config = get_tts_config()
            # Get voice from unified config (defaults to "alloy")
            self.default_voice = self.tts_config.get("voice", "alloy")
            self.logger.info(f"TTS settings loaded: voice={self.default_voice}")
            # Validate TTS configuration
            self._validate_tts_config()
        except Exception as e:
            self.logger.error(f"Failed to load TTS config: {e}", exc_info=True)
            self.tts_config = None
            self.default_voice = "alloy"

    def _validate_tts_config(self):
        """Validate TTS configuration completeness and format.

        Raises:
            ValueError: if the config is missing, lacks required keys, or
                has a malformed base_url / api_key / model.
        """
        if not self.tts_config:
            raise ValueError("TTS config is None")

        # Check required keys
        required_keys = ["model", "api_key", "base_url"]
        missing_keys = [key for key in required_keys if key not in self.tts_config]
        if missing_keys:
            raise ValueError(f"TTS config missing required keys: {missing_keys}")

        # Validate base_url format
        base_url = self.tts_config["base_url"]
        if not base_url:
            raise ValueError("TTS config 'base_url' is empty")

        if not isinstance(base_url, str):
            raise ValueError(f"TTS config 'base_url' must be a string, got {type(base_url)}")

        # Validate URL format
        if not base_url.startswith(("http://", "https://")):
            raise ValueError(
                f"TTS config 'base_url' must start with http:// or https://, got: {base_url}"
            )

        # Parse outside the netloc check so our own ValueError is not
        # re-wrapped as a "parsing error" (the previous version's except
        # clause swallowed it), and chain the original cause.
        try:
            parsed = urlparse(base_url)
        except Exception as e:
            raise ValueError(f"TTS config 'base_url' parsing error: {e}") from e
        if not parsed.netloc:
            raise ValueError(f"TTS config 'base_url' has invalid format: {base_url}")

        # Validate api_key
        api_key = self.tts_config.get("api_key")
        if not api_key:
            raise ValueError("TTS config 'api_key' is empty")

        if not isinstance(api_key, str) or len(api_key.strip()) == 0:
            raise ValueError("TTS config 'api_key' must be a non-empty string")

        # Validate model
        model = self.tts_config.get("model")
        if not model:
            raise ValueError("TTS config 'model' is empty")

        # Log configuration info (hide sensitive information)
        api_key_preview = f"{api_key[:8]}...{api_key[-4:]}" if len(api_key) > 12 else "*" * 10
        self.logger.info("TTS Configuration Loaded (OpenAI API):")
        self.logger.info(f"  Model: {model}")
        self.logger.info(f"  Base URL: {base_url}")
        self.logger.info(f"  API Key: {api_key_preview}")
        self.logger.info(f"  Default Voice: {self.default_voice}")

    @staticmethod
    def _truncate_to_limit(script: str, limit: int) -> str:
        """Truncate ``script`` to at most ``limit`` characters.

        Prefers cutting at the last sentence-ending punctuation (CJK or
        ASCII) when that keeps most of the budget (> 3500 chars);
        otherwise hard-cuts and appends an ellipsis. Shared by
        ``generate_script`` (4000) and ``generate_audio`` (4096).
        """
        if len(script) <= limit:
            return script
        truncated = script[: limit - 3]  # reserve room for the "..." fallback
        last_period = max(truncated.rfind(ch) for ch in "。！？.!?")
        if last_period > 3500:
            return truncated[: last_period + 1]
        return truncated + "..."

    async def process(
        self,
        content: str,
        style: str = "friendly",
        voice: Optional[str] = None,
        skip_audio: bool = False,
    ) -> dict[str, Any]:
        """
        Main processing method - alias for narrate().

        Args:
            content: Note content
            style: Narration style
            voice: Voice role
            skip_audio: Whether to skip audio generation

        Returns:
            Dict containing script info and optionally audio info
        """
        return await self.narrate(content, style, voice, skip_audio)

    async def generate_script(self, content: str, style: str = "friendly") -> dict[str, Any]:
        """
        Generate narration script

        Args:
            content: Note content (Markdown format)
            style: Narration style (friendly, academic, concise)

        Returns:
            Dict containing:
                - script: Narration script text
                - key_points: List of extracted key points
        """
        # Estimate target length: OpenAI TTS supports up to 4096 characters
        is_long_content = len(content) > 5000

        style_prompts = {
            "friendly": self.get_prompt("style_friendly", ""),
            "academic": self.get_prompt("style_academic", ""),
            "concise": self.get_prompt("style_concise", ""),
        }

        length_instruction = (
            self.get_prompt("length_instruction_long", "")
            if is_long_content
            else self.get_prompt("length_instruction_short", "")
        )

        system_template = self.get_prompt("generate_script_system_template", "")
        system_prompt = system_template.format(
            style_prompt=style_prompts.get(style, style_prompts["friendly"]),
            length_instruction=length_instruction,
        )

        if is_long_content:
            user_template = self.get_prompt("generate_script_user_long", "")
            user_prompt = user_template.format(content=content[:8000] + "...")
        else:
            user_template = self.get_prompt("generate_script_user_short", "")
            user_prompt = user_template.format(content=content)

        self.logger.info(f"Generating narration script with style: {style}")

        # Use inherited call_llm method
        response = await self.call_llm(
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            stage="generate_script",
        )

        # Clean and truncate response, ensure it doesn't exceed 4000 characters
        script = response.strip()
        if len(script) > 4000:
            self.logger.warning(
                f"Generated script length {len(script)} exceeds 4000 limit. Truncating..."
            )
            script = self._truncate_to_limit(script, 4000)

        key_points = await self._extract_key_points(content)

        return {
            "script": script,
            "key_points": key_points,
            "style": style,
            "original_length": len(content),
            "script_length": len(script),
        }

    async def _extract_key_points(self, content: str) -> list:
        """Extract key points from notes.

        Returns an empty list on any LLM or JSON-parsing failure
        (best-effort; key points are non-critical metadata).
        """
        system_prompt = self.get_prompt("extract_key_points_system", "")
        user_template = self.get_prompt(
            "extract_key_points_user",
            "Please extract key points from the following notes:\n\n{content}",
        )
        user_prompt = user_template.format(content=content[:4000])

        try:
            response = await self.call_llm(
                user_prompt=user_prompt,
                system_prompt=system_prompt,
                stage="extract_key_points",
            )

            # Try to parse JSON
            json_match = re.search(r"\[.*\]", response, re.DOTALL)
            if json_match:
                return json.loads(json_match.group())
            return []
        except Exception as e:
            self.logger.warning(f"Failed to extract key points: {e}")
            return []

    async def generate_audio(self, script: str, voice: Optional[str] = None) -> dict[str, Any]:
        """
        Convert narration script to audio using OpenAI TTS API

        Args:
            script: Narration script text
            voice: Voice role (alloy, echo, fable, onyx, nova, shimmer)

        Returns:
            Dict containing:
                - audio_path: Audio file path
                - audio_url: Audio access URL
                - audio_id: Unique audio identifier
                - voice: Voice used

        Raises:
            ValueError: if TTS is unconfigured, the script is empty, or the
                TTS API call fails (original exception chained as __cause__).
        """
        if not self.tts_config:
            raise ValueError(
                "TTS configuration not available. Please configure TTS_MODEL, TTS_API_KEY, and TTS_URL in .env"
            )

        # Use default voice if not specified
        if voice is None:
            voice = self.default_voice

        # Validate input parameters
        if not script or not script.strip():
            raise ValueError("Script cannot be empty")

        ensure_dirs()

        # Truncate overly long scripts (OpenAI TTS supports up to 4096 characters)
        original_script_length = len(script)
        if len(script) > 4096:
            self.logger.warning(f"Script length {len(script)} exceeds 4096 limit. Truncating...")
            script = self._truncate_to_limit(script, 4096)
            self.logger.info(
                f"Script truncated from {original_script_length} to {len(script)} characters"
            )

        audio_id = datetime.now().strftime("%Y%m%d_%H%M%S") + "_" + uuid.uuid4().hex[:6]
        audio_filename = f"narration_{audio_id}.mp3"
        audio_path = USER_DIR / audio_filename

        self.logger.info(f"Starting TTS audio generation - ID: {audio_id}, Voice: {voice}")

        try:
            binding = os.getenv("TTS_BINDING", "openai")
            api_version = self.tts_config.get("api_version")

            # Only use Azure client if binding is explicitly Azure,
            # OR if binding is generic 'openai' but an Azure-specific api_version is present.
            if binding == "azure_openai" or (binding == "openai" and api_version):
                client = AsyncAzureOpenAI(
                    api_key=self.tts_config["api_key"],
                    azure_endpoint=self.tts_config["base_url"],
                    api_version=api_version,
                )
            else:
                # Create OpenAI client with custom base_url
                client = AsyncOpenAI(
                    base_url=self.tts_config["base_url"], api_key=self.tts_config["api_key"]
                )

            # Call OpenAI TTS API
            response = await client.audio.speech.create(
                model=self.tts_config["model"], voice=voice, input=script
            )

            # Save audio to file
            # NOTE(review): stream_to_file is deprecated in newer openai SDKs in
            # favor of client.audio.speech.with_streaming_response — verify the
            # pinned SDK version before migrating.
            await response.stream_to_file(audio_path)

            self.logger.info(f"Audio saved to: {audio_path}")

            # Use correct path: co-writer/audio (matching the actual storage directory)
            relative_path = f"co-writer/audio/{audio_filename}"
            audio_access_url = f"/api/outputs/{relative_path}"

            return {
                "audio_path": str(audio_path),
                "audio_url": audio_access_url,
                "audio_id": audio_id,
                "voice": voice,
            }

        except Exception as e:
            self.logger.error(f"TTS generation failed: {type(e).__name__}: {e}", exc_info=True)
            # Chain the cause so callers can inspect the underlying SDK error
            raise ValueError(f"TTS generation failed: {type(e).__name__}: {e}") from e

    async def narrate(
        self,
        content: str,
        style: str = "friendly",
        voice: Optional[str] = None,
        skip_audio: bool = False,
    ) -> dict[str, Any]:
        """
        Complete narration flow: generate script + generate audio

        Args:
            content: Note content
            style: Narration style
            voice: Voice role (alloy, echo, fable, onyx, nova, shimmer)
            skip_audio: Whether to skip audio generation (only return script)

        Returns:
            Dict containing script info and optionally audio info
        """
        # Refresh TTS config before starting to avoid stale credentials
        try:
            self.tts_config = get_tts_config()
        except Exception as e:
            self.logger.error(f"Failed to refresh TTS config: {e}")

        script_result = await self.generate_script(content, style)

        # Use default voice if not specified
        if voice is None:
            voice = self.default_voice

        result = {
            "script": script_result["script"],
            "key_points": script_result["key_points"],
            "style": style,
            "original_length": script_result["original_length"],
            "script_length": script_result["script_length"],
        }

        if not skip_audio and self.tts_config:
            try:
                audio_result = await self.generate_audio(script_result["script"], voice=voice)
                result.update(
                    {
                        "audio_url": audio_result["audio_url"],
                        "audio_path": audio_result["audio_path"],
                        "audio_id": audio_result["audio_id"],
                        "voice": voice,
                        "has_audio": True,
                    }
                )
            except Exception as e:
                self.logger.error(f"Audio generation failed: {e}")
                result["has_audio"] = False
                result["audio_error"] = str(e)
        else:
            result["has_audio"] = False
            if not self.tts_config:
                result["audio_error"] = "TTS not configured"

        return result
421
+
422
+
423
+ __all__ = ["NarratorAgent"]
@@ -0,0 +1,113 @@
1
+ system: |
2
+ You are an expert editor and writing assistant.
3
+
4
+ action_template: |
5
+ {action_verb} the following text based on the user's instruction.
6
+
7
+ User Instruction: {instruction}
8
+
9
+ context_template: |
10
+ Reference Context:
11
+ {context}
12
+
13
+ user_template: |
14
+ Target Text to Edit:
15
+ {text}
16
+
17
+ Output only the edited text, without quotes or explanations.
18
+
19
+ auto_mark_system: |
20
+ You are a professional academic reading annotation assistant, helping readers quickly grasp the core points of text.
21
+
22
+ ## Task
23
+ Read the input text and **carefully select** the most critical information for annotation. Annotations should help readers quickly locate key points without interfering with reading.
24
+
25
+ ## Available Tags and Precise Usage Scenarios
26
+
27
+ ### 1. Circle - Use Sparingly
28
+ ```html
29
+ <span data-rough-notation="circle">content</span>
30
+ ```
31
+ **Applicable Scenarios**:
32
+ - Core topic words of articles/paragraphs (e.g., key concepts in paper titles)
33
+ - Unique proper nouns, model names (e.g., GPT-4, BERT)
34
+ - Key numerical values/metrics (e.g., 95.7%, p<0.05)
35
+
36
+ **Limitation**: Maximum 1 per 100 characters, content should not exceed 5 words
37
+
38
+ ### 2. Highlight - Moderate Use
39
+ ```html
40
+ <span data-rough-notation="highlight">content</span>
41
+ ```
42
+ **Applicable Scenarios**:
43
+ - Definitional statements (e.g., "XX refers to...")
44
+ - First appearance of core concepts and their explanations
45
+ - Important methodological descriptions
46
+
47
+ **Limitation**: Maximum 2 per paragraph, content 2-15 words
48
+
49
+ ### 3. Box - Minimal Use
50
+ ```html
51
+ <span data-rough-notation="box">content</span>
52
+ ```
53
+ **Applicable Scenarios**:
54
+ - Mathematical formulas, equations
55
+ - Specific data points or statistical values
56
+ - Code snippets, commands
57
+ - Version numbers, dates, and other precise information
58
+
59
+ **Limitation**: Maximum 1 per paragraph, content should not exceed 20 characters
60
+
61
+ ### 4. Underline - Moderate Use
62
+ ```html
63
+ <span data-rough-notation="underline">content</span>
64
+ ```
65
+ **Applicable Scenarios**:
66
+ - Conclusive statements
67
+ - Key expressions of causal relationships
68
+ - Core viewpoints in comparisons or contrasts
69
+ - Author's main arguments
70
+
71
+ **Limitation**: Maximum 1 per paragraph, content 5-30 words
72
+
73
+ ### 5. Bracket - Use Sparingly
74
+ ```html
75
+ <span data-rough-notation="bracket">content</span>
76
+ ```
77
+ **Applicable Scenarios**:
78
+ - Entire paragraphs that are core summaries or conclusions
79
+ - Important quotations or theorem statements
80
+ - Critical warnings or notes
81
+
82
+ **Limitation**: Maximum 1-2 per entire article, for truly indispensable complete sentences
83
+
84
+ ## Core Rules
85
+
86
+ 1. **Exercise Restraint**: Better to annotate less than to over-annotate. Annotation density should not exceed 10% of total text per paragraph.
87
+ 2. **No Modifications**: Absolutely must not modify, delete, or add any text from the original, only insert HTML tags.
88
+ 3. **Tag Placement**: Tags must be placed inside Markdown symbols (e.g., `**`, `*`, `` ` ``).
89
+ 4. **When No Annotation Needed**: If the text has no information worth annotating, return it as-is.
90
+
91
+ ## Examples
92
+
93
+ **Input**:
94
+ Deep learning is a subfield of machine learning, and its core is using neural networks to learn data representations.
95
+
96
+ **Output**:
97
+ <span data-rough-notation="highlight">Deep learning is a subfield of machine learning</span>, and its core is using <span data-rough-notation="circle">neural networks</span> to learn data representations.
98
+
99
+ **Input**:
100
+ The weather is nice today, perfect for going out for a walk.
101
+
102
+ **Output**:
103
+ The weather is nice today, perfect for going out for a walk.
104
+
105
+ **Input**:
106
+ Experimental results show that our proposed method achieved 99.2% accuracy on the MNIST dataset, significantly exceeding the baseline method's 95.1%.
107
+
108
+ **Output**:
109
+ <span data-rough-notation="underline">Experimental results show that our proposed method achieved <span data-rough-notation="box">99.2%</span> accuracy on the MNIST dataset</span>, significantly exceeding the baseline method's 95.1%.
110
+
111
+ auto_mark_user_template: |
112
+ Process the following text:
113
+ {text}
@@ -0,0 +1,88 @@
1
+ style_friendly: |
2
+ You are a friendly and approachable tutor, explaining note content face-to-face to students.
3
+
4
+ **Narration Requirements**:
5
+ 1. **Person**: Use "we", "us", "you" to create closeness
6
+ 2. **Tone**: Relaxed but professional, like chatting with a friend
7
+ 3. **Pacing**: Appropriate pauses, use words like "well", "next", "so" for transitions
8
+ 4. **Emphasis**: Use phrases like "this is important", "note here" to highlight key information
9
+ 5. **Interaction**: Appropriately include phrases like "what do you think", "think about it" to guide thinking
10
+ 6. **Length Control**: The script should be controlled within 4000 characters. If the original content is long, please generate a refined summary version, highlighting main points and key information.
11
+
12
+ style_academic: |
13
+ You are a senior scholar giving an academic lecture.
14
+
15
+ **Narration Requirements**:
16
+ 1. **Person**: Use "we", "this paper" and other academic language
17
+ 2. **Tone**: Rigorous and professional, with clear logic
18
+ 3. **Structure**: Clear introduction-body-conclusion structure
19
+ 4. **Terminology**: Retain professional terms, provide explanations when necessary
20
+ 5. **Citations**: Maintain academic standards when mentioning related theories or research
21
+ 6. **Length Control**: The script should be controlled within 4000 characters. If the original content is long, please generate a refined summary version, highlighting main points and key information.
22
+
23
+ style_concise: |
24
+ You are an efficient knowledge communicator who needs to quickly convey core information.
25
+
26
+ **Narration Requirements**:
27
+ 1. **Person**: Use "we" to maintain friendliness
28
+ 2. **Tone**: Direct and to the point, no beating around the bush
29
+ 3. **Structure**: Overview first, then details; get straight to the point
30
+ 4. **Focus**: Only cover the most core content
31
+ 5. **Transitions**: Use concise "first", "then", "finally" to connect
32
+ 6. **Length Control**: The script should be controlled within 4000 characters. If the original content is long, please generate a refined summary version, highlighting main points and key information.
33
+
34
+ generate_script_system_template: |
35
+ You are a professional note narration script writing expert. Your task is to convert the user's note content into a script suitable for oral narration.
36
+
37
+ {style_prompt}
38
+
39
+ {length_instruction}
40
+
41
+ **Output Format**:
42
+ Output the narration script text directly, without any additional explanations or markers.
43
+ The script should be coherent spoken language, suitable for direct reading aloud.
44
+
45
+ **Notes**:
46
+ 1. Maintain the core information and logical structure of the original text
47
+ 2. Convert Markdown formats (such as **bold**, *italic*, code blocks, etc.) into oral descriptions
48
+ 3. Mathematical formulas need to be described orally, such as "x squared plus y squared equals z squared"
49
+ 4. Remove all HTML tags, retain their text content
50
+ 5. Avoid overly formal, written-style expressions; prefer natural spoken language
51
+ 6. **Control the length within 4000 characters**
52
+
53
+ length_instruction_long: |
54
+ **Important: The script should be controlled within 4000 characters. If the original content is long, please generate a refined summary version, retaining the most core viewpoints and key information.**
55
+
56
+ length_instruction_short: |
57
+ **Important: The script should be controlled within 4000 characters.**
58
+
59
+ generate_script_user_long: |
60
+ The following is a longer note content. Please generate a refined narration script summary, controlled within 4000 characters, containing the most core viewpoints and key information:
61
+
62
+ ---
63
+ {content}
64
+ ---
65
+
66
+ Please generate a narration script suitable for oral reading (within 4000 characters).
67
+
68
+ generate_script_user_short: |
69
+ Please convert the following note content into a narration script (controlled within 4000 characters):
70
+
71
+ ---
72
+ {content}
73
+ ---
74
+
75
+ Please generate a narration script suitable for oral reading.
76
+
77
+ extract_key_points_system: |
78
+ You are a content analysis expert. Please extract 3-5 key points from the given notes.
79
+
80
+ Output format: JSON array, each element is a key point string.
81
+ Example: ["Key point 1", "Key point 2", "Key point 3"]
82
+
83
+ Only output the JSON array, no other content.
84
+
85
+ extract_key_points_user: |
86
+ Please extract key points from the following notes:
87
+
88
+ {content}