realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
|
@@ -0,0 +1,821 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Knowledge Base API Router
|
|
3
|
+
=========================
|
|
4
|
+
|
|
5
|
+
Handles knowledge base CRUD operations, file uploads, and initialization.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
import shutil
|
|
13
|
+
import sys
|
|
14
|
+
import traceback
|
|
15
|
+
|
|
16
|
+
from fastapi import (
|
|
17
|
+
APIRouter,
|
|
18
|
+
BackgroundTasks,
|
|
19
|
+
File,
|
|
20
|
+
Form,
|
|
21
|
+
HTTPException,
|
|
22
|
+
UploadFile,
|
|
23
|
+
WebSocket,
|
|
24
|
+
WebSocketDisconnect,
|
|
25
|
+
)
|
|
26
|
+
from pydantic import BaseModel
|
|
27
|
+
|
|
28
|
+
from src.api.utils.progress_broadcaster import ProgressBroadcaster
|
|
29
|
+
from src.api.utils.task_id_manager import TaskIDManager
|
|
30
|
+
from src.knowledge.add_documents import DocumentAdder
|
|
31
|
+
from src.knowledge.initializer import KnowledgeBaseInitializer
|
|
32
|
+
from src.knowledge.manager import KnowledgeBaseManager
|
|
33
|
+
from src.knowledge.progress_tracker import ProgressStage, ProgressTracker
|
|
34
|
+
from src.utils.document_validator import DocumentValidator
|
|
35
|
+
from src.utils.error_utils import format_exception_message
|
|
36
|
+
|
|
37
|
+
# Put the directory four levels above this file at the front of sys.path.
# NOTE(review): presumably this is meant to make the `src.*` imports below
# resolvable; other `src.*` imports earlier in this file already run before
# this line, so confirm whether the path tweak is still required.
_project_root = Path(__file__).parent.parent.parent.parent
sys.path.insert(0, str(_project_root))
from src.logging import get_logger
from src.services.config import load_config_with_main
from src.services.llm import get_llm_config

# Initialize logger with config
# `project_root` is computed with the same expression as `_project_root` above.
project_root = Path(__file__).parent.parent.parent.parent
config = load_config_with_main("solve_config.yaml", project_root)  # Use any config to get main.yaml
# Prefer paths.user_log_dir; fall back to logging.log_dir when the former is
# missing or falsy.
log_dir = config.get("paths", {}).get("user_log_dir") or config.get("logging", {}).get("log_dir")
logger = get_logger("Knowledge", level="INFO", log_dir=log_dir)

# Router on which every endpoint in this module is registered.
router = APIRouter()
|
|
50
|
+
|
|
51
|
+
# Binary (1024-based) unit sizes used when rendering byte counts.
BYTES_PER_GB = 1024**3
BYTES_PER_MB = 1024**2


def format_bytes_human_readable(size_bytes: int) -> str:
    """Render a byte count as ``"<x.y> GB"``, ``"<x.y> MB"`` or ``"<n> bytes"``.

    The largest unit whose threshold the value reaches is used; anything
    below one megabyte is printed as a plain byte count.
    """
    units = ((BYTES_PER_GB, "GB"), (BYTES_PER_MB, "MB"))
    for threshold, label in units:
        if size_bytes >= threshold:
            return f"{size_bytes / threshold:.1f} {label}"
    return f"{size_bytes} bytes"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
_kb_base_dir = _project_root.joinpath("data", "knowledge_bases")

# Module-level singleton slot; populated on first call to get_kb_manager().
kb_manager = None


def get_kb_manager():
    """Return the shared KnowledgeBaseManager, constructing it on first use."""
    global kb_manager
    if kb_manager is not None:
        return kb_manager
    kb_manager = KnowledgeBaseManager(base_dir=str(_kb_base_dir))
    return kb_manager
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class KnowledgeBaseInfo(BaseModel):
    # Summary of a single knowledge base; used as the item type of the
    # GET /list response (response_model=list[KnowledgeBaseInfo]).
    # Documented with comments rather than a docstring so the generated
    # schema is left untouched.
    name: str  # knowledge base name
    is_default: bool  # whether this KB is the manager's default
    # Free-form statistics mapping. The fallback path in list_knowledge_bases
    # fills it with "raw_documents", "images", "content_lists" and
    # "rag_initialized"; other producers may use different keys.
    statistics: dict
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class LinkFolderRequest(BaseModel):
    """Request model for linking a local folder to a KB."""

    # Path of the folder to link.
    # NOTE(review): presumably resolved on the server's filesystem; confirm
    # against the handler that consumes this model.
    folder_path: str
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class LinkedFolderInfo(BaseModel):
    """Response model for linked folder information."""

    id: str  # identifier of the linked-folder record
    path: str  # folder path that was linked
    # Timestamp carried as a string.
    # NOTE(review): format (e.g. ISO 8601) is not visible here; confirm.
    added_at: str
    # Number of files associated with the folder.
    # NOTE(review): which files are counted is not visible here; confirm.
    file_count: int
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
async def run_initialization_task(initializer: KnowledgeBaseInitializer):
    """Initialize a knowledge base in the background and report its outcome.

    A ``kb_init`` task id is generated for the initializer's KB, a
    ProgressTracker is attached if the initializer has none, and then
    ``process_documents()`` is awaited followed by ``extract_numbered_items()``.

    On success the tracker moves to ``COMPLETED`` and the task is marked
    ``"completed"``. Any exception is logged, recorded on the task as
    ``"error"`` and pushed to the tracker as ``ERROR``; it is not re-raised.
    """
    tasks = TaskIDManager.get_instance()
    task_id = tasks.generate_task_id("kb_init", initializer.kb_name)

    try:
        # The initializer may arrive without a tracker; create one on demand.
        if not initializer.progress_tracker:
            initializer.progress_tracker = ProgressTracker(
                initializer.kb_name,
                initializer.base_dir,
            )
        initializer.progress_tracker.task_id = task_id

        logger.info(f"[{task_id}] Initializing KB: {initializer.kb_name}")

        await initializer.process_documents()
        initializer.extract_numbered_items()

        initializer.progress_tracker.update(
            ProgressStage.COMPLETED,
            "Knowledge base initialization complete!",
            current=1,
            total=1,
        )

        logger.success(f"[{task_id}] KB '{initializer.kb_name}' initialized")
        tasks.update_task_status(task_id, "completed")
    except Exception as exc:
        reason = str(exc)

        logger.error(f"[{task_id}] KB '{initializer.kb_name}' init failed: {reason}")
        tasks.update_task_status(task_id, "error", error=reason)

        # The failure may have happened before a tracker was attached.
        tracker = initializer.progress_tracker
        if tracker:
            tracker.update(
                ProgressStage.ERROR,
                f"Initialization failed: {reason}",
                error=reason,
            )
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
async def run_upload_processing_task(
    kb_name: str,
    base_dir: str,
    api_key: str,
    base_url: str,
    uploaded_file_paths: list[str],
    rag_provider: str = None,
):
    """Process already-saved upload files into a knowledge base.

    Generates a ``kb_upload`` task id, creates a ProgressTracker for the KB,
    then runs a DocumentAdder over the given paths: documents are processed,
    numbered items are extracted for whatever was processed, and the KB
    metadata is updated with the number of submitted files.

    On success the tracker moves to ``COMPLETED`` and the task is marked
    ``"completed"``. Any exception is logged, recorded on the task as
    ``"error"`` and pushed to the tracker as ``ERROR``; it is not re-raised.
    """
    tasks = TaskIDManager.get_instance()
    file_count = len(uploaded_file_paths)
    task_id = tasks.generate_task_id("kb_upload", f"{kb_name}_upload_{file_count}")

    tracker = ProgressTracker(kb_name, Path(base_dir))
    tracker.task_id = task_id

    try:
        logger.info(f"[{task_id}] Processing {file_count} files to KB '{kb_name}'")
        tracker.update(
            ProgressStage.PROCESSING_DOCUMENTS,
            f"Processing {file_count} files...",
            current=0,
            total=file_count,
        )

        adder = DocumentAdder(
            kb_name=kb_name,
            base_dir=base_dir,
            api_key=api_key,
            base_url=base_url,
            progress_tracker=tracker,
            rag_provider=rag_provider,
        )

        new_files = list(map(Path, uploaded_file_paths))
        processed_files = await adder.process_new_documents(new_files)

        # Item extraction only runs when at least one document was processed.
        if processed_files:
            tracker.update(
                ProgressStage.EXTRACTING_ITEMS,
                "Extracting numbered items...",
                current=0,
                total=len(processed_files),
            )
            adder.extract_numbered_items_for_new_docs(processed_files, batch_size=20)

        adder.update_metadata(len(new_files))

        processed_count = len(processed_files)
        tracker.update(
            ProgressStage.COMPLETED,
            f"Successfully processed {processed_count} files!",
            current=processed_count,
            total=processed_count,
        )

        logger.success(f"[{task_id}] Processed {processed_count} files to KB '{kb_name}'")
        tasks.update_task_status(task_id, "completed")
    except Exception as exc:
        failure = f"Upload processing failed (KB '{kb_name}'): {exc}"
        logger.error(f"[{task_id}] {failure}")

        tasks.update_task_status(task_id, "error", error=failure)

        tracker.update(
            ProgressStage.ERROR,
            f"Processing failed: {failure}",
            error=failure,
        )
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
@router.get("/health")
async def health_check():
    """Report whether the knowledge base manager and its storage are reachable.

    Returns a dict with ``status == "ok"`` plus the manager's config file,
    base directory and KB count. If anything raises, a dict with
    ``status == "error"``, the error text and the formatted traceback is
    returned instead (no HTTP error is raised).
    """
    try:
        kb_mgr = get_kb_manager()
        has_config = kb_mgr.config_file.exists()
        total_kbs = len(kb_mgr.list_knowledge_bases())
        report = {
            "status": "ok",
            "config_file": str(kb_mgr.config_file),
            "config_exists": has_config,
            "base_dir": str(kb_mgr.base_dir),
            "base_dir_exists": kb_mgr.base_dir.exists(),
            "knowledge_bases_count": total_kbs,
        }
        return report
    except Exception as exc:
        # NOTE(review): the traceback is sent to the client; consider whether
        # that level of detail should be exposed outside of debugging.
        failure = {
            "status": "error",
            "error": str(exc),
            "traceback": traceback.format_exc(),
        }
        return failure
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
@router.get("/rag-providers")
async def get_rag_providers():
    """Return the providers listed by ``RAGService.list_providers()``.

    The service is imported inside the handler, so an import failure is
    reported like any other error: logged and surfaced as HTTP 500.
    """
    try:
        from src.services.rag.service import RAGService

        return {"providers": RAGService.list_providers()}
    except Exception as exc:
        logger.error(f"Error getting RAG providers: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
@router.get("/configs")
async def get_all_kb_configs():
    """Return every KB configuration held by the KB config service.

    Any failure, including the in-handler import, is logged and surfaced as
    HTTP 500.
    """
    try:
        from src.services.config import get_kb_config_service

        all_configs = get_kb_config_service().get_all_configs()
        return all_configs
    except Exception as exc:
        logger.error(f"Error getting KB configs: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
@router.get("/{kb_name}/config")
async def get_kb_config(kb_name: str):
    """Return the stored configuration of one knowledge base.

    The response carries the requested ``kb_name`` and the ``config`` returned
    by the KB config service. Any failure is logged and surfaced as HTTP 500.
    """
    try:
        from src.services.config import get_kb_config_service

        kb_config = get_kb_config_service().get_kb_config(kb_name)
        return {"kb_name": kb_name, "config": kb_config}
    except Exception as exc:
        logger.error(f"Error getting config for KB '{kb_name}': {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
@router.put("/{kb_name}/config")
async def update_kb_config(kb_name: str, config: dict):
    """Store a new configuration for one knowledge base and echo the result.

    After writing ``config`` through the KB config service, the configuration
    is read back and returned so the caller sees what the service now holds.
    Any failure is logged and surfaced as HTTP 500.
    """
    try:
        from src.services.config import get_kb_config_service

        config_service = get_kb_config_service()
        config_service.set_kb_config(kb_name, config)
        stored = config_service.get_kb_config(kb_name)
        return {"status": "success", "kb_name": kb_name, "config": stored}
    except Exception as exc:
        logger.error(f"Error updating config for KB '{kb_name}': {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
@router.post("/configs/sync")
async def sync_configs_from_metadata():
    """Ask the KB config service to sync from metadata under the KB base dir.

    Delegates to ``sync_all_from_metadata(_kb_base_dir)`` and returns a fixed
    success payload. Any failure is logged and surfaced as HTTP 500.
    """
    try:
        from src.services.config import get_kb_config_service

        get_kb_config_service().sync_all_from_metadata(_kb_base_dir)
        outcome = {
            "status": "success",
            "message": "Configurations synced from metadata files",
        }
        return outcome
    except Exception as exc:
        logger.error(f"Error syncing configs: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
@router.get("/default")
async def get_default_kb():
    """Return the manager's current default knowledge base as ``default_kb``.

    Any failure is logged and surfaced as HTTP 500.
    """
    try:
        current_default = get_kb_manager().get_default()
        return {"default_kb": current_default}
    except Exception as exc:
        logger.error(f"Error getting default KB: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
@router.put("/default/{kb_name}")
async def set_default_kb(kb_name: str):
    """Make ``kb_name`` the default knowledge base.

    Responds with HTTP 404 when the name is not among the manager's knowledge
    bases, and with HTTP 500 (after logging) on any other failure.
    """
    try:
        kb_mgr = get_kb_manager()

        # Refuse names the manager does not know about.
        known_names = kb_mgr.list_knowledge_bases()
        if kb_name not in known_names:
            raise HTTPException(status_code=404, detail=f"Knowledge base '{kb_name}' not found")

        kb_mgr.set_default(kb_name)
        return {"status": "success", "default_kb": kb_name}
    except HTTPException:
        # Pass the 404 through instead of re-wrapping it as a 500.
        raise
    except Exception as exc:
        logger.error(f"Error setting default KB: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
@router.get("/list", response_model=list[KnowledgeBaseInfo])
async def list_knowledge_bases():
    """Return a KnowledgeBaseInfo entry for every knowledge base.

    Each KB is loaded independently. When loading one fails, the error is
    recorded and, if the KB's directory exists, a placeholder entry with
    zeroed statistics is returned for it instead. HTTP 500 is raised only
    when errors occurred and no entry at all could be produced, or when an
    unexpected error happens outside the per-KB handling.
    """
    try:
        kb_mgr = get_kb_manager()
        kb_names = kb_mgr.list_knowledge_bases()

        logger.info(f"Found {len(kb_names)} knowledge bases: {kb_names}")

        if not kb_names:
            logger.info("No knowledge bases found, returning empty list")
            return []

        loaded = []
        failures = []

        for name in kb_names:
            try:
                info = kb_mgr.get_info(name)
                logger.debug(f"Successfully got info for KB '{name}': {info.get('statistics', {})}")
                entry = KnowledgeBaseInfo(
                    name=info["name"],
                    is_default=info["is_default"],
                    statistics=info.get("statistics", {}),
                )
                loaded.append(entry)
            except Exception as exc:
                failure = f"Error getting info for KB '{name}': {exc}"
                failures.append(failure)
                logger.warning(f"{failure}\n{traceback.format_exc()}")
                # Best effort: still list the KB if its directory is on disk.
                try:
                    if (kb_mgr.base_dir / name).exists():
                        logger.info(f"KB '{name}' directory exists, creating fallback info")
                        placeholder_stats = {
                            "raw_documents": 0,
                            "images": 0,
                            "content_lists": 0,
                            "rag_initialized": False,
                        }
                        loaded.append(
                            KnowledgeBaseInfo(
                                name=name,
                                is_default=name == kb_mgr.get_default(),
                                statistics=placeholder_stats,
                            )
                        )
                except Exception as fallback_err:
                    logger.error(f"Fallback also failed for KB '{name}': {fallback_err}")

        if failures:
            if not loaded:
                # Nothing usable at all: report every collected error.
                error_detail = f"Failed to load knowledge bases. Errors: {'; '.join(failures)}"
                logger.error(error_detail)
                raise HTTPException(status_code=500, detail=error_detail)
            logger.warning(
                f"Some KBs had errors, returning {len(loaded)} results. Errors: {failures}"
            )

        logger.info(f"Returning {len(loaded)} knowledge bases")
        return loaded
    except HTTPException:
        # Keep the status/detail chosen above.
        raise
    except Exception as exc:
        summary = f"Error listing knowledge bases: {exc}"
        logger.error(f"{summary}\n{traceback.format_exc()}")
        raise HTTPException(status_code=500, detail=f"Failed to list knowledge bases: {exc!s}")
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
@router.get("/{kb_name}")
async def get_knowledge_base_details(kb_name: str):
    """Get detailed info for a specific KB.

    Args:
        kb_name: Name of the knowledge base to look up.

    Returns:
        Whatever ``KnowledgeBaseManager.get_info`` returns for ``kb_name``.

    Raises:
        HTTPException: 404 when the manager raises ``ValueError`` (treated as
            "no such knowledge base"); 500 on any other failure.
    """
    try:
        manager = get_kb_manager()
        return manager.get_info(kb_name)
    except ValueError as e:
        # Chain the cause so the original error stays visible in tracebacks.
        raise HTTPException(
            status_code=404, detail=f"Knowledge base '{kb_name}' not found"
        ) from e
    except Exception as e:
        # Log before converting to a 500, as the other handlers in this module do.
        logger.error(f"Error getting details for KB '{kb_name}': {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
@router.delete("/{kb_name}")
async def delete_knowledge_base(kb_name: str):
    """Delete a knowledge base.

    Args:
        kb_name: Name of the knowledge base to delete.

    Returns:
        A dict with a confirmation ``message``.

    Raises:
        HTTPException: 400 when the manager reports that deletion did not
            succeed; 404 when the manager raises ``ValueError`` (treated as
            "no such knowledge base"); 500 on any other failure.
    """
    try:
        manager = get_kb_manager()
        success = manager.delete_knowledge_base(kb_name, confirm=True)
        if not success:
            raise HTTPException(status_code=400, detail="Failed to delete knowledge base")
        logger.info(f"KB '{kb_name}' deleted")
        return {"message": f"Knowledge base '{kb_name}' deleted successfully"}
    except HTTPException:
        # HTTPException derives from Exception: without this clause the 400
        # raised above would be caught below and re-wrapped as a 500.
        raise
    except ValueError as e:
        raise HTTPException(
            status_code=404, detail=f"Knowledge base '{kb_name}' not found"
        ) from e
    except Exception as e:
        # Log before converting to a 500, as the other handlers in this module do.
        logger.error(f"Error deleting KB '{kb_name}': {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
@router.post("/{kb_name}/upload")
async def upload_files(
    kb_name: str,
    background_tasks: BackgroundTasks,
    files: list[UploadFile] = File(...),
    rag_provider: str = Form(None),
):
    """Upload files to a knowledge base and process them in background.

    Each file is streamed into the knowledge base's ``raw`` directory in 8 KiB
    chunks while the running byte count is checked against
    ``DocumentValidator.MAX_FILE_SIZE``. Once every file has been saved, a
    background task is scheduled to process the saved paths.

    Args:
        kb_name: Target knowledge base (from the URL path).
        background_tasks: FastAPI task queue used to schedule processing.
        files: Uploaded files from the multipart form.
        rag_provider: Optional provider name forwarded to the processing task.

    Returns:
        A dict with a ``message`` and the list of saved ``files`` names.

    Raises:
        HTTPException: 400 when a file fails validation or exceeds the size
            limit, 404 when a ``ValueError`` escapes (reported as an unknown
            knowledge base), 500 for LLM configuration errors and any other
            unexpected failure.
    """
    try:
        manager = get_kb_manager()
        kb_path = manager.get_knowledge_base_path(kb_name)
        raw_dir = kb_path / "raw"
        raw_dir.mkdir(parents=True, exist_ok=True)

        try:
            llm_config = get_llm_config()
            api_key = llm_config.api_key
            base_url = llm_config.base_url
        except ValueError as e:
            raise HTTPException(status_code=500, detail=f"LLM config error: {e!s}") from e

        uploaded_files = []
        uploaded_file_paths = []

        # 1. Save files and validate size during streaming
        for file in files:
            file_path = None
            try:
                # Sanitize filename first (without size validation)
                sanitized_filename = DocumentValidator.validate_upload_safety(file.filename, None)
                file.filename = sanitized_filename

                # Save file to disk with size checking during streaming
                file_path = raw_dir / file.filename
                max_size = DocumentValidator.MAX_FILE_SIZE
                written_bytes = 0
                with open(file_path, "wb") as buffer:
                    while chunk := file.file.read(8192):
                        written_bytes += len(chunk)
                        if written_bytes > max_size:
                            # Format size in human-readable format
                            size_str = format_bytes_human_readable(max_size)
                            raise HTTPException(
                                status_code=400,
                                detail=f"File '{file.filename}' exceeds maximum size limit of {size_str}",
                            )
                        buffer.write(chunk)

                # Validate with actual size (additional checks)
                DocumentValidator.validate_upload_safety(file.filename, written_bytes)

                uploaded_files.append(file.filename)
                uploaded_file_paths.append(str(file_path))

            except Exception as e:
                # Clean up partially saved file (best effort: a failed unlink
                # must not hide the original validation error).
                if file_path and file_path.exists():
                    try:
                        os.unlink(file_path)
                    except OSError:
                        pass

                error_message = (
                    f"Validation failed for file '{file.filename}': {format_exception_message(e)}"
                )
                logger.error(error_message, exc_info=True)
                raise HTTPException(status_code=400, detail=error_message) from e

        logger.info(f"Uploading {len(uploaded_files)} files to KB '{kb_name}'")

        background_tasks.add_task(
            run_upload_processing_task,
            kb_name=kb_name,
            base_dir=str(_kb_base_dir),
            api_key=api_key,
            base_url=base_url,
            uploaded_file_paths=uploaded_file_paths,
            rag_provider=rag_provider,
        )

        return {
            "message": f"Uploaded {len(uploaded_files)} files. Processing in background.",
            "files": uploaded_files,
        }
    except HTTPException:
        # Responses built above (400 validation, 500 LLM config) already carry
        # the intended status; without this clause the generic handler below
        # would turn every one of them into a 500.
        raise
    except ValueError as e:
        raise HTTPException(
            status_code=404, detail=f"Knowledge base '{kb_name}' not found"
        ) from e
    except Exception as e:
        # Unexpected failure (Server error)
        formatted_error = format_exception_message(e)
        raise HTTPException(status_code=500, detail=formatted_error) from e
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
@router.post("/create")
async def create_knowledge_base(
    background_tasks: BackgroundTasks,
    name: str = Form(...),
    files: list[UploadFile] = File(...),
    rag_provider: str = Form("raganything"),
):
    """Create a new knowledge base and initialize it with files.

    The uploaded files are saved into the initializer's ``raw_dir`` and the
    actual document processing is scheduled as a background task; progress is
    reported through a ``ProgressTracker``.

    Args:
        background_tasks: FastAPI task queue used to schedule initialization.
        name: Name of the knowledge base to create.
        files: Uploaded files from the multipart form.
        rag_provider: Provider name forwarded to the initializer.

    Returns:
        A dict with a ``message``, the KB ``name`` and the saved ``files``.

    Raises:
        HTTPException: 400 when the name already exists or a filename fails
            validation, 500 for LLM configuration errors and any other
            failure.
    """
    try:
        manager = get_kb_manager()
        if name in manager.list_knowledge_bases():
            raise HTTPException(status_code=400, detail=f"Knowledge base '{name}' already exists")

        try:
            llm_config = get_llm_config()
            api_key = llm_config.api_key
            base_url = llm_config.base_url
        except ValueError as e:
            raise HTTPException(status_code=500, detail=f"LLM config error: {e!s}") from e

        # Client-supplied filenames are untrusted: joining them onto raw_dir
        # unchecked would let a name with path components write outside the
        # knowledge base. Sanitize all of them (same validator call the upload
        # endpoint uses) before anything is created on disk, so a rejected
        # name does not leave a half-created knowledge base behind.
        # NOTE(review): unlike the upload endpoint, file size is not limited here.
        safe_filenames = []
        for file in files:
            try:
                safe_filenames.append(
                    DocumentValidator.validate_upload_safety(file.filename, None)
                )
            except Exception as e:
                error_message = (
                    f"Validation failed for file '{file.filename}': {format_exception_message(e)}"
                )
                logger.error(error_message, exc_info=True)
                raise HTTPException(status_code=400, detail=error_message) from e

        progress_tracker = ProgressTracker(name, _kb_base_dir)

        logger.info(f"Creating KB: {name}")

        progress_tracker.update(
            ProgressStage.INITIALIZING, "Initializing knowledge base...", current=0, total=0
        )

        initializer = KnowledgeBaseInitializer(
            kb_name=name,
            base_dir=str(_kb_base_dir),
            api_key=api_key,
            base_url=base_url,
            progress_tracker=progress_tracker,
            rag_provider=rag_provider,
        )

        initializer.create_directory_structure()

        # Fetch the manager again and make sure the new KB shows up in its
        # listing; register it explicitly when it does not.
        manager = get_kb_manager()
        if name not in manager.list_knowledge_bases():
            logger.warning(f"KB {name} not found in config, registering manually")
            initializer._register_to_config()

        uploaded_files = []
        for file, safe_name in zip(files, safe_filenames):
            file_path = initializer.raw_dir / safe_name
            with open(file_path, "wb") as buffer:
                shutil.copyfileobj(file.file, buffer)
            uploaded_files.append(safe_name)

        progress_tracker.update(
            ProgressStage.PROCESSING_DOCUMENTS,
            f"Saved {len(uploaded_files)} files, preparing to process...",
            current=0,
            total=len(uploaded_files),
        )

        background_tasks.add_task(run_initialization_task, initializer)

        logger.success(f"KB '{name}' created, processing {len(uploaded_files)} files in background")

        return {
            "message": f"Knowledge base '{name}' created. Processing {len(uploaded_files)} files in background.",
            "name": name,
            "files": uploaded_files,
        }

    except HTTPException:
        # Keep the status codes chosen above instead of re-wrapping as 500.
        raise
    except Exception as e:
        logger.error(f"Failed to create KB: {e}")
        logger.debug(traceback.format_exc())
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
@router.get("/{kb_name}/progress")
async def get_progress(kb_name: str):
    """Report the stored initialization progress of a knowledge base.

    Returns the tracker's progress record, or a ``not_started`` placeholder
    when the tracker has nothing recorded. Any failure becomes a 500.
    """
    try:
        snapshot = ProgressTracker(kb_name, _kb_base_dir).get_progress()
        if snapshot is not None:
            return snapshot
        # Nothing recorded yet for this knowledge base.
        return {"status": "not_started", "message": "Initialization not started"}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
@router.post("/{kb_name}/progress/clear")
async def clear_progress(kb_name: str):
    """Reset the stored progress of a knowledge base (handy for stuck states).

    Returns a success payload once the tracker has been cleared; any failure
    is reported as a 500 carrying the exception text.
    """
    try:
        ProgressTracker(kb_name, _kb_base_dir).clear()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {"status": "success", "message": f"Progress cleared for {kb_name}"}
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
@router.websocket("/{kb_name}/progress/ws")
async def websocket_progress(websocket: WebSocket, kb_name: str):
    """WebSocket endpoint for real-time progress updates.

    After accepting the connection and registering it with the
    ``ProgressBroadcaster``, the handler optionally sends the currently stored
    progress, then polls the ``ProgressTracker`` about once per second and
    pushes a ``{"type": "progress", "data": ...}`` message whenever the
    record's timestamp changes. The loop ends when the client disconnects, an
    error occurs, or a ``completed``/``error`` stage has been sent.

    Args:
        websocket: The client connection.
        kb_name: Knowledge base whose progress is streamed (from the URL path).
    """
    await websocket.accept()

    broadcaster = ProgressBroadcaster.get_instance()

    try:
        await broadcaster.connect(kb_name, websocket)

        progress_tracker = ProgressTracker(kb_name, _kb_base_dir)
        initial_progress = progress_tracker.get_progress()

        # Check if KB is already ready (has rag_storage)
        kb_dir = _kb_base_dir / kb_name
        rag_storage_dir = kb_dir / "rag_storage"
        kb_is_ready = rag_storage_dir.exists() and rag_storage_dir.is_dir()

        # Terminal stages are always sent. An in-flight stage is sent only
        # when the KB is not ready yet, or when the record is recent (within
        # 5 minutes) - an older one on a ready KB is treated as stale.
        if initial_progress:
            stage = initial_progress.get("stage")
            timestamp = initial_progress.get("timestamp")

            should_send = False
            if stage in ["completed", "error"] or not kb_is_ready:
                should_send = True
            elif timestamp:
                # Check if progress is recent
                try:
                    progress_time = datetime.fromisoformat(timestamp)
                    # Match "now" to the timestamp's awareness: tzinfo is None
                    # for naive timestamps (plain local time, as before) and a
                    # real zone for aware ones, so the subtraction never mixes
                    # naive and aware values.
                    now = datetime.now(tz=progress_time.tzinfo)
                    age_seconds = (now - progress_time).total_seconds()
                    if age_seconds < 300:  # 5 minutes
                        should_send = True
                except (TypeError, ValueError):
                    # Unparseable timestamp: treat the record as stale.
                    pass

            if should_send:
                await websocket.send_json({"type": "progress", "data": initial_progress})

        last_timestamp = initial_progress.get("timestamp") if initial_progress else None

        while True:
            try:
                try:
                    # Waiting on the client doubles as the poll interval and
                    # surfaces disconnects; incoming text itself is ignored.
                    await asyncio.wait_for(websocket.receive_text(), timeout=1.0)
                except asyncio.TimeoutError:
                    current_progress = progress_tracker.get_progress()
                    if current_progress:
                        current_timestamp = current_progress.get("timestamp")
                        if current_timestamp != last_timestamp:
                            await websocket.send_json(
                                {"type": "progress", "data": current_progress}
                            )
                            last_timestamp = current_timestamp

                            if current_progress.get("stage") in ["completed", "error"]:
                                # Pause before leaving the loop so the socket is
                                # not closed immediately after the final message.
                                await asyncio.sleep(3)
                                break
                    continue

            except WebSocketDisconnect:
                break
            except Exception:
                break

    except Exception as e:
        logger.debug(f"Progress WS error: {e}")
        try:
            await websocket.send_json({"type": "error", "message": str(e)})
        except Exception:
            # The socket may already be unusable; nothing more to report.
            pass
    finally:
        await broadcaster.disconnect(kb_name, websocket)
        try:
            await websocket.close()
        except Exception:
            # Already closed by the client or by an earlier failure.
            pass
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
@router.post("/{kb_name}/link-folder", response_model=LinkedFolderInfo)
async def link_folder(kb_name: str, request: LinkFolderRequest):
    """
    Link a local folder to a knowledge base.

    This allows syncing documents from a local folder (which can be
    synced with SharePoint, Google Drive, OneLake, etc.) to the KB.

    The folder path supports:
    - Absolute paths: /Users/name/Documents or C:\\Users\\name\\Documents
    - Home directory: ~/Documents
    - Relative paths (resolved from server working directory)
    """
    try:
        linked = get_kb_manager().link_folder(kb_name, request.folder_path)
        logger.info(f"Linked folder '{request.folder_path}' to KB '{kb_name}'")
        return LinkedFolderInfo(**linked)
    except ValueError as exc:
        # A message mentioning "not found" maps to 404; every other
        # ValueError is reported as a bad request.
        detail = str(exc)
        status = 404 if "not found" in detail.lower() else 400
        raise HTTPException(status_code=status, detail=detail)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
@router.get("/{kb_name}/linked-folders", response_model=list[LinkedFolderInfo])
async def get_linked_folders(kb_name: str):
    """List the folders currently linked to a knowledge base.

    Each record returned by the manager is converted to ``LinkedFolderInfo``.
    A ``ValueError`` is reported as 404, anything else as 500.
    """
    try:
        linked = []
        for record in get_kb_manager().get_linked_folders(kb_name):
            linked.append(LinkedFolderInfo(**record))
        return linked
    except ValueError:
        raise HTTPException(status_code=404, detail=f"Knowledge base '{kb_name}' not found")
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
@router.delete("/{kb_name}/linked-folders/{folder_id}")
async def unlink_folder(kb_name: str, folder_id: str):
    """Unlink a folder from a knowledge base.

    Args:
        kb_name: Knowledge base the folder is linked to (from the URL path).
        folder_id: Identifier of the linked folder to remove.

    Returns:
        A dict with a confirmation ``message`` and the removed ``folder_id``.

    Raises:
        HTTPException: 404 when the manager reports the folder was not
            unlinked or raises ``ValueError``, 500 for any other failure.
    """
    try:
        manager = get_kb_manager()
        success = manager.unlink_folder(kb_name, folder_id)
        if not success:
            raise HTTPException(status_code=404, detail=f"Folder '{folder_id}' not found")
        logger.info(f"Unlinked folder '{folder_id}' from KB '{kb_name}'")
        return {"message": "Folder unlinked successfully", "folder_id": folder_id}
    except HTTPException:
        # Pass the deliberate 404 above through unchanged; without this clause
        # the generic handler below would re-wrap it as a 500.
        raise
    except ValueError as e:
        raise HTTPException(
            status_code=404, detail=f"Knowledge base '{kb_name}' not found"
        ) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
753
|
+
|
|
754
|
+
|
|
755
|
+
@router.post("/{kb_name}/sync-folder/{folder_id}")
async def sync_folder(kb_name: str, folder_id: str, background_tasks: BackgroundTasks):
    """
    Sync files from a linked folder to the knowledge base.

    Looks up the linked folder by id, asks the manager which files are new or
    modified, and schedules a background task to process them. Returns early
    with an empty result when there is nothing to sync.
    """
    try:
        manager = get_kb_manager()

        # Locate the linked folder whose id matches the request.
        matched = None
        for candidate in manager.get_linked_folders(kb_name):
            if candidate["id"] == folder_id:
                matched = candidate
                break

        if not matched:
            raise HTTPException(status_code=404, detail=f"Linked folder '{folder_id}' not found")

        folder_path = matched["path"]

        # New and modified files are processed together.
        changes = manager.detect_folder_changes(kb_name, folder_id)
        pending = changes["new_files"] + changes["modified_files"]

        if not pending:
            return {"message": "No new or modified files to sync", "files": [], "file_count": 0}

        # LLM credentials are needed by the processing task.
        try:
            llm_config = get_llm_config()
            api_key = llm_config.api_key
            base_url = llm_config.base_url
        except ValueError as exc:
            raise HTTPException(status_code=500, detail=f"LLM config error: {exc!s}")

        pending_count = len(pending)
        logger.info(
            f"Syncing {pending_count} files from folder '{folder_path}' to KB '{kb_name}'"
        )

        # Sync state is deliberately NOT updated here. Per the original note,
        # run_upload_processing_task updates it only after successful
        # processing (that is why folder_id is passed along), so files are
        # not marked as synced when processing fails.
        background_tasks.add_task(
            run_upload_processing_task,
            kb_name=kb_name,
            base_dir=str(_kb_base_dir),
            api_key=api_key,
            base_url=base_url,
            uploaded_file_paths=pending,
            folder_id=folder_id,
        )

        return {
            "message": f"Syncing {pending_count} files from linked folder",
            "folder_path": folder_path,
            "new_files": changes["new_count"],
            "modified_files": changes["modified_count"],
            "file_count": pending_count,
        }
    except HTTPException:
        raise
    except ValueError:
        raise HTTPException(status_code=404, detail=f"Knowledge base '{kb_name}' not found")
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|