realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
src/knowledge/manager.py
ADDED
|
@@ -0,0 +1,782 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Knowledge Base Manager
|
|
5
|
+
|
|
6
|
+
Manages multiple knowledge bases and provides utilities for accessing them.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
import hashlib
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
import shutil
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class KnowledgeBaseManager:
|
|
17
|
+
"""Manager for knowledge bases"""
|
|
18
|
+
|
|
19
|
+
def __init__(self, base_dir="./data/knowledge_bases"):
    """Set up the manager on top of a storage root.

    The root directory is created when missing (including parents) and the
    knowledge base registry is loaded from ``kb_config.json`` inside it.

    Args:
        base_dir: Directory that holds one sub-directory per knowledge base.
    """
    storage_root = Path(base_dir)
    self.base_dir = storage_root
    storage_root.mkdir(parents=True, exist_ok=True)

    # Registry file that tracks the known knowledge bases.
    self.config_file = storage_root / "kb_config.json"
    self.config = self._load_config()
|
|
26
|
+
|
|
27
|
+
def _load_config(self) -> dict:
    """Load knowledge base configuration (kb_config.json only stores KB list).

    A legacy top-level ``"default"`` entry is dropped on load, and the cleaned
    registry is written back to disk on a best-effort basis.

    Returns:
        The parsed registry, or ``{"knowledge_bases": {}}`` when the config
        file does not exist.
    """
    if not self.config_file.exists():
        return {"knowledge_bases": {}}

    with open(self.config_file, encoding="utf-8") as f:
        config = json.load(f)

    # Migration: remove old "default" field if present
    if "default" in config:
        del config["default"]
        # The in-memory registry is already migrated, so a failed write-back
        # must not block loading -- but it is reported instead of being
        # silently ignored.
        try:
            with open(self.config_file, "w", encoding="utf-8") as wf:
                json.dump(config, wf, indent=2, ensure_ascii=False)
        except (OSError, ValueError) as e:
            print(f"Warning: Failed to save migrated knowledge base config: {e}")

    return config
|
|
43
|
+
|
|
44
|
+
def _save_config(self):
    """Save knowledge base configuration to kb_config.json.

    The registry is serialized in memory first, so a serialization error
    (for example a non-JSON value inside ``self.config``) is raised before
    the existing file is opened for writing and truncated.
    """
    payload = json.dumps(self.config, indent=2, ensure_ascii=False)
    with open(self.config_file, "w", encoding="utf-8") as f:
        f.write(payload)
|
|
48
|
+
|
|
49
|
+
def list_knowledge_bases(self) -> list[str]:
    """Return the sorted names of the available knowledge bases.

    kb_config.json is the authoritative source: every registered name whose
    directory exists under ``base_dir`` is returned, and a warning is printed
    for registered names without a directory. Only when that yields nothing
    does the method fall back to scanning ``base_dir`` for sub-directories
    that contain a ``metadata.json`` (backward compatibility).
    """
    names = []

    registered = self.config.get("knowledge_bases", {})
    for candidate in registered.keys():
        location = self.base_dir / candidate
        if not (location.exists() and location.is_dir()):
            # Registered but missing on disk: warn and leave it out.
            print(
                f"Warning: Knowledge base '{candidate}' is in config but directory does not exist: {location}"
            )
            continue
        names.append(candidate)

    # Nothing usable came from the registry: discover KBs by directory scan.
    if not names and self.base_dir.exists():
        names = [
            entry.name
            for entry in self.base_dir.iterdir()
            if entry.is_dir()
            and entry.name != "__pycache__"
            and (entry / "metadata.json").exists()
        ]

    names.sort()
    return names
|
|
76
|
+
|
|
77
|
+
def register_knowledge_base(self, name: str, description: str = "", set_default: bool = False):
    """Register a knowledge base in kb_config.json.

    Args:
        name: Knowledge base name; must be an existing directory under ``base_dir``.
        description: Free-text description stored with the registration.
        set_default: When True, also make this knowledge base the default.

    Raises:
        ValueError: If ``base_dir / name`` is not an existing directory.
    """
    kb_dir = self.base_dir / name
    # is_dir() also rejects a regular file that merely carries the KB's name.
    if not kb_dir.is_dir():
        raise ValueError(f"Knowledge base directory does not exist: {kb_dir}")

    self.config.setdefault("knowledge_bases", {})[name] = {
        "path": name,
        "description": description,
    }

    # Persist the registration first so it is not lost if setting the default fails.
    self._save_config()

    # Only set default if explicitly requested
    if set_default:
        self.set_default(name)
|
|
93
|
+
|
|
94
|
+
def get_knowledge_base_path(self, name: str | None = None) -> Path:
|
|
95
|
+
"""Get path to a knowledge base"""
|
|
96
|
+
if name is None:
|
|
97
|
+
name = self.config.get("default")
|
|
98
|
+
if name is None:
|
|
99
|
+
raise ValueError("No default knowledge base set")
|
|
100
|
+
|
|
101
|
+
kb_dir = self.base_dir / name
|
|
102
|
+
if not kb_dir.exists():
|
|
103
|
+
raise ValueError(f"Knowledge base not found: {name}")
|
|
104
|
+
|
|
105
|
+
return kb_dir
|
|
106
|
+
|
|
107
|
+
def get_rag_storage_path(self, name: str | None = None) -> Path:
|
|
108
|
+
"""Get RAG storage path for a knowledge base"""
|
|
109
|
+
kb_dir = self.get_knowledge_base_path(name)
|
|
110
|
+
rag_storage = kb_dir / "rag_storage"
|
|
111
|
+
if not rag_storage.exists():
|
|
112
|
+
raise ValueError(f"RAG storage not found for knowledge base: {name or 'default'}")
|
|
113
|
+
return rag_storage
|
|
114
|
+
|
|
115
|
+
def get_images_path(self, name: str | None = None) -> Path:
|
|
116
|
+
"""Get images path for a knowledge base"""
|
|
117
|
+
kb_dir = self.get_knowledge_base_path(name)
|
|
118
|
+
return kb_dir / "images"
|
|
119
|
+
|
|
120
|
+
def get_content_list_path(self, name: str | None = None) -> Path:
|
|
121
|
+
"""Get content list path for a knowledge base"""
|
|
122
|
+
kb_dir = self.get_knowledge_base_path(name)
|
|
123
|
+
return kb_dir / "content_list"
|
|
124
|
+
|
|
125
|
+
def get_raw_path(self, name: str | None = None) -> Path:
|
|
126
|
+
"""Get raw documents path for a knowledge base"""
|
|
127
|
+
kb_dir = self.get_knowledge_base_path(name)
|
|
128
|
+
return kb_dir / "raw"
|
|
129
|
+
|
|
130
|
+
def set_default(self, name: str):
    """Make ``name`` the default knowledge base via the centralized config service.

    The default is not stored in kb_config.json. A failure to reach or update
    the config service is reported with a printed warning, not raised.

    Raises:
        ValueError: If ``name`` is not among the listed knowledge bases.
    """
    known = self.list_knowledge_bases()
    if name not in known:
        raise ValueError(f"Knowledge base not found: {name}")

    try:
        from src.services.config import get_kb_config_service

        get_kb_config_service().set_default_kb(name)
    except Exception as exc:
        print(f"Warning: Failed to save default to centralized config: {exc}")
|
|
143
|
+
|
|
144
|
+
def get_default(self) -> str | None:
|
|
145
|
+
"""
|
|
146
|
+
Get default knowledge base name.
|
|
147
|
+
|
|
148
|
+
Priority:
|
|
149
|
+
1. Centralized config service (knowledge_base_configs.json)
|
|
150
|
+
2. First knowledge base in the list (auto-fallback)
|
|
151
|
+
"""
|
|
152
|
+
# Try centralized config first
|
|
153
|
+
try:
|
|
154
|
+
from src.services.config import get_kb_config_service
|
|
155
|
+
|
|
156
|
+
kb_config_service = get_kb_config_service()
|
|
157
|
+
default_kb = kb_config_service.get_default_kb()
|
|
158
|
+
if default_kb and default_kb in self.list_knowledge_bases():
|
|
159
|
+
return default_kb
|
|
160
|
+
except Exception:
|
|
161
|
+
pass
|
|
162
|
+
|
|
163
|
+
# Fallback to first knowledge base in sorted list
|
|
164
|
+
kb_list = self.list_knowledge_bases()
|
|
165
|
+
if kb_list:
|
|
166
|
+
return kb_list[0]
|
|
167
|
+
|
|
168
|
+
return None
|
|
169
|
+
|
|
170
|
+
def get_metadata(self, name: str | None = None) -> dict:
|
|
171
|
+
"""Get knowledge base metadata"""
|
|
172
|
+
kb_dir = self.get_knowledge_base_path(name)
|
|
173
|
+
metadata_file = kb_dir / "metadata.json"
|
|
174
|
+
|
|
175
|
+
if metadata_file.exists():
|
|
176
|
+
with open(metadata_file, encoding="utf-8") as f:
|
|
177
|
+
return json.load(f)
|
|
178
|
+
|
|
179
|
+
return {}
|
|
180
|
+
|
|
181
|
+
def get_info(self, name: str | None = None) -> dict:
|
|
182
|
+
"""Get detailed information about a knowledge base.
|
|
183
|
+
|
|
184
|
+
This method:
|
|
185
|
+
1. Gets the KB name (from parameter or default)
|
|
186
|
+
2. Reads metadata.json from the KB directory
|
|
187
|
+
3. Collects statistics about files and RAG status
|
|
188
|
+
"""
|
|
189
|
+
kb_name = name or self.get_default()
|
|
190
|
+
if kb_name is None:
|
|
191
|
+
raise ValueError("No knowledge base name provided and no default set")
|
|
192
|
+
|
|
193
|
+
# Get knowledge base path
|
|
194
|
+
kb_dir = self.base_dir / kb_name
|
|
195
|
+
if not kb_dir.exists():
|
|
196
|
+
raise ValueError(f"Knowledge base directory does not exist: {kb_dir}")
|
|
197
|
+
|
|
198
|
+
# Verify knowledge base is in config (if not, give warning but don't block)
|
|
199
|
+
if kb_name not in self.config.get("knowledge_bases", {}):
|
|
200
|
+
print(
|
|
201
|
+
f"Warning: Knowledge base '{kb_name}' is not in kb_config.json, but directory exists"
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
info = {
|
|
205
|
+
"name": kb_name,
|
|
206
|
+
"path": str(kb_dir),
|
|
207
|
+
"is_default": kb_name == self.get_default(),
|
|
208
|
+
"metadata": {},
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
# Read metadata.json (if exists)
|
|
212
|
+
metadata_file = kb_dir / "metadata.json"
|
|
213
|
+
if metadata_file.exists():
|
|
214
|
+
try:
|
|
215
|
+
with open(metadata_file, encoding="utf-8") as f:
|
|
216
|
+
info["metadata"] = json.load(f)
|
|
217
|
+
except Exception as e:
|
|
218
|
+
print(f"Warning: Failed to read metadata.json for KB '{kb_name}': {e}")
|
|
219
|
+
info["metadata"] = {}
|
|
220
|
+
else:
|
|
221
|
+
# metadata.json doesn't exist, use empty dict
|
|
222
|
+
info["metadata"] = {}
|
|
223
|
+
|
|
224
|
+
# Count files - handle errors gracefully
|
|
225
|
+
raw_dir = kb_dir / "raw"
|
|
226
|
+
images_dir = kb_dir / "images"
|
|
227
|
+
content_list_dir = kb_dir / "content_list"
|
|
228
|
+
rag_storage_dir = kb_dir / "rag_storage"
|
|
229
|
+
|
|
230
|
+
try:
|
|
231
|
+
raw_count = (
|
|
232
|
+
len([f for f in raw_dir.iterdir() if f.is_file()]) if raw_dir.exists() else 0
|
|
233
|
+
)
|
|
234
|
+
except Exception:
|
|
235
|
+
raw_count = 0
|
|
236
|
+
|
|
237
|
+
try:
|
|
238
|
+
images_count = (
|
|
239
|
+
len([f for f in images_dir.iterdir() if f.is_file()]) if images_dir.exists() else 0
|
|
240
|
+
)
|
|
241
|
+
except Exception:
|
|
242
|
+
images_count = 0
|
|
243
|
+
|
|
244
|
+
try:
|
|
245
|
+
content_lists_count = (
|
|
246
|
+
len(list(content_list_dir.glob("*.json"))) if content_list_dir.exists() else 0
|
|
247
|
+
)
|
|
248
|
+
except Exception:
|
|
249
|
+
content_lists_count = 0
|
|
250
|
+
|
|
251
|
+
metadata = info["metadata"]
|
|
252
|
+
rag_provider = metadata.get("rag_provider") if isinstance(metadata, dict) else None
|
|
253
|
+
info["statistics"] = {
|
|
254
|
+
"raw_documents": raw_count,
|
|
255
|
+
"images": images_count,
|
|
256
|
+
"content_lists": content_lists_count,
|
|
257
|
+
"rag_initialized": rag_storage_dir.exists() and rag_storage_dir.is_dir(),
|
|
258
|
+
"rag_provider": rag_provider, # Add RAG provider info
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
# Try to get RAG statistics
|
|
262
|
+
if rag_storage_dir.exists() and rag_storage_dir.is_dir():
|
|
263
|
+
try:
|
|
264
|
+
entities_file = rag_storage_dir / "kv_store_full_entities.json"
|
|
265
|
+
relations_file = rag_storage_dir / "kv_store_full_relations.json"
|
|
266
|
+
chunks_file = rag_storage_dir / "kv_store_text_chunks.json"
|
|
267
|
+
|
|
268
|
+
rag_stats = {}
|
|
269
|
+
if entities_file.exists():
|
|
270
|
+
try:
|
|
271
|
+
with open(entities_file, encoding="utf-8") as f:
|
|
272
|
+
entities_data = json.load(f)
|
|
273
|
+
rag_stats["entities"] = (
|
|
274
|
+
len(entities_data) if isinstance(entities_data, (list, dict)) else 0
|
|
275
|
+
)
|
|
276
|
+
except Exception:
|
|
277
|
+
pass
|
|
278
|
+
|
|
279
|
+
if relations_file.exists():
|
|
280
|
+
try:
|
|
281
|
+
with open(relations_file, encoding="utf-8") as f:
|
|
282
|
+
relations_data = json.load(f)
|
|
283
|
+
rag_stats["relations"] = (
|
|
284
|
+
len(relations_data)
|
|
285
|
+
if isinstance(relations_data, (list, dict))
|
|
286
|
+
else 0
|
|
287
|
+
)
|
|
288
|
+
except Exception:
|
|
289
|
+
pass
|
|
290
|
+
|
|
291
|
+
if chunks_file.exists():
|
|
292
|
+
try:
|
|
293
|
+
with open(chunks_file, encoding="utf-8") as f:
|
|
294
|
+
chunks_data = json.load(f)
|
|
295
|
+
rag_stats["chunks"] = (
|
|
296
|
+
len(chunks_data) if isinstance(chunks_data, (list, dict)) else 0
|
|
297
|
+
)
|
|
298
|
+
except Exception:
|
|
299
|
+
pass
|
|
300
|
+
|
|
301
|
+
if rag_stats:
|
|
302
|
+
statistics = info["statistics"]
|
|
303
|
+
if isinstance(statistics, dict):
|
|
304
|
+
statistics["rag"] = rag_stats
|
|
305
|
+
except Exception:
|
|
306
|
+
pass
|
|
307
|
+
|
|
308
|
+
return info
|
|
309
|
+
|
|
310
|
+
def delete_knowledge_base(self, name: str, confirm: bool = False) -> bool:
    """
    Delete a knowledge base

    Args:
        name: Knowledge base name
        confirm: If True, skip confirmation (use with caution!)

    Returns:
        True if deleted successfully, False if the interactive confirmation
        was declined or could not be read.

    Raises:
        ValueError: If the knowledge base is not among the listed ones.
    """
    if name not in self.list_knowledge_bases():
        raise ValueError(f"Knowledge base not found: {name}")

    kb_dir = self.get_knowledge_base_path(name)

    if not confirm:
        # Ask for confirmation in CLI
        print(f"⚠️ Warning: This will permanently delete the knowledge base '{name}'")
        print(f" Path: {kb_dir}")
        try:
            response = input("Are you sure? Type 'yes' to confirm: ")
        except EOFError:
            # No interactive stdin (closed pipe, service context): an answer
            # that cannot be read is never treated as consent.
            response = ""
        # Tolerate surrounding whitespace in the typed answer.
        if response.strip().lower() != "yes":
            print("Deletion cancelled.")
            return False

    # Delete the directory
    shutil.rmtree(kb_dir)

    # Remove from config
    if name in self.config.get("knowledge_bases", {}):
        del self.config["knowledge_bases"][name]

    # Update the legacy in-config default if it pointed at the deleted KB
    if self.config.get("default") == name:
        remaining = self.list_knowledge_bases()
        self.config["default"] = remaining[0] if remaining else None

    self._save_config()
    return True
|
|
349
|
+
|
|
350
|
+
def clean_rag_storage(self, name: str | None = None, backup: bool = True) -> bool:
|
|
351
|
+
"""
|
|
352
|
+
Clean (delete) RAG storage for a knowledge base
|
|
353
|
+
Useful when RAG data is corrupted
|
|
354
|
+
|
|
355
|
+
Args:
|
|
356
|
+
name: Knowledge base name (default if not specified)
|
|
357
|
+
backup: If True, backup the RAG storage before deleting
|
|
358
|
+
|
|
359
|
+
Returns:
|
|
360
|
+
True if cleaned successfully
|
|
361
|
+
"""
|
|
362
|
+
kb_name = name or self.get_default()
|
|
363
|
+
kb_dir = self.get_knowledge_base_path(kb_name)
|
|
364
|
+
rag_storage_dir = kb_dir / "rag_storage"
|
|
365
|
+
|
|
366
|
+
if not rag_storage_dir.exists():
|
|
367
|
+
print(f"RAG storage does not exist for '{kb_name}'")
|
|
368
|
+
return False
|
|
369
|
+
|
|
370
|
+
# Backup if requested
|
|
371
|
+
if backup:
|
|
372
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
373
|
+
backup_dir = kb_dir / f"rag_storage_backup_{timestamp}"
|
|
374
|
+
shutil.copytree(rag_storage_dir, backup_dir)
|
|
375
|
+
print(f"✓ Backed up to: {backup_dir}")
|
|
376
|
+
|
|
377
|
+
# Delete RAG storage
|
|
378
|
+
shutil.rmtree(rag_storage_dir)
|
|
379
|
+
rag_storage_dir.mkdir(parents=True, exist_ok=True)
|
|
380
|
+
|
|
381
|
+
print(f"✓ RAG storage cleaned for '{kb_name}'")
|
|
382
|
+
return True
|
|
383
|
+
|
|
384
|
+
def link_folder(self, kb_name: str, folder_path: str) -> dict:
    """
    Link a local folder to a knowledge base.

    The link is recorded in the ``linked_folders`` list of the knowledge
    base's metadata.json. Linking the same folder again is idempotent and
    returns the entry that is already stored.

    Args:
        kb_name: Knowledge base name
        folder_path: Path to local folder (supports ~, relative paths)

    Returns:
        Dict with folder info including id, path, added_at and file_count

    Raises:
        ValueError: If KB not found or folder doesn't exist / is not a directory
    """
    if kb_name not in self.list_knowledge_bases():
        raise ValueError(f"Knowledge base not found: {kb_name}")

    # Normalize path (cross-platform: handles ~, relative paths, etc.)
    folder = Path(folder_path).expanduser().resolve()

    if not folder.exists():
        raise ValueError(f"Folder does not exist: {folder}")
    if not folder.is_dir():
        raise ValueError(f"Path is not a directory: {folder}")

    # Folder ID: short, stable digest of the resolved path (not security relevant).
    folder_id = hashlib.md5(  # noqa: S324
        str(folder).encode(), usedforsecurity=False
    ).hexdigest()[:8]

    # Load existing linked folders from metadata
    kb_dir = self.base_dir / kb_name
    metadata_file = kb_dir / "metadata.json"
    metadata: dict = {}

    if metadata_file.exists():
        try:
            with open(metadata_file, encoding="utf-8") as fp:
                metadata = json.load(fp)
        except Exception as e:
            # Keep going with empty metadata, but say so: the file is
            # rewritten below and its previous content will be replaced.
            print(f"Warning: Failed to read metadata.json for KB '{kb_name}': {e}")
            metadata = {}

    linked_folders = metadata.setdefault("linked_folders", [])

    # If already linked, treat as success (idempotent) and return existing info
    for item in linked_folders:
        if item.get("id") == folder_id:
            return item

    # Count supported files with a single recursive walk of the folder.
    supported_extensions = (".pdf", ".docx", ".doc", ".txt", ".md", ".markdown")
    file_count = sum(
        1
        for entry in folder.rglob("*")
        if entry.name.endswith(supported_extensions) and entry.is_file()
    )

    # Add folder info
    folder_info = {
        "id": folder_id,
        "path": str(folder),
        "added_at": datetime.now().isoformat(),
        "file_count": file_count,
    }
    linked_folders.append(folder_info)

    # Save metadata
    with open(metadata_file, "w", encoding="utf-8") as fp:
        json.dump(metadata, fp, indent=2, ensure_ascii=False)

    return folder_info
|
|
460
|
+
|
|
461
|
+
def get_linked_folders(self, kb_name: str) -> list[dict]:
    """
    Return the linked-folder records stored for a knowledge base.

    The records are read from the ``linked_folders`` key of
    ``<base_dir>/<kb_name>/metadata.json``.

    Args:
        kb_name: Knowledge base name

    Returns:
        List of linked folder info dicts. Empty when the metadata file is
        missing, cannot be read or parsed, or has no ``linked_folders`` key.

    Raises:
        ValueError: If ``kb_name`` is not among the known knowledge bases
    """
    known_kbs = self.list_knowledge_bases()
    if kb_name not in known_kbs:
        raise ValueError(f"Knowledge base not found: {kb_name}")

    metadata_path = self.base_dir / kb_name / "metadata.json"
    if not metadata_path.exists():
        return []

    try:
        raw_text = metadata_path.read_text(encoding="utf-8")
        return json.loads(raw_text).get("linked_folders", [])
    except Exception:
        # Best effort: unreadable or malformed metadata is reported as "no links".
        return []
|
|
486
|
+
|
|
487
|
+
def unlink_folder(self, kb_name: str, folder_id: str) -> bool:
    """
    Remove a linked-folder record from a knowledge base's metadata.

    Only the entry in ``metadata.json`` is removed.

    Args:
        kb_name: Knowledge base name
        folder_id: Folder ID to unlink

    Returns:
        True if an entry was removed and the metadata rewritten. False if
        the metadata file is missing or unreadable, or no entry has this ID.

    Raises:
        ValueError: If ``kb_name`` is not among the known knowledge bases
    """
    if kb_name not in self.list_knowledge_bases():
        raise ValueError(f"Knowledge base not found: {kb_name}")

    metadata_path = self.base_dir / kb_name / "metadata.json"
    if not metadata_path.exists():
        return False

    try:
        with open(metadata_path, encoding="utf-8") as reader:
            metadata = json.load(reader)
    except Exception:
        return False

    current = metadata.get("linked_folders", [])
    remaining = []
    for entry in current:
        if entry["id"] != folder_id:
            remaining.append(entry)

    # Same length means nothing matched, so leave the file untouched.
    if len(remaining) == len(current):
        return False  # Not found

    metadata["linked_folders"] = remaining
    with open(metadata_path, "w", encoding="utf-8") as writer:
        json.dump(metadata, writer, indent=2, ensure_ascii=False)

    return True
|
|
525
|
+
|
|
526
|
+
def scan_linked_folder(self, folder_path: str) -> list[str]:
    """
    Scan a linked folder recursively and return the supported files in it.

    Args:
        folder_path: Path to folder; ``~`` is expanded and the path is
            resolved before scanning

    Returns:
        Sorted list of file paths (as strings). Empty if the path does not
        exist or is not a directory.
    """
    folder = Path(folder_path).expanduser().resolve()

    # is_dir() is already False for a missing path, so one check covers both.
    if not folder.is_dir():
        return []

    supported_extensions = {".pdf", ".docx", ".doc", ".txt", ".md", ".markdown"}
    files: list[str] = []

    for ext in supported_extensions:
        # glob() matches any directory entry, including sub-directories whose
        # name happens to end in a supported extension; keep regular files only.
        files.extend(
            str(candidate)
            for candidate in folder.glob(f"**/*{ext}")
            if candidate.is_file()
        )

    return sorted(files)
|
|
549
|
+
|
|
550
|
+
def detect_folder_changes(self, kb_name: str, folder_id: str) -> dict:
    """
    Detect new and modified files in a linked folder since last sync.

    This enables automatic sync of changes from local folders that may
    be synced with cloud services like SharePoint, Google Drive, etc.

    A file counts as new when its path is absent from the folder's
    recorded ``synced_files`` mapping. It counts as modified when its
    current modification time is later than the recorded one, or when the
    recorded value cannot be parsed or compared.

    Args:
        kb_name: Knowledge base name
        folder_id: Folder ID to check for changes

    Returns:
        Dict with 'new_files', 'modified_files', 'has_changes',
        'new_count' and 'modified_count' keys

    Raises:
        ValueError: If the knowledge base or the linked folder is not found
    """
    if kb_name not in self.list_knowledge_bases():
        raise ValueError(f"Knowledge base not found: {kb_name}")

    # Get folder info
    folders = self.get_linked_folders(kb_name)
    folder_info = next((f for f in folders if f["id"] == folder_id), None)

    if not folder_info:
        raise ValueError(f"Linked folder not found: {folder_id}")

    folder_path = Path(folder_info["path"]).expanduser().resolve()
    # Tolerate a missing or null mapping (folder never synced).
    synced_files = folder_info.get("synced_files") or {}

    # Scan current files
    supported_extensions = {".pdf", ".docx", ".doc", ".txt", ".md", ".markdown"}
    new_files: list[str] = []
    modified_files: list[str] = []

    for ext in supported_extensions:
        for file_path in folder_path.glob(f"**/*{ext}"):
            try:
                # glob() also yields directories with a matching name.
                if not file_path.is_file():
                    continue
                file_mtime = datetime.fromtimestamp(file_path.stat().st_mtime)
            except OSError:
                # The entry vanished or became unreadable mid-scan; it is
                # neither new nor modified, so skip it instead of aborting.
                continue

            file_str = str(file_path)
            if file_str not in synced_files:
                # New file (not in synced files)
                new_files.append(file_str)
                continue

            # Check if modified since last sync
            try:
                is_modified = file_mtime > datetime.fromisoformat(synced_files[file_str])
            except (TypeError, ValueError):
                # Unusable recorded timestamp: re-sync to be safe.
                is_modified = True
            if is_modified:
                modified_files.append(file_str)

    return {
        "new_files": sorted(new_files),
        "modified_files": sorted(modified_files),
        "has_changes": len(new_files) > 0 or len(modified_files) > 0,
        "new_count": len(new_files),
        "modified_count": len(modified_files),
    }
|
|
616
|
+
|
|
617
|
+
def update_folder_sync_state(self, kb_name: str, folder_id: str, synced_files: list[str]):
    """
    Update the sync state for a linked folder after successful sync.

    Records which files were synced and their modification times,
    enabling future change detection. The updated state is written back
    to the knowledge base's ``metadata.json``.

    Nothing is written when the metadata file is missing or unreadable,
    or when no linked folder has the given ID.

    Args:
        kb_name: Knowledge base name
        folder_id: Folder ID
        synced_files: List of file paths that were successfully synced

    Raises:
        ValueError: If ``kb_name`` is not among the known knowledge bases
    """
    if kb_name not in self.list_knowledge_bases():
        raise ValueError(f"Knowledge base not found: {kb_name}")

    kb_dir = self.base_dir / kb_name
    metadata_file = kb_dir / "metadata.json"

    if not metadata_file.exists():
        return

    try:
        with open(metadata_file, encoding="utf-8") as f:
            metadata = json.load(f)
    except (OSError, ValueError):
        # Unreadable or malformed metadata (JSONDecodeError is a ValueError).
        return

    linked = metadata.get("linked_folders", [])
    folder = next((item for item in linked if item["id"] == folder_id), None)
    if folder is None:
        return

    # Record sync timestamp
    folder["last_sync"] = datetime.now().isoformat()

    # Record file modification times, keeping entries from earlier syncs.
    file_states = folder.get("synced_files") or {}
    for file_path in synced_files:
        try:
            mtime = datetime.fromtimestamp(Path(file_path).stat().st_mtime)
        except (OSError, TypeError, ValueError):
            # Best effort: a file that vanished or has an unusable path is
            # simply not recorded.
            continue
        file_states[file_path] = mtime.isoformat()

    folder["synced_files"] = file_states
    folder["file_count"] = len(file_states)

    # Persist the new state; without this the update would be lost as soon
    # as the method returns.
    with open(metadata_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2, ensure_ascii=False)
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def main():
    """
    Run the knowledge base manager command-line interface.

    Parses the command line and dispatches to one of the sub-commands
    ``list``, ``info``, ``set-default``, ``delete`` or ``clean-rag``.
    With no sub-command, the usage help is printed. Exceptions raised while
    handling ``info``, ``set-default``, ``delete`` or ``clean-rag`` are
    printed as ``Error: <message>`` rather than propagated.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Knowledge Base Manager")
    parser.add_argument(
        "--base-dir",
        default="./knowledge_bases",
        help="Base directory for knowledge bases",
    )

    commands = parser.add_subparsers(dest="command", help="Commands")

    # list
    commands.add_parser("list", help="List all knowledge bases")

    # info [name]
    info_cmd = commands.add_parser("info", help="Show knowledge base information")
    info_cmd.add_argument(
        "name", nargs="?", help="Knowledge base name (default if not specified)"
    )

    # set-default name
    set_default_cmd = commands.add_parser("set-default", help="Set default knowledge base")
    set_default_cmd.add_argument("name", help="Knowledge base name")

    # delete name [--force]
    delete_cmd = commands.add_parser("delete", help="Delete a knowledge base")
    delete_cmd.add_argument("name", help="Knowledge base name")
    delete_cmd.add_argument("--force", action="store_true", help="Skip confirmation")

    # clean-rag [name] [--no-backup]
    clean_cmd = commands.add_parser(
        "clean-rag", help="Clean RAG storage (useful for corrupted data)"
    )
    clean_cmd.add_argument(
        "name", nargs="?", help="Knowledge base name (default if not specified)"
    )
    clean_cmd.add_argument(
        "--no-backup", action="store_true", help="Don't backup before cleaning"
    )

    args = parser.parse_args()
    manager = KnowledgeBaseManager(args.base_dir)
    command = args.command

    if command == "list":
        kb_list = manager.list_knowledge_bases()
        default_kb = manager.get_default()

        print("\nAvailable Knowledge Bases:")
        print("=" * 60)
        if kb_list:
            for kb_name in kb_list:
                default_marker = " (default)" if kb_name == default_kb else ""
                print(f" • {kb_name}{default_marker}")
        else:
            print("No knowledge bases found")
        print()
        return

    if command == "info":
        try:
            info = manager.get_info(args.name)

            print("\nKnowledge Base Information:")
            print("=" * 60)
            print(f"Name: {info['name']}")
            print(f"Path: {info['path']}")
            print(f"Default: {'Yes' if info['is_default'] else 'No'}")

            if info.get("metadata"):
                print("\nMetadata:")
                for key, value in info["metadata"].items():
                    print(f" {key}: {value}")

            print("\nStatistics:")
            stats = info["statistics"]
            print(f" Raw documents: {stats['raw_documents']}")
            print(f" Images: {stats['images']}")
            print(f" Content lists: {stats['content_lists']}")
            print(f" RAG initialized: {'Yes' if stats['rag_initialized'] else 'No'}")

            if "rag" in stats:
                print("\n RAG Statistics:")
                for key, value in stats["rag"].items():
                    print(f" {key}: {value}")

            print()
        except Exception as e:
            print(f"Error: {e!s}")
        return

    if command == "set-default":
        try:
            manager.set_default(args.name)
            print(f"✓ Set '{args.name}' as default knowledge base")
        except Exception as e:
            print(f"Error: {e!s}")
        return

    if command == "delete":
        try:
            success = manager.delete_knowledge_base(args.name, confirm=args.force)
            if success:
                print(f"✓ Deleted knowledge base '{args.name}'")
        except Exception as e:
            print(f"Error: {e!s}")
        return

    if command == "clean-rag":
        try:
            manager.clean_rag_storage(args.name, backup=not args.no_backup)
        except Exception as e:
            print(f"Error: {e!s}")
        return

    # No sub-command given: show usage.
    parser.print_help()
|
|
779
|
+
|
|
780
|
+
|
|
781
|
+
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
|