realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,535 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Knowledge Base Management Startup Script - Unified Entry Point
5
+ Provides knowledge base initialization, management, querying, and other functions
6
+ """
7
+
8
+ import argparse
9
+ import asyncio
10
+ from pathlib import Path
11
+ import sys
12
+
13
+ # Set paths - compatible with both direct execution and module import
14
+ try:
15
+ from .config import KNOWLEDGE_BASES_DIR, get_env_config, setup_paths
16
+
17
+ setup_paths()
18
+ from .extract_numbered_items import process_content_list
19
+ from .initializer import KnowledgeBaseInitializer
20
+ from .manager import KnowledgeBaseManager
21
+ except ImportError:
22
+ # If relative import fails, means this file is run directly
23
+ # Add parent directory to path
24
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
25
+ from src.knowledge.config import KNOWLEDGE_BASES_DIR, get_env_config, setup_paths
26
+
27
+ setup_paths()
28
+ from src.knowledge.extract_numbered_items import process_content_list
29
+ from src.knowledge.initializer import KnowledgeBaseInitializer
30
+ from src.knowledge.manager import KnowledgeBaseManager
31
+
32
+
33
def list_knowledge_bases():
    """Print every knowledge base under KNOWLEDGE_BASES_DIR with brief statistics.

    The default knowledge base is flagged with a star marker. Statistics are
    best-effort: if reading one knowledge base's info fails, the failure is
    reported on its own line and the listing continues with the next entry.
    """
    manager = KnowledgeBaseManager(str(KNOWLEDGE_BASES_DIR))
    kb_list = manager.list_knowledge_bases()
    default_kb = manager.get_default()

    print("\n" + "=" * 60)
    print("📚 Available Knowledge Bases")
    print("=" * 60)

    if not kb_list:
        print(" ⚠️ No knowledge bases yet")
        print("\nTip: Use 'init' command to create a new knowledge base")
    else:
        for kb_name in kb_list:
            default_marker = " ★(default)" if kb_name == default_kb else ""
            print(f" • {kb_name}{default_marker}")

            # Display statistics
            try:
                info = manager.get_info(kb_name)
                stats = info.get("statistics", {})
                print(f" - Documents: {stats.get('raw_documents', 0)} files")
                print(f" - Images: {stats.get('images', 0)} files")
                print(
                    f" - RAG: {'Initialized' if stats.get('rag_initialized') else 'Not initialized'}"
                )
            except Exception as e:
                # Only ordinary errors are tolerated here; KeyboardInterrupt and
                # SystemExit must propagate so the user can still abort the listing.
                print(f" - Statistics unavailable: {e!s}")

    print("=" * 60 + "\n")
64
+
65
+
66
def show_kb_info(kb_name=None):
    """Print path, default flag, metadata and statistics for one knowledge base.

    Args:
        kb_name: Name passed straight to ``manager.get_info``; ``None`` is
            forwarded unchanged.

    Any exception raised while fetching or printing the info is caught and
    reported as a single error line instead of propagating.
    """
    kb_manager = KnowledgeBaseManager(str(KNOWLEDGE_BASES_DIR))
    divider = "=" * 60

    try:
        details = kb_manager.get_info(kb_name)

        # Header
        print("\n" + divider)
        print(f"📖 Knowledge Base Info: {details['name']}")
        print(divider)
        print(f"Path: {details['path']}")
        default_flag = "Yes" if details["is_default"] else "No"
        print(f"Default: {default_flag}")

        # Optional metadata section (skipped when missing or empty)
        metadata = details.get("metadata")
        if metadata:
            print("\n[Metadata]")
            for field, content in metadata.items():
                print(f" {field}: {content}")

        # Mandatory statistics section
        print("\n[Statistics]")
        figures = details["statistics"]
        print(f" Raw Documents: {figures['raw_documents']} files")
        print(f" Extracted Images: {figures['images']} files")
        print(f" Content Lists: {figures['content_lists']} files")
        rag_state = "Initialized" if figures["rag_initialized"] else "Not initialized"
        print(f" RAG Status: {rag_state}")

        # Optional RAG-specific statistics
        if "rag" in figures:
            print("\n[RAG Statistics]")
            for field, content in figures["rag"].items():
                print(f" {field}: {content}")

        print(divider + "\n")

    except Exception as err:
        print(f"✗ Error: {err!s}\n")
100
+
101
+
102
def set_default_kb(kb_name):
    """Make ``kb_name`` the default knowledge base and print the outcome.

    Args:
        kb_name: Name handed to ``manager.set_default``.

    Failures are caught and printed as an error line rather than raised.
    """
    kb_manager = KnowledgeBaseManager(str(KNOWLEDGE_BASES_DIR))

    try:
        kb_manager.set_default(kb_name)
        print(f"✓ Set '{kb_name}' as default knowledge base\n")
    except Exception as err:
        print(f"✗ Error: {err!s}\n")
111
+
112
+
113
async def init_knowledge_base(args):
    """Create a new knowledge base from the parsed ``init`` command arguments.

    Steps, in order: resolve API credentials (CLI flags win over the
    environment config), gather document paths from ``--docs`` and
    ``--docs-dir``, create the directory structure, copy the documents, then
    optionally process them and extract numbered items.

    Args:
        args: Parsed argparse namespace for the ``init`` sub-command.

    Returns:
        None. Validation failures print an error message and return early.
    """
    env_config = get_env_config()
    api_key = args.api_key or env_config["api_key"]
    base_url = args.base_url or env_config["base_url"]

    processing_enabled = not args.skip_processing

    # An API key is only needed when documents will actually be processed.
    if processing_enabled and not api_key:
        print("✗ Error: API Key not set")
        print("Please set environment variable LLM_API_KEY or use --api-key parameter\n")
        return

    # Gather explicit files first, then everything matching in the directory.
    sources = list(args.docs) if args.docs else []

    if args.docs_dir:
        folder = Path(args.docs_dir)
        if not (folder.exists() and folder.is_dir()):
            print(f"✗ Error: Document directory does not exist: {args.docs_dir}\n")
            return
        for pattern in ("*.pdf", "*.docx", "*.doc", "*.txt", "*.md"):
            sources.extend(str(match) for match in folder.glob(pattern))

    if processing_enabled and not sources:
        print("✗ Error: No documents specified")
        print("Use --docs or --docs-dir to specify documents\n")
        return

    divider = "=" * 60
    print("\n" + divider)
    print(f"🚀 Initializing knowledge base: {args.name}")
    print(divider + "\n")

    initializer = KnowledgeBaseInitializer(
        kb_name=args.name,
        base_dir=str(KNOWLEDGE_BASES_DIR),
        api_key=api_key,
        base_url=base_url,
    )

    initializer.create_directory_structure()

    if sources:
        copied = initializer.copy_documents(sources)
        print(f"✓ Copied {len(copied)} files\n")

    if processing_enabled:
        await initializer.process_documents()
    else:
        print("⏭️ Skipping document processing\n")

    # Extraction runs only when processing ran and extraction was not skipped;
    # the skip notice is printed whenever --skip-extract was given.
    if args.skip_extract:
        print("⏭️ Skipping numbered items extraction\n")
    elif processing_enabled:
        initializer.extract_numbered_items(batch_size=args.batch_size)

    print("\n" + divider)
    print(f"✓ Knowledge base '{args.name}' initialization complete!")
    print(f"Location: {initializer.kb_dir}")
    print(divider + "\n")
177
+
178
+
179
def extract_items(args):
    """Extract numbered items from a knowledge base's ``content_list`` JSON files.

    Results are written to ``numbered_items.json`` inside the knowledge base
    directory; every file after the first is processed with ``merge=True``.

    Args:
        args: Parsed argparse namespace for the ``extract`` sub-command.

    Returns:
        None. Validation failures print an error and return early; errors
        raised during extraction are caught and printed.
    """
    env_config = get_env_config()
    api_key = args.api_key or env_config["api_key"]
    base_url = args.base_url or env_config["base_url"]

    if not api_key:
        print("✗ Error: API Key not set")
        print("Please set environment variable LLM_API_KEY or use --api-key parameter\n")
        return

    kb_dir = KNOWLEDGE_BASES_DIR / args.kb
    content_list_dir = kb_dir / "content_list"

    if not content_list_dir.exists():
        print(f"✗ Error: content_list directory does not exist: {content_list_dir}\n")
        return

    # Either the single requested file or every JSON file, in sorted order.
    if args.content_file:
        chosen = content_list_dir / args.content_file
        if not chosen.exists():
            print(f"✗ Error: content_list file does not exist: {chosen}\n")
            return
        pending = [chosen]
    else:
        pending = sorted(content_list_dir.glob("*.json"))
        if not pending:
            print(f"✗ Error: No JSON files found in {content_list_dir}\n")
            return

    if args.debug:
        print("⚠️ Debug mode: Only processing first file\n")
        pending = pending[:1]

    output_file = kb_dir / "numbered_items.json"
    total = len(pending)
    divider = "=" * 60

    print("\n" + divider)
    print(f"🔍 Extracting numbered items: {args.kb}")
    print(divider)
    print(f"File count: {total}")
    print(f"Batch size: {args.batch_size}")
    # NOTE(review): max_concurrent is displayed here but is not passed to
    # process_content_list below - confirm whether it should be forwarded.
    print(f"Max concurrent: {args.max_concurrent}")
    print(divider + "\n")

    try:
        for position, source in enumerate(pending, start=1):
            print(f"\nProcessing file [{position}/{total}]: {source.name}")

            process_content_list(
                source,
                output_file,
                api_key,
                base_url,
                args.batch_size,
                merge=(position > 1),  # merge into the output after the first file
            )

        print("\n" + divider)
        print("✓ Extraction complete!")
        print(f"Output file: {output_file}")
        print(divider + "\n")

    except Exception as err:
        print(f"\n✗ Extraction failed: {err}\n")
245
+
246
+
247
def delete_knowledge_base(args):
    """Delete the knowledge base named by the ``delete`` sub-command.

    Args:
        args: Parsed argparse namespace; ``args.force`` is forwarded as the
            manager's ``confirm`` argument.

    A success line is printed only when the manager returns a truthy result;
    exceptions are caught and printed.
    """
    kb_manager = KnowledgeBaseManager(str(KNOWLEDGE_BASES_DIR))

    try:
        if kb_manager.delete_knowledge_base(args.name, confirm=args.force):
            print(f"\n✓ Deleted knowledge base '{args.name}'\n")
    except Exception as err:
        print(f"\n✗ Error: {err}\n")
257
+
258
+
259
def clean_rag_storage(args):
    """Clean the RAG storage of a knowledge base via the manager.

    Args:
        args: Parsed argparse namespace for ``clean-rag``; a backup is
            requested unless ``args.no_backup`` is set.

    Exceptions raised by the manager are caught and printed.
    """
    kb_manager = KnowledgeBaseManager(str(KNOWLEDGE_BASES_DIR))
    divider = "=" * 60

    print("\n" + divider)
    print("🧹 Cleaning RAG storage")
    print(divider + "\n")

    keep_backup = not args.no_backup
    try:
        kb_manager.clean_rag_storage(args.name, backup=keep_backup)
        print("\n" + divider)
        print("✓ RAG storage cleaned!")
        print("💡 Tip: Use 'add_documents.py' to reprocess documents to rebuild RAG")
        print(divider + "\n")
    except Exception as err:
        print(f"\n✗ Error: {err}\n")
275
+
276
+
277
async def refresh_knowledge_base(args):
    """Refresh a knowledge base by cleaning its RAG storage and reprocessing documents.

    Steps:
        1. Clean RAG storage (with a backup unless ``--no-backup``).
        2. With ``--full``, also wipe and recreate ``content_list`` and ``images``.
        3. Reprocess all documents and, unless ``--skip-extract``, extract
           numbered items.

    Args:
        args: Parsed argparse namespace for the ``refresh`` sub-command.

    Returns:
        None. Missing API key or missing raw documents print an error and
        return early.

    Raises:
        Exception: Any error raised during the refresh is printed and re-raised.
    """
    # Local import: only this function needs shutil.
    import shutil

    manager = KnowledgeBaseManager(str(KNOWLEDGE_BASES_DIR))

    # Get API configuration (CLI flags take precedence over the environment)
    env_config = get_env_config()
    api_key = args.api_key or env_config["api_key"]
    base_url = args.base_url or env_config["base_url"]

    if not api_key:
        print("✗ Error: API Key not set")
        print("Please set environment variable LLM_API_KEY or use --api-key parameter\n")
        return

    try:
        kb_name = args.name
        kb_dir = manager.get_knowledge_base_path(kb_name)
        raw_dir = kb_dir / "raw"

        # any() stops at the first entry instead of listing the whole directory.
        if not raw_dir.exists() or not any(raw_dir.iterdir()):
            print(f"✗ Error: No raw documents found in knowledge base '{kb_name}'\n")
            return

        print("\n" + "=" * 60)
        print(f"🔄 Refreshing knowledge base: {kb_name}")
        print("=" * 60)
        print(f"Path: {kb_dir}")
        print("=" * 60 + "\n")

        # Step 1: Clean RAG storage
        print("Step 1/3: Cleaning RAG storage...")
        manager.clean_rag_storage(kb_name, backup=not args.no_backup)

        # Step 2: Clean content_list and images (optional)
        if args.full:
            print("\nStep 2/3: Cleaning extracted content and images...")
            for subdir_name in ("content_list", "images"):
                target = kb_dir / subdir_name
                if target.exists():
                    # Remove everything, then recreate the empty directory.
                    shutil.rmtree(target)
                    target.mkdir(parents=True, exist_ok=True)
                    print(f" ✓ Cleaned {subdir_name}")
        else:
            print("\nStep 2/3: Skipping content cleanup (use --full for complete refresh)")

        # Step 3: Reprocess all documents
        print("\nStep 3/3: Reprocessing documents...")

        # KnowledgeBaseInitializer is already imported at module level by both
        # the package-relative and the direct-execution import branches, so it
        # is not re-imported here through the absolute ``src.`` path.
        initializer = KnowledgeBaseInitializer(
            kb_name=kb_name, base_dir=str(KNOWLEDGE_BASES_DIR), api_key=api_key, base_url=base_url
        )

        # Reprocess documents
        await initializer.process_documents()

        # Extract numbered items
        if not args.skip_extract:
            print("\nExtracting numbered items...")
            initializer.extract_numbered_items(batch_size=args.batch_size)

        print("\n" + "=" * 60)
        print(f"✓ Knowledge base '{kb_name}' refresh complete!")
        print("=" * 60 + "\n")

    except Exception as e:
        print(f"\n✗ Refresh failed: {e}\n")
        raise
356
+
357
+
358
def _run_async_command(command, args):
    """Run an async command function to completion with shared CLI error handling.

    Args:
        command: Coroutine function taking the parsed ``args`` namespace.
        args: Parsed argparse namespace forwarded to ``command``.

    Raises:
        Exception: Any error other than cancellation or the tolerated asyncio
            cleanup ``IndexError`` is printed and re-raised.
    """
    try:
        asyncio.run(command(args))
    except (KeyboardInterrupt, SystemExit):
        print("\n\n⚠️ Operation cancelled")
    except IndexError as e:
        # Ignore IndexError during asyncio cleanup (doesn't affect functionality)
        if "pop from an empty deque" not in str(e):
            raise
    except Exception as e:
        print(f"\n✗ Error: {e}")
        raise


def main():
    """Parse the command line and dispatch to the matching knowledge base command.

    Sub-commands: ``list``, ``info``, ``set-default``, ``init``, ``extract``,
    ``delete``, ``clean-rag`` and ``refresh``. When no sub-command is given the
    help text is printed.
    """
    parser = argparse.ArgumentParser(
        description="Knowledge Base Management Tool",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Usage Examples:

[Recommended: Directly run kb.py]
python knowledge_init/kb.py list
python knowledge_init/kb.py info ai_textbook
python knowledge_init/kb.py set-default math2211
python knowledge_init/kb.py init my_kb --docs document.pdf
python knowledge_init/kb.py init my_course --docs-dir ./materials/
python knowledge_init/kb.py extract --kb ai_textbook
python knowledge_init/kb.py extract --kb ai_textbook --debug

[New: Delete and Refresh Features]
python knowledge_init/kb.py delete old_kb # Delete knowledge base (requires confirmation)
python knowledge_init/kb.py delete old_kb --force # Force delete (skip confirmation)
python knowledge_init/kb.py clean-rag C2-test # Clean RAG storage (fix corrupted graph data)
python knowledge_init/kb.py refresh ai_textbook # Refresh knowledge base (reprocess all documents)
python knowledge_init/kb.py refresh ai_textbook --full # Full refresh

[Method 2: Run as module]
python -m knowledge_init.start_kb list
python -m knowledge_init.start_kb init my_kb --docs document.pdf
python -m knowledge_init.start_kb clean-rag C2-test

[Important] All commands must be run from project root directory (DeepTutor/)!
""",
    )

    subparsers = parser.add_subparsers(dest="command", help="Command")

    # list command
    subparsers.add_parser("list", help="List all knowledge bases")

    # info command
    info_parser = subparsers.add_parser("info", help="Show knowledge base information")
    info_parser.add_argument(
        "name",
        nargs="?",
        help="Knowledge base name (optional, default shows default knowledge base)",
    )

    # set-default command
    default_parser = subparsers.add_parser("set-default", help="Set default knowledge base")
    default_parser.add_argument("name", help="Knowledge base name")

    # init command
    init_parser = subparsers.add_parser("init", help="Initialize new knowledge base")
    init_parser.add_argument("name", help="Knowledge base name")
    init_parser.add_argument("--docs", nargs="+", help="Document file list")
    init_parser.add_argument("--docs-dir", help="Document directory")
    init_parser.add_argument("--api-key", help="OpenAI API Key")
    init_parser.add_argument("--base-url", help="API Base URL")
    init_parser.add_argument(
        "--skip-processing", action="store_true", help="Skip document processing"
    )
    init_parser.add_argument(
        "--skip-extract", action="store_true", help="Skip numbered items extraction"
    )
    init_parser.add_argument("--batch-size", type=int, default=20, help="Batch size (default 20)")

    # extract command
    extract_parser = subparsers.add_parser("extract", help="Extract numbered items")
    extract_parser.add_argument("--kb", required=True, help="Knowledge base name")
    extract_parser.add_argument("--content-file", help="Specify content_list file (optional)")
    extract_parser.add_argument(
        "--batch-size", type=int, default=20, help="Batch size (default 20)"
    )
    extract_parser.add_argument(
        "--max-concurrent", type=int, default=5, help="Max concurrent tasks (default 5)"
    )
    extract_parser.add_argument(
        "--debug", action="store_true", help="Debug mode (only process first file)"
    )
    extract_parser.add_argument("--api-key", help="OpenAI API Key")
    extract_parser.add_argument("--base-url", help="API Base URL")

    # delete command
    delete_parser = subparsers.add_parser("delete", help="Delete knowledge base")
    delete_parser.add_argument("name", help="Knowledge base name")
    delete_parser.add_argument("--force", action="store_true", help="Skip confirmation (dangerous)")

    # clean-rag command
    clean_parser = subparsers.add_parser(
        "clean-rag", help="Clean RAG storage (fix corrupted graph data)"
    )
    clean_parser.add_argument(
        "name",
        nargs="?",
        help="Knowledge base name (optional, default uses default knowledge base)",
    )
    clean_parser.add_argument(
        "--no-backup", action="store_true", help="No backup (not recommended)"
    )

    # refresh command
    refresh_parser = subparsers.add_parser(
        "refresh", help="Refresh knowledge base (reprocess all documents)"
    )
    refresh_parser.add_argument("name", help="Knowledge base name")
    refresh_parser.add_argument(
        "--full", action="store_true", help="Full refresh (clean all extracted content)"
    )
    refresh_parser.add_argument(
        "--no-backup", action="store_true", help="No backup for RAG storage"
    )
    refresh_parser.add_argument(
        "--skip-extract", action="store_true", help="Skip numbered items extraction"
    )
    refresh_parser.add_argument(
        "--batch-size", type=int, default=20, help="Batch size (default 20)"
    )
    refresh_parser.add_argument("--api-key", help="OpenAI API Key")
    refresh_parser.add_argument("--base-url", help="API Base URL")

    args = parser.parse_args()

    # Windows console UTF-8 support. reconfigure() switches the encoding of the
    # existing stream in place instead of replacing sys.stdout with a second
    # wrapper around the same underlying buffer.
    if sys.platform == "win32":
        reconfigure = getattr(sys.stdout, "reconfigure", None)
        if reconfigure is not None:
            reconfigure(encoding="utf-8")

    # Execute command
    if args.command == "list":
        list_knowledge_bases()

    elif args.command == "info":
        show_kb_info(args.name)

    elif args.command == "set-default":
        set_default_kb(args.name)

    elif args.command == "init":
        _run_async_command(init_knowledge_base, args)

    elif args.command == "extract":
        extract_items(args)

    elif args.command == "delete":
        delete_knowledge_base(args)

    elif args.command == "clean-rag":
        clean_rag_storage(args)

    elif args.command == "refresh":
        _run_async_command(refresh_knowledge_base, args)

    else:
        parser.print_help()


if __name__ == "__main__":
    main()
@@ -0,0 +1,103 @@
1
+ """
2
+ Unified Logging System for DeepTutor
3
+ =====================================
4
+
5
+ A clean, consistent logging system with:
6
+ - Unified format: [Module] Symbol Message
7
+ - English-only output
8
+ - File output to data/user/logs/
9
+ - WebSocket streaming support
10
+ - Color-coded console output
11
+ - LLM usage statistics tracking
12
+ - External library log forwarding (LightRAG, LlamaIndex)
13
+
14
+ Usage:
15
+ from src.logging import get_logger, LLMStats
16
+
17
+ logger = get_logger("Solver")
18
+ logger.info("Processing started")
19
+ logger.success("Task completed in 2.3s")
20
+ logger.error("Something went wrong")
21
+
22
+ # Track LLM usage
23
+ stats = LLMStats("Solver")
24
+ stats.add_call(model="gpt-4o", prompt_tokens=100, completion_tokens=50)
25
+ stats.print_summary()
26
+ """
27
+
28
+ # Core logging (see the `.logger` import further below; imports are sorted by module name)
29
+ # Adapters for external libraries
30
+ from .adapters import (
31
+ LightRAGLogContext,
32
+ LightRAGLogForwarder,
33
+ LlamaIndexLogContext,
34
+ LlamaIndexLogForwarder,
35
+ get_lightrag_forwarding_config,
36
+ )
37
+
38
+ # Configuration
39
+ from .config import (
40
+ LoggingConfig,
41
+ get_default_log_dir,
42
+ load_logging_config,
43
+ )
44
+
45
+ # Handlers
46
+ from .handlers import (
47
+ ConsoleHandler,
48
+ FileHandler,
49
+ JSONFileHandler,
50
+ LogInterceptor,
51
+ RotatingFileHandler,
52
+ WebSocketLogHandler,
53
+ )
54
+ from .logger import (
55
+ ConsoleFormatter,
56
+ FileFormatter,
57
+ Logger,
58
+ LogLevel,
59
+ get_logger,
60
+ reset_logger,
61
+ )
62
+
63
+ # Statistics tracking
64
+ from .stats import (
65
+ MODEL_PRICING,
66
+ LLMCall,
67
+ LLMStats,
68
+ estimate_tokens,
69
+ get_pricing,
70
+ )
71
+
72
+ __all__ = [
73
+ # Core: logger classes, accessors and formatters (imported from .logger)
74
+ "Logger",
75
+ "LogLevel",
76
+ "get_logger",
77
+ "reset_logger",
78
+ "ConsoleFormatter",
79
+ "FileFormatter",
80
+ # Handlers (imported from .handlers)
81
+ "ConsoleHandler",
82
+ "FileHandler",
83
+ "JSONFileHandler",
84
+ "RotatingFileHandler",
85
+ "WebSocketLogHandler",
86
+ "LogInterceptor",
87
+ # Adapters for external libraries (imported from .adapters)
88
+ "LightRAGLogContext",
89
+ "LightRAGLogForwarder",
90
+ "get_lightrag_forwarding_config",
91
+ "LlamaIndexLogContext",
92
+ "LlamaIndexLogForwarder",
93
+ # Stats: LLM usage statistics tracking (imported from .stats)
94
+ "LLMStats",
95
+ "LLMCall",
96
+ "get_pricing",
97
+ "estimate_tokens",
98
+ "MODEL_PRICING",
99
+ # Config (imported from .config)
100
+ "LoggingConfig",
101
+ "load_logging_config",
102
+ "get_default_log_dir",
103
+ ]
@@ -0,0 +1,17 @@
1
+ """
2
+ Log Adapters
3
+ ============
4
+
5
+ Adapters for forwarding logs from external libraries to the unified logging system.
6
+ """
7
+
8
+ from .lightrag import LightRAGLogContext, LightRAGLogForwarder, get_lightrag_forwarding_config
9
+ from .llamaindex import LlamaIndexLogContext, LlamaIndexLogForwarder
10
+
11
+ __all__ = [  # public names re-exported from .lightrag and .llamaindex
12
+ "LightRAGLogContext",
13
+ "LightRAGLogForwarder",
14
+ "get_lightrag_forwarding_config",
15
+ "LlamaIndexLogContext",
16
+ "LlamaIndexLogForwarder",
17
+ ]