realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Incremental Document Addition Usage Example
|
|
5
|
+
|
|
6
|
+
Demonstrates how to add new documents to an existing knowledge base
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
# Add project root directory to path
|
|
15
|
+
project_root = Path(__file__).parent.parent.parent
|
|
16
|
+
sys.path.insert(0, str(project_root))
|
|
17
|
+
|
|
18
|
+
from src.knowledge.add_documents import DocumentAdder
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
async def example_add_single_document():
    """Example 1: Add single document

    Creates a ``DocumentAdder`` for the "ai_textbook" knowledge base, hands it
    ``./new_chapter.pdf`` and, when ``add_documents`` returns a non-empty
    result, runs the follow-up steps in order: process the new documents,
    extract numbered items, update metadata.

    The API key and base URL come from the ``LLM_API_KEY`` and ``LLM_HOST``
    environment variables (``None`` when unset).
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Example 1: Add single document to knowledge base")
    print(rule)

    adder = DocumentAdder(
        kb_name="ai_textbook",
        base_dir="./data/knowledge_bases",
        api_key=os.getenv("LLM_API_KEY"),
        base_url=os.getenv("LLM_HOST"),
    )

    # skip_duplicates=True: skip files with the same name.
    added = adder.add_documents(
        source_files=["./new_chapter.pdf"],
        skip_duplicates=True,
    )
    if not added:
        # NOTE(review): indentation was not recoverable from the diff view;
        # this assumes the completion message belongs to the "new files"
        # branch only - confirm against the packaged file.
        return

    # Process new documents, then extract numbered items from the result.
    processed = await adder.process_new_documents(added)
    adder.extract_numbered_items_for_new_docs(processed)

    # Metadata is updated with the number of files that were added.
    adder.update_metadata(len(added))

    print("\n✓ Completed!")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
async def example_add_multiple_documents():
    """Example 2: Add multiple documents

    Same flow as the single-document example, but against the "math2211"
    knowledge base with three PDFs from ``./materials`` and with
    ``batch_size=30`` passed to the numbered-item extraction step.

    The API key and base URL come from the ``LLM_API_KEY`` and ``LLM_HOST``
    environment variables (``None`` when unset).
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Example 2: Batch add documents to knowledge base")
    print(rule)

    adder = DocumentAdder(
        kb_name="math2211",
        base_dir="./data/knowledge_bases",
        api_key=os.getenv("LLM_API_KEY"),
        base_url=os.getenv("LLM_HOST"),
    )

    sources = [
        "./materials/chapter1.pdf",
        "./materials/chapter2.pdf",
        "./materials/exercises.pdf",
    ]
    added = adder.add_documents(source_files=sources, skip_duplicates=True)
    if not added:
        # NOTE(review): indentation was not recoverable from the diff view;
        # this assumes the completion message belongs to the "new files"
        # branch only - confirm against the packaged file.
        return

    # Process new documents.
    processed = await adder.process_new_documents(added)

    # Extract numbered items with a larger batch size for efficiency.
    adder.extract_numbered_items_for_new_docs(processed, batch_size=30)

    # Metadata is updated with the number of files that were added.
    adder.update_metadata(len(added))

    print("\n✓ Completed!")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
async def example_add_from_directory():
    """Example 3: Add all documents from directory

    Collects every ``*.pdf``, ``*.docx`` and ``*.doc`` file directly inside
    ``./new_materials`` (non-recursive glob, patterns tried in that order),
    prints how many were found, and feeds them through the add / process /
    extract / update-metadata sequence on the "ai_textbook" knowledge base.

    The API key and base URL come from the ``LLM_API_KEY`` and ``LLM_HOST``
    environment variables (``None`` when unset).
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Example 3: Add all documents from directory")
    print(rule)

    adder = DocumentAdder(
        kb_name="ai_textbook",
        base_dir="./data/knowledge_bases",
        api_key=os.getenv("LLM_API_KEY"),
        base_url=os.getenv("LLM_HOST"),
    )

    # Collect all documents in the directory, one glob pattern at a time.
    docs_dir = Path("./new_materials")
    doc_files = [
        str(match)
        for pattern in ("*.pdf", "*.docx", "*.doc")
        for match in docs_dir.glob(pattern)
    ]

    print(f"Found {len(doc_files)} documents")

    added = adder.add_documents(source_files=doc_files, skip_duplicates=True)
    if not added:
        # NOTE(review): indentation was not recoverable from the diff view;
        # this assumes the completion message belongs to the "new files"
        # branch only - confirm against the packaged file.
        return

    # Process new documents, then extract numbered items from the result.
    processed = await adder.process_new_documents(added)
    adder.extract_numbered_items_for_new_docs(processed)

    # Metadata is updated with the number of files that were added.
    adder.update_metadata(len(added))

    print("\n✓ Completed!")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
async def example_add_only_no_processing():
    """Example 4: Only add files, no processing (process manually later)

    Calls ``add_documents`` for ``./chapter.pdf`` on the "ai_textbook"
    knowledge base and reports how many files it returned. The processing,
    extraction and metadata steps are intentionally not invoked here.

    The API key and base URL come from the ``LLM_API_KEY`` and ``LLM_HOST``
    environment variables (``None`` when unset).
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Example 4: Only add files, no processing")
    print(rule)

    adder = DocumentAdder(
        kb_name="ai_textbook",
        base_dir="./data/knowledge_bases",
        api_key=os.getenv("LLM_API_KEY"),
        base_url=os.getenv("LLM_HOST"),
    )

    # Only add files to the raw directory.
    added = adder.add_documents(
        source_files=["./chapter.pdf"],
        skip_duplicates=True,
    )

    print(f"Added {len(added)} files to raw directory")
    print("These files can be processed manually later")

    # process_new_documents() is deliberately not called here; the files can
    # be processed manually later or with other tools.
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
async def example_check_existing_files():
    """Example 5: Check existing files

    Builds a ``DocumentAdder`` for "ai_textbook" without any API credentials
    and prints the result of ``get_existing_files()``: first the count, then
    one bullet per entry in sorted order.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Example 5: Check existing files in knowledge base")
    print(rule)

    adder = DocumentAdder(kb_name="ai_textbook", base_dir="./data/knowledge_bases")
    known = adder.get_existing_files()

    print(f"\nKnowledge base 'ai_textbook' already has {len(known)} files:")
    for filename in sorted(known):
        print(f" • {filename}")
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def example_with_error_handling():
    """Example 6: Complete error handling

    Synchronous variant of the single-document flow wrapped in one ``try``
    block. It returns early, with a message, when ``./new_chapter.pdf`` does
    not exist or when ``add_documents`` returns nothing. Otherwise it drives
    the async processing steps with ``asyncio.run``.

    ``ValueError`` is reported as a configuration error and
    ``FileNotFoundError`` as a file error. Any other ``Exception`` is reported
    as unknown and its traceback is printed. Nothing is re-raised.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Example 6: Incremental addition with error handling")
    print(rule)

    try:
        adder = DocumentAdder(
            kb_name="ai_textbook",
            base_dir="./data/knowledge_bases",
            api_key=os.getenv("LLM_API_KEY"),
            base_url=os.getenv("LLM_HOST"),
        )

        # Bail out before touching the knowledge base if the input is missing.
        source_file = Path("./new_chapter.pdf")
        if not source_file.exists():
            print(f"✗ Error: Source file does not exist: {source_file}")
            return

        added = adder.add_documents(
            source_files=[str(source_file)],
            skip_duplicates=True,
        )
        if not added:
            print("ℹ️ No new files to add (may already exist)")
            return

        async def _run_pipeline():
            # process_new_documents is the only awaited step; the other two
            # are plain calls made from inside the coroutine.
            processed = await adder.process_new_documents(added)
            adder.extract_numbered_items_for_new_docs(processed)
            adder.update_metadata(len(added))

        asyncio.run(_run_pipeline())

        print("\n✓ Successfully added documents!")

    except ValueError as e:
        print(f"✗ Configuration error: {e}")
    except FileNotFoundError as e:
        print(f"✗ File error: {e}")
    except Exception as e:
        print(f"✗ Unknown error: {e}")
        import traceback

        traceback.print_exc()
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
if __name__ == "__main__":
    print("\nIncremental Document Addition Usage Examples")
    print("=" * 60)

    # Select example to run
    example = 5  # Change this number to run different examples

    # Examples 1-5 are coroutines and need an event loop; example 6 is a
    # plain function that starts its own loop internally.
    _async_examples = {
        1: example_add_single_document,
        2: example_add_multiple_documents,
        3: example_add_from_directory,
        4: example_add_only_no_processing,
        5: example_check_existing_files,
    }

    if example in _async_examples:
        asyncio.run(_async_examples[example]())
    elif example == 6:
        example_with_error_handling()
    else:
        print("Please select example number 1-6")
|