realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
DR-in-KG 2.0 Core Data Structures
|
|
5
|
+
Includes: TopicBlock, ToolTrace, DynamicTopicQueue
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import asdict, dataclass, field
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from enum import Enum
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TopicStatus(Enum):
    """Lifecycle status of a topic block in the research queue.

    Typical transitions: PENDING -> RESEARCHING -> COMPLETED or FAILED.
    """

    PENDING = "pending"  # Queued, not yet picked up for research
    RESEARCHING = "researching"  # Currently being researched
    COMPLETED = "completed"  # Research finished successfully
    FAILED = "failed"  # Research was marked as failed
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ToolType(Enum):
    """Tool type enumeration.

    Values match the ``tool_type`` strings recorded on ToolTrace entries
    (presumably corresponding to the tool implementations under
    ``src/tools`` — confirm against the tool registry).
    """

    RAG_NAIVE = "rag_naive"
    RAG_HYBRID = "rag_hybrid"
    QUERY_ITEM = "query_item"
    PAPER_SEARCH = "paper_search"
    RUN_CODE = "run_code"
    WEB_SEARCH = "web_search"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Default cap on stored raw_answer length, in characters (~50KB for ASCII text).
DEFAULT_RAW_ANSWER_MAX_SIZE = 50 * 1024


@dataclass
class ToolTrace:
    """
    Tool trace - Records complete loop of a single tool call.

    The raw tool output is capped at DEFAULT_RAW_ANSWER_MAX_SIZE characters
    on construction; when truncation occurs, ``raw_answer_truncated`` is set
    and ``raw_answer_original_size`` preserves the pre-truncation length.
    """

    tool_id: str  # Unique identifier (e.g., "tool_1", "tool_2")
    citation_id: str  # Citation ID (for report citations and anchors, e.g., CIT-1-01)
    tool_type: str  # Tool type (rag_naive, web_search, etc.)
    query: str  # Query statement issued
    raw_answer: str  # Raw detailed result returned by tool (may be truncated)
    summary: str  # Core summary generated by Note Agent
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    raw_answer_truncated: bool = field(default=False)  # Whether raw_answer was truncated
    raw_answer_original_size: int = field(default=0)  # Original size before truncation

    def __post_init__(self):
        """Record the original size and enforce the raw_answer size limit."""
        if self.raw_answer_original_size == 0:
            self.raw_answer_original_size = len(self.raw_answer)

        # Truncate if needed
        if len(self.raw_answer) > DEFAULT_RAW_ANSWER_MAX_SIZE:
            self.raw_answer = self._truncate_raw_answer(
                self.raw_answer, DEFAULT_RAW_ANSWER_MAX_SIZE
            )
            self.raw_answer_truncated = True

    @staticmethod
    def _truncate_raw_answer(raw_answer: str, max_size: int) -> str:
        """
        Truncate raw_answer while trying to preserve valid JSON structure.

        Args:
            raw_answer: Original raw answer string
            max_size: Maximum size in characters

        Returns:
            Truncated string (at most ``max_size`` characters)
        """
        if len(raw_answer) <= max_size:
            return raw_answer

        # Try to parse as JSON and truncate intelligently
        try:
            data = json.loads(raw_answer)

            # If it's a dict with common RAG response fields, truncate content fields
            if isinstance(data, dict):
                content_fields = ["answer", "content", "text", "chunks", "documents"]
                for field_name in content_fields:
                    if field_name in data:
                        if (
                            isinstance(data[field_name], str)
                            and len(data[field_name]) > max_size // 2
                        ):
                            data[field_name] = data[field_name][: max_size // 2] + "... [truncated]"
                        elif isinstance(data[field_name], list) and len(data[field_name]) > 3:
                            # Only truncate when items are actually dropped; previously a
                            # spurious "truncated" note was appended to short lists too.
                            data[field_name] = data[field_name][:3]
                            data[field_name].append({"note": "... additional items truncated"})

                truncated = json.dumps(data, ensure_ascii=False)
                if len(truncated) <= max_size:
                    return truncated
        except (json.JSONDecodeError, TypeError):
            pass

        # Fallback: simple truncation with marker
        truncation_marker = "\n... [content truncated, original size: {} bytes]".format(
            len(raw_answer)
        )
        if max_size <= len(truncation_marker):
            # Budget too small to fit the marker: hard truncate instead of
            # producing a negative slice (which would return too much data).
            return raw_answer[:max_size]
        return raw_answer[: max_size - len(truncation_marker)] + truncation_marker

    def to_dict(self) -> dict[str, Any]:
        """Convert to a plain dictionary (suitable for JSON serialization)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ToolTrace":
        """
        Create from dictionary; the caller's dict is NOT modified.

        Older serialized data may lack the truncation bookkeeping fields,
        so sensible defaults are filled in for backward compatibility.
        """
        # Copy first: the previous implementation wrote compat defaults
        # directly into the caller's dictionary.
        data = dict(data)
        data.setdefault("raw_answer_truncated", False)
        data.setdefault("raw_answer_original_size", len(data.get("raw_answer", "")))
        return cls(**data)

    @classmethod
    def create_with_size_limit(
        cls,
        tool_id: str,
        citation_id: str,
        tool_type: str,
        query: str,
        raw_answer: str,
        summary: str,
        max_size: int = DEFAULT_RAW_ANSWER_MAX_SIZE,
    ) -> "ToolTrace":
        """
        Create a ToolTrace with an explicit, caller-chosen size limit.

        Args:
            tool_id: Tool ID
            citation_id: Citation ID
            tool_type: Tool type
            query: Query string
            raw_answer: Raw answer (will be truncated if needed)
            summary: Summary
            max_size: Maximum size for raw_answer (characters)

        Returns:
            ToolTrace instance
        """
        original_size = len(raw_answer)
        truncated = len(raw_answer) > max_size

        if truncated:
            raw_answer = cls._truncate_raw_answer(raw_answer, max_size)

        return cls(
            tool_id=tool_id,
            citation_id=citation_id,
            tool_type=tool_type,
            query=query,
            raw_answer=raw_answer,
            summary=summary,
            raw_answer_truncated=truncated,
            raw_answer_original_size=original_size,
        )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
@dataclass
class TopicBlock:
    """A single research sub-topic: the smallest unit scheduled by the queue.

    Carries the accumulated tool-call traces plus bookkeeping timestamps
    and arbitrary metadata.
    """

    block_id: str  # Unique identifier (e.g., "block_1", "block_2")
    sub_topic: str  # Sub-topic name
    overview: str  # Topic overview/background
    status: TopicStatus = TopicStatus.PENDING  # Current lifecycle status
    tool_traces: list[ToolTrace] = field(default_factory=list)  # Tool call trace list
    iteration_count: int = 0  # Current iteration count
    created_at: str = field(default_factory=lambda: datetime.now().isoformat())
    updated_at: str = field(default_factory=lambda: datetime.now().isoformat())
    metadata: dict[str, Any] = field(default_factory=dict)  # Additional metadata

    def add_tool_trace(self, trace: ToolTrace) -> None:
        """Append a tool trace and refresh the update timestamp."""
        self.tool_traces.append(trace)
        self.updated_at = datetime.now().isoformat()

    def get_latest_trace(self) -> ToolTrace | None:
        """Return the most recent tool trace, or None when none exist."""
        if not self.tool_traces:
            return None
        return self.tool_traces[-1]

    def get_all_summaries(self) -> str:
        """Return one "[tool_type] summary" line per trace, newline-joined."""
        if not self.tool_traces:
            return ""
        lines = [f"[{item.tool_type}] {item.summary}" for item in self.tool_traces]
        return "\n".join(lines)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-friendly dictionary."""
        result = asdict(self)
        # asdict() leaves the Enum object untouched; store its string value.
        result["status"] = self.status.value
        result["tool_traces"] = [item.to_dict() for item in self.tool_traces]
        return result

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TopicBlock":
        """Deserialize from a dictionary produced by ``to_dict``."""
        payload = dict(data)
        raw_status = payload.get("status")
        if isinstance(raw_status, str):
            payload["status"] = TopicStatus(raw_status)
        if "tool_traces" in payload:
            payload["tool_traces"] = [
                ToolTrace.from_dict(item) if isinstance(item, dict) else item
                for item in payload["tool_traces"]
            ]
        return cls(**payload)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
class DynamicTopicQueue:
    """
    Dynamic topic queue - Core memory and scheduling center of the system.

    Holds an ordered list of TopicBlock items, hands out pending blocks,
    tracks status transitions, and (optionally) auto-persists itself to a
    JSON state file after every mutation.
    """

    def __init__(
        self, research_id: str, max_length: int | None = None, state_file: str | None = None
    ):
        """
        Initialize queue

        Args:
            research_id: Research task ID
            max_length: Maximum queue length (None means unlimited)
            state_file: Auto-persistence file path
        """
        self.research_id = research_id
        self.blocks: list[TopicBlock] = []
        self.block_counter = 0  # Monotonic counter used to mint block IDs
        self.created_at = datetime.now().isoformat()
        # Only a positive int enables the cap; anything else means unlimited.
        self.max_length = max_length if isinstance(max_length, int) and max_length > 0 else None
        self.state_file = state_file

    def set_state_file(self, filepath: str | None) -> None:
        """Set queue auto-persistence file (and persist immediately)."""
        self.state_file = filepath
        self._auto_save()

    @staticmethod
    def _normalize_topic(text: str) -> str:
        """Normalize a topic title for duplicate detection."""
        return (text or "").strip().lower()

    def add_block(self, sub_topic: str, overview: str) -> TopicBlock:
        """
        Add new topic block to the end of queue

        Args:
            sub_topic: Sub-topic name
            overview: Topic overview

        Returns:
            Created TopicBlock

        Raises:
            RuntimeError: If the queue is already at maximum capacity.
        """
        if self.max_length and len(self.blocks) >= self.max_length:
            raise RuntimeError(
                f"Queue has reached maximum capacity ({self.max_length}), cannot add new topic."
            )
        self.block_counter += 1
        block_id = f"block_{self.block_counter}"
        block = TopicBlock(block_id=block_id, sub_topic=sub_topic, overview=overview)
        self.blocks.append(block)
        self._auto_save()
        return block

    def has_topic(self, sub_topic: str) -> bool:
        """Check if topic already exists (case-insensitive, ignoring leading/trailing spaces)"""
        target = self._normalize_topic(sub_topic)
        if not target:
            return False
        return any(self._normalize_topic(b.sub_topic) == target for b in self.blocks)

    def list_topics(self) -> list[str]:
        """List all current topic titles"""
        return [b.sub_topic for b in self.blocks]

    def get_pending_block(self) -> TopicBlock | None:
        """Return the first block still in PENDING status, or None."""
        for block in self.blocks:
            if block.status == TopicStatus.PENDING:
                return block
        return None

    def get_block_by_id(self, block_id: str) -> TopicBlock | None:
        """Return the block with the given ID, or None if not found."""
        for block in self.blocks:
            if block.block_id == block_id:
                return block
        return None

    def _set_status(self, block_id: str, status: TopicStatus) -> bool:
        """Transition a block to ``status``; returns False for unknown IDs.

        Shared implementation for the mark_* methods (previously three
        copy-pasted bodies). Updates the timestamp and auto-saves.
        """
        block = self.get_block_by_id(block_id)
        if block is None:
            return False
        block.status = status
        block.updated_at = datetime.now().isoformat()
        self._auto_save()
        return True

    def mark_researching(self, block_id: str) -> bool:
        """Mark topic block as researching; returns whether marking succeeded."""
        return self._set_status(block_id, TopicStatus.RESEARCHING)

    def mark_completed(self, block_id: str) -> bool:
        """Mark topic block as completed; returns whether marking succeeded."""
        return self._set_status(block_id, TopicStatus.COMPLETED)

    def mark_failed(self, block_id: str) -> bool:
        """Mark topic block as failed; returns whether marking succeeded."""
        return self._set_status(block_id, TopicStatus.FAILED)

    def get_all_completed_blocks(self) -> list[TopicBlock]:
        """Get all completed topic blocks"""
        return [b for b in self.blocks if b.status == TopicStatus.COMPLETED]

    def get_all_pending_blocks(self) -> list[TopicBlock]:
        """Get all pending topic blocks"""
        return [b for b in self.blocks if b.status == TopicStatus.PENDING]

    def is_all_completed(self) -> bool:
        """True when the queue is non-empty and every block is COMPLETED.

        An empty queue deliberately reports False (nothing has been done yet).
        """
        if not self.blocks:
            return False
        return all(b.status == TopicStatus.COMPLETED for b in self.blocks)

    def get_statistics(self) -> dict[str, Any]:
        """Get queue statistics (per-status counts and total tool calls)."""
        return {
            "total_blocks": len(self.blocks),
            "pending": len(self.get_all_pending_blocks()),
            "researching": len([b for b in self.blocks if b.status == TopicStatus.RESEARCHING]),
            "completed": len(self.get_all_completed_blocks()),
            "failed": len([b for b in self.blocks if b.status == TopicStatus.FAILED]),
            "total_tool_calls": sum(len(b.tool_traces) for b in self.blocks),
        }

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary"""
        return {
            "research_id": self.research_id,
            "created_at": self.created_at,
            "blocks": [b.to_dict() for b in self.blocks],
            "statistics": self.get_statistics(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "DynamicTopicQueue":
        """Create from dictionary.

        Note: max_length and state_file are not serialized and therefore
        not restored; callers must re-apply them if needed.
        """
        queue = cls(data["research_id"])
        queue.created_at = data.get("created_at", queue.created_at)
        for block_data in data.get("blocks", []):
            block = TopicBlock.from_dict(block_data)
            queue.blocks.append(block)
            # Keep the ID counter ahead of every restored "block_<n>" ID so
            # newly added blocks never collide with restored ones.
            if block.block_id.startswith("block_"):
                try:
                    block_num = int(block.block_id.split("_")[1])
                    queue.block_counter = max(queue.block_counter, block_num)
                except (ValueError, IndexError):
                    pass
        return queue

    def save_to_json(self, filepath: str) -> None:
        """Save queue to JSON file (parent directories are created as needed)."""
        Path(filepath).parent.mkdir(parents=True, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(self.to_dict(), f, ensure_ascii=False, indent=2)

    def _auto_save(self) -> None:
        """Auto-save if state_file is set; persistence failures never raise.

        Best-effort by design: a failed save is reported but must not abort
        the research run.
        """
        if self.state_file:
            try:
                self.save_to_json(self.state_file)
            except Exception as exc:
                print(f"â ď¸ Failed to save queue progress: {exc}")

    @classmethod
    def load_from_json(cls, filepath: str) -> "DynamicTopicQueue":
        """Load queue from JSON file"""
        with open(filepath, encoding="utf-8") as f:
            data = json.load(f)
        return cls.from_dict(data)
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
# Public API of this module.
__all__ = [
    "DynamicTopicQueue",
    "ToolTrace",
    "ToolType",
    "TopicBlock",
    "TopicStatus",
]
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
DR-in-KG 2.0 - Main Entry
|
|
5
|
+
Deep research system based on dynamic topic queue
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python main.py --topic "Research Topic" [--preset quick/standard/deep]
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import asyncio
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
from dotenv import load_dotenv
|
|
17
|
+
import yaml
|
|
18
|
+
|
|
19
|
+
from src.agents.research.research_pipeline import ResearchPipeline
|
|
20
|
+
from src.services.llm import get_llm_config
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def load_config(config_path: str = None, preset: str = None) -> dict:
    """
    Load configuration file (with main.yaml merge).

    Args:
        config_path: Configuration file path. When omitted, the packaged
            ``research_config.yaml`` is loaded and merged with ``main.yaml``
            via the shared config service.
        preset: Preset mode (quick/standard/deep) whose values override the
            matching sections of the base configuration.

    Returns:
        Configuration dictionary (merged with main.yaml, preset applied).

    Raises:
        FileNotFoundError: If an explicit ``config_path`` does not exist.
    """
    if config_path is None:
        project_root = Path(__file__).parent.parent.parent.parent
        from src.services.config import load_config_with_main

        config = load_config_with_main("research_config.yaml", project_root)
    else:
        # If custom config path provided, load it directly (for backward compatibility)
        config_file = Path(config_path)
        if not config_file.exists():
            raise FileNotFoundError(f"Configuration file not found: {config_file}")
        with open(config_file, encoding="utf-8") as f:
            config = yaml.safe_load(f) or {}

    # Apply preset overrides on top of the base configuration.
    if preset and preset in config.get("presets", {}):
        print(f"✅ Applied preset configuration: {preset}")
        for key, value in config["presets"][preset].items():
            if isinstance(value, dict) and isinstance(config.get(key), dict):
                # Merge section-level overrides into the existing section.
                config[key].update(value)
            else:
                # Fix: overrides for keys absent from the base config, and
                # scalar overrides, were previously dropped silently.
                config[key] = value

    return config
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def display_config(config: dict):
    """Pretty-print the active research configuration to stdout."""
    bar = "=" * 70
    planning = config.get("planning", {})
    researching = config.get("researching", {})
    reporting = config.get("reporting", {})
    decompose = planning.get("decompose", {})

    summary = [
        "\n" + bar,
        "đ Current Configuration",
        bar,
        "ăPlanning Configurationă",
        f" Initial subtopics: {decompose.get('initial_subtopics', 5)}",
        f" Max subtopics: {decompose.get('max_subtopics', 10)}",
        "\năResearching Configurationă",
        f" Max iterations: {researching.get('max_iterations', 5)}",
        f" Research mode: {researching.get('research_mode', 'deep')}",
        " Enabled tools:",
        f" - RAG: {researching.get('enable_rag_hybrid', True)}",
        f" - Web Search: {researching.get('enable_web_search', True)}",
        f" - Paper Search: {researching.get('enable_paper_search', True)}",
        "\năReporting Configurationă",
        f" Min section length: {reporting.get('min_section_length', 500)} characters",
        f" Enable topic deduplication: {reporting.get('enable_deduplication', True)}",
        bar + "\n",
    ]
    for line in summary:
        print(line)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
async def main():
    """CLI entry point: parse arguments, load configuration, run the pipeline.

    Exits with status 1 on configuration or research failure, 0 on user
    interrupt.
    """
    parser = argparse.ArgumentParser(
        description="DR-in-KG 2.0 - Deep research system based on dynamic topic queue",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Quick mode
  python main.py --topic "Deep Learning Basics" --preset quick

  # Standard mode
  python main.py --topic "Transformer Architecture" --preset standard

  # Deep mode
  python main.py --topic "Graph Neural Networks" --preset deep
""",
    )

    parser.add_argument("--topic", type=str, required=True, help="Research topic")

    parser.add_argument(
        "--config",
        type=str,
        default="config.yaml",
        help="Configuration file path (default: config.yaml)",
    )

    parser.add_argument(
        "--preset",
        type=str,
        choices=["quick", "standard", "deep"],
        help="Preset configuration (quick: fast, standard: standard, deep: deep)",
    )

    parser.add_argument("--output-dir", type=str, help="Output directory (overrides config file)")

    args = parser.parse_args()

    # Load environment variables before reading LLM settings.
    load_dotenv()

    # Validate API configuration early so we fail fast with guidance.
    try:
        llm_config = get_llm_config()
    except ValueError as e:
        print(f"❌ Error: {e}")
        print("Please configure in .env or DeepTutor.env file:")
        print(" LLM_MODEL=gpt-4o")
        print(" LLM_API_KEY=your_api_key_here")
        print(" LLM_HOST=https://api.openai.com/v1")
        sys.exit(1)

    # Load configuration (with optional preset overrides).
    try:
        config = load_config(args.config, args.preset)
    except Exception as e:
        print(f"❌ Failed to load configuration: {e!s}")
        sys.exit(1)

    # Override configuration (command line arguments take priority).
    if args.output_dir:
        # Fix: guard against configs without a "system" section (KeyError).
        system_cfg = config.setdefault("system", {})
        system_cfg["output_base_dir"] = args.output_dir
        system_cfg["reports_dir"] = args.output_dir

    # Display configuration
    display_config(config)

    # Create research pipeline
    pipeline = ResearchPipeline(
        config=config, api_key=llm_config.api_key, base_url=llm_config.base_url
    )

    # Execute research
    try:
        result = await pipeline.run(topic=args.topic)

        bar = "=" * 70
        print("\n" + bar)
        print("✅ Research completed!")
        print(bar)
        print(f"Research ID: {result['research_id']}")
        print(f"Topic: {result['topic']}")
        print(f"Final Report: {result['final_report_path']}")
        print(bar + "\n")

    except KeyboardInterrupt:
        print("\n\n⚠️ Research interrupted by user")
        sys.exit(0)
    except Exception as e:
        print(f"\n\n❌ Research failed: {e!s}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
if __name__ == "__main__":
    # Windows compatibility: install the selector-based event loop policy
    # before asyncio.run creates the loop — presumably to avoid issues with
    # libraries that do not support the default Proactor loop (confirm).
    if sys.platform == "win32":
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

    asyncio.run(main())
|