local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +96 -84
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +72 -44
- local_deep_research/search_system.py +147 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1592 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
- local_deep_research-0.2.0.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,16 +1,28 @@
|
|
1
|
-
|
2
|
-
from
|
3
|
-
|
4
|
-
|
1
|
+
import importlib
|
2
|
+
from typing import Dict, List
|
3
|
+
|
4
|
+
# Fix circular import by importing directly from source modules
|
5
|
+
from .config.llm_config import get_llm
|
5
6
|
from .search_system import AdvancedSearchSystem
|
6
|
-
|
7
|
-
from . import
|
8
|
-
from .
|
7
|
+
|
8
|
+
# from . import utilities
|
9
|
+
from .utilities import search_utilities
|
10
|
+
|
11
|
+
|
12
|
+
def get_report_generator(search_system=None):
|
13
|
+
"""Return an instance of the report generator with default settings.
|
14
|
+
|
15
|
+
Args:
|
16
|
+
search_system: Optional existing AdvancedSearchSystem to use
|
17
|
+
"""
|
18
|
+
return IntegratedReportGenerator(search_system=search_system)
|
19
|
+
|
9
20
|
|
10
21
|
class IntegratedReportGenerator:
|
11
|
-
def __init__(self, searches_per_section: int = 2):
|
22
|
+
def __init__(self, searches_per_section: int = 2, search_system=None):
|
12
23
|
self.model = get_llm()
|
13
|
-
|
24
|
+
# Use provided search_system or create a new one
|
25
|
+
self.search_system = search_system or AdvancedSearchSystem()
|
14
26
|
self.searches_per_section = (
|
15
27
|
searches_per_section # Control search depth per section
|
16
28
|
)
|
@@ -22,17 +34,16 @@ class IntegratedReportGenerator:
|
|
22
34
|
structure = self._determine_report_structure(initial_findings, query)
|
23
35
|
|
24
36
|
# Step 2: Research and generate content for each section in one step
|
25
|
-
sections = self._research_and_generate_sections(
|
37
|
+
sections = self._research_and_generate_sections(
|
38
|
+
initial_findings, structure, query
|
39
|
+
)
|
26
40
|
|
27
41
|
# Step 3: Format final report
|
28
42
|
report = self._format_final_report(sections, structure, query)
|
29
43
|
|
30
44
|
return report
|
31
45
|
|
32
|
-
def _determine_report_structure(
|
33
|
-
self, findings: Dict, query: str
|
34
|
-
) -> List[Dict]:
|
35
|
-
|
46
|
+
def _determine_report_structure(self, findings: Dict, query: str) -> List[Dict]:
|
36
47
|
"""Analyze content and determine optimal report structure."""
|
37
48
|
combined_content = findings["current_knowledge"]
|
38
49
|
prompt = f"""
|
@@ -92,44 +103,51 @@ class IntegratedReportGenerator:
|
|
92
103
|
) -> Dict[str, str]:
|
93
104
|
"""Research and generate content for each section in one step."""
|
94
105
|
sections = {}
|
95
|
-
|
106
|
+
|
96
107
|
for section in structure:
|
97
108
|
print(f"Processing section: {section['name']}")
|
98
109
|
section_content = []
|
99
110
|
section_content.append(f"# {section['name']}\n")
|
100
|
-
|
111
|
+
|
101
112
|
# Process each subsection by directly researching it
|
102
113
|
for subsection in section["subsections"]:
|
103
114
|
# Add subsection header
|
104
115
|
section_content.append(f"## {subsection['name']}\n")
|
105
116
|
section_content.append(f"_{subsection['purpose']}_\n\n")
|
106
|
-
|
117
|
+
|
107
118
|
# Generate a specific search query for this subsection
|
108
119
|
subsection_query = f"{query} {section['name']} {subsection['name']} {subsection['purpose']}"
|
109
|
-
|
110
|
-
print(
|
111
|
-
|
120
|
+
|
121
|
+
print(
|
122
|
+
f"Researching subsection: {subsection['name']} with query: {subsection_query}"
|
123
|
+
)
|
124
|
+
|
112
125
|
# Configure search system for focused search
|
113
126
|
original_max_iterations = self.search_system.max_iterations
|
114
127
|
self.search_system.max_iterations = 1 # Keep search focused
|
115
|
-
|
128
|
+
|
116
129
|
# Perform search for this subsection
|
117
130
|
subsection_results = self.search_system.analyze_topic(subsection_query)
|
118
|
-
|
131
|
+
|
119
132
|
# Restore original iterations setting
|
120
133
|
self.search_system.max_iterations = original_max_iterations
|
121
|
-
|
134
|
+
|
122
135
|
# Add the researched content for this subsection
|
123
|
-
if
|
136
|
+
if (
|
137
|
+
"current_knowledge" in subsection_results
|
138
|
+
and subsection_results["current_knowledge"]
|
139
|
+
):
|
124
140
|
section_content.append(subsection_results["current_knowledge"])
|
125
141
|
else:
|
126
|
-
section_content.append(
|
127
|
-
|
142
|
+
section_content.append(
|
143
|
+
"*Limited information was found for this subsection.*\n"
|
144
|
+
)
|
145
|
+
|
128
146
|
section_content.append("\n\n")
|
129
|
-
|
147
|
+
|
130
148
|
# Combine all content for this section
|
131
149
|
sections[section["name"]] = "\n".join(section_content)
|
132
|
-
|
150
|
+
|
133
151
|
return sections
|
134
152
|
|
135
153
|
def _generate_sections(
|
@@ -157,15 +175,21 @@ class IntegratedReportGenerator:
|
|
157
175
|
for i, section in enumerate(structure, 1):
|
158
176
|
toc.append(f"{i}. **{section['name']}**")
|
159
177
|
for j, subsection in enumerate(section["subsections"], 1):
|
160
|
-
toc.append(
|
178
|
+
toc.append(
|
179
|
+
f" {i}.{j} {subsection['name']} | _{subsection['purpose']}_"
|
180
|
+
)
|
161
181
|
|
162
182
|
# Combine TOC and sections
|
163
183
|
report_parts = ["\n".join(toc), ""]
|
164
|
-
|
184
|
+
|
165
185
|
# Add a summary of the research
|
166
186
|
report_parts.append("# Research Summary")
|
167
|
-
report_parts.append(
|
168
|
-
|
187
|
+
report_parts.append(
|
188
|
+
"This report was researched using an advanced search system."
|
189
|
+
)
|
190
|
+
report_parts.append(
|
191
|
+
"Research included targeted searches for each section and subsection."
|
192
|
+
)
|
169
193
|
report_parts.append("\n---\n")
|
170
194
|
|
171
195
|
# Add each section's content
|
@@ -173,30 +197,34 @@ class IntegratedReportGenerator:
|
|
173
197
|
if section["name"] in sections:
|
174
198
|
report_parts.append(sections[section["name"]])
|
175
199
|
report_parts.append("")
|
176
|
-
|
200
|
+
|
177
201
|
# Format links from search system
|
178
|
-
|
179
|
-
|
202
|
+
# Get utilities module dynamically to avoid circular imports
|
203
|
+
utilities = importlib.import_module("local_deep_research.utilities")
|
204
|
+
formatted_all_links = utilities.search_utilities.format_links_to_markdown(
|
205
|
+
all_links=self.search_system.all_links_of_system
|
206
|
+
)
|
207
|
+
|
180
208
|
# Create final report with all parts
|
181
209
|
final_report_content = "\n\n".join(report_parts)
|
182
|
-
final_report_content =
|
183
|
-
|
210
|
+
final_report_content = (
|
211
|
+
final_report_content + "\n\n## Sources\n\n" + formatted_all_links
|
212
|
+
)
|
213
|
+
|
184
214
|
# Create metadata dictionary
|
185
215
|
from datetime import datetime
|
216
|
+
|
186
217
|
metadata = {
|
187
218
|
"generated_at": datetime.utcnow().isoformat(),
|
188
219
|
"initial_sources": len(self.search_system.all_links_of_system),
|
189
220
|
"sections_researched": len(structure),
|
190
221
|
"searches_per_section": self.searches_per_section,
|
191
|
-
"query": query
|
222
|
+
"query": query,
|
192
223
|
}
|
193
|
-
|
224
|
+
|
194
225
|
# Return both content and metadata
|
195
|
-
return {
|
196
|
-
"content": final_report_content,
|
197
|
-
"metadata": metadata
|
198
|
-
}
|
226
|
+
return {"content": final_report_content, "metadata": metadata}
|
199
227
|
|
200
228
|
def _generate_error_report(self, query: str, error_msg: str) -> str:
|
201
229
|
error_report = f"=== ERROR REPORT ===\nQuery: {query}\nError: {error_msg}"
|
202
|
-
return error_report
|
230
|
+
return error_report
|
@@ -1,306 +1,170 @@
|
|
1
|
-
|
2
|
-
from datetime import datetime
|
3
|
-
from .utilties.search_utilities import remove_think_tags, format_findings_to_text, format_links
|
4
|
-
import os
|
5
|
-
from .utilties.enums import KnowledgeAccumulationApproach
|
6
|
-
from .config import settings, get_llm, get_search
|
7
|
-
from .citation_handler import CitationHandler
|
8
|
-
from datetime import datetime
|
9
|
-
from .utilties.search_utilities import extract_links_from_search_results
|
1
|
+
# src/local_deep_research/search_system/search_system.py
|
10
2
|
import logging
|
11
|
-
|
12
|
-
|
13
|
-
|
3
|
+
from typing import Callable, Dict
|
4
|
+
|
5
|
+
from langchain_core.language_models import BaseChatModel
|
6
|
+
|
7
|
+
from .advanced_search_system.findings.repository import FindingsRepository
|
8
|
+
from .advanced_search_system.questions.standard_question import (
|
9
|
+
StandardQuestionGenerator,
|
10
|
+
)
|
11
|
+
from .advanced_search_system.strategies.iterdrag_strategy import IterDRAGStrategy
|
12
|
+
from .advanced_search_system.strategies.parallel_search_strategy import (
|
13
|
+
ParallelSearchStrategy,
|
14
|
+
)
|
15
|
+
from .advanced_search_system.strategies.rapid_search_strategy import RapidSearchStrategy
|
16
|
+
from .advanced_search_system.strategies.standard_strategy import StandardSearchStrategy
|
17
|
+
from .citation_handler import CitationHandler
|
18
|
+
from .config.config_files import settings
|
19
|
+
from .config.llm_config import get_llm
|
20
|
+
from .config.search_config import get_search
|
21
|
+
from .utilities.db_utils import get_db_setting
|
14
22
|
|
15
|
-
|
16
|
-
# Get fresh configuration
|
23
|
+
logger = logging.getLogger(__name__)
|
17
24
|
|
18
|
-
self.search = get_search()
|
19
|
-
self.model = get_llm()
|
20
|
-
self.max_iterations = settings.search.iterations
|
21
|
-
self.questions_per_iteration = settings.search.questions_per_iteration
|
22
|
-
|
23
|
-
self.context_limit = settings.general.knowledge_accumulation_context_limit
|
24
|
-
self.questions_by_iteration = {}
|
25
|
-
self.citation_handler = CitationHandler(self.model)
|
26
|
-
self.progress_callback = None
|
27
|
-
self.all_links_of_system = list()
|
28
|
-
|
29
|
-
# Check if search is available, log warning if not
|
30
|
-
if self.search is None:
|
31
|
-
logger.info("WARNING: Search system initialized with no search engine! Research will not be effective.")
|
32
|
-
self._update_progress("WARNING: No search engine available", None, {"error": "No search engine configured properly"})
|
33
25
|
|
34
|
-
|
26
|
+
class AdvancedSearchSystem:
|
27
|
+
"""
|
28
|
+
Advanced search system that coordinates different search strategies.
|
29
|
+
"""
|
30
|
+
|
31
|
+
def __init__(
|
32
|
+
self,
|
33
|
+
strategy_name: str = "parallel",
|
34
|
+
include_text_content: bool = True,
|
35
|
+
use_cross_engine_filter: bool = True,
|
36
|
+
llm: BaseChatModel | None = None,
|
37
|
+
):
|
38
|
+
"""Initialize the advanced search system.
|
35
39
|
|
36
|
-
def set_progress_callback(self, callback: Callable[[str, int, dict], None]) -> None:
|
37
|
-
"""Set a callback function to receive progress updates.
|
38
|
-
|
39
40
|
Args:
|
40
|
-
|
41
|
+
strategy_name: The name of the search strategy to use ("standard" or "iterdrag")
|
42
|
+
include_text_content: If False, only includes metadata and links in search results
|
43
|
+
use_cross_engine_filter: Whether to filter results across search
|
44
|
+
engines.
|
45
|
+
llm: LLM to use. If not provided, it will use the default one.
|
41
46
|
"""
|
42
|
-
|
47
|
+
# Get configuration
|
48
|
+
self.search = get_search()
|
49
|
+
self.model = llm
|
50
|
+
if llm is None:
|
51
|
+
self.model = get_llm()
|
52
|
+
self.max_iterations = get_db_setting(
|
53
|
+
"search.iterations", settings.search.iterations
|
54
|
+
)
|
55
|
+
self.questions_per_iteration = get_db_setting(
|
56
|
+
"search.questions_per_iteration", settings.search.questions_per_iteration
|
57
|
+
)
|
43
58
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
message: Description of the current progress state
|
49
|
-
progress_percent: Progress percentage (0-100), if applicable
|
50
|
-
metadata: Additional data about the progress state
|
51
|
-
"""
|
52
|
-
if self.progress_callback:
|
53
|
-
self.progress_callback(message, progress_percent, metadata or {})
|
59
|
+
# Log the strategy name that's being used
|
60
|
+
logger.info(
|
61
|
+
f"Initializing AdvancedSearchSystem with strategy_name='{strategy_name}'"
|
62
|
+
)
|
54
63
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
if
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
64
|
+
# Initialize components
|
65
|
+
self.citation_handler = CitationHandler(self.model)
|
66
|
+
self.question_generator = StandardQuestionGenerator(self.model)
|
67
|
+
self.findings_repository = FindingsRepository(self.model)
|
68
|
+
|
69
|
+
# Initialize strategy based on name
|
70
|
+
if strategy_name.lower() == "iterdrag":
|
71
|
+
logger.info("Creating IterDRAGStrategy instance")
|
72
|
+
self.strategy = IterDRAGStrategy(model=self.model, search=self.search)
|
73
|
+
elif strategy_name.lower() == "parallel":
|
74
|
+
logger.info("Creating ParallelSearchStrategy instance")
|
75
|
+
self.strategy = ParallelSearchStrategy(
|
76
|
+
model=self.model,
|
77
|
+
search=self.search,
|
78
|
+
include_text_content=include_text_content,
|
79
|
+
use_cross_engine_filter=use_cross_engine_filter,
|
80
|
+
)
|
81
|
+
elif strategy_name.lower() == "rapid":
|
82
|
+
logger.info("Creating RapidSearchStrategy instance")
|
83
|
+
self.strategy = RapidSearchStrategy(model=self.model, search=self.search)
|
69
84
|
else:
|
70
|
-
|
85
|
+
logger.info("Creating StandardSearchStrategy instance")
|
86
|
+
self.strategy = StandardSearchStrategy(model=self.model, search=self.search)
|
71
87
|
|
72
|
-
|
73
|
-
|
74
|
-
q.replace("Q:", "").strip()
|
75
|
-
for q in remove_think_tags(response.content).split("\n")
|
76
|
-
if q.strip().startswith("Q:")
|
77
|
-
][: self.questions_per_iteration]
|
78
|
-
|
79
|
-
self._update_progress(
|
80
|
-
f"Generated {len(questions)} follow-up questions",
|
81
|
-
None,
|
82
|
-
{"questions": questions}
|
83
|
-
)
|
84
|
-
|
85
|
-
return questions
|
88
|
+
# Log the actual strategy class
|
89
|
+
logger.info(f"Created strategy of type: {type(self.strategy).__name__}")
|
86
90
|
|
87
|
-
|
88
|
-
self.
|
91
|
+
# For backward compatibility
|
92
|
+
self.questions_by_iteration = {}
|
93
|
+
self.progress_callback = None
|
94
|
+
self.all_links_of_system = list()
|
89
95
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
if self.questions_by_iteration:
|
94
|
-
prompt = f"""First provide a high-quality 1 page explanation with IEEE Referencing Style e.g. [1,2]. Never make up sources. Than provide a exact high-quality one sentence-long answer to the query.
|
96
|
+
# Configure the strategy with our attributes
|
97
|
+
if hasattr(self, "progress_callback") and self.progress_callback:
|
98
|
+
self.strategy.set_progress_callback(self.progress_callback)
|
95
99
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
self._update_progress("Knowledge compression complete", None)
|
102
|
-
response = remove_think_tags(response.content)
|
103
|
-
response = str(response) #+ "\n\n" + str(formatted_links)
|
100
|
+
def _progress_callback(self, message: str, progress: int, metadata: dict) -> None:
|
101
|
+
"""Handle progress updates from the strategy."""
|
102
|
+
logger.info(f"Progress: {progress}% - {message}")
|
103
|
+
if hasattr(self, "progress_callback"):
|
104
|
+
self.progress_callback(message, progress, metadata)
|
104
105
|
|
105
|
-
|
106
|
+
def set_progress_callback(self, callback: Callable[[str, int, dict], None]) -> None:
|
107
|
+
"""Set a callback function to receive progress updates."""
|
108
|
+
self.progress_callback = callback
|
109
|
+
if hasattr(self, "strategy"):
|
110
|
+
self.strategy.set_progress_callback(callback)
|
106
111
|
|
107
112
|
def analyze_topic(self, query: str) -> Dict:
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
findings = []
|
113
|
-
current_knowledge = ""
|
114
|
-
iteration = 0
|
115
|
-
total_iterations = self.max_iterations
|
116
|
-
section_links = list()
|
117
|
-
|
118
|
-
self._update_progress("Initializing research system", 5, {
|
119
|
-
"phase": "init",
|
120
|
-
"iterations_planned": total_iterations
|
121
|
-
})
|
122
|
-
|
123
|
-
# Check if search engine is available
|
124
|
-
if self.search is None:
|
125
|
-
error_msg = "Error: No search engine available. Please check your configuration."
|
126
|
-
self._update_progress(error_msg, 100, {
|
127
|
-
"phase": "error",
|
128
|
-
"error": "No search engine available",
|
129
|
-
"status": "failed"
|
130
|
-
})
|
131
|
-
return {
|
132
|
-
"findings": [],
|
133
|
-
"iterations": 0,
|
134
|
-
"questions": {},
|
135
|
-
"formatted_findings": "Error: Unable to conduct research without a search engine.",
|
136
|
-
"current_knowledge": "",
|
137
|
-
"error": error_msg
|
138
|
-
}
|
139
|
-
|
140
|
-
while iteration < self.max_iterations:
|
141
|
-
iteration_progress_base = (iteration / total_iterations) * 100
|
142
|
-
self._update_progress(f"Starting iteration {iteration + 1} of {total_iterations}",
|
143
|
-
int(iteration_progress_base),
|
144
|
-
{"phase": "iteration_start", "iteration": iteration + 1})
|
145
|
-
|
146
|
-
# Generate questions for this iteration
|
147
|
-
questions = self._get_follow_up_questions(current_knowledge, query)
|
148
|
-
self.questions_by_iteration[iteration] = questions
|
149
|
-
logger.info(f"Generated questions: {questions}")
|
150
|
-
question_count = len(questions)
|
151
|
-
for q_idx, question in enumerate(questions):
|
152
|
-
question_progress_base = iteration_progress_base + (((q_idx+1) / question_count) * (100/total_iterations) * 0.5)
|
153
|
-
|
154
|
-
self._update_progress(f"Searching for: {question}",
|
155
|
-
int(question_progress_base),
|
156
|
-
{"phase": "search", "iteration": iteration + 1, "question_index": q_idx + 1})
|
157
|
-
|
158
|
-
try:
|
159
|
-
if self.search is None:
|
160
|
-
self._update_progress(f"Search engine unavailable, skipping search for: {question}",
|
161
|
-
int(question_progress_base + 2),
|
162
|
-
{"phase": "search_error", "error": "No search engine available"})
|
163
|
-
search_results = []
|
164
|
-
else:
|
165
|
-
search_results = self.search.run(question)
|
166
|
-
except Exception as e:
|
167
|
-
error_msg = f"Error during search: {str(e)}"
|
168
|
-
logger.info(f"SEARCH ERROR: {error_msg}")
|
169
|
-
self._update_progress(error_msg,
|
170
|
-
int(question_progress_base + 2),
|
171
|
-
{"phase": "search_error", "error": str(e)})
|
172
|
-
search_results = []
|
173
|
-
|
174
|
-
if search_results is None:
|
175
|
-
self._update_progress(f"No search results found for question: {question}",
|
176
|
-
int(question_progress_base + 2),
|
177
|
-
{"phase": "search_complete", "result_count": 0})
|
178
|
-
search_results = [] # Initialize to empty list instead of None
|
179
|
-
continue
|
180
|
-
|
181
|
-
self._update_progress(f"Found {len(search_results)} results for question: {question}",
|
182
|
-
int(question_progress_base + 2),
|
183
|
-
{"phase": "search_complete", "result_count": len(search_results)})
|
184
|
-
|
185
|
-
logger.info(f"len search: {len(search_results)}")
|
186
|
-
|
187
|
-
if len(search_results) == 0:
|
188
|
-
continue
|
113
|
+
"""Analyze a topic using the current strategy.
|
189
114
|
|
190
|
-
|
191
|
-
|
192
|
-
|
115
|
+
Args:
|
116
|
+
query: The research query to analyze
|
117
|
+
"""
|
193
118
|
|
119
|
+
# Send progress message with LLM info
|
120
|
+
self.progress_callback(
|
121
|
+
f"Using {get_db_setting('llm.provider')} model: {get_db_setting('llm.model')}",
|
122
|
+
1, # Low percentage to show this as an early step
|
123
|
+
{
|
124
|
+
"phase": "setup",
|
125
|
+
"llm_info": {
|
126
|
+
"name": get_db_setting("llm.model"),
|
127
|
+
"provider": get_db_setting("llm.provider"),
|
128
|
+
},
|
129
|
+
},
|
130
|
+
)
|
131
|
+
# Send progress message with search strategy info
|
132
|
+
search_tool = get_db_setting("search.tool")
|
133
|
+
|
134
|
+
self.progress_callback(
|
135
|
+
f"Using search tool: {search_tool}",
|
136
|
+
1.5, # Between setup and processing steps
|
137
|
+
{
|
138
|
+
"phase": "setup",
|
139
|
+
"search_info": {
|
140
|
+
"tool": search_tool,
|
141
|
+
},
|
142
|
+
},
|
143
|
+
)
|
194
144
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
{
|
211
|
-
"phase": f"Follow-up {iteration}.{questions.index(question) + 1}",
|
212
|
-
"content": results_with_links,
|
213
|
-
"question": question,
|
214
|
-
"search_results": search_results,
|
215
|
-
"documents": result["documents"],
|
216
|
-
}
|
145
|
+
# Use the strategy to analyze the topic
|
146
|
+
result = self.strategy.analyze_topic(query)
|
147
|
+
|
148
|
+
# Update our attributes for backward compatibility
|
149
|
+
if hasattr(self.strategy, "questions_by_iteration"):
|
150
|
+
self.questions_by_iteration = self.strategy.questions_by_iteration
|
151
|
+
# Send progress message with search info
|
152
|
+
self.progress_callback(
|
153
|
+
f"Processed questions: {self.strategy.questions_by_iteration}",
|
154
|
+
2, # Low percentage to show this as an early step
|
155
|
+
{
|
156
|
+
"phase": "setup",
|
157
|
+
"search_info": {
|
158
|
+
"questions_by_iteration": len(
|
159
|
+
self.strategy.questions_by_iteration
|
217
160
|
)
|
161
|
+
},
|
162
|
+
},
|
163
|
+
)
|
164
|
+
if hasattr(self.strategy, "all_links_of_system"):
|
165
|
+
self.all_links_of_system = self.strategy.all_links_of_system
|
218
166
|
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
if settings.general.knowledge_accumulation == str(KnowledgeAccumulationApproach.QUESTION.value):
|
223
|
-
logger.info("Compressing knowledge")
|
224
|
-
self._update_progress(f"Compress Knowledge for: {question}",
|
225
|
-
int(question_progress_base + 0),
|
226
|
-
{"phase": "analysis"})
|
227
|
-
current_knowledge = self._compress_knowledge(current_knowledge , query, section_links)
|
228
|
-
|
229
|
-
self._update_progress(f"Analysis complete for question: {question}",
|
230
|
-
int(question_progress_base + 10),
|
231
|
-
{"phase": "analysis_complete"})
|
232
|
-
except Exception as e:
|
233
|
-
error_msg = f"Error analyzing results: {str(e)}"
|
234
|
-
logger.info(f"ANALYSIS ERROR: {error_msg}")
|
235
|
-
self._update_progress(error_msg,
|
236
|
-
int(question_progress_base + 10),
|
237
|
-
{"phase": "analysis_error", "error": str(e)})
|
238
|
-
iteration += 1
|
239
|
-
|
240
|
-
self._update_progress(f"Compressing knowledge after iteration {iteration}",
|
241
|
-
int((iteration / total_iterations) * 100 - 5),
|
242
|
-
{"phase": "knowledge_compression"})
|
243
|
-
logger.info(str(iteration))
|
244
|
-
logger.info(settings.general.knowledge_accumulation)
|
245
|
-
logger.info(str(KnowledgeAccumulationApproach.ITERATION.value))
|
246
|
-
if settings.general.knowledge_accumulation == KnowledgeAccumulationApproach.ITERATION.value:
|
247
|
-
try:
|
248
|
-
logger.info("ITERATION - Compressing Knowledge")
|
249
|
-
current_knowledge = self._compress_knowledge(current_knowledge , query, section_links)
|
250
|
-
logger.info("FINISHED ITERATION - Compressing Knowledge")
|
251
|
-
except Exception as e:
|
252
|
-
error_msg = f"Error compressing knowledge: {str(e)}"
|
253
|
-
logger.info(f"COMPRESSION ERROR: {error_msg}")
|
254
|
-
self._update_progress(error_msg,
|
255
|
-
int((iteration / total_iterations) * 100 - 3),
|
256
|
-
{"phase": "compression_error", "error": str(e)})
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
self._update_progress(f"Iteration {iteration} complete",
|
261
|
-
int((iteration / total_iterations) * 100),
|
262
|
-
{"phase": "iteration_complete", "iteration": iteration})
|
263
|
-
|
264
|
-
try:
|
265
|
-
formatted_findings = self._save_findings(findings, current_knowledge, query)
|
266
|
-
except Exception as e:
|
267
|
-
error_msg = f"Error saving findings: {str(e)}"
|
268
|
-
logger.info(f"SAVE ERROR: {error_msg}")
|
269
|
-
self._update_progress(error_msg,
|
270
|
-
int((iteration / total_iterations) * 100),
|
271
|
-
{"phase": "save_error", "error": str(e)})
|
272
|
-
formatted_findings = "Error: Could not format findings due to an error."
|
273
|
-
|
274
|
-
self._update_progress("Research complete", 95, {"phase": "complete"})
|
275
|
-
|
276
|
-
return {
|
277
|
-
"findings": findings,
|
278
|
-
"iterations": iteration,
|
279
|
-
"questions": self.questions_by_iteration,
|
280
|
-
"formatted_findings": formatted_findings,
|
281
|
-
"current_knowledge": current_knowledge
|
282
|
-
}
|
283
|
-
|
284
|
-
def _save_findings(self, findings: List[Dict], current_knowledge: str, query: str):
|
285
|
-
logger.info("Saving findings ...")
|
286
|
-
self._update_progress("Saving research findings...", None)
|
287
|
-
|
288
|
-
formatted_findings = format_findings_to_text(
|
289
|
-
findings, current_knowledge, self.questions_by_iteration
|
290
|
-
)
|
291
|
-
safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[
|
292
|
-
:50
|
293
|
-
]
|
294
|
-
safe_query = safe_query.replace(" ", "_").lower()
|
295
|
-
import local_deep_research.config as conf
|
296
|
-
output_dir = f"{conf.get_config_dir()}/research_outputs"
|
297
|
-
if not os.path.exists(output_dir):
|
298
|
-
os.makedirs(output_dir)
|
299
|
-
|
300
|
-
filename = os.path.join(output_dir, f"formatted_output_{safe_query}.txt")
|
167
|
+
# Include the search system instance for access to citations
|
168
|
+
result["search_system"] = self
|
301
169
|
|
302
|
-
|
303
|
-
text_file.write(formatted_findings)
|
304
|
-
logger.info("Saved findings")
|
305
|
-
self._update_progress("Research findings saved", None, {"filename": filename})
|
306
|
-
return formatted_findings
|
170
|
+
return result
|