local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +154 -160
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +87 -45
- local_deep_research/search_system.py +153 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1583 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
- local_deep_research-0.2.2.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,306 +1,176 @@
|
|
1
|
-
|
2
|
-
from datetime import datetime
|
3
|
-
from .utilties.search_utilities import remove_think_tags, format_findings_to_text, format_links
|
4
|
-
import os
|
5
|
-
from .utilties.enums import KnowledgeAccumulationApproach
|
6
|
-
from .config import settings, get_llm, get_search
|
7
|
-
from .citation_handler import CitationHandler
|
8
|
-
from datetime import datetime
|
9
|
-
from .utilties.search_utilities import extract_links_from_search_results
|
1
|
+
# src/local_deep_research/search_system/search_system.py
|
10
2
|
import logging
|
3
|
+
from typing import Callable, Dict
|
4
|
+
|
5
|
+
from langchain_core.language_models import BaseChatModel
|
6
|
+
|
7
|
+
from .advanced_search_system.findings.repository import FindingsRepository
|
8
|
+
from .advanced_search_system.questions.standard_question import (
|
9
|
+
StandardQuestionGenerator,
|
10
|
+
)
|
11
|
+
from .advanced_search_system.strategies.iterdrag_strategy import IterDRAGStrategy
|
12
|
+
from .advanced_search_system.strategies.parallel_search_strategy import (
|
13
|
+
ParallelSearchStrategy,
|
14
|
+
)
|
15
|
+
from .advanced_search_system.strategies.rapid_search_strategy import RapidSearchStrategy
|
16
|
+
from .advanced_search_system.strategies.standard_strategy import StandardSearchStrategy
|
17
|
+
from .citation_handler import CitationHandler
|
18
|
+
from .config.config_files import settings
|
19
|
+
from .config.llm_config import get_llm
|
20
|
+
from .config.search_config import get_search
|
21
|
+
from .utilities.db_utils import get_db_setting
|
22
|
+
from .web_search_engines.search_engine_base import BaseSearchEngine
|
23
|
+
|
11
24
|
logger = logging.getLogger(__name__)
|
12
|
-
class AdvancedSearchSystem:
|
13
|
-
def __init__(self):
|
14
25
|
|
15
|
-
|
16
|
-
# Get fresh configuration
|
17
26
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
self
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
self._update_progress("WARNING: No search engine available", None, {"error": "No search engine configured properly"})
|
33
|
-
|
34
|
-
|
27
|
+
class AdvancedSearchSystem:
|
28
|
+
"""
|
29
|
+
Advanced search system that coordinates different search strategies.
|
30
|
+
"""
|
31
|
+
|
32
|
+
def __init__(
|
33
|
+
self,
|
34
|
+
strategy_name: str = "parallel",
|
35
|
+
include_text_content: bool = True,
|
36
|
+
use_cross_engine_filter: bool = True,
|
37
|
+
llm: BaseChatModel | None = None,
|
38
|
+
search: BaseSearchEngine | None = None,
|
39
|
+
):
|
40
|
+
"""Initialize the advanced search system.
|
35
41
|
|
36
|
-
def set_progress_callback(self, callback: Callable[[str, int, dict], None]) -> None:
|
37
|
-
"""Set a callback function to receive progress updates.
|
38
|
-
|
39
42
|
Args:
|
40
|
-
|
43
|
+
strategy_name: The name of the search strategy to use ("standard" or "iterdrag")
|
44
|
+
include_text_content: If False, only includes metadata and links in search results
|
45
|
+
use_cross_engine_filter: Whether to filter results across search
|
46
|
+
engines.
|
47
|
+
llm: LLM to use. If not provided, it will use the default one.
|
48
|
+
search: Search engine to use. If not provided, it will use the
|
49
|
+
default one.
|
41
50
|
"""
|
42
|
-
|
51
|
+
# Get configuration
|
52
|
+
self.model = llm
|
53
|
+
if llm is None:
|
54
|
+
self.model = get_llm()
|
55
|
+
self.search = search
|
56
|
+
if search is None:
|
57
|
+
self.search = get_search(llm_instance=self.model)
|
58
|
+
self.max_iterations = get_db_setting(
|
59
|
+
"search.iterations", settings.search.iterations
|
60
|
+
)
|
61
|
+
self.questions_per_iteration = get_db_setting(
|
62
|
+
"search.questions_per_iteration", settings.search.questions_per_iteration
|
63
|
+
)
|
43
64
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
message: Description of the current progress state
|
49
|
-
progress_percent: Progress percentage (0-100), if applicable
|
50
|
-
metadata: Additional data about the progress state
|
51
|
-
"""
|
52
|
-
if self.progress_callback:
|
53
|
-
self.progress_callback(message, progress_percent, metadata or {})
|
65
|
+
# Log the strategy name that's being used
|
66
|
+
logger.info(
|
67
|
+
f"Initializing AdvancedSearchSystem with strategy_name='{strategy_name}'"
|
68
|
+
)
|
54
69
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
if
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
70
|
+
# Initialize components
|
71
|
+
self.citation_handler = CitationHandler(self.model)
|
72
|
+
self.question_generator = StandardQuestionGenerator(self.model)
|
73
|
+
self.findings_repository = FindingsRepository(self.model)
|
74
|
+
|
75
|
+
# Initialize strategy based on name
|
76
|
+
if strategy_name.lower() == "iterdrag":
|
77
|
+
logger.info("Creating IterDRAGStrategy instance")
|
78
|
+
self.strategy = IterDRAGStrategy(model=self.model, search=self.search)
|
79
|
+
elif strategy_name.lower() == "parallel":
|
80
|
+
logger.info("Creating ParallelSearchStrategy instance")
|
81
|
+
self.strategy = ParallelSearchStrategy(
|
82
|
+
model=self.model,
|
83
|
+
search=self.search,
|
84
|
+
include_text_content=include_text_content,
|
85
|
+
use_cross_engine_filter=use_cross_engine_filter,
|
86
|
+
)
|
87
|
+
elif strategy_name.lower() == "rapid":
|
88
|
+
logger.info("Creating RapidSearchStrategy instance")
|
89
|
+
self.strategy = RapidSearchStrategy(model=self.model, search=self.search)
|
69
90
|
else:
|
70
|
-
|
91
|
+
logger.info("Creating StandardSearchStrategy instance")
|
92
|
+
self.strategy = StandardSearchStrategy(model=self.model, search=self.search)
|
71
93
|
|
72
|
-
|
73
|
-
|
74
|
-
q.replace("Q:", "").strip()
|
75
|
-
for q in remove_think_tags(response.content).split("\n")
|
76
|
-
if q.strip().startswith("Q:")
|
77
|
-
][: self.questions_per_iteration]
|
78
|
-
|
79
|
-
self._update_progress(
|
80
|
-
f"Generated {len(questions)} follow-up questions",
|
81
|
-
None,
|
82
|
-
{"questions": questions}
|
83
|
-
)
|
84
|
-
|
85
|
-
return questions
|
94
|
+
# Log the actual strategy class
|
95
|
+
logger.info(f"Created strategy of type: {type(self.strategy).__name__}")
|
86
96
|
|
87
|
-
|
88
|
-
self.
|
97
|
+
# For backward compatibility
|
98
|
+
self.questions_by_iteration = {}
|
99
|
+
self.progress_callback = lambda _1, _2, _3: None
|
100
|
+
self.all_links_of_system = list()
|
89
101
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
if self.questions_by_iteration:
|
94
|
-
prompt = f"""First provide a high-quality 1 page explanation with IEEE Referencing Style e.g. [1,2]. Never make up sources. Than provide a exact high-quality one sentence-long answer to the query.
|
102
|
+
# Configure the strategy with our attributes
|
103
|
+
if hasattr(self, "progress_callback") and self.progress_callback:
|
104
|
+
self.strategy.set_progress_callback(self.progress_callback)
|
95
105
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
self._update_progress("Knowledge compression complete", None)
|
102
|
-
response = remove_think_tags(response.content)
|
103
|
-
response = str(response) #+ "\n\n" + str(formatted_links)
|
106
|
+
def _progress_callback(self, message: str, progress: int, metadata: dict) -> None:
|
107
|
+
"""Handle progress updates from the strategy."""
|
108
|
+
logger.info(f"Progress: {progress}% - {message}")
|
109
|
+
if hasattr(self, "progress_callback"):
|
110
|
+
self.progress_callback(message, progress, metadata)
|
104
111
|
|
105
|
-
|
112
|
+
def set_progress_callback(self, callback: Callable[[str, int, dict], None]) -> None:
|
113
|
+
"""Set a callback function to receive progress updates."""
|
114
|
+
self.progress_callback = callback
|
115
|
+
if hasattr(self, "strategy"):
|
116
|
+
self.strategy.set_progress_callback(callback)
|
106
117
|
|
107
118
|
def analyze_topic(self, query: str) -> Dict:
|
108
|
-
|
109
|
-
|
119
|
+
"""Analyze a topic using the current strategy.
|
110
120
|
|
121
|
+
Args:
|
122
|
+
query: The research query to analyze
|
123
|
+
"""
|
111
124
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
"
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
"error": error_msg
|
138
|
-
}
|
139
|
-
|
140
|
-
while iteration < self.max_iterations:
|
141
|
-
iteration_progress_base = (iteration / total_iterations) * 100
|
142
|
-
self._update_progress(f"Starting iteration {iteration + 1} of {total_iterations}",
|
143
|
-
int(iteration_progress_base),
|
144
|
-
{"phase": "iteration_start", "iteration": iteration + 1})
|
145
|
-
|
146
|
-
# Generate questions for this iteration
|
147
|
-
questions = self._get_follow_up_questions(current_knowledge, query)
|
148
|
-
self.questions_by_iteration[iteration] = questions
|
149
|
-
logger.info(f"Generated questions: {questions}")
|
150
|
-
question_count = len(questions)
|
151
|
-
for q_idx, question in enumerate(questions):
|
152
|
-
question_progress_base = iteration_progress_base + (((q_idx+1) / question_count) * (100/total_iterations) * 0.5)
|
153
|
-
|
154
|
-
self._update_progress(f"Searching for: {question}",
|
155
|
-
int(question_progress_base),
|
156
|
-
{"phase": "search", "iteration": iteration + 1, "question_index": q_idx + 1})
|
157
|
-
|
158
|
-
try:
|
159
|
-
if self.search is None:
|
160
|
-
self._update_progress(f"Search engine unavailable, skipping search for: {question}",
|
161
|
-
int(question_progress_base + 2),
|
162
|
-
{"phase": "search_error", "error": "No search engine available"})
|
163
|
-
search_results = []
|
164
|
-
else:
|
165
|
-
search_results = self.search.run(question)
|
166
|
-
except Exception as e:
|
167
|
-
error_msg = f"Error during search: {str(e)}"
|
168
|
-
logger.info(f"SEARCH ERROR: {error_msg}")
|
169
|
-
self._update_progress(error_msg,
|
170
|
-
int(question_progress_base + 2),
|
171
|
-
{"phase": "search_error", "error": str(e)})
|
172
|
-
search_results = []
|
173
|
-
|
174
|
-
if search_results is None:
|
175
|
-
self._update_progress(f"No search results found for question: {question}",
|
176
|
-
int(question_progress_base + 2),
|
177
|
-
{"phase": "search_complete", "result_count": 0})
|
178
|
-
search_results = [] # Initialize to empty list instead of None
|
179
|
-
continue
|
180
|
-
|
181
|
-
self._update_progress(f"Found {len(search_results)} results for question: {question}",
|
182
|
-
int(question_progress_base + 2),
|
183
|
-
{"phase": "search_complete", "result_count": len(search_results)})
|
184
|
-
|
185
|
-
logger.info(f"len search: {len(search_results)}")
|
186
|
-
|
187
|
-
if len(search_results) == 0:
|
188
|
-
continue
|
189
|
-
|
190
|
-
self._update_progress(f"Analyzing results for: {question}",
|
191
|
-
int(question_progress_base + 5),
|
192
|
-
{"phase": "analysis"})
|
193
|
-
|
125
|
+
# Send progress message with LLM info
|
126
|
+
self.progress_callback(
|
127
|
+
f"Using {get_db_setting('llm.provider')} model: {get_db_setting('llm.model')}",
|
128
|
+
1, # Low percentage to show this as an early step
|
129
|
+
{
|
130
|
+
"phase": "setup",
|
131
|
+
"llm_info": {
|
132
|
+
"name": get_db_setting("llm.model"),
|
133
|
+
"provider": get_db_setting("llm.provider"),
|
134
|
+
},
|
135
|
+
},
|
136
|
+
)
|
137
|
+
# Send progress message with search strategy info
|
138
|
+
search_tool = get_db_setting("search.tool")
|
139
|
+
|
140
|
+
self.progress_callback(
|
141
|
+
f"Using search tool: {search_tool}",
|
142
|
+
1.5, # Between setup and processing steps
|
143
|
+
{
|
144
|
+
"phase": "setup",
|
145
|
+
"search_info": {
|
146
|
+
"tool": search_tool,
|
147
|
+
},
|
148
|
+
},
|
149
|
+
)
|
194
150
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
{
|
211
|
-
"phase": f"Follow-up {iteration}.{questions.index(question) + 1}",
|
212
|
-
"content": results_with_links,
|
213
|
-
"question": question,
|
214
|
-
"search_results": search_results,
|
215
|
-
"documents": result["documents"],
|
216
|
-
}
|
151
|
+
# Use the strategy to analyze the topic
|
152
|
+
result = self.strategy.analyze_topic(query)
|
153
|
+
|
154
|
+
# Update our attributes for backward compatibility
|
155
|
+
if hasattr(self.strategy, "questions_by_iteration"):
|
156
|
+
self.questions_by_iteration = self.strategy.questions_by_iteration
|
157
|
+
# Send progress message with search info
|
158
|
+
self.progress_callback(
|
159
|
+
f"Processed questions: {self.strategy.questions_by_iteration}",
|
160
|
+
2, # Low percentage to show this as an early step
|
161
|
+
{
|
162
|
+
"phase": "setup",
|
163
|
+
"search_info": {
|
164
|
+
"questions_by_iteration": len(
|
165
|
+
self.strategy.questions_by_iteration
|
217
166
|
)
|
167
|
+
},
|
168
|
+
},
|
169
|
+
)
|
170
|
+
if hasattr(self.strategy, "all_links_of_system"):
|
171
|
+
self.all_links_of_system = self.strategy.all_links_of_system
|
218
172
|
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
if settings.general.knowledge_accumulation == str(KnowledgeAccumulationApproach.QUESTION.value):
|
223
|
-
logger.info("Compressing knowledge")
|
224
|
-
self._update_progress(f"Compress Knowledge for: {question}",
|
225
|
-
int(question_progress_base + 0),
|
226
|
-
{"phase": "analysis"})
|
227
|
-
current_knowledge = self._compress_knowledge(current_knowledge , query, section_links)
|
228
|
-
|
229
|
-
self._update_progress(f"Analysis complete for question: {question}",
|
230
|
-
int(question_progress_base + 10),
|
231
|
-
{"phase": "analysis_complete"})
|
232
|
-
except Exception as e:
|
233
|
-
error_msg = f"Error analyzing results: {str(e)}"
|
234
|
-
logger.info(f"ANALYSIS ERROR: {error_msg}")
|
235
|
-
self._update_progress(error_msg,
|
236
|
-
int(question_progress_base + 10),
|
237
|
-
{"phase": "analysis_error", "error": str(e)})
|
238
|
-
iteration += 1
|
239
|
-
|
240
|
-
self._update_progress(f"Compressing knowledge after iteration {iteration}",
|
241
|
-
int((iteration / total_iterations) * 100 - 5),
|
242
|
-
{"phase": "knowledge_compression"})
|
243
|
-
logger.info(str(iteration))
|
244
|
-
logger.info(settings.general.knowledge_accumulation)
|
245
|
-
logger.info(str(KnowledgeAccumulationApproach.ITERATION.value))
|
246
|
-
if settings.general.knowledge_accumulation == KnowledgeAccumulationApproach.ITERATION.value:
|
247
|
-
try:
|
248
|
-
logger.info("ITERATION - Compressing Knowledge")
|
249
|
-
current_knowledge = self._compress_knowledge(current_knowledge , query, section_links)
|
250
|
-
logger.info("FINISHED ITERATION - Compressing Knowledge")
|
251
|
-
except Exception as e:
|
252
|
-
error_msg = f"Error compressing knowledge: {str(e)}"
|
253
|
-
logger.info(f"COMPRESSION ERROR: {error_msg}")
|
254
|
-
self._update_progress(error_msg,
|
255
|
-
int((iteration / total_iterations) * 100 - 3),
|
256
|
-
{"phase": "compression_error", "error": str(e)})
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
self._update_progress(f"Iteration {iteration} complete",
|
261
|
-
int((iteration / total_iterations) * 100),
|
262
|
-
{"phase": "iteration_complete", "iteration": iteration})
|
263
|
-
|
264
|
-
try:
|
265
|
-
formatted_findings = self._save_findings(findings, current_knowledge, query)
|
266
|
-
except Exception as e:
|
267
|
-
error_msg = f"Error saving findings: {str(e)}"
|
268
|
-
logger.info(f"SAVE ERROR: {error_msg}")
|
269
|
-
self._update_progress(error_msg,
|
270
|
-
int((iteration / total_iterations) * 100),
|
271
|
-
{"phase": "save_error", "error": str(e)})
|
272
|
-
formatted_findings = "Error: Could not format findings due to an error."
|
273
|
-
|
274
|
-
self._update_progress("Research complete", 95, {"phase": "complete"})
|
275
|
-
|
276
|
-
return {
|
277
|
-
"findings": findings,
|
278
|
-
"iterations": iteration,
|
279
|
-
"questions": self.questions_by_iteration,
|
280
|
-
"formatted_findings": formatted_findings,
|
281
|
-
"current_knowledge": current_knowledge
|
282
|
-
}
|
283
|
-
|
284
|
-
def _save_findings(self, findings: List[Dict], current_knowledge: str, query: str):
|
285
|
-
logger.info("Saving findings ...")
|
286
|
-
self._update_progress("Saving research findings...", None)
|
287
|
-
|
288
|
-
formatted_findings = format_findings_to_text(
|
289
|
-
findings, current_knowledge, self.questions_by_iteration
|
290
|
-
)
|
291
|
-
safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[
|
292
|
-
:50
|
293
|
-
]
|
294
|
-
safe_query = safe_query.replace(" ", "_").lower()
|
295
|
-
import local_deep_research.config as conf
|
296
|
-
output_dir = f"{conf.get_config_dir()}/research_outputs"
|
297
|
-
if not os.path.exists(output_dir):
|
298
|
-
os.makedirs(output_dir)
|
299
|
-
|
300
|
-
filename = os.path.join(output_dir, f"formatted_output_{safe_query}.txt")
|
173
|
+
# Include the search system instance for access to citations
|
174
|
+
result["search_system"] = self
|
301
175
|
|
302
|
-
|
303
|
-
text_file.write(formatted_findings)
|
304
|
-
logger.info("Saved findings")
|
305
|
-
self._update_progress("Research findings saved", None, {"filename": filename})
|
306
|
-
return formatted_findings
|
176
|
+
return result
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
"""
|
3
|
+
Data directory setup script for Local Deep Research.
|
4
|
+
Creates the data directory for the application database if it doesn't exist.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import os
|
8
|
+
|
9
|
+
|
10
|
+
def setup_data_dir():
|
11
|
+
"""Set up the data directory for the application."""
|
12
|
+
# Get the project root directory (3 levels up from this file)
|
13
|
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
14
|
+
project_root = os.path.abspath(os.path.join(current_dir, "..", ".."))
|
15
|
+
|
16
|
+
# Define the data directory path
|
17
|
+
data_dir = os.path.join(project_root, "data")
|
18
|
+
|
19
|
+
# Create the data directory if it doesn't exist
|
20
|
+
if not os.path.exists(data_dir):
|
21
|
+
os.makedirs(data_dir)
|
22
|
+
print(f"Created data directory at: {data_dir}")
|
23
|
+
else:
|
24
|
+
print(f"Data directory already exists at: {data_dir}")
|
25
|
+
|
26
|
+
# Return the path to the data directory
|
27
|
+
return data_dir
|
28
|
+
|
29
|
+
|
30
|
+
if __name__ == "__main__":
|
31
|
+
data_dir = setup_data_dir()
|
32
|
+
db_path = os.path.join(data_dir, "ldr.db")
|
33
|
+
print(f"Database path: {db_path}")
|
34
|
+
print("Run the following command to migrate your database:")
|
35
|
+
print("python -m src.local_deep_research.migrate_db --backup")
|