local-deep-research 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
- local_deep_research/__init__.py +1 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +5 -1
- local_deep_research/advanced_search_system/strategies/base_strategy.py +5 -2
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +23 -16
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +273 -144
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +4 -3
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +402 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +8 -4
- local_deep_research/api/research_functions.py +0 -46
- local_deep_research/citation_handler.py +16 -20
- local_deep_research/config/llm_config.py +25 -68
- local_deep_research/config/search_config.py +8 -21
- local_deep_research/defaults/default_settings.json +3814 -0
- local_deep_research/search_system.py +46 -32
- local_deep_research/utilities/db_utils.py +22 -3
- local_deep_research/utilities/search_utilities.py +10 -7
- local_deep_research/web/app.py +3 -23
- local_deep_research/web/app_factory.py +1 -25
- local_deep_research/web/database/migrations.py +20 -418
- local_deep_research/web/routes/settings_routes.py +75 -364
- local_deep_research/web/services/research_service.py +47 -43
- local_deep_research/web/services/settings_manager.py +108 -315
- local_deep_research/web/services/settings_service.py +3 -56
- local_deep_research/web/static/js/components/research.js +1 -1
- local_deep_research/web/static/js/components/settings.js +16 -4
- local_deep_research/web/static/js/research_form.js +106 -0
- local_deep_research/web/templates/pages/research.html +3 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +13 -18
- local_deep_research/web_search_engines/engines/search_engine_local.py +11 -2
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +7 -11
- local_deep_research/web_search_engines/search_engine_factory.py +12 -64
- local_deep_research/web_search_engines/search_engines_config.py +123 -64
- {local_deep_research-0.2.2.dist-info → local_deep_research-0.3.0.dist-info}/METADATA +16 -1
- {local_deep_research-0.2.2.dist-info → local_deep_research-0.3.0.dist-info}/RECORD +37 -38
- local_deep_research/config/config_files.py +0 -245
- local_deep_research/defaults/local_collections.toml +0 -53
- local_deep_research/defaults/main.toml +0 -80
- local_deep_research/defaults/search_engines.toml +0 -291
- {local_deep_research-0.2.2.dist-info → local_deep_research-0.3.0.dist-info}/WHEEL +0 -0
- {local_deep_research-0.2.2.dist-info → local_deep_research-0.3.0.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.2.2.dist-info → local_deep_research-0.3.0.dist-info}/licenses/LICENSE +0 -0
local_deep_research/advanced_search_system/strategies/source_based_strategy.py

```diff
@@ -0,0 +1,402 @@
+import concurrent.futures
+import logging
+from typing import Dict
+
+from ...citation_handler import CitationHandler
+from ...config.llm_config import get_llm
+from ...config.search_config import get_search
+from ...utilities.db_utils import get_db_setting
+from ..filters.cross_engine_filter import CrossEngineFilter
+from ..findings.repository import FindingsRepository
+from ..questions.standard_question import StandardQuestionGenerator
+from .base_strategy import BaseSearchStrategy
+
+logger = logging.getLogger(__name__)
+
+
+class SourceBasedSearchStrategy(BaseSearchStrategy):
+    """
+    Source-based search strategy that generates questions based on search results and
+    defers content analysis until final synthesis.
+    """
+
+    def __init__(
+        self,
+        search=None,
+        model=None,
+        citation_handler=None,
+        include_text_content: bool = True,
+        use_cross_engine_filter: bool = True,
+        filter_reorder: bool = True,
+        filter_reindex: bool = True,
+        cross_engine_max_results: int = None,
+        all_links_of_system=None,
+    ):
+        """Initialize with optional dependency injection for testing."""
+        # Pass the links list to the parent class
+        super().__init__(all_links_of_system=all_links_of_system)
+        self.search = search or get_search()
+        self.model = model or get_llm()
+        self.progress_callback = None
+
+        self.questions_by_iteration = {}
+        self.include_text_content = include_text_content
+        self.use_cross_engine_filter = use_cross_engine_filter
+        self.filter_reorder = filter_reorder
+        self.filter_reindex = filter_reindex
+
+        # Get cross_engine_max_results from database if not provided
+        if cross_engine_max_results is None:
+            cross_engine_max_results = get_db_setting(
+                "search.cross_engine_max_results", 100
+            )
+
+        # Initialize the cross-engine filter
+        self.cross_engine_filter = CrossEngineFilter(
+            model=self.model,
+            max_results=cross_engine_max_results,
+            default_reorder=filter_reorder,
+            default_reindex=filter_reindex,
+        )
+
+        # Set include_full_content on the search engine if it supports it
+        if hasattr(self.search, "include_full_content"):
+            self.search.include_full_content = include_text_content
+
+        # Use provided citation_handler or create one
+        self.citation_handler = citation_handler or CitationHandler(self.model)
+
+        # Initialize components
+        self.question_generator = StandardQuestionGenerator(self.model)
+        self.findings_repository = FindingsRepository(self.model)
+
+    def _format_search_results_as_context(self, search_results):
+        """Format search results into context for question generation."""
+        context_snippets = []
+
+        for i, result in enumerate(
+            search_results[:10]
+        ):  # Limit to prevent context overflow
+            title = result.get("title", "Untitled")
+            snippet = result.get("snippet", "")
+            url = result.get("link", "")
+
+            if snippet:
+                context_snippets.append(
+                    f"Source {i + 1}: {title}\nURL: {url}\nSnippet: {snippet}"
+                )
+
+        return "\n\n".join(context_snippets)
+
+    def analyze_topic(self, query: str) -> Dict:
+        """
+        Analyze a topic using source-based search strategy.
+        """
+        logger.info(f"Starting source-based research on topic: {query}")
+        accumulated_search_results_across_all_iterations = (
+            []
+        )  # tracking links across iterations but not global
+        findings = []
+        total_citation_count_before_this_search = len(self.all_links_of_system)
+
+        self._update_progress(
+            "Initializing source-based research",
+            5,
+            {
+                "phase": "init",
+                "strategy": "source-based",
+                "include_text_content": self.include_text_content,
+            },
+        )
+
+        # Check search engine
+        if not self._validate_search_engine():
+            return {
+                "findings": [],
+                "iterations": 0,
+                "questions_by_iteration": {},
+                "formatted_findings": "Error: Unable to conduct research without a search engine.",
+                "current_knowledge": "",
+                "error": "No search engine available",
+            }
+
+        # Determine number of iterations to run
+        iterations_to_run = get_db_setting("search.iterations")
+        logger.debug("Selected amount of iterations: " + str(iterations_to_run))
+        iterations_to_run = int(iterations_to_run)
+        try:
+            filtered_search_results = []
+            total_citation_count_before_this_search = len(self.all_links_of_system)
+            # Run each iteration
+            for iteration in range(1, iterations_to_run + 1):
+                iteration_progress_base = 5 + (iteration - 1) * (70 / iterations_to_run)
+
+                self._update_progress(
+                    f"Starting iteration {iteration}/{iterations_to_run}",
+                    iteration_progress_base,
+                    {"phase": f"iteration_{iteration}", "iteration": iteration},
+                )
+
+                # Step 1: Generate or use questions
+                self._update_progress(
+                    f"Generating search questions for iteration {iteration}",
+                    iteration_progress_base + 5,
+                    {"phase": "question_generation", "iteration": iteration},
+                )
+
+                # For first iteration, use initial query
+                if iteration == 1:
+                    # Generate questions for first iteration
+                    context = f"""Iteration: {iteration} of {iterations_to_run}"""
+                    questions = self.question_generator.generate_questions(
+                        current_knowledge=context,
+                        query=query,
+                        questions_per_iteration=int(
+                            get_db_setting("search.questions_per_iteration")
+                        ),
+                        questions_by_iteration=self.questions_by_iteration,
+                    )
+
+                    # Always include the original query for the first iteration
+                    if query not in questions:
+                        all_questions = [query] + questions
+                    else:
+                        all_questions = questions
+
+                    self.questions_by_iteration[iteration] = all_questions
+                    logger.info(
+                        f"Using questions for iteration {iteration}: {all_questions}"
+                    )
+                else:
+                    # For subsequent iterations, generate questions based on previous search results
+                    source_context = self._format_search_results_as_context(
+                        filtered_search_results
+                    )
+                    if iteration != 1:
+                        context = f"""Previous search results:\n{source_context}\n\nIteration: {iteration} of {iterations_to_run}"""
+                    elif iterations_to_run == 1:
+                        context = ""
+                    else:
+                        context = f"""Iteration: {iteration} of {iterations_to_run}"""
+                    # Use standard question generator with search results as context
+                    questions = self.question_generator.generate_questions(
+                        current_knowledge=context,
+                        query=query,
+                        questions_per_iteration=int(
+                            get_db_setting("search.questions_per_iteration")
+                        ),
+                        questions_by_iteration=self.questions_by_iteration,
+                    )
+
+                    # Use only the new questions for this iteration's searches
+                    all_questions = questions
+
+                    # Store in questions_by_iteration
+                    self.questions_by_iteration[iteration] = questions
+                    logger.info(
+                        f"Generated questions for iteration {iteration}: {questions}"
+                    )
+
+                # Step 2: Run all searches in parallel for this iteration
+                self._update_progress(
+                    f"Running parallel searches for iteration {iteration}",
+                    iteration_progress_base + 10,
+                    {"phase": "parallel_search", "iteration": iteration},
+                )
+
+                # Function for thread pool
+                def search_question(q):
+                    try:
+                        result = self.search.run(q)
+                        return {"question": q, "results": result or []}
+                    except Exception as e:
+                        logger.error(f"Error searching for '{q}': {str(e)}")
+                        return {"question": q, "results": [], "error": str(e)}
+
+                # Run searches in parallel
+                with concurrent.futures.ThreadPoolExecutor(
+                    max_workers=len(all_questions)
+                ) as executor:
+                    futures = [
+                        executor.submit(search_question, q) for q in all_questions
+                    ]
+                    iteration_search_dict = {}
+                    iteration_search_results = []
+
+                    # Process results as they complete
+                    for i, future in enumerate(
+                        concurrent.futures.as_completed(futures)
+                    ):
+                        result_dict = future.result()
+                        question = result_dict["question"]
+                        search_results = result_dict["results"]
+                        iteration_search_dict[question] = search_results
+
+                        self._update_progress(
+                            f"Completed search {i + 1} of {len(all_questions)}: {question[:30]}...",
+                            iteration_progress_base
+                            + 10
+                            + ((i + 1) / len(all_questions) * 30),
+                            {
+                                "phase": "search_complete",
+                                "iteration": iteration,
+                                "result_count": len(search_results),
+                                "question": question,
+                            },
+                        )
+
+                        iteration_search_results.extend(search_results)
+
+                if False and self.use_cross_engine_filter:
+                    self._update_progress(
+                        f"Filtering search results for iteration {iteration}",
+                        iteration_progress_base + 45,
+                        {"phase": "cross_engine_filtering", "iteration": iteration},
+                    )
+
+                    existing_link_count = len(self.all_links_of_system)
+                    logger.info(f"Existing link count: {existing_link_count}")
+                    filtered_search_results = self.cross_engine_filter.filter_results(
+                        iteration_search_results,
+                        query,
+                        reorder=True,
+                        reindex=True,
+                        start_index=existing_link_count,  # Start indexing after existing links
+                    )
+
+                    self._update_progress(
+                        f"Filtered from {len(iteration_search_results)} to {len(filtered_search_results)} results",
+                        iteration_progress_base + 50,
+                        {
+                            "phase": "filtering_complete",
+                            "iteration": iteration,
+                            "links_count": len(self.all_links_of_system),
+                        },
+                    )
+                else:
+                    # Use the search results as they are
+                    filtered_search_results = iteration_search_results
+
+                # Use filtered results
+                accumulated_search_results_across_all_iterations.extend(
+                    filtered_search_results
+                )
+
+                # Create a lightweight finding for this iteration's search metadata (no text content)
+                finding = {
+                    "phase": f"Iteration {iteration}",
+                    "content": f"Searched with {len(all_questions)} questions, found {len(filtered_search_results)} results.",
+                    "question": query,
+                    "documents": [],
+                }
+                findings.append(finding)
+
+                # Mark iteration as complete
+                iteration_progress = 5 + iteration * (70 / iterations_to_run)
+                self._update_progress(
+                    f"Completed iteration {iteration}/{iterations_to_run}",
+                    iteration_progress,
+                    {"phase": "iteration_complete", "iteration": iteration},
+                )
+
+            # Do we need this filter?
+            if self.use_cross_engine_filter:
+                # Final filtering of all accumulated search results
+                self._update_progress(
+                    "Performing final filtering of all results",
+                    80,
+                    {"phase": "final_filtering"},
+                )
+                final_filtered_results = self.cross_engine_filter.filter_results(
+                    accumulated_search_results_across_all_iterations,
+                    query,
+                    reorder=True,  # Always reorder in final filtering
+                    reindex=True,  # Always reindex in final filtering
+                    max_results=int(get_db_setting("search.final_max_results") or 100),
+                    start_index=len(self.all_links_of_system),
+                )
+                self._update_progress(
+                    f"Filtered from {len(accumulated_search_results_across_all_iterations)} to {len(final_filtered_results)} results",
+                    iteration_progress_base + 85,
+                    {
+                        "phase": "filtering_complete",
+                        "iteration": iteration,
+                        "links_count": len(self.all_links_of_system),
+                    },
+                )
+            else:
+                final_filtered_results = filtered_search_results
+            # links = extract_links_from_search_results()
+            self.all_links_of_system.extend(final_filtered_results)
+
+            # Final synthesis after all iterations
+            self._update_progress(
+                "Generating final synthesis", 90, {"phase": "synthesis"}
+            )
+
+            # Final synthesis
+            final_citation_result = self.citation_handler.analyze_followup(
+                query,
+                final_filtered_results,
+                previous_knowledge="",  # Empty string as we don't need previous knowledge here
+                nr_of_links=total_citation_count_before_this_search,
+            )
+
+            # Add null check for final_citation_result
+            if final_citation_result:
+                synthesized_content = final_citation_result["content"]
+                documents = final_citation_result.get("documents", [])
+            else:
+                synthesized_content = "No relevant results found in final synthesis."
+                documents = []
+
+            # Add a final synthesis finding
+            final_finding = {
+                "phase": "Final synthesis",
+                "content": synthesized_content,
+                "question": query,
+                "search_results": self.all_links_of_system,
+                "documents": documents,
+            }
+            findings.append(final_finding)
+
+            # Add documents to repository
+            self.findings_repository.add_documents(documents)
+
+            # Transfer questions to repository
+            self.findings_repository.set_questions_by_iteration(
+                self.questions_by_iteration
+            )
+
+            # Format findings
+            formatted_findings = self.findings_repository.format_findings_to_text(
+                findings, synthesized_content
+            )
+
+        except Exception as e:
+            import traceback
+
+            error_msg = f"Error in research process: {str(e)}"
+            logger.error(error_msg)
+            logger.error(traceback.format_exc())
+            synthesized_content = f"Error: {str(e)}"
+            formatted_findings = f"Error: {str(e)}"
+            finding = {
+                "phase": "Error",
+                "content": synthesized_content,
+                "question": query,
+                "search_results": [],
+                "documents": [],
+            }
+            findings.append(finding)
+
+        self._update_progress("Research complete", 100, {"phase": "complete"})
+
+        return {
+            "findings": findings,
+            "iterations": iterations_to_run,
+            "questions_by_iteration": self.questions_by_iteration,
+            "formatted_findings": formatted_findings,
+            "current_knowledge": synthesized_content,
+            "all_links_of_system": self.all_links_of_system,
+        }
```
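For context, the new `SourceBasedSearchStrategy` is self-contained enough to drive directly. Below is a minimal usage sketch, not part of the release: it assumes the settings database and an LLM are configured so the `get_search()`/`get_llm()` defaults in `__init__` resolve, and the query string is illustrative only.

```python
# Sketch: exercises the constructor and analyze_topic() as defined in the
# hunk above; assumes a configured search engine and LLM.
from local_deep_research.advanced_search_system.strategies.source_based_strategy import (
    SourceBasedSearchStrategy,
)

strategy = SourceBasedSearchStrategy(
    include_text_content=True,     # mirrored onto the engine's include_full_content
    use_cross_engine_filter=True,  # enables the final cross-engine filtering pass
    cross_engine_max_results=50,   # default comes from search.cross_engine_max_results
)
result = strategy.analyze_topic("history of solid-state batteries")

# analyze_topic() returns the dict built in the final return block above.
print(result["formatted_findings"])
print(result["questions_by_iteration"])
```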
local_deep_research/advanced_search_system/strategies/standard_strategy.py

```diff
@@ -3,7 +3,6 @@ import logging
 from typing import Dict
 
 from ...citation_handler import CitationHandler
-from ...config.config_files import settings
 from ...config.llm_config import get_llm
 from ...config.search_config import get_search
 from ...utilities.db_utils import get_db_setting
@@ -20,11 +19,17 @@ logger = logging.getLogger(__name__)
 class StandardSearchStrategy(BaseSearchStrategy):
     """Standard iterative search strategy that generates follow-up questions."""
 
-    def __init__(
+    def __init__(
+        self, search=None, model=None, citation_handler=None, all_links_of_system=None
+    ):
         """Initialize with optional dependency injection for testing."""
+        super().__init__(all_links_of_system=all_links_of_system)
         self.search = search or get_search()
         self.model = model or get_llm()
+
+        # Get iterations setting
         self.max_iterations = int(get_db_setting("search.iterations"))
+
         self.questions_per_iteration = int(
             get_db_setting("search.questions_per_iteration")
         )
@@ -43,7 +48,6 @@ class StandardSearchStrategy(BaseSearchStrategy):
 
         # Initialize other attributes
         self.progress_callback = None
-        self.all_links_of_system = list()
 
     def _update_progress(
         self, message: str, progress_percent: int = None, metadata: dict = None
@@ -117,7 +121,7 @@ Iteration: {iteration + 1} of {total_iterations}"""
         question_count = len(questions)
         knowledge_accumulation = get_db_setting(
             "general.knowledge_accumulation",
-
+            "ITERATION",
         )
         for q_idx, question in enumerate(questions):
             question_progress_base = iteration_progress_base + (
```
local_deep_research/api/research_functions.py

```diff
@@ -4,11 +4,8 @@ Provides programmatic access to search and research capabilities.
 """
 
 import logging
-import os
 from typing import Any, Callable, Dict, Optional
 
-import toml
-
 from ..config.llm_config import get_llm
 from ..config.search_config import get_search
 from ..report_generator import IntegratedReportGenerator
@@ -279,46 +276,3 @@ def analyze_documents(
     logger.info(f"Analysis saved to {output_file}")
 
     return analysis_result
-
-
-def get_available_search_engines() -> Dict[str, str]:
-    """
-    Get a dictionary of available search engines.
-
-    Returns:
-        Dictionary mapping engine names to descriptions
-    """
-
-    from ..web_search_engines.search_engine_factory import get_available_engines
-
-    engines = get_available_engines()
-
-    # Add some descriptions for common engines
-    descriptions = {
-        "auto": "Automatic selection based on query type",
-        "wikipedia": "Wikipedia articles and general knowledge",
-        "arxiv": "Scientific papers and research",
-        "pubmed": "Medical and biomedical literature",
-        "semantic_scholar": "Academic papers across all fields",
-        "github": "Code repositories and technical documentation",
-        "local_all": "All local document collections",
-    }
-
-    return {engine: descriptions.get(engine, "Search engine") for engine in engines}
-
-
-def get_available_collections() -> Dict[str, Dict[str, Any]]:
-    """
-    Get a dictionary of available local document collections.
-
-    Returns:
-        Dictionary mapping collection names to their configuration
-    """
-
-    from ..config.config_files import LOCAL_COLLECTIONS_FILE
-
-    if os.path.exists(LOCAL_COLLECTIONS_FILE):
-        collections = toml.load(LOCAL_COLLECTIONS_FILE)
-        return collections
-
-    return {}
```
local_deep_research/citation_handler.py

```diff
@@ -4,7 +4,6 @@ from typing import Any, Dict, List, Union
 
 from langchain_core.documents import Document
 
-from .config.config_files import settings
 from .utilities.db_utils import get_db_setting
 
 
@@ -82,21 +81,19 @@ Provide a detailed analysis with citations. Do not create the bibliography, it w
         formatted_sources = self._format_sources(documents)
         # Add fact-checking step
         fact_check_prompt = f"""Analyze these sources for factual consistency:
-
-
-
-
+1. Cross-reference major claims between sources
+2. Identify and flag any contradictions
+3. Verify basic facts (dates, company names, ownership)
+4. Note when sources disagree
 
-
-
+Previous Knowledge:
+{previous_knowledge}
 
-
-
+New Sources:
+{formatted_sources}
 
 Return any inconsistencies or conflicts found."""
-        if get_db_setting(
-            "general.enable_fact_checking", settings.general.enable_fact_checking
-        ):
+        if get_db_setting("general.enable_fact_checking", True):
             fact_check_response = self.llm.invoke(fact_check_prompt).content
 
         else:
@@ -104,16 +101,15 @@ Provide a detailed analysis with citations. Do not create the bibliography, it w
 
         prompt = f"""Using the previous knowledge and new sources, answer the question. Include citations using numbers in square brackets [1], [2], etc. When citing, use the source number provided at the start of each source. Reflect information from sources critically.
 
-
-
+Previous Knowledge:
+{previous_knowledge}
 
-
+Question: {question}
 
-
-
-
-
-"""
+New Sources:
+{formatted_sources}
+Reflect information from sources critically based on: {fact_check_response}. Never invent sources.
+Provide a detailed answer with citations. Example format: "According to [1], ..." """
 
         response = self.llm.invoke(prompt)
 
```
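The net effect in citation_handler.py is that the fact-checking toggle is now read from the settings database with a plain `True` fallback rather than from the removed TOML-backed `settings` object. A minimal sketch of the new lookup pattern, using the setting key exactly as it appears in the hunk:

```python
from local_deep_research.utilities.db_utils import get_db_setting

# The second argument is the fallback used when the key is missing from the
# settings database; config_files/TOML defaults are no longer consulted.
if get_db_setting("general.enable_fact_checking", True):
    print("fact-checking pass enabled")
```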