local-deep-research 0.2.0__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +261 -139
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +407 -0
- local_deep_research/api/research_functions.py +72 -90
- local_deep_research/citation_handler.py +16 -17
- local_deep_research/defaults/search_engines.toml +1 -1
- local_deep_research/report_generator.py +19 -5
- local_deep_research/search_system.py +20 -3
- local_deep_research/web/routes/settings_routes.py +0 -9
- local_deep_research/web/services/research_service.py +4 -0
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +1 -1
- {local_deep_research-0.2.0.dist-info → local_deep_research-0.2.3.dist-info}/METADATA +2 -2
- {local_deep_research-0.2.0.dist-info → local_deep_research-0.2.3.dist-info}/RECORD +15 -14
- {local_deep_research-0.2.0.dist-info → local_deep_research-0.2.3.dist-info}/WHEEL +0 -0
- {local_deep_research-0.2.0.dist-info → local_deep_research-0.2.3.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.2.0.dist-info → local_deep_research-0.2.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,407 @@
|
|
1
|
+
import concurrent.futures
|
2
|
+
import logging
|
3
|
+
from typing import Dict
|
4
|
+
|
5
|
+
from ...citation_handler import CitationHandler
|
6
|
+
from ...config.llm_config import get_llm
|
7
|
+
from ...config.search_config import get_search
|
8
|
+
from ...utilities.db_utils import get_db_setting
|
9
|
+
from ...utilities.search_utilities import extract_links_from_search_results
|
10
|
+
from ..filters.cross_engine_filter import CrossEngineFilter
|
11
|
+
from ..findings.repository import FindingsRepository
|
12
|
+
from ..questions.standard_question import StandardQuestionGenerator
|
13
|
+
from .base_strategy import BaseSearchStrategy
|
14
|
+
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
|
18
|
+
class SourceBasedSearchStrategy(BaseSearchStrategy):
    """
    Source-based search strategy that generates questions based on search results and
    defers content analysis until final synthesis.

    Each iteration generates a set of questions, runs one search per question
    in a thread pool, optionally filters the combined results through the
    cross-engine filter, and records a lightweight finding. After the last
    iteration the accumulated results are filtered once more and handed to the
    citation handler for a single synthesis pass.
    """

    def __init__(
        self,
        search=None,
        model=None,
        citation_handler=None,
        include_text_content: bool = True,
        use_cross_engine_filter: bool = True,
        filter_reorder: bool = True,
        filter_reindex: bool = True,
        filter_max_results: int = 20,
    ):
        """Initialize with optional dependency injection for testing.

        Args:
            search: Search engine to use; falls back to ``get_search()``.
            model: Language model to use; falls back to ``get_llm()``.
            citation_handler: Handler used for the final synthesis; a
                ``CitationHandler`` wrapping ``model`` is created when omitted.
            include_text_content: Stored on the strategy and copied to the
                search engine's ``include_full_content`` attribute when the
                engine has one.
            use_cross_engine_filter: Whether results are passed through the
                cross-engine filter (per iteration and in the final pass).
            filter_reorder: ``reorder`` value for per-iteration filtering.
            filter_reindex: ``reindex`` value for per-iteration filtering.
            filter_max_results: ``max_results`` given to the cross-engine
                filter at construction time.
        """
        super().__init__()
        self.search = search or get_search()
        self.model = model or get_llm()
        self.progress_callback = None
        self.all_links_of_system = []
        self.all_search_results = []
        self.questions_by_iteration = {}
        self.include_text_content = include_text_content
        self.use_cross_engine_filter = use_cross_engine_filter
        self.filter_reorder = filter_reorder
        self.filter_reindex = filter_reindex

        # Initialize the cross-engine filter
        self.cross_engine_filter = CrossEngineFilter(
            model=self.model,
            max_results=filter_max_results,
            default_reorder=filter_reorder,
            default_reindex=filter_reindex,
        )

        # Set include_full_content on the search engine if it supports it
        if hasattr(self.search, "include_full_content"):
            self.search.include_full_content = include_text_content

        # Use provided citation_handler or create one
        self.citation_handler = citation_handler or CitationHandler(self.model)

        # Initialize components
        self.question_generator = StandardQuestionGenerator(self.model)
        self.findings_repository = FindingsRepository(self.model)

    def _format_search_results_as_context(self, search_results):
        """Format search results into context for question generation.

        Only the first ten results are considered (to limit context size), and
        results without a snippet are skipped. Source numbers reflect the
        position in ``search_results``, so they may have gaps.

        Args:
            search_results: Sequence of result dicts; the ``title``,
                ``snippet`` and ``link`` keys are read when present.

        Returns:
            The formatted entries joined by blank lines, or ``""`` when no
            result has a snippet.
        """
        context_snippets = []

        # Limit to prevent context overflow
        for i, result in enumerate(search_results[:10]):
            title = result.get("title", "Untitled")
            snippet = result.get("snippet", "")
            url = result.get("link", "")

            if snippet:
                context_snippets.append(
                    f"Source {i + 1}: {title}\nURL: {url}\nSnippet: {snippet}"
                )

        return "\n\n".join(context_snippets)

    def _generate_iteration_questions(self, query, iteration, iterations_to_run):
        """Generate, record and return the questions to search in one iteration.

        The first iteration has no previous results, so the generator only
        receives the iteration counter and the original query is always placed
        first in the returned list. Later iterations receive the snippets
        of the results accumulated so far as context.
        """
        if iteration == 1:
            context = f"Iteration: {iteration} of {iterations_to_run}"
        else:
            source_context = self._format_search_results_as_context(
                self.all_search_results
            )
            context = (
                f"Previous search results:\n{source_context}\n\n"
                f"Iteration: {iteration} of {iterations_to_run}"
            )

        questions = self.question_generator.generate_questions(
            current_knowledge=context,
            query=query,
            questions_per_iteration=int(
                get_db_setting("search.questions_per_iteration")
            ),
            questions_by_iteration=self.questions_by_iteration,
        )

        if iteration == 1:
            # Always include the original query for the first iteration
            if query not in questions:
                questions = [query] + questions
            self.questions_by_iteration[iteration] = questions
            logger.info(f"Using questions for iteration {iteration}: {questions}")
        else:
            # Use only the new questions for this iteration's searches
            self.questions_by_iteration[iteration] = questions
            logger.info(
                f"Generated questions for iteration {iteration}: {questions}"
            )

        return questions

    def _search_questions_in_parallel(self, questions, iteration, progress_base):
        """Run one search per question in a thread pool and merge the results.

        A failing search is logged and contributes no results instead of
        aborting the whole iteration. Progress is reported as each search
        completes, so results are merged in completion order.
        """

        def search_question(q):
            try:
                result = self.search.run(q)
                return {"question": q, "results": result or []}
            except Exception as e:
                # Deliberate best-effort: one broken query must not stop the rest.
                logger.error(f"Error searching for '{q}': {str(e)}")
                return {"question": q, "results": [], "error": str(e)}

        merged_results = []

        # ThreadPoolExecutor rejects max_workers <= 0, which would otherwise
        # abort the research when the question generator returns nothing.
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=max(1, len(questions))
        ) as executor:
            futures = [executor.submit(search_question, q) for q in questions]

            # Process results as they complete
            for i, future in enumerate(concurrent.futures.as_completed(futures)):
                result_dict = future.result()
                question = result_dict["question"]
                search_results = result_dict["results"]

                self._update_progress(
                    f"Completed search {i + 1} of {len(questions)}: {question[:30]}...",
                    progress_base + 10 + ((i + 1) / len(questions) * 30),
                    {
                        "phase": "search_complete",
                        "iteration": iteration,
                        "result_count": len(search_results),
                        "question": question,
                    },
                )

                # Collect all search results for this iteration
                merged_results.extend(search_results)

        return merged_results

    def analyze_topic(self, query: str) -> Dict:
        """
        Analyze a topic using source-based search strategy.

        Args:
            query: The research question.

        Returns:
            A dict with the keys ``findings``, ``iterations``,
            ``questions_by_iteration``, ``formatted_findings`` and
            ``current_knowledge``. When no search engine is available the
            dict additionally carries an ``error`` key. Exceptions raised
            while iterating or synthesizing are logged and reported as an
            "Error" finding rather than propagated.
        """
        logger.info(f"Starting source-based research on topic: {query}")

        findings = []

        # Reset per-run state: results, links and questions across iterations
        self.all_search_results = []
        self.all_links_of_system = []
        self.questions_by_iteration = {}

        self._update_progress(
            "Initializing source-based research",
            5,
            {
                "phase": "init",
                "strategy": "source-based",
                "include_text_content": self.include_text_content,
            },
        )

        # Check search engine
        if not self._validate_search_engine():
            return {
                "findings": [],
                "iterations": 0,
                "questions_by_iteration": {},
                "formatted_findings": "Error: Unable to conduct research without a search engine.",
                "current_knowledge": "",
                "error": "No search engine available",
            }

        # Determine number of iterations to run
        iterations_to_run = get_db_setting("search.iterations")
        logger.debug("Selected amount of iterations: %s", iterations_to_run)
        iterations_to_run = int(iterations_to_run)

        try:
            # Run each iteration
            for iteration in range(1, iterations_to_run + 1):
                iteration_progress_base = 5 + (iteration - 1) * (
                    70 / iterations_to_run
                )

                self._update_progress(
                    f"Starting iteration {iteration}/{iterations_to_run}",
                    iteration_progress_base,
                    {"phase": f"iteration_{iteration}", "iteration": iteration},
                )

                # Step 1: Generate or use questions
                self._update_progress(
                    f"Generating search questions for iteration {iteration}",
                    iteration_progress_base + 5,
                    {"phase": "question_generation", "iteration": iteration},
                )
                all_questions = self._generate_iteration_questions(
                    query, iteration, iterations_to_run
                )

                # Step 2: Run all searches in parallel for this iteration
                self._update_progress(
                    f"Running parallel searches for iteration {iteration}",
                    iteration_progress_base + 10,
                    {"phase": "parallel_search", "iteration": iteration},
                )
                iteration_search_results = self._search_questions_in_parallel(
                    all_questions, iteration, iteration_progress_base
                )

                # Step 3: Apply cross-engine filtering if enabled
                if self.use_cross_engine_filter:
                    self._update_progress(
                        f"Filtering search results for iteration {iteration}",
                        iteration_progress_base + 45,
                        {"phase": "cross_engine_filtering", "iteration": iteration},
                    )

                    # Get the current link count (for indexing)
                    existing_link_count = len(self.all_links_of_system)

                    # Filter the search results
                    filtered_search_results = self.cross_engine_filter.filter_results(
                        iteration_search_results,
                        query,
                        reorder=self.filter_reorder,
                        reindex=self.filter_reindex,
                        start_index=existing_link_count,  # Start indexing after existing links
                    )

                    links = extract_links_from_search_results(filtered_search_results)
                    self.all_links_of_system.extend(links)

                    self._update_progress(
                        f"Filtered from {len(iteration_search_results)} to {len(filtered_search_results)} results",
                        iteration_progress_base + 50,
                        {
                            "phase": "filtering_complete",
                            "iteration": iteration,
                            "links_count": len(self.all_links_of_system),
                        },
                    )

                    # Use filtered results
                    iteration_search_results = filtered_search_results
                else:
                    # Just extract links without filtering
                    links = extract_links_from_search_results(iteration_search_results)
                    self.all_links_of_system.extend(links)

                # Add to all search results
                self.all_search_results.extend(iteration_search_results)

                # Create a lightweight finding for this iteration's search metadata (no text content)
                findings.append(
                    {
                        "phase": f"Iteration {iteration}",
                        "content": f"Searched with {len(all_questions)} questions, found {len(iteration_search_results)} results.",
                        "question": query,
                        "search_results": iteration_search_results,
                        "documents": [],
                    }
                )

                # Mark iteration as complete
                iteration_progress = 5 + iteration * (70 / iterations_to_run)
                self._update_progress(
                    f"Completed iteration {iteration}/{iterations_to_run}",
                    iteration_progress,
                    {"phase": "iteration_complete", "iteration": iteration},
                )

            # Final filtering of all accumulated search results
            self._update_progress(
                "Performing final filtering of all results",
                80,
                {"phase": "final_filtering"},
            )

            # Apply final cross-engine filtering to all accumulated results if enabled
            if self.use_cross_engine_filter:
                final_filtered_results = self.cross_engine_filter.filter_results(
                    self.all_search_results,
                    query,
                    reorder=True,  # Always reorder in final filtering
                    # Never reindex here; presumably this keeps the indices
                    # assigned during the per-iteration passes - TODO confirm.
                    reindex=False,
                    max_results=int(get_db_setting("search.final_max_results") or 30),
                )
            else:
                final_filtered_results = self.all_search_results

            # Fixed value between "final filtering" (80) and "synthesis" (90).
            # It must not be derived from the loop's per-iteration base, which
            # pushed this update past 100 for multi-iteration runs.
            self._update_progress(
                f"Filtered from {len(self.all_search_results)} to {len(final_filtered_results)} results",
                85,
                {
                    "phase": "filtering_complete",
                    "iteration": iterations_to_run,
                    "links_count": len(self.all_links_of_system),
                },
            )

            # Final synthesis after all iterations
            self._update_progress(
                "Generating final synthesis", 90, {"phase": "synthesis"}
            )

            total_citation_count = len(self.all_links_of_system)

            # Final synthesis
            final_citation_result = self.citation_handler.analyze_followup(
                query,
                final_filtered_results,
                previous_knowledge="",  # Empty string as we don't need previous knowledge here
                nr_of_links=total_citation_count,
            )

            # The handler may return a falsy result; fall back to a placeholder
            if final_citation_result:
                synthesized_content = final_citation_result["content"]
                documents = final_citation_result.get("documents", [])
            else:
                synthesized_content = "No relevant results found in final synthesis."
                documents = []

            # Add a final synthesis finding
            findings.append(
                {
                    "phase": "Final synthesis",
                    "content": synthesized_content,
                    "question": query,
                    "search_results": final_filtered_results,
                    "documents": documents,
                }
            )

            # Add documents to repository
            self.findings_repository.add_documents(documents)

            # Transfer questions to repository
            self.findings_repository.set_questions_by_iteration(
                self.questions_by_iteration
            )

            # Format findings
            formatted_findings = self.findings_repository.format_findings_to_text(
                findings, synthesized_content
            )

        except Exception as e:
            # Top-level boundary: report the failure as a finding instead of
            # propagating it. logger.exception records the traceback as well.
            logger.exception(f"Error in research process: {str(e)}")
            synthesized_content = f"Error: {str(e)}"
            formatted_findings = f"Error: {str(e)}"
            findings.append(
                {
                    "phase": "Error",
                    "content": synthesized_content,
                    "question": query,
                    "search_results": [],
                    "documents": [],
                }
            )

        self._update_progress("Research complete", 100, {"phase": "complete"})

        return {
            "findings": findings,
            "iterations": iterations_to_run,
            "questions_by_iteration": self.questions_by_iteration,
            "formatted_findings": formatted_findings,
            "current_knowledge": synthesized_content,
        }
|
@@ -9,78 +9,100 @@ from typing import Any, Callable, Dict, Optional
|
|
9
9
|
|
10
10
|
import toml
|
11
11
|
|
12
|
-
from .. import get_report_generator # Use the lazy import function
|
13
12
|
from ..config.llm_config import get_llm
|
14
13
|
from ..config.search_config import get_search
|
14
|
+
from ..report_generator import IntegratedReportGenerator
|
15
15
|
from ..search_system import AdvancedSearchSystem
|
16
16
|
from ..utilities.search_utilities import remove_think_tags
|
17
17
|
|
18
18
|
logger = logging.getLogger(__name__)
|
19
19
|
|
20
20
|
|
21
|
-
def
|
22
|
-
|
21
|
+
def _init_search_system(
|
22
|
+
model_name: str | None = None,
|
23
|
+
temperature: float = 0.7,
|
24
|
+
provider: str | None = None,
|
25
|
+
openai_endpoint_url: str | None = None,
|
26
|
+
progress_callback: Callable[[str, int, dict], None] | None = None,
|
23
27
|
search_tool: Optional[str] = None,
|
24
28
|
iterations: int = 1,
|
25
29
|
questions_per_iteration: int = 1,
|
26
|
-
|
27
|
-
max_filtered_results: int = 5,
|
28
|
-
region: str = "us",
|
29
|
-
time_period: str = "y",
|
30
|
-
safe_search: bool = True,
|
31
|
-
temperature: float = 0.7,
|
32
|
-
progress_callback: Optional[Callable] = None,
|
33
|
-
) -> Dict[str, Any]:
|
30
|
+
) -> AdvancedSearchSystem:
|
34
31
|
"""
|
35
|
-
|
32
|
+
Initializes the advanced search system with specified parameters. This function sets up
|
33
|
+
and returns an instance of the AdvancedSearchSystem using the provided configuration
|
34
|
+
options such as model name, temperature for randomness in responses, provider service
|
35
|
+
details, endpoint URL, and an optional search tool.
|
36
36
|
|
37
37
|
Args:
|
38
|
-
|
38
|
+
model_name: Name of the model to use (if None, uses database setting)
|
39
|
+
temperature: LLM temperature for generation
|
40
|
+
provider: Provider to use (if None, uses database setting)
|
41
|
+
openai_endpoint_url: Custom endpoint URL to use (if None, uses database
|
42
|
+
setting)
|
43
|
+
progress_callback: Optional callback function to receive progress updates
|
39
44
|
search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
|
40
45
|
iterations: Number of research cycles to perform
|
41
46
|
questions_per_iteration: Number of questions to generate per cycle
|
42
|
-
max_results: Maximum number of search results to consider
|
43
|
-
max_filtered_results: Maximum results after relevance filtering
|
44
|
-
region: Search region/locale
|
45
|
-
time_period: Time period for search results (d=day, w=week, m=month, y=year)
|
46
|
-
safe_search: Whether to enable safe search
|
47
|
-
temperature: LLM temperature for generation
|
48
|
-
progress_callback: Optional callback function to receive progress updates
|
49
47
|
|
50
48
|
Returns:
|
51
|
-
|
52
|
-
- 'summary': The generated summary text
|
53
|
-
- 'findings': List of detailed findings from each search
|
54
|
-
- 'iterations': Number of iterations performed
|
55
|
-
- 'questions': Questions generated during research
|
56
|
-
"""
|
57
|
-
logger.info("Generating quick summary for query: %s", query)
|
49
|
+
AdvancedSearchSystem: An instance of the configured AdvancedSearchSystem.
|
58
50
|
|
51
|
+
"""
|
59
52
|
# Get language model with custom temperature
|
60
|
-
llm = get_llm(
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
system.max_iterations = iterations
|
67
|
-
system.questions_per_iteration = questions_per_iteration
|
68
|
-
system.model = llm # Ensure the model is directly attached to the system
|
53
|
+
llm = get_llm(
|
54
|
+
temperature=temperature,
|
55
|
+
openai_endpoint_url=openai_endpoint_url,
|
56
|
+
model_name=model_name,
|
57
|
+
provider=provider,
|
58
|
+
)
|
69
59
|
|
70
60
|
# Set the search engine if specified
|
61
|
+
search_engine = None
|
71
62
|
if search_tool:
|
72
|
-
search_engine = get_search(search_tool)
|
73
|
-
if search_engine:
|
74
|
-
system.search = search_engine
|
75
|
-
else:
|
63
|
+
search_engine = get_search(search_tool, llm_instance=llm)
|
64
|
+
if search_engine is None:
|
76
65
|
logger.warning(
|
77
66
|
f"Could not create search engine '{search_tool}', using default."
|
78
67
|
)
|
79
68
|
|
69
|
+
# Create search system with custom parameters
|
70
|
+
system = AdvancedSearchSystem(llm=llm, search=search_engine)
|
71
|
+
|
72
|
+
# Override default settings with user-provided values
|
73
|
+
system.max_iterations = iterations
|
74
|
+
system.questions_per_iteration = questions_per_iteration
|
75
|
+
|
80
76
|
# Set progress callback if provided
|
81
77
|
if progress_callback:
|
82
78
|
system.set_progress_callback(progress_callback)
|
83
79
|
|
80
|
+
return system
|
81
|
+
|
82
|
+
|
83
|
+
def quick_summary(
|
84
|
+
query: str,
|
85
|
+
**kwargs: Any,
|
86
|
+
) -> Dict[str, Any]:
|
87
|
+
"""
|
88
|
+
Generate a quick research summary for a given query.
|
89
|
+
|
90
|
+
Args:
|
91
|
+
query: The research query to analyze
|
92
|
+
**kwargs: Configuration for the search system. Will be forwarded to
|
93
|
+
`_init_search_system()`.
|
94
|
+
|
95
|
+
Returns:
|
96
|
+
Dictionary containing the research results with keys:
|
97
|
+
- 'summary': The generated summary text
|
98
|
+
- 'findings': List of detailed findings from each search
|
99
|
+
- 'iterations': Number of iterations performed
|
100
|
+
- 'questions': Questions generated during research
|
101
|
+
"""
|
102
|
+
logger.info("Generating quick summary for query: %s", query)
|
103
|
+
|
104
|
+
system = _init_search_system(**kwargs)
|
105
|
+
|
84
106
|
# Perform the search and analysis
|
85
107
|
results = system.analyze_topic(query)
|
86
108
|
|
@@ -103,36 +125,20 @@ def quick_summary(
|
|
103
125
|
|
104
126
|
def generate_report(
|
105
127
|
query: str,
|
106
|
-
search_tool: Optional[str] = None,
|
107
|
-
iterations: int = 2,
|
108
|
-
questions_per_iteration: int = 2,
|
109
|
-
searches_per_section: int = 2,
|
110
|
-
max_results: int = 50,
|
111
|
-
max_filtered_results: int = 5,
|
112
|
-
region: str = "us",
|
113
|
-
time_period: str = "y",
|
114
|
-
safe_search: bool = True,
|
115
|
-
temperature: float = 0.7,
|
116
128
|
output_file: Optional[str] = None,
|
117
129
|
progress_callback: Optional[Callable] = None,
|
130
|
+
searches_per_section: int = 2,
|
131
|
+
**kwargs: Any,
|
118
132
|
) -> Dict[str, Any]:
|
119
133
|
"""
|
120
134
|
Generate a comprehensive, structured research report for a given query.
|
121
135
|
|
122
136
|
Args:
|
123
137
|
query: The research query to analyze
|
124
|
-
search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
|
125
|
-
iterations: Number of research cycles to perform
|
126
|
-
questions_per_iteration: Number of questions to generate per cycle
|
127
|
-
searches_per_section: Number of searches to perform per report section
|
128
|
-
max_results: Maximum number of search results to consider
|
129
|
-
max_filtered_results: Maximum results after relevance filtering
|
130
|
-
region: Search region/locale
|
131
|
-
time_period: Time period for search results (d=day, w=week, m=month, y=year)
|
132
|
-
safe_search: Whether to enable safe search
|
133
|
-
temperature: LLM temperature for generation
|
134
138
|
output_file: Optional path to save report markdown file
|
135
139
|
progress_callback: Optional callback function to receive progress updates
|
140
|
+
searches_per_section: The number of searches to perform for each
|
141
|
+
section in the report.
|
136
142
|
|
137
143
|
Returns:
|
138
144
|
Dictionary containing the research report with keys:
|
@@ -141,34 +147,7 @@ def generate_report(
|
|
141
147
|
"""
|
142
148
|
logger.info("Generating comprehensive research report for query: %s", query)
|
143
149
|
|
144
|
-
|
145
|
-
llm = get_llm(temperature=temperature)
|
146
|
-
|
147
|
-
# Create search system with custom parameters
|
148
|
-
system = AdvancedSearchSystem()
|
149
|
-
|
150
|
-
# Override default settings with user-provided values
|
151
|
-
system.max_iterations = iterations
|
152
|
-
system.questions_per_iteration = questions_per_iteration
|
153
|
-
system.model = llm # Ensure the model is directly attached to the system
|
154
|
-
|
155
|
-
# Set the search engine if specified
|
156
|
-
if search_tool:
|
157
|
-
search_engine = get_search(
|
158
|
-
search_tool,
|
159
|
-
llm_instance=llm,
|
160
|
-
max_results=max_results,
|
161
|
-
max_filtered_results=max_filtered_results,
|
162
|
-
region=region,
|
163
|
-
time_period=time_period,
|
164
|
-
safe_search=safe_search,
|
165
|
-
)
|
166
|
-
if search_engine:
|
167
|
-
system.search = search_engine
|
168
|
-
else:
|
169
|
-
logger.warning(
|
170
|
-
f"Could not create search engine '{search_tool}', using default."
|
171
|
-
)
|
150
|
+
system = _init_search_system(**kwargs)
|
172
151
|
|
173
152
|
# Set progress callback if provided
|
174
153
|
if progress_callback:
|
@@ -178,8 +157,11 @@ def generate_report(
|
|
178
157
|
initial_findings = system.analyze_topic(query)
|
179
158
|
|
180
159
|
# Generate the structured report
|
181
|
-
report_generator =
|
182
|
-
|
160
|
+
report_generator = IntegratedReportGenerator(
|
161
|
+
search_system=system,
|
162
|
+
llm=system.model,
|
163
|
+
searches_per_section=searches_per_section,
|
164
|
+
)
|
183
165
|
report = report_generator.generate_report(initial_findings, query)
|
184
166
|
|
185
167
|
# Save report to file if path is provided
|
@@ -82,18 +82,18 @@ Provide a detailed analysis with citations. Do not create the bibliography, it w
|
|
82
82
|
formatted_sources = self._format_sources(documents)
|
83
83
|
# Add fact-checking step
|
84
84
|
fact_check_prompt = f"""Analyze these sources for factual consistency:
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
85
|
+
1. Cross-reference major claims between sources
|
86
|
+
2. Identify and flag any contradictions
|
87
|
+
3. Verify basic facts (dates, company names, ownership)
|
88
|
+
4. Note when sources disagree
|
89
89
|
|
90
|
-
|
91
|
-
|
90
|
+
Previous Knowledge:
|
91
|
+
{previous_knowledge}
|
92
92
|
|
93
|
-
|
94
|
-
|
93
|
+
New Sources:
|
94
|
+
{formatted_sources}
|
95
95
|
|
96
|
-
|
96
|
+
Return any inconsistencies or conflicts found."""
|
97
97
|
if get_db_setting(
|
98
98
|
"general.enable_fact_checking", settings.general.enable_fact_checking
|
99
99
|
):
|
@@ -104,16 +104,15 @@ Provide a detailed analysis with citations. Do not create the bibliography, it w
|
|
104
104
|
|
105
105
|
prompt = f"""Using the previous knowledge and new sources, answer the question. Include citations using numbers in square brackets [1], [2], etc. When citing, use the source number provided at the start of each source. Reflect information from sources critically.
|
106
106
|
|
107
|
-
|
108
|
-
|
107
|
+
Previous Knowledge:
|
108
|
+
{previous_knowledge}
|
109
109
|
|
110
|
-
|
110
|
+
Question: {question}
|
111
111
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
"""
|
112
|
+
New Sources:
|
113
|
+
{formatted_sources}
|
114
|
+
Reflect information from sources critically based on: {fact_check_response}. Never invent sources.
|
115
|
+
Provide a detailed answer with citations. Example format: "According to [1], ..." """
|
117
116
|
|
118
117
|
response = self.llm.invoke(prompt)
|
119
118
|
|