local-deep-research 0.2.0__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +261 -139
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +407 -0
- local_deep_research/api/research_functions.py +72 -90
- local_deep_research/citation_handler.py +16 -17
- local_deep_research/defaults/search_engines.toml +1 -1
- local_deep_research/report_generator.py +19 -5
- local_deep_research/search_system.py +20 -3
- local_deep_research/web/routes/settings_routes.py +0 -9
- local_deep_research/web/services/research_service.py +4 -0
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +1 -1
- {local_deep_research-0.2.0.dist-info → local_deep_research-0.2.3.dist-info}/METADATA +2 -2
- {local_deep_research-0.2.0.dist-info → local_deep_research-0.2.3.dist-info}/RECORD +15 -14
- {local_deep_research-0.2.0.dist-info → local_deep_research-0.2.3.dist-info}/WHEEL +0 -0
- {local_deep_research-0.2.0.dist-info → local_deep_research-0.2.3.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.2.0.dist-info → local_deep_research-0.2.3.dist-info}/licenses/LICENSE +0 -0
@@ -80,7 +80,7 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
80
80
|
|
81
81
|
def analyze_topic(self, query: str) -> Dict:
|
82
82
|
"""
|
83
|
-
|
83
|
+
Analyze a topic using parallel search, supporting multiple iterations.
|
84
84
|
|
85
85
|
Args:
|
86
86
|
query: The research query to analyze
|
@@ -89,6 +89,11 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
89
89
|
|
90
90
|
findings = []
|
91
91
|
all_search_results = []
|
92
|
+
current_knowledge = ""
|
93
|
+
|
94
|
+
# Track all search results across iterations
|
95
|
+
self.all_links_of_system = list()
|
96
|
+
self.questions_by_iteration = {}
|
92
97
|
|
93
98
|
self._update_progress(
|
94
99
|
"Initializing parallel research",
|
@@ -105,184 +110,301 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
105
110
|
return {
|
106
111
|
"findings": [],
|
107
112
|
"iterations": 0,
|
108
|
-
"
|
113
|
+
"questions_by_iteration": {},
|
109
114
|
"formatted_findings": "Error: Unable to conduct research without a search engine.",
|
110
115
|
"current_knowledge": "",
|
111
116
|
"error": "No search engine available",
|
112
117
|
}
|
113
118
|
|
119
|
+
# Determine number of iterations to run
|
120
|
+
iterations_to_run = get_db_setting("search.iterations")
|
121
|
+
logger.debug("Selected amount of iterations: " + iterations_to_run)
|
122
|
+
iterations_to_run = int(iterations_to_run)
|
114
123
|
try:
|
115
|
-
#
|
116
|
-
|
117
|
-
|
118
|
-
)
|
124
|
+
# Run each iteration
|
125
|
+
for iteration in range(1, iterations_to_run + 1):
|
126
|
+
iteration_progress_base = 5 + (iteration - 1) * (70 / iterations_to_run)
|
119
127
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
get_db_setting("search.questions_per_iteration")
|
126
|
-
), # 3 additional questions
|
127
|
-
questions_by_iteration={},
|
128
|
-
)
|
128
|
+
self._update_progress(
|
129
|
+
f"Starting iteration {iteration}/{iterations_to_run}",
|
130
|
+
iteration_progress_base,
|
131
|
+
{"phase": f"iteration_{iteration}", "iteration": iteration},
|
132
|
+
)
|
129
133
|
|
130
|
-
|
131
|
-
|
134
|
+
# Step 1: Generate questions
|
135
|
+
self._update_progress(
|
136
|
+
f"Generating search questions for iteration {iteration}",
|
137
|
+
iteration_progress_base + 5,
|
138
|
+
{"phase": "question_generation", "iteration": iteration},
|
139
|
+
)
|
132
140
|
|
133
|
-
|
134
|
-
|
135
|
-
|
141
|
+
# For first iteration, generate initial questions
|
142
|
+
# For subsequent iterations, generate follow-up questions
|
143
|
+
logger.info("Starting to generate questions")
|
144
|
+
if iteration == 1:
|
145
|
+
# Generate additional questions (plus the main query)
|
146
|
+
if iterations_to_run > 1:
|
147
|
+
context = f"""Iteration: {1} of {iterations_to_run}"""
|
148
|
+
else:
|
149
|
+
context = ""
|
150
|
+
questions = self.question_generator.generate_questions(
|
151
|
+
current_knowledge=context,
|
152
|
+
query=query,
|
153
|
+
questions_per_iteration=int(
|
154
|
+
get_db_setting("search.questions_per_iteration")
|
155
|
+
),
|
156
|
+
questions_by_iteration=self.questions_by_iteration,
|
157
|
+
)
|
136
158
|
|
137
|
-
|
138
|
-
|
139
|
-
"Running parallel searches for all questions",
|
140
|
-
20,
|
141
|
-
{"phase": "parallel_search"},
|
142
|
-
)
|
159
|
+
# Add the original query as the first question
|
160
|
+
all_questions = [query] + questions
|
143
161
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
162
|
+
# Store in questions_by_iteration
|
163
|
+
self.questions_by_iteration[iteration] = questions
|
164
|
+
logger.info(
|
165
|
+
f"Generated questions for iteration {iteration}: {questions}"
|
166
|
+
)
|
167
|
+
else:
|
168
|
+
# Get past questions from all previous iterations
|
169
|
+
past_questions = []
|
170
|
+
for prev_iter in range(1, iteration):
|
171
|
+
if prev_iter in self.questions_by_iteration:
|
172
|
+
past_questions.extend(
|
173
|
+
self.questions_by_iteration[prev_iter]
|
174
|
+
)
|
175
|
+
|
176
|
+
# Generate follow-up questions based on accumulated knowledge if iterations > 2
|
177
|
+
use_knowledge = iterations_to_run > 2
|
178
|
+
knowledge_for_questions = current_knowledge if use_knowledge else ""
|
179
|
+
context = f"""Current Knowledge: {knowledge_for_questions}
|
180
|
+
Iteration: {iteration} of {iterations_to_run}"""
|
181
|
+
|
182
|
+
# Generate questions
|
183
|
+
questions = self.question_generator.generate_questions(
|
184
|
+
current_knowledge=context,
|
185
|
+
query=query,
|
186
|
+
questions_per_iteration=int(
|
187
|
+
get_db_setting("search.questions_per_iteration")
|
188
|
+
),
|
189
|
+
questions_by_iteration=self.questions_by_iteration,
|
190
|
+
)
|
166
191
|
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
},
|
192
|
+
# Use only the new questions for this iteration's searches
|
193
|
+
all_questions = questions
|
194
|
+
|
195
|
+
# Store in questions_by_iteration
|
196
|
+
self.questions_by_iteration[iteration] = questions
|
197
|
+
logger.info(
|
198
|
+
f"Generated questions for iteration {iteration}: {questions}"
|
175
199
|
)
|
176
200
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
201
|
+
# Step 2: Run all searches in parallel for this iteration
|
202
|
+
self._update_progress(
|
203
|
+
f"Running parallel searches for iteration {iteration}",
|
204
|
+
iteration_progress_base + 10,
|
205
|
+
{"phase": "parallel_search", "iteration": iteration},
|
206
|
+
)
|
182
207
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
208
|
+
# Function for thread pool
|
209
|
+
def search_question(q):
|
210
|
+
try:
|
211
|
+
result = self.search.run(q)
|
212
|
+
return {"question": q, "results": result or []}
|
213
|
+
except Exception as e:
|
214
|
+
logger.error(f"Error searching for '{q}': {str(e)}")
|
215
|
+
return {"question": q, "results": [], "error": str(e)}
|
216
|
+
|
217
|
+
# Run searches in parallel
|
218
|
+
with concurrent.futures.ThreadPoolExecutor(
|
219
|
+
max_workers=len(all_questions)
|
220
|
+
) as executor:
|
221
|
+
futures = [
|
222
|
+
executor.submit(search_question, q) for q in all_questions
|
223
|
+
]
|
224
|
+
iteration_search_dict = {}
|
225
|
+
iteration_search_results = []
|
226
|
+
|
227
|
+
# Process results as they complete
|
228
|
+
for i, future in enumerate(
|
229
|
+
concurrent.futures.as_completed(futures)
|
230
|
+
):
|
231
|
+
result_dict = future.result()
|
232
|
+
question = result_dict["question"]
|
233
|
+
search_results = result_dict["results"]
|
234
|
+
iteration_search_dict[question] = search_results
|
235
|
+
|
236
|
+
self._update_progress(
|
237
|
+
f"Completed search {i + 1} of {len(all_questions)}: {question[:30]}...",
|
238
|
+
iteration_progress_base
|
239
|
+
+ 10
|
240
|
+
+ ((i + 1) / len(all_questions) * 30),
|
241
|
+
{
|
242
|
+
"phase": "search_complete",
|
243
|
+
"iteration": iteration,
|
244
|
+
"result_count": len(search_results),
|
245
|
+
"question": question,
|
246
|
+
},
|
247
|
+
)
|
248
|
+
|
249
|
+
# Collect all search results for this iteration
|
250
|
+
iteration_search_results.extend(search_results)
|
251
|
+
|
252
|
+
# Step 3: Filter and analyze results for this iteration
|
190
253
|
self._update_progress(
|
191
|
-
"
|
192
|
-
|
193
|
-
{"phase": "
|
254
|
+
f"Analyzing results for iteration {iteration}",
|
255
|
+
iteration_progress_base + 45,
|
256
|
+
{"phase": "iteration_analysis", "iteration": iteration},
|
194
257
|
)
|
195
258
|
|
196
|
-
#
|
197
|
-
|
259
|
+
# Apply cross-engine filtering if enabled
|
260
|
+
if self.use_cross_engine_filter:
|
261
|
+
self._update_progress(
|
262
|
+
f"Filtering search results for iteration {iteration}",
|
263
|
+
iteration_progress_base + 45,
|
264
|
+
{"phase": "cross_engine_filtering", "iteration": iteration},
|
265
|
+
)
|
198
266
|
|
199
|
-
|
200
|
-
|
201
|
-
all_search_results,
|
202
|
-
query,
|
203
|
-
reorder=self.filter_reorder,
|
204
|
-
reindex=self.filter_reindex,
|
205
|
-
start_index=existing_link_count, # Start indexing after existing links
|
206
|
-
)
|
267
|
+
# Get the current link count (for indexing)
|
268
|
+
existing_link_count = len(self.all_links_of_system)
|
207
269
|
|
208
|
-
|
209
|
-
|
270
|
+
# Filter the search results
|
271
|
+
filtered_search_results = self.cross_engine_filter.filter_results(
|
272
|
+
iteration_search_results,
|
273
|
+
query,
|
274
|
+
reorder=self.filter_reorder,
|
275
|
+
reindex=self.filter_reindex,
|
276
|
+
start_index=existing_link_count, # Start indexing after existing links
|
277
|
+
)
|
278
|
+
|
279
|
+
links = extract_links_from_search_results(filtered_search_results)
|
280
|
+
self.all_links_of_system.extend(links)
|
281
|
+
|
282
|
+
self._update_progress(
|
283
|
+
f"Filtered from {len(iteration_search_results)} to {len(filtered_search_results)} results",
|
284
|
+
iteration_progress_base + 50,
|
285
|
+
{
|
286
|
+
"phase": "filtering_complete",
|
287
|
+
"iteration": iteration,
|
288
|
+
"links_count": len(self.all_links_of_system),
|
289
|
+
},
|
290
|
+
)
|
210
291
|
|
292
|
+
# Use filtered results for analysis
|
293
|
+
iteration_search_results = filtered_search_results
|
294
|
+
else:
|
295
|
+
# Just extract links without filtering
|
296
|
+
links = extract_links_from_search_results(iteration_search_results)
|
297
|
+
self.all_links_of_system.extend(links)
|
298
|
+
|
299
|
+
# Add to all search results
|
300
|
+
all_search_results.extend(iteration_search_results)
|
301
|
+
|
302
|
+
# Create a finding for this iteration's results
|
303
|
+
if self.include_text_content and iteration_search_results:
|
304
|
+
# For iteration > 1 with knowledge accumulation, use follow-up analysis
|
305
|
+
if iteration > 1 and iterations_to_run > 2:
|
306
|
+
citation_result = self.citation_handler.analyze_followup(
|
307
|
+
query,
|
308
|
+
iteration_search_results,
|
309
|
+
current_knowledge,
|
310
|
+
len(self.all_links_of_system) - len(links),
|
311
|
+
)
|
312
|
+
else:
|
313
|
+
# For first iteration or without knowledge accumulation, use initial analysis
|
314
|
+
citation_result = self.citation_handler.analyze_initial(
|
315
|
+
query, iteration_search_results
|
316
|
+
)
|
317
|
+
|
318
|
+
if citation_result:
|
319
|
+
# Create a finding for this iteration
|
320
|
+
iteration_content = citation_result["content"]
|
321
|
+
|
322
|
+
# Update current knowledge if iterations > 2
|
323
|
+
if iterations_to_run > 2:
|
324
|
+
if current_knowledge:
|
325
|
+
current_knowledge = f"{current_knowledge}\n\n## FINDINGS FROM ITERATION {iteration}:\n\n{iteration_content}"
|
326
|
+
else:
|
327
|
+
current_knowledge = iteration_content
|
328
|
+
|
329
|
+
finding = {
|
330
|
+
"phase": f"Iteration {iteration}",
|
331
|
+
"content": iteration_content,
|
332
|
+
"question": query,
|
333
|
+
"search_results": iteration_search_results,
|
334
|
+
"documents": citation_result.get("documents", []),
|
335
|
+
}
|
336
|
+
findings.append(finding)
|
337
|
+
|
338
|
+
# Add documents to repository
|
339
|
+
if "documents" in citation_result:
|
340
|
+
self.findings_repository.add_documents(
|
341
|
+
citation_result["documents"]
|
342
|
+
)
|
343
|
+
|
344
|
+
# Mark iteration as complete
|
345
|
+
iteration_progress = 5 + iteration * (70 / iterations_to_run)
|
211
346
|
self._update_progress(
|
212
|
-
f"
|
213
|
-
|
214
|
-
{
|
215
|
-
"phase": "filtering_complete",
|
216
|
-
"links_count": len(self.all_links_of_system),
|
217
|
-
},
|
347
|
+
f"Completed iteration {iteration}/{iterations_to_run}",
|
348
|
+
iteration_progress,
|
349
|
+
{"phase": "iteration_complete", "iteration": iteration},
|
218
350
|
)
|
219
351
|
|
220
|
-
|
221
|
-
|
352
|
+
# Final synthesis after all iterations
|
353
|
+
self._update_progress(
|
354
|
+
"Generating final synthesis", 80, {"phase": "synthesis"}
|
355
|
+
)
|
222
356
|
|
223
|
-
#
|
357
|
+
# Handle final synthesis based on include_text_content flag
|
224
358
|
if self.include_text_content:
|
225
|
-
#
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
359
|
+
# Generate a final synthesis from all search results
|
360
|
+
if iterations_to_run > 1:
|
361
|
+
final_citation_result = self.citation_handler.analyze_initial(
|
362
|
+
query, all_search_results
|
363
|
+
)
|
364
|
+
# Add null check for final_citation_result
|
365
|
+
if final_citation_result:
|
366
|
+
synthesized_content = final_citation_result["content"]
|
367
|
+
else:
|
368
|
+
synthesized_content = (
|
369
|
+
"No relevant results found in final synthesis."
|
370
|
+
)
|
371
|
+
else:
|
372
|
+
# For single iteration, use the content from findings
|
373
|
+
synthesized_content = (
|
374
|
+
findings[0]["content"]
|
375
|
+
if findings
|
376
|
+
else "No relevant results found."
|
377
|
+
)
|
378
|
+
# Add a final synthesis finding
|
379
|
+
final_finding = {
|
233
380
|
"phase": "Final synthesis",
|
234
381
|
"content": synthesized_content,
|
235
382
|
"question": query,
|
236
383
|
"search_results": all_search_results,
|
237
|
-
"documents":
|
384
|
+
"documents": [],
|
238
385
|
}
|
239
|
-
findings.append(
|
240
|
-
|
241
|
-
# Transfer questions to repository
|
242
|
-
self.findings_repository.set_questions_by_iteration(
|
243
|
-
self.questions_by_iteration
|
244
|
-
)
|
245
|
-
|
246
|
-
# Format findings
|
247
|
-
formatted_findings = self.findings_repository.format_findings_to_text(
|
248
|
-
findings, synthesized_content
|
249
|
-
)
|
250
|
-
|
251
|
-
# Add documents to repository
|
252
|
-
if "documents" in citation_result:
|
253
|
-
self.findings_repository.add_documents(citation_result["documents"])
|
254
|
-
else:
|
255
|
-
synthesized_content = "No relevant results found."
|
256
|
-
formatted_findings = synthesized_content
|
257
|
-
finding = {
|
258
|
-
"phase": "Error",
|
259
|
-
"content": "No relevant results found.",
|
260
|
-
"question": query,
|
261
|
-
"search_results": all_search_results,
|
262
|
-
"documents": [],
|
263
|
-
}
|
264
|
-
findings.append(finding)
|
386
|
+
findings.append(final_finding)
|
265
387
|
else:
|
266
388
|
# Skip LLM analysis, just format the raw search results
|
267
389
|
synthesized_content = "LLM analysis skipped"
|
268
|
-
|
390
|
+
final_finding = {
|
269
391
|
"phase": "Raw search results",
|
270
392
|
"content": "LLM analysis was skipped. Displaying raw search results with links.",
|
271
393
|
"question": query,
|
272
394
|
"search_results": all_search_results,
|
273
395
|
"documents": [],
|
274
396
|
}
|
275
|
-
findings.append(
|
397
|
+
findings.append(final_finding)
|
276
398
|
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
399
|
+
# Transfer questions to repository
|
400
|
+
self.findings_repository.set_questions_by_iteration(
|
401
|
+
self.questions_by_iteration
|
402
|
+
)
|
281
403
|
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
404
|
+
# Format findings
|
405
|
+
formatted_findings = self.findings_repository.format_findings_to_text(
|
406
|
+
findings, synthesized_content
|
407
|
+
)
|
286
408
|
|
287
409
|
except Exception as e:
|
288
410
|
import traceback
|
@@ -305,8 +427,8 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
305
427
|
|
306
428
|
return {
|
307
429
|
"findings": findings,
|
308
|
-
"iterations":
|
309
|
-
"
|
430
|
+
"iterations": iterations_to_run,
|
431
|
+
"questions_by_iteration": self.questions_by_iteration,
|
310
432
|
"formatted_findings": formatted_findings,
|
311
433
|
"current_knowledge": synthesized_content,
|
312
434
|
}
|