local-deep-research 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published in their public registries.
- local_deep_research/__init__.py +1 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +5 -1
- local_deep_research/advanced_search_system/strategies/base_strategy.py +5 -2
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +23 -16
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +273 -144
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +4 -3
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +402 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +8 -4
- local_deep_research/api/research_functions.py +0 -46
- local_deep_research/citation_handler.py +16 -20
- local_deep_research/config/llm_config.py +25 -68
- local_deep_research/config/search_config.py +8 -21
- local_deep_research/defaults/default_settings.json +3814 -0
- local_deep_research/search_system.py +46 -32
- local_deep_research/utilities/db_utils.py +22 -3
- local_deep_research/utilities/search_utilities.py +10 -7
- local_deep_research/web/app.py +3 -23
- local_deep_research/web/app_factory.py +1 -25
- local_deep_research/web/database/migrations.py +20 -418
- local_deep_research/web/routes/settings_routes.py +75 -364
- local_deep_research/web/services/research_service.py +47 -43
- local_deep_research/web/services/settings_manager.py +108 -315
- local_deep_research/web/services/settings_service.py +3 -56
- local_deep_research/web/static/js/components/research.js +1 -1
- local_deep_research/web/static/js/components/settings.js +16 -4
- local_deep_research/web/static/js/research_form.js +106 -0
- local_deep_research/web/templates/pages/research.html +3 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +13 -18
- local_deep_research/web_search_engines/engines/search_engine_local.py +11 -2
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +7 -11
- local_deep_research/web_search_engines/search_engine_factory.py +12 -64
- local_deep_research/web_search_engines/search_engines_config.py +123 -64
- {local_deep_research-0.2.2.dist-info → local_deep_research-0.3.0.dist-info}/METADATA +16 -1
- {local_deep_research-0.2.2.dist-info → local_deep_research-0.3.0.dist-info}/RECORD +37 -38
- local_deep_research/config/config_files.py +0 -245
- local_deep_research/defaults/local_collections.toml +0 -53
- local_deep_research/defaults/main.toml +0 -80
- local_deep_research/defaults/search_engines.toml +0 -291
- {local_deep_research-0.2.2.dist-info → local_deep_research-0.3.0.dist-info}/WHEEL +0 -0
- {local_deep_research-0.2.2.dist-info → local_deep_research-0.3.0.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.2.2.dist-info → local_deep_research-0.3.0.dist-info}/licenses/LICENSE +0 -0
local_deep_research/__init__.py
CHANGED
local_deep_research/advanced_search_system/filters/cross_engine_filter.py
CHANGED
@@ -6,6 +6,7 @@ import json
 import logging
 from typing import Dict, List
 
+from ...utilities.db_utils import get_db_setting
 from ...utilities.search_utilities import remove_think_tags
 from .base_filter import BaseFilter
 
@@ -16,7 +17,7 @@ class CrossEngineFilter(BaseFilter):
     """Filter that ranks and filters results from multiple search engines."""
 
     def __init__(
-        self, model, max_results=
+        self, model, max_results=None, default_reorder=True, default_reindex=True
     ):
         """
         Initialize the cross-engine filter.
@@ -28,6 +29,9 @@ class CrossEngineFilter(BaseFilter):
             default_reindex: Default setting for reindexing results after filtering
         """
         super().__init__(model)
+        # Get max_results from database settings if not provided
+        if max_results is None:
+            max_results = get_db_setting("search.cross_engine_max_results", 100)
         self.max_results = max_results
         self.default_reorder = default_reorder
         self.default_reindex = default_reindex
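The new constructor treats `None` as a sentinel meaning "defer to the user's saved setting", so an explicit argument still wins over the database value. A minimal sketch of the pattern, assuming only that `get_db_setting(key, default)` returns the stored value or the supplied default (the in-memory `_FAKE_DB` stub below stands in for the real helper in `local_deep_research.utilities.db_utils`):

_FAKE_DB = {"search.cross_engine_max_results": 50}

def get_db_setting(key, default=None):
    # Stand-in: the real helper reads from the settings database.
    return _FAKE_DB.get(key, default)

class CrossEngineFilter:
    def __init__(self, model, max_results=None):
        # None means "look up the saved setting"; 100 is the last-resort default.
        if max_results is None:
            max_results = get_db_setting("search.cross_engine_max_results", 100)
        self.model = model
        self.max_results = max_results

print(CrossEngineFilter(model=None).max_results)                  # 50, from settings
print(CrossEngineFilter(model=None, max_results=10).max_results)  # 10, explicit wins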
local_deep_research/advanced_search_system/strategies/base_strategy.py
CHANGED
@@ -13,11 +13,14 @@ logger = logging.getLogger(__name__)
 class BaseSearchStrategy(ABC):
     """Abstract base class for all search strategies."""
 
-    def __init__(self):
+    def __init__(self, all_links_of_system=None):
         """Initialize the base strategy with common attributes."""
         self.progress_callback = None
         self.questions_by_iteration = {}
-
+        # Create a new list if None is provided (avoiding mutable default argument)
+        self.all_links_of_system = (
+            all_links_of_system if all_links_of_system is not None else []
+        )
 
     def set_progress_callback(self, callback: Callable[[str, int, dict], None]) -> None:
         """Set a callback function to receive progress updates."""
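The comment in this hunk names the pitfall directly: a mutable default such as `def __init__(self, links=[])` is evaluated once, when the function is defined, so every instance that omits the argument shares the same list. A short demonstration of the bug the `None` sentinel avoids:

class Broken:
    def __init__(self, links=[]):  # one list object, shared by all instances
        self.links = links

a, b = Broken(), Broken()
a.links.append("https://example.com")
print(b.links)  # ['https://example.com'] -- b was never touched

class Fixed:
    def __init__(self, links=None):
        # A fresh list per instance unless the caller supplies one.
        self.links = links if links is not None else []

c, d = Fixed(), Fixed()
c.links.append("https://example.com")
print(d.links)  # []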
local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py
CHANGED
@@ -7,10 +7,7 @@ import logging
 from datetime import datetime
 from typing import Dict, List
 
-from langchain_core.language_models import BaseLLM
-
 from ...citation_handler import CitationHandler
-from ...config.config_files import settings
 from ...config.llm_config import get_llm
 from ...config.search_config import get_search
 from ...utilities.db_utils import get_db_setting
@@ -27,18 +24,34 @@ class IterDRAGStrategy(BaseSearchStrategy):
     """IterDRAG strategy that breaks queries into sub-queries."""
 
     def __init__(
-        self,
+        self,
+        search=None,
+        model=None,
+        max_iterations=3,
+        subqueries_per_iteration=2,
+        all_links_of_system=None,
     ):
-        """Initialize the strategy with
-
-
+        """Initialize the IterDRAG strategy with search and LLM.
+
+        Args:
+            search: Search engine to use for web queries
+            model: LLM to use for text generation and reasoning
+            max_iterations: Maximum number of iterations to run
+            subqueries_per_iteration: Number of sub-queries to generate per iteration
+            all_links_of_system: Optional list of links to initialize with
+        """
+        super().__init__(all_links_of_system=all_links_of_system)
         self.search = search or get_search()
+        self.model = model or get_llm()
+        self.max_iterations = max_iterations
+        self.subqueries_per_iteration = subqueries_per_iteration
+
+        # Initialize progress callback
         self.progress_callback = None
-        self.all_links_of_system = list()
         self.questions_by_iteration = {}
 
         # Use provided citation_handler or create one
-        self.citation_handler =
+        self.citation_handler = CitationHandler(self.model)
 
         # Initialize components
         self.question_generator = DecompositionQuestionGenerator(self.model)
@@ -396,13 +409,7 @@ Please try again with a different query or contact support.
         """
 
         # Compress knowledge if needed
-        if (
-            get_db_setting(
-                "general.knowledge_accumulation",
-                settings.general.knowledge_accumulation,
-            )
-            == "ITERATION"
-        ):
+        if get_db_setting("general.knowledge_accumulation", "ITERATION") == "ITERATION":
             try:
                 self._update_progress(
                     "Compressing knowledge", 90, {"phase": "knowledge_compression"}
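Two things stand out here. First, the constructor now follows the same `search or get_search()` dependency-injection idiom as the other strategies: production code resolves collaborators from config, while tests can pass fakes and never touch the database or network. Second, the knowledge-accumulation check now falls back to the literal default "ITERATION" instead of reading the removed TOML-backed `settings` object. A self-contained sketch of the injection idiom (`Strategy`, `FakeSearch`, and the stub factories are illustrative, not package API):

def get_search():
    raise RuntimeError("would resolve a real engine from config")

def get_llm():
    raise RuntimeError("would resolve a real LLM from config")

class Strategy:
    def __init__(self, search=None, model=None):
        # `or` is safe here: real engine/LLM objects are always truthy.
        self.search = search or get_search()
        self.model = model or get_llm()

class FakeSearch:
    def run(self, query):
        return [{"title": "stub", "link": "https://example.com"}]

s = Strategy(search=FakeSearch(), model=object())  # no config or DB needed
print(s.search.run("anything"))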
local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py
CHANGED
@@ -34,7 +34,8 @@ class ParallelSearchStrategy(BaseSearchStrategy):
         use_cross_engine_filter: bool = True,
         filter_reorder: bool = True,
         filter_reindex: bool = True,
-
+        cross_engine_max_results: int = None,
+        all_links_of_system=None,
     ):
         """Initialize with optional dependency injection for testing.
 
@@ -46,23 +47,29 @@ class ParallelSearchStrategy(BaseSearchStrategy):
             use_cross_engine_filter: If True, filter search results across engines
             filter_reorder: Whether to reorder results by relevance
             filter_reindex: Whether to update result indices after filtering
-
+            cross_engine_max_results: Maximum number of results to keep after cross-engine filtering
+            all_links_of_system: Optional list of links to initialize with
         """
-        super().__init__()
+        super().__init__(all_links_of_system=all_links_of_system)
         self.search = search or get_search()
         self.model = model or get_llm()
         self.progress_callback = None
-        self.all_links_of_system = list()
         self.questions_by_iteration = {}
         self.include_text_content = include_text_content
         self.use_cross_engine_filter = use_cross_engine_filter
         self.filter_reorder = filter_reorder
         self.filter_reindex = filter_reindex
 
+        # Get max_filtered_results from database if not provided
+        if cross_engine_max_results is None:
+            cross_engine_max_results = get_db_setting(
+                "search.cross_engine_max_results", 100
+            )
+
         # Initialize the cross-engine filter
         self.cross_engine_filter = CrossEngineFilter(
             model=self.model,
-            max_results=
+            max_results=cross_engine_max_results,
             default_reorder=filter_reorder,
             default_reindex=filter_reindex,
         )
@@ -80,7 +87,7 @@ class ParallelSearchStrategy(BaseSearchStrategy):
 
     def analyze_topic(self, query: str) -> Dict:
         """
-
+        Analyze a topic using parallel search, supporting multiple iterations.
 
         Args:
             query: The research query to analyze
@@ -89,6 +96,11 @@ class ParallelSearchStrategy(BaseSearchStrategy):
 
         findings = []
         all_search_results = []
+        current_knowledge = ""
+
+        # Track all search results across iterations
+        self.all_links_of_system = list()
+        self.questions_by_iteration = {}
 
         self._update_progress(
             "Initializing parallel research",
@@ -105,184 +117,301 @@ class ParallelSearchStrategy(BaseSearchStrategy):
             return {
                 "findings": [],
                 "iterations": 0,
-                "
+                "questions_by_iteration": {},
                 "formatted_findings": "Error: Unable to conduct research without a search engine.",
                 "current_knowledge": "",
                 "error": "No search engine available",
             }
 
+        # Determine number of iterations to run
+        iterations_to_run = get_db_setting("search.iterations")
+        logger.debug("Selected amount of iterations: " + str(iterations_to_run))
+        iterations_to_run = int(iterations_to_run)
         try:
-            #
-
-
-            )
+            # Run each iteration
+            for iteration in range(1, iterations_to_run + 1):
+                iteration_progress_base = 5 + (iteration - 1) * (70 / iterations_to_run)
 
-
-
-
-
-
-
-
-
-
+                self._update_progress(
+                    f"Starting iteration {iteration}/{iterations_to_run}",
+                    iteration_progress_base,
+                    {"phase": f"iteration_{iteration}", "iteration": iteration},
+                )
+
+                # Step 1: Generate questions
+                self._update_progress(
+                    f"Generating search questions for iteration {iteration}",
+                    iteration_progress_base + 5,
+                    {"phase": "question_generation", "iteration": iteration},
+                )
 
-
-
+                # For first iteration, generate initial questions
+                # For subsequent iterations, generate follow-up questions
+                logger.info("Starting to generate questions")
+                if iteration == 1:
+                    # Generate additional questions (plus the main query)
+                    if iterations_to_run > 1:
+                        context = f"""Iteration: {1} of {iterations_to_run}"""
+                    else:
+                        context = ""
+                    questions = self.question_generator.generate_questions(
+                        current_knowledge=context,
+                        query=query,
+                        questions_per_iteration=int(
+                            get_db_setting("search.questions_per_iteration")
+                        ),
+                        questions_by_iteration=self.questions_by_iteration,
+                    )
 
-
-
-            logger.info(f"Generated questions: {questions}")
+                    # Add the original query as the first question
+                    all_questions = [query] + questions
 
-
-
-
-
-
-
+                    # Store in questions_by_iteration
+                    self.questions_by_iteration[iteration] = questions
+                    logger.info(
+                        f"Generated questions for iteration {iteration}: {questions}"
+                    )
+                else:
+                    # Get past questions from all previous iterations
+                    past_questions = []
+                    for prev_iter in range(1, iteration):
+                        if prev_iter in self.questions_by_iteration:
+                            past_questions.extend(
+                                self.questions_by_iteration[prev_iter]
+                            )
+
+                    # Generate follow-up questions based on accumulated knowledge if iterations > 2
+                    use_knowledge = iterations_to_run > 2
+                    knowledge_for_questions = current_knowledge if use_knowledge else ""
+                    context = f"""Current Knowledge: {knowledge_for_questions}
+Iteration: {iteration} of {iterations_to_run}"""
+
+                    # Generate questions
+                    questions = self.question_generator.generate_questions(
+                        current_knowledge=context,
+                        query=query,
+                        questions_per_iteration=int(
+                            get_db_setting("search.questions_per_iteration")
+                        ),
+                        questions_by_iteration=self.questions_by_iteration,
+                    )
 
-
-
-                try:
-                    result = self.search.run(q)
-                    return {"question": q, "results": result or []}
-                except Exception as e:
-                    logger.error(f"Error searching for '{q}': {str(e)}")
-                    return {"question": q, "results": [], "error": str(e)}
-
-            # Run searches in parallel
-            with concurrent.futures.ThreadPoolExecutor(
-                max_workers=len(all_questions)
-            ) as executor:
-                futures = [executor.submit(search_question, q) for q in all_questions]
-                all_search_dict = {}
-
-                # Process results as they complete
-                for i, future in enumerate(concurrent.futures.as_completed(futures)):
-                    result_dict = future.result()
-                    question = result_dict["question"]
-                    search_results = result_dict["results"]
-                    all_search_dict[question] = search_results
+                    # Use only the new questions for this iteration's searches
+                    all_questions = questions
 
-
-
-
-                        {
-                            "phase": "search_complete",
-                            "result_count": len(search_results),
-                            "question": question,
-                        },
+                    # Store in questions_by_iteration
+                    self.questions_by_iteration[iteration] = questions
+                    logger.info(
+                        f"Generated questions for iteration {iteration}: {questions}"
                    )
 
-
-
-
-
-
+                # Step 2: Run all searches in parallel for this iteration
+                self._update_progress(
+                    f"Running parallel searches for iteration {iteration}",
+                    iteration_progress_base + 10,
+                    {"phase": "parallel_search", "iteration": iteration},
+                )
 
-
-
-
-
-
-
-
+                # Function for thread pool
+                def search_question(q):
+                    try:
+                        result = self.search.run(q)
+                        return {"question": q, "results": result or []}
+                    except Exception as e:
+                        logger.error(f"Error searching for '{q}': {str(e)}")
+                        return {"question": q, "results": [], "error": str(e)}
+
+                # Run searches in parallel
+                with concurrent.futures.ThreadPoolExecutor(
+                    max_workers=len(all_questions)
+                ) as executor:
+                    futures = [
+                        executor.submit(search_question, q) for q in all_questions
+                    ]
+                    iteration_search_dict = {}
+                    iteration_search_results = []
+
+                    # Process results as they complete
+                    for i, future in enumerate(
+                        concurrent.futures.as_completed(futures)
+                    ):
+                        result_dict = future.result()
+                        question = result_dict["question"]
+                        search_results = result_dict["results"]
+                        iteration_search_dict[question] = search_results
+
+                        self._update_progress(
+                            f"Completed search {i + 1} of {len(all_questions)}: {question[:30]}...",
+                            iteration_progress_base
+                            + 10
+                            + ((i + 1) / len(all_questions) * 30),
+                            {
+                                "phase": "search_complete",
+                                "iteration": iteration,
+                                "result_count": len(search_results),
+                                "question": question,
+                            },
+                        )
+
+                        # Collect all search results for this iteration
+                        iteration_search_results.extend(search_results)
+
+                # Step 3: Filter and analyze results for this iteration
                 self._update_progress(
-                    "
-
-                    {"phase": "
+                    f"Analyzing results for iteration {iteration}",
+                    iteration_progress_base + 45,
+                    {"phase": "iteration_analysis", "iteration": iteration},
                 )
 
-            #
-
+                # Apply cross-engine filtering if enabled
+                if self.use_cross_engine_filter:
+                    self._update_progress(
+                        f"Filtering search results for iteration {iteration}",
+                        iteration_progress_base + 45,
+                        {"phase": "cross_engine_filtering", "iteration": iteration},
+                    )
+
+                    # Get the current link count (for indexing)
+                    existing_link_count = len(self.all_links_of_system)
 
-
-
-
-
-
-
-
-
+                    # Filter the search results
+                    filtered_search_results = self.cross_engine_filter.filter_results(
+                        iteration_search_results,
+                        query,
+                        reorder=self.filter_reorder,
+                        reindex=self.filter_reindex,
+                        start_index=existing_link_count,  # Start indexing after existing links
+                    )
 
-
-
+                    links = extract_links_from_search_results(filtered_search_results)
+                    self.all_links_of_system.extend(links)
+
+                    self._update_progress(
+                        f"Filtered from {len(iteration_search_results)} to {len(filtered_search_results)} results",
+                        iteration_progress_base + 50,
+                        {
+                            "phase": "filtering_complete",
+                            "iteration": iteration,
+                            "links_count": len(self.all_links_of_system),
+                        },
+                    )
 
+                    # Use filtered results for analysis
+                    iteration_search_results = filtered_search_results
+                else:
+                    # Just extract links without filtering
+                    links = extract_links_from_search_results(iteration_search_results)
+                    self.all_links_of_system.extend(links)
+
+                # Add to all search results
+                all_search_results.extend(iteration_search_results)
+
+                # Create a finding for this iteration's results
+                if self.include_text_content and iteration_search_results:
+                    # For iteration > 1 with knowledge accumulation, use follow-up analysis
+                    if iteration > 1 and iterations_to_run > 2:
+                        citation_result = self.citation_handler.analyze_followup(
+                            query,
+                            iteration_search_results,
+                            current_knowledge,
+                            len(self.all_links_of_system) - len(links),
+                        )
+                    else:
+                        # For first iteration or without knowledge accumulation, use initial analysis
+                        citation_result = self.citation_handler.analyze_initial(
+                            query, iteration_search_results
+                        )
+
+                    if citation_result:
+                        # Create a finding for this iteration
+                        iteration_content = citation_result["content"]
+
+                        # Update current knowledge if iterations > 2
+                        if iterations_to_run > 2:
+                            if current_knowledge:
+                                current_knowledge = f"{current_knowledge}\n\n## FINDINGS FROM ITERATION {iteration}:\n\n{iteration_content}"
+                            else:
+                                current_knowledge = iteration_content
+
+                        finding = {
+                            "phase": f"Iteration {iteration}",
+                            "content": iteration_content,
+                            "question": query,
+                            "search_results": iteration_search_results,
+                            "documents": citation_result.get("documents", []),
+                        }
+                        findings.append(finding)
+
+                        # Add documents to repository
+                        if "documents" in citation_result:
+                            self.findings_repository.add_documents(
+                                citation_result["documents"]
+                            )
+
+                # Mark iteration as complete
+                iteration_progress = 5 + iteration * (70 / iterations_to_run)
                 self._update_progress(
-                    f"
-
-                    {
-                        "phase": "filtering_complete",
-                        "links_count": len(self.all_links_of_system),
-                    },
+                    f"Completed iteration {iteration}/{iterations_to_run}",
+                    iteration_progress,
+                    {"phase": "iteration_complete", "iteration": iteration},
                 )
 
-
-
+            # Final synthesis after all iterations
+            self._update_progress(
+                "Generating final synthesis", 80, {"phase": "synthesis"}
+            )
 
-            #
+            # Handle final synthesis based on include_text_content flag
             if self.include_text_content:
-                #
-
-
-
-
-
-
-
+                # Generate a final synthesis from all search results
+                if iterations_to_run > 1:
+                    final_citation_result = self.citation_handler.analyze_initial(
+                        query, all_search_results
+                    )
+                    # Add null check for final_citation_result
+                    if final_citation_result:
+                        synthesized_content = final_citation_result["content"]
+                    else:
+                        synthesized_content = (
+                            "No relevant results found in final synthesis."
+                        )
+                else:
+                    # For single iteration, use the content from findings
+                    synthesized_content = (
+                        findings[0]["content"]
+                        if findings
+                        else "No relevant results found."
+                    )
+                # Add a final synthesis finding
+                final_finding = {
                     "phase": "Final synthesis",
                     "content": synthesized_content,
                     "question": query,
                     "search_results": all_search_results,
-                    "documents":
+                    "documents": [],
                 }
-                findings.append(
-
-                # Transfer questions to repository
-                self.findings_repository.set_questions_by_iteration(
-                    self.questions_by_iteration
-                )
-
-                # Format findings
-                formatted_findings = self.findings_repository.format_findings_to_text(
-                    findings, synthesized_content
-                )
-
-                # Add documents to repository
-                if "documents" in citation_result:
-                    self.findings_repository.add_documents(citation_result["documents"])
-            else:
-                synthesized_content = "No relevant results found."
-                formatted_findings = synthesized_content
-                finding = {
-                    "phase": "Error",
-                    "content": "No relevant results found.",
-                    "question": query,
-                    "search_results": all_search_results,
-                    "documents": [],
-                }
-                findings.append(finding)
+                findings.append(final_finding)
             else:
                 # Skip LLM analysis, just format the raw search results
                 synthesized_content = "LLM analysis skipped"
-
+                final_finding = {
                     "phase": "Raw search results",
                     "content": "LLM analysis was skipped. Displaying raw search results with links.",
                     "question": query,
                     "search_results": all_search_results,
                     "documents": [],
                 }
-                findings.append(
+                findings.append(final_finding)
 
-
-
-
-
+            # Transfer questions to repository
+            self.findings_repository.set_questions_by_iteration(
+                self.questions_by_iteration
+            )
 
-
-
-
-
+            # Format findings
+            formatted_findings = self.findings_repository.format_findings_to_text(
+                findings, synthesized_content
+            )
 
         except Exception as e:
             import traceback
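The search fan-out inside the new per-iteration loop is a standard `ThreadPoolExecutor` plus `as_completed` pattern: one worker per question, results consumed in completion order (not submission order) so progress can be reported as each search finishes, and per-question exceptions captured in the result dict rather than raised. A runnable sketch of just that skeleton, with `search_question` standing in for the real `self.search.run` call:

import concurrent.futures

def search_question(q):
    try:
        return {"question": q, "results": [f"result for {q}"]}  # stand-in search
    except Exception as e:
        return {"question": q, "results": [], "error": str(e)}

questions = ["what is X?", "how does Y work?", "why Z?"]

with concurrent.futures.ThreadPoolExecutor(max_workers=len(questions)) as executor:
    futures = [executor.submit(search_question, q) for q in questions]
    for i, future in enumerate(concurrent.futures.as_completed(futures)):
        r = future.result()  # never raises: errors were caught inside the worker
        print(f"{i + 1}/{len(questions)} done: {r['question']!r} "
              f"({len(r['results'])} results)")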
@@ -305,8 +434,8 @@ class ParallelSearchStrategy(BaseSearchStrategy):
 
         return {
             "findings": findings,
-            "iterations":
-            "
+            "iterations": iterations_to_run,
+            "questions_by_iteration": self.questions_by_iteration,
             "formatted_findings": formatted_findings,
             "current_knowledge": synthesized_content,
         }
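For callers, the visible change is the return shape: `iterations` now reports the configured iteration count, alongside the full `questions_by_iteration` map. A hypothetical consumer, assuming `strategy` is an already-constructed `ParallelSearchStrategy`:

result = strategy.analyze_topic("impact of local LLMs on research workflows")

print(result["iterations"])  # e.g. 2, as configured in search.iterations
for iteration, questions in result["questions_by_iteration"].items():
    print(f"iteration {iteration}: {len(questions)} generated questions")
print(result["formatted_findings"][:200])  # rendered report text
print(result["current_knowledge"][:200])   # final synthesized content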
local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py
CHANGED
@@ -23,13 +23,14 @@ class RapidSearchStrategy(BaseSearchStrategy):
     a single synthesis step at the end, optimized for speed.
     """
 
-    def __init__(
+    def __init__(
+        self, search=None, model=None, citation_handler=None, all_links_of_system=None
+    ):
         """Initialize with optional dependency injection for testing."""
-        super().__init__()
+        super().__init__(all_links_of_system=all_links_of_system)
         self.search = search or get_search()
         self.model = model or get_llm()
         self.progress_callback = None
-        self.all_links_of_system = list()
         self.questions_by_iteration = {}
 
         # Use provided citation_handler or create one
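A plausible reading of why `all_links_of_system` is now threaded through every strategy constructor (the diff itself does not state the rationale): a caller can hand several strategies one shared link registry so that result indices keep counting up across strategies instead of each one restarting at zero. Sketch with a stand-in base class mirroring the `BaseSearchStrategy` change:

class StrategyBase:
    def __init__(self, all_links_of_system=None):
        # Shared when provided, fresh otherwise (no mutable default).
        self.all_links_of_system = (
            all_links_of_system if all_links_of_system is not None else []
        )

shared_links = []
first = StrategyBase(all_links_of_system=shared_links)
second = StrategyBase(all_links_of_system=shared_links)

first.all_links_of_system.append({"index": 1, "url": "https://example.com/a"})
print(len(second.all_links_of_system))  # 1 -- both see the same registry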
|