local-deep-research 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +1 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +5 -1
- local_deep_research/advanced_search_system/strategies/base_strategy.py +5 -2
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +23 -16
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +13 -6
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +4 -3
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +57 -62
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +8 -4
- local_deep_research/api/research_functions.py +0 -46
- local_deep_research/citation_handler.py +2 -5
- local_deep_research/config/llm_config.py +25 -68
- local_deep_research/config/search_config.py +8 -21
- local_deep_research/defaults/default_settings.json +3814 -0
- local_deep_research/search_system.py +34 -31
- local_deep_research/utilities/db_utils.py +22 -3
- local_deep_research/utilities/search_utilities.py +10 -7
- local_deep_research/web/app.py +3 -23
- local_deep_research/web/app_factory.py +1 -25
- local_deep_research/web/database/migrations.py +20 -418
- local_deep_research/web/routes/settings_routes.py +75 -364
- local_deep_research/web/services/research_service.py +43 -43
- local_deep_research/web/services/settings_manager.py +108 -315
- local_deep_research/web/services/settings_service.py +3 -56
- local_deep_research/web/static/js/components/research.js +1 -1
- local_deep_research/web/static/js/components/settings.js +16 -4
- local_deep_research/web/static/js/research_form.js +106 -0
- local_deep_research/web/templates/pages/research.html +3 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +13 -18
- local_deep_research/web_search_engines/engines/search_engine_local.py +11 -2
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +7 -11
- local_deep_research/web_search_engines/search_engine_factory.py +12 -64
- local_deep_research/web_search_engines/search_engines_config.py +123 -64
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/METADATA +16 -1
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/RECORD +37 -39
- local_deep_research/config/config_files.py +0 -245
- local_deep_research/defaults/local_collections.toml +0 -53
- local_deep_research/defaults/main.toml +0 -80
- local_deep_research/defaults/search_engines.toml +0 -291
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/WHEEL +0 -0
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/licenses/LICENSE +0 -0
local_deep_research/__init__.py
CHANGED
@@ -6,6 +6,7 @@ import json
|
|
6
6
|
import logging
|
7
7
|
from typing import Dict, List
|
8
8
|
|
9
|
+
from ...utilities.db_utils import get_db_setting
|
9
10
|
from ...utilities.search_utilities import remove_think_tags
|
10
11
|
from .base_filter import BaseFilter
|
11
12
|
|
@@ -16,7 +17,7 @@ class CrossEngineFilter(BaseFilter):
|
|
16
17
|
"""Filter that ranks and filters results from multiple search engines."""
|
17
18
|
|
18
19
|
def __init__(
|
19
|
-
self, model, max_results=
|
20
|
+
self, model, max_results=None, default_reorder=True, default_reindex=True
|
20
21
|
):
|
21
22
|
"""
|
22
23
|
Initialize the cross-engine filter.
|
@@ -28,6 +29,9 @@ class CrossEngineFilter(BaseFilter):
|
|
28
29
|
default_reindex: Default setting for reindexing results after filtering
|
29
30
|
"""
|
30
31
|
super().__init__(model)
|
32
|
+
# Get max_results from database settings if not provided
|
33
|
+
if max_results is None:
|
34
|
+
max_results = get_db_setting("search.cross_engine_max_results", 100)
|
31
35
|
self.max_results = max_results
|
32
36
|
self.default_reorder = default_reorder
|
33
37
|
self.default_reindex = default_reindex
|
@@ -13,11 +13,14 @@ logger = logging.getLogger(__name__)
|
|
13
13
|
class BaseSearchStrategy(ABC):
|
14
14
|
"""Abstract base class for all search strategies."""
|
15
15
|
|
16
|
-
def __init__(self):
|
16
|
+
def __init__(self, all_links_of_system=None):
|
17
17
|
"""Initialize the base strategy with common attributes."""
|
18
18
|
self.progress_callback = None
|
19
19
|
self.questions_by_iteration = {}
|
20
|
-
|
20
|
+
# Create a new list if None is provided (avoiding mutable default argument)
|
21
|
+
self.all_links_of_system = (
|
22
|
+
all_links_of_system if all_links_of_system is not None else []
|
23
|
+
)
|
21
24
|
|
22
25
|
def set_progress_callback(self, callback: Callable[[str, int, dict], None]) -> None:
|
23
26
|
"""Set a callback function to receive progress updates."""
|
@@ -7,10 +7,7 @@ import logging
|
|
7
7
|
from datetime import datetime
|
8
8
|
from typing import Dict, List
|
9
9
|
|
10
|
-
from langchain_core.language_models import BaseLLM
|
11
|
-
|
12
10
|
from ...citation_handler import CitationHandler
|
13
|
-
from ...config.config_files import settings
|
14
11
|
from ...config.llm_config import get_llm
|
15
12
|
from ...config.search_config import get_search
|
16
13
|
from ...utilities.db_utils import get_db_setting
|
@@ -27,18 +24,34 @@ class IterDRAGStrategy(BaseSearchStrategy):
|
|
27
24
|
"""IterDRAG strategy that breaks queries into sub-queries."""
|
28
25
|
|
29
26
|
def __init__(
|
30
|
-
self,
|
27
|
+
self,
|
28
|
+
search=None,
|
29
|
+
model=None,
|
30
|
+
max_iterations=3,
|
31
|
+
subqueries_per_iteration=2,
|
32
|
+
all_links_of_system=None,
|
31
33
|
):
|
32
|
-
"""Initialize the strategy with
|
33
|
-
|
34
|
-
|
34
|
+
"""Initialize the IterDRAG strategy with search and LLM.
|
35
|
+
|
36
|
+
Args:
|
37
|
+
search: Search engine to use for web queries
|
38
|
+
model: LLM to use for text generation and reasoning
|
39
|
+
max_iterations: Maximum number of iterations to run
|
40
|
+
subqueries_per_iteration: Number of sub-queries to generate per iteration
|
41
|
+
all_links_of_system: Optional list of links to initialize with
|
42
|
+
"""
|
43
|
+
super().__init__(all_links_of_system=all_links_of_system)
|
35
44
|
self.search = search or get_search()
|
45
|
+
self.model = model or get_llm()
|
46
|
+
self.max_iterations = max_iterations
|
47
|
+
self.subqueries_per_iteration = subqueries_per_iteration
|
48
|
+
|
49
|
+
# Initialize progress callback
|
36
50
|
self.progress_callback = None
|
37
|
-
self.all_links_of_system = list()
|
38
51
|
self.questions_by_iteration = {}
|
39
52
|
|
40
53
|
# Use provided citation_handler or create one
|
41
|
-
self.citation_handler =
|
54
|
+
self.citation_handler = CitationHandler(self.model)
|
42
55
|
|
43
56
|
# Initialize components
|
44
57
|
self.question_generator = DecompositionQuestionGenerator(self.model)
|
@@ -396,13 +409,7 @@ Please try again with a different query or contact support.
|
|
396
409
|
"""
|
397
410
|
|
398
411
|
# Compress knowledge if needed
|
399
|
-
if (
|
400
|
-
get_db_setting(
|
401
|
-
"general.knowledge_accumulation",
|
402
|
-
settings.general.knowledge_accumulation,
|
403
|
-
)
|
404
|
-
== "ITERATION"
|
405
|
-
):
|
412
|
+
if get_db_setting("general.knowledge_accumulation", "ITERATION") == "ITERATION":
|
406
413
|
try:
|
407
414
|
self._update_progress(
|
408
415
|
"Compressing knowledge", 90, {"phase": "knowledge_compression"}
|
@@ -34,7 +34,8 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
34
34
|
use_cross_engine_filter: bool = True,
|
35
35
|
filter_reorder: bool = True,
|
36
36
|
filter_reindex: bool = True,
|
37
|
-
|
37
|
+
cross_engine_max_results: int = None,
|
38
|
+
all_links_of_system=None,
|
38
39
|
):
|
39
40
|
"""Initialize with optional dependency injection for testing.
|
40
41
|
|
@@ -46,23 +47,29 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
46
47
|
use_cross_engine_filter: If True, filter search results across engines
|
47
48
|
filter_reorder: Whether to reorder results by relevance
|
48
49
|
filter_reindex: Whether to update result indices after filtering
|
49
|
-
|
50
|
+
cross_engine_max_results: Maximum number of results to keep after cross-engine filtering
|
51
|
+
all_links_of_system: Optional list of links to initialize with
|
50
52
|
"""
|
51
|
-
super().__init__()
|
53
|
+
super().__init__(all_links_of_system=all_links_of_system)
|
52
54
|
self.search = search or get_search()
|
53
55
|
self.model = model or get_llm()
|
54
56
|
self.progress_callback = None
|
55
|
-
self.all_links_of_system = list()
|
56
57
|
self.questions_by_iteration = {}
|
57
58
|
self.include_text_content = include_text_content
|
58
59
|
self.use_cross_engine_filter = use_cross_engine_filter
|
59
60
|
self.filter_reorder = filter_reorder
|
60
61
|
self.filter_reindex = filter_reindex
|
61
62
|
|
63
|
+
# Get max_filtered_results from database if not provided
|
64
|
+
if cross_engine_max_results is None:
|
65
|
+
cross_engine_max_results = get_db_setting(
|
66
|
+
"search.cross_engine_max_results", 100
|
67
|
+
)
|
68
|
+
|
62
69
|
# Initialize the cross-engine filter
|
63
70
|
self.cross_engine_filter = CrossEngineFilter(
|
64
71
|
model=self.model,
|
65
|
-
max_results=
|
72
|
+
max_results=cross_engine_max_results,
|
66
73
|
default_reorder=filter_reorder,
|
67
74
|
default_reindex=filter_reindex,
|
68
75
|
)
|
@@ -118,7 +125,7 @@ class ParallelSearchStrategy(BaseSearchStrategy):
|
|
118
125
|
|
119
126
|
# Determine number of iterations to run
|
120
127
|
iterations_to_run = get_db_setting("search.iterations")
|
121
|
-
logger.debug("Selected amount of iterations: " + iterations_to_run)
|
128
|
+
logger.debug("Selected amount of iterations: " + str(iterations_to_run))
|
122
129
|
iterations_to_run = int(iterations_to_run)
|
123
130
|
try:
|
124
131
|
# Run each iteration
|
@@ -23,13 +23,14 @@ class RapidSearchStrategy(BaseSearchStrategy):
|
|
23
23
|
a single synthesis step at the end, optimized for speed.
|
24
24
|
"""
|
25
25
|
|
26
|
-
def __init__(
|
26
|
+
def __init__(
|
27
|
+
self, search=None, model=None, citation_handler=None, all_links_of_system=None
|
28
|
+
):
|
27
29
|
"""Initialize with optional dependency injection for testing."""
|
28
|
-
super().__init__()
|
30
|
+
super().__init__(all_links_of_system=all_links_of_system)
|
29
31
|
self.search = search or get_search()
|
30
32
|
self.model = model or get_llm()
|
31
33
|
self.progress_callback = None
|
32
|
-
self.all_links_of_system = list()
|
33
34
|
self.questions_by_iteration = {}
|
34
35
|
|
35
36
|
# Use provided citation_handler or create one
|
@@ -6,7 +6,6 @@ from ...citation_handler import CitationHandler
|
|
6
6
|
from ...config.llm_config import get_llm
|
7
7
|
from ...config.search_config import get_search
|
8
8
|
from ...utilities.db_utils import get_db_setting
|
9
|
-
from ...utilities.search_utilities import extract_links_from_search_results
|
10
9
|
from ..filters.cross_engine_filter import CrossEngineFilter
|
11
10
|
from ..findings.repository import FindingsRepository
|
12
11
|
from ..questions.standard_question import StandardQuestionGenerator
|
@@ -30,25 +29,32 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
|
|
30
29
|
use_cross_engine_filter: bool = True,
|
31
30
|
filter_reorder: bool = True,
|
32
31
|
filter_reindex: bool = True,
|
33
|
-
|
32
|
+
cross_engine_max_results: int = None,
|
33
|
+
all_links_of_system=None,
|
34
34
|
):
|
35
35
|
"""Initialize with optional dependency injection for testing."""
|
36
|
-
|
36
|
+
# Pass the links list to the parent class
|
37
|
+
super().__init__(all_links_of_system=all_links_of_system)
|
37
38
|
self.search = search or get_search()
|
38
39
|
self.model = model or get_llm()
|
39
40
|
self.progress_callback = None
|
40
|
-
|
41
|
-
self.all_search_results = []
|
41
|
+
|
42
42
|
self.questions_by_iteration = {}
|
43
43
|
self.include_text_content = include_text_content
|
44
44
|
self.use_cross_engine_filter = use_cross_engine_filter
|
45
45
|
self.filter_reorder = filter_reorder
|
46
46
|
self.filter_reindex = filter_reindex
|
47
47
|
|
48
|
+
# Get cross_engine_max_results from database if not provided
|
49
|
+
if cross_engine_max_results is None:
|
50
|
+
cross_engine_max_results = get_db_setting(
|
51
|
+
"search.cross_engine_max_results", 100
|
52
|
+
)
|
53
|
+
|
48
54
|
# Initialize the cross-engine filter
|
49
55
|
self.cross_engine_filter = CrossEngineFilter(
|
50
56
|
model=self.model,
|
51
|
-
max_results=
|
57
|
+
max_results=cross_engine_max_results,
|
52
58
|
default_reorder=filter_reorder,
|
53
59
|
default_reindex=filter_reindex,
|
54
60
|
)
|
@@ -87,13 +93,11 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
|
|
87
93
|
Analyze a topic using source-based search strategy.
|
88
94
|
"""
|
89
95
|
logger.info(f"Starting source-based research on topic: {query}")
|
90
|
-
|
96
|
+
accumulated_search_results_across_all_iterations = (
|
97
|
+
[]
|
98
|
+
) # tracking links across iterations but not global
|
91
99
|
findings = []
|
92
|
-
|
93
|
-
|
94
|
-
# Track all search results across iterations
|
95
|
-
self.all_links_of_system = list()
|
96
|
-
self.questions_by_iteration = {}
|
100
|
+
total_citation_count_before_this_search = len(self.all_links_of_system)
|
97
101
|
|
98
102
|
self._update_progress(
|
99
103
|
"Initializing source-based research",
|
@@ -121,6 +125,8 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
|
|
121
125
|
logger.debug("Selected amount of iterations: " + str(iterations_to_run))
|
122
126
|
iterations_to_run = int(iterations_to_run)
|
123
127
|
try:
|
128
|
+
filtered_search_results = []
|
129
|
+
total_citation_count_before_this_search = len(self.all_links_of_system)
|
124
130
|
# Run each iteration
|
125
131
|
for iteration in range(1, iterations_to_run + 1):
|
126
132
|
iteration_progress_base = 5 + (iteration - 1) * (70 / iterations_to_run)
|
@@ -141,9 +147,6 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
|
|
141
147
|
# For first iteration, use initial query
|
142
148
|
if iteration == 1:
|
143
149
|
# Generate questions for first iteration
|
144
|
-
source_context = self._format_search_results_as_context(
|
145
|
-
self.all_search_results
|
146
|
-
)
|
147
150
|
context = f"""Iteration: {iteration} of {iterations_to_run}"""
|
148
151
|
questions = self.question_generator.generate_questions(
|
149
152
|
current_knowledge=context,
|
@@ -167,7 +170,7 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
|
|
167
170
|
else:
|
168
171
|
# For subsequent iterations, generate questions based on previous search results
|
169
172
|
source_context = self._format_search_results_as_context(
|
170
|
-
|
173
|
+
filtered_search_results
|
171
174
|
)
|
172
175
|
if iteration != 1:
|
173
176
|
context = f"""Previous search results:\n{source_context}\n\nIteration: {iteration} of {iterations_to_run}"""
|
@@ -242,32 +245,25 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
|
|
242
245
|
},
|
243
246
|
)
|
244
247
|
|
245
|
-
# Collect all search results for this iteration
|
246
248
|
iteration_search_results.extend(search_results)
|
247
249
|
|
248
|
-
|
249
|
-
if self.use_cross_engine_filter:
|
250
|
+
if False and self.use_cross_engine_filter:
|
250
251
|
self._update_progress(
|
251
252
|
f"Filtering search results for iteration {iteration}",
|
252
253
|
iteration_progress_base + 45,
|
253
254
|
{"phase": "cross_engine_filtering", "iteration": iteration},
|
254
255
|
)
|
255
256
|
|
256
|
-
# Get the current link count (for indexing)
|
257
257
|
existing_link_count = len(self.all_links_of_system)
|
258
|
-
|
259
|
-
# Filter the search results
|
258
|
+
logger.info(f"Existing link count: {existing_link_count}")
|
260
259
|
filtered_search_results = self.cross_engine_filter.filter_results(
|
261
260
|
iteration_search_results,
|
262
261
|
query,
|
263
|
-
reorder=
|
264
|
-
reindex=
|
262
|
+
reorder=True,
|
263
|
+
reindex=True,
|
265
264
|
start_index=existing_link_count, # Start indexing after existing links
|
266
265
|
)
|
267
266
|
|
268
|
-
links = extract_links_from_search_results(filtered_search_results)
|
269
|
-
self.all_links_of_system.extend(links)
|
270
|
-
|
271
267
|
self._update_progress(
|
272
268
|
f"Filtered from {len(iteration_search_results)} to {len(filtered_search_results)} results",
|
273
269
|
iteration_progress_base + 50,
|
@@ -277,23 +273,20 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
|
|
277
273
|
"links_count": len(self.all_links_of_system),
|
278
274
|
},
|
279
275
|
)
|
280
|
-
|
281
|
-
# Use filtered results
|
282
|
-
iteration_search_results = filtered_search_results
|
283
276
|
else:
|
284
|
-
#
|
285
|
-
|
286
|
-
self.all_links_of_system.extend(links)
|
277
|
+
# Use the search results as they are
|
278
|
+
filtered_search_results = iteration_search_results
|
287
279
|
|
288
|
-
|
289
|
-
|
280
|
+
# Use filtered results
|
281
|
+
accumulated_search_results_across_all_iterations.extend(
|
282
|
+
filtered_search_results
|
283
|
+
)
|
290
284
|
|
291
285
|
# Create a lightweight finding for this iteration's search metadata (no text content)
|
292
286
|
finding = {
|
293
287
|
"phase": f"Iteration {iteration}",
|
294
|
-
"content": f"Searched with {len(all_questions)} questions, found {len(
|
288
|
+
"content": f"Searched with {len(all_questions)} questions, found {len(filtered_search_results)} results.",
|
295
289
|
"question": query,
|
296
|
-
"search_results": iteration_search_results,
|
297
290
|
"documents": [],
|
298
291
|
}
|
299
292
|
findings.append(finding)
|
@@ -306,46 +299,47 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
|
|
306
299
|
{"phase": "iteration_complete", "iteration": iteration},
|
307
300
|
)
|
308
301
|
|
309
|
-
#
|
310
|
-
self._update_progress(
|
311
|
-
"Performing final filtering of all results",
|
312
|
-
80,
|
313
|
-
{"phase": "final_filtering"},
|
314
|
-
)
|
315
|
-
|
316
|
-
# Apply final cross-engine filtering to all accumulated results if enabled
|
302
|
+
# Do we need this filter?
|
317
303
|
if self.use_cross_engine_filter:
|
304
|
+
# Final filtering of all accumulated search results
|
305
|
+
self._update_progress(
|
306
|
+
"Performing final filtering of all results",
|
307
|
+
80,
|
308
|
+
{"phase": "final_filtering"},
|
309
|
+
)
|
318
310
|
final_filtered_results = self.cross_engine_filter.filter_results(
|
319
|
-
|
311
|
+
accumulated_search_results_across_all_iterations,
|
320
312
|
query,
|
321
313
|
reorder=True, # Always reorder in final filtering
|
322
|
-
reindex=
|
323
|
-
max_results=int(get_db_setting("search.final_max_results") or
|
314
|
+
reindex=True, # Always reindex in final filtering
|
315
|
+
max_results=int(get_db_setting("search.final_max_results") or 100),
|
316
|
+
start_index=len(self.all_links_of_system),
|
317
|
+
)
|
318
|
+
self._update_progress(
|
319
|
+
f"Filtered from {len(accumulated_search_results_across_all_iterations)} to {len(final_filtered_results)} results",
|
320
|
+
iteration_progress_base + 85,
|
321
|
+
{
|
322
|
+
"phase": "filtering_complete",
|
323
|
+
"iteration": iteration,
|
324
|
+
"links_count": len(self.all_links_of_system),
|
325
|
+
},
|
324
326
|
)
|
325
327
|
else:
|
326
|
-
final_filtered_results =
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
{
|
331
|
-
"phase": "filtering_complete",
|
332
|
-
"iteration": iteration,
|
333
|
-
"links_count": len(self.all_links_of_system),
|
334
|
-
},
|
335
|
-
)
|
328
|
+
final_filtered_results = filtered_search_results
|
329
|
+
# links = extract_links_from_search_results()
|
330
|
+
self.all_links_of_system.extend(final_filtered_results)
|
331
|
+
|
336
332
|
# Final synthesis after all iterations
|
337
333
|
self._update_progress(
|
338
334
|
"Generating final synthesis", 90, {"phase": "synthesis"}
|
339
335
|
)
|
340
336
|
|
341
|
-
total_citation_count = len(self.all_links_of_system)
|
342
|
-
|
343
337
|
# Final synthesis
|
344
338
|
final_citation_result = self.citation_handler.analyze_followup(
|
345
339
|
query,
|
346
340
|
final_filtered_results,
|
347
341
|
previous_knowledge="", # Empty string as we don't need previous knowledge here
|
348
|
-
nr_of_links=
|
342
|
+
nr_of_links=total_citation_count_before_this_search,
|
349
343
|
)
|
350
344
|
|
351
345
|
# Add null check for final_citation_result
|
@@ -361,7 +355,7 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
|
|
361
355
|
"phase": "Final synthesis",
|
362
356
|
"content": synthesized_content,
|
363
357
|
"question": query,
|
364
|
-
"search_results":
|
358
|
+
"search_results": self.all_links_of_system,
|
365
359
|
"documents": documents,
|
366
360
|
}
|
367
361
|
findings.append(final_finding)
|
@@ -404,4 +398,5 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
|
|
404
398
|
"questions_by_iteration": self.questions_by_iteration,
|
405
399
|
"formatted_findings": formatted_findings,
|
406
400
|
"current_knowledge": synthesized_content,
|
401
|
+
"all_links_of_system": self.all_links_of_system,
|
407
402
|
}
|
@@ -3,7 +3,6 @@ import logging
|
|
3
3
|
from typing import Dict
|
4
4
|
|
5
5
|
from ...citation_handler import CitationHandler
|
6
|
-
from ...config.config_files import settings
|
7
6
|
from ...config.llm_config import get_llm
|
8
7
|
from ...config.search_config import get_search
|
9
8
|
from ...utilities.db_utils import get_db_setting
|
@@ -20,11 +19,17 @@ logger = logging.getLogger(__name__)
|
|
20
19
|
class StandardSearchStrategy(BaseSearchStrategy):
|
21
20
|
"""Standard iterative search strategy that generates follow-up questions."""
|
22
21
|
|
23
|
-
def __init__(
|
22
|
+
def __init__(
|
23
|
+
self, search=None, model=None, citation_handler=None, all_links_of_system=None
|
24
|
+
):
|
24
25
|
"""Initialize with optional dependency injection for testing."""
|
26
|
+
super().__init__(all_links_of_system=all_links_of_system)
|
25
27
|
self.search = search or get_search()
|
26
28
|
self.model = model or get_llm()
|
29
|
+
|
30
|
+
# Get iterations setting
|
27
31
|
self.max_iterations = int(get_db_setting("search.iterations"))
|
32
|
+
|
28
33
|
self.questions_per_iteration = int(
|
29
34
|
get_db_setting("search.questions_per_iteration")
|
30
35
|
)
|
@@ -43,7 +48,6 @@ class StandardSearchStrategy(BaseSearchStrategy):
|
|
43
48
|
|
44
49
|
# Initialize other attributes
|
45
50
|
self.progress_callback = None
|
46
|
-
self.all_links_of_system = list()
|
47
51
|
|
48
52
|
def _update_progress(
|
49
53
|
self, message: str, progress_percent: int = None, metadata: dict = None
|
@@ -117,7 +121,7 @@ Iteration: {iteration + 1} of {total_iterations}"""
|
|
117
121
|
question_count = len(questions)
|
118
122
|
knowledge_accumulation = get_db_setting(
|
119
123
|
"general.knowledge_accumulation",
|
120
|
-
|
124
|
+
"ITERATION",
|
121
125
|
)
|
122
126
|
for q_idx, question in enumerate(questions):
|
123
127
|
question_progress_base = iteration_progress_base + (
|
@@ -4,11 +4,8 @@ Provides programmatic access to search and research capabilities.
|
|
4
4
|
"""
|
5
5
|
|
6
6
|
import logging
|
7
|
-
import os
|
8
7
|
from typing import Any, Callable, Dict, Optional
|
9
8
|
|
10
|
-
import toml
|
11
|
-
|
12
9
|
from ..config.llm_config import get_llm
|
13
10
|
from ..config.search_config import get_search
|
14
11
|
from ..report_generator import IntegratedReportGenerator
|
@@ -279,46 +276,3 @@ def analyze_documents(
|
|
279
276
|
logger.info(f"Analysis saved to {output_file}")
|
280
277
|
|
281
278
|
return analysis_result
|
282
|
-
|
283
|
-
|
284
|
-
def get_available_search_engines() -> Dict[str, str]:
|
285
|
-
"""
|
286
|
-
Get a dictionary of available search engines.
|
287
|
-
|
288
|
-
Returns:
|
289
|
-
Dictionary mapping engine names to descriptions
|
290
|
-
"""
|
291
|
-
|
292
|
-
from ..web_search_engines.search_engine_factory import get_available_engines
|
293
|
-
|
294
|
-
engines = get_available_engines()
|
295
|
-
|
296
|
-
# Add some descriptions for common engines
|
297
|
-
descriptions = {
|
298
|
-
"auto": "Automatic selection based on query type",
|
299
|
-
"wikipedia": "Wikipedia articles and general knowledge",
|
300
|
-
"arxiv": "Scientific papers and research",
|
301
|
-
"pubmed": "Medical and biomedical literature",
|
302
|
-
"semantic_scholar": "Academic papers across all fields",
|
303
|
-
"github": "Code repositories and technical documentation",
|
304
|
-
"local_all": "All local document collections",
|
305
|
-
}
|
306
|
-
|
307
|
-
return {engine: descriptions.get(engine, "Search engine") for engine in engines}
|
308
|
-
|
309
|
-
|
310
|
-
def get_available_collections() -> Dict[str, Dict[str, Any]]:
|
311
|
-
"""
|
312
|
-
Get a dictionary of available local document collections.
|
313
|
-
|
314
|
-
Returns:
|
315
|
-
Dictionary mapping collection names to their configuration
|
316
|
-
"""
|
317
|
-
|
318
|
-
from ..config.config_files import LOCAL_COLLECTIONS_FILE
|
319
|
-
|
320
|
-
if os.path.exists(LOCAL_COLLECTIONS_FILE):
|
321
|
-
collections = toml.load(LOCAL_COLLECTIONS_FILE)
|
322
|
-
return collections
|
323
|
-
|
324
|
-
return {}
|
@@ -4,7 +4,6 @@ from typing import Any, Dict, List, Union
|
|
4
4
|
|
5
5
|
from langchain_core.documents import Document
|
6
6
|
|
7
|
-
from .config.config_files import settings
|
8
7
|
from .utilities.db_utils import get_db_setting
|
9
8
|
|
10
9
|
|
@@ -93,10 +92,8 @@ Previous Knowledge:
|
|
93
92
|
New Sources:
|
94
93
|
{formatted_sources}
|
95
94
|
|
96
|
-
Return any inconsistencies or conflicts found."""
|
97
|
-
if get_db_setting(
|
98
|
-
"general.enable_fact_checking", settings.general.enable_fact_checking
|
99
|
-
):
|
95
|
+
Return any inconsistencies or conflicts found."""
|
96
|
+
if get_db_setting("general.enable_fact_checking", True):
|
100
97
|
fact_check_response = self.llm.invoke(fact_check_prompt).content
|
101
98
|
|
102
99
|
else:
|