local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +7 -0
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
- local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
- local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
- local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
- local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
- local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
- local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
- local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
- local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
- local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
- local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
- local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
- local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
- local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
- local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
- local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
- local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
- local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
- local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
- local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
- local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
- local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
- local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
- local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
- local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
- local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
- local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
- local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
- local_deep_research/advanced_search_system/findings/repository.py +54 -17
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
- local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
- local_deep_research/advanced_search_system/questions/__init__.py +16 -0
- local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
- local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
- local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
- local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
- local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
- local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
- local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
- local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
- local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
- local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
- local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
- local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
- local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
- local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
- local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
- local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
- local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
- local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
- local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
- local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
- local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
- local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
- local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
- local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
- local_deep_research/api/benchmark_functions.py +6 -2
- local_deep_research/api/research_functions.py +10 -4
- local_deep_research/benchmarks/__init__.py +9 -7
- local_deep_research/benchmarks/benchmark_functions.py +6 -2
- local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
- local_deep_research/benchmarks/cli.py +38 -13
- local_deep_research/benchmarks/comparison/__init__.py +4 -2
- local_deep_research/benchmarks/comparison/evaluator.py +316 -239
- local_deep_research/benchmarks/datasets/__init__.py +1 -1
- local_deep_research/benchmarks/datasets/base.py +91 -72
- local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
- local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
- local_deep_research/benchmarks/datasets/utils.py +48 -29
- local_deep_research/benchmarks/datasets.py +4 -11
- local_deep_research/benchmarks/efficiency/__init__.py +8 -4
- local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
- local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
- local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
- local_deep_research/benchmarks/evaluators/composite.py +6 -2
- local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
- local_deep_research/benchmarks/graders.py +32 -10
- local_deep_research/benchmarks/metrics/README.md +1 -1
- local_deep_research/benchmarks/metrics/calculation.py +25 -10
- local_deep_research/benchmarks/metrics/reporting.py +7 -3
- local_deep_research/benchmarks/metrics/visualization.py +42 -23
- local_deep_research/benchmarks/metrics.py +1 -1
- local_deep_research/benchmarks/optimization/__init__.py +3 -1
- local_deep_research/benchmarks/optimization/api.py +7 -1
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
- local_deep_research/benchmarks/runners.py +48 -15
- local_deep_research/citation_handler.py +65 -92
- local_deep_research/citation_handlers/__init__.py +15 -0
- local_deep_research/citation_handlers/base_citation_handler.py +70 -0
- local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
- local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
- local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
- local_deep_research/config/llm_config.py +271 -169
- local_deep_research/config/search_config.py +14 -5
- local_deep_research/defaults/__init__.py +0 -1
- local_deep_research/metrics/__init__.py +13 -0
- local_deep_research/metrics/database.py +58 -0
- local_deep_research/metrics/db_models.py +115 -0
- local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
- local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
- local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
- local_deep_research/metrics/migrate_research_ratings.py +31 -0
- local_deep_research/metrics/models.py +61 -0
- local_deep_research/metrics/pricing/__init__.py +12 -0
- local_deep_research/metrics/pricing/cost_calculator.py +237 -0
- local_deep_research/metrics/pricing/pricing_cache.py +143 -0
- local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
- local_deep_research/metrics/query_utils.py +51 -0
- local_deep_research/metrics/search_tracker.py +380 -0
- local_deep_research/metrics/token_counter.py +1078 -0
- local_deep_research/migrate_db.py +3 -1
- local_deep_research/report_generator.py +22 -8
- local_deep_research/search_system.py +390 -9
- local_deep_research/test_migration.py +15 -5
- local_deep_research/utilities/db_utils.py +7 -4
- local_deep_research/utilities/es_utils.py +115 -104
- local_deep_research/utilities/llm_utils.py +15 -5
- local_deep_research/utilities/log_utils.py +151 -0
- local_deep_research/utilities/search_cache.py +387 -0
- local_deep_research/utilities/search_utilities.py +14 -6
- local_deep_research/utilities/threading_utils.py +92 -0
- local_deep_research/utilities/url_utils.py +6 -0
- local_deep_research/web/api.py +347 -0
- local_deep_research/web/app.py +13 -17
- local_deep_research/web/app_factory.py +71 -66
- local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
- local_deep_research/web/database/migrations.py +20 -3
- local_deep_research/web/database/models.py +74 -25
- local_deep_research/web/database/schema_upgrade.py +49 -29
- local_deep_research/web/models/database.py +63 -83
- local_deep_research/web/routes/api_routes.py +56 -22
- local_deep_research/web/routes/benchmark_routes.py +4 -1
- local_deep_research/web/routes/globals.py +22 -0
- local_deep_research/web/routes/history_routes.py +71 -46
- local_deep_research/web/routes/metrics_routes.py +1155 -0
- local_deep_research/web/routes/research_routes.py +192 -54
- local_deep_research/web/routes/settings_routes.py +156 -55
- local_deep_research/web/services/research_service.py +412 -251
- local_deep_research/web/services/resource_service.py +36 -11
- local_deep_research/web/services/settings_manager.py +55 -17
- local_deep_research/web/services/settings_service.py +12 -4
- local_deep_research/web/services/socket_service.py +295 -188
- local_deep_research/web/static/css/custom_dropdown.css +180 -0
- local_deep_research/web/static/css/styles.css +39 -1
- local_deep_research/web/static/js/components/detail.js +633 -267
- local_deep_research/web/static/js/components/details.js +751 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
- local_deep_research/web/static/js/components/fallback/ui.js +23 -23
- local_deep_research/web/static/js/components/history.js +76 -76
- local_deep_research/web/static/js/components/logpanel.js +61 -13
- local_deep_research/web/static/js/components/progress.js +13 -2
- local_deep_research/web/static/js/components/research.js +99 -12
- local_deep_research/web/static/js/components/results.js +239 -106
- local_deep_research/web/static/js/main.js +40 -40
- local_deep_research/web/static/js/services/audio.js +1 -1
- local_deep_research/web/static/js/services/formatting.js +11 -11
- local_deep_research/web/static/js/services/keyboard.js +157 -0
- local_deep_research/web/static/js/services/pdf.js +80 -80
- local_deep_research/web/static/sounds/README.md +1 -1
- local_deep_research/web/templates/base.html +1 -0
- local_deep_research/web/templates/components/log_panel.html +7 -1
- local_deep_research/web/templates/components/mobile_nav.html +1 -1
- local_deep_research/web/templates/components/sidebar.html +3 -0
- local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
- local_deep_research/web/templates/pages/details.html +325 -24
- local_deep_research/web/templates/pages/history.html +1 -1
- local_deep_research/web/templates/pages/metrics.html +1929 -0
- local_deep_research/web/templates/pages/progress.html +2 -2
- local_deep_research/web/templates/pages/research.html +53 -17
- local_deep_research/web/templates/pages/results.html +12 -1
- local_deep_research/web/templates/pages/star_reviews.html +803 -0
- local_deep_research/web/utils/formatters.py +9 -3
- local_deep_research/web_search_engines/default_search_engines.py +5 -3
- local_deep_research/web_search_engines/engines/full_search.py +8 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
- local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
- local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
- local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
- local_deep_research/web_search_engines/search_engine_base.py +83 -35
- local_deep_research/web_search_engines/search_engine_factory.py +25 -8
- local_deep_research/web_search_engines/search_engines_config.py +9 -3
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
- local_deep_research-0.5.2.dist-info/RECORD +265 -0
- local_deep_research-0.4.4.dist-info/RECORD +0 -177
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -29,7 +29,9 @@ def convert_debug_to_markdown(raw_text, query):
|
|
29
29
|
if "DETAILED FINDINGS:" in raw_text:
|
30
30
|
logger.info("Found DETAILED FINDINGS section")
|
31
31
|
detailed_index = raw_text.index("DETAILED FINDINGS:")
|
32
|
-
content = raw_text[
|
32
|
+
content = raw_text[
|
33
|
+
detailed_index + len("DETAILED FINDINGS:") :
|
34
|
+
].strip()
|
33
35
|
else:
|
34
36
|
logger.info("No DETAILED FINDINGS section found, using full text")
|
35
37
|
content = raw_text
|
@@ -40,7 +42,8 @@ def convert_debug_to_markdown(raw_text, query):
|
|
40
42
|
[
|
41
43
|
line
|
42
44
|
for line in content.split("\n")
|
43
|
-
if not line.strip().startswith("===")
|
45
|
+
if not line.strip().startswith("===")
|
46
|
+
and not line.strip() == "=" * 80
|
44
47
|
]
|
45
48
|
)
|
46
49
|
lines_after = len(content.split("\n"))
|
@@ -54,7 +57,10 @@ def convert_debug_to_markdown(raw_text, query):
|
|
54
57
|
for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
|
55
58
|
if marker in content[search_index:]:
|
56
59
|
marker_pos = content.index(marker, search_index)
|
57
|
-
if
|
60
|
+
if (
|
61
|
+
next_major_section == -1
|
62
|
+
or marker_pos < next_major_section
|
63
|
+
):
|
58
64
|
next_major_section = marker_pos
|
59
65
|
|
60
66
|
if next_major_section != -1:
|
@@ -3,10 +3,11 @@ Default search engine configurations.
|
|
3
3
|
This file can be used to initialize the search engine configurations.
|
4
4
|
"""
|
5
5
|
|
6
|
+
|
6
7
|
def get_default_elasticsearch_config():
|
7
8
|
"""
|
8
9
|
Returns the default Elasticsearch search engine configuration.
|
9
|
-
|
10
|
+
|
10
11
|
Returns:
|
11
12
|
dict: Default configuration for Elasticsearch search engine
|
12
13
|
"""
|
@@ -26,13 +27,14 @@ def get_default_elasticsearch_config():
|
|
26
27
|
"reliability": "High, depending on your Elasticsearch setup",
|
27
28
|
}
|
28
29
|
|
30
|
+
|
29
31
|
def get_default_search_engine_configs():
|
30
32
|
"""
|
31
33
|
Returns a dictionary of default search engine configurations.
|
32
|
-
|
34
|
+
|
33
35
|
Returns:
|
34
36
|
dict: Dictionary of default search engine configurations
|
35
37
|
"""
|
36
38
|
return {
|
37
39
|
"elasticsearch": get_default_elasticsearch_config(),
|
38
|
-
}
|
40
|
+
}
|
@@ -74,7 +74,9 @@ class FullSearchResults:
|
|
74
74
|
if not html or not html.strip():
|
75
75
|
return ""
|
76
76
|
paragraphs = justext.justext(html, justext.get_stoplist(self.language))
|
77
|
-
cleaned = "\n".join(
|
77
|
+
cleaned = "\n".join(
|
78
|
+
[p.text for p in paragraphs if not p.is_boilerplate]
|
79
|
+
)
|
78
80
|
return cleaned
|
79
81
|
|
80
82
|
def run(self, query: str):
|
@@ -91,7 +93,11 @@ class FullSearchResults:
|
|
91
93
|
filtered_results = search_results
|
92
94
|
|
93
95
|
# Extract URLs from filtered results
|
94
|
-
urls = [
|
96
|
+
urls = [
|
97
|
+
result.get("link")
|
98
|
+
for result in filtered_results
|
99
|
+
if result.get("link")
|
100
|
+
]
|
95
101
|
|
96
102
|
if not urls:
|
97
103
|
logger.error("\n === NO VALID LINKS ===\n")
|
@@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional
|
|
3
3
|
from loguru import logger
|
4
4
|
|
5
5
|
from ...utilities.db_utils import get_db_setting
|
6
|
-
from ...web.services.socket_service import
|
6
|
+
from ...web.services.socket_service import SocketIOService
|
7
7
|
from ..search_engine_base import BaseSearchEngine
|
8
8
|
from ..search_engine_factory import create_search_engine
|
9
9
|
from ..search_engines_config import search_config
|
@@ -39,7 +39,9 @@ class MetaSearchEngine(BaseSearchEngine):
|
|
39
39
|
"""
|
40
40
|
# Initialize the BaseSearchEngine with the LLM, max_filtered_results, and max_results
|
41
41
|
super().__init__(
|
42
|
-
llm=llm,
|
42
|
+
llm=llm,
|
43
|
+
max_filtered_results=max_filtered_results,
|
44
|
+
max_results=max_results,
|
43
45
|
)
|
44
46
|
|
45
47
|
self.use_api_key_services = use_api_key_services
|
@@ -82,7 +84,9 @@ class MetaSearchEngine(BaseSearchEngine):
|
|
82
84
|
)
|
83
85
|
else:
|
84
86
|
# Format: search.engine.web.{engine_name}.use_in_auto_search
|
85
|
-
auto_search_setting =
|
87
|
+
auto_search_setting = (
|
88
|
+
f"search.engine.web.{name}.use_in_auto_search"
|
89
|
+
)
|
86
90
|
|
87
91
|
# Get setting from database, default to False if not found
|
88
92
|
use_in_auto_search = get_db_setting(auto_search_setting, False)
|
@@ -95,7 +99,10 @@ class MetaSearchEngine(BaseSearchEngine):
|
|
95
99
|
continue
|
96
100
|
|
97
101
|
# Skip engines that require API keys if we don't want to use them
|
98
|
-
if
|
102
|
+
if (
|
103
|
+
config_.get("requires_api_key", False)
|
104
|
+
and not self.use_api_key_services
|
105
|
+
):
|
99
106
|
continue
|
100
107
|
|
101
108
|
# Skip engines that require API keys if the key is not available
|
@@ -155,11 +162,15 @@ class MetaSearchEngine(BaseSearchEngine):
|
|
155
162
|
|
156
163
|
# For searches containing "arxiv", prioritize the arxiv engine
|
157
164
|
if "arxiv" in query_lower and "arxiv" in self.available_engines:
|
158
|
-
return ["arxiv"] + [
|
165
|
+
return ["arxiv"] + [
|
166
|
+
e for e in self.available_engines if e != "arxiv"
|
167
|
+
]
|
159
168
|
|
160
169
|
# For searches containing "pubmed", prioritize the pubmed engine
|
161
170
|
if "pubmed" in query_lower and "pubmed" in self.available_engines:
|
162
|
-
return ["pubmed"] + [
|
171
|
+
return ["pubmed"] + [
|
172
|
+
e for e in self.available_engines if e != "pubmed"
|
173
|
+
]
|
163
174
|
|
164
175
|
# Check if SearXNG is available and prioritize it for general queries
|
165
176
|
if "searxng" in self.available_engines:
|
@@ -169,7 +180,9 @@ class MetaSearchEngine(BaseSearchEngine):
|
|
169
180
|
]
|
170
181
|
reliability_sorted = sorted(
|
171
182
|
engines_without_searxng,
|
172
|
-
key=lambda x: search_config()
|
183
|
+
key=lambda x: search_config()
|
184
|
+
.get(x, {})
|
185
|
+
.get("reliability", 0),
|
173
186
|
reverse=True,
|
174
187
|
)
|
175
188
|
return ["searxng"] + reliability_sorted
|
@@ -182,7 +195,9 @@ class MetaSearchEngine(BaseSearchEngine):
|
|
182
195
|
# Return engines sorted by reliability
|
183
196
|
return sorted(
|
184
197
|
self.available_engines,
|
185
|
-
key=lambda x: search_config()
|
198
|
+
key=lambda x: search_config()
|
199
|
+
.get(x, {})
|
200
|
+
.get("reliability", 0),
|
186
201
|
reverse=True,
|
187
202
|
)
|
188
203
|
|
@@ -213,7 +228,9 @@ class MetaSearchEngine(BaseSearchEngine):
|
|
213
228
|
)
|
214
229
|
return sorted(
|
215
230
|
self.available_engines,
|
216
|
-
key=lambda x: search_config()
|
231
|
+
key=lambda x: search_config()
|
232
|
+
.get(x, {})
|
233
|
+
.get("reliability", 0),
|
217
234
|
reverse=True,
|
218
235
|
)
|
219
236
|
|
@@ -255,7 +272,10 @@ Example output: searxng,wikipedia,brave"""
|
|
255
272
|
valid_engines.append(cleaned_name)
|
256
273
|
|
257
274
|
# If SearXNG is available but not selected by the LLM, add it as a fallback
|
258
|
-
if
|
275
|
+
if (
|
276
|
+
"searxng" in self.available_engines
|
277
|
+
and "searxng" not in valid_engines
|
278
|
+
):
|
259
279
|
# Add it as the last option if the LLM selected others
|
260
280
|
if valid_engines:
|
261
281
|
valid_engines.append("searxng")
|
@@ -267,7 +287,9 @@ Example output: searxng,wikipedia,brave"""
|
|
267
287
|
if not valid_engines:
|
268
288
|
valid_engines = sorted(
|
269
289
|
self.available_engines,
|
270
|
-
key=lambda x: search_config()
|
290
|
+
key=lambda x: search_config()
|
291
|
+
.get(x, {})
|
292
|
+
.get("reliability", 0),
|
271
293
|
reverse=True,
|
272
294
|
)
|
273
295
|
|
@@ -278,13 +300,17 @@ Example output: searxng,wikipedia,brave"""
|
|
278
300
|
if "searxng" in self.available_engines:
|
279
301
|
return ["searxng"] + sorted(
|
280
302
|
[e for e in self.available_engines if e != "searxng"],
|
281
|
-
key=lambda x: search_config()
|
303
|
+
key=lambda x: search_config()
|
304
|
+
.get(x, {})
|
305
|
+
.get("reliability", 0),
|
282
306
|
reverse=True,
|
283
307
|
)
|
284
308
|
else:
|
285
309
|
return sorted(
|
286
310
|
self.available_engines,
|
287
|
-
key=lambda x: search_config()
|
311
|
+
key=lambda x: search_config()
|
312
|
+
.get(x, {})
|
313
|
+
.get("reliability", 0),
|
288
314
|
reverse=True,
|
289
315
|
)
|
290
316
|
|
@@ -342,9 +368,12 @@ Example output: searxng,wikipedia,brave"""
|
|
342
368
|
|
343
369
|
# Emit a socket event to inform about the selected engine
|
344
370
|
try:
|
345
|
-
emit_socket_event(
|
371
|
+
SocketIOService().emit_socket_event(
|
346
372
|
"search_engine_selected",
|
347
|
-
{
|
373
|
+
{
|
374
|
+
"engine": engine_name,
|
375
|
+
"result_count": len(previews),
|
376
|
+
},
|
348
377
|
)
|
349
378
|
except Exception:
|
350
379
|
logger.exception("Socket emit error (non-critical)")
|
@@ -355,7 +384,9 @@ Example output: searxng,wikipedia,brave"""
|
|
355
384
|
all_errors.append(f"{engine_name} returned no previews")
|
356
385
|
|
357
386
|
except Exception as e:
|
358
|
-
error_msg =
|
387
|
+
error_msg = (
|
388
|
+
f"Error getting previews from {engine_name}: {str(e)}"
|
389
|
+
)
|
359
390
|
logger.exception(error_msg)
|
360
391
|
all_errors.append(error_msg)
|
361
392
|
|
@@ -390,7 +421,9 @@ Example output: searxng,wikipedia,brave"""
|
|
390
421
|
# Use the selected engine to get full content
|
391
422
|
if hasattr(self, "_selected_engine"):
|
392
423
|
try:
|
393
|
-
logger.info(
|
424
|
+
logger.info(
|
425
|
+
f"Using {self._selected_engine_name} to get full content"
|
426
|
+
)
|
394
427
|
return self._selected_engine._get_full_content(relevant_items)
|
395
428
|
except Exception:
|
396
429
|
logger.exception(
|
@@ -404,7 +437,9 @@ Example output: searxng,wikipedia,brave"""
|
|
404
437
|
)
|
405
438
|
return relevant_items
|
406
439
|
|
407
|
-
def _get_engine_instance(
|
440
|
+
def _get_engine_instance(
|
441
|
+
self, engine_name: str
|
442
|
+
) -> Optional[BaseSearchEngine]:
|
408
443
|
"""Get or create an instance of the specified search engine"""
|
409
444
|
# Return cached instance if available
|
410
445
|
if engine_name in self.engine_cache:
|
@@ -418,11 +453,15 @@ Example output: searxng,wikipedia,brave"""
|
|
418
453
|
|
419
454
|
# Add max_filtered_results if specified
|
420
455
|
if self.max_filtered_results is not None:
|
421
|
-
common_params["max_filtered_results"] =
|
456
|
+
common_params["max_filtered_results"] = (
|
457
|
+
self.max_filtered_results
|
458
|
+
)
|
422
459
|
|
423
460
|
engine = create_search_engine(engine_name, **common_params)
|
424
461
|
except Exception:
|
425
|
-
logger.exception(
|
462
|
+
logger.exception(
|
463
|
+
f"Error creating engine instance for {engine_name}"
|
464
|
+
)
|
426
465
|
return None
|
427
466
|
|
428
467
|
if engine:
|
@@ -148,6 +148,7 @@ class ArXivSearchEngine(BaseSearchEngine):
|
|
148
148
|
else None
|
149
149
|
),
|
150
150
|
"journal_ref": paper.journal_ref,
|
151
|
+
"source": "arXiv",
|
151
152
|
}
|
152
153
|
|
153
154
|
previews.append(preview)
|
@@ -234,8 +235,12 @@ class ArXivSearchEngine(BaseSearchEngine):
|
|
234
235
|
):
|
235
236
|
try:
|
236
237
|
# Download the paper
|
237
|
-
pdf_count +=
|
238
|
-
|
238
|
+
pdf_count += (
|
239
|
+
1 # Increment counter before attempting download
|
240
|
+
)
|
241
|
+
paper_path = paper.download_pdf(
|
242
|
+
dirpath=self.download_dir
|
243
|
+
)
|
239
244
|
result["pdf_path"] = str(paper_path)
|
240
245
|
|
241
246
|
# Extract text from PDF
|
@@ -266,7 +271,9 @@ class ArXivSearchEngine(BaseSearchEngine):
|
|
266
271
|
with pdfplumber.open(paper_path) as pdf:
|
267
272
|
pdf_text = ""
|
268
273
|
for page in pdf.pages:
|
269
|
-
pdf_text +=
|
274
|
+
pdf_text += (
|
275
|
+
page.extract_text() + "\n\n"
|
276
|
+
)
|
270
277
|
|
271
278
|
if (
|
272
279
|
pdf_text.strip()
|
@@ -285,9 +292,13 @@ class ArXivSearchEngine(BaseSearchEngine):
|
|
285
292
|
)
|
286
293
|
except Exception:
|
287
294
|
logger.exception("Error extracting text from PDF")
|
288
|
-
logger.error(
|
295
|
+
logger.error(
|
296
|
+
"Using paper summary as content instead"
|
297
|
+
)
|
289
298
|
except Exception:
|
290
|
-
logger.exception(
|
299
|
+
logger.exception(
|
300
|
+
f"Error downloading paper {paper.title}"
|
301
|
+
)
|
291
302
|
result["pdf_path"] = None
|
292
303
|
pdf_count -= 1 # Decrement counter if download fails
|
293
304
|
elif (
|
@@ -400,7 +411,9 @@ class ArXivSearchEngine(BaseSearchEngine):
|
|
400
411
|
if self.include_full_text and self.download_dir:
|
401
412
|
try:
|
402
413
|
# Download the paper
|
403
|
-
paper_path = paper.download_pdf(
|
414
|
+
paper_path = paper.download_pdf(
|
415
|
+
dirpath=self.download_dir
|
416
|
+
)
|
404
417
|
result["pdf_path"] = str(paper_path)
|
405
418
|
except Exception:
|
406
419
|
logger.exception("Error downloading paper")
|
@@ -46,7 +46,9 @@ class BraveSearchEngine(BaseSearchEngine):
|
|
46
46
|
"""
|
47
47
|
# Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
|
48
48
|
super().__init__(
|
49
|
-
llm=llm,
|
49
|
+
llm=llm,
|
50
|
+
max_filtered_results=max_filtered_results,
|
51
|
+
max_results=max_results,
|
50
52
|
)
|
51
53
|
self.include_full_content = include_full_content
|
52
54
|
|
@@ -144,7 +146,9 @@ class BraveSearchEngine(BaseSearchEngine):
|
|
144
146
|
|
145
147
|
raw_results = json.loads(raw_results)
|
146
148
|
except json.JSONDecodeError:
|
147
|
-
logger.error(
|
149
|
+
logger.error(
|
150
|
+
"Error: Unable to parse BraveSearch response as JSON."
|
151
|
+
)
|
148
152
|
return []
|
149
153
|
|
150
154
|
# Format results as previews
|
@@ -36,7 +36,9 @@ class DuckDuckGoSearchEngine(BaseSearchEngine):
|
|
36
36
|
"""
|
37
37
|
# Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
|
38
38
|
super().__init__(
|
39
|
-
llm=llm,
|
39
|
+
llm=llm,
|
40
|
+
max_filtered_results=max_filtered_results,
|
41
|
+
max_results=max_results,
|
40
42
|
)
|
41
43
|
self.region = region
|
42
44
|
self.safe_search = safe_search
|