local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +96 -84
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +72 -44
- local_deep_research/search_system.py +147 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1592 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
- local_deep_research-0.2.0.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,13 +1,17 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
import os
|
4
|
+
from datetime import datetime
|
5
|
+
from typing import Dict, List
|
6
|
+
|
1
7
|
import justext
|
2
8
|
from langchain_community.document_loaders import AsyncChromiumLoader
|
3
9
|
from langchain_community.document_transformers import BeautifulSoupTransformer
|
4
10
|
from langchain_core.language_models import BaseLLM
|
5
|
-
|
6
|
-
import
|
7
|
-
from .
|
8
|
-
|
9
|
-
from local_deep_research import config
|
10
|
-
import logging
|
11
|
+
|
12
|
+
from ...config.search_config import QUALITY_CHECK_DDG_URLS
|
13
|
+
from ...utilities.search_utilities import remove_think_tags
|
14
|
+
|
11
15
|
logger = logging.getLogger(__name__)
|
12
16
|
|
13
17
|
|
@@ -15,14 +19,13 @@ class FullSearchResults:
|
|
15
19
|
def __init__(
|
16
20
|
self,
|
17
21
|
llm: BaseLLM, # Add LLM parameter
|
18
|
-
web_search: list,
|
22
|
+
web_search: list,
|
19
23
|
output_format: str = "list",
|
20
24
|
language: str = "English",
|
21
25
|
max_results: int = 10,
|
22
26
|
region: str = "wt-wt",
|
23
27
|
time: str = "y",
|
24
|
-
safesearch: str = "Moderate"
|
25
|
-
|
28
|
+
safesearch: str = "Moderate",
|
26
29
|
):
|
27
30
|
self.llm = llm
|
28
31
|
self.output_format = output_format
|
@@ -31,10 +34,9 @@ class FullSearchResults:
|
|
31
34
|
self.region = region
|
32
35
|
self.time = time
|
33
36
|
self.safesearch = safesearch
|
34
|
-
self.web_search =web_search
|
37
|
+
self.web_search = web_search
|
35
38
|
os.environ["USER_AGENT"] = "Local Deep Research/1.0"
|
36
39
|
|
37
|
-
|
38
40
|
self.bs_transformer = BeautifulSoupTransformer()
|
39
41
|
self.tags_to_extract = ["p", "div", "span"]
|
40
42
|
|
@@ -54,7 +56,7 @@ class FullSearchResults:
|
|
54
56
|
{results}
|
55
57
|
|
56
58
|
Return a JSON array of indices (0-based) for sources that meet ALL criteria.
|
57
|
-
ONLY Return a JSON array of indices (0-based) and nothing else. No letters.
|
59
|
+
ONLY Return a JSON array of indices (0-based) and nothing else. No letters.
|
58
60
|
Example response: \n[0, 2, 4]\n\n"""
|
59
61
|
|
60
62
|
try:
|
@@ -66,7 +68,7 @@ class FullSearchResults:
|
|
66
68
|
return [r for i, r in enumerate(results) if i in good_indices]
|
67
69
|
except Exception as e:
|
68
70
|
logger.error(f"URL filtering error: {e}")
|
69
|
-
return []
|
71
|
+
return []
|
70
72
|
|
71
73
|
def remove_boilerplate(self, html: str) -> str:
|
72
74
|
if not html or not html.strip():
|
@@ -77,13 +79,13 @@ class FullSearchResults:
|
|
77
79
|
|
78
80
|
def run(self, query: str):
|
79
81
|
nr_full_text = 0
|
80
|
-
# Step 1: Get search results
|
82
|
+
# Step 1: Get search results
|
81
83
|
search_results = self.web_search.invoke(query)
|
82
84
|
if not isinstance(search_results, list):
|
83
85
|
raise ValueError("Expected the search results in list format.")
|
84
86
|
|
85
87
|
# Step 2: Filter URLs using LLM
|
86
|
-
if
|
88
|
+
if QUALITY_CHECK_DDG_URLS:
|
87
89
|
filtered_results = self.check_urls(search_results, query)
|
88
90
|
else:
|
89
91
|
filtered_results = search_results
|
@@ -126,4 +128,4 @@ class FullSearchResults:
|
|
126
128
|
return self.run(query)
|
127
129
|
|
128
130
|
def __call__(self, query: str):
|
129
|
-
return self.invoke(query)
|
131
|
+
return self.invoke(query)
|
@@ -1,12 +1,13 @@
|
|
1
1
|
import logging
|
2
2
|
import os
|
3
|
-
from typing import Dict, List,
|
3
|
+
from typing import Any, Dict, List, Optional
|
4
4
|
|
5
|
-
from
|
6
|
-
from
|
7
|
-
from
|
8
|
-
from
|
9
|
-
from
|
5
|
+
from ...config import search_config
|
6
|
+
from ...web.services.socket_service import emit_socket_event
|
7
|
+
from ..search_engine_base import BaseSearchEngine
|
8
|
+
from ..search_engine_factory import create_search_engine
|
9
|
+
from ..search_engines_config import SEARCH_ENGINES
|
10
|
+
from .search_engine_wikipedia import WikipediaSearchEngine
|
10
11
|
|
11
12
|
# Setup logging
|
12
13
|
logging.basicConfig(level=logging.INFO)
|
@@ -18,17 +19,20 @@ class MetaSearchEngine(BaseSearchEngine):
|
|
18
19
|
LLM-powered meta search engine that intelligently selects and uses
|
19
20
|
the appropriate search engines based on query analysis
|
20
21
|
"""
|
21
|
-
|
22
|
-
def __init__(
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
22
|
+
|
23
|
+
def __init__(
|
24
|
+
self,
|
25
|
+
llm,
|
26
|
+
max_results: int = 10,
|
27
|
+
use_api_key_services: bool = True,
|
28
|
+
max_engines_to_try: int = 3,
|
29
|
+
max_filtered_results: Optional[int] = None,
|
30
|
+
engine_selection_callback=None,
|
31
|
+
**kwargs,
|
32
|
+
):
|
29
33
|
"""
|
30
34
|
Initialize the meta search engine.
|
31
|
-
|
35
|
+
|
32
36
|
Args:
|
33
37
|
llm: Language model instance for query classification and relevance filtering
|
34
38
|
max_results: Maximum number of search results to return
|
@@ -37,247 +41,294 @@ class MetaSearchEngine(BaseSearchEngine):
|
|
37
41
|
max_filtered_results: Maximum number of results to keep after filtering
|
38
42
|
**kwargs: Additional parameters (ignored but accepted for compatibility)
|
39
43
|
"""
|
40
|
-
# Initialize the BaseSearchEngine with the LLM and
|
41
|
-
super().__init__(
|
42
|
-
|
43
|
-
|
44
|
+
# Initialize the BaseSearchEngine with the LLM, max_filtered_results, and max_results
|
45
|
+
super().__init__(
|
46
|
+
llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
|
47
|
+
)
|
48
|
+
|
44
49
|
self.use_api_key_services = use_api_key_services
|
45
50
|
self.max_engines_to_try = max_engines_to_try
|
46
|
-
|
51
|
+
|
47
52
|
# Cache for engine instances
|
48
53
|
self.engine_cache = {}
|
49
|
-
|
54
|
+
|
50
55
|
# Get available engines (excluding 'meta' and 'auto')
|
51
56
|
self.available_engines = self._get_available_engines()
|
52
|
-
logger.info(
|
53
|
-
|
57
|
+
logger.info(
|
58
|
+
f"Meta Search Engine initialized with {len(self.available_engines)} available engines: {', '.join(self.available_engines)}"
|
59
|
+
)
|
60
|
+
|
54
61
|
# Create a fallback engine in case everything else fails
|
55
62
|
self.fallback_engine = WikipediaSearchEngine(
|
56
|
-
max_results=max_results,
|
63
|
+
max_results=self.max_results,
|
57
64
|
llm=llm,
|
58
|
-
max_filtered_results=max_filtered_results
|
65
|
+
max_filtered_results=max_filtered_results,
|
59
66
|
)
|
60
|
-
|
67
|
+
|
61
68
|
def _get_available_engines(self) -> List[str]:
|
62
69
|
"""Get list of available engines, excluding 'meta' and 'auto'"""
|
63
70
|
# Filter out 'meta' and 'auto' and check API key availability
|
64
71
|
available = []
|
65
|
-
for name,
|
72
|
+
for name, config_ in SEARCH_ENGINES.items():
|
66
73
|
if name in ["meta", "auto"]:
|
67
74
|
continue
|
68
|
-
|
69
|
-
if
|
75
|
+
|
76
|
+
if config_.get("requires_api_key", False) and not self.use_api_key_services:
|
70
77
|
continue
|
71
|
-
|
72
|
-
if
|
73
|
-
api_key_env =
|
78
|
+
|
79
|
+
if config_.get("requires_api_key", False):
|
80
|
+
api_key_env = config_.get("api_key_env")
|
74
81
|
api_key = os.getenv(api_key_env) if api_key_env else None
|
75
82
|
if not api_key:
|
76
83
|
continue
|
77
|
-
|
84
|
+
|
78
85
|
available.append(name)
|
79
|
-
|
86
|
+
|
80
87
|
# Make sure we have at least one engine available
|
81
88
|
if not available and "wikipedia" in SEARCH_ENGINES:
|
82
89
|
available.append("wikipedia")
|
83
|
-
|
90
|
+
|
84
91
|
return available
|
85
|
-
|
92
|
+
|
86
93
|
def analyze_query(self, query: str) -> List[str]:
|
87
94
|
"""
|
88
|
-
|
89
|
-
|
95
|
+
Analyze the query to determine the best search engines to use.
|
96
|
+
|
97
|
+
Args:
|
98
|
+
query: The search query
|
99
|
+
|
100
|
+
Returns:
|
101
|
+
List of search engine names sorted by suitability
|
90
102
|
"""
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
logger.error(f"Missing key for engine {name}: {e}")
|
104
|
-
# Add a basic description for engines with missing configuration
|
105
|
-
engine_descriptions.append(f"- {name.upper()}: General purpose search engine.")
|
106
|
-
except Exception as e:
|
107
|
-
logger.error(f"Error processing engine {name}: {e}")
|
108
|
-
engine_descriptions.append(f"- {name.upper()}: General purpose search engine.")
|
103
|
+
try:
|
104
|
+
# Check if the LLM is available to help select engines
|
105
|
+
if not self.llm:
|
106
|
+
logger.warning(
|
107
|
+
"No LLM available for query analysis, using default engines"
|
108
|
+
)
|
109
|
+
# Return engines sorted by reliability
|
110
|
+
return sorted(
|
111
|
+
self.available_engines,
|
112
|
+
key=lambda x: SEARCH_ENGINES.get(x, {}).get("reliability", 0),
|
113
|
+
reverse=True,
|
114
|
+
)
|
109
115
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
116
|
+
# Create a prompt that outlines the available search engines and their strengths
|
117
|
+
engines_info = []
|
118
|
+
for engine_name in self.available_engines:
|
119
|
+
try:
|
120
|
+
if engine_name in SEARCH_ENGINES:
|
121
|
+
strengths = SEARCH_ENGINES[engine_name].get(
|
122
|
+
"strengths", "General search"
|
123
|
+
)
|
124
|
+
weaknesses = SEARCH_ENGINES[engine_name].get(
|
125
|
+
"weaknesses", "None specified"
|
126
|
+
)
|
127
|
+
description = SEARCH_ENGINES[engine_name].get(
|
128
|
+
"description", engine_name
|
129
|
+
)
|
130
|
+
engines_info.append(
|
131
|
+
f"- {engine_name}: {description}\n Strengths: {strengths}\n Weaknesses: {weaknesses}"
|
132
|
+
)
|
133
|
+
except KeyError as e:
|
134
|
+
logger.error(f"Missing key for engine {engine_name}: {str(e)}")
|
115
135
|
|
116
|
-
|
117
|
-
{engine_descriptions}
|
136
|
+
prompt = f"""You are a search query analyst. Consider this search query:
|
118
137
|
|
119
|
-
|
120
|
-
1. The nature of the query (factual, academic, product-related, news, etc.)
|
121
|
-
2. The strengths and weaknesses of each engine
|
122
|
-
3. The reliability of each engine
|
138
|
+
QUERY: {query}
|
123
139
|
|
124
|
-
|
125
|
-
|
140
|
+
I have these search engines available:
|
141
|
+
{chr(10).join(engines_info)}
|
126
142
|
|
127
|
-
|
128
|
-
|
143
|
+
Determine which search engines would be most appropriate for answering this query.
|
144
|
+
First analyze the nature of the query (factual, scientific, code-related, etc.)
|
145
|
+
Then select the 1-3 most appropriate search engines for this type of query.
|
146
|
+
|
147
|
+
Output ONLY a comma-separated list of the search engine names in order of most appropriate to least appropriate.
|
148
|
+
Example output: wikipedia,arxiv,github"""
|
149
|
+
|
150
|
+
# Get analysis from LLM
|
129
151
|
response = self.llm.invoke(prompt)
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
152
|
+
|
153
|
+
# Handle different response formats
|
154
|
+
if hasattr(response, "content"):
|
155
|
+
content = response.content.strip()
|
156
|
+
else:
|
157
|
+
content = str(response).strip()
|
158
|
+
|
159
|
+
# Extract engine names
|
160
|
+
valid_engines = []
|
161
|
+
for engine_name in content.split(","):
|
162
|
+
cleaned_name = engine_name.strip().lower()
|
163
|
+
if cleaned_name in self.available_engines:
|
164
|
+
valid_engines.append(cleaned_name)
|
165
|
+
|
138
166
|
# If no valid engines were returned, use default order based on reliability
|
139
167
|
if not valid_engines:
|
140
168
|
valid_engines = sorted(
|
141
|
-
self.available_engines,
|
142
|
-
key=lambda x: SEARCH_ENGINES
|
143
|
-
reverse=True
|
169
|
+
self.available_engines,
|
170
|
+
key=lambda x: SEARCH_ENGINES.get(x, {}).get("reliability", 0),
|
171
|
+
reverse=True,
|
144
172
|
)
|
145
|
-
|
173
|
+
|
146
174
|
return valid_engines
|
147
175
|
except Exception as e:
|
148
176
|
logger.error(f"Error analyzing query with LLM: {str(e)}")
|
149
177
|
# Fall back to reliability-based ordering
|
150
178
|
return sorted(
|
151
|
-
self.available_engines,
|
152
|
-
key=lambda x: SEARCH_ENGINES
|
153
|
-
reverse=True
|
179
|
+
self.available_engines,
|
180
|
+
key=lambda x: SEARCH_ENGINES.get(x, {}).get("reliability", 0),
|
181
|
+
reverse=True,
|
154
182
|
)
|
155
|
-
|
183
|
+
|
156
184
|
def _get_previews(self, query: str) -> List[Dict[str, Any]]:
|
157
185
|
"""
|
158
186
|
Get preview information by selecting the best search engine for this query.
|
159
|
-
|
187
|
+
|
160
188
|
Args:
|
161
189
|
query: The search query
|
162
|
-
|
190
|
+
|
163
191
|
Returns:
|
164
192
|
List of preview dictionaries
|
165
193
|
"""
|
166
194
|
# Get ranked list of engines for this query
|
167
195
|
ranked_engines = self.analyze_query(query)
|
168
|
-
|
196
|
+
|
169
197
|
if not ranked_engines:
|
170
|
-
logger.warning(
|
198
|
+
logger.warning(
|
199
|
+
"No suitable search engines found for query, using fallback engine"
|
200
|
+
)
|
171
201
|
return self.fallback_engine._get_previews(query)
|
172
|
-
|
202
|
+
|
173
203
|
# Limit the number of engines to try
|
174
|
-
engines_to_try = ranked_engines[:self.max_engines_to_try]
|
175
|
-
|
176
|
-
|
177
|
-
|
204
|
+
engines_to_try = ranked_engines[: self.max_engines_to_try]
|
205
|
+
logger.info(
|
206
|
+
f"SEARCH_PLAN: Will try these engines in order: {', '.join(engines_to_try)}"
|
207
|
+
)
|
208
|
+
|
178
209
|
all_errors = []
|
179
210
|
# Try each engine in order
|
180
211
|
for engine_name in engines_to_try:
|
181
212
|
logger.info(f"Trying search engine: {engine_name}")
|
182
|
-
|
213
|
+
|
183
214
|
# Get or create the engine instance
|
184
215
|
engine = self._get_engine_instance(engine_name)
|
185
|
-
|
216
|
+
|
186
217
|
if not engine:
|
187
218
|
logger.warning(f"Failed to initialize {engine_name}, skipping")
|
188
219
|
all_errors.append(f"Failed to initialize {engine_name}")
|
189
220
|
continue
|
190
|
-
|
221
|
+
|
191
222
|
try:
|
192
223
|
# Get previews from this engine
|
193
224
|
previews = engine._get_previews(query)
|
194
|
-
|
225
|
+
|
195
226
|
# If search was successful, return results
|
196
227
|
if previews and len(previews) > 0:
|
197
|
-
logger.info(f"
|
228
|
+
logger.info(f"ENGINE_SELECTED: {engine_name}")
|
229
|
+
logger.info(
|
230
|
+
f"Successfully got {len(previews)} preview results from {engine_name}"
|
231
|
+
)
|
198
232
|
# Store selected engine for later use
|
199
233
|
self._selected_engine = engine
|
200
234
|
self._selected_engine_name = engine_name
|
235
|
+
|
236
|
+
# Emit a socket event to inform about the selected engine
|
237
|
+
try:
|
238
|
+
emit_socket_event(
|
239
|
+
"search_engine_selected",
|
240
|
+
{"engine": engine_name, "result_count": len(previews)},
|
241
|
+
)
|
242
|
+
except Exception as socket_error:
|
243
|
+
logger.error(
|
244
|
+
f"Socket emit error (non-critical): {str(socket_error)}"
|
245
|
+
)
|
246
|
+
|
201
247
|
return previews
|
202
|
-
|
248
|
+
|
203
249
|
logger.info(f"{engine_name} returned no previews")
|
204
250
|
all_errors.append(f"{engine_name} returned no previews")
|
205
|
-
|
251
|
+
|
206
252
|
except Exception as e:
|
207
253
|
error_msg = f"Error getting previews from {engine_name}: {str(e)}"
|
208
254
|
logger.error(error_msg)
|
209
255
|
all_errors.append(error_msg)
|
210
|
-
|
256
|
+
|
211
257
|
# If we reach here, all engines failed, use fallback
|
212
|
-
logger.warning(
|
258
|
+
logger.warning(
|
259
|
+
f"All engines failed or returned no preview results: {', '.join(all_errors)}"
|
260
|
+
)
|
213
261
|
logger.info("Using fallback Wikipedia engine for previews")
|
214
262
|
self._selected_engine = self.fallback_engine
|
215
263
|
self._selected_engine_name = "wikipedia"
|
216
264
|
return self.fallback_engine._get_previews(query)
|
217
|
-
|
218
|
-
def _get_full_content(
|
265
|
+
|
266
|
+
def _get_full_content(
|
267
|
+
self, relevant_items: List[Dict[str, Any]]
|
268
|
+
) -> List[Dict[str, Any]]:
|
219
269
|
"""
|
220
270
|
Get full content using the engine that provided the previews.
|
221
|
-
|
271
|
+
|
222
272
|
Args:
|
223
273
|
relevant_items: List of relevant preview dictionaries
|
224
|
-
|
274
|
+
|
225
275
|
Returns:
|
226
276
|
List of result dictionaries with full content
|
227
277
|
"""
|
228
278
|
# Check if we should get full content
|
229
|
-
if
|
279
|
+
if (
|
280
|
+
hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
|
281
|
+
and search_config.SEARCH_SNIPPETS_ONLY
|
282
|
+
):
|
230
283
|
logger.info("Snippet-only mode, skipping full content retrieval")
|
231
284
|
return relevant_items
|
232
|
-
|
285
|
+
|
233
286
|
logger.info("Getting full content for relevant items")
|
234
|
-
|
287
|
+
|
235
288
|
# Use the selected engine to get full content
|
236
|
-
if hasattr(self,
|
289
|
+
if hasattr(self, "_selected_engine"):
|
237
290
|
try:
|
238
291
|
logger.info(f"Using {self._selected_engine_name} to get full content")
|
239
292
|
return self._selected_engine._get_full_content(relevant_items)
|
240
293
|
except Exception as e:
|
241
|
-
logger.error(
|
294
|
+
logger.error(
|
295
|
+
f"Error getting full content from {self._selected_engine_name}: {str(e)}"
|
296
|
+
)
|
242
297
|
# Fall back to returning relevant items without full content
|
243
298
|
return relevant_items
|
244
299
|
else:
|
245
|
-
logger.warning(
|
300
|
+
logger.warning(
|
301
|
+
"No engine was selected during preview phase, returning relevant items as-is"
|
302
|
+
)
|
246
303
|
return relevant_items
|
247
|
-
|
304
|
+
|
248
305
|
def _get_engine_instance(self, engine_name: str) -> Optional[BaseSearchEngine]:
|
249
306
|
"""Get or create an instance of the specified search engine"""
|
250
307
|
# Return cached instance if available
|
251
308
|
if engine_name in self.engine_cache:
|
252
309
|
return self.engine_cache[engine_name]
|
253
|
-
|
310
|
+
|
254
311
|
# Create a new instance
|
255
312
|
engine = None
|
256
313
|
try:
|
257
314
|
# Only pass parameters that all engines accept
|
258
|
-
common_params = {
|
259
|
-
|
260
|
-
"max_results": self.max_results
|
261
|
-
}
|
262
|
-
|
315
|
+
common_params = {"llm": self.llm, "max_results": self.max_results}
|
316
|
+
|
263
317
|
# Add max_filtered_results if specified
|
264
318
|
if self.max_filtered_results is not None:
|
265
319
|
common_params["max_filtered_results"] = self.max_filtered_results
|
266
|
-
|
267
|
-
engine = create_search_engine(
|
268
|
-
engine_name,
|
269
|
-
**common_params
|
270
|
-
)
|
320
|
+
|
321
|
+
engine = create_search_engine(engine_name, **common_params)
|
271
322
|
except Exception as e:
|
272
323
|
logger.error(f"Error creating engine instance for {engine_name}: {str(e)}")
|
273
324
|
return None
|
274
|
-
|
325
|
+
|
275
326
|
if engine:
|
276
327
|
# Cache the instance
|
277
328
|
self.engine_cache[engine_name] = engine
|
278
|
-
|
329
|
+
|
279
330
|
return engine
|
280
|
-
|
331
|
+
|
281
332
|
def invoke(self, query: str) -> List[Dict[str, Any]]:
|
282
333
|
"""Compatibility method for LangChain tools"""
|
283
|
-
return self.run(query)
|
334
|
+
return self.run(query)
|