local_deep_research-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. local_deep_research/__init__.py +24 -0
  2. local_deep_research/citation_handler.py +113 -0
  3. local_deep_research/config.py +166 -0
  4. local_deep_research/defaults/__init__.py +44 -0
  5. local_deep_research/defaults/llm_config.py +269 -0
  6. local_deep_research/defaults/local_collections.toml +47 -0
  7. local_deep_research/defaults/main.toml +57 -0
  8. local_deep_research/defaults/search_engines.toml +244 -0
  9. local_deep_research/local_collections.py +141 -0
  10. local_deep_research/main.py +113 -0
  11. local_deep_research/report_generator.py +206 -0
  12. local_deep_research/search_system.py +241 -0
  13. local_deep_research/utilties/__init__.py +0 -0
  14. local_deep_research/utilties/enums.py +9 -0
  15. local_deep_research/utilties/llm_utils.py +116 -0
  16. local_deep_research/utilties/search_utilities.py +115 -0
  17. local_deep_research/utilties/setup_utils.py +6 -0
  18. local_deep_research/web/__init__.py +2 -0
  19. local_deep_research/web/app.py +1209 -0
  20. local_deep_research/web/static/css/styles.css +1008 -0
  21. local_deep_research/web/static/js/app.js +2078 -0
  22. local_deep_research/web/templates/api_keys_config.html +82 -0
  23. local_deep_research/web/templates/collections_config.html +90 -0
  24. local_deep_research/web/templates/index.html +312 -0
  25. local_deep_research/web/templates/llm_config.html +120 -0
  26. local_deep_research/web/templates/main_config.html +89 -0
  27. local_deep_research/web/templates/search_engines_config.html +154 -0
  28. local_deep_research/web/templates/settings.html +519 -0
  29. local_deep_research/web/templates/settings_dashboard.html +207 -0
  30. local_deep_research/web_search_engines/__init__.py +0 -0
  31. local_deep_research/web_search_engines/engines/__init__.py +0 -0
  32. local_deep_research/web_search_engines/engines/full_search.py +128 -0
  33. local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
  34. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
  35. local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
  36. local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
  37. local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
  38. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
  39. local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
  40. local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
  41. local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
  42. local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
  43. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
  44. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
  45. local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
  46. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
  47. local_deep_research/web_search_engines/full_search.py +254 -0
  48. local_deep_research/web_search_engines/search_engine_base.py +197 -0
  49. local_deep_research/web_search_engines/search_engine_factory.py +233 -0
  50. local_deep_research/web_search_engines/search_engines_config.py +54 -0
  51. local_deep_research-0.1.0.dist-info/LICENSE +21 -0
  52. local_deep_research-0.1.0.dist-info/METADATA +328 -0
  53. local_deep_research-0.1.0.dist-info/RECORD +56 -0
  54. local_deep_research-0.1.0.dist-info/WHEEL +5 -0
  55. local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
  56. local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
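For orientation before the per-file diffs below: the wheel ships a single top-level package, local_deep_research (see top_level.txt), plus a console entry point whose name is not shown in this view. A minimal getting-started sketch, assuming the PyPI distribution name matches the wheel filename:

    # Sketch only: assumes `pip install local-deep-research==0.1.0` has been run.
    # The module paths below come straight from the file list above.
    from local_deep_research import config
    from local_deep_research.web_search_engines.search_engine_factory import create_search_engine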
local_deep_research/web/templates/settings_dashboard.html
@@ -0,0 +1,207 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Deep Research System - Settings</title>
+     <link rel="stylesheet" href="{{ url_for('research.serve_static', path='css/styles.css') }}">
+     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
+     <link rel="icon" type="image/png" href="{{ url_for('static', filename='favicon.ico') }}">
+     <style>
+         .settings-cards {
+             display: grid;
+             grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
+             gap: 1.5rem;
+             margin-top: 1.5rem;
+         }
+         .settings-card {
+             display: flex;
+             flex-direction: column;
+             height: 100%;
+         }
+         .settings-card .card-content {
+             display: flex;
+             flex-direction: column;
+             flex: 1;
+         }
+         .settings-icon {
+             font-size: 2rem;
+             color: var(--accent-primary);
+             margin-bottom: 1rem;
+             text-align: center;
+         }
+         .settings-title {
+             font-size: 1.25rem;
+             font-weight: 600;
+             margin-bottom: 0.75rem;
+             color: var(--text-primary);
+         }
+         .settings-description {
+             color: var(--text-secondary);
+             margin-bottom: 1.5rem;
+             flex: 1;
+         }
+         .card-actions {
+             margin-top: auto;
+             text-align: center;
+         }
+     </style>
+ </head>
+ <body>
+     <div class="app-container">
+         <!-- Sidebar -->
+         <aside class="sidebar">
+             <div class="sidebar-header">
+                 <h2 id="logo-link" style="cursor: pointer;"><i class="fas fa-atom"></i> Deep Research</h2>
+             </div>
+             <nav class="sidebar-nav">
+                 <ul>
+                     <li data-page="new-research"><i class="fas fa-search"></i> <a href="{{ url_for('research.index') }}">New Research</a></li>
+                     <li data-page="history"><i class="fas fa-history"></i> <a href="{{ url_for('research.index') }}#history">History</a></li>
+                     <li class="active" data-page="settings"><i class="fas fa-cog"></i> Settings</li>
+                 </ul>
+             </nav>
+             <div class="sidebar-footer">
+                 <p>v0.1.0 | <i class="fas fa-brain"></i></p>
+             </div>
+         </aside>
+
+         <!-- Main Content -->
+         <main class="main-content">
+             <div class="page active" id="settings">
+                 <div class="page-header">
+                     <h1>Settings</h1>
+                 </div>
+
+                 {% with messages = get_flashed_messages(with_categories=true) %}
+                     {% if messages %}
+                         {% for category, message in messages %}
+                             <div class="alert alert-{{ category }}">
+                                 {{ message }}
+                             </div>
+                         {% endfor %}
+                     {% endif %}
+                 {% endwith %}
+
+                 <div class="settings-cards">
+                     <!-- Main Configuration Card -->
+                     <div class="card settings-card">
+                         <div class="card-content">
+                             <div class="settings-icon">
+                                 <i class="fas fa-search"></i>
+                             </div>
+                             <h3 class="settings-title">Search Settings</h3>
+                             <p class="settings-description">
+                                 Configure search parameters, results limits, general behavior, and output settings for the research system.
+                             </p>
+                             <div class="card-actions">
+                                 <a href="{{ url_for('research.main_config_page') }}" class="btn btn-primary">
+                                     <i class="fas fa-cog"></i> Configure
+                                 </a>
+                             </div>
+                         </div>
+                     </div>
+
+                     <!-- LLM Configuration Card -->
+                     <div class="card settings-card">
+                         <div class="card-content">
+                             <div class="settings-icon">
+                                 <i class="fas fa-brain"></i>
+                             </div>
+                             <h3 class="settings-title">Language Model Settings</h3>
+                             <p class="settings-description">
+                                 Set up the language models used for research, including model selection, parameters, and API settings.
+                             </p>
+                             <div class="card-actions">
+                                 <a href="{{ url_for('research.llm_config_page') }}" class="btn btn-primary">
+                                     <i class="fas fa-code"></i> Edit Configuration
+                                 </a>
+                             </div>
+                         </div>
+                     </div>
+
+                     <!-- Local Collections Card -->
+                     <div class="card settings-card">
+                         <div class="card-content">
+                             <div class="settings-icon">
+                                 <i class="fas fa-folder"></i>
+                             </div>
+                             <h3 class="settings-title">Local Document Collections</h3>
+                             <p class="settings-description">
+                                 Configure local document collections to search through your own files, papers, and research materials.
+                             </p>
+                             <div class="card-actions">
+                                 <a href="{{ url_for('research.collections_config_page') }}" class="btn btn-primary">
+                                     <i class="fas fa-code"></i> Edit Configuration
+                                 </a>
+                             </div>
+                         </div>
+                     </div>
+
+                     <div class="card settings-card">
+                         <div class="card-content">
+                             <div class="settings-icon">
+                                 <i class="fas fa-key"></i>
+                             </div>
+                             <h3 class="settings-title">API Keys</h3>
+                             <p class="settings-description">
+                                 Configure API keys for external services like OpenAI, Anthropic, and search providers.
+                             </p>
+                             <div class="card-actions">
+                                 <a href="{{ url_for('research.api_keys_config_page') }}" class="btn btn-primary">
+                                     <i class="fas fa-cog"></i> Configure
+                                 </a>
+                             </div>
+                         </div>
+                     </div>
+                     <div class="card settings-card">
+                         <div class="card-content">
+                             <div class="settings-icon">
+                                 <i class="fas fa-search-plus"></i>
+                             </div>
+                             <h3 class="settings-title">Search Engines Settings</h3>
+                             <p class="settings-description">
+                                 Configure search engines, their parameters, and specify which search engines to use for different types of queries.
+                             </p>
+                             <div class="card-actions">
+                                 <a href="{{ url_for('research.search_engines_config_page') }}" class="btn btn-primary">
+                                     <i class="fas fa-cog"></i> Configure
+                                 </a>
+                             </div>
+                         </div>
+                     </div>
+                 </div>
+             </div>
+         </main>
+     </div>
+
+     <!-- Mobile Tab Bar -->
+     <nav class="mobile-tab-bar">
+         <ul>
+             <li data-page="new-research">
+                 <a href="{{ url_for('research.index') }}">
+                     <i class="fas fa-search"></i>
+                     <span>Research</span>
+                 </a>
+             </li>
+             <li data-page="history">
+                 <a href="{{ url_for('research.index') }}#history">
+                     <i class="fas fa-history"></i>
+                     <span>History</span>
+                 </a>
+             </li>
+             <li class="active" data-page="settings">
+                 <i class="fas fa-cog"></i>
+                 <span>Settings</span>
+             </li>
+         </ul>
+     </nav>
+
+     <script>
+         // Make the logo clickable to go back to home
+         document.getElementById('logo-link').addEventListener('click', function() {
+             window.location.href = "{{ url_for('research.index') }}";
+         });
+     </script>
+ </body>
+ </html>
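The template resolves its URLs through a Flask blueprint named research (url_for('research.index'), url_for('research.serve_static', ...)). The actual registration lives in local_deep_research/web/app.py, which this diff view does not expand; the following is only a minimal sketch of the kind of blueprint those endpoint names imply, with every route path an assumption:

    # Hypothetical sketch; the real routes live in local_deep_research/web/app.py (not shown).
    from flask import Blueprint, render_template, send_from_directory

    research = Blueprint("research", __name__, template_folder="templates")

    @research.route("/")
    def index():
        # Target of url_for('research.index') in the template above.
        return render_template("index.html")

    @research.route("/static/<path:path>")
    def serve_static(path):
        # Target of url_for('research.serve_static', path=...).
        return send_from_directory("static", path)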
local_deep_research/web_search_engines/engines/full_search.py
@@ -0,0 +1,128 @@
+ import justext
+ from langchain_community.document_loaders import AsyncChromiumLoader
+ from langchain_community.document_transformers import BeautifulSoupTransformer
+ from langchain_core.language_models import BaseLLM
+ from typing import List, Dict
+ import json, os
+ from local_deep_research.utilties.search_utilities import remove_think_tags
+ from datetime import datetime
+ from local_deep_research import config
+
+ class FullSearchResults:
+     def __init__(
+         self,
+         llm: BaseLLM,  # LLM used to filter URLs for quality and relevance
+         web_search: list,
+         output_format: str = "list",
+         language: str = "English",
+         max_results: int = 10,
+         region: str = "wt-wt",
+         time: str = "y",
+         safesearch: str = "Moderate"
+     ):
+         self.llm = llm
+         self.output_format = output_format
+         self.language = language
+         self.max_results = max_results
+         self.region = region
+         self.time = time
+         self.safesearch = safesearch
+         self.web_search = web_search
+         os.environ["USER_AGENT"] = "Local Deep Research/1.0"
+
+         self.bs_transformer = BeautifulSoupTransformer()
+         self.tags_to_extract = ["p", "div", "span"]
+
+     def check_urls(self, results: List[Dict], query: str) -> List[Dict]:
+         if not results:
+             return results
+
+         now = datetime.now()
+         current_time = now.strftime("%Y-%m-%d")
+         prompt = f"""ONLY Return a JSON array. The response contains no letters. Evaluate these URLs for:
+ 1. Timeliness (today: {current_time})
+ 2. Factual accuracy (cross-reference major claims)
+ 3. Source reliability (prefer official company websites, established news outlets)
+ 4. Direct relevance to query: {query}
+
+ URLs to evaluate:
+ {results}
+
+ Return a JSON array of indices (0-based) for sources that meet ALL criteria.
+ ONLY Return a JSON array of indices (0-based) and nothing else. No letters.
+ Example response: \n[0, 2, 4]\n\n"""
+
+         try:
+             # Get the LLM's evaluation of the URLs
+             response = self.llm.invoke(prompt)
+             good_indices = json.loads(remove_think_tags(response.content))
+
+             # Return only the results whose URLs passed the check
+             return [r for i, r in enumerate(results) if i in good_indices]
+         except Exception as e:
+             print(f"URL filtering error: {e}")
+             return []
+
+     def remove_boilerplate(self, html: str) -> str:
+         if not html or not html.strip():
+             return ""
+         paragraphs = justext.justext(html, justext.get_stoplist(self.language))
+         cleaned = "\n".join([p.text for p in paragraphs if not p.is_boilerplate])
+         return cleaned
+
+     def run(self, query: str):
+         nr_full_text = 0
+
+         # Step 1: Get search results from DuckDuckGo
+         search_results = self.web_search.invoke(query)
+         if not isinstance(search_results, list):
+             raise ValueError("Expected the search results in list format.")
+
+         # Step 2: Filter URLs using the LLM
+         if config.QUALITY_CHECK_DDG_URLS:
+             filtered_results = self.check_urls(search_results, query)
+         else:
+             filtered_results = search_results
+
+         # Extract URLs from the filtered results
+         urls = [result.get("link") for result in filtered_results if result.get("link")]
+         print(urls)
+         if not urls:
+             print("\n === NO VALID LINKS ===\n")
+             return []
+
+         # Step 3: Download the full HTML pages for the filtered URLs
+         loader = AsyncChromiumLoader(urls)
+         html_docs = loader.load()
+
+         # Step 4: Process the HTML using BeautifulSoupTransformer
+         full_docs = self.bs_transformer.transform_documents(
+             html_docs, tags_to_extract=self.tags_to_extract
+         )
+
+         # Step 5: Remove boilerplate from each document
+         url_to_content = {}
+         for doc in full_docs:
+             nr_full_text += 1
+             source = doc.metadata.get("source")
+             if source:
+                 cleaned_text = self.remove_boilerplate(doc.page_content)
+                 url_to_content[source] = cleaned_text
+
+         # Attach the cleaned full content to each filtered result
+         for result in filtered_results:
+             link = result.get("link")
+             result["full_content"] = url_to_content.get(link, None)
+
+         print("FULL SEARCH WITH FILTERED URLS")
+         print("Full text retrieved: ", nr_full_text)
+         return filtered_results
+
+     def invoke(self, query: str):
+         return self.run(query)
+
+     def __call__(self, query: str):
+         return self.invoke(query)
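For context, run() chains four stages: a DuckDuckGo-style search tool, LLM-based URL filtering (gated by config.QUALITY_CHECK_DDG_URLS), Chromium page downloads, and jusText boilerplate removal. A minimal usage sketch follows; DuckDuckGoSearchResults and ChatOllama are illustrative assumptions, since any chat model whose .invoke() returns a message with a .content attribute, and any search tool whose .invoke(query) returns a list of dicts with a "link" key, fits the interface:

    # Illustrative only: the tool and model choices are assumptions, not part of this package.
    from langchain_community.tools import DuckDuckGoSearchResults
    from langchain_ollama import ChatOllama

    llm = ChatOllama(model="mistral")                           # any chat model with .invoke()
    web_search = DuckDuckGoSearchResults(output_format="list")  # returns list-of-dict results

    searcher = FullSearchResults(llm=llm, web_search=web_search)
    for result in searcher.run("history of the transformer architecture"):
        print(result["link"], "->", (result.get("full_content") or "")[:80])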
local_deep_research/web_search_engines/engines/meta_search_engine.py
@@ -0,0 +1,274 @@
+ import logging
+ import os
+ from typing import Dict, List, Any, Optional
+
+ from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
+ from local_deep_research.web_search_engines.search_engines_config import SEARCH_ENGINES
+ from local_deep_research.web_search_engines.search_engine_factory import create_search_engine
+ from local_deep_research.web_search_engines.engines.search_engine_wikipedia import WikipediaSearchEngine
+ from local_deep_research import config
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ class MetaSearchEngine(BaseSearchEngine):
+     """
+     LLM-powered meta search engine that intelligently selects and uses
+     the appropriate search engines based on query analysis
+     """
+
+     def __init__(self,
+                  llm,
+                  max_results: int = 10,
+                  use_api_key_services: bool = True,
+                  max_engines_to_try: int = 3,
+                  max_filtered_results: Optional[int] = None,
+                  **kwargs):
+         """
+         Initialize the meta search engine.
+
+         Args:
+             llm: Language model instance for query classification and relevance filtering
+             max_results: Maximum number of search results to return
+             use_api_key_services: Whether to include services that require API keys
+             max_engines_to_try: Maximum number of engines to try before giving up
+             max_filtered_results: Maximum number of results to keep after filtering
+             **kwargs: Additional parameters (ignored but accepted for compatibility)
+         """
+         # Initialize the BaseSearchEngine with the LLM and max_filtered_results
+         super().__init__(llm=llm, max_filtered_results=max_filtered_results)
+
+         self.max_results = max_results
+         self.use_api_key_services = use_api_key_services
+         self.max_engines_to_try = max_engines_to_try
+
+         # Cache for engine instances
+         self.engine_cache = {}
+
+         # Get available engines (excluding 'meta' and 'auto')
+         self.available_engines = self._get_available_engines()
+         logger.info(f"Meta Search Engine initialized with {len(self.available_engines)} available engines: {', '.join(self.available_engines)}")
+
+         # Create a fallback engine in case everything else fails
+         self.fallback_engine = WikipediaSearchEngine(
+             max_results=max_results,
+             llm=llm,
+             max_filtered_results=max_filtered_results
+         )
+
+     def _get_available_engines(self) -> List[str]:
+         """Get the list of available engines, excluding 'meta' and 'auto'"""
+         # Filter out 'meta' and 'auto' and check API key availability
+         available = []
+         for name, engine_config in SEARCH_ENGINES.items():
+             if name in ["meta", "auto"]:
+                 continue
+
+             if engine_config.get("requires_api_key", False) and not self.use_api_key_services:
+                 continue
+
+             if engine_config.get("requires_api_key", False):
+                 api_key_env = engine_config.get("api_key_env")
+                 api_key = os.getenv(api_key_env) if api_key_env else None
+                 if not api_key:
+                     continue
+
+             available.append(name)
+
+         # Make sure we have at least one engine available
+         if not available and "wikipedia" in SEARCH_ENGINES:
+             available.append("wikipedia")
+
+         return available
+
+     def analyze_query(self, query: str) -> List[str]:
+         """
+         Use the LLM to analyze the query and return a ranked list of
+         recommended search engines to try
+         """
+         if not self.available_engines:
+             logger.warning("No search engines available")
+             return []
+
+         # Create engine descriptions for the prompt
+         engine_descriptions = "\n".join([
+             f"- {name.upper()}: Good for {', '.join(SEARCH_ENGINES[name]['strengths'][:3])}. "
+             f"Weaknesses: {', '.join(SEARCH_ENGINES[name]['weaknesses'][:2])}. "
+             f"Reliability: {SEARCH_ENGINES[name]['reliability']*100:.0f}%"
+             for name in self.available_engines
+         ])
+
+         prompt = f"""Analyze this search query and rank the available search engines in order of most to least appropriate for answering it.
+
+ Query: "{query}"
+
+ Available search engines:
+ {engine_descriptions}
+
+ Consider:
+ 1. The nature of the query (factual, academic, product-related, news, etc.)
+ 2. The strengths and weaknesses of each engine
+ 3. The reliability of each engine
+
+ Return ONLY a comma-separated list of search engine names in your recommended order. Example: "wikipedia,arxiv,duckduckgo"
+ Do not include any engines that are not listed above. Only return the comma-separated list, nothing else."""
+
+         # Get the response from the LLM
+         try:
+             response = self.llm.invoke(prompt)
+             content = response.content.strip()
+
+             # Parse the response into a list of engine names
+             engine_names = [name.strip().lower() for name in content.split(',')]
+
+             # Filter out any invalid engine names
+             valid_engines = [name for name in engine_names if name in self.available_engines]
+
+             # If no valid engines were returned, use a default order based on reliability
+             if not valid_engines:
+                 valid_engines = sorted(
+                     self.available_engines,
+                     key=lambda x: SEARCH_ENGINES[x]["reliability"],
+                     reverse=True
+                 )
+
+             return valid_engines
+         except Exception as e:
+             logger.error(f"Error analyzing query with LLM: {str(e)}")
+             # Fall back to reliability-based ordering
+             return sorted(
+                 self.available_engines,
+                 key=lambda x: SEARCH_ENGINES[x]["reliability"],
+                 reverse=True
+             )
+
+     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
+         """
+         Get preview information by selecting the best search engine for this query.
+
+         Args:
+             query: The search query
+
+         Returns:
+             List of preview dictionaries
+         """
+         # Get a ranked list of engines for this query
+         ranked_engines = self.analyze_query(query)
+
+         if not ranked_engines:
+             logger.warning("No suitable search engines found for query, using fallback engine")
+             return self.fallback_engine._get_previews(query)
+
+         # Limit the number of engines to try
+         engines_to_try = ranked_engines[:self.max_engines_to_try]
+
+         logger.info(f"Search plan created. Will try these engines in order: {', '.join(engines_to_try)}")
+
+         all_errors = []
+         # Try each engine in order
+         for engine_name in engines_to_try:
+             logger.info(f"Trying search engine: {engine_name}")
+
+             # Get or create the engine instance
+             engine = self._get_engine_instance(engine_name)
+
+             if not engine:
+                 logger.warning(f"Failed to initialize {engine_name}, skipping")
+                 all_errors.append(f"Failed to initialize {engine_name}")
+                 continue
+
+             try:
+                 # Get previews from this engine
+                 previews = engine._get_previews(query)
+
+                 # If the search was successful, return its results
+                 if previews and len(previews) > 0:
+                     logger.info(f"Successfully got {len(previews)} preview results from {engine_name}")
+                     # Store the selected engine for later use
+                     self._selected_engine = engine
+                     self._selected_engine_name = engine_name
+                     return previews
+
+                 logger.info(f"{engine_name} returned no previews")
+                 all_errors.append(f"{engine_name} returned no previews")
+
+             except Exception as e:
+                 error_msg = f"Error getting previews from {engine_name}: {str(e)}"
+                 logger.error(error_msg)
+                 all_errors.append(error_msg)
+
+         # If we reach here, all engines failed; use the fallback
+         logger.warning(f"All engines failed or returned no preview results: {', '.join(all_errors)}")
+         logger.info("Using fallback Wikipedia engine for previews")
+         self._selected_engine = self.fallback_engine
+         self._selected_engine_name = "wikipedia"
+         return self.fallback_engine._get_previews(query)
+
+     def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """
+         Get full content using the engine that provided the previews.
+
+         Args:
+             relevant_items: List of relevant preview dictionaries
+
+         Returns:
+             List of result dictionaries with full content
+         """
+         # Check whether we should get full content at all
+         if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
+             logger.info("Snippet-only mode, skipping full content retrieval")
+             return relevant_items
+
+         logger.info("Getting full content for relevant items")
+
+         # Use the selected engine to get full content
+         if hasattr(self, '_selected_engine'):
+             try:
+                 logger.info(f"Using {self._selected_engine_name} to get full content")
+                 return self._selected_engine._get_full_content(relevant_items)
+             except Exception as e:
+                 logger.error(f"Error getting full content from {self._selected_engine_name}: {str(e)}")
+                 # Fall back to returning the relevant items without full content
+                 return relevant_items
+         else:
+             logger.warning("No engine was selected during preview phase, returning relevant items as-is")
+             return relevant_items
+
+     def _get_engine_instance(self, engine_name: str) -> Optional[BaseSearchEngine]:
+         """Get or create an instance of the specified search engine"""
+         # Return a cached instance if available
+         if engine_name in self.engine_cache:
+             return self.engine_cache[engine_name]
+
+         # Create a new instance
+         engine = None
+         try:
+             # Only pass parameters that all engines accept
+             common_params = {
+                 "llm": self.llm,
+                 "max_results": self.max_results
+             }
+
+             # Add max_filtered_results if specified
+             if self.max_filtered_results is not None:
+                 common_params["max_filtered_results"] = self.max_filtered_results
+
+             engine = create_search_engine(
+                 engine_name,
+                 **common_params
+             )
+         except Exception as e:
+             logger.error(f"Error creating engine instance for {engine_name}: {str(e)}")
+             return None
+
+         if engine:
+             # Cache the instance
+             self.engine_cache[engine_name] = engine
+
+         return engine
+
+     def invoke(self, query: str) -> List[Dict[str, Any]]:
+         """Compatibility method for LangChain tools"""
+         return self.run(query)
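In short, the meta engine asks the LLM to rank the configured engines for a query, tries each ranked engine's _get_previews() in turn, and falls back to Wikipedia if everything fails. A minimal usage sketch, under the same model assumption as above and assuming run() is provided by BaseSearchEngine (defined in search_engine_base.py, not expanded in this diff):

    # Illustrative only: the model choice is an assumption, and the result keys
    # ("title", "link") are typical preview fields rather than a documented schema.
    from langchain_ollama import ChatOllama
    from local_deep_research.web_search_engines.engines.meta_search_engine import MetaSearchEngine

    llm = ChatOllama(model="mistral")
    engine = MetaSearchEngine(llm=llm, max_results=5, use_api_key_services=False)
    for item in engine.run("recent advances in protein structure prediction"):
        print(item.get("title"), item.get("link"))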