local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +96 -84
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +72 -44
- local_deep_research/search_system.py +147 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1592 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
- local_deep_research-0.2.0.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,31 +1,36 @@
|
|
1
|
-
|
2
|
-
from typing import Dict, List, Any, Optional
|
1
|
+
import logging
|
3
2
|
import os
|
3
|
+
from typing import Any, Dict, List, Optional
|
4
|
+
|
5
|
+
from langchain_community.tools import BraveSearch
|
4
6
|
from langchain_core.language_models import BaseLLM
|
5
7
|
|
6
|
-
from
|
7
|
-
from
|
8
|
-
|
8
|
+
from ...config import search_config
|
9
|
+
from ..search_engine_base import BaseSearchEngine
|
10
|
+
|
9
11
|
logger = logging.getLogger(__name__)
|
10
12
|
|
13
|
+
|
11
14
|
class BraveSearchEngine(BaseSearchEngine):
|
12
15
|
"""Brave search engine implementation with two-phase approach"""
|
13
16
|
|
14
|
-
def __init__(
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
17
|
+
def __init__(
|
18
|
+
self,
|
19
|
+
max_results: int = 10,
|
20
|
+
region: str = "US",
|
21
|
+
time_period: str = "y",
|
22
|
+
safe_search: bool = True,
|
23
|
+
search_language: str = "English",
|
24
|
+
api_key: Optional[str] = None,
|
25
|
+
language_code_mapping: Optional[Dict[str, str]] = None,
|
26
|
+
llm: Optional[BaseLLM] = None,
|
27
|
+
include_full_content: bool = True,
|
28
|
+
max_filtered_results: Optional[int] = None,
|
29
|
+
**kwargs,
|
30
|
+
):
|
26
31
|
"""
|
27
32
|
Initialize the Brave search engine.
|
28
|
-
|
33
|
+
|
29
34
|
Args:
|
30
35
|
max_results: Maximum number of search results
|
31
36
|
region: Region code for search results
|
@@ -40,9 +45,11 @@ class BraveSearchEngine(BaseSearchEngine):
|
|
40
45
|
**kwargs: Additional parameters (ignored but accepted for compatibility)
|
41
46
|
"""
|
42
47
|
# Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
|
43
|
-
super().__init__(
|
48
|
+
super().__init__(
|
49
|
+
llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
|
50
|
+
)
|
44
51
|
self.include_full_content = include_full_content
|
45
|
-
|
52
|
+
|
46
53
|
# Set up language code mapping
|
47
54
|
if language_code_mapping is None:
|
48
55
|
language_code_mapping = {
|
@@ -56,21 +63,23 @@ class BraveSearchEngine(BaseSearchEngine):
|
|
56
63
|
"portuguese": "pt",
|
57
64
|
"russian": "ru",
|
58
65
|
}
|
59
|
-
|
66
|
+
|
60
67
|
# Get API key
|
61
68
|
brave_api_key = api_key or os.getenv("BRAVE_API_KEY")
|
62
69
|
if not brave_api_key:
|
63
|
-
raise ValueError(
|
64
|
-
|
70
|
+
raise ValueError(
|
71
|
+
"BRAVE_API_KEY not found. Please provide api_key or set the BRAVE_API_KEY environment variable."
|
72
|
+
)
|
73
|
+
|
65
74
|
# Get language code
|
66
75
|
language_code = language_code_mapping.get(search_language.lower(), "en")
|
67
|
-
|
76
|
+
|
68
77
|
# Convert time period format to Brave's format
|
69
78
|
brave_time_period = f"p{time_period}"
|
70
|
-
|
79
|
+
|
71
80
|
# Convert safe search to Brave's format
|
72
81
|
brave_safe_search = "moderate" if safe_search else "off"
|
73
|
-
|
82
|
+
|
74
83
|
# Initialize Brave Search
|
75
84
|
self.engine = BraveSearch.from_api_key(
|
76
85
|
api_key=brave_api_key,
|
@@ -80,17 +89,18 @@ class BraveSearchEngine(BaseSearchEngine):
|
|
80
89
|
"search_lang": language_code,
|
81
90
|
"safesearch": brave_safe_search,
|
82
91
|
"freshness": brave_time_period,
|
83
|
-
}
|
92
|
+
},
|
84
93
|
)
|
85
|
-
|
94
|
+
|
86
95
|
# Set user agent for Brave Search
|
87
96
|
os.environ["USER_AGENT"] = "Local Deep Research/1.0"
|
88
|
-
|
97
|
+
|
89
98
|
# If full content is requested, initialize FullSearchResults
|
90
99
|
if include_full_content:
|
91
100
|
# Import FullSearchResults only if needed
|
92
101
|
try:
|
93
|
-
from
|
102
|
+
from .full_search import FullSearchResults
|
103
|
+
|
94
104
|
self.full_search = FullSearchResults(
|
95
105
|
llm=llm,
|
96
106
|
web_search=self.engine,
|
@@ -98,37 +108,40 @@ class BraveSearchEngine(BaseSearchEngine):
|
|
98
108
|
max_results=max_results,
|
99
109
|
region=region,
|
100
110
|
time=time_period,
|
101
|
-
safesearch=brave_safe_search
|
111
|
+
safesearch=brave_safe_search,
|
102
112
|
)
|
103
113
|
except ImportError:
|
104
|
-
logger.warning(
|
114
|
+
logger.warning(
|
115
|
+
"Warning: FullSearchResults not available. Full content retrieval disabled."
|
116
|
+
)
|
105
117
|
self.include_full_content = False
|
106
|
-
|
118
|
+
|
107
119
|
def _get_previews(self, query: str) -> List[Dict[str, Any]]:
|
108
120
|
"""
|
109
121
|
Get preview information from Brave Search.
|
110
|
-
|
122
|
+
|
111
123
|
Args:
|
112
124
|
query: The search query
|
113
|
-
|
125
|
+
|
114
126
|
Returns:
|
115
127
|
List of preview dictionaries
|
116
128
|
"""
|
117
129
|
logger.info("Getting search results from Brave Search")
|
118
|
-
|
130
|
+
|
119
131
|
try:
|
120
132
|
# Get search results from Brave Search
|
121
133
|
raw_results = self.engine.run(query[:400])
|
122
|
-
|
134
|
+
|
123
135
|
# Parse results if they're in string format
|
124
136
|
if isinstance(raw_results, str):
|
125
137
|
try:
|
126
138
|
import json
|
139
|
+
|
127
140
|
raw_results = json.loads(raw_results)
|
128
141
|
except json.JSONDecodeError:
|
129
142
|
logger.error("Error: Unable to parse BraveSearch response as JSON.")
|
130
143
|
return []
|
131
|
-
|
144
|
+
|
132
145
|
# Format results as previews
|
133
146
|
previews = []
|
134
147
|
for i, result in enumerate(raw_results):
|
@@ -138,39 +151,44 @@ class BraveSearchEngine(BaseSearchEngine):
|
|
138
151
|
"link": result.get("link", ""),
|
139
152
|
"snippet": result.get("snippet", ""),
|
140
153
|
"displayed_link": result.get("link", ""),
|
141
|
-
"position": i
|
154
|
+
"position": i,
|
142
155
|
}
|
143
|
-
|
156
|
+
|
144
157
|
# Store full Brave result for later
|
145
158
|
preview["_full_result"] = result
|
146
|
-
|
159
|
+
|
147
160
|
previews.append(preview)
|
148
|
-
|
161
|
+
|
149
162
|
# Store the previews for potential full content retrieval
|
150
163
|
self._search_results = previews
|
151
|
-
|
164
|
+
|
152
165
|
return previews
|
153
|
-
|
166
|
+
|
154
167
|
except Exception as e:
|
155
168
|
logger.error(f"Error getting Brave Search results: {e}")
|
156
169
|
return []
|
157
|
-
|
158
|
-
def _get_full_content(
|
170
|
+
|
171
|
+
def _get_full_content(
|
172
|
+
self, relevant_items: List[Dict[str, Any]]
|
173
|
+
) -> List[Dict[str, Any]]:
|
159
174
|
"""
|
160
175
|
Get full content for the relevant search results.
|
161
176
|
If include_full_content is True and FullSearchResults is available,
|
162
177
|
retrieves full webpage content for the results.
|
163
|
-
|
178
|
+
|
164
179
|
Args:
|
165
180
|
relevant_items: List of relevant preview dictionaries
|
166
|
-
|
181
|
+
|
167
182
|
Returns:
|
168
183
|
List of result dictionaries with full content if requested
|
169
184
|
"""
|
170
185
|
# Check if we should get full content
|
171
|
-
if
|
186
|
+
if (
|
187
|
+
hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
|
188
|
+
and search_config.SEARCH_SNIPPETS_ONLY
|
189
|
+
):
|
172
190
|
logger.info("Snippet-only mode, skipping full content retrieval")
|
173
|
-
|
191
|
+
|
174
192
|
# Return the relevant items with their full Brave information
|
175
193
|
results = []
|
176
194
|
for item in relevant_items:
|
@@ -182,28 +200,27 @@ class BraveSearchEngine(BaseSearchEngine):
|
|
182
200
|
del result["_full_result"]
|
183
201
|
else:
|
184
202
|
result = item
|
185
|
-
|
203
|
+
|
186
204
|
results.append(result)
|
187
|
-
|
205
|
+
|
188
206
|
return results
|
189
|
-
|
207
|
+
|
190
208
|
# If full content retrieval is enabled
|
191
|
-
if self.include_full_content and hasattr(self,
|
209
|
+
if self.include_full_content and hasattr(self, "full_search"):
|
192
210
|
logger.info("Retrieving full webpage content")
|
193
|
-
|
211
|
+
|
194
212
|
try:
|
195
|
-
# Extract only the links from relevant items
|
196
|
-
links = [item.get("link") for item in relevant_items if item.get("link")]
|
197
|
-
|
198
213
|
# Use FullSearchResults to get full content
|
199
|
-
results_with_content = self.full_search._get_full_content(
|
200
|
-
|
214
|
+
results_with_content = self.full_search._get_full_content(
|
215
|
+
relevant_items
|
216
|
+
)
|
217
|
+
|
201
218
|
return results_with_content
|
202
|
-
|
219
|
+
|
203
220
|
except Exception as e:
|
204
221
|
logger.error(f"Error retrieving full content: {e}")
|
205
222
|
# Fall back to returning the items without full content
|
206
|
-
|
223
|
+
|
207
224
|
# Return items with their full Brave information
|
208
225
|
results = []
|
209
226
|
for item in relevant_items:
|
@@ -217,28 +234,28 @@ class BraveSearchEngine(BaseSearchEngine):
|
|
217
234
|
result = item.copy()
|
218
235
|
if "_full_result" in result:
|
219
236
|
del result["_full_result"]
|
220
|
-
|
237
|
+
|
221
238
|
results.append(result)
|
222
|
-
|
239
|
+
|
223
240
|
return results
|
224
|
-
|
241
|
+
|
225
242
|
def run(self, query: str) -> List[Dict[str, Any]]:
|
226
243
|
"""
|
227
244
|
Execute a search using Brave Search with the two-phase approach.
|
228
|
-
|
245
|
+
|
229
246
|
Args:
|
230
247
|
query: The search query
|
231
|
-
|
248
|
+
|
232
249
|
Returns:
|
233
250
|
List of search results
|
234
251
|
"""
|
235
252
|
logger.info("---Execute a search using Brave Search---")
|
236
|
-
|
253
|
+
|
237
254
|
# Use the implementation from the parent class which handles all phases
|
238
255
|
results = super().run(query)
|
239
|
-
|
256
|
+
|
240
257
|
# Clean up
|
241
|
-
if hasattr(self,
|
258
|
+
if hasattr(self, "_search_results"):
|
242
259
|
del self._search_results
|
243
|
-
|
260
|
+
|
244
261
|
return results
|
@@ -1,26 +1,31 @@
|
|
1
|
+
import logging
|
2
|
+
from typing import Any, Dict, List, Optional
|
3
|
+
|
1
4
|
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
|
2
|
-
from typing import Dict, List, Any, Optional
|
3
5
|
from langchain_core.language_models import BaseLLM
|
4
6
|
|
5
|
-
from
|
6
|
-
from
|
7
|
-
|
7
|
+
from ..search_engine_base import BaseSearchEngine
|
8
|
+
from .full_search import FullSearchResults # Import the FullSearchResults class
|
9
|
+
|
8
10
|
logger = logging.getLogger(__name__)
|
9
11
|
|
12
|
+
|
10
13
|
class DuckDuckGoSearchEngine(BaseSearchEngine):
|
11
14
|
"""DuckDuckGo search engine implementation with two-phase retrieval"""
|
12
|
-
|
13
|
-
def __init__(
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
15
|
+
|
16
|
+
def __init__(
|
17
|
+
self,
|
18
|
+
max_results: int = 10,
|
19
|
+
region: str = "us",
|
20
|
+
safe_search: bool = True,
|
21
|
+
llm: Optional[BaseLLM] = None,
|
22
|
+
language: str = "English",
|
23
|
+
include_full_content: bool = False,
|
24
|
+
max_filtered_results=5,
|
25
|
+
):
|
21
26
|
"""
|
22
27
|
Initialize the DuckDuckGo search engine.
|
23
|
-
|
28
|
+
|
24
29
|
Args:
|
25
30
|
max_results: Maximum number of search results
|
26
31
|
region: Region code for search results
|
@@ -30,19 +35,21 @@ class DuckDuckGoSearchEngine(BaseSearchEngine):
|
|
30
35
|
include_full_content: Whether to include full webpage content in results
|
31
36
|
"""
|
32
37
|
# Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
|
33
|
-
super().__init__(
|
38
|
+
super().__init__(
|
39
|
+
llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
|
40
|
+
)
|
34
41
|
self.region = region
|
35
42
|
self.safe_search = safe_search
|
36
43
|
self.language = language
|
37
44
|
self.include_full_content = include_full_content
|
38
|
-
|
45
|
+
|
39
46
|
# Initialize the DuckDuckGo wrapper
|
40
47
|
self.engine = DuckDuckGoSearchAPIWrapper(
|
41
48
|
region=region,
|
42
49
|
max_results=max_results,
|
43
|
-
safesearch="moderate" if safe_search else "off"
|
50
|
+
safesearch="moderate" if safe_search else "off",
|
44
51
|
)
|
45
|
-
|
52
|
+
|
46
53
|
# Initialize FullSearchResults if full content is requested
|
47
54
|
if include_full_content and llm:
|
48
55
|
self.full_search = FullSearchResults(
|
@@ -52,44 +59,44 @@ class DuckDuckGoSearchEngine(BaseSearchEngine):
|
|
52
59
|
max_results=max_results,
|
53
60
|
region=region,
|
54
61
|
time="y",
|
55
|
-
safesearch="Moderate" if safe_search else "Off"
|
62
|
+
safesearch="Moderate" if safe_search else "Off",
|
56
63
|
)
|
57
|
-
|
64
|
+
|
58
65
|
def run(self, query: str) -> List[Dict[str, Any]]:
|
59
66
|
"""
|
60
67
|
Execute a search using DuckDuckGo with the two-phase approach.
|
61
68
|
Respects config parameters:
|
62
69
|
- SEARCH_SNIPPETS_ONLY: If True, only returns snippets without full content
|
63
70
|
- SKIP_RELEVANCE_FILTER: If True, returns all results without filtering
|
64
|
-
|
71
|
+
|
65
72
|
Args:
|
66
73
|
query: The search query
|
67
|
-
|
74
|
+
|
68
75
|
Returns:
|
69
76
|
List of search results
|
70
77
|
"""
|
71
78
|
logger.info("---Execute a search using DuckDuckGo---")
|
72
|
-
|
79
|
+
|
73
80
|
# Implementation of the two-phase approach (from parent class)
|
74
81
|
return super().run(query)
|
75
|
-
|
82
|
+
|
76
83
|
def _get_previews(self, query: str) -> List[Dict[str, Any]]:
|
77
84
|
"""
|
78
85
|
Get preview information (titles and snippets) for initial search results.
|
79
|
-
|
86
|
+
|
80
87
|
Args:
|
81
88
|
query: The search query
|
82
|
-
|
89
|
+
|
83
90
|
Returns:
|
84
91
|
List of preview dictionaries with 'id', 'title', and 'snippet' keys
|
85
92
|
"""
|
86
93
|
try:
|
87
94
|
# Get search results from DuckDuckGo
|
88
95
|
results = self.engine.results(query, max_results=self.max_results)
|
89
|
-
|
96
|
+
|
90
97
|
if not isinstance(results, list):
|
91
98
|
return []
|
92
|
-
|
99
|
+
|
93
100
|
# Process results to get previews
|
94
101
|
previews = []
|
95
102
|
for i, result in enumerate(results):
|
@@ -97,30 +104,32 @@ class DuckDuckGoSearchEngine(BaseSearchEngine):
|
|
97
104
|
"id": result.get("link"), # Use URL as ID for DDG
|
98
105
|
"title": result.get("title", ""),
|
99
106
|
"snippet": result.get("snippet", ""),
|
100
|
-
"link": result.get("link", "")
|
107
|
+
"link": result.get("link", ""),
|
101
108
|
}
|
102
|
-
|
109
|
+
|
103
110
|
previews.append(preview)
|
104
|
-
|
111
|
+
|
105
112
|
return previews
|
106
|
-
|
113
|
+
|
107
114
|
except Exception as e:
|
108
115
|
logger.error(f"Error getting DuckDuckGo previews: {e}")
|
109
116
|
return []
|
110
|
-
|
111
|
-
def _get_full_content(
|
117
|
+
|
118
|
+
def _get_full_content(
|
119
|
+
self, relevant_items: List[Dict[str, Any]]
|
120
|
+
) -> List[Dict[str, Any]]:
|
112
121
|
"""
|
113
122
|
Get full content for the relevant items by using FullSearchResults.
|
114
|
-
|
123
|
+
|
115
124
|
Args:
|
116
125
|
relevant_items: List of relevant preview dictionaries
|
117
|
-
|
126
|
+
|
118
127
|
Returns:
|
119
128
|
List of result dictionaries with full content
|
120
129
|
"""
|
121
130
|
# If we have FullSearchResults, use it to get full content
|
122
|
-
if hasattr(self,
|
131
|
+
if hasattr(self, "full_search"):
|
123
132
|
return self.full_search._get_full_content(relevant_items)
|
124
|
-
|
133
|
+
|
125
134
|
# Otherwise, just return the relevant items without full content
|
126
|
-
return relevant_items
|
135
|
+
return relevant_items
|