local-deep-research 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +24 -0
- local_deep_research/citation_handler.py +113 -0
- local_deep_research/config.py +166 -0
- local_deep_research/defaults/__init__.py +44 -0
- local_deep_research/defaults/llm_config.py +269 -0
- local_deep_research/defaults/local_collections.toml +47 -0
- local_deep_research/defaults/main.toml +57 -0
- local_deep_research/defaults/search_engines.toml +244 -0
- local_deep_research/local_collections.py +141 -0
- local_deep_research/main.py +113 -0
- local_deep_research/report_generator.py +206 -0
- local_deep_research/search_system.py +241 -0
- local_deep_research/utilties/__init__.py +0 -0
- local_deep_research/utilties/enums.py +9 -0
- local_deep_research/utilties/llm_utils.py +116 -0
- local_deep_research/utilties/search_utilities.py +115 -0
- local_deep_research/utilties/setup_utils.py +6 -0
- local_deep_research/web/__init__.py +2 -0
- local_deep_research/web/app.py +1209 -0
- local_deep_research/web/static/css/styles.css +1008 -0
- local_deep_research/web/static/js/app.js +2078 -0
- local_deep_research/web/templates/api_keys_config.html +82 -0
- local_deep_research/web/templates/collections_config.html +90 -0
- local_deep_research/web/templates/index.html +312 -0
- local_deep_research/web/templates/llm_config.html +120 -0
- local_deep_research/web/templates/main_config.html +89 -0
- local_deep_research/web/templates/search_engines_config.html +154 -0
- local_deep_research/web/templates/settings.html +519 -0
- local_deep_research/web/templates/settings_dashboard.html +207 -0
- local_deep_research/web_search_engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/full_search.py +128 -0
- local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
- local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
- local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
- local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
- local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
- local_deep_research/web_search_engines/full_search.py +254 -0
- local_deep_research/web_search_engines/search_engine_base.py +197 -0
- local_deep_research/web_search_engines/search_engine_factory.py +233 -0
- local_deep_research/web_search_engines/search_engines_config.py +54 -0
- local_deep_research-0.1.0.dist-info/LICENSE +21 -0
- local_deep_research-0.1.0.dist-info/METADATA +328 -0
- local_deep_research-0.1.0.dist-info/RECORD +56 -0
- local_deep_research-0.1.0.dist-info/WHEEL +5 -0
- local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
- local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,230 @@
|
|
1
|
+
from langchain_community.utilities import SerpAPIWrapper
|
2
|
+
from typing import Dict, List, Any, Optional
|
3
|
+
import os
|
4
|
+
from langchain_core.language_models import BaseLLM
|
5
|
+
|
6
|
+
from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
|
7
|
+
from local_deep_research import config
|
8
|
+
|
9
|
+
|
10
|
+
class SerpAPISearchEngine(BaseSearchEngine):
    """Google search engine implementation using SerpAPI with two-phase approach"""

    def __init__(self,
                 max_results: int = 10,
                 region: str = "us",
                 time_period: str = "y",
                 safe_search: bool = True,
                 search_language: str = "English",
                 api_key: Optional[str] = None,
                 language_code_mapping: Optional[Dict[str, str]] = None,
                 llm: Optional[BaseLLM] = None,
                 include_full_content: bool = False,
                 max_filtered_results: Optional[int] = None,
                 **kwargs):
        """
        Initialize the SerpAPI search engine.

        Args:
            max_results: Maximum number of search results
            region: Region code for search results
            time_period: Time period for search results
            safe_search: Whether to enable safe search
            search_language: Language for search results
            api_key: SerpAPI API key (can also be set in SERP_API_KEY env)
            language_code_mapping: Mapping from language names to codes
            llm: Language model for relevance filtering
            include_full_content: Whether to include full webpage content in results
            max_filtered_results: Maximum number of results to keep after filtering
            **kwargs: Additional parameters (ignored but accepted for compatibility)

        Raises:
            ValueError: If no API key is given and SERP_API_KEY is not set.
        """
        # Initialize the BaseSearchEngine with the LLM and max_filtered_results
        super().__init__(llm=llm, max_filtered_results=max_filtered_results)

        self.max_results = max_results
        self.include_full_content = include_full_content

        # Default mapping from human-readable language names to Google "hl" codes
        if language_code_mapping is None:
            language_code_mapping = {
                "english": "en",
                "spanish": "es",
                "chinese": "zh",
                "hindi": "hi",
                "french": "fr",
                "arabic": "ar",
                "bengali": "bn",
                "portuguese": "pt",
                "russian": "ru",
            }

        # Explicit api_key argument takes precedence over the environment
        serpapi_api_key = api_key or os.getenv("SERP_API_KEY")
        if not serpapi_api_key:
            raise ValueError("SERP_API_KEY not found. Please provide api_key or set the SERP_API_KEY environment variable.")

        # Unknown language names fall back to English
        language_code = language_code_mapping.get(search_language.lower(), "en")

        # Initialize SerpAPI wrapper with Google-specific query parameters
        self.engine = SerpAPIWrapper(
            serpapi_api_key=serpapi_api_key,
            params={
                "engine": "google",
                "hl": language_code,
                "gl": region,
                "safe": "active" if safe_search else "off",
                "tbs": f"qdr:{time_period}",
                "num": max_results,
            }
        )

        # If full content is requested, initialize FullSearchResults
        if include_full_content:
            # Import FullSearchResults only if needed
            try:
                from local_deep_research.web_search_engines.engines.full_search import FullSearchResults
                self.full_search = FullSearchResults(
                    llm=llm,
                    web_search=self.engine,
                    language=search_language,
                    max_results=max_results,
                    region=region,
                    time=time_period,
                    safesearch="Moderate" if safe_search else "Off"
                )
            except ImportError:
                print("Warning: FullSearchResults not available. Full content retrieval disabled.")
                self.include_full_content = False

    @staticmethod
    def _clean_result(item: Dict[str, Any]) -> Dict[str, Any]:
        """
        Return a copy of a result dict suitable for returning to callers.

        Expands the stashed raw SerpAPI payload (stored under the temporary
        "_full_result" key by _get_previews) when present, otherwise copies
        the preview itself. Always copies so that self._search_results and
        the caller's result list never share mutable dicts, and always
        strips the temporary key.
        """
        if "_full_result" in item:
            result = item["_full_result"].copy()
        else:
            result = item.copy()
        # Remove the temporary field regardless of which branch was taken
        result.pop("_full_result", None)
        return result

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information from SerpAPI.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries (empty list on any API error).
        """
        print("Getting search results from SerpAPI")

        try:
            # Get search results from SerpAPI; only organic results are used
            organic_results = self.engine.results(query).get("organic_results", [])

            # Format results as previews
            previews = []
            for result in organic_results:
                preview = {
                    "id": result.get("position", len(previews)),  # Use position as ID
                    "title": result.get("title", ""),
                    "link": result.get("link", ""),
                    "snippet": result.get("snippet", ""),
                    "displayed_link": result.get("displayed_link", ""),
                    "position": result.get("position")
                }

                # Store full SerpAPI result for later expansion in _get_full_content
                preview["_full_result"] = result

                previews.append(preview)

            # Store the previews for potential full content retrieval
            self._search_results = previews

            return previews

        except Exception as e:
            # Best-effort: a failed search degrades to "no results", not a crash
            print(f"Error getting SerpAPI results: {e}")
            return []

    def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant search results.

        If include_full_content is True and FullSearchResults is available,
        retrieves full webpage content for the results. In snippet-only mode
        (config.SEARCH_SNIPPETS_ONLY) or on retrieval failure, returns the
        raw SerpAPI result data instead.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content if requested
        """
        # Snippet-only mode short-circuits full content retrieval entirely
        if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
            print("Snippet-only mode, skipping full content retrieval")
            return [self._clean_result(item) for item in relevant_items]

        # If full content retrieval is enabled and the helper was constructed
        if self.include_full_content and hasattr(self, 'full_search'):
            print("Retrieving full webpage content")

            try:
                # Delegate fetching/processing of the URLs to FullSearchResults
                return self.full_search._get_full_content(relevant_items)
            except Exception as e:
                print(f"Error retrieving full content: {e}")
                # Fall through to returning the items without full content

        # Fallback: return items with their full SerpAPI information
        return [self._clean_result(item) for item in relevant_items]

    def run(self, query: str) -> List[Dict[str, Any]]:
        """
        Execute a search using SerpAPI with the two-phase approach.

        Args:
            query: The search query

        Returns:
            List of search results
        """
        print("---Execute a search using SerpAPI (Google)---")

        # Use the implementation from the parent class which handles all phases
        results = super().run(query)

        # Clean up the preview cache created by _get_previews
        if hasattr(self, '_search_results'):
            del self._search_results

        return results
|