local-deep-research 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. local_deep_research/__init__.py +24 -0
  2. local_deep_research/citation_handler.py +113 -0
  3. local_deep_research/config.py +166 -0
  4. local_deep_research/defaults/__init__.py +44 -0
  5. local_deep_research/defaults/llm_config.py +269 -0
  6. local_deep_research/defaults/local_collections.toml +47 -0
  7. local_deep_research/defaults/main.toml +57 -0
  8. local_deep_research/defaults/search_engines.toml +244 -0
  9. local_deep_research/local_collections.py +141 -0
  10. local_deep_research/main.py +113 -0
  11. local_deep_research/report_generator.py +206 -0
  12. local_deep_research/search_system.py +241 -0
  13. local_deep_research/utilties/__init__.py +0 -0
  14. local_deep_research/utilties/enums.py +9 -0
  15. local_deep_research/utilties/llm_utils.py +116 -0
  16. local_deep_research/utilties/search_utilities.py +115 -0
  17. local_deep_research/utilties/setup_utils.py +6 -0
  18. local_deep_research/web/__init__.py +2 -0
  19. local_deep_research/web/app.py +1209 -0
  20. local_deep_research/web/static/css/styles.css +1008 -0
  21. local_deep_research/web/static/js/app.js +2078 -0
  22. local_deep_research/web/templates/api_keys_config.html +82 -0
  23. local_deep_research/web/templates/collections_config.html +90 -0
  24. local_deep_research/web/templates/index.html +312 -0
  25. local_deep_research/web/templates/llm_config.html +120 -0
  26. local_deep_research/web/templates/main_config.html +89 -0
  27. local_deep_research/web/templates/search_engines_config.html +154 -0
  28. local_deep_research/web/templates/settings.html +519 -0
  29. local_deep_research/web/templates/settings_dashboard.html +207 -0
  30. local_deep_research/web_search_engines/__init__.py +0 -0
  31. local_deep_research/web_search_engines/engines/__init__.py +0 -0
  32. local_deep_research/web_search_engines/engines/full_search.py +128 -0
  33. local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
  34. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
  35. local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
  36. local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
  37. local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
  38. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
  39. local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
  40. local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
  41. local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
  42. local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
  43. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
  44. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
  45. local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
  46. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
  47. local_deep_research/web_search_engines/full_search.py +254 -0
  48. local_deep_research/web_search_engines/search_engine_base.py +197 -0
  49. local_deep_research/web_search_engines/search_engine_factory.py +233 -0
  50. local_deep_research/web_search_engines/search_engines_config.py +54 -0
  51. local_deep_research-0.1.0.dist-info/LICENSE +21 -0
  52. local_deep_research-0.1.0.dist-info/METADATA +328 -0
  53. local_deep_research-0.1.0.dist-info/RECORD +56 -0
  54. local_deep_research-0.1.0.dist-info/WHEEL +5 -0
  55. local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
  56. local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
local_deep_research/web_search_engines/engines/search_engine_serpapi.py
@@ -0,0 +1,230 @@
+ from langchain_community.utilities import SerpAPIWrapper
+ from typing import Dict, List, Any, Optional
+ import os
+ from langchain_core.language_models import BaseLLM
+
+ from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
+ from local_deep_research import config
+
+
+ class SerpAPISearchEngine(BaseSearchEngine):
+     """Google search engine implementation using SerpAPI with a two-phase approach"""
+
+     def __init__(self,
+                  max_results: int = 10,
+                  region: str = "us",
+                  time_period: str = "y",
+                  safe_search: bool = True,
+                  search_language: str = "English",
+                  api_key: Optional[str] = None,
+                  language_code_mapping: Optional[Dict[str, str]] = None,
+                  llm: Optional[BaseLLM] = None,
+                  include_full_content: bool = False,
+                  max_filtered_results: Optional[int] = None,
+                  **kwargs):
+         """
+         Initialize the SerpAPI search engine.
+
+         Args:
+             max_results: Maximum number of search results
+             region: Region code for search results
+             time_period: Time period for search results
+             safe_search: Whether to enable safe search
+             search_language: Language for search results
+             api_key: SerpAPI API key (can also be set via the SERP_API_KEY environment variable)
+             language_code_mapping: Mapping from language names to codes
+             llm: Language model for relevance filtering
+             include_full_content: Whether to include full webpage content in results
+             max_filtered_results: Maximum number of results to keep after filtering
+             **kwargs: Additional parameters (ignored but accepted for compatibility)
+         """
+         # Initialize the BaseSearchEngine with the LLM and max_filtered_results
+         super().__init__(llm=llm, max_filtered_results=max_filtered_results)
+
+         self.max_results = max_results
+         self.include_full_content = include_full_content
+
+         # Set up the default language code mapping
+         if language_code_mapping is None:
+             language_code_mapping = {
+                 "english": "en",
+                 "spanish": "es",
+                 "chinese": "zh",
+                 "hindi": "hi",
+                 "french": "fr",
+                 "arabic": "ar",
+                 "bengali": "bn",
+                 "portuguese": "pt",
+                 "russian": "ru",
+             }
+
+         # Get the API key
+         serpapi_api_key = api_key or os.getenv("SERP_API_KEY")
+         if not serpapi_api_key:
+             raise ValueError("SERP_API_KEY not found. Please provide api_key or set the SERP_API_KEY environment variable.")
+
+         # Get the language code, falling back to English
+         language_code = language_code_mapping.get(search_language.lower(), "en")
+
+         # Initialize the SerpAPI wrapper
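+         # "hl" sets the result language and "gl" the result country/region;
+         # "tbs": "qdr:<period>" is Google's date-restrict syntax, where the
+         # period is h (hour), d (day), w (week), m (month), or y (year).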
+         self.engine = SerpAPIWrapper(
+             serpapi_api_key=serpapi_api_key,
+             params={
+                 "engine": "google",
+                 "hl": language_code,
+                 "gl": region,
+                 "safe": "active" if safe_search else "off",
+                 "tbs": f"qdr:{time_period}",
+                 "num": max_results,
+             }
+         )
+
+         # If full content is requested, initialize FullSearchResults
+         if include_full_content:
+             # Import FullSearchResults only if needed
+             try:
+                 from local_deep_research.web_search_engines.engines.full_search import FullSearchResults
+                 self.full_search = FullSearchResults(
+                     llm=llm,
+                     web_search=self.engine,
+                     language=search_language,
+                     max_results=max_results,
+                     region=region,
+                     time=time_period,
+                     safesearch="Moderate" if safe_search else "Off"
+                 )
+             except ImportError:
+                 print("Warning: FullSearchResults not available. Full content retrieval disabled.")
+                 self.include_full_content = False
+
+     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
+         """
+         Get preview information from SerpAPI.
+
+         Args:
+             query: The search query
+
+         Returns:
+             List of preview dictionaries
+         """
+         print("Getting search results from SerpAPI")
+
+         try:
+             # Get search results from SerpAPI
+             organic_results = self.engine.results(query).get("organic_results", [])
+
+             # Format results as previews
+             previews = []
+             for result in organic_results:
+                 preview = {
+                     "id": result.get("position", len(previews)),  # Use position as ID
+                     "title": result.get("title", ""),
+                     "link": result.get("link", ""),
+                     "snippet": result.get("snippet", ""),
+                     "displayed_link": result.get("displayed_link", ""),
+                     "position": result.get("position")
+                 }
+
+                 # Store the full SerpAPI result for later
+                 preview["_full_result"] = result
+
+                 previews.append(preview)
+
+             # Store the previews for potential full content retrieval
+             self._search_results = previews
+
+             return previews
+
+         except Exception as e:
+             print(f"Error getting SerpAPI results: {e}")
+             return []
+
+     def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """
+         Get full content for the relevant search results.
+         If include_full_content is True and FullSearchResults is available,
+         retrieves full webpage content for the results.
+
+         Args:
+             relevant_items: List of relevant preview dictionaries
+
+         Returns:
+             List of result dictionaries with full content if requested
+         """
+         # Check if we should get full content
+         if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
+             print("Snippet-only mode, skipping full content retrieval")
+
+             # Return the relevant items with their full SerpAPI information
+             results = []
+             for item in relevant_items:
+                 # Use the full result if available, otherwise use the preview;
+                 # copy so the cached previews are not mutated
+                 if "_full_result" in item:
+                     result = item["_full_result"].copy()
+                 else:
+                     result = item.copy()
+                 # Remove the temporary field if present
+                 result.pop("_full_result", None)
+
+                 results.append(result)
+
+             return results
+
+         # If full content retrieval is enabled
+         if self.include_full_content and hasattr(self, 'full_search'):
+             print("Retrieving full webpage content")
+
+             try:
+                 # Use FullSearchResults to fetch and process the URLs of the
+                 # relevant items and attach the full webpage content
+                 results_with_content = self.full_search._get_full_content(relevant_items)
+
+                 return results_with_content
+
+             except Exception as e:
+                 print(f"Error retrieving full content: {e}")
+                 # Fall back to returning the items without full content
+
+         # Return items with their full SerpAPI information
+         results = []
+         for item in relevant_items:
+             # Use the full result if available, otherwise use the preview
+             if "_full_result" in item:
+                 result = item["_full_result"].copy()
+             else:
+                 result = item.copy()
+             # Remove the temporary field
+             result.pop("_full_result", None)
+
+             results.append(result)
+
+         return results
+
+     def run(self, query: str) -> List[Dict[str, Any]]:
+         """
+         Execute a search using SerpAPI with the two-phase approach.
+
+         Args:
+             query: The search query
+
+         Returns:
+             List of search results
+         """
+         print("---Execute a search using SerpAPI (Google)---")
+
+         # Use the implementation from the parent class which handles all phases
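+         # (The base class first calls _get_previews, filters the previews for
+         #  relevance using the LLM, then calls _get_full_content on the
+         #  relevant items.)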
+         results = super().run(query)
+
+         # Clean up
+         if hasattr(self, '_search_results'):
+             del self._search_results
+
+         return results
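
For reference, a minimal usage sketch of the engine added in this file. This is an illustration, not part of the package: it assumes SERP_API_KEY is set in the environment (or passed as api_key=...), and that the base class skips LLM relevance filtering when no llm is supplied.

from local_deep_research.web_search_engines.engines.search_engine_serpapi import SerpAPISearchEngine

# All parameters below come from the constructor signature in the diff
engine = SerpAPISearchEngine(
    max_results=5,
    time_period="m",             # past month (qdr:m)
    include_full_content=False,  # previews/snippets only
)

# run() drives the two-phase flow: previews first, then (optionally) full content
for item in engine.run("retrieval augmented generation"):
    print(item.get("title"), "-", item.get("link"))

Whether full webpage content is actually fetched also depends on config.SEARCH_SNIPPETS_ONLY, as the first branch of _get_full_content above shows.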