local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +96 -84
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +72 -44
- local_deep_research/search_system.py +147 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1592 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
- local_deep_research-0.2.0.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,31 +1,36 @@
|
|
1
|
-
|
2
|
-
from typing import Dict, List, Any, Optional
|
1
|
+
import logging
|
3
2
|
import os
|
3
|
+
from typing import Any, Dict, List, Optional
|
4
|
+
|
5
|
+
from langchain_community.utilities import SerpAPIWrapper
|
4
6
|
from langchain_core.language_models import BaseLLM
|
5
7
|
|
6
|
-
from
|
7
|
-
from
|
8
|
-
|
8
|
+
from ...config import search_config
|
9
|
+
from ..search_engine_base import BaseSearchEngine
|
10
|
+
|
9
11
|
logger = logging.getLogger(__name__)
|
10
12
|
|
13
|
+
|
11
14
|
class SerpAPISearchEngine(BaseSearchEngine):
|
12
15
|
"""Google search engine implementation using SerpAPI with two-phase approach"""
|
13
16
|
|
14
|
-
def __init__(
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
17
|
+
def __init__(
|
18
|
+
self,
|
19
|
+
max_results: int = 10,
|
20
|
+
region: str = "us",
|
21
|
+
time_period: str = "y",
|
22
|
+
safe_search: bool = True,
|
23
|
+
search_language: str = "English",
|
24
|
+
api_key: Optional[str] = None,
|
25
|
+
language_code_mapping: Optional[Dict[str, str]] = None,
|
26
|
+
llm: Optional[BaseLLM] = None,
|
27
|
+
include_full_content: bool = False,
|
28
|
+
max_filtered_results: Optional[int] = None,
|
29
|
+
**kwargs,
|
30
|
+
):
|
26
31
|
"""
|
27
32
|
Initialize the SerpAPI search engine.
|
28
|
-
|
33
|
+
|
29
34
|
Args:
|
30
35
|
max_results: Maximum number of search results
|
31
36
|
region: Region code for search results
|
@@ -40,9 +45,11 @@ class SerpAPISearchEngine(BaseSearchEngine):
|
|
40
45
|
**kwargs: Additional parameters (ignored but accepted for compatibility)
|
41
46
|
"""
|
42
47
|
# Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
|
43
|
-
super().__init__(
|
48
|
+
super().__init__(
|
49
|
+
llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
|
50
|
+
)
|
44
51
|
self.include_full_content = include_full_content
|
45
|
-
|
52
|
+
|
46
53
|
# Set up language code mapping
|
47
54
|
if language_code_mapping is None:
|
48
55
|
language_code_mapping = {
|
@@ -56,15 +63,17 @@ class SerpAPISearchEngine(BaseSearchEngine):
|
|
56
63
|
"portuguese": "pt",
|
57
64
|
"russian": "ru",
|
58
65
|
}
|
59
|
-
|
66
|
+
|
60
67
|
# Get API key
|
61
68
|
serpapi_api_key = api_key or os.getenv("SERP_API_KEY")
|
62
69
|
if not serpapi_api_key:
|
63
|
-
raise ValueError(
|
64
|
-
|
70
|
+
raise ValueError(
|
71
|
+
"SERP_API_KEY not found. Please provide api_key or set the SERP_API_KEY environment variable."
|
72
|
+
)
|
73
|
+
|
65
74
|
# Get language code
|
66
75
|
language_code = language_code_mapping.get(search_language.lower(), "en")
|
67
|
-
|
76
|
+
|
68
77
|
# Initialize SerpAPI wrapper
|
69
78
|
self.engine = SerpAPIWrapper(
|
70
79
|
serpapi_api_key=serpapi_api_key,
|
@@ -75,14 +84,15 @@ class SerpAPISearchEngine(BaseSearchEngine):
|
|
75
84
|
"safe": "active" if safe_search else "off",
|
76
85
|
"tbs": f"qdr:{time_period}",
|
77
86
|
"num": max_results,
|
78
|
-
}
|
87
|
+
},
|
79
88
|
)
|
80
|
-
|
89
|
+
|
81
90
|
# If full content is requested, initialize FullSearchResults
|
82
91
|
if include_full_content:
|
83
92
|
# Import FullSearchResults only if needed
|
84
93
|
try:
|
85
|
-
from
|
94
|
+
from .full_search import FullSearchResults
|
95
|
+
|
86
96
|
self.full_search = FullSearchResults(
|
87
97
|
llm=llm,
|
88
98
|
web_search=self.engine,
|
@@ -90,28 +100,30 @@ class SerpAPISearchEngine(BaseSearchEngine):
|
|
90
100
|
max_results=max_results,
|
91
101
|
region=region,
|
92
102
|
time=time_period,
|
93
|
-
safesearch="Moderate" if safe_search else "Off"
|
103
|
+
safesearch="Moderate" if safe_search else "Off",
|
94
104
|
)
|
95
105
|
except ImportError:
|
96
|
-
logger.warning(
|
106
|
+
logger.warning(
|
107
|
+
"Warning: FullSearchResults not available. Full content retrieval disabled."
|
108
|
+
)
|
97
109
|
self.include_full_content = False
|
98
|
-
|
110
|
+
|
99
111
|
def _get_previews(self, query: str) -> List[Dict[str, Any]]:
|
100
112
|
"""
|
101
113
|
Get preview information from SerpAPI.
|
102
|
-
|
114
|
+
|
103
115
|
Args:
|
104
116
|
query: The search query
|
105
|
-
|
117
|
+
|
106
118
|
Returns:
|
107
119
|
List of preview dictionaries
|
108
120
|
"""
|
109
121
|
logger.info("Getting search results from SerpAPI")
|
110
|
-
|
122
|
+
|
111
123
|
try:
|
112
124
|
# Get search results from SerpAPI
|
113
125
|
organic_results = self.engine.results(query).get("organic_results", [])
|
114
|
-
|
126
|
+
|
115
127
|
# Format results as previews
|
116
128
|
previews = []
|
117
129
|
for result in organic_results:
|
@@ -121,39 +133,44 @@ class SerpAPISearchEngine(BaseSearchEngine):
|
|
121
133
|
"link": result.get("link", ""),
|
122
134
|
"snippet": result.get("snippet", ""),
|
123
135
|
"displayed_link": result.get("displayed_link", ""),
|
124
|
-
"position": result.get("position")
|
136
|
+
"position": result.get("position"),
|
125
137
|
}
|
126
|
-
|
138
|
+
|
127
139
|
# Store full SerpAPI result for later
|
128
140
|
preview["_full_result"] = result
|
129
|
-
|
141
|
+
|
130
142
|
previews.append(preview)
|
131
|
-
|
143
|
+
|
132
144
|
# Store the previews for potential full content retrieval
|
133
145
|
self._search_results = previews
|
134
|
-
|
146
|
+
|
135
147
|
return previews
|
136
|
-
|
148
|
+
|
137
149
|
except Exception as e:
|
138
150
|
logger.error(f"Error getting SerpAPI results: {e}")
|
139
151
|
return []
|
140
|
-
|
141
|
-
def _get_full_content(
|
152
|
+
|
153
|
+
def _get_full_content(
|
154
|
+
self, relevant_items: List[Dict[str, Any]]
|
155
|
+
) -> List[Dict[str, Any]]:
|
142
156
|
"""
|
143
157
|
Get full content for the relevant search results.
|
144
158
|
If include_full_content is True and FullSearchResults is available,
|
145
159
|
retrieves full webpage content for the results.
|
146
|
-
|
160
|
+
|
147
161
|
Args:
|
148
162
|
relevant_items: List of relevant preview dictionaries
|
149
|
-
|
163
|
+
|
150
164
|
Returns:
|
151
165
|
List of result dictionaries with full content if requested
|
152
166
|
"""
|
153
167
|
# Check if we should get full content
|
154
|
-
if
|
168
|
+
if (
|
169
|
+
hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
|
170
|
+
and search_config.SEARCH_SNIPPETS_ONLY
|
171
|
+
):
|
155
172
|
logger.info("Snippet-only mode, skipping full content retrieval")
|
156
|
-
|
173
|
+
|
157
174
|
# Return the relevant items with their full SerpAPI information
|
158
175
|
results = []
|
159
176
|
for item in relevant_items:
|
@@ -165,30 +182,29 @@ class SerpAPISearchEngine(BaseSearchEngine):
|
|
165
182
|
del result["_full_result"]
|
166
183
|
else:
|
167
184
|
result = item
|
168
|
-
|
185
|
+
|
169
186
|
results.append(result)
|
170
|
-
|
187
|
+
|
171
188
|
return results
|
172
|
-
|
189
|
+
|
173
190
|
# If full content retrieval is enabled
|
174
|
-
if self.include_full_content and hasattr(self,
|
191
|
+
if self.include_full_content and hasattr(self, "full_search"):
|
175
192
|
logger.info("Retrieving full webpage content")
|
176
|
-
|
193
|
+
|
177
194
|
try:
|
178
|
-
# Extract only the links from relevant items
|
179
|
-
links = [item.get("link") for item in relevant_items if item.get("link")]
|
180
|
-
|
181
195
|
# Use FullSearchResults to get full content
|
182
196
|
# This is a simplified approach - in a real implementation,
|
183
197
|
# you would need to fetch and process the URLs
|
184
|
-
results_with_content = self.full_search._get_full_content(
|
185
|
-
|
198
|
+
results_with_content = self.full_search._get_full_content(
|
199
|
+
relevant_items
|
200
|
+
)
|
201
|
+
|
186
202
|
return results_with_content
|
187
|
-
|
203
|
+
|
188
204
|
except Exception as e:
|
189
205
|
logger.info(f"Error retrieving full content: {e}")
|
190
206
|
# Fall back to returning the items without full content
|
191
|
-
|
207
|
+
|
192
208
|
# Return items with their full SerpAPI information
|
193
209
|
results = []
|
194
210
|
for item in relevant_items:
|
@@ -202,28 +218,28 @@ class SerpAPISearchEngine(BaseSearchEngine):
|
|
202
218
|
result = item.copy()
|
203
219
|
if "_full_result" in result:
|
204
220
|
del result["_full_result"]
|
205
|
-
|
221
|
+
|
206
222
|
results.append(result)
|
207
|
-
|
223
|
+
|
208
224
|
return results
|
209
|
-
|
225
|
+
|
210
226
|
def run(self, query: str) -> List[Dict[str, Any]]:
|
211
227
|
"""
|
212
228
|
Execute a search using SerpAPI with the two-phase approach.
|
213
|
-
|
229
|
+
|
214
230
|
Args:
|
215
231
|
query: The search query
|
216
|
-
|
232
|
+
|
217
233
|
Returns:
|
218
234
|
List of search results
|
219
235
|
"""
|
220
236
|
logger.info("---Execute a search using SerpAPI (Google)---")
|
221
|
-
|
237
|
+
|
222
238
|
# Use the implementation from the parent class which handles all phases
|
223
239
|
results = super().run(query)
|
224
|
-
|
240
|
+
|
225
241
|
# Clean up
|
226
|
-
if hasattr(self,
|
242
|
+
if hasattr(self, "_search_results"):
|
227
243
|
del self._search_results
|
228
|
-
|
229
|
-
return results
|
244
|
+
|
245
|
+
return results
|