local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +154 -160
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +87 -45
- local_deep_research/search_system.py +153 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1583 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
- local_deep_research-0.2.2.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,10 +1,11 @@
|
|
1
|
+
import logging
|
2
|
+
from typing import Any, Dict, List, Optional
|
3
|
+
|
1
4
|
import wikipedia
|
2
|
-
from typing import Dict, List, Any, Optional
|
3
5
|
from langchain_core.language_models import BaseLLM
|
4
|
-
import logging
|
5
6
|
|
6
|
-
from
|
7
|
-
from
|
7
|
+
from ...config import search_config
|
8
|
+
from ..search_engine_base import BaseSearchEngine
|
8
9
|
|
9
10
|
# Setup logging
|
10
11
|
logger = logging.getLogger(__name__)
|
@@ -12,18 +13,20 @@ logger = logging.getLogger(__name__)
|
|
12
13
|
|
13
14
|
class WikipediaSearchEngine(BaseSearchEngine):
|
14
15
|
"""Wikipedia search engine implementation with two-phase approach"""
|
15
|
-
|
16
|
-
def __init__(
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
16
|
+
|
17
|
+
def __init__(
|
18
|
+
self,
|
19
|
+
max_results: int = 10,
|
20
|
+
language: str = "en",
|
21
|
+
include_content: bool = True,
|
22
|
+
sentences: int = 5,
|
23
|
+
llm: Optional[BaseLLM] = None,
|
24
|
+
max_filtered_results: Optional[int] = None,
|
25
|
+
**kwargs,
|
26
|
+
):
|
24
27
|
"""
|
25
28
|
Initialize the Wikipedia search engine.
|
26
|
-
|
29
|
+
|
27
30
|
Args:
|
28
31
|
max_results: Maximum number of search results
|
29
32
|
language: Language code for Wikipedia (e.g., 'en', 'fr', 'es')
|
@@ -34,38 +37,42 @@ class WikipediaSearchEngine(BaseSearchEngine):
|
|
34
37
|
**kwargs: Additional parameters (ignored but accepted for compatibility)
|
35
38
|
"""
|
36
39
|
# Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
|
37
|
-
super().__init__(
|
40
|
+
super().__init__(
|
41
|
+
llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
|
42
|
+
)
|
38
43
|
self.include_content = include_content
|
39
44
|
self.sentences = sentences
|
40
|
-
|
45
|
+
|
41
46
|
# Set the Wikipedia language
|
42
47
|
wikipedia.set_lang(language)
|
43
|
-
|
48
|
+
|
44
49
|
def _get_previews(self, query: str) -> List[Dict[str, Any]]:
|
45
50
|
"""
|
46
51
|
Get preview information (titles and summaries) for Wikipedia pages.
|
47
|
-
|
52
|
+
|
48
53
|
Args:
|
49
54
|
query: The search query
|
50
|
-
|
55
|
+
|
51
56
|
Returns:
|
52
57
|
List of preview dictionaries
|
53
58
|
"""
|
54
|
-
logger.info(
|
55
|
-
|
59
|
+
logger.info("Getting Wikipedia page previews for query: %s", query)
|
60
|
+
|
56
61
|
try:
|
57
62
|
# Get search results (just titles)
|
58
63
|
search_results = wikipedia.search(query, results=self.max_results)
|
59
|
-
|
60
|
-
logger.info(
|
61
|
-
|
64
|
+
|
65
|
+
logger.info(
|
66
|
+
f"Found {len(search_results)} Wikipedia results: {search_results}"
|
67
|
+
)
|
68
|
+
|
62
69
|
if not search_results:
|
63
|
-
logger.info(
|
70
|
+
logger.info("No Wikipedia results found for query: %s", query)
|
64
71
|
return []
|
65
|
-
|
72
|
+
|
66
73
|
# Create a cache for full pages (will be populated on-demand)
|
67
74
|
self._page_cache = {}
|
68
|
-
|
75
|
+
|
69
76
|
# Generate previews with summaries
|
70
77
|
previews = []
|
71
78
|
for title in search_results:
|
@@ -73,83 +80,104 @@ class WikipediaSearchEngine(BaseSearchEngine):
|
|
73
80
|
# Get just the summary, with auto_suggest=False to be more precise
|
74
81
|
summary = None
|
75
82
|
try:
|
76
|
-
summary = wikipedia.summary(
|
83
|
+
summary = wikipedia.summary(
|
84
|
+
title, sentences=self.sentences, auto_suggest=False
|
85
|
+
)
|
77
86
|
except wikipedia.exceptions.DisambiguationError as e:
|
78
87
|
# If disambiguation error, try the first option
|
79
88
|
if e.options and len(e.options) > 0:
|
80
|
-
logger.info(
|
89
|
+
logger.info(
|
90
|
+
f"Disambiguation for '{title}', trying first option: {e.options[0]}"
|
91
|
+
)
|
81
92
|
try:
|
82
|
-
summary = wikipedia.summary(
|
93
|
+
summary = wikipedia.summary(
|
94
|
+
e.options[0],
|
95
|
+
sentences=self.sentences,
|
96
|
+
auto_suggest=False,
|
97
|
+
)
|
83
98
|
title = e.options[0] # Use the new title
|
84
99
|
except Exception as inner_e:
|
85
|
-
logger.error(
|
100
|
+
logger.error(
|
101
|
+
f"Error with disambiguation option: {inner_e}"
|
102
|
+
)
|
86
103
|
continue
|
87
104
|
else:
|
88
|
-
logger.warning(
|
105
|
+
logger.warning(
|
106
|
+
f"Disambiguation with no options for '{title}'"
|
107
|
+
)
|
89
108
|
continue
|
90
|
-
|
109
|
+
|
91
110
|
if summary:
|
92
111
|
preview = {
|
93
112
|
"id": title, # Use title as ID
|
94
113
|
"title": title,
|
95
114
|
"snippet": summary,
|
96
|
-
"link": f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
|
115
|
+
"link": f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}",
|
97
116
|
}
|
98
|
-
|
117
|
+
|
99
118
|
previews.append(preview)
|
100
|
-
|
101
|
-
except (
|
102
|
-
|
119
|
+
|
120
|
+
except (
|
121
|
+
wikipedia.exceptions.PageError,
|
122
|
+
wikipedia.exceptions.WikipediaException,
|
123
|
+
) as e:
|
103
124
|
# Skip pages with errors
|
104
125
|
logger.warning(f"Error getting summary for '{title}': {e}")
|
105
126
|
continue
|
106
127
|
except Exception as e:
|
107
128
|
logger.error(f"Unexpected error for '{title}': {e}")
|
108
129
|
continue
|
109
|
-
|
130
|
+
|
110
131
|
logger.info(f"Successfully created {len(previews)} previews from Wikipedia")
|
111
132
|
return previews
|
112
|
-
|
133
|
+
|
113
134
|
except Exception as e:
|
114
135
|
logger.error(f"Error getting Wikipedia previews: {e}")
|
115
136
|
return []
|
116
|
-
|
117
|
-
def _get_full_content(
|
137
|
+
|
138
|
+
def _get_full_content(
|
139
|
+
self, relevant_items: List[Dict[str, Any]]
|
140
|
+
) -> List[Dict[str, Any]]:
|
118
141
|
"""
|
119
142
|
Get full content for the relevant Wikipedia pages.
|
120
|
-
|
143
|
+
|
121
144
|
Args:
|
122
145
|
relevant_items: List of relevant preview dictionaries
|
123
|
-
|
146
|
+
|
124
147
|
Returns:
|
125
148
|
List of result dictionaries with full content
|
126
149
|
"""
|
127
150
|
# Check if we should add full content
|
128
|
-
if
|
151
|
+
if (
|
152
|
+
hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
|
153
|
+
and search_config.SEARCH_SNIPPETS_ONLY
|
154
|
+
):
|
129
155
|
logger.info("Snippet-only mode, skipping full content retrieval")
|
130
156
|
return relevant_items
|
131
|
-
|
132
|
-
logger.info(
|
133
|
-
|
157
|
+
|
158
|
+
logger.info(
|
159
|
+
f"Getting full content for {len(relevant_items)} relevant Wikipedia pages"
|
160
|
+
)
|
161
|
+
|
134
162
|
results = []
|
135
163
|
for item in relevant_items:
|
136
164
|
title = item.get("id") # Title stored as ID
|
137
|
-
|
165
|
+
|
138
166
|
if not title:
|
139
167
|
results.append(item)
|
140
168
|
continue
|
141
|
-
|
169
|
+
|
142
170
|
try:
|
143
171
|
# Get the full page
|
144
172
|
page = wikipedia.page(title, auto_suggest=False)
|
145
|
-
|
173
|
+
|
146
174
|
# Create a full result with all information
|
147
175
|
result = {
|
148
176
|
"title": page.title,
|
149
177
|
"link": page.url,
|
150
|
-
"snippet": item.get("snippet", "") # Keep existing snippet
|
178
|
+
"snippet": item.get("snippet", ""), # Keep existing snippet
|
151
179
|
}
|
152
|
-
|
180
|
+
|
153
181
|
# Add additional information
|
154
182
|
result["content"] = page.content
|
155
183
|
result["full_content"] = page.content
|
@@ -158,29 +186,33 @@ class WikipediaSearchEngine(BaseSearchEngine):
|
|
158
186
|
result["links"] = page.links
|
159
187
|
result["images"] = page.images
|
160
188
|
result["sections"] = page.sections
|
161
|
-
|
189
|
+
|
162
190
|
results.append(result)
|
163
|
-
|
164
|
-
except (
|
165
|
-
|
166
|
-
|
191
|
+
|
192
|
+
except (
|
193
|
+
wikipedia.exceptions.DisambiguationError,
|
194
|
+
wikipedia.exceptions.PageError,
|
195
|
+
wikipedia.exceptions.WikipediaException,
|
196
|
+
) as e:
|
167
197
|
# If error, use the preview
|
168
198
|
logger.warning(f"Error getting full content for '{title}': {e}")
|
169
199
|
results.append(item)
|
170
200
|
except Exception as e:
|
171
|
-
logger.error(
|
201
|
+
logger.error(
|
202
|
+
f"Unexpected error getting full content for '{title}': {e}"
|
203
|
+
)
|
172
204
|
results.append(item)
|
173
|
-
|
205
|
+
|
174
206
|
return results
|
175
|
-
|
207
|
+
|
176
208
|
def get_summary(self, title: str, sentences: Optional[int] = None) -> str:
|
177
209
|
"""
|
178
210
|
Get a summary of a specific Wikipedia page.
|
179
|
-
|
211
|
+
|
180
212
|
Args:
|
181
213
|
title: Title of the Wikipedia page
|
182
214
|
sentences: Number of sentences to include (defaults to self.sentences)
|
183
|
-
|
215
|
+
|
184
216
|
Returns:
|
185
217
|
Summary of the page
|
186
218
|
"""
|
@@ -189,31 +221,37 @@ class WikipediaSearchEngine(BaseSearchEngine):
|
|
189
221
|
return wikipedia.summary(title, sentences=sentences, auto_suggest=False)
|
190
222
|
except wikipedia.exceptions.DisambiguationError as e:
|
191
223
|
if e.options and len(e.options) > 0:
|
192
|
-
return wikipedia.summary(
|
224
|
+
return wikipedia.summary(
|
225
|
+
e.options[0], sentences=sentences, auto_suggest=False
|
226
|
+
)
|
193
227
|
raise
|
194
|
-
|
228
|
+
|
195
229
|
def get_page(self, title: str) -> Dict[str, Any]:
|
196
230
|
"""
|
197
231
|
Get detailed information about a specific Wikipedia page.
|
198
|
-
|
232
|
+
|
199
233
|
Args:
|
200
234
|
title: Title of the Wikipedia page
|
201
|
-
|
235
|
+
|
202
236
|
Returns:
|
203
237
|
Dictionary with page information
|
204
238
|
"""
|
205
|
-
#
|
206
|
-
include_content =
|
207
|
-
|
239
|
+
# Initialize include_content with our instance value
|
240
|
+
include_content = self.include_content
|
241
|
+
|
242
|
+
# Check if we should override with config setting
|
243
|
+
if hasattr(search_config, "SEARCH_SNIPPETS_ONLY"):
|
244
|
+
include_content = not search_config.SEARCH_SNIPPETS_ONLY
|
245
|
+
|
208
246
|
try:
|
209
247
|
page = wikipedia.page(title, auto_suggest=False)
|
210
|
-
|
248
|
+
|
211
249
|
result = {
|
212
250
|
"title": page.title,
|
213
251
|
"link": page.url,
|
214
|
-
"snippet": self.get_summary(title, self.sentences)
|
252
|
+
"snippet": self.get_summary(title, self.sentences),
|
215
253
|
}
|
216
|
-
|
254
|
+
|
217
255
|
# Add additional information if requested
|
218
256
|
if include_content:
|
219
257
|
result["content"] = page.content
|
@@ -223,18 +261,18 @@ class WikipediaSearchEngine(BaseSearchEngine):
|
|
223
261
|
result["links"] = page.links
|
224
262
|
result["images"] = page.images
|
225
263
|
result["sections"] = page.sections
|
226
|
-
|
264
|
+
|
227
265
|
return result
|
228
266
|
except wikipedia.exceptions.DisambiguationError as e:
|
229
267
|
if e.options and len(e.options) > 0:
|
230
268
|
return self.get_page(e.options[0])
|
231
269
|
raise
|
232
|
-
|
270
|
+
|
233
271
|
def set_language(self, language: str) -> None:
|
234
272
|
"""
|
235
273
|
Change the Wikipedia language.
|
236
|
-
|
274
|
+
|
237
275
|
Args:
|
238
276
|
language: Language code (e.g., 'en', 'fr', 'es')
|
239
277
|
"""
|
240
|
-
wikipedia.set_lang(language)
|
278
|
+
wikipedia.set_lang(language)
|