local-deep-research 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff shows the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those released versions.
- local_deep_research/defaults/main.toml +5 -0
- local_deep_research/search_system.py +98 -38
- local_deep_research/web/app.py +360 -117
- local_deep_research/web/static/css/styles.css +28 -2
- local_deep_research/web/static/js/app.js +640 -197
- local_deep_research/web/templates/index.html +3 -1
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +454 -0
- local_deep_research/web_search_engines/search_engine_factory.py +20 -1
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/METADATA +16 -4
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/RECORD +14 -13
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/LICENSE +0 -0
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/WHEEL +0 -0
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/top_level.txt +0 -0
local_deep_research/web/templates/index.html

@@ -8,7 +8,6 @@
     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
     <!-- Change to CDN version that works in browsers -->
     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/styles/github-dark.min.css">
-    <link rel="icon" type="image/png" href="{{ url_for('static', filename='favicon.ico') }}">
 </head>
 <body>
     <div class="app-container">

@@ -119,6 +118,9 @@
                 <i class="fas fa-stop-circle"></i> Terminate Research
             </button>
             <div id="error-message" class="error-message" style="display: none;"></div>
+            <button id="try-again-btn" class="btn btn-primary" style="display: none; margin-top: 15px;">
+                <i class="fas fa-redo"></i> Try Again
+            </button>
         </div>
     </div>
 </div>
local_deep_research/web_search_engines/engines/search_engine_searxng.py (new file)

@@ -0,0 +1,454 @@
+import requests
+import logging
+import os
+from typing import Dict, List, Any, Optional
+from langchain_core.language_models import BaseLLM
+import time
+import json
+
+from web_search_engines.search_engine_base import BaseSearchEngine
+from web_search_engines.engines.full_search import FullSearchResults
+import config
+
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class SearXNGSearchEngine(BaseSearchEngine):
+    """
+    SearXNG search engine implementation that requires an instance URL provided via
+    environment variable or configuration. Designed for ethical usage with proper
+    rate limiting and single-instance approach.
+    """
+
+    def __init__(self,
+                 max_results: int = 15,
+                 instance_url: Optional[str] = None,  # Can be None if using env var
+                 categories: Optional[List[str]] = None,
+                 engines: Optional[List[str]] = None,
+                 language: str = "en",
+                 safe_search: int = 1,
+                 time_range: Optional[str] = None,
+                 delay_between_requests: float = 2.0,
+                 llm: Optional[BaseLLM] = None,
+                 max_filtered_results: Optional[int] = None,
+                 include_full_content: bool = True,
+                 api_key: Optional[str] = None):  # API key is actually the instance URL
+        """
+        Initialize the SearXNG search engine with ethical usage patterns.
+
+        Args:
+            max_results: Maximum number of search results
+            instance_url: URL of your SearXNG instance (preferably self-hosted)
+            categories: List of SearXNG categories to search in (general, images, videos, news, etc.)
+            engines: List of engines to use (google, bing, duckduckgo, etc.)
+            language: Language code for search results
+            safe_search: Safe search level (0=off, 1=moderate, 2=strict)
+            time_range: Time range for results (day, week, month, year)
+            delay_between_requests: Seconds to wait between requests
+            llm: Language model for relevance filtering
+            max_filtered_results: Maximum number of results to keep after filtering
+            include_full_content: Whether to include full webpage content in results
+            api_key: Alternative way to provide instance URL (takes precedence over instance_url)
+        """
+        # Initialize the BaseSearchEngine with the LLM and max_filtered_results
+        super().__init__(llm=llm, max_filtered_results=max_filtered_results)
+
+        # Get instance URL from various sources in priority order:
+        # 1. api_key parameter (which is actually the instance URL)
+        # 2. SEARXNG_INSTANCE environment variable
+        # 3. instance_url parameter
+        # 4. Default to None, which will disable the engine
+        self.instance_url = api_key or os.getenv("SEARXNG_INSTANCE") or instance_url
+
+        # Add debug logging for instance URL
+        logger.info(f"SearXNG init - Instance URL sources: api_key={api_key}, env={os.getenv('SEARXNG_INSTANCE')}, param={instance_url}")
+
+        # Validate and normalize the instance URL if provided
+        if self.instance_url:
+            self.instance_url = self.instance_url.rstrip('/')
+            self.is_available = True
+            logger.info(f"SearXNG initialized with instance URL: {self.instance_url}")
+        else:
+            self.is_available = False
+            logger.error("No SearXNG instance URL provided. The engine is disabled. "
+                         "Set SEARXNG_INSTANCE environment variable or provide instance_url parameter.")
+
+        # Add debug logging for all parameters
+        logger.info(f"SearXNG init params: max_results={max_results}, language={language}, "
+                    f"max_filtered_results={max_filtered_results}, is_available={self.is_available}")
+
+        self.max_results = max_results
+        self.categories = categories or ["general"]
+        self.engines = engines
+        self.language = language
+        self.safe_search = safe_search
+        self.time_range = time_range
+
+        self.delay_between_requests = float(os.getenv("SEARXNG_DELAY", delay_between_requests))
+
+        self.include_full_content = include_full_content
+
+        if self.is_available:
+            self.search_url = f"{self.instance_url}/search"
+            logger.info(f"SearXNG engine initialized with instance: {self.instance_url}")
+            logger.info(f"Rate limiting set to {self.delay_between_requests} seconds between requests")
+
+            self.full_search = FullSearchResults(
+                llm=llm,
+                web_search=self,
+                language=language,
+                max_results=max_results,
+                region="wt-wt",
+                time="y",
+                safesearch="Moderate" if safe_search == 1 else "Off" if safe_search == 0 else "Strict"
+            )
+
+        self.last_request_time = 0
+
+    def _respect_rate_limit(self):
+        """Apply self-imposed rate limiting between requests"""
+        current_time = time.time()
+        time_since_last_request = current_time - self.last_request_time
+
+        if time_since_last_request < self.delay_between_requests:
+            wait_time = self.delay_between_requests - time_since_last_request
+            logger.info(f"Rate limiting: waiting {wait_time:.2f} seconds")
+            time.sleep(wait_time)
+
+        self.last_request_time = time.time()
+
+    def _get_search_results(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Get search results from SearXNG with ethical rate limiting.
+
+        Args:
+            query: The search query
+
+        Returns:
+            List of search results from SearXNG
+        """
+        if not self.is_available:
+            logger.error("SearXNG engine is disabled (no instance URL provided) - cannot run search")
+            return []
+
+        logger.info(f"SearXNG running search for query: {query}")
+
+        try:
+            self._respect_rate_limit()
+
+            initial_headers = {
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.9"
+            }
+
+            try:
+                initial_response = requests.get(self.instance_url, headers=initial_headers, timeout=10)
+                cookies = initial_response.cookies
+            except Exception as e:
+                logger.warning(f"Failed to get initial cookies: {e}")
+                cookies = None
+
+            params = {
+                "q": query,
+                "categories": ",".join(self.categories),
+                "language": self.language,
+                "format": "html",  # Use HTML format instead of JSON
+                "pageno": 1,
+                "safesearch": self.safe_search,
+                "count": self.max_results
+            }
+
+            if self.engines:
+                params["engines"] = ",".join(self.engines)
+
+            if self.time_range:
+                params["time_range"] = self.time_range
+
+            # Browser-like headers
+            headers = {
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.9",
+                "Referer": self.instance_url + "/",
+                "Connection": "keep-alive",
+                "Upgrade-Insecure-Requests": "1"
+            }
+
+            logger.info(f"Sending request to SearXNG instance at {self.instance_url}")
+            response = requests.get(
+                self.search_url,
+                params=params,
+                headers=headers,
+                cookies=cookies,
+                timeout=15
+            )
+
+            if response.status_code == 200:
+                try:
+                    from bs4 import BeautifulSoup
+
+                    soup = BeautifulSoup(response.text, 'html.parser')
+                    results = []
+
+                    result_elements = soup.select('.result-item')
+
+                    if not result_elements:
+                        result_elements = soup.select('.result')
+
+                    if not result_elements:
+                        result_elements = soup.select('article')
+
+                    if not result_elements:
+                        logger.debug(f"Classes found in HTML: {[c['class'] for c in soup.select('[class]') if 'class' in c.attrs][:10]}")
+                        result_elements = soup.select('div[id^="result"]')
+
+                    logger.info(f"Found {len(result_elements)} search result elements")
+
+                    for idx, result_element in enumerate(result_elements):
+                        if idx >= self.max_results:
+                            break
+
+                        title_element = (
+                            result_element.select_one('.result-title') or
+                            result_element.select_one('.title') or
+                            result_element.select_one('h3') or
+                            result_element.select_one('a[href]')
+                        )
+
+                        url_element = (
+                            result_element.select_one('.result-url') or
+                            result_element.select_one('.url') or
+                            result_element.select_one('a[href]')
+                        )
+
+                        content_element = (
+                            result_element.select_one('.result-content') or
+                            result_element.select_one('.content') or
+                            result_element.select_one('.snippet') or
+                            result_element.select_one('p')
+                        )
+
+                        title = title_element.get_text(strip=True) if title_element else ""
+
+                        url = ""
+                        if url_element and url_element.has_attr('href'):
+                            url = url_element['href']
+                        elif url_element:
+                            url = url_element.get_text(strip=True)
+
+                        content = content_element.get_text(strip=True) if content_element else ""
+
+                        if not url and title_element and title_element.has_attr('href'):
+                            url = title_element['href']
+
+                        logger.debug(f"Extracted result {idx}: title={title[:30]}..., url={url[:30]}..., content={content[:30]}...")
+
+                        # Add to results if we have at least a title or URL
+                        if title or url:
+                            results.append({
+                                "title": title,
+                                "url": url,
+                                "content": content,
+                                "engine": "searxng",
+                                "category": "general"
+                            })
+
+                    logger.info(f"SearXNG returned {len(results)} results from HTML parsing")
+                    return results
+
+                except ImportError:
+                    logger.error("BeautifulSoup not available for HTML parsing")
+                    return []
+                except Exception as e:
+                    logger.error(f"Error parsing HTML results: {str(e)}")
+                    return []
+            else:
+                logger.error(f"SearXNG returned status code {response.status_code}")
+                return []
+
+        except Exception as e:
+            logger.error(f"Error getting SearXNG results: {e}")
+            return []
+
+    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Get preview information for SearXNG search results.
+
+        Args:
+            query: The search query
+
+        Returns:
+            List of preview dictionaries
+        """
+        if not self.is_available:
+            logger.warning("SearXNG engine is disabled (no instance URL provided)")
+            return []
+
+        logger.info(f"Getting SearXNG previews for query: {query}")
+
+        results = self._get_search_results(query)
+
+        if not results:
+            logger.warning(f"No SearXNG results found for query: {query}")
+            return []
+
+        previews = []
+        for i, result in enumerate(results):
+            title = result.get("title", "")
+            url = result.get("url", "")
+            content = result.get("content", "")
+
+            preview = {
+                "id": url or f"searxng-result-{i}",
+                "title": title,
+                "link": url,
+                "snippet": content,
+                "engine": result.get("engine", ""),
+                "category": result.get("category", "")
+            }
+
+            previews.append(preview)
+
+        return previews
+
+    def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """
+        Get full content for the relevant search results.
+
+        Args:
+            relevant_items: List of relevant preview dictionaries
+
+        Returns:
+            List of result dictionaries with full content
+        """
+        if not self.is_available:
+            return relevant_items
+
+        if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
+            logger.info("Snippet-only mode, skipping full content retrieval")
+            return relevant_items
+
+        logger.info("Retrieving full webpage content")
+
+        try:
+            results_with_content = self.full_search._get_full_content(relevant_items)
+            return results_with_content
+
+        except Exception as e:
+            logger.error(f"Error retrieving full content: {e}")
+            return relevant_items
+
+    def invoke(self, query: str) -> List[Dict[str, Any]]:
+        """Compatibility method for LangChain tools"""
+        return self.run(query)
+
+    def results(self, query: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
+        """
+        Get search results in a format compatible with other search engines.
+
+        Args:
+            query: The search query
+            max_results: Optional override for maximum results
+
+        Returns:
+            List of search result dictionaries
+        """
+        if not self.is_available:
+            return []
+
+        original_max_results = self.max_results
+
+        try:
+            if max_results is not None:
+                self.max_results = max_results
+
+            results = self._get_search_results(query)
+
+            formatted_results = []
+            for result in results:
+                formatted_results.append({
+                    "title": result.get("title", ""),
+                    "link": result.get("url", ""),
+                    "snippet": result.get("content", "")
+                })
+
+            return formatted_results
+
+        finally:
+            self.max_results = original_max_results
+
+    @staticmethod
+    def get_self_hosting_instructions() -> str:
+        """
+        Get instructions for self-hosting a SearXNG instance.
+
+        Returns:
+            String with installation instructions
+        """
+        return """
+# SearXNG Self-Hosting Instructions
+
+The most ethical way to use SearXNG is to host your own instance. Here's how:
+
+## Using Docker (easiest method)
+
+1. Install Docker if you don't have it already
+2. Run these commands:
+
+```bash
+# Pull the SearXNG Docker image
+docker pull searxng/searxng
+
+# Run SearXNG (will be available at http://localhost:8080)
+docker run -d -p 8080:8080 --name searxng searxng/searxng
+```
+
+## Using Docker Compose (recommended for production)
+
+1. Create a file named `docker-compose.yml` with the following content:
+
+```yaml
+version: '3'
+services:
+  searxng:
+    container_name: searxng
+    image: searxng/searxng
+    ports:
+      - "8080:8080"
+    volumes:
+      - ./searxng:/etc/searxng
+    environment:
+      - SEARXNG_BASE_URL=http://localhost:8080/
+    restart: unless-stopped
+```
+
+2. Run with Docker Compose:
+
+```bash
+docker-compose up -d
+```
+
+For more detailed instructions and configuration options, visit:
+https://searxng.github.io/searxng/admin/installation.html
+"""
+
+    def run(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Override BaseSearchEngine run method to add SearXNG-specific error handling.
+        """
+        if not self.is_available:
+            logger.error("SearXNG run method called but engine is not available (missing instance URL)")
+            return []
+
+        logger.info(f"SearXNG run method called with query: {query}")
+
+        try:
+            # Call the parent class's run method
+            return super().run(query)
+        except Exception as e:
+            logger.error(f"Error in SearXNG run method: {str(e)}")
+            # Return empty results on error
+            return []
local_deep_research/web_search_engines/search_engine_factory.py

@@ -230,4 +230,23 @@ def get_search(search_tool: str, llm_instance,
         params["time_period"] = time_period
 
     # Create and return the search engine
-    return create_search_engine(search_tool, **params)
+    logger.info(f"Creating search engine for tool: {search_tool} with params: {params.keys()}")
+    engine = create_search_engine(search_tool, **params)
+
+    # Add debugging to check if engine is None
+    if engine is None:
+        logger.error(f"Failed to create search engine for {search_tool} - returned None")
+    else:
+        engine_type = type(engine).__name__
+        logger.info(f"Successfully created search engine of type: {engine_type}")
+        # Check if the engine has run method
+        if hasattr(engine, 'run'):
+            logger.info(f"Engine has 'run' method: {getattr(engine, 'run')}")
+        else:
+            logger.error(f"Engine does NOT have 'run' method!")
+
+        # For SearxNG, check availability flag
+        if hasattr(engine, 'is_available'):
+            logger.info(f"Engine availability flag: {engine.is_available}")
+
+    return engine
{local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: local-deep-research
-Version: 0.1.0
+Version: 0.1.1
 Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
 Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
 License: MIT License

@@ -91,12 +91,13 @@ A powerful AI-powered research assistant that performs deep, iterative analysis
 
 - 🌐 **Enhanced Search Integration**
   - **Auto-selection of search sources**: The "auto" search engine intelligently analyzes your query and selects the most appropriate search engine based on the query content
+  - **SearXNG** integration for local web-search engine, great for privacy, no API key required (requires a searxng server)
   - Wikipedia integration for factual knowledge
   - arXiv integration for scientific papers and academic research
   - PubMed integration for biomedical literature and medical research
   - DuckDuckGo integration for web searches (may experience rate limiting)
   - SerpAPI integration for Google search results (requires API key)
--
+  - Google Programmable Search Engine integration for custom search experiences (requires API key)
   - The Guardian integration for news articles and journalism (requires API key)
   - **Local RAG search for private documents** - search your own documents with vector embeddings
   - Full webpage content retrieval

@@ -127,10 +128,10 @@ This example showcases the system's ability to perform multiple research iterati
 
 1. Clone the repository:
 ```bash
-git clone https://github.com/
+git clone https://github.com/LearningCircuit/local-deep-research.git
 cd local-deep-research
 ```
-
+(experimental pip install with new features (but not so well tested yet): **pip install local-deep-research** )
 2. Install dependencies:
 ```bash
 pip install -r requirements.txt

@@ -147,6 +148,15 @@ ollama pull mistral # Default model - many work really well choose best for you
 ```bash
 # Copy the template
 cp .env.template .env
+```
+
+## Experimental install
+```bash
+#experimental pip install with new features (but not so well tested yet):
+pip install local-deep-research
+playwright install
+ollama pull mistral
+```
 
 # Edit .env with your API keys (if using cloud LLMs)
 ANTHROPIC_API_KEY=your-api-key-here # For Claude

@@ -276,6 +286,7 @@ You can use local search in several ways:
 The system supports multiple search engines that can be selected by changing the `search_tool` variable in `config.py`:
 
 - **Auto** (`auto`): Intelligent search engine selector that analyzes your query and chooses the most appropriate source (Wikipedia, arXiv, local collections, etc.)
+- **SearXNG** (`searxng`): Local web-search engine, great for privacy, no API key required (requires a searxng server)
 - **Wikipedia** (`wiki`): Best for general knowledge, facts, and overview information
 - **arXiv** (`arxiv`): Great for scientific and academic research, accessing preprints and papers
 - **PubMed** (`pubmed`): Excellent for biomedical literature, medical research, and health information
@@ -307,6 +318,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 - [DuckDuckGo](https://duckduckgo.com) for web search
 - [The Guardian](https://www.theguardian.com/) for quality journalism
 - [SerpAPI](https://serpapi.com) for Google search results (requires API key)
+- [SearXNG](https://searxng.org/) for local web-search engine
 - Built on [LangChain](https://github.com/hwchase17/langchain) framework
 - Uses [justext](https://github.com/miso-belica/justext) for content extraction
 - [Playwright](https://playwright.dev) for web content retrieval
{local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/RECORD

@@ -4,11 +4,11 @@ local_deep_research/config.py,sha256=oFSbtWpv0lrekuBZz3BYylezIZB-x0a8yjAYd3HZLWU
 local_deep_research/local_collections.py,sha256=SB-fdptT7qS0klJUVx_Rs9OgDwafMUgI46984WlZGKI,6076
 local_deep_research/main.py,sha256=uQXtGQ6LtZNd5Qw63D5ke4Q_LjYimouWVSUknVsk3JQ,3645
 local_deep_research/report_generator.py,sha256=UOiSw_vPHgtUpI8L9_UaOlpBVBloPB-ilhAo-1d2B9M,8200
-local_deep_research/search_system.py,sha256=
+local_deep_research/search_system.py,sha256=PqGd3gUv2nuhtUo59I1jR2d5ksvUs_NaqDZS2t8djDw,15096
 local_deep_research/defaults/__init__.py,sha256=2Vvlkl-gmP_qPYWegE4JBgummypogl3VXrQ1XzptFDU,1381
 local_deep_research/defaults/llm_config.py,sha256=88IGWPPvikSKmAqfqsGovBx2Jac5eh2sBY_LIW624Ik,7910
 local_deep_research/defaults/local_collections.toml,sha256=_edVWVHrhunMfazjejhJlGPRkHKKIP51qQtNkMgNEiA,1406
-local_deep_research/defaults/main.toml,sha256=
+local_deep_research/defaults/main.toml,sha256=l_J9JAPhKEp63IsLBO0hQDVimxogEpnrEVnNjiOeUxg,1403
 local_deep_research/defaults/search_engines.toml,sha256=lK2lm9qgQkY308B3RVEa5UsksFKuZN-Sz7ES7w7YuLg,7770
 local_deep_research/utilties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/utilties/enums.py,sha256=QQxov4i1VED_eZnDlOz4Oro4rLgP4pfpUal8ksUL2uE,196

@@ -16,12 +16,12 @@ local_deep_research/utilties/llm_utils.py,sha256=IGv-_gJWqLTpO3_op1NHIwxKaFEzmXh
 local_deep_research/utilties/search_utilities.py,sha256=C8ycFd7blcq5vtnd6GxP8dkepZT6EEqHFtT3WYxF0Ck,4151
 local_deep_research/utilties/setup_utils.py,sha256=t6GNp7lK1nLPdPNCkYUk82IATGM62vqy8UBD-KqShOs,215
 local_deep_research/web/__init__.py,sha256=3oHMatNu8r24FBtpojriIVbHYOVSHj4Q-quycMKOuDk,62
-local_deep_research/web/app.py,sha256=
-local_deep_research/web/static/css/styles.css,sha256=
-local_deep_research/web/static/js/app.js,sha256=
+local_deep_research/web/app.py,sha256=PMpuqhRUMBjDeSuWqaMMANaSde71UQzi-XAWSWL8wkI,61097
+local_deep_research/web/static/css/styles.css,sha256=Dn_mKTvDhmNlcU5cVd0iRWJZHYDId6_jRBL5sbYHztY,18325
+local_deep_research/web/static/js/app.js,sha256=H0yYc36BpVVkty_WhHPMEcTS4neqedfcZOnKvkHUcqE,117305
 local_deep_research/web/templates/api_keys_config.html,sha256=jA8Y-nfUGJ1dTvbw2jK_8xPy2x6UG_5gHpbrTJAex2g,3527
 local_deep_research/web/templates/collections_config.html,sha256=Dci7KumXBON8rAXRX8TVjgqS-bbht7d6aQiedDUnxQ0,3560
-local_deep_research/web/templates/index.html,sha256=
+local_deep_research/web/templates/index.html,sha256=m3dPkmiAdhYHxBMieW2wGwg5Z5msFWK3pghktRxc-d0,15681
 local_deep_research/web/templates/llm_config.html,sha256=23BqM2bFbb_S3xeXHhAnsw_BWIhjQto430QWL67mfaU,4169
 local_deep_research/web/templates/main_config.html,sha256=xMIgqZzrMxLzeKavsDX3XY34K4aBJYB-lHeaNcE6AZc,3266
 local_deep_research/web/templates/search_engines_config.html,sha256=z_krznfdhF3erWy-qsAK8mUaut9x5zZ0MrU3AciYjQc,7008

@@ -30,7 +30,7 @@ local_deep_research/web/templates/settings_dashboard.html,sha256=De-v1KNdVvkXme5
 local_deep_research/web_search_engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/web_search_engines/full_search.py,sha256=3SSTvD12g4pNlZCSGh8jwsyYWpQglgqjADnq8dG1zyI,9756
 local_deep_research/web_search_engines/search_engine_base.py,sha256=w8CJdtkcp_Ba-R3mm0RLPVap1Fu1xcfIEmge1IUUhTI,8417
-local_deep_research/web_search_engines/search_engine_factory.py,sha256=
+local_deep_research/web_search_engines/search_engine_factory.py,sha256=B_QaqoAwnVXCmHNdqGbo94LekWY6wpBw_PWNkI120qE,10728
 local_deep_research/web_search_engines/search_engines_config.py,sha256=bNCuR09NOk5cjnKIgDQfhPipqmvDKeE7WP_6p8LLZf0,1979
 local_deep_research/web_search_engines/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/web_search_engines/engines/full_search.py,sha256=mcxS8o7-WmOQc3_H4232adhBHevZfSHWmaOFoia68UU,4711

@@ -45,12 +45,13 @@ local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=uAs
 local_deep_research/web_search_engines/engines/search_engine_local_all.py,sha256=nmVAUb1VlzWSQLWtfiN1Yiwo3ehOu0nQAIV1N6P8_5c,5924
 local_deep_research/web_search_engines/engines/search_engine_medrxiv.py,sha256=rkWqhgEUxUoHMWaJeA1JTYKyzsTguQejg6qEeDOG_lo,24009
 local_deep_research/web_search_engines/engines/search_engine_pubmed.py,sha256=AnYcSsy1Q1puG_ZcS6t0mLu-ZgKfdqkOE5-FtLlrYcc,39199
+local_deep_research/web_search_engines/engines/search_engine_searxng.py,sha256=9qR2_DJ-K7dibR4dzoDd3ImfT8C5r3lrXzr8fzVuYPs,18012
 local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=0zFskxH5MDDzTpwSXjdM25J8vcJrI7xtjnQDpodRZb0,9131
 local_deep_research/web_search_engines/engines/search_engine_wayback.py,sha256=BJ6MBuryJ88VVT_YtNx04ruagCx95rE9W7Af0DboAbI,18145
 local_deep_research/web_search_engines/engines/search_engine_wikipedia.py,sha256=LSLOA0AuNy_Yb1-qeAEiwx_fFr2qK8FQG4pY5j-VycY,9812
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
+local_deep_research-0.1.1.dist-info/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
+local_deep_research-0.1.1.dist-info/METADATA,sha256=8A1xNEGb7tvHGnLLx9cH6d40A3PzsFQv5Gb_dI7WgCY,14894
+local_deep_research-0.1.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
+local_deep_research-0.1.1.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
+local_deep_research-0.1.1.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
+local_deep_research-0.1.1.dist-info/RECORD,,
The remaining dist-info files (LICENSE, WHEEL, entry_points.txt, top_level.txt) were renamed to the 0.1.1 path without content changes.