local-deep-research 0.1.0__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,6 @@
   <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
   <!-- Change to CDN version that works in browsers -->
   <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/styles/github-dark.min.css">
-  <link rel="icon" type="image/png" href="{{ url_for('static', filename='favicon.ico') }}">
   </head>
   <body>
   <div class="app-container">
@@ -119,6 +118,9 @@
   <i class="fas fa-stop-circle"></i> Terminate Research
   </button>
   <div id="error-message" class="error-message" style="display: none;"></div>
+  <button id="try-again-btn" class="btn btn-primary" style="display: none; margin-top: 15px;">
+  <i class="fas fa-redo"></i> Try Again
+  </button>
   </div>
   </div>
   </div>
@@ -214,6 +216,31 @@
   </div>
   </div>
   </div>
+
+  <!-- Collapsible Log Panel -->
+  <div class="collapsible-log-panel">
+  <div class="log-panel-header" id="log-panel-toggle">
+  <i class="fas fa-chevron-down toggle-icon"></i>
+  <span>Research Logs</span>
+  <span class="log-indicator" id="log-indicator">0</span>
+  </div>
+  <div class="log-panel-content" id="log-panel-content">
+  <div class="log-controls">
+  <div class="log-filter">
+  <div class="filter-buttons">
+  <button class="small-btn selected" onclick="window.filterLogsByType('all')">All</button>
+  <button class="small-btn" onclick="window.filterLogsByType('milestone')">Milestones</button>
+  <button class="small-btn" onclick="window.filterLogsByType('info')">Info</button>
+  <button class="small-btn" onclick="window.filterLogsByType('error')">Errors</button>
+  </div>
+  </div>
+  </div>
+  <div class="console-log" id="console-log-container">
+  <!-- Logs will be added here dynamically -->
+  <div class="empty-log-message">No logs yet. Research logs will appear here as they occur.</div>
+  </div>
+  </div>
+  </div>
   </main>
   </div>
 
@@ -308,5 +335,14 @@
   window.html2canvas_noSandbox = true;
   }
   </script>
+
+  <!-- Add a template for console log entries -->
+  <template id="console-log-entry-template">
+  <div class="console-log-entry">
+  <span class="log-timestamp"></span>
+  <span class="log-badge"></span>
+  <span class="log-message"></span>
+  </div>
+  </template>
   </body>
   </html>
@@ -0,0 +1,454 @@
+import requests
+import logging
+import os
+from typing import Dict, List, Any, Optional
+from langchain_core.language_models import BaseLLM
+import time
+import json
+
+from web_search_engines.search_engine_base import BaseSearchEngine
+from web_search_engines.engines.full_search import FullSearchResults
+import config
+
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class SearXNGSearchEngine(BaseSearchEngine):
+    """
+    SearXNG search engine implementation that requires an instance URL provided via
+    environment variable or configuration. Designed for ethical usage with proper
+    rate limiting and a single-instance approach.
+    """
+
+    def __init__(self,
+                 max_results: int = 15,
+                 instance_url: Optional[str] = None,  # Can be None if using env var
+                 categories: Optional[List[str]] = None,
+                 engines: Optional[List[str]] = None,
+                 language: str = "en",
+                 safe_search: int = 1,
+                 time_range: Optional[str] = None,
+                 delay_between_requests: float = 2.0,
+                 llm: Optional[BaseLLM] = None,
+                 max_filtered_results: Optional[int] = None,
+                 include_full_content: bool = True,
+                 api_key: Optional[str] = None):  # API key is actually the instance URL
+        """
+        Initialize the SearXNG search engine with ethical usage patterns.
+
+        Args:
+            max_results: Maximum number of search results
+            instance_url: URL of your SearXNG instance (preferably self-hosted)
+            categories: List of SearXNG categories to search in (general, images, videos, news, etc.)
+            engines: List of engines to use (google, bing, duckduckgo, etc.)
+            language: Language code for search results
+            safe_search: Safe search level (0=off, 1=moderate, 2=strict)
+            time_range: Time range for results (day, week, month, year)
+            delay_between_requests: Seconds to wait between requests
+            llm: Language model for relevance filtering
+            max_filtered_results: Maximum number of results to keep after filtering
+            include_full_content: Whether to include full webpage content in results
+            api_key: Alternative way to provide instance URL (takes precedence over instance_url)
+        """
+        # Initialize the BaseSearchEngine with the LLM and max_filtered_results
+        super().__init__(llm=llm, max_filtered_results=max_filtered_results)
+
+        # Get instance URL from various sources in priority order:
+        # 1. api_key parameter (which is actually the instance URL)
+        # 2. SEARXNG_INSTANCE environment variable
+        # 3. instance_url parameter
+        # 4. Default to None, which will disable the engine
+        self.instance_url = api_key or os.getenv("SEARXNG_INSTANCE") or instance_url
+
+        # Add debug logging for instance URL
+        logger.info(f"SearXNG init - Instance URL sources: api_key={api_key}, env={os.getenv('SEARXNG_INSTANCE')}, param={instance_url}")
+
+        # Validate and normalize the instance URL if provided
+        if self.instance_url:
+            self.instance_url = self.instance_url.rstrip('/')
+            self.is_available = True
+            logger.info(f"SearXNG initialized with instance URL: {self.instance_url}")
+        else:
+            self.is_available = False
+            logger.error("No SearXNG instance URL provided. The engine is disabled. "
+                         "Set SEARXNG_INSTANCE environment variable or provide instance_url parameter.")
+
+        # Add debug logging for all parameters
+        logger.info(f"SearXNG init params: max_results={max_results}, language={language}, "
+                    f"max_filtered_results={max_filtered_results}, is_available={self.is_available}")
+
+        self.max_results = max_results
+        self.categories = categories or ["general"]
+        self.engines = engines
+        self.language = language
+        self.safe_search = safe_search
+        self.time_range = time_range
+
+        self.delay_between_requests = float(os.getenv("SEARXNG_DELAY", delay_between_requests))
+
+        self.include_full_content = include_full_content
+
+        if self.is_available:
+            self.search_url = f"{self.instance_url}/search"
+            logger.info(f"SearXNG engine initialized with instance: {self.instance_url}")
+            logger.info(f"Rate limiting set to {self.delay_between_requests} seconds between requests")
+
+            self.full_search = FullSearchResults(
+                llm=llm,
+                web_search=self,
+                language=language,
+                max_results=max_results,
+                region="wt-wt",
+                time="y",
+                safesearch="Moderate" if safe_search == 1 else "Off" if safe_search == 0 else "Strict"
+            )
+
+        self.last_request_time = 0
+
+    def _respect_rate_limit(self):
+        """Apply self-imposed rate limiting between requests"""
+        current_time = time.time()
+        time_since_last_request = current_time - self.last_request_time
+
+
+        if time_since_last_request < self.delay_between_requests:
+            wait_time = self.delay_between_requests - time_since_last_request
+            logger.info(f"Rate limiting: waiting {wait_time:.2f} seconds")
+            time.sleep(wait_time)
+
+        self.last_request_time = time.time()
+
+    def _get_search_results(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Get search results from SearXNG with ethical rate limiting.
+
+        Args:
+            query: The search query
+
+        Returns:
+            List of search results from SearXNG
+        """
+        if not self.is_available:
+            logger.error("SearXNG engine is disabled (no instance URL provided) - cannot run search")
+            return []
+
+        logger.info(f"SearXNG running search for query: {query}")
+
+        try:
+            self._respect_rate_limit()
+
+            initial_headers = {
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.9"
+            }
+
+            try:  # visit the homepage first to pick up any session cookies
+                initial_response = requests.get(self.instance_url, headers=initial_headers, timeout=10)
+                cookies = initial_response.cookies
+            except Exception as e:
+                logger.warning(f"Failed to get initial cookies: {e}")
+                cookies = None
+
+            params = {
+                "q": query,
+                "categories": ",".join(self.categories),
+                "language": self.language,
+                "format": "html",  # Use HTML format instead of JSON
+                "pageno": 1,
+                "safesearch": self.safe_search,
+                "count": self.max_results
+            }
+
+            if self.engines:
+                params["engines"] = ",".join(self.engines)
+
+            if self.time_range:
+                params["time_range"] = self.time_range
+
+            # Browser-like headers
+            headers = {
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.9",
+                "Referer": self.instance_url + "/",
+                "Connection": "keep-alive",
+                "Upgrade-Insecure-Requests": "1"
+            }
+
+            logger.info(f"Sending request to SearXNG instance at {self.instance_url}")
+            response = requests.get(
+                self.search_url,
+                params=params,
+                headers=headers,
+                cookies=cookies,
+                timeout=15
+            )
+
+            if response.status_code == 200:
+                try:
+                    from bs4 import BeautifulSoup
+
+                    soup = BeautifulSoup(response.text, 'html.parser')
+                    results = []
+
+                    result_elements = soup.select('.result-item')  # themes differ; fall back through selectors below
+
+                    if not result_elements:
+                        result_elements = soup.select('.result')
+
+                    if not result_elements:
+                        result_elements = soup.select('article')
+
+                    if not result_elements:
+                        logger.debug(f"Classes found in HTML: {[c['class'] for c in soup.select('[class]') if 'class' in c.attrs][:10]}")
+                        result_elements = soup.select('div[id^="result"]')
+
+                    logger.info(f"Found {len(result_elements)} search result elements")
+
+                    for idx, result_element in enumerate(result_elements):
+                        if idx >= self.max_results:
+                            break
+
+                        title_element = (
+                            result_element.select_one('.result-title') or
+                            result_element.select_one('.title') or
+                            result_element.select_one('h3') or
+                            result_element.select_one('a[href]')
+                        )
+
+                        url_element = (
+                            result_element.select_one('.result-url') or
+                            result_element.select_one('.url') or
+                            result_element.select_one('a[href]')
+                        )
+
+                        content_element = (
+                            result_element.select_one('.result-content') or
+                            result_element.select_one('.content') or
+                            result_element.select_one('.snippet') or
+                            result_element.select_one('p')
+                        )
+
+                        title = title_element.get_text(strip=True) if title_element else ""
+
+                        url = ""
+                        if url_element and url_element.has_attr('href'):
+                            url = url_element['href']
+                        elif url_element:
+                            url = url_element.get_text(strip=True)
+
+                        content = content_element.get_text(strip=True) if content_element else ""
+
+                        if not url and title_element and title_element.has_attr('href'):
+                            url = title_element['href']
+
+                        logger.debug(f"Extracted result {idx}: title={title[:30]}..., url={url[:30]}..., content={content[:30]}...")
+
+                        # Add to results if we have at least a title or URL
+                        if title or url:
+                            results.append({
+                                "title": title,
+                                "url": url,
+                                "content": content,
+                                "engine": "searxng",
+                                "category": "general"
+                            })
+
+                    logger.info(f"SearXNG returned {len(results)} results from HTML parsing")
+                    return results
+
+                except ImportError:
+                    logger.error("BeautifulSoup not available for HTML parsing")
+                    return []
+                except Exception as e:
+                    logger.error(f"Error parsing HTML results: {str(e)}")
+                    return []
+            else:
+                logger.error(f"SearXNG returned status code {response.status_code}")
+                return []
+
+        except Exception as e:
+            logger.error(f"Error getting SearXNG results: {e}")
+            return []
+
+    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Get preview information for SearXNG search results.
+
+        Args:
+            query: The search query
+
+        Returns:
+            List of preview dictionaries
+        """
+        if not self.is_available:
+            logger.warning("SearXNG engine is disabled (no instance URL provided)")
+            return []
+
+        logger.info(f"Getting SearXNG previews for query: {query}")
+
+        results = self._get_search_results(query)
+
+        if not results:
+            logger.warning(f"No SearXNG results found for query: {query}")
+            return []
+
+        previews = []
+        for i, result in enumerate(results):
+            title = result.get("title", "")
+            url = result.get("url", "")
+            content = result.get("content", "")
+
+            preview = {
+                "id": url or f"searxng-result-{i}",
+                "title": title,
+                "link": url,
+                "snippet": content,
+                "engine": result.get("engine", ""),
+                "category": result.get("category", "")
+            }
+
+            previews.append(preview)
+
+        return previews
+
+    def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """
+        Get full content for the relevant search results.
+
+        Args:
+            relevant_items: List of relevant preview dictionaries
+
+        Returns:
+            List of result dictionaries with full content
+        """
+        if not self.is_available:
+            return relevant_items
+
+        if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
+            logger.info("Snippet-only mode, skipping full content retrieval")
+            return relevant_items
+
+        logger.info("Retrieving full webpage content")
+
+        try:
+            results_with_content = self.full_search._get_full_content(relevant_items)
+            return results_with_content
+
+        except Exception as e:
+            logger.error(f"Error retrieving full content: {e}")
+            return relevant_items
+
+    def invoke(self, query: str) -> List[Dict[str, Any]]:
+        """Compatibility method for LangChain tools"""
+        return self.run(query)
+
+    def results(self, query: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
+        """
+        Get search results in a format compatible with other search engines.
+
+        Args:
+            query: The search query
+            max_results: Optional override for maximum results
+
+        Returns:
+            List of search result dictionaries
+        """
+        if not self.is_available:
+            return []
+
+        original_max_results = self.max_results
+
+        try:
+            if max_results is not None:
+                self.max_results = max_results
+
+            results = self._get_search_results(query)
+
+            formatted_results = []
+            for result in results:
+                formatted_results.append({
+                    "title": result.get("title", ""),
+                    "link": result.get("url", ""),
+                    "snippet": result.get("content", "")
+                })
+
+            return formatted_results
+
+        finally:
+            self.max_results = original_max_results
+
+    @staticmethod
+    def get_self_hosting_instructions() -> str:
+        """
+        Get instructions for self-hosting a SearXNG instance.
+
+        Returns:
+            String with installation instructions
+        """
+        return """
+# SearXNG Self-Hosting Instructions
+
+The most ethical way to use SearXNG is to host your own instance. Here's how:
+
+## Using Docker (easiest method)
+
+1. Install Docker if you don't have it already
+2. Run these commands:
+
+```bash
+# Pull the SearXNG Docker image
+docker pull searxng/searxng
+
+# Run SearXNG (will be available at http://localhost:8080)
+docker run -d -p 8080:8080 --name searxng searxng/searxng
+```
+
+## Using Docker Compose (recommended for production)
+
+1. Create a file named `docker-compose.yml` with the following content:
+
+```yaml
+version: '3'
+services:
+  searxng:
+    container_name: searxng
+    image: searxng/searxng
+    ports:
+      - "8080:8080"
+    volumes:
+      - ./searxng:/etc/searxng
+    environment:
+      - SEARXNG_BASE_URL=http://localhost:8080/
+    restart: unless-stopped
+```
+
+2. Run with Docker Compose:
+
+```bash
+docker-compose up -d
+```
+
+For more detailed instructions and configuration options, visit:
+https://searxng.github.io/searxng/admin/installation.html
+"""
+
+    def run(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Override BaseSearchEngine run method to add SearXNG-specific error handling.
+        """
+        if not self.is_available:
+            logger.error("SearXNG run method called but engine is not available (missing instance URL)")
+            return []
+
+        logger.info(f"SearXNG run method called with query: {query}")
+
+        try:
+            # Call the parent class's run method
+            return super().run(query)
+        except Exception as e:
+            logger.error(f"Error in SearXNG run method: {str(e)}")
+            # Return empty results on error
+            return []
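
For orientation, here is a minimal usage sketch of the new engine. The import path and instance URL are assumptions: the diff does not name the new file, so the module path below merely mirrors the `web_search_engines` package imports at the top of the file.

```python
import os

# Assumed module path - the diff does not show the new file's name
from web_search_engines.engines.search_engine_searxng import SearXNGSearchEngine

# Point the engine at a self-hosted instance (__init__ reads SEARXNG_INSTANCE)
os.environ["SEARXNG_INSTANCE"] = "http://localhost:8080"  # assumed local instance

engine = SearXNGSearchEngine(
    max_results=5,               # cap on parsed results
    language="en",
    safe_search=1,               # 0=off, 1=moderate, 2=strict
    delay_between_requests=2.0,  # self-imposed rate limit in seconds
)

# results() normalizes each hit to {"title", "link", "snippet"}
for item in engine.results("local deep research", max_results=3):
    print(item["title"], "->", item["link"])
```

Per the docstrings above, `run()` (and `invoke()`) go through the base class's relevance filtering, while `results()` returns the raw scraped hits directly.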
@@ -230,4 +230,23 @@ def get_search(search_tool: str, llm_instance,
         params["time_period"] = time_period
 
     # Create and return the search engine
-    return create_search_engine(search_tool, **params)
+    logger.info(f"Creating search engine for tool: {search_tool} with params: {params.keys()}")
+    engine = create_search_engine(search_tool, **params)
+
+    # Add debugging to check if engine is None
+    if engine is None:
+        logger.error(f"Failed to create search engine for {search_tool} - returned None")
+    else:
+        engine_type = type(engine).__name__
+        logger.info(f"Successfully created search engine of type: {engine_type}")
+        # Check if the engine has a run method
+        if hasattr(engine, 'run'):
+            logger.info(f"Engine has 'run' method: {getattr(engine, 'run')}")
+        else:
+            logger.error("Engine does NOT have 'run' method!")
+
+    # For SearXNG, check the availability flag
+    if hasattr(engine, 'is_available'):
+        logger.info(f"Engine availability flag: {engine.is_available}")
+
+    return engine
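
A sketch of how this factory is typically called; the LLM class is an assumption (any LangChain-compatible model works, and `ChatOllama` with `mistral` matches the README's default), not something shown in the diff:

```python
# Hypothetical call site for get_search() shown above
from langchain_ollama import ChatOllama  # assumed LLM; any LangChain model works

llm = ChatOllama(model="mistral")
engine = get_search("searxng", llm)

# Mirror the checks the factory itself logs before trusting the engine
if engine is not None and getattr(engine, "is_available", True):
    results = engine.run("history of metasearch engines")
```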
@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: local-deep-research
-Version: 0.1.0
+Version: 0.1.12
 Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
 Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
 License: MIT License
@@ -51,7 +51,7 @@ Requires-Dist: flask-socketio>=5.1.1
 Requires-Dist: sqlalchemy>=1.4.23
 Requires-Dist: wikipedia
 Requires-Dist: arxiv>=1.4.3
-Requires-Dist: PyPDF2>=2.0.0
+Requires-Dist: pypdf
 Requires-Dist: sentence-transformers
 Requires-Dist: faiss-cpu
 Requires-Dist: pydantic>=2.0.0
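
The `PyPDF2` → `pypdf` swap above tracks the upstream project rename (PyPDF2 was merged back into pypdf); for consuming code it is essentially an import change. A minimal reading sketch, with an illustrative file name:

```python
from pypdf import PdfReader  # previously: from PyPDF2 import PdfReader

reader = PdfReader("paper.pdf")  # illustrative path
text = "".join(page.extract_text() or "" for page in reader.pages)
print(f"{len(reader.pages)} pages, {len(text)} characters extracted")
```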
@@ -59,6 +59,7 @@ Requires-Dist: pydantic-settings>=2.0.0
 Requires-Dist: toml>=0.10.2
 Requires-Dist: platformdirs>=3.0.0
 Requires-Dist: dynaconf
+Dynamic: license-file
 
 # Local Deep Research
 
@@ -91,12 +92,13 @@ A powerful AI-powered research assistant that performs deep, iterative analysis
 
 - 🌐 **Enhanced Search Integration**
   - **Auto-selection of search sources**: The "auto" search engine intelligently analyzes your query and selects the most appropriate search engine based on the query content
+  - **SearXNG** integration for a privacy-friendly local metasearch engine; no API key required (needs a running SearXNG server)
   - Wikipedia integration for factual knowledge
   - arXiv integration for scientific papers and academic research
   - PubMed integration for biomedical literature and medical research
   - DuckDuckGo integration for web searches (may experience rate limiting)
   - SerpAPI integration for Google search results (requires API key)
-  - **Google Programmable Search Engine** integration for custom search experiences (requires API key)
+  - Google Programmable Search Engine integration for custom search experiences (requires API key)
   - The Guardian integration for news articles and journalism (requires API key)
   - **Local RAG search for private documents** - search your own documents with vector embeddings
   - Full webpage content retrieval
@@ -127,10 +129,10 @@ This example showcases the system's ability to perform multiple research iterati
 
 1. Clone the repository:
 ```bash
-git clone https://github.com/yourusername/local-deep-research.git
+git clone https://github.com/LearningCircuit/local-deep-research.git
 cd local-deep-research
 ```
-
+(Experimental: **pip install local-deep-research** installs the newest features, but they are not yet well tested.)
 2. Install dependencies:
 ```bash
 pip install -r requirements.txt
@@ -147,6 +149,20 @@ ollama pull mistral # Default model - many work really well choose best for you
 ```bash
 # Copy the template
 cp .env.template .env
+```
+
+## Experimental install
+```bash
+# Experimental pip install with new features (not yet well tested):
+pip install local-deep-research
+playwright install
+ollama pull mistral
+```
+## Community & Support
+
+We've just launched our [Discord server](https://discord.gg/2E6gYU2Z) for this project!
+
+Join us there to exchange ideas about research approaches, discuss advanced usage patterns, and share feedback.
 
 # Edit .env with your API keys (if using cloud LLMs)
 ANTHROPIC_API_KEY=your-api-key-here # For Claude
@@ -276,6 +292,7 @@ You can use local search in several ways:
 The system supports multiple search engines that can be selected by changing the `search_tool` variable in `config.py`:
 
 - **Auto** (`auto`): Intelligent search engine selector that analyzes your query and chooses the most appropriate source (Wikipedia, arXiv, local collections, etc.)
+- **SearXNG** (`searxng`): Local metasearch engine, great for privacy; no API key required (requires a running SearXNG server)
 - **Wikipedia** (`wiki`): Best for general knowledge, facts, and overview information
 - **arXiv** (`arxiv`): Great for scientific and academic research, accessing preprints and papers
 - **PubMed** (`pubmed`): Excellent for biomedical literature, medical research, and health information
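
As the context lines above note, the engine is selected through the `search_tool` variable in `config.py`. A minimal sketch of the relevant line (the variable name comes from the README text; everything else in the file is an assumption):

```python
# config.py (sketch) - pick the search backend by name
search_tool = "searxng"  # or "auto", "wiki", "arxiv", "pubmed", ...
```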
@@ -307,6 +324,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 - [DuckDuckGo](https://duckduckgo.com) for web search
 - [The Guardian](https://www.theguardian.com/) for quality journalism
 - [SerpAPI](https://serpapi.com) for Google search results (requires API key)
+- [SearXNG](https://searxng.org/) for privacy-friendly local metasearch
 - Built on [LangChain](https://github.com/hwchase17/langchain) framework
 - Uses [justext](https://github.com/miso-belica/justext) for content extraction
 - [Playwright](https://playwright.dev) for web content retrieval