local-deep-research 0.1.16__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (23)
  1. local_deep_research/citation_handler.py +0 -2
  2. local_deep_research/config.py +1 -4
  3. local_deep_research/defaults/llm_config.py +2 -2
  4. local_deep_research/defaults/main.toml +3 -3
  5. local_deep_research/report_generator.py +1 -5
  6. local_deep_research/search_system.py +1 -1
  7. local_deep_research/utilties/search_utilities.py +3 -4
  8. local_deep_research/web_search_engines/engines/full_search.py +9 -8
  9. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -14
  10. local_deep_research/web_search_engines/engines/search_engine_brave.py +10 -9
  11. local_deep_research/web_search_engines/engines/search_engine_ddg.py +4 -2
  12. local_deep_research/web_search_engines/engines/search_engine_local.py +1 -1
  13. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +9 -8
  14. local_deep_research/web_search_engines/search_engine_base.py +1 -1
  15. local_deep_research-0.1.17.dist-info/METADATA +393 -0
  16. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/RECORD +20 -22
  17. local_deep_research/local_collections.py +0 -141
  18. local_deep_research/web_search_engines/full_search.py +0 -254
  19. local_deep_research-0.1.16.dist-info/METADATA +0 -346
  20. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/WHEEL +0 -0
  21. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/entry_points.txt +0 -0
  22. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/licenses/LICENSE +0 -0
  23. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/top_level.txt +0 -0
local_deep_research/web_search_engines/full_search.py (deleted)
@@ -1,254 +0,0 @@
- import justext
- from langchain_community.document_loaders import AsyncChromiumLoader
- from langchain_community.document_transformers import BeautifulSoupTransformer
- from langchain_core.language_models import BaseLLM
- from typing import List, Dict, Any, Optional, Union
- import json
- import os
- from .utilties.search_utilities import remove_think_tags
- from datetime import datetime
- from local_deep_research import config
-
- class FullSearchResults:
-     """
-     Enhanced web content retrieval class that works with the BaseSearchEngine architecture.
-     Can be used as a wrapper around web-based search engines like DuckDuckGo and SerpAPI.
-     """
-
-     def __init__(
-         self,
-         llm: BaseLLM,
-         web_search,
-         output_format: str = "list",
-         language: str = "English",
-         max_results: int = 10,
-         region: str = "wt-wt",
-         time: str = "y",
-         safesearch: str = "Moderate"
-     ):
-         """
-         Initialize the full search results processor.
-
-         Args:
-             llm: Language model instance for relevance filtering
-             web_search: Web search engine instance that provides initial results
-             output_format: Format of output ('list' or other formats)
-             language: Language for content processing
-             max_results: Maximum number of search results
-             region: Search region
-             time: Time period for search results
-             safesearch: Safe search setting
-         """
-         self.llm = llm
-         self.output_format = output_format
-         self.language = language
-         self.max_results = max_results
-         self.region = region
-         self.time = time
-         self.safesearch = safesearch
-         self.web_search = web_search
-         os.environ["USER_AGENT"] = "Local Deep Research/1.0"
-
-         self.bs_transformer = BeautifulSoupTransformer()
-         self.tags_to_extract = ["p", "div", "span"]
-
-     def run(self, query: str) -> List[Dict[str, Any]]:
-         """
-         Legacy method that performs a full search in one step.
-         Respects config parameters:
-         - SEARCH_SNIPPETS_ONLY: If True, only returns snippets without full content
-         - SKIP_RELEVANCE_FILTER: If True, returns all results without filtering
-
-         Args:
-             query: The search query
-
-         Returns:
-             List of search results with full content (unless SEARCH_SNIPPETS_ONLY is True)
-         """
-         # Phase 1: Get search results from the web search engine
-         previews = self._get_previews(query)
-         if not previews:
-             return []
-
-         # Phase 2: Filter URLs using LLM (unless SKIP_RELEVANCE_FILTER is True)
-         if hasattr(config, 'SKIP_RELEVANCE_FILTER') and config.SKIP_RELEVANCE_FILTER:
-             relevant_items = previews
-             print("Skipping relevance filtering as per config")
-         else:
-             relevant_items = self._filter_relevant_items(previews, query)
-             if not relevant_items:
-                 return []
-
-         # Phase 3: Get full content for relevant items (unless SEARCH_SNIPPETS_ONLY is True)
-         if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
-             print("Returning snippet-only results as per config")
-             return relevant_items
-         else:
-             results = self._get_full_content(relevant_items)
-             return results
-
-     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
-         """
-         Get preview information from the web search engine.
-
-         Args:
-             query: The search query
-
-         Returns:
-             List of preview dictionaries
-         """
-         try:
-             # Get search results from the web search engine
-             search_results = self.web_search.invoke(query)
-
-             if not isinstance(search_results, list):
-                 print("Error: Expected search results in list format")
-                 return []
-
-             # Return the results as previews
-             return search_results
-
-         except Exception as e:
-             print(f"Error getting previews: {e}")
-             return []
-
-     def _filter_relevant_items(self, previews: List[Dict[str, Any]], query: str) -> List[Dict[str, Any]]:
-         """
-         Filter previews for relevance using LLM.
-
-         Args:
-             previews: List of preview dictionaries
-             query: The original search query
-
-         Returns:
-             List of relevant preview dictionaries
-         """
-         # Skip filtering if disabled in config or no previews
-         if not config.QUALITY_CHECK_DDG_URLS or not previews:
-             return previews
-
-         # Format for LLM evaluation
-         now = datetime.now()
-         current_time = now.strftime("%Y-%m-%d")
-         prompt = f"""ONLY Return a JSON array. The response contains no letters. Evaluate these URLs for:
- 1. Timeliness (today: {current_time})
- 2. Factual accuracy (cross-reference major claims)
- 3. Source reliability (prefer official company websites, established news outlets)
- 4. Direct relevance to query: {query}
-
- URLs to evaluate:
- {json.dumps(previews, indent=2)}
-
- Return a JSON array of indices (0-based) for sources that meet ALL criteria.
- ONLY Return a JSON array of indices (0-based) and nothing else. No letters.
- Example response: \n[0, 2, 4]\n\n"""
-
-         try:
-             # Get LLM's evaluation
-             response = self.llm.invoke(prompt)
-
-             # Extract JSON array from response
-             response_text = remove_think_tags(response.content)
-             # Clean up response to handle potential formatting issues
-             response_text = response_text.strip()
-
-             # Find the first occurrence of '[' and the last occurrence of ']'
-             start_idx = response_text.find('[')
-             end_idx = response_text.rfind(']')
-
-             if start_idx >= 0 and end_idx > start_idx:
-                 array_text = response_text[start_idx:end_idx+1]
-                 good_indices = json.loads(array_text)
-
-                 # Return only the results with good indices
-                 return [r for i, r in enumerate(previews) if i in good_indices]
-             else:
-                 print("Could not find JSON array in response, returning all previews")
-                 return previews
-
-         except Exception as e:
-             print(f"URL filtering error: {e}")
-             # Fall back to returning all previews on error
-             return previews
-
-     def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-         """
-         Get full content for the relevant items by retrieving and processing web pages.
-
-         Args:
-             relevant_items: List of relevant preview dictionaries
-
-         Returns:
-             List of result dictionaries with full content
-         """
-         nr_full_text = 0
-
-         # Extract URLs from relevant items
-         urls = [item.get("link") for item in relevant_items if item.get("link")]
-
-         if not urls:
-             print("\n === NO VALID LINKS ===\n")
-             return relevant_items
-
-         try:
-             # Download the full HTML pages for filtered URLs
-             loader = AsyncChromiumLoader(urls)
-             html_docs = loader.load()
-
-             # Process the HTML using BeautifulSoupTransformer
-             full_docs = self.bs_transformer.transform_documents(
-                 html_docs, tags_to_extract=self.tags_to_extract
-             )
-
-             # Remove boilerplate from each document
-             url_to_content = {}
-             for doc in full_docs:
-                 nr_full_text += 1
-                 source = doc.metadata.get("source")
-                 if source:
-                     cleaned_text = self._remove_boilerplate(doc.page_content)
-                     url_to_content[source] = cleaned_text
-
-             # Attach the cleaned full content to each result
-             results = []
-             for item in relevant_items:
-                 new_item = item.copy()
-                 link = item.get("link")
-                 new_item["full_content"] = url_to_content.get(link, None)
-                 results.append(new_item)
-
-             print(f"FULL SEARCH WITH FILTERED URLS - Full text retrieved: {nr_full_text}")
-             return results
-
-         except Exception as e:
-             print(f"Error retrieving full content: {e}")
-             # Return original items if full content retrieval fails
-             return relevant_items
-
-     def _remove_boilerplate(self, html: str) -> str:
-         """
-         Remove boilerplate content from HTML.
-
-         Args:
-             html: HTML content
-
-         Returns:
-             Cleaned text content
-         """
-         if not html or not html.strip():
-             return ""
-         try:
-             paragraphs = justext.justext(html, justext.get_stoplist(self.language))
-             cleaned = "\n".join([p.text for p in paragraphs if not p.is_boilerplate])
-             return cleaned
-         except Exception as e:
-             print(f"Error removing boilerplate: {e}")
-             return html
-
-     def invoke(self, query: str) -> List[Dict[str, Any]]:
-         """Compatibility method for LangChain tools"""
-         return self.run(query)
-
-     def __call__(self, query: str) -> List[Dict[str, Any]]:
-         """Make the class callable like a function"""
-         return self.invoke(query)
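For context, the class removed above was driven through its `run`/`invoke` interface. A minimal sketch of how it could be wired up in 0.1.16 (the `StubSearch` helper is hypothetical; `FullSearchResults` only requires that `web_search.invoke(query)` return a list of result dicts):

```python
# Sketch only: composing the removed FullSearchResults with an LLM and a
# search engine. StubSearch is a hypothetical stand-in for the DuckDuckGo
# or SerpAPI wrappers the class was designed to wrap.
from langchain_ollama import ChatOllama
from local_deep_research.web_search_engines.full_search import FullSearchResults  # 0.1.16 path; gone in 0.1.17

class StubSearch:
    def invoke(self, query):
        return [{"title": "Example", "link": "https://example.com", "snippet": "..."}]

llm = ChatOllama(model="mistral")  # any model exposing .invoke() works for filtering
searcher = FullSearchResults(llm=llm, web_search=StubSearch(), max_results=10)

for item in searcher.run("fusion energy breakthroughs"):
    print(item.get("link"), bool(item.get("full_content")))
```

The per-engine `full_search.py` under `web_search_engines/engines/` (item 8 in the file list above, modified rather than deleted) appears to carry this functionality forward.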
local_deep_research-0.1.16.dist-info/METADATA (deleted)
@@ -1,346 +0,0 @@
- Metadata-Version: 2.4
- Name: local-deep-research
- Version: 0.1.16
- Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
- Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
- License: MIT License
-
- Copyright (c) 2025 LearningCircuit
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-
- Project-URL: Homepage, https://github.com/LearningCircuit/local-deep-research
- Project-URL: Bug Tracker, https://github.com/LearningCircuit/local-deep-research/issues
- Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
- Requires-Python: >=3.8
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Requires-Dist: langchain>=0.3.18
- Requires-Dist: langchain-community>=0.3.17
- Requires-Dist: langchain-core>=0.3.34
- Requires-Dist: langchain-ollama>=0.2.3
- Requires-Dist: langchain-openai>=0.3.5
- Requires-Dist: langchain_anthropic>=0.3.7
- Requires-Dist: duckduckgo_search>=7.3.2
- Requires-Dist: python-dateutil>=2.9.0
- Requires-Dist: typing_extensions>=4.12.2
- Requires-Dist: justext
- Requires-Dist: playwright
- Requires-Dist: beautifulsoup4
- Requires-Dist: flask>=2.0.1
- Requires-Dist: flask-cors>=3.0.10
- Requires-Dist: flask-socketio>=5.1.1
- Requires-Dist: sqlalchemy>=1.4.23
- Requires-Dist: wikipedia
- Requires-Dist: arxiv>=1.4.3
- Requires-Dist: pypdf
- Requires-Dist: sentence-transformers
- Requires-Dist: faiss-cpu
- Requires-Dist: pydantic>=2.0.0
- Requires-Dist: pydantic-settings>=2.0.0
- Requires-Dist: toml>=0.10.2
- Requires-Dist: platformdirs>=3.0.0
- Requires-Dist: dynaconf
- Dynamic: license-file
-
- # Local Deep Research
-
- A powerful AI-powered research assistant that performs deep, iterative analysis using multiple LLMs and web searches. The system can be run locally for privacy or configured to use cloud-based LLMs for enhanced capabilities.
-
- ## Features
-
- - 🔍 **Advanced Research Capabilities**
-   - Automated deep research with intelligent follow-up questions
-   - Citation tracking and source verification
-   - Multi-iteration analysis for comprehensive coverage
-   - Full webpage content analysis (not just snippets)
-
- - 🤖 **Flexible LLM Support**
-   - Local AI processing with Ollama models
-   - Cloud LLM support (Claude, GPT)
-   - Supports all Langchain models
-   - Configurable model selection based on needs
-
- - 📊 **Rich Output Options**
-   - Detailed research findings with citations
-   - Comprehensive research reports
-   - Quick summaries for rapid insights
-   - Source tracking and verification
-
- - 🔒 **Privacy-Focused**
-   - Runs entirely on your machine when using local models
-   - Configurable search settings
-   - Transparent data handling
-
- - 🌐 **Enhanced Search Integration**
-   - **Auto-selection of search sources**: The "auto" search engine intelligently analyzes your query and selects the most appropriate search engine based on the query content
-   - **SearXNG** integration for local web-search engine, great for privacy, no API key required (requires a searxng server)
-   - Wikipedia integration for factual knowledge
-   - arXiv integration for scientific papers and academic research
-   - PubMed integration for biomedical literature and medical research
-   - DuckDuckGo integration for web searches (may experience rate limiting)
-   - SerpAPI integration for Google search results (requires API key)
-   - Google Programmable Search Engine integration for custom search experiences (requires API key)
-   - The Guardian integration for news articles and journalism (requires API key)
-   - **Local RAG search for private documents** - search your own documents with vector embeddings
-   - Full webpage content retrieval
-   - Source filtering and validation
-   - Configurable search parameters
-
- - 📑 **Local Document Search (RAG)**
-   - Vector embedding-based search of your local documents
-   - Create custom document collections for different topics
-   - Privacy-preserving - your documents stay on your machine
-   - Intelligent chunking and retrieval
-   - Compatible with various document formats (PDF, text, markdown, etc.)
-   - Automatic integration with meta-search for unified queries
-
- ## Example Research: Fusion Energy Developments
-
- The repository includes complete research examples demonstrating the tool's capabilities. For instance, our [fusion energy research analysis](https://github.com/LearningCircuit/local-deep-research/blob/main/examples/fusion-energy-research-developments.md) provides a comprehensive overview of:
-
- - Latest scientific breakthroughs in fusion research (2022-2025)
- - Private sector funding developments exceeding $6 billion
- - Expert projections for commercial fusion energy timelines
- - Regulatory frameworks being developed for fusion deployment
- - Technical challenges that must be overcome for commercial viability
-
- This example showcases the system's ability to perform multiple research iterations, follow evidence trails across scientific and commercial domains, and synthesize information from diverse sources while maintaining proper citation.
-
- ## Installation
-
- 1. Clone the repository:
- ```bash
- git clone https://github.com/LearningCircuit/local-deep-research.git
- cd local-deep-research
- ```
- (Alternatively, an experimental **pip install local-deep-research** is available; it includes new features but is not yet well tested. See "Experimental install" below.)
- 2. Install dependencies:
- ```bash
- pip install -r requirements.txt
- playwright install
- ```
-
- 3. Install Ollama (for local models):
- ```bash
- # Install Ollama from https://ollama.ai
- ollama pull mistral # Default model; many models work well, so choose the best fit for your hardware (ideally one that fits in your GPU)
- ```
-
- 4. Configure environment variables:
- ```bash
- # Copy the template
- cp .env.template .env
-
- # Edit .env with your API keys (if using cloud LLMs)
- ANTHROPIC_API_KEY=your-api-key-here # For Claude
- OPENAI_API_KEY=your-openai-key-here # For GPT models
- GUARDIAN_API_KEY=your-guardian-api-key-here # For The Guardian search
- ```
-
- ## Experimental install
- ```bash
- # Experimental pip install with new features (not yet well tested):
- pip install local-deep-research
- playwright install
- ollama pull mistral
- ```
-
- ## Community & Support
-
- We've just launched our [Discord server](https://discord.gg/2E6gYU2Z) for this project!
-
- It's a place to exchange ideas about research approaches, discuss advanced usage patterns, and share other ideas.
-
- ## Usage
- Terminal usage (not recommended):
- ```bash
- python main.py
- ```
-
- ### Web Interface
-
- The project includes a web interface for a more user-friendly experience:
-
- ```bash
- python app.py
- ```
-
- This will start a local web server, accessible at `http://127.0.0.1:5000` in your browser.
-
- #### Web Interface Features:
-
- - **Dashboard**: Intuitive interface for starting and managing research queries
- - **Real-time Updates**: Track research progress with live updates
- - **Research History**: Access and manage past research queries
- - **PDF Export**: Download completed research reports as PDF documents
- - **Research Management**: Terminate ongoing research processes or delete past records
-
- ![Web Interface](./web1.png)
- ![Web Interface](./web2.png)
- ### Configuration
- **Please report your best settings in issues so we can improve the default settings.**
-
- Key settings in `config.py`:
- ```python
- # LLM Configuration
- DEFAULT_MODEL = "mistral" # Change based on your needs
- DEFAULT_TEMPERATURE = 0.7
- MAX_TOKENS = 8000
-
- # Search Configuration
- MAX_SEARCH_RESULTS = 40
- SEARCH_REGION = "us-en"
- TIME_PERIOD = "y"
- SAFE_SEARCH = True
- SEARCH_SNIPPETS_ONLY = False
-
- # Choose search tool: "wiki", "arxiv", "duckduckgo", "guardian", "serp", "local_all", or "auto"
- search_tool = "auto" # "auto" will intelligently select the best search engine for your query
- ```
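These flags are consumed defensively elsewhere in the package; the removed `full_search.py` shown above, for instance, wraps each lookup in `hasattr(config, ...)`. A minimal sketch of that consumption pattern (the `getattr` fallback values here are assumptions, not documented defaults):

```python
from local_deep_research import config

# The package guards optional flags with hasattr(config, ...) before reading
# them; getattr with a default is the equivalent one-liner.
snippets_only = getattr(config, "SEARCH_SNIPPETS_ONLY", False)
skip_filter = getattr(config, "SKIP_RELEVANCE_FILTER", False)

if snippets_only:
    print("Returning snippet-only results as per config")
```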
-
- ## Local Document Search (RAG)
-
- The system includes powerful local document search capabilities using Retrieval-Augmented Generation (RAG). This allows you to search and retrieve content from your own document collections.
-
- ### Setting Up Local Collections
-
- Create a file named `local_collections.py` in the project root directory:
-
- ```python
- # local_collections.py
- import os
- from typing import Dict, Any
-
- # Registry of local document collections
- LOCAL_COLLECTIONS = {
-     # Research Papers Collection
-     "research_papers": {
-         "name": "Research Papers",
-         "description": "Academic research papers and articles",
-         "paths": [os.path.abspath("local_search_files/research_papers")], # Use absolute paths
-         "enabled": True,
-         "embedding_model": "all-MiniLM-L6-v2",
-         "embedding_device": "cpu",
-         "embedding_model_type": "sentence_transformers",
-         "max_results": 20,
-         "max_filtered_results": 5,
-         "chunk_size": 800, # Smaller chunks for academic content
-         "chunk_overlap": 150,
-         "cache_dir": ".cache/local_search/research_papers"
-     },
-
-     # Personal Notes Collection
-     "personal_notes": {
-         "name": "Personal Notes",
-         "description": "Personal notes and documents",
-         "paths": [os.path.abspath("local_search_files/personal_notes")], # Use absolute paths
-         "enabled": True,
-         "embedding_model": "all-MiniLM-L6-v2",
-         "embedding_device": "cpu",
-         "embedding_model_type": "sentence_transformers",
-         "max_results": 30,
-         "max_filtered_results": 10,
-         "chunk_size": 500, # Smaller chunks for notes
-         "chunk_overlap": 100,
-         "cache_dir": ".cache/local_search/personal_notes"
-     }
- }
- ```
-
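The `sentence-transformers` and `faiss-cpu` dependencies listed in the metadata suggest the standard embedding pipeline behind these settings. A rough sketch of what `embedding_model`, the chunking parameters, and `max_results` control; this illustrates the general pattern, not the package's internal implementation:

```python
# Illustrative sentence-transformers + FAISS retrieval pipeline; not the
# package's internal code, just the pattern its dependencies point to.
import faiss
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # the "embedding_model" setting

# Documents are split into overlapping pieces ("chunk_size"/"chunk_overlap").
chunks = [
    "first ~800-character chunk of a research paper...",
    "second chunk, overlapping the first by ~150 characters...",
]

vectors = model.encode(chunks, normalize_embeddings=True)
index = faiss.IndexFlatIP(vectors.shape[1])  # inner product = cosine on unit vectors
index.add(vectors)

query = model.encode(["retrieval-augmented generation"], normalize_embeddings=True)
scores, ids = index.search(query, k=2)  # k plays the "max_results" role
print(ids[0], scores[0])
```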
- Create directories for your collections:
-
- ```bash
- mkdir -p local_search_files/research_papers
- mkdir -p local_search_files/personal_notes
- ```
-
- Add your documents to these folders, and the system will automatically index them and make them available for searching.
-
- ### Using Local Search
-
- You can use local search in several ways (a query example follows this list):
-
- 1. **Auto-selection**: Set `search_tool = "auto"` in `config.py` and the system will automatically use your local collections when appropriate for the query.
-
- 2. **Explicit Selection**: Set `search_tool = "research_papers"` to search only that specific collection.
-
- 3. **Search All Local Collections**: Set `search_tool = "local_all"` to search across all your local document collections.
-
- 4. **Query Syntax**: Use `collection:collection_name your query` to target a specific collection within a query.
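For example, option 4's syntax scoped to the `research_papers` collection defined earlier might look like this (the query text is illustrative):

```
collection:research_papers recent advances in fusion plasma confinement
```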
-
- ### Search Engine Options
-
- The system supports multiple search engines that can be selected by changing the `search_tool` variable in `config.py`:
-
- - **Auto** (`auto`): Intelligent search engine selector that analyzes your query and chooses the most appropriate source (Wikipedia, arXiv, local collections, etc.)
- - **SearXNG** (`searxng`): Local web-search engine, great for privacy, no API key required (requires a searxng server)
- - **Wikipedia** (`wiki`): Best for general knowledge, facts, and overview information
- - **arXiv** (`arxiv`): Great for scientific and academic research, accessing preprints and papers
- - **PubMed** (`pubmed`): Excellent for biomedical literature, medical research, and health information
- - **DuckDuckGo** (`duckduckgo`): General web search that doesn't require an API key
- - **The Guardian** (`guardian`): Quality journalism and news articles (requires an API key)
- - **SerpAPI** (`serp`): Google search results (requires an API key)
- - **Google Programmable Search Engine** (`google_pse`): Custom search experiences with control over search scope and domains (requires API key and search engine ID)
- - **Local Collections**: Any collections defined in your `local_collections.py` file
-
- > **Note:** The "auto" option will intelligently select the best search engine based on your query. For example, if you ask about physics research papers, it might select arXiv or your research_papers collection, while if you ask about current events, it might select The Guardian or DuckDuckGo.
-
- > **Support Free Knowledge:** If you frequently use the search engines in this tool, please consider making a donation to these organizations. They provide valuable services and rely on user support to maintain their operations:
- > - [Donate to Wikipedia](https://donate.wikimedia.org)
- > - [Support The Guardian](https://support.theguardian.com)
- > - [Support arXiv](https://arxiv.org/about/give)
- > - [Donate to DuckDuckGo](https://duckduckgo.com/donations)
- > - [Support PubMed/NCBI](https://www.nlm.nih.gov/pubs/donations/donations.html)
-
- ## License
-
- This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
-
- ## Acknowledgments
- - Built with [Ollama](https://ollama.ai) for local AI processing
- - Search powered by multiple sources:
-   - [Wikipedia](https://www.wikipedia.org/) for factual knowledge (default search engine)
-   - [arXiv](https://arxiv.org/) for scientific papers
-   - [PubMed](https://pubmed.ncbi.nlm.nih.gov/) for biomedical literature
-   - [DuckDuckGo](https://duckduckgo.com) for web search
-   - [The Guardian](https://www.theguardian.com/) for quality journalism
-   - [SerpAPI](https://serpapi.com) for Google search results (requires API key)
-   - [SearXNG](https://searxng.org/) for local web-search engine
- - Built on [LangChain](https://github.com/hwchase17/langchain) framework
- - Uses [justext](https://github.com/miso-belica/justext) for content extraction
- - [Playwright](https://playwright.dev) for web content retrieval
- - Uses [FAISS](https://github.com/facebookresearch/faiss) for vector similarity search
- - Uses [sentence-transformers](https://github.com/UKPLab/sentence-transformers) for embeddings
-
- ## Contributing
-
- Contributions are welcome! Please feel free to submit a Pull Request.
-
- 1. Fork the repository
- 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
- 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
- 4. Push to the branch (`git push origin feature/AmazingFeature`)
- 5. Open a Pull Request
-
- ## Star History
-
- [![Star History Chart](https://api.star-history.com/svg?repos=LearningCircuit/local-deep-research&type=Date)](https://www.star-history.com/#LearningCircuit/local-deep-research&Date)