local-deep-research 0.1.15__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between those versions.
Files changed (25)
  1. local_deep_research/citation_handler.py +0 -2
  2. local_deep_research/config.py +1 -4
  3. local_deep_research/defaults/llm_config.py +2 -2
  4. local_deep_research/defaults/main.toml +3 -3
  5. local_deep_research/defaults/search_engines.toml +2 -2
  6. local_deep_research/report_generator.py +1 -5
  7. local_deep_research/search_system.py +9 -10
  8. local_deep_research/utilties/search_utilities.py +3 -4
  9. local_deep_research/web_search_engines/engines/full_search.py +9 -8
  10. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -14
  11. local_deep_research/web_search_engines/engines/search_engine_brave.py +10 -9
  12. local_deep_research/web_search_engines/engines/search_engine_ddg.py +4 -2
  13. local_deep_research/web_search_engines/engines/search_engine_local.py +1 -1
  14. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +102 -661
  15. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +9 -8
  16. local_deep_research/web_search_engines/search_engine_base.py +6 -15
  17. local_deep_research-0.1.17.dist-info/METADATA +393 -0
  18. {local_deep_research-0.1.15.dist-info → local_deep_research-0.1.17.dist-info}/RECORD +22 -24
  19. local_deep_research/local_collections.py +0 -141
  20. local_deep_research/web_search_engines/full_search.py +0 -254
  21. local_deep_research-0.1.15.dist-info/METADATA +0 -346
  22. {local_deep_research-0.1.15.dist-info → local_deep_research-0.1.17.dist-info}/WHEEL +0 -0
  23. {local_deep_research-0.1.15.dist-info → local_deep_research-0.1.17.dist-info}/entry_points.txt +0 -0
  24. {local_deep_research-0.1.15.dist-info → local_deep_research-0.1.17.dist-info}/licenses/LICENSE +0 -0
  25. {local_deep_research-0.1.15.dist-info → local_deep_research-0.1.17.dist-info}/top_level.txt +0 -0
local_deep_research/web_search_engines/engines/search_engine_serpapi.py

@@ -5,7 +5,8 @@ from langchain_core.language_models import BaseLLM
 
  from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
  from local_deep_research import config
-
+ import logging
+ logger = logging.getLogger(__name__)
 
  class SerpAPISearchEngine(BaseSearchEngine):
  """Google search engine implementation using SerpAPI with two-phase approach"""
@@ -92,7 +93,7 @@ class SerpAPISearchEngine(BaseSearchEngine):
  safesearch="Moderate" if safe_search else "Off"
  )
  except ImportError:
- print("Warning: FullSearchResults not available. Full content retrieval disabled.")
+ logger.warning("Warning: FullSearchResults not available. Full content retrieval disabled.")
  self.include_full_content = False
 
  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
@@ -105,7 +106,7 @@ class SerpAPISearchEngine(BaseSearchEngine):
  Returns:
  List of preview dictionaries
  """
- print("Getting search results from SerpAPI")
+ logger.info("Getting search results from SerpAPI")
 
  try:
  # Get search results from SerpAPI
@@ -134,7 +135,7 @@ class SerpAPISearchEngine(BaseSearchEngine):
  return previews
 
  except Exception as e:
- print(f"Error getting SerpAPI results: {e}")
+ logger.error(f"Error getting SerpAPI results: {e}")
  return []
 
  def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
@@ -151,7 +152,7 @@ class SerpAPISearchEngine(BaseSearchEngine):
  """
  # Check if we should get full content
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
- print("Snippet-only mode, skipping full content retrieval")
+ logger.info("Snippet-only mode, skipping full content retrieval")
 
  # Return the relevant items with their full SerpAPI information
  results = []
@@ -171,7 +172,7 @@ class SerpAPISearchEngine(BaseSearchEngine):
 
  # If full content retrieval is enabled
  if self.include_full_content and hasattr(self, 'full_search'):
- print("Retrieving full webpage content")
+ logger.info("Retrieving full webpage content")
 
  try:
  # Extract only the links from relevant items
@@ -185,7 +186,7 @@ class SerpAPISearchEngine(BaseSearchEngine):
  return results_with_content
 
  except Exception as e:
- print(f"Error retrieving full content: {e}")
+ logger.info(f"Error retrieving full content: {e}")
  # Fall back to returning the items without full content
 
  # Return items with their full SerpAPI information
@@ -216,7 +217,7 @@ class SerpAPISearchEngine(BaseSearchEngine):
  Returns:
  List of search results
  """
- print("---Execute a search using SerpAPI (Google)---")
+ logger.info("---Execute a search using SerpAPI (Google)---")
 
  # Use the implementation from the parent class which handles all phases
  results = super().run(query)
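The hunks above all make the same kind of change: module-level print() calls in the SerpAPI engine are replaced with a module logger. A minimal sketch of that pattern follows; it is illustrative only, and apart from the standard logging module the names are hypothetical, not the package's actual code:

```python
import logging

# Module-level logger, named after the module, as introduced at the top of the engine file.
logger = logging.getLogger(__name__)

def fetch_previews(query: str) -> list:
    """Hypothetical helper showing the log levels used in the diff."""
    logger.info("Getting search results from SerpAPI")
    try:
        results = []  # placeholder for the real SerpAPI call
        return results
    except Exception as e:
        # Failures are logged at error level instead of printed to stdout.
        logger.error(f"Error getting SerpAPI results: {e}")
        return []
```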
local_deep_research/web_search_engines/search_engine_base.py

@@ -53,7 +53,7 @@ class BaseSearchEngine(ABC):
  List of search results with full content (if available)
  """
  # Ensure we're measuring time correctly for citation tracking
- start_time = datetime.now()
+
 
  # Step 1: Get preview information for items
  previews = self._get_previews(query)
@@ -65,13 +65,8 @@
  filtered_items = self._filter_for_relevance(previews, query)
  if not filtered_items:
  logger.info(f"All preview results were filtered out as irrelevant for query: {query}")
- # Fall back to preview items if everything was filtered
- # Access config inside the method to avoid circular import
- from local_deep_research import config
- if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
- return previews[:self.max_filtered_results or 5] # Return unfiltered but limited results
- else:
- filtered_items = previews[:self.max_filtered_results or 5]
+ # Do not fall back to previews, return empty list instead
+ return []
 
  # Step 3: Get full content for filtered items
  # Import config inside the method to avoid circular import
@@ -166,17 +161,13 @@ Respond with ONLY the JSON array, no other text."""
 
  return ranked_results
  else:
- logger.info("Could not find JSON array in response, returning all previews")
- if self.max_filtered_results and len(previews) > self.max_filtered_results:
- return previews[:self.max_filtered_results]
- return previews
+ logger.info("Could not find JSON array in response, returning no previews")
+ return []
 
  except Exception as e:
  logger.info(f"Relevance filtering error: {e}")
  # Fall back to returning all previews (or top N) on error
- if self.max_filtered_results and len(previews) > self.max_filtered_results:
- return previews[:self.max_filtered_results]
- return previews
+ return[]
 
  @abstractmethod
  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
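The search_engine_base.py hunks change what happens when relevance filtering leaves nothing: 0.1.15 fell back to the unfiltered previews (capped at max_filtered_results or 5), while 0.1.17 returns an empty list, both in run() and in the filtering helper. A simplified sketch of the new control flow, with hypothetical stand-ins for the abstract methods rather than the package's actual class:

```python
from typing import Any, Dict, List

class SketchSearchEngine:
    """Illustrative stand-in for BaseSearchEngine; not the package's actual class."""

    def run(self, query: str) -> List[Dict[str, Any]]:
        previews = self._get_previews(query)
        if not previews:
            return []
        filtered = self._filter_for_relevance(previews, query)
        if not filtered:
            # 0.1.15 returned previews[:self.max_filtered_results or 5] here;
            # 0.1.17 drops the fallback and returns no results at all.
            return []
        return self._get_full_content(filtered)

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        return [{"title": "example", "link": "https://example.com"}]

    def _filter_for_relevance(self, previews: List[Dict[str, Any]], query: str) -> List[Dict[str, Any]]:
        return previews  # placeholder for the LLM-based relevance filter

    def _get_full_content(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        return items  # placeholder for full-content retrieval
```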
local_deep_research-0.1.17.dist-info/METADATA

@@ -0,0 +1,393 @@
+ Metadata-Version: 2.4
+ Name: local-deep-research
+ Version: 0.1.17
+ Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
+ Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
+ License: MIT License
+
+ Copyright (c) 2025 LearningCircuit
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ Project-URL: Homepage, https://github.com/LearningCircuit/local-deep-research
+ Project-URL: Bug Tracker, https://github.com/LearningCircuit/local-deep-research/issues
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: langchain>=0.3.18
+ Requires-Dist: langchain-community>=0.3.17
+ Requires-Dist: langchain-core>=0.3.34
+ Requires-Dist: langchain-ollama>=0.2.3
+ Requires-Dist: langchain-openai>=0.3.5
+ Requires-Dist: langchain_anthropic>=0.3.7
+ Requires-Dist: duckduckgo_search>=7.3.2
+ Requires-Dist: python-dateutil>=2.9.0
+ Requires-Dist: typing_extensions>=4.12.2
+ Requires-Dist: justext
+ Requires-Dist: playwright
+ Requires-Dist: beautifulsoup4
+ Requires-Dist: flask>=2.0.1
+ Requires-Dist: flask-cors>=3.0.10
+ Requires-Dist: flask-socketio>=5.1.1
+ Requires-Dist: sqlalchemy>=1.4.23
+ Requires-Dist: wikipedia
+ Requires-Dist: arxiv>=1.4.3
+ Requires-Dist: pypdf
+ Requires-Dist: sentence-transformers
+ Requires-Dist: faiss-cpu
+ Requires-Dist: pydantic>=2.0.0
+ Requires-Dist: pydantic-settings>=2.0.0
+ Requires-Dist: toml>=0.10.2
+ Requires-Dist: platformdirs>=3.0.0
+ Requires-Dist: dynaconf
+ Requires-Dist: requests>=2.28.0
+ Requires-Dist: tiktoken>=0.4.0
+ Requires-Dist: xmltodict>=0.13.0
+ Requires-Dist: lxml>=4.9.2
+ Requires-Dist: pdfplumber>=0.9.0
+ Requires-Dist: unstructured>=0.10.0
+ Dynamic: license-file
+
+ # Local Deep Research
+
+ A powerful AI-powered research assistant that performs deep, iterative analysis using multiple LLMs and web searches. The system can be run locally for privacy or configured to use cloud-based LLMs for enhanced capabilities.
+
+ ## Quick Start
+
+ ```bash
+ # Install the package
+ pip install local-deep-research
+
+ # Install required browser automation tools
+ playwright install
+
+ # For local models, install Ollama
+ # Download from https://ollama.ai and then pull a model
+ ollama pull gemma3:12b
+ ```
+
+ Then run:
+
+ ```bash
+ # Start the web interface (recommended)
+ ldr-web # (OR python -m local_deep_research.web.app)
+
+ # OR run the command line version
+ ldr # (OR python -m local_deep_research.main)
+ ```
+
+ Access the web interface at `http://127.0.0.1:5000` in your browser.
+
+ ## Features
+
+ - 🔍 **Advanced Research Capabilities**
+   - Automated deep research with intelligent follow-up questions
+   - Citation tracking and source verification
+   - Multi-iteration analysis for comprehensive coverage
+   - Full webpage content analysis (not just snippets)
+
+ - 🤖 **Flexible LLM Support**
+   - Local AI processing with Ollama models
+   - Cloud LLM support (Claude, GPT)
+   - Supports all Langchain models
+   - Configurable model selection based on needs
+
+ - 📊 **Rich Output Options**
+   - Detailed research findings with citations
+   - Comprehensive research reports
+   - Quick summaries for rapid insights
+   - Source tracking and verification
+
+ - 🔒 **Privacy-Focused**
+   - Runs entirely on your machine when using local models
+   - Configurable search settings
+   - Transparent data handling
+
+ - 🌐 **Enhanced Search Integration**
+   - **Auto-selection of search sources**: The "auto" search engine intelligently analyzes your query and selects the most appropriate search engine
+   - Multiple search engines including Wikipedia, arXiv, PubMed, Semantic Scholar, and more
+   - **Local RAG search for private documents** - search your own documents with vector embeddings
+   - Full webpage content retrieval and intelligent filtering
+
+ ## Configuration System
+
+ The package automatically creates and manages configuration files in your user directory:
+
+ - **Windows**: `Documents\LearningCircuit\local-deep-research\config\`
+ - **Linux/Mac**: `~/.config/local_deep_research/config/`
+
+ ### Default Configuration Files
+
+ When you first run the tool, it creates these configuration files:
+
+ | File | Purpose |
+ |------|---------|
+ | `settings.toml` | General settings for research, web interface, and search |
+ | `llm_config.py` | Configure which LLM to use (local or cloud-based) |
+ | `search_engines.toml` | Define and configure search engines |
+ | `local_collections.toml` | Configure local document collections for RAG |
+ | `.secrets.toml` | Store API keys for cloud services |
+
+ ## Setting Up AI Models
+
+ The system supports multiple LLM providers:
+
+ ### Local Models (via Ollama)
+
+ 1. [Install Ollama](https://ollama.ai)
+ 2. Pull a model: `ollama pull gemma3:12b` (recommended model)
+ 3. Ollama runs on port 11434 by default
+
+ ### Cloud Models
+
+ Edit your `.secrets.toml` file to add API keys:
+
+ ```toml
+ ANTHROPIC_API_KEY = "your-api-key-here" # For Claude models
+ OPENAI_API_KEY = "your-openai-key-here" # For GPT models
+ OPENAI_ENDPOINT_API_KEY = "your-key-here" # For OpenRouter or similar services
+ ```
+
+ Then edit `llm_config.py` to change the default provider:
+
+ ```python
+ # Set your preferred model provider here
+ DEFAULT_PROVIDER = ModelProvider.OLLAMA # Change to your preferred provider
+
+ # Set your default model name here
+ DEFAULT_MODEL = "mistral" # Change to your preferred model
+ ```
+
+ ### Supported LLM Providers
+
+ The system supports multiple LLM providers:
+
+ | Provider | Type | Configuration | Notes |
+ |----------|------|--------------|-------|
+ | `OLLAMA` | Local | No API key needed | Default - install from ollama.ai |
+ | `OPENAI` | Cloud | Requires `OPENAI_API_KEY` | GPT models (3.5, 4, 4o) |
+ | `ANTHROPIC` | Cloud | Requires `ANTHROPIC_API_KEY` | Claude models (3 Opus, Sonnet, Haiku) |
+ | `OPENAI_ENDPOINT` | Cloud | Requires `OPENAI_ENDPOINT_API_KEY` | For any OpenAI-compatible API |
+ | `VLLM` | Local | No API key needed | For hosting models via vLLM |
+
+ You can configure the OpenAI-compatible endpoint URL in `llm_config.py`:
+
+ ```python
+ # For OpenRouter, Together.ai, Azure OpenAI, or any compatible endpoint
+ OPENAI_ENDPOINT_URL = "https://openrouter.ai/api/v1"
+ ```
+
+ The `OPENAI_ENDPOINT` provider can access any service with an OpenAI-compatible API, including:
+ - OpenRouter (access to hundreds of models)
+ - Azure OpenAI
+ - Together.ai
+ - Groq
+ - Anyscale
+ - Self-hosted LLM servers with OpenAI compatibility
+ - Any other service that implements the OpenAI API specification
+
+ ## Setting Up Search Engines
+
+ The system includes multiple search engines. Some require API keys:
+
+ ```toml
+ # Add to .secrets.toml
+ SERP_API_KEY = "your-serpapi-key-here" # For Google results via SerpAPI
+ GOOGLE_PSE_API_KEY = "your-google-key-here" # For Google Programmable Search
+ GOOGLE_PSE_ENGINE_ID = "your-pse-id-here" # For Google Programmable Search
+ BRAVE_API_KEY = "your-brave-search-key-here" # For Brave Search
+ GUARDIAN_API_KEY = "your-guardian-key-here" # For The Guardian
+ ```
+
+ No API key required for: Wikipedia, arXiv, PubMed, Semantic Scholar, and local collections.
+
+ ## Local Document Search (RAG)
+
+ The system can search through your local documents using vector embeddings.
+
+ ### Setting Up Document Collections
+
+ 1. Define collections in `local_collections.toml`. Default collections include:
+
+ ```toml
+ [project_docs]
+ name = "Project Documents"
+ description = "Project documentation and specifications"
+ paths = ["@format ${DOCS_DIR}/project_documents"]
+ enabled = true
+ embedding_model = "all-MiniLM-L6-v2"
+ embedding_device = "cpu"
+ embedding_model_type = "sentence_transformers"
+ max_results = 20
+ max_filtered_results = 5
+ chunk_size = 1000
+ chunk_overlap = 200
+ cache_dir = "__CACHE_DIR__/local_search/project_docs"
+
+ # More collections defined in the file...
+ ```
+
+ 2. Create your document directories:
+    - The `${DOCS_DIR}` variable points to a default location in your Documents folder
+    - Documents are automatically indexed when the search is first used
+
+ ### Using Local Search
+
+ You can use local document search in several ways:
+
+ 1. **Auto-selection**: Set `tool = "auto"` in `settings.toml` [search] section
+ 2. **Explicit collection**: Set `tool = "project_docs"` to search only that collection
+ 3. **All collections**: Set `tool = "local_all"` to search across all collections
+ 4. **Query syntax**: Type `collection:project_docs your query` to target a specific collection
+
+ ## Available Search Engines
+
+ | Engine | Purpose | API Key Required? |
+ |--------|---------|-------------------|
+ | `auto` | Intelligently selects the best engine | No |
+ | `wikipedia` | General knowledge and facts | No |
+ | `arxiv` | Scientific papers and research | No |
+ | `pubmed` | Medical and biomedical research | No |
+ | `semantic_scholar` | Academic literature across all fields | No |
+ | `github` | Code repositories and documentation | No (but rate-limited) |
+ | `brave` | Web search (privacy-focused) | Yes |
+ | `serpapi` | Google search results | Yes |
+ | `google_pse` | Custom Google search | Yes |
+ | `wayback` | Historical web content | No |
+ | `searxng` | Local web search engine | No (requires server) |
+ | Any collection name | Search your local documents | No |
+
+ > **Support Free Knowledge:** If you frequently use the search engines in this tool, please consider making a donation to these organizations. They provide valuable services and rely on user support to maintain their operations:
+ > - [Donate to Wikipedia](https://donate.wikimedia.org)
+ > - [Support arXiv](https://arxiv.org/about/give)
+ > - [Donate to DuckDuckGo](https://duckduckgo.com/donations)
+ > - [Support PubMed/NCBI](https://www.nlm.nih.gov/pubs/donations/donations.html)
+
+ ## Advanced Configuration
+
+ ### Research Parameters
+
+ Edit `settings.toml` to customize research parameters:
+
+ ```toml
+ [search]
+ # Search tool to use (auto, wikipedia, arxiv, etc.)
+ tool = "auto"
+
+ # Number of research cycles
+ iterations = 2
+
+ # Questions generated per cycle
+ questions_per_iteration = 2
+
+ # Results per search query
+ max_results = 50
+
+ # Results after relevance filtering
+ max_filtered_results = 5
+
+ # More settings available...
+ ```
+
+ ## Web Interface
+
+ The web interface offers several features:
+
+ - **Dashboard**: Start and manage research queries
+ - **Real-time Updates**: Track research progress
+ - **Research History**: Access past queries
+ - **PDF Export**: Download reports
+ - **Research Management**: Terminate processes or delete records
+
+ ## Command Line Interface
+
+ The CLI version allows you to:
+
+ 1. Choose between a quick summary or detailed report
+ 2. Enter your research query
+ 3. View results directly in the terminal
+ 4. Save reports automatically to the configured output directory
+
+ ## Development Setup
+
+ If you want to develop or modify the package, you can install it in development mode:
+
+ ```bash
+ # Clone the repository
+ git clone https://github.com/LearningCircuit/local-deep-research.git
+ cd local-deep-research
+
+ # Install in development mode
+ pip install -e .
+ ```
+
+ This creates an "editable" installation that uses your local code, so any changes you make are immediately available without reinstalling.
+
+ You can run the application directly using Python module syntax:
+
+ ```bash
+ # Run the web interface
+ python -m local_deep_research.web.app
+
+ # Run the CLI version
+ python -m local_deep_research.main
+ ```
+
+ This approach is useful for development and debugging, as it provides more detailed error messages and allows you to make code changes on the fly.
+
+ ## Example Research
+
+ The repository includes complete research examples like our [fusion energy research analysis](https://github.com/LearningCircuit/local-deep-research/blob/main/examples/fusion-energy-research-developments.md) showcasing the system's capabilities.
+
+ ## Community & Support
+
+ Join our [Discord server](https://discord.gg/2E6gYU2Z) to exchange ideas, discuss usage patterns, and share research approaches.
+
+ ## License
+
+ This project is licensed under the MIT License.
+
+ ## Acknowledgments
+
+ - Built with [Ollama](https://ollama.ai) for local AI processing
+ - Search powered by multiple sources:
+   - [Wikipedia](https://www.wikipedia.org/) for factual knowledge (default search engine)
+   - [arXiv](https://arxiv.org/) for scientific papers
+   - [PubMed](https://pubmed.ncbi.nlm.nih.gov/) for biomedical literature
+   - [DuckDuckGo](https://duckduckgo.com) for web search
+   - [The Guardian](https://www.theguardian.com/) for journalism
+   - [SerpAPI](https://serpapi.com) for Google search results (requires API key)
+   - [SearXNG](https://searxng.org/) for local web-search engine
+   - [Brave Search](https://search.brave.com/) for privacy-focused web search
+   - [Semantic Scholar](https://www.semanticscholar.org/) for academic literature
+ - Built on [LangChain](https://github.com/hwchase17/langchain) framework
+ - Uses [justext](https://github.com/miso-belica/justext), [Playwright](https://playwright.dev), [FAISS](https://github.com/facebookresearch/faiss), and more
+
+ ## Contributing
+
+ Contributions are welcome! Please feel free to submit a Pull Request.
+
+ 1. Fork the repository
+ 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
+ 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
+ 4. Push to the branch (`git push origin feature/AmazingFeature`)
+ 5. Open a Pull Request
{local_deep_research-0.1.15.dist-info → local_deep_research-0.1.17.dist-info}/RECORD

@@ -1,19 +1,18 @@
  local_deep_research/__init__.py,sha256=pfHzjzYc6Szo8VCNLtFZRXyAlEz7CViY7r2fH9O7yms,584
- local_deep_research/citation_handler.py,sha256=1Znl5SPkVwOZ3JvFowvaW1uriaw37s1nPYhXbpa2UZ0,4423
- local_deep_research/config.py,sha256=lucqOE4KeNm1ynYdcHYWJLE5fJ0QN-1QKZpRcBPsHe8,8674
- local_deep_research/local_collections.py,sha256=SB-fdptT7qS0klJUVx_Rs9OgDwafMUgI46984WlZGKI,6076
+ local_deep_research/citation_handler.py,sha256=v_fwTy-2XvUuoH3OQRzmBrvaiN7mBk8jbNfySslmt5g,4357
+ local_deep_research/config.py,sha256=PAruLZutlrjkGOKrv49hk8U4q9JPWWgAKLiY8Ukpsks,8572
  local_deep_research/main.py,sha256=uQXtGQ6LtZNd5Qw63D5ke4Q_LjYimouWVSUknVsk3JQ,3645
- local_deep_research/report_generator.py,sha256=UOiSw_vPHgtUpI8L9_UaOlpBVBloPB-ilhAo-1d2B9M,8200
- local_deep_research/search_system.py,sha256=lKOdtSFe-dlOa66QmBIL1SAO4J2ZnFgBIQVMHCPxbMo,15623
+ local_deep_research/report_generator.py,sha256=EvaArnWirMgg42fMzmZeJczoEYujEbJ2ryHHYuuoXx8,8058
+ local_deep_research/search_system.py,sha256=yY3BEzX68vdtUcYF9h6lC3yVao0YA_NSBj6W3-RwlKk,15459
  local_deep_research/defaults/__init__.py,sha256=2Vvlkl-gmP_qPYWegE4JBgummypogl3VXrQ1XzptFDU,1381
- local_deep_research/defaults/llm_config.py,sha256=88IGWPPvikSKmAqfqsGovBx2Jac5eh2sBY_LIW624Ik,7910
+ local_deep_research/defaults/llm_config.py,sha256=T03pntyNtOk1fvu-RZ-iEoh7L2D2hcICr8usIPpULuo,7870
  local_deep_research/defaults/local_collections.toml,sha256=_edVWVHrhunMfazjejhJlGPRkHKKIP51qQtNkMgNEiA,1406
- local_deep_research/defaults/main.toml,sha256=l_J9JAPhKEp63IsLBO0hQDVimxogEpnrEVnNjiOeUxg,1403
- local_deep_research/defaults/search_engines.toml,sha256=-jjuG9G6H8dQemZGs6ytZwAdJLbVcWTlCw_SVDNgQXQ,8101
+ local_deep_research/defaults/main.toml,sha256=DLhFq88vdE2_psLaWhPV9BWPixqTHvR2Rllaj_rmjJ4,1403
+ local_deep_research/defaults/search_engines.toml,sha256=TYkOqVaZq9JPawz4fIPyGdkAtYa4t8F9H50VY-wv2ak,8101
  local_deep_research/utilties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  local_deep_research/utilties/enums.py,sha256=TVAZiu9szNbdacfb7whgaQJJlSk7oYByADaAierD4CE,229
  local_deep_research/utilties/llm_utils.py,sha256=IGv-_gJWqLTpO3_op1NHIwxKaFEzmXhhVYSLTTSMnIA,4522
- local_deep_research/utilties/search_utilities.py,sha256=C8ycFd7blcq5vtnd6GxP8dkepZT6EEqHFtT3WYxF0Ck,4151
+ local_deep_research/utilties/search_utilities.py,sha256=-wj_-HCotqDUXYcPaKGN1C_t6WH4xubNhem4RE60AuM,4098
  local_deep_research/utilties/setup_utils.py,sha256=t6GNp7lK1nLPdPNCkYUk82IATGM62vqy8UBD-KqShOs,215
  local_deep_research/web/__init__.py,sha256=3oHMatNu8r24FBtpojriIVbHYOVSHj4Q-quycMKOuDk,62
  local_deep_research/web/app.py,sha256=5_VLNdhJOqdgacucglUdS_lVURNgYNbXhK9vME6JmzA,72431
@@ -28,30 +27,29 @@ local_deep_research/web/templates/search_engines_config.html,sha256=z_krznfdhF3e
  local_deep_research/web/templates/settings.html,sha256=S9A-tdpzMhP2Zw7kp2jxKlwaWtoRil_IHX_a376vXsA,29575
  local_deep_research/web/templates/settings_dashboard.html,sha256=De-v1KNdVvkXme5i3YZ6sIfU9aAKDc_N-AW9n4PZoso,9109
  local_deep_research/web_search_engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- local_deep_research/web_search_engines/full_search.py,sha256=3SSTvD12g4pNlZCSGh8jwsyYWpQglgqjADnq8dG1zyI,9756
- local_deep_research/web_search_engines/search_engine_base.py,sha256=ig1sv1EVXZ9NqVA2lZJIKMjDxl9W8Gb7rTc_oRbsnSU,8803
+ local_deep_research/web_search_engines/search_engine_base.py,sha256=QmhfjuHK2deomh8tARghKuYnF-5t3wwBB661odS2VtU,8065
  local_deep_research/web_search_engines/search_engine_factory.py,sha256=B_QaqoAwnVXCmHNdqGbo94LekWY6wpBw_PWNkI120qE,10728
  local_deep_research/web_search_engines/search_engines_config.py,sha256=bNCuR09NOk5cjnKIgDQfhPipqmvDKeE7WP_6p8LLZf0,1979
  local_deep_research/web_search_engines/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- local_deep_research/web_search_engines/engines/full_search.py,sha256=mcxS8o7-WmOQc3_H4232adhBHevZfSHWmaOFoia68UU,4711
+ local_deep_research/web_search_engines/engines/full_search.py,sha256=BuOz8dX-XocazCG7gGBKFnIY99FZtNFI0-Wq3fhsfp4,4689
  local_deep_research/web_search_engines/engines/meta_search_engine.py,sha256=Zky4sowCortEaIj1pBU0sKuggXr5izkubgrD7cM8IOQ,11485
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py,sha256=WlKQSB1kgdEaLtDj4JwpIiP3eZPustJt4ryJ9wiulT0,15316
- local_deep_research/web_search_engines/engines/search_engine_brave.py,sha256=zAofutKEnXb_CYAi7hrptNVrQ15PQoYK_Eg3spcD1Ig,9583
- local_deep_research/web_search_engines/engines/search_engine_ddg.py,sha256=-_rWr2uwSoxHLkAV-WrHj_vuoSActr_jzfveI7dE10c,4845
+ local_deep_research/web_search_engines/engines/search_engine_arxiv.py,sha256=cf8OzhSzE1zqaiOZ6EFQGy_6hTCJMaTysYd8rs1KJNU,15408
+ local_deep_research/web_search_engines/engines/search_engine_brave.py,sha256=J242byUGG5ROQ_bh-mU292_t7Q7m20_9O0r1w5z6d9A,9688
+ local_deep_research/web_search_engines/engines/search_engine_ddg.py,sha256=qK2i65dbPtr_ppoKPU_YA0mDqM_sDAvN6ZztvdFjsCk,4910
  local_deep_research/web_search_engines/engines/search_engine_github.py,sha256=qqipsw2ycjlRbR6mmMmxzGU3LEcFDJJJ7Ez7xUgWjRM,26768
  local_deep_research/web_search_engines/engines/search_engine_google_pse.py,sha256=YkXvBmgcqTImCxuyy6580SGRAvImGc6SzInXZgo1kNE,11294
  local_deep_research/web_search_engines/engines/search_engine_guardian.py,sha256=MW4WIwtNAwcpdigNXronyezAxr50EIZTV1NMedrAv2o,23912
- local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=uAsNtaFV3AH4ltNpudHIl546jsOKKyGDjUwU5J7gKts,36766
+ local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=rfmPiA9DVmjbaB3KQtlq7s6BRMgHRgzP7AhktZNDw2M,36772
  local_deep_research/web_search_engines/engines/search_engine_local_all.py,sha256=CRNcxBzNd9kanyIJYaUDB7qfXYxVCvd4L2mX8jL73v0,5955
  local_deep_research/web_search_engines/engines/search_engine_pubmed.py,sha256=MayfzM2R0XoI7cpXlG1XJ1ktfTN_6H-Xs9RmD89UAao,39236
  local_deep_research/web_search_engines/engines/search_engine_searxng.py,sha256=GMy6qDMSaVBtjWRm48XBu6TjLAy1HfcO2EFTwr8S9rk,18048
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py,sha256=y-g7L9P91XXrO4-2tueHB0FoE4N2cPEOUhjYnXNvWYs,44950
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=iy-QmT99Tf2cJlfCrPbEhtMB7a_zCKppvlUKi7VBrlE,9118
+ local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py,sha256=6VMymjFJ7pyV2nv5dRfFofXgg0kG82rkwbICVnNDNH4,23352
+ local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=XikEYnM-pAaR70VeAJ28lbqpRzCj4bCA9xY29taTV8g,9215
  local_deep_research/web_search_engines/engines/search_engine_wayback.py,sha256=astAvSLajDZ6rwgthJ3iBcHSWuDSYPO7uilIxaJhXmU,18132
  local_deep_research/web_search_engines/engines/search_engine_wikipedia.py,sha256=KSGJECbEcxZpVK-PhYsTCtzedSK0l1AjQmvGtx8KBks,9799
- local_deep_research-0.1.15.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
- local_deep_research-0.1.15.dist-info/METADATA,sha256=58mozAoQWVyN6UDWlpVNPAaGQM4Qy2V0Qge1Kpzz2U8,15151
- local_deep_research-0.1.15.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
- local_deep_research-0.1.15.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
- local_deep_research-0.1.15.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
- local_deep_research-0.1.15.dist-info/RECORD,,
+ local_deep_research-0.1.17.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
+ local_deep_research-0.1.17.dist-info/METADATA,sha256=2uPvdSw878o2XCdEYAy4LlWCi_TrvwDz3hQBHq5z4CE,14428
+ local_deep_research-0.1.17.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+ local_deep_research-0.1.17.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
+ local_deep_research-0.1.17.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
+ local_deep_research-0.1.17.dist-info/RECORD,,