local-deep-research 0.1.16__py3-none-any.whl → 0.1.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/citation_handler.py +0 -2
- local_deep_research/config.py +1 -4
- local_deep_research/defaults/llm_config.py +2 -2
- local_deep_research/defaults/main.toml +3 -3
- local_deep_research/report_generator.py +1 -5
- local_deep_research/search_system.py +1 -1
- local_deep_research/utilties/search_utilities.py +3 -4
- local_deep_research/web_search_engines/engines/full_search.py +9 -8
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -14
- local_deep_research/web_search_engines/engines/search_engine_brave.py +10 -9
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +4 -2
- local_deep_research/web_search_engines/engines/search_engine_local.py +1 -1
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +9 -8
- local_deep_research/web_search_engines/search_engine_base.py +1 -1
- local_deep_research-0.1.17.dist-info/METADATA +393 -0
- {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/RECORD +20 -22
- local_deep_research/local_collections.py +0 -141
- local_deep_research/web_search_engines/full_search.py +0 -254
- local_deep_research-0.1.16.dist-info/METADATA +0 -346
- {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/WHEEL +0 -0
- {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/licenses/LICENSE +0 -0
- {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,393 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: local-deep-research
|
3
|
+
Version: 0.1.17
|
4
|
+
Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
|
5
|
+
Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
|
6
|
+
License: MIT License
|
7
|
+
|
8
|
+
Copyright (c) 2025 LearningCircuit
|
9
|
+
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
12
|
+
in the Software without restriction, including without limitation the rights
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
15
|
+
furnished to do so, subject to the following conditions:
|
16
|
+
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
18
|
+
copies or substantial portions of the Software.
|
19
|
+
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
26
|
+
SOFTWARE.
|
27
|
+
|
28
|
+
Project-URL: Homepage, https://github.com/LearningCircuit/local-deep-research
|
29
|
+
Project-URL: Bug Tracker, https://github.com/LearningCircuit/local-deep-research/issues
|
30
|
+
Classifier: Programming Language :: Python :: 3
|
31
|
+
Classifier: License :: OSI Approved :: MIT License
|
32
|
+
Classifier: Operating System :: OS Independent
|
33
|
+
Requires-Python: >=3.8
|
34
|
+
Description-Content-Type: text/markdown
|
35
|
+
License-File: LICENSE
|
36
|
+
Requires-Dist: langchain>=0.3.18
|
37
|
+
Requires-Dist: langchain-community>=0.3.17
|
38
|
+
Requires-Dist: langchain-core>=0.3.34
|
39
|
+
Requires-Dist: langchain-ollama>=0.2.3
|
40
|
+
Requires-Dist: langchain-openai>=0.3.5
|
41
|
+
Requires-Dist: langchain_anthropic>=0.3.7
|
42
|
+
Requires-Dist: duckduckgo_search>=7.3.2
|
43
|
+
Requires-Dist: python-dateutil>=2.9.0
|
44
|
+
Requires-Dist: typing_extensions>=4.12.2
|
45
|
+
Requires-Dist: justext
|
46
|
+
Requires-Dist: playwright
|
47
|
+
Requires-Dist: beautifulsoup4
|
48
|
+
Requires-Dist: flask>=2.0.1
|
49
|
+
Requires-Dist: flask-cors>=3.0.10
|
50
|
+
Requires-Dist: flask-socketio>=5.1.1
|
51
|
+
Requires-Dist: sqlalchemy>=1.4.23
|
52
|
+
Requires-Dist: wikipedia
|
53
|
+
Requires-Dist: arxiv>=1.4.3
|
54
|
+
Requires-Dist: pypdf
|
55
|
+
Requires-Dist: sentence-transformers
|
56
|
+
Requires-Dist: faiss-cpu
|
57
|
+
Requires-Dist: pydantic>=2.0.0
|
58
|
+
Requires-Dist: pydantic-settings>=2.0.0
|
59
|
+
Requires-Dist: toml>=0.10.2
|
60
|
+
Requires-Dist: platformdirs>=3.0.0
|
61
|
+
Requires-Dist: dynaconf
|
62
|
+
Requires-Dist: requests>=2.28.0
|
63
|
+
Requires-Dist: tiktoken>=0.4.0
|
64
|
+
Requires-Dist: xmltodict>=0.13.0
|
65
|
+
Requires-Dist: lxml>=4.9.2
|
66
|
+
Requires-Dist: pdfplumber>=0.9.0
|
67
|
+
Requires-Dist: unstructured>=0.10.0
|
68
|
+
Dynamic: license-file
|
69
|
+
|
70
|
+
# Local Deep Research
|
71
|
+
|
72
|
+
A powerful AI-powered research assistant that performs deep, iterative analysis using multiple LLMs and web searches. The system can be run locally for privacy or configured to use cloud-based LLMs for enhanced capabilities.
|
73
|
+
|
74
|
+
## Quick Start
|
75
|
+
|
76
|
+
```bash
|
77
|
+
# Install the package
|
78
|
+
pip install local-deep-research
|
79
|
+
|
80
|
+
# Install required browser automation tools
|
81
|
+
playwright install
|
82
|
+
|
83
|
+
# For local models, install Ollama
|
84
|
+
# Download from https://ollama.ai and then pull a model
|
85
|
+
ollama pull gemma3:12b
|
86
|
+
```
|
87
|
+
|
88
|
+
Then run:
|
89
|
+
|
90
|
+
```bash
|
91
|
+
# Start the web interface (recommended)
|
92
|
+
ldr-web # (OR python -m local_deep_research.web.app)
|
93
|
+
|
94
|
+
# OR run the command line version
|
95
|
+
ldr # (OR python -m local_deep_research.main)
|
96
|
+
```
|
97
|
+
|
98
|
+
Access the web interface at `http://127.0.0.1:5000` in your browser.
|
99
|
+
|
100
|
+
## Features
|
101
|
+
|
102
|
+
- 🔍 **Advanced Research Capabilities**
|
103
|
+
- Automated deep research with intelligent follow-up questions
|
104
|
+
- Citation tracking and source verification
|
105
|
+
- Multi-iteration analysis for comprehensive coverage
|
106
|
+
- Full webpage content analysis (not just snippets)
|
107
|
+
|
108
|
+
- 🤖 **Flexible LLM Support**
|
109
|
+
- Local AI processing with Ollama models
|
110
|
+
- Cloud LLM support (Claude, GPT)
|
111
|
+
- Supports all Langchain models
|
112
|
+
- Configurable model selection based on needs
|
113
|
+
|
114
|
+
- 📊 **Rich Output Options**
|
115
|
+
- Detailed research findings with citations
|
116
|
+
- Comprehensive research reports
|
117
|
+
- Quick summaries for rapid insights
|
118
|
+
- Source tracking and verification
|
119
|
+
|
120
|
+
- 🔒 **Privacy-Focused**
|
121
|
+
- Runs entirely on your machine when using local models
|
122
|
+
- Configurable search settings
|
123
|
+
- Transparent data handling
|
124
|
+
|
125
|
+
- 🌐 **Enhanced Search Integration**
|
126
|
+
- **Auto-selection of search sources**: The "auto" search engine intelligently analyzes your query and selects the most appropriate search engine
|
127
|
+
- Multiple search engines including Wikipedia, arXiv, PubMed, Semantic Scholar, and more
|
128
|
+
- **Local RAG search for private documents** - search your own documents with vector embeddings
|
129
|
+
- Full webpage content retrieval and intelligent filtering
|
130
|
+
|
131
|
+
## Configuration System
|
132
|
+
|
133
|
+
The package automatically creates and manages configuration files in your user directory:
|
134
|
+
|
135
|
+
- **Windows**: `Documents\LearningCircuit\local-deep-research\config\`
|
136
|
+
- **Linux/Mac**: `~/.config/local_deep_research/config/`
|
137
|
+
|
138
|
+
### Default Configuration Files
|
139
|
+
|
140
|
+
When you first run the tool, it creates these configuration files:
|
141
|
+
|
142
|
+
| File | Purpose |
|
143
|
+
|------|---------|
|
144
|
+
| `settings.toml` | General settings for research, web interface, and search |
|
145
|
+
| `llm_config.py` | Configure which LLM to use (local or cloud-based) |
|
146
|
+
| `search_engines.toml` | Define and configure search engines |
|
147
|
+
| `local_collections.toml` | Configure local document collections for RAG |
|
148
|
+
| `.secrets.toml` | Store API keys for cloud services |
|
149
|
+
|
150
|
+
## Setting Up AI Models
|
151
|
+
|
152
|
+
The system supports multiple LLM providers:
|
153
|
+
|
154
|
+
### Local Models (via Ollama)
|
155
|
+
|
156
|
+
1. [Install Ollama](https://ollama.ai)
|
157
|
+
2. Pull a model: `ollama pull gemma3:12b` (recommended model)
|
158
|
+
3. Ollama runs on port 11434 by default
|
159
|
+
|
160
|
+
### Cloud Models
|
161
|
+
|
162
|
+
Edit your `.secrets.toml` file to add API keys:
|
163
|
+
|
164
|
+
```toml
|
165
|
+
ANTHROPIC_API_KEY = "your-api-key-here" # For Claude models
|
166
|
+
OPENAI_API_KEY = "your-openai-key-here" # For GPT models
|
167
|
+
OPENAI_ENDPOINT_API_KEY = "your-key-here" # For OpenRouter or similar services
|
168
|
+
```
|
169
|
+
|
170
|
+
Then edit `llm_config.py` to change the default provider:
|
171
|
+
|
172
|
+
```python
|
173
|
+
# Set your preferred model provider here
|
174
|
+
DEFAULT_PROVIDER = ModelProvider.OLLAMA # Change to your preferred provider
|
175
|
+
|
176
|
+
# Set your default model name here
|
177
|
+
DEFAULT_MODEL = "mistral" # Change to your preferred model
|
178
|
+
```
|
179
|
+
|
180
|
+
### Supported LLM Providers
|
181
|
+
|
182
|
+
The following LLM providers are available:
|
183
|
+
|
184
|
+
| Provider | Type | Configuration | Notes |
|
185
|
+
|----------|------|--------------|-------|
|
186
|
+
| `OLLAMA` | Local | No API key needed | Default - install from ollama.ai |
|
187
|
+
| `OPENAI` | Cloud | Requires `OPENAI_API_KEY` | GPT models (3.5, 4, 4o) |
|
188
|
+
| `ANTHROPIC` | Cloud | Requires `ANTHROPIC_API_KEY` | Claude models (3 Opus, Sonnet, Haiku) |
|
189
|
+
| `OPENAI_ENDPOINT` | Cloud | Requires `OPENAI_ENDPOINT_API_KEY` | For any OpenAI-compatible API |
|
190
|
+
| `VLLM` | Local | No API key needed | For hosting models via vLLM |
|
191
|
+
|
192
|
+
You can configure the OpenAI-compatible endpoint URL in `llm_config.py`:
|
193
|
+
|
194
|
+
```python
|
195
|
+
# For OpenRouter, Together.ai, Azure OpenAI, or any compatible endpoint
|
196
|
+
OPENAI_ENDPOINT_URL = "https://openrouter.ai/api/v1"
|
197
|
+
```
|
198
|
+
|
199
|
+
The `OPENAI_ENDPOINT` provider can access any service with an OpenAI-compatible API, including:
|
200
|
+
- OpenRouter (access to hundreds of models)
|
201
|
+
- Azure OpenAI
|
202
|
+
- Together.ai
|
203
|
+
- Groq
|
204
|
+
- Anyscale
|
205
|
+
- Self-hosted LLM servers with OpenAI compatibility
|
206
|
+
- Any other service that implements the OpenAI API specification
|
207
|
+
|
208
|
+
## Setting Up Search Engines
|
209
|
+
|
210
|
+
The system includes multiple search engines. Some require API keys:
|
211
|
+
|
212
|
+
```toml
|
213
|
+
# Add to .secrets.toml
|
214
|
+
SERP_API_KEY = "your-serpapi-key-here" # For Google results via SerpAPI
|
215
|
+
GOOGLE_PSE_API_KEY = "your-google-key-here" # For Google Programmable Search
|
216
|
+
GOOGLE_PSE_ENGINE_ID = "your-pse-id-here" # For Google Programmable Search
|
217
|
+
BRAVE_API_KEY = "your-brave-search-key-here" # For Brave Search
|
218
|
+
GUARDIAN_API_KEY = "your-guardian-key-here" # For The Guardian
|
219
|
+
```
|
220
|
+
|
221
|
+
No API key is required for Wikipedia, arXiv, PubMed, Semantic Scholar, or local collections.
|
222
|
+
|
223
|
+
## Local Document Search (RAG)
|
224
|
+
|
225
|
+
The system can search through your local documents using vector embeddings.
|
226
|
+
|
227
|
+
### Setting Up Document Collections
|
228
|
+
|
229
|
+
1. Define collections in `local_collections.toml`. Default collections include:
|
230
|
+
|
231
|
+
```toml
|
232
|
+
[project_docs]
|
233
|
+
name = "Project Documents"
|
234
|
+
description = "Project documentation and specifications"
|
235
|
+
paths = ["@format ${DOCS_DIR}/project_documents"]
|
236
|
+
enabled = true
|
237
|
+
embedding_model = "all-MiniLM-L6-v2"
|
238
|
+
embedding_device = "cpu"
|
239
|
+
embedding_model_type = "sentence_transformers"
|
240
|
+
max_results = 20
|
241
|
+
max_filtered_results = 5
|
242
|
+
chunk_size = 1000
|
243
|
+
chunk_overlap = 200
|
244
|
+
cache_dir = "__CACHE_DIR__/local_search/project_docs"
|
245
|
+
|
246
|
+
# More collections defined in the file...
|
247
|
+
```
|
248
|
+
|
249
|
+
2. Create your document directories:
|
250
|
+
- The `${DOCS_DIR}` variable points to a default location in your Documents folder
|
251
|
+
- Documents are automatically indexed when the search is first used
|
252
|
+
|
253
|
+
### Using Local Search
|
254
|
+
|
255
|
+
You can use local document search in several ways:
|
256
|
+
|
257
|
+
1. **Auto-selection**: Set `tool = "auto"` in the `[search]` section of `settings.toml`
|
258
|
+
2. **Explicit collection**: Set `tool = "project_docs"` to search only that collection
|
259
|
+
3. **All collections**: Set `tool = "local_all"` to search across all collections
|
260
|
+
4. **Query syntax**: Type `collection:project_docs your query` to target a specific collection
|
261
|
+
|
262
|
+
## Available Search Engines
|
263
|
+
|
264
|
+
| Engine | Purpose | API Key Required? |
|
265
|
+
|--------|---------|-------------------|
|
266
|
+
| `auto` | Intelligently selects the best engine | No |
|
267
|
+
| `wikipedia` | General knowledge and facts | No |
|
268
|
+
| `arxiv` | Scientific papers and research | No |
|
269
|
+
| `pubmed` | Medical and biomedical research | No |
|
270
|
+
| `semantic_scholar` | Academic literature across all fields | No |
|
271
|
+
| `github` | Code repositories and documentation | No (but rate-limited) |
|
272
|
+
| `brave` | Web search (privacy-focused) | Yes |
|
273
|
+
| `serpapi` | Google search results | Yes |
|
274
|
+
| `google_pse` | Custom Google search | Yes |
|
275
|
+
| `wayback` | Historical web content | No |
|
276
|
+
| `searxng` | Self-hosted web metasearch engine | No (requires a running SearXNG server) |
|
277
|
+
| Any collection name | Search your local documents | No |
|
278
|
+
|
279
|
+
> **Support Free Knowledge:** If you frequently use the search engines in this tool, please consider making a donation to these organizations. They provide valuable services and rely on user support to maintain their operations:
|
280
|
+
> - [Donate to Wikipedia](https://donate.wikimedia.org)
|
281
|
+
> - [Support arXiv](https://arxiv.org/about/give)
|
282
|
+
> - [Donate to DuckDuckGo](https://duckduckgo.com/donations)
|
283
|
+
> - [Support PubMed/NCBI](https://www.nlm.nih.gov/pubs/donations/donations.html)
|
284
|
+
|
285
|
+
## Advanced Configuration
|
286
|
+
|
287
|
+
### Research Parameters
|
288
|
+
|
289
|
+
Edit `settings.toml` to customize research parameters:
|
290
|
+
|
291
|
+
```toml
|
292
|
+
[search]
|
293
|
+
# Search tool to use (auto, wikipedia, arxiv, etc.)
|
294
|
+
tool = "auto"
|
295
|
+
|
296
|
+
# Number of research cycles
|
297
|
+
iterations = 2
|
298
|
+
|
299
|
+
# Questions generated per cycle
|
300
|
+
questions_per_iteration = 2
|
301
|
+
|
302
|
+
# Results per search query
|
303
|
+
max_results = 50
|
304
|
+
|
305
|
+
# Results after relevance filtering
|
306
|
+
max_filtered_results = 5
|
307
|
+
|
308
|
+
# More settings available...
|
309
|
+
```
|
310
|
+
|
311
|
+
## Web Interface
|
312
|
+
|
313
|
+
The web interface offers several features:
|
314
|
+
|
315
|
+
- **Dashboard**: Start and manage research queries
|
316
|
+
- **Real-time Updates**: Track research progress
|
317
|
+
- **Research History**: Access past queries
|
318
|
+
- **PDF Export**: Download reports
|
319
|
+
- **Research Management**: Terminate processes or delete records
|
320
|
+
|
321
|
+
## Command Line Interface
|
322
|
+
|
323
|
+
The CLI version allows you to:
|
324
|
+
|
325
|
+
1. Choose between a quick summary or detailed report
|
326
|
+
2. Enter your research query
|
327
|
+
3. View results directly in the terminal
|
328
|
+
4. Save reports automatically to the configured output directory
|
329
|
+
|
330
|
+
## Development Setup
|
331
|
+
|
332
|
+
If you want to develop or modify the package, you can install it in development mode:
|
333
|
+
|
334
|
+
```bash
|
335
|
+
# Clone the repository
|
336
|
+
git clone https://github.com/LearningCircuit/local-deep-research.git
|
337
|
+
cd local-deep-research
|
338
|
+
|
339
|
+
# Install in development mode
|
340
|
+
pip install -e .
|
341
|
+
```
|
342
|
+
|
343
|
+
This creates an "editable" installation that uses your local code, so any changes you make are immediately available without reinstalling.
|
344
|
+
|
345
|
+
You can run the application directly using Python module syntax:
|
346
|
+
|
347
|
+
```bash
|
348
|
+
# Run the web interface
|
349
|
+
python -m local_deep_research.web.app
|
350
|
+
|
351
|
+
# Run the CLI version
|
352
|
+
python -m local_deep_research.main
|
353
|
+
```
|
354
|
+
|
355
|
+
This approach is useful for development and debugging, as it provides more detailed error messages and allows you to make code changes on the fly.
|
356
|
+
|
357
|
+
## Example Research
|
358
|
+
|
359
|
+
The repository includes complete research examples like our [fusion energy research analysis](https://github.com/LearningCircuit/local-deep-research/blob/main/examples/fusion-energy-research-developments.md) showcasing the system's capabilities.
|
360
|
+
|
361
|
+
## Community & Support
|
362
|
+
|
363
|
+
Join our [Discord server](https://discord.gg/2E6gYU2Z) to exchange ideas, discuss usage patterns, and share research approaches.
|
364
|
+
|
365
|
+
## License
|
366
|
+
|
367
|
+
This project is licensed under the MIT License.
|
368
|
+
|
369
|
+
## Acknowledgments
|
370
|
+
|
371
|
+
- Built with [Ollama](https://ollama.ai) for local AI processing
|
372
|
+
- Search powered by multiple sources:
|
373
|
+
- [Wikipedia](https://www.wikipedia.org/) for factual knowledge (default search engine)
|
374
|
+
- [arXiv](https://arxiv.org/) for scientific papers
|
375
|
+
- [PubMed](https://pubmed.ncbi.nlm.nih.gov/) for biomedical literature
|
376
|
+
- [DuckDuckGo](https://duckduckgo.com) for web search
|
377
|
+
- [The Guardian](https://www.theguardian.com/) for journalism
|
378
|
+
- [SerpAPI](https://serpapi.com) for Google search results (requires API key)
|
379
|
+
- [SearXNG](https://searxng.org/) for self-hosted web search
|
380
|
+
- [Brave Search](https://search.brave.com/) for privacy-focused web search
|
381
|
+
- [Semantic Scholar](https://www.semanticscholar.org/) for academic literature
|
382
|
+
- Built on [LangChain](https://github.com/hwchase17/langchain) framework
|
383
|
+
- Uses [justext](https://github.com/miso-belica/justext), [Playwright](https://playwright.dev), [FAISS](https://github.com/facebookresearch/faiss), and more
|
384
|
+
|
385
|
+
## Contributing
|
386
|
+
|
387
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
388
|
+
|
389
|
+
1. Fork the repository
|
390
|
+
2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
|
391
|
+
3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
|
392
|
+
4. Push to the branch (`git push origin feature/AmazingFeature`)
|
393
|
+
5. Open a Pull Request
|
@@ -1,19 +1,18 @@
|
|
1
1
|
local_deep_research/__init__.py,sha256=pfHzjzYc6Szo8VCNLtFZRXyAlEz7CViY7r2fH9O7yms,584
|
2
|
-
local_deep_research/citation_handler.py,sha256=
|
3
|
-
local_deep_research/config.py,sha256=
|
4
|
-
local_deep_research/local_collections.py,sha256=SB-fdptT7qS0klJUVx_Rs9OgDwafMUgI46984WlZGKI,6076
|
2
|
+
local_deep_research/citation_handler.py,sha256=v_fwTy-2XvUuoH3OQRzmBrvaiN7mBk8jbNfySslmt5g,4357
|
3
|
+
local_deep_research/config.py,sha256=PAruLZutlrjkGOKrv49hk8U4q9JPWWgAKLiY8Ukpsks,8572
|
5
4
|
local_deep_research/main.py,sha256=uQXtGQ6LtZNd5Qw63D5ke4Q_LjYimouWVSUknVsk3JQ,3645
|
6
|
-
local_deep_research/report_generator.py,sha256=
|
7
|
-
local_deep_research/search_system.py,sha256=
|
5
|
+
local_deep_research/report_generator.py,sha256=EvaArnWirMgg42fMzmZeJczoEYujEbJ2ryHHYuuoXx8,8058
|
6
|
+
local_deep_research/search_system.py,sha256=yY3BEzX68vdtUcYF9h6lC3yVao0YA_NSBj6W3-RwlKk,15459
|
8
7
|
local_deep_research/defaults/__init__.py,sha256=2Vvlkl-gmP_qPYWegE4JBgummypogl3VXrQ1XzptFDU,1381
|
9
|
-
local_deep_research/defaults/llm_config.py,sha256=
|
8
|
+
local_deep_research/defaults/llm_config.py,sha256=T03pntyNtOk1fvu-RZ-iEoh7L2D2hcICr8usIPpULuo,7870
|
10
9
|
local_deep_research/defaults/local_collections.toml,sha256=_edVWVHrhunMfazjejhJlGPRkHKKIP51qQtNkMgNEiA,1406
|
11
|
-
local_deep_research/defaults/main.toml,sha256=
|
10
|
+
local_deep_research/defaults/main.toml,sha256=DLhFq88vdE2_psLaWhPV9BWPixqTHvR2Rllaj_rmjJ4,1403
|
12
11
|
local_deep_research/defaults/search_engines.toml,sha256=TYkOqVaZq9JPawz4fIPyGdkAtYa4t8F9H50VY-wv2ak,8101
|
13
12
|
local_deep_research/utilties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
13
|
local_deep_research/utilties/enums.py,sha256=TVAZiu9szNbdacfb7whgaQJJlSk7oYByADaAierD4CE,229
|
15
14
|
local_deep_research/utilties/llm_utils.py,sha256=IGv-_gJWqLTpO3_op1NHIwxKaFEzmXhhVYSLTTSMnIA,4522
|
16
|
-
local_deep_research/utilties/search_utilities.py,sha256
|
15
|
+
local_deep_research/utilties/search_utilities.py,sha256=-wj_-HCotqDUXYcPaKGN1C_t6WH4xubNhem4RE60AuM,4098
|
17
16
|
local_deep_research/utilties/setup_utils.py,sha256=t6GNp7lK1nLPdPNCkYUk82IATGM62vqy8UBD-KqShOs,215
|
18
17
|
local_deep_research/web/__init__.py,sha256=3oHMatNu8r24FBtpojriIVbHYOVSHj4Q-quycMKOuDk,62
|
19
18
|
local_deep_research/web/app.py,sha256=5_VLNdhJOqdgacucglUdS_lVURNgYNbXhK9vME6JmzA,72431
|
@@ -28,30 +27,29 @@ local_deep_research/web/templates/search_engines_config.html,sha256=z_krznfdhF3e
|
|
28
27
|
local_deep_research/web/templates/settings.html,sha256=S9A-tdpzMhP2Zw7kp2jxKlwaWtoRil_IHX_a376vXsA,29575
|
29
28
|
local_deep_research/web/templates/settings_dashboard.html,sha256=De-v1KNdVvkXme5i3YZ6sIfU9aAKDc_N-AW9n4PZoso,9109
|
30
29
|
local_deep_research/web_search_engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
31
|
-
local_deep_research/web_search_engines/
|
32
|
-
local_deep_research/web_search_engines/search_engine_base.py,sha256=Knmf45pMYd7hYc9x8jG8gOtMnribsXDcOXGrA50LK3E,8100
|
30
|
+
local_deep_research/web_search_engines/search_engine_base.py,sha256=QmhfjuHK2deomh8tARghKuYnF-5t3wwBB661odS2VtU,8065
|
33
31
|
local_deep_research/web_search_engines/search_engine_factory.py,sha256=B_QaqoAwnVXCmHNdqGbo94LekWY6wpBw_PWNkI120qE,10728
|
34
32
|
local_deep_research/web_search_engines/search_engines_config.py,sha256=bNCuR09NOk5cjnKIgDQfhPipqmvDKeE7WP_6p8LLZf0,1979
|
35
33
|
local_deep_research/web_search_engines/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
36
|
-
local_deep_research/web_search_engines/engines/full_search.py,sha256=
|
34
|
+
local_deep_research/web_search_engines/engines/full_search.py,sha256=BuOz8dX-XocazCG7gGBKFnIY99FZtNFI0-Wq3fhsfp4,4689
|
37
35
|
local_deep_research/web_search_engines/engines/meta_search_engine.py,sha256=Zky4sowCortEaIj1pBU0sKuggXr5izkubgrD7cM8IOQ,11485
|
38
|
-
local_deep_research/web_search_engines/engines/search_engine_arxiv.py,sha256=
|
39
|
-
local_deep_research/web_search_engines/engines/search_engine_brave.py,sha256=
|
40
|
-
local_deep_research/web_search_engines/engines/search_engine_ddg.py,sha256
|
36
|
+
local_deep_research/web_search_engines/engines/search_engine_arxiv.py,sha256=cf8OzhSzE1zqaiOZ6EFQGy_6hTCJMaTysYd8rs1KJNU,15408
|
37
|
+
local_deep_research/web_search_engines/engines/search_engine_brave.py,sha256=J242byUGG5ROQ_bh-mU292_t7Q7m20_9O0r1w5z6d9A,9688
|
38
|
+
local_deep_research/web_search_engines/engines/search_engine_ddg.py,sha256=qK2i65dbPtr_ppoKPU_YA0mDqM_sDAvN6ZztvdFjsCk,4910
|
41
39
|
local_deep_research/web_search_engines/engines/search_engine_github.py,sha256=qqipsw2ycjlRbR6mmMmxzGU3LEcFDJJJ7Ez7xUgWjRM,26768
|
42
40
|
local_deep_research/web_search_engines/engines/search_engine_google_pse.py,sha256=YkXvBmgcqTImCxuyy6580SGRAvImGc6SzInXZgo1kNE,11294
|
43
41
|
local_deep_research/web_search_engines/engines/search_engine_guardian.py,sha256=MW4WIwtNAwcpdigNXronyezAxr50EIZTV1NMedrAv2o,23912
|
44
|
-
local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=
|
42
|
+
local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=rfmPiA9DVmjbaB3KQtlq7s6BRMgHRgzP7AhktZNDw2M,36772
|
45
43
|
local_deep_research/web_search_engines/engines/search_engine_local_all.py,sha256=CRNcxBzNd9kanyIJYaUDB7qfXYxVCvd4L2mX8jL73v0,5955
|
46
44
|
local_deep_research/web_search_engines/engines/search_engine_pubmed.py,sha256=MayfzM2R0XoI7cpXlG1XJ1ktfTN_6H-Xs9RmD89UAao,39236
|
47
45
|
local_deep_research/web_search_engines/engines/search_engine_searxng.py,sha256=GMy6qDMSaVBtjWRm48XBu6TjLAy1HfcO2EFTwr8S9rk,18048
|
48
46
|
local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py,sha256=6VMymjFJ7pyV2nv5dRfFofXgg0kG82rkwbICVnNDNH4,23352
|
49
|
-
local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=
|
47
|
+
local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=XikEYnM-pAaR70VeAJ28lbqpRzCj4bCA9xY29taTV8g,9215
|
50
48
|
local_deep_research/web_search_engines/engines/search_engine_wayback.py,sha256=astAvSLajDZ6rwgthJ3iBcHSWuDSYPO7uilIxaJhXmU,18132
|
51
49
|
local_deep_research/web_search_engines/engines/search_engine_wikipedia.py,sha256=KSGJECbEcxZpVK-PhYsTCtzedSK0l1AjQmvGtx8KBks,9799
|
52
|
-
local_deep_research-0.1.
|
53
|
-
local_deep_research-0.1.
|
54
|
-
local_deep_research-0.1.
|
55
|
-
local_deep_research-0.1.
|
56
|
-
local_deep_research-0.1.
|
57
|
-
local_deep_research-0.1.
|
50
|
+
local_deep_research-0.1.17.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
|
51
|
+
local_deep_research-0.1.17.dist-info/METADATA,sha256=2uPvdSw878o2XCdEYAy4LlWCi_TrvwDz3hQBHq5z4CE,14428
|
52
|
+
local_deep_research-0.1.17.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
|
53
|
+
local_deep_research-0.1.17.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
|
54
|
+
local_deep_research-0.1.17.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
|
55
|
+
local_deep_research-0.1.17.dist-info/RECORD,,
|
@@ -1,141 +0,0 @@
|
|
1
|
-
# local_collections.py
|
2
|
-
"""
|
3
|
-
Configuration file for local document collections.
|
4
|
-
Each collection functions as an independent search engine.
|
5
|
-
"""
|
6
|
-
|
7
|
-
import os
|
8
|
-
from typing import Dict, Any
|
9
|
-
|
10
|
-
# Registry of local document collections
|
11
|
-
# Each collection appears as a separate search engine in the main configuration
|
12
|
-
LOCAL_COLLECTIONS = {
|
13
|
-
# Project Documents Collection
|
14
|
-
"project_docs": {
|
15
|
-
"name": "Project Documents",
|
16
|
-
"description": "Project documentation and specifications",
|
17
|
-
"paths": [os.path.abspath("./local_search_files/project_documents")],
|
18
|
-
"enabled": True,
|
19
|
-
"embedding_model": "all-MiniLM-L6-v2",
|
20
|
-
"embedding_device": "cpu",
|
21
|
-
"embedding_model_type": "sentence_transformers",
|
22
|
-
"max_results": 20,
|
23
|
-
"max_filtered_results": 5,
|
24
|
-
"chunk_size": 1000,
|
25
|
-
"chunk_overlap": 200,
|
26
|
-
"cache_dir": ".cache/local_search/project_docs"
|
27
|
-
},
|
28
|
-
|
29
|
-
# Research Papers Collection
|
30
|
-
"research_papers": {
|
31
|
-
"name": "Research Papers",
|
32
|
-
"description": "Academic research papers and articles",
|
33
|
-
"paths": [os.path.abspath("local_search_files/research_papers")],
|
34
|
-
"enabled": True,
|
35
|
-
"embedding_model": "all-MiniLM-L6-v2",
|
36
|
-
"embedding_device": "cpu",
|
37
|
-
"embedding_model_type": "sentence_transformers",
|
38
|
-
"max_results": 20,
|
39
|
-
"max_filtered_results": 5,
|
40
|
-
"chunk_size": 800, # Smaller chunks for academic content
|
41
|
-
"chunk_overlap": 150,
|
42
|
-
"cache_dir": ".cache/local_search/research_papers"
|
43
|
-
},
|
44
|
-
|
45
|
-
# Personal Notes Collection
|
46
|
-
"personal_notes": {
|
47
|
-
"name": "Personal Notes",
|
48
|
-
"description": "Personal notes and documents",
|
49
|
-
"paths": [os.path.abspath("./local_search_files/personal_notes")],
|
50
|
-
"enabled": True,
|
51
|
-
"embedding_model": "all-MiniLM-L6-v2",
|
52
|
-
"embedding_device": "cpu",
|
53
|
-
"embedding_model_type": "sentence_transformers",
|
54
|
-
"max_results": 30,
|
55
|
-
"max_filtered_results": 10,
|
56
|
-
"chunk_size": 500, # Smaller chunks for notes
|
57
|
-
"chunk_overlap": 100,
|
58
|
-
"cache_dir": ".cache/local_search/personal_notes"
|
59
|
-
}
|
60
|
-
}
|
61
|
-
|
62
|
-
# Configuration for local search integration
|
63
|
-
LOCAL_SEARCH_CONFIG = {
|
64
|
-
# General embedding options
|
65
|
-
"DEFAULT_EMBEDDING_MODEL": "all-MiniLM-L6-v2",
|
66
|
-
"DEFAULT_EMBEDDING_DEVICE": "cpu", # "cpu" or "cuda" for GPU acceleration
|
67
|
-
"DEFAULT_EMBEDDING_MODEL_TYPE": "sentence_transformers", # or "ollama"
|
68
|
-
|
69
|
-
# Ollama settings (only used if model type is "ollama")
|
70
|
-
# Note: You must run 'ollama pull nomic-embed-text' first if using Ollama for embeddings
|
71
|
-
"OLLAMA_BASE_URL": "http://localhost:11434",
|
72
|
-
"OLLAMA_EMBEDDING_MODEL": "nomic-embed-text",
|
73
|
-
|
74
|
-
# Default indexing options
|
75
|
-
"FORCE_REINDEX": True, # Force reindexing on startup
|
76
|
-
"CACHE_DIR": ".cache/local_search", # Base directory for cache
|
77
|
-
}
|
78
|
-
|
79
|
-
def register_local_collections(search_engines_dict: Dict[str, Any]) -> None:
|
80
|
-
"""
|
81
|
-
Register all enabled local collections as search engines.
|
82
|
-
|
83
|
-
Args:
|
84
|
-
search_engines_dict: The main search engines dictionary to update
|
85
|
-
"""
|
86
|
-
for collection_id, collection in LOCAL_COLLECTIONS.items():
|
87
|
-
print(collection_id, collection)
|
88
|
-
if collection.get("enabled", True):
|
89
|
-
# Skip if already defined (don't override)
|
90
|
-
if collection_id in search_engines_dict:
|
91
|
-
continue
|
92
|
-
|
93
|
-
# Validate paths exist
|
94
|
-
paths = collection.get("paths", [])
|
95
|
-
valid_paths = []
|
96
|
-
for path in paths:
|
97
|
-
if os.path.exists(path) and os.path.isdir(path):
|
98
|
-
valid_paths.append(path)
|
99
|
-
else:
|
100
|
-
print(f"Warning: Collection '{collection_id}' contains non-existent folder: {path}")
|
101
|
-
|
102
|
-
# Log warning if no valid paths
|
103
|
-
if not valid_paths and paths:
|
104
|
-
print(f"Warning: Collection '{collection_id}' has no valid folders. It will be registered but won't return results.")
|
105
|
-
|
106
|
-
# Create a search engine entry for this collection
|
107
|
-
search_engines_dict[collection_id] = {
|
108
|
-
"module_path": "local_deep_research.web_search_engines.engines.search_engine_local",
|
109
|
-
"class_name": "LocalSearchEngine",
|
110
|
-
"requires_api_key": False,
|
111
|
-
"reliability": 0.9, # High reliability for local documents
|
112
|
-
"strengths": ["personal documents", "offline access",
|
113
|
-
collection.get("description", "local documents")],
|
114
|
-
"weaknesses": ["requires indexing", "limited to specific folders"],
|
115
|
-
"default_params": {
|
116
|
-
"folder_paths": collection.get("paths", []),
|
117
|
-
"embedding_model": collection.get(
|
118
|
-
"embedding_model",
|
119
|
-
LOCAL_SEARCH_CONFIG["DEFAULT_EMBEDDING_MODEL"]
|
120
|
-
),
|
121
|
-
"embedding_device": collection.get(
|
122
|
-
"embedding_device",
|
123
|
-
LOCAL_SEARCH_CONFIG["DEFAULT_EMBEDDING_DEVICE"]
|
124
|
-
),
|
125
|
-
"embedding_model_type": collection.get(
|
126
|
-
"embedding_model_type",
|
127
|
-
LOCAL_SEARCH_CONFIG["DEFAULT_EMBEDDING_MODEL_TYPE"]
|
128
|
-
),
|
129
|
-
"chunk_size": collection.get("chunk_size", 1000),
|
130
|
-
"chunk_overlap": collection.get("chunk_overlap", 200),
|
131
|
-
"cache_dir": collection.get(
|
132
|
-
"cache_dir",
|
133
|
-
f"{LOCAL_SEARCH_CONFIG['CACHE_DIR']}/{collection_id}"
|
134
|
-
),
|
135
|
-
"max_results": collection.get("max_results", 20),
|
136
|
-
"max_filtered_results": collection.get("max_filtered_results", 5),
|
137
|
-
"collection_name": collection.get("name", collection_id),
|
138
|
-
"collection_description": collection.get("description", "")
|
139
|
-
},
|
140
|
-
"requires_llm": True
|
141
|
-
}
|