local-deep-research 0.1.17__tar.gz → 0.1.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {local_deep_research-0.1.17/src/local_deep_research.egg-info → local_deep_research-0.1.19}/PKG-INFO +41 -18
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/README.md +41 -18
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/pyproject.toml +2 -2
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/config.py +23 -0
- local_deep_research-0.1.19/src/local_deep_research/defaults/llm_config.py +307 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/defaults/local_collections.toml +10 -4
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/defaults/main.toml +25 -7
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/static/css/styles.css +43 -0
- local_deep_research-0.1.19/src/local_deep_research/web/static/sounds/README.md +29 -0
- local_deep_research-0.1.19/src/local_deep_research/web/static/sounds/error.mp3 +0 -0
- local_deep_research-0.1.19/src/local_deep_research/web/static/sounds/success.mp3 +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/meta_search_engine.py +17 -8
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_local.py +61 -21
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_local_all.py +7 -5
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/search_engine_factory.py +11 -2
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/search_engines_config.py +24 -7
- {local_deep_research-0.1.17 → local_deep_research-0.1.19/src/local_deep_research.egg-info}/PKG-INFO +41 -18
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research.egg-info/SOURCES.txt +3 -0
- local_deep_research-0.1.17/src/local_deep_research/defaults/llm_config.py +0 -269
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/LICENSE +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/MANIFEST.in +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/requirements.txt +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/setup.cfg +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/__init__.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/citation_handler.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/defaults/__init__.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/defaults/search_engines.toml +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/main.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/report_generator.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/search_system.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/utilties/__init__.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/utilties/enums.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/utilties/llm_utils.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/utilties/search_utilities.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/utilties/setup_utils.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/__init__.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/app.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/static/js/app.js +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/api_keys_config.html +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/collections_config.html +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/index.html +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/llm_config.html +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/main_config.html +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/search_engines_config.html +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/settings.html +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/settings_dashboard.html +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/__init__.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/__init__.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/full_search.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_arxiv.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_brave.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_ddg.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_github.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_google_pse.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_guardian.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_pubmed.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_searxng.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_serpapi.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_wayback.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/search_engine_base.py +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research.egg-info/dependency_links.txt +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research.egg-info/entry_points.txt +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research.egg-info/requires.txt +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research.egg-info/top_level.txt +0 -0
- {local_deep_research-0.1.17 → local_deep_research-0.1.19}/tests/test_google_pse.py +0 -0
{local_deep_research-0.1.17/src/local_deep_research.egg-info → local_deep_research-0.1.19}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: local-deep-research
-Version: 0.1.17
+Version: 0.1.19
 Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
 Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
 License: MIT License
@@ -71,6 +71,14 @@ Dynamic: license-file
 
 A powerful AI-powered research assistant that performs deep, iterative analysis using multiple LLMs and web searches. The system can be run locally for privacy or configured to use cloud-based LLMs for enhanced capabilities.
 
+<div align="center">
+  <a href="https://www.youtube.com/watch?v=0ISreg9q0p0">
+    <img src="https://img.youtube.com/vi/0ISreg9q0p0/0.jpg" alt="Local Deep Research">
+    <br>
+    <span>▶️ Watch Video</span>
+  </a>
+</div>
+
 ## Quick Start
 
 ```bash
@@ -101,7 +109,7 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
 
 - 🔍 **Advanced Research Capabilities**
   - Automated deep research with intelligent follow-up questions
-  -
+  - Proper inline citation and source verification
   - Multi-iteration analysis for comprehensive coverage
   - Full webpage content analysis (not just snippets)
 
@@ -112,8 +120,8 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
   - Configurable model selection based on needs
 
 - 📊 **Rich Output Options**
-  - Detailed research findings with citations
-  -
+  - Detailed research findings with proper citations
+  - Well-structured comprehensive research reports
   - Quick summaries for rapid insights
   - Source tracking and verification
 
@@ -128,6 +136,12 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
   - **Local RAG search for private documents** - search your own documents with vector embeddings
   - Full webpage content retrieval and intelligent filtering
 
+- 🎓 **Academic & Scientific Integration**
+  - Direct integration with PubMed, arXiv, Wikipedia, Semantic Scholar
+  - Properly formatted citations from academic sources
+  - Report structure suitable for literature reviews
+  - Cross-disciplinary synthesis of information
+
 ## Configuration System
 
 The package automatically creates and manages configuration files in your user directory:
@@ -137,6 +151,8 @@ The package automatically creates and manages configuration files in your user d
 
 ### Default Configuration Files
 
+If you prefer environment variables, please refer to this file: https://github.com/LearningCircuit/local-deep-research/blob/main/docs/env_configuration.md
+
 When you first run the tool, it creates these configuration files:
 
 | File | Purpose |
@@ -209,6 +225,10 @@ The `OPENAI_ENDPOINT` provider can access any service with an OpenAI-compatible
 
 The system includes multiple search engines. Some require API keys:
 
+Use a .env file in the config folder if .secrets.toml doesn't work.
+
+You can also override other settings via environment variables; e.g., to override the [web] port setting in settings.toml, use: **LDR_WEB__PORT=8080**
+
 ```toml
 # Add to .secrets.toml
 SERP_API_KEY = "your-serpapi-key-here" # For Google results via SerpAPI
@@ -261,20 +281,20 @@ You can use local document search in several ways:
 
 ## Available Search Engines
 
-| Engine | Purpose | API Key Required? |
-|--------|---------|-------------------|
-| `auto` | Intelligently selects the best engine | No |
-| `wikipedia` | General knowledge and facts | No |
-| `arxiv` | Scientific papers and research | No |
-| `pubmed` | Medical and biomedical research | No |
-| `semantic_scholar` | Academic literature across all fields | No |
-| `github` | Code repositories and documentation | No (
-| `brave` | Web search (privacy-focused) | Yes |
-| `serpapi` | Google search results | Yes |
-| `google_pse` | Custom Google search | Yes |
-| `wayback` | Historical web content | No |
-| `searxng` | Local web search engine | No (requires server) |
-| Any collection name | Search your local documents | No |
+| Engine | Purpose | API Key Required? | Rate Limit |
+|--------|---------|-------------------|------------|
+| `auto` | Intelligently selects the best engine | No | Based on selected engine |
+| `wikipedia` | General knowledge and facts | No | No strict limit |
+| `arxiv` | Scientific papers and research | No | No strict limit |
+| `pubmed` | Medical and biomedical research | No | No strict limit |
+| `semantic_scholar` | Academic literature across all fields | No | 100/5min |
+| `github` | Code repositories and documentation | No | 60/hour (unauthenticated) |
+| `brave` | Web search (privacy-focused) | Yes | Based on plan |
+| `serpapi` | Google search results | Yes | Based on plan |
+| `google_pse` | Custom Google search | Yes | 100/day free tier |
+| `wayback` | Historical web content | No | No strict limit |
+| `searxng` | Local web search engine | No (requires local server) | No limit |
+| Any collection name | Search your local documents | No | No limit |
 
 > **Support Free Knowledge:** If you frequently use the search engines in this tool, please consider making a donation to these organizations. They provide valuable services and rely on user support to maintain their operations:
 > - [Donate to Wikipedia](https://donate.wikimedia.org)
@@ -318,6 +338,9 @@ The web interface offers several features:
 - **PDF Export**: Download reports
 - **Research Management**: Terminate processes or delete records
 
+![](…)
+![](…)
+
 ## Command Line Interface
 
 The CLI version allows you to:
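A note on the `LDR_WEB__PORT=8080` override documented above: the `LDR_` prefix with double-underscore nesting matches Dynaconf's environment-variable convention, although this diff does not show the settings loader itself. A minimal sketch, assuming Dynaconf:

```python
# Sketch only: assumes the settings object is built with Dynaconf, which maps
# LDR_<SECTION>__<KEY> environment variables onto nested TOML settings.
import os

from dynaconf import Dynaconf

os.environ["LDR_WEB__PORT"] = "8080"  # overrides [web] port from settings.toml

settings = Dynaconf(
    envvar_prefix="LDR",                                # read LDR_* variables
    settings_files=["settings.toml", ".secrets.toml"],  # file names as in the README
)

print(settings.web.port)  # -> 8080
```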
{local_deep_research-0.1.17 → local_deep_research-0.1.19}/README.md
RENAMED
@@ -2,6 +2,14 @@
 
 A powerful AI-powered research assistant that performs deep, iterative analysis using multiple LLMs and web searches. The system can be run locally for privacy or configured to use cloud-based LLMs for enhanced capabilities.
 
+<div align="center">
+  <a href="https://www.youtube.com/watch?v=0ISreg9q0p0">
+    <img src="https://img.youtube.com/vi/0ISreg9q0p0/0.jpg" alt="Local Deep Research">
+    <br>
+    <span>▶️ Watch Video</span>
+  </a>
+</div>
+
 ## Quick Start
 
 ```bash
@@ -32,7 +40,7 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
 
 - 🔍 **Advanced Research Capabilities**
   - Automated deep research with intelligent follow-up questions
-  -
+  - Proper inline citation and source verification
   - Multi-iteration analysis for comprehensive coverage
   - Full webpage content analysis (not just snippets)
 
@@ -43,8 +51,8 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
   - Configurable model selection based on needs
 
 - 📊 **Rich Output Options**
-  - Detailed research findings with citations
-  -
+  - Detailed research findings with proper citations
+  - Well-structured comprehensive research reports
   - Quick summaries for rapid insights
   - Source tracking and verification
 
@@ -59,6 +67,12 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
   - **Local RAG search for private documents** - search your own documents with vector embeddings
   - Full webpage content retrieval and intelligent filtering
 
+- 🎓 **Academic & Scientific Integration**
+  - Direct integration with PubMed, arXiv, Wikipedia, Semantic Scholar
+  - Properly formatted citations from academic sources
+  - Report structure suitable for literature reviews
+  - Cross-disciplinary synthesis of information
+
 ## Configuration System
 
 The package automatically creates and manages configuration files in your user directory:
@@ -68,6 +82,8 @@ The package automatically creates and manages configuration files in your user d
 
 ### Default Configuration Files
 
+If you prefer environment variables, please refer to this file: https://github.com/LearningCircuit/local-deep-research/blob/main/docs/env_configuration.md
+
 When you first run the tool, it creates these configuration files:
 
 | File | Purpose |
@@ -140,6 +156,10 @@ The `OPENAI_ENDPOINT` provider can access any service with an OpenAI-compatible
 
 The system includes multiple search engines. Some require API keys:
 
+Use a .env file in the config folder if .secrets.toml doesn't work.
+
+You can also override other settings via environment variables; e.g., to override the [web] port setting in settings.toml, use: **LDR_WEB__PORT=8080**
+
 ```toml
 # Add to .secrets.toml
 SERP_API_KEY = "your-serpapi-key-here" # For Google results via SerpAPI
@@ -192,20 +212,20 @@ You can use local document search in several ways:
 
 ## Available Search Engines
 
-| Engine | Purpose | API Key Required? |
-|--------|---------|-------------------|
-| `auto` | Intelligently selects the best engine | No |
-| `wikipedia` | General knowledge and facts | No |
-| `arxiv` | Scientific papers and research | No |
-| `pubmed` | Medical and biomedical research | No |
-| `semantic_scholar` | Academic literature across all fields | No |
-| `github` | Code repositories and documentation | No (
-| `brave` | Web search (privacy-focused) | Yes |
-| `serpapi` | Google search results | Yes |
-| `google_pse` | Custom Google search | Yes |
-| `wayback` | Historical web content | No |
-| `searxng` | Local web search engine | No (requires server) |
-| Any collection name | Search your local documents | No |
+| Engine | Purpose | API Key Required? | Rate Limit |
+|--------|---------|-------------------|------------|
+| `auto` | Intelligently selects the best engine | No | Based on selected engine |
+| `wikipedia` | General knowledge and facts | No | No strict limit |
+| `arxiv` | Scientific papers and research | No | No strict limit |
+| `pubmed` | Medical and biomedical research | No | No strict limit |
+| `semantic_scholar` | Academic literature across all fields | No | 100/5min |
+| `github` | Code repositories and documentation | No | 60/hour (unauthenticated) |
+| `brave` | Web search (privacy-focused) | Yes | Based on plan |
+| `serpapi` | Google search results | Yes | Based on plan |
+| `google_pse` | Custom Google search | Yes | 100/day free tier |
+| `wayback` | Historical web content | No | No strict limit |
+| `searxng` | Local web search engine | No (requires local server) | No limit |
+| Any collection name | Search your local documents | No | No limit |
 
 > **Support Free Knowledge:** If you frequently use the search engines in this tool, please consider making a donation to these organizations. They provide valuable services and rely on user support to maintain their operations:
 > - [Donate to Wikipedia](https://donate.wikimedia.org)
@@ -249,6 +269,9 @@ The web interface offers several features:
 - **PDF Export**: Download reports
 - **Research Management**: Terminate processes or delete records
 
+![](…)
+![](…)
+
 ## Command Line Interface
 
 The CLI version allows you to:
@@ -321,4 +344,4 @@ Contributions are welcome! Please feel free to submit a Pull Request.
 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
 4. Push to the branch (`git push origin feature/AmazingFeature`)
-5. Open a Pull Request
+5. Open a Pull Request
{local_deep_research-0.1.17 → local_deep_research-0.1.19}/pyproject.toml
RENAMED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "local-deep-research"
-version = "0.1.17"
+version = "0.1.19"
 description = "AI-powered research assistant with deep, iterative analysis using LLMs and web searches"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -66,4 +66,4 @@ include-package-data = true
 
 [tool.setuptools.package-data]
 "local_deep_research.web" = ["templates/*", "static/*", "static/**/*"]
-"local_deep_research.defaults" = ["*.toml", "*.py"]
+"local_deep_research.defaults" = ["*.toml", "*.py", "*.template"]
{local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/config.py
RENAMED
@@ -145,6 +145,19 @@ def init_config_files():
         if not os.path.exists(search_engines_file) and os.path.exists(default_engines):
             shutil.copyfile(default_engines, search_engines_file)
             logger.info(f"Created search_engines.toml at {search_engines_file}")
+
+        # Create .env.template if it doesn't exist
+        env_template_file = CONFIG_DIR / ".env.template"
+        if not env_template_file.exists():
+            shutil.copy(defaults_dir / ".env.template", env_template_file)
+            logger.info(f"Created .env.template at {env_template_file}")
+
+        # Optionally create an empty .env file if it doesn't exist
+        env_file = CONFIG_DIR / ".env"
+        if not env_file.exists():
+            with open(env_file, "w") as f:
+                f.write("# Add your environment variables here\n")
+            logger.info(f"Created empty .env file at {env_file}")
     except Exception as e:
         logger.error(f"Error initializing Windows config files: {e}")
     else:
@@ -183,7 +196,17 @@ def init_config_files():
     if not search_engines_file.exists():
         shutil.copy(defaults_dir / "search_engines.toml", search_engines_file)
         logger.info(f"Created search_engines.toml at {search_engines_file}")
+    env_template_file = CONFIG_DIR / ".env.template"
+    if not env_template_file.exists():
+        shutil.copy(defaults_dir / ".env.template", env_template_file)
+        logger.info(f"Created .env.template at {env_template_file}")
 
+    # Optionally create an empty .env file if it doesn't exist
+    env_file = CONFIG_DIR / ".env"
+    if not env_file.exists():
+        with open(env_file, "w") as f:
+            f.write("# Add your environment variables here\n")
+        logger.info(f"Created empty .env file at {env_file}")
     secrets_file = CONFIG_DIR / ".secrets.toml"
     if not secrets_file.exists():
         with open(secrets_file, "w") as f:
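The hunks above only create `.env.template` and an empty `.env`; the loading side is not part of this diff. If the file is consumed with python-dotenv (an assumption, not confirmed here), the pattern would look like:

```python
# Hypothetical consumer of the .env file created by init_config_files(); the
# actual loading code is not shown in this diff. Requires python-dotenv.
from pathlib import Path

from dotenv import load_dotenv

# Illustrative config location; the real CONFIG_DIR is platform-dependent.
CONFIG_DIR = Path.home() / ".config" / "local_deep_research"

# Populate os.environ with keys like ANTHROPIC_API_KEY or LDR_WEB__PORT
# before the settings object is constructed.
load_dotenv(CONFIG_DIR / ".env")
```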
local_deep_research-0.1.19/src/local_deep_research/defaults/llm_config.py
ADDED
@@ -0,0 +1,307 @@
+"""
+LLM configuration for Local Deep Research.
+
+This file controls which language models are available and how they're configured.
+You can customize model selection, parameters, and fallbacks here.
+"""
+
+from langchain_anthropic import ChatAnthropic
+from langchain_openai import ChatOpenAI
+from langchain_ollama import ChatOllama
+from langchain_community.llms import VLLM
+from local_deep_research.config import settings
+import os
+import logging
+
+# Initialize environment
+logger = logging.getLogger(__name__)
+
+# Valid provider options
+VALID_PROVIDERS = ["ollama", "openai", "anthropic", "vllm", "openai_endpoint", "lmstudio", "llamacpp", "none"]
+
+# ================================
+# LLM FUNCTIONS
+# ================================
+
+def get_llm(model_name=None, temperature=None, provider=None):
+    """
+    Get LLM instance based on model name and provider.
+
+    Args:
+        model_name: Name of the model to use (if None, uses settings.llm.model)
+        temperature: Model temperature (if None, uses settings.llm.temperature)
+        provider: Provider to use (if None, uses settings.llm.provider)
+
+    Returns:
+        A LangChain LLM instance
+    """
+    # Use settings values for parameters if not provided
+    if model_name is None:
+        model_name = settings.llm.model
+
+    if temperature is None:
+        temperature = settings.llm.temperature
+
+    if provider is None:
+        provider = settings.llm.provider.lower()
+        if provider not in VALID_PROVIDERS:
+            logger.error(f"Invalid provider in settings: {provider}")
+            raise ValueError(f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}")
+
+    # Common parameters for all models
+    common_params = {
+        "temperature": temperature,
+        "max_tokens": settings.llm.max_tokens,
+    }
+
+    # Handle different providers
+    if provider == "anthropic":
+        api_key = settings.get('ANTHROPIC_API_KEY', '')
+        if not api_key:
+            logger.warning("ANTHROPIC_API_KEY not found. Falling back to default model.")
+            return get_fallback_model(temperature)
+
+        return ChatAnthropic(
+            model=model_name, anthropic_api_key=api_key, **common_params
+        )
+
+    elif provider == "openai":
+        api_key = settings.get('OPENAI_API_KEY', '')
+        if not api_key:
+            logger.warning("OPENAI_API_KEY not found. Falling back to default model.")
+            return get_fallback_model(temperature)
+
+        return ChatOpenAI(model=model_name, api_key=api_key, **common_params)
+
+    elif provider == "openai_endpoint":
+        api_key = settings.get('OPENAI_ENDPOINT_API_KEY', '')
+
+        if not api_key:
+            logger.warning("OPENAI_ENDPOINT_API_KEY not found. Falling back to default model.")
+            return get_fallback_model(temperature)
+
+        # Get endpoint URL from settings
+        openai_endpoint_url = settings.llm.openai_endpoint_url
+
+        return ChatOpenAI(
+            model=model_name,
+            api_key=api_key,
+            openai_api_base=openai_endpoint_url,
+            **common_params
+        )
+
+    elif provider == "vllm":
+        try:
+            return VLLM(
+                model=model_name,
+                trust_remote_code=True,
+                max_new_tokens=128,
+                top_k=10,
+                top_p=0.95,
+                temperature=temperature,
+            )
+        except Exception as e:
+            logger.error(f"Error loading VLLM model: {e}")
+            logger.warning("Falling back.")
+            return get_fallback_model(temperature)
+
+    elif provider == "ollama":
+        try:
+            # Use the configurable Ollama base URL
+            base_url = settings.get('OLLAMA_BASE_URL', settings.llm.get('ollama_base_url', 'http://localhost:11434'))
+            return ChatOllama(model=model_name, base_url=base_url, **common_params)
+        except Exception as e:
+            logger.error(f"Error loading Ollama model: {e}")
+            return get_fallback_model(temperature)
+
+    elif provider == "lmstudio":
+        try:
+            # Import LM Studio package
+            import lmstudio
+            from langchain_core.language_models import BaseLLM
+
+            # Get LM Studio URL from settings
+            lmstudio_url = settings.llm.get('lmstudio_url', "http://localhost:1234")
+
+            # Create LM Studio LLM instance
+            model = lmstudio.llm(model_name)
+
+            # Return a LangChain compatible wrapper
+            class LMStudioLLM(BaseLLM):
+                def _call(self, prompt, stop=None, **kwargs):
+                    result = model.complete(prompt, temperature=temperature)
+                    return result.completion
+
+                @property
+                def _identifying_params(self):
+                    return {"model_name": model_name}
+
+                @property
+                def _llm_type(self):
+                    return "lmstudio"
+
+            return LMStudioLLM()
+        except ImportError:
+            logger.error("LM Studio package not installed. Run 'pip install lmstudio'")
+            raise ImportError("LM Studio package not installed. Run 'pip install lmstudio'")
+
+    elif provider == "llamacpp":
+        try:
+            # Import LlamaCpp
+            from langchain_community.llms import LlamaCpp
+
+            # Get LlamaCpp model path from settings
+            model_path = settings.llm.get('llamacpp_model_path', "")
+            if not model_path:
+                logger.error("llamacpp_model_path not set in settings")
+                raise ValueError("llamacpp_model_path not set in settings.toml")
+
+            # Get additional LlamaCpp parameters
+            n_gpu_layers = settings.llm.get('llamacpp_n_gpu_layers', 1)
+            n_batch = settings.llm.get('llamacpp_n_batch', 512)
+            f16_kv = settings.llm.get('llamacpp_f16_kv', True)
+
+            # Create LlamaCpp instance
+            return LlamaCpp(
+                model_path=model_path,
+                temperature=temperature,
+                max_tokens=settings.llm.max_tokens,
+                n_gpu_layers=n_gpu_layers,
+                n_batch=n_batch,
+                f16_kv=f16_kv,
+                verbose=True
+            )
+        except ImportError:
+            logger.error("LlamaCpp package not installed. Run 'pip install llama-cpp-python'")
+            raise ImportError("LlamaCpp package not installed. Run 'pip install llama-cpp-python'")
+
+    else:
+        return get_fallback_model(temperature)
+
+def get_fallback_model(temperature=None):
+    """Create a dummy model for when no providers are available"""
+    from langchain_community.llms.fake import FakeListLLM
+    return FakeListLLM(
+        responses=["No language models are available. Please install Ollama or set up API keys."]
+    )
+
+# ================================
+# COMPATIBILITY FUNCTIONS
+# ================================
+
+def get_available_provider_types():
+    """Return available model providers"""
+    providers = {}
+
+    if is_ollama_available():
+        providers["ollama"] = "Ollama (local models)"
+
+    if is_openai_available():
+        providers["openai"] = "OpenAI API"
+
+    if is_anthropic_available():
+        providers["anthropic"] = "Anthropic API"
+
+    if is_openai_endpoint_available():
+        providers["openai_endpoint"] = "OpenAI-compatible Endpoint"
+
+    if is_lmstudio_available():
+        providers["lmstudio"] = "LM Studio (local models)"
+
+    if is_llamacpp_available():
+        providers["llamacpp"] = "LlamaCpp (local models)"
+
+    # Check for VLLM capability
+    try:
+        import torch
+        import transformers
+        providers["vllm"] = "VLLM (local models)"
+    except ImportError:
+        pass
+
+    # Default fallback
+    if not providers:
+        providers["none"] = "No model providers available"
+
+    return providers
+
+# ================================
+# HELPER FUNCTIONS
+# ================================
+
+def is_openai_available():
+    """Check if OpenAI is available"""
+    try:
+        api_key = settings.get('OPENAI_API_KEY', '')
+        return bool(api_key)
+    except:
+        return False
+
+def is_anthropic_available():
+    """Check if Anthropic is available"""
+    try:
+        api_key = settings.get('ANTHROPIC_API_KEY', '')
+        return bool(api_key)
+    except:
+        return False
+
+def is_openai_endpoint_available():
+    """Check if OpenAI endpoint is available"""
+    try:
+        api_key = settings.get('OPENAI_ENDPOINT_API_KEY', '')
+        return bool(api_key)
+    except:
+        return False
+
+def is_ollama_available():
+    """Check if Ollama is running"""
+    try:
+        import requests
+        base_url = settings.get('OLLAMA_BASE_URL', settings.llm.get('ollama_base_url', 'http://localhost:11434'))
+        response = requests.get(f"{base_url}/api/tags", timeout=1.0)
+        return response.status_code == 200
+    except:
+        return False
+
+def is_vllm_available():
+    """Check if VLLM capability is available"""
+    try:
+        import torch
+        import transformers
+        return True
+    except ImportError:
+        return False
+
+def is_lmstudio_available():
+    """Check if LM Studio is available"""
+    try:
+        import lmstudio
+        import requests
+        lmstudio_url = settings.llm.get('lmstudio_url', 'http://localhost:1234')
+        # Try to connect to check if running
+        response = requests.get(f"{lmstudio_url}/health", timeout=1.0)
+        return response.status_code == 200
+    except:
+        return False
+
+def is_llamacpp_available():
+    """Check if LlamaCpp is available and configured"""
+    try:
+        from langchain_community.llms import LlamaCpp
+        model_path = settings.llm.get('llamacpp_model_path', '')
+        return bool(model_path) and os.path.exists(model_path)
+    except:
+        return False
+
+def get_available_providers():
+    """Get dictionary of available providers"""
+    return get_available_provider_types()
+
+# Log which providers are available
+AVAILABLE_PROVIDERS = get_available_providers()
+logger.info(f"Available providers: {list(AVAILABLE_PROVIDERS.keys())}")
+
+# Check if selected provider is available
+selected_provider = settings.llm.provider.lower()
+if selected_provider not in AVAILABLE_PROVIDERS and selected_provider != "none":
+    logger.warning(f"Selected provider {selected_provider} is not available.")
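A short usage sketch for the new module above, based only on the functions it defines (`get_llm`, `get_available_providers`). It imports the packaged default directly; at runtime the tool copies this file into the user config directory, and a reachable provider such as a local Ollama server is assumed:

```python
# Usage sketch for defaults/llm_config.py as added in 0.1.19. Assumes the
# package is installed and an Ollama server is running locally.
from local_deep_research.defaults.llm_config import (
    get_available_providers,
    get_llm,
)

print(get_available_providers())  # e.g. {'ollama': 'Ollama (local models)', ...}

# Arguments left as None fall back to settings.llm.*; a missing API key makes
# get_llm() return the FakeListLLM fallback instead of raising.
llm = get_llm(model_name="gemma3:12b", temperature=0.7, provider="ollama")
print(llm.invoke("List two uses of inline citations in research reports."))
```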
{local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/defaults/local_collections.toml
RENAMED
@@ -1,6 +1,3 @@
-# Default local document collections configuration
-# Each collection functions as an independent search engine
-
 # Project Documents Collection
 [project_docs]
 name = "Project Documents"
@@ -15,6 +12,9 @@ max_filtered_results = 5
 chunk_size = 1000
 chunk_overlap = 200
 cache_dir = "__CACHE_DIR__/local_search/project_docs"
+strengths = ["project documentation", "specifications", "internal documents"]
+weaknesses = ["no external information", "limited to organizational knowledge"]
+reliability = 0.9
 
 # Research Papers Collection
 [research_papers]
@@ -30,6 +30,9 @@ max_filtered_results = 5
 chunk_size = 800
 chunk_overlap = 150
 cache_dir = "__CACHE_DIR__/local_search/research_papers"
+strengths = ["academic research", "scientific papers", "scholarly content"]
+weaknesses = ["potentially outdated", "limited to collected papers"]
+reliability = 0.85
 
 # Personal Notes Collection
 [personal_notes]
@@ -44,4 +47,7 @@ max_results = 30
 max_filtered_results = 10
 chunk_size = 500
 chunk_overlap = 100
-cache_dir = "__CACHE_DIR__/local_search/personal_notes"
+cache_dir = "__CACHE_DIR__/local_search/personal_notes"
+strengths = ["personal knowledge", "notes", "private documents"]
+weaknesses = ["subjective content", "informal information"]
+reliability = 0.75
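The three keys added to every collection above (`strengths`, `weaknesses`, `reliability`) give each local collection the same kind of metadata the web search engines carry; the code that consumes them is not part of this diff. A sketch of reading them back, assuming Python 3.11+ for the stdlib `tomllib`:

```python
# Illustrative only: ranks local collections by the new reliability key.
# The real selection logic lives in the search engine code, not shown here.
import tomllib
from pathlib import Path

path = Path("local_collections.toml")  # illustrative location

with path.open("rb") as f:
    collections = tomllib.load(f)

for name, cfg in sorted(
    collections.items(),
    key=lambda kv: kv[1].get("reliability", 0.0),
    reverse=True,
):
    print(name, cfg.get("reliability"), cfg.get("strengths", []))
```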
{local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/defaults/main.toml
RENAMED
@@ -1,23 +1,41 @@
 
 # Main configuration for Local Deep Research
 
+[web]
+port = 5000
+host = "0.0.0.0"
+debug = true
+
+[llm]
+# LLM provider (one of: ollama, openai, anthropic, vllm, openai_endpoint, lmstudio, llamacpp)
+provider = "ollama"
+# Model name
+model = "gemma3:12b"
+# Temperature
+temperature = 0.7
+# Maximum tokens
+max_tokens = 30000
+# OpenAI-compatible endpoint URL
+openai_endpoint_url = "https://openrouter.ai/api/v1"
+# LM Studio URL (default: http://localhost:1234)
+lmstudio_url = "http://localhost:1234"
+# LlamaCpp model path
+llamacpp_model_path = ""
+# LlamaCpp parameters
+llamacpp_n_gpu_layers = 1
+llamacpp_n_batch = 512
+llamacpp_f16_kv = true
+
 [general]
 # Directory for research outputs (relative to user data directory)
 output_dir = "research_outputs"
-
 # Knowledge accumulation approach (NONE, QUESTION, or ITERATION)
 knowledge_accumulation = "ITERATION"
-
 # Maximum context size for knowledge accumulation
 knowledge_accumulation_context_limit = 2000000
-
 # Enable fact checking (experimental, works better with large LLMs)
 enable_fact_checking = false
 
-[web]
-port = 5000
-host = "0.0.0.0"
-debug = true
 
 [search]
 # Search tool to use (auto, wikipedia, arxiv, duckduckgo, serp, google_pse, etc.)