local-deep-research 0.1.17__tar.gz → 0.1.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {local_deep_research-0.1.17/src/local_deep_research.egg-info → local_deep_research-0.1.19}/PKG-INFO +41 -18
  2. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/README.md +41 -18
  3. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/pyproject.toml +2 -2
  4. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/config.py +23 -0
  5. local_deep_research-0.1.19/src/local_deep_research/defaults/llm_config.py +307 -0
  6. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/defaults/local_collections.toml +10 -4
  7. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/defaults/main.toml +25 -7
  8. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/static/css/styles.css +43 -0
  9. local_deep_research-0.1.19/src/local_deep_research/web/static/sounds/README.md +29 -0
  10. local_deep_research-0.1.19/src/local_deep_research/web/static/sounds/error.mp3 +0 -0
  11. local_deep_research-0.1.19/src/local_deep_research/web/static/sounds/success.mp3 +0 -0
  12. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/meta_search_engine.py +17 -8
  13. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_local.py +61 -21
  14. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_local_all.py +7 -5
  15. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/search_engine_factory.py +11 -2
  16. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/search_engines_config.py +24 -7
  17. {local_deep_research-0.1.17 → local_deep_research-0.1.19/src/local_deep_research.egg-info}/PKG-INFO +41 -18
  18. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research.egg-info/SOURCES.txt +3 -0
  19. local_deep_research-0.1.17/src/local_deep_research/defaults/llm_config.py +0 -269
  20. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/LICENSE +0 -0
  21. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/MANIFEST.in +0 -0
  22. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/requirements.txt +0 -0
  23. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/setup.cfg +0 -0
  24. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/__init__.py +0 -0
  25. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/citation_handler.py +0 -0
  26. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/defaults/__init__.py +0 -0
  27. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/defaults/search_engines.toml +0 -0
  28. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/main.py +0 -0
  29. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/report_generator.py +0 -0
  30. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/search_system.py +0 -0
  31. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/utilties/__init__.py +0 -0
  32. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/utilties/enums.py +0 -0
  33. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/utilties/llm_utils.py +0 -0
  34. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/utilties/search_utilities.py +0 -0
  35. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/utilties/setup_utils.py +0 -0
  36. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/__init__.py +0 -0
  37. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/app.py +0 -0
  38. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/static/js/app.js +0 -0
  39. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/api_keys_config.html +0 -0
  40. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/collections_config.html +0 -0
  41. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/index.html +0 -0
  42. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/llm_config.html +0 -0
  43. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/main_config.html +0 -0
  44. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/search_engines_config.html +0 -0
  45. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/settings.html +0 -0
  46. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web/templates/settings_dashboard.html +0 -0
  47. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/__init__.py +0 -0
  48. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/__init__.py +0 -0
  49. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/full_search.py +0 -0
  50. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_arxiv.py +0 -0
  51. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_brave.py +0 -0
  52. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_ddg.py +0 -0
  53. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_github.py +0 -0
  54. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_google_pse.py +0 -0
  55. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_guardian.py +0 -0
  56. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_pubmed.py +0 -0
  57. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_searxng.py +0 -0
  58. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +0 -0
  59. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_serpapi.py +0 -0
  60. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_wayback.py +0 -0
  61. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +0 -0
  62. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research/web_search_engines/search_engine_base.py +0 -0
  63. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research.egg-info/dependency_links.txt +0 -0
  64. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research.egg-info/entry_points.txt +0 -0
  65. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research.egg-info/requires.txt +0 -0
  66. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/src/local_deep_research.egg-info/top_level.txt +0 -0
  67. {local_deep_research-0.1.17 → local_deep_research-0.1.19}/tests/test_google_pse.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: local-deep-research
-Version: 0.1.17
+Version: 0.1.19
 Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
 Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
 License: MIT License
@@ -71,6 +71,14 @@ Dynamic: license-file
 
 A powerful AI-powered research assistant that performs deep, iterative analysis using multiple LLMs and web searches. The system can be run locally for privacy or configured to use cloud-based LLMs for enhanced capabilities.
 
+<div align="center">
+<a href="https://www.youtube.com/watch?v=0ISreg9q0p0">
+<img src="https://img.youtube.com/vi/0ISreg9q0p0/0.jpg" alt="Local Deep Research">
+<br>
+<span>▶️ Watch Video</span>
+</a>
+</div>
+
 ## Quick Start
 
 ```bash
@@ -101,7 +109,7 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
 
 - 🔍 **Advanced Research Capabilities**
   - Automated deep research with intelligent follow-up questions
-  - Citation tracking and source verification
+  - Proper inline citation and source verification
   - Multi-iteration analysis for comprehensive coverage
   - Full webpage content analysis (not just snippets)
 
@@ -112,8 +120,8 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
   - Configurable model selection based on needs
 
 - 📊 **Rich Output Options**
-  - Detailed research findings with citations
-  - Comprehensive research reports
+  - Detailed research findings with proper citations
+  - Well-structured comprehensive research reports
   - Quick summaries for rapid insights
   - Source tracking and verification
 
@@ -128,6 +136,12 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
   - **Local RAG search for private documents** - search your own documents with vector embeddings
   - Full webpage content retrieval and intelligent filtering
 
+- 🎓 **Academic & Scientific Integration**
+  - Direct integration with PubMed, arXiv, Wikipedia, Semantic Scholar
+  - Properly formatted citations from academic sources
+  - Report structure suitable for literature reviews
+  - Cross-disciplinary synthesis of information
+
 ## Configuration System
 
 The package automatically creates and manages configuration files in your user directory:
@@ -137,6 +151,8 @@ The package automatically creates and manages configuration files in your user d
 
 ### Default Configuration Files
 
+If you prefer environment variables, please refer to this file: https://github.com/LearningCircuit/local-deep-research/blob/main/docs/env_configuration.md
+
 When you first run the tool, it creates these configuration files:
 
 | File | Purpose |
@@ -209,6 +225,10 @@ The `OPENAI_ENDPOINT` provider can access any service with an OpenAI-compatible
 
 The system includes multiple search engines. Some require API keys:
 
+Use .env in the config folder if .secrets.toml doesn't work.
+
+You can also override other settings via environment variables, e.g. to override the [web] port setting in settings.toml use: **LDR_WEB__PORT=8080**
+
 ```toml
 # Add to .secrets.toml
 SERP_API_KEY = "your-serpapi-key-here" # For Google results via SerpAPI
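To make the override convention above concrete, here is a minimal sketch; the `LDR_WEB__PORT` name comes from the README text itself, and setting it from Python before launch is just one option alongside a shell `export`:

```python
# Minimal sketch: override the [web] port from settings.toml via an
# environment variable, using the LDR_ prefix and double-underscore
# section separator shown in the README. Must be set before the app starts.
import os

os.environ["LDR_WEB__PORT"] = "8080"  # same effect as `export LDR_WEB__PORT=8080`
```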
@@ -261,20 +281,20 @@ You can use local document search in several ways:
 
 ## Available Search Engines
 
-| Engine | Purpose | API Key Required? |
-|--------|---------|-------------------|
-| `auto` | Intelligently selects the best engine | No |
-| `wikipedia` | General knowledge and facts | No |
-| `arxiv` | Scientific papers and research | No |
-| `pubmed` | Medical and biomedical research | No |
-| `semantic_scholar` | Academic literature across all fields | No |
-| `github` | Code repositories and documentation | No (but rate-limited) |
-| `brave` | Web search (privacy-focused) | Yes |
-| `serpapi` | Google search results | Yes |
-| `google_pse` | Custom Google search | Yes |
-| `wayback` | Historical web content | No |
-| `searxng` | Local web search engine | No (requires server) |
-| Any collection name | Search your local documents | No |
+| Engine | Purpose | API Key Required? | Rate Limit |
+|--------|---------|-------------------|------------|
+| `auto` | Intelligently selects the best engine | No | Based on selected engine |
+| `wikipedia` | General knowledge and facts | No | No strict limit |
+| `arxiv` | Scientific papers and research | No | No strict limit |
+| `pubmed` | Medical and biomedical research | No | No strict limit |
+| `semantic_scholar` | Academic literature across all fields | No | 100/5min |
+| `github` | Code repositories and documentation | No | 60/hour (unauthenticated) |
+| `brave` | Web search (privacy-focused) | Yes | Based on plan |
+| `serpapi` | Google search results | Yes | Based on plan |
+| `google_pse` | Custom Google search | Yes | 100/day free tier |
+| `wayback` | Historical web content | No | No strict limit |
+| `searxng` | Local web search engine | No (requires local server) | No limit |
+| Any collection name | Search your local documents | No | No limit |
 
 > **Support Free Knowledge:** If you frequently use the search engines in this tool, please consider making a donation to these organizations. They provide valuable services and rely on user support to maintain their operations:
 > - [Donate to Wikipedia](https://donate.wikimedia.org)
@@ -318,6 +338,9 @@ The web interface offers several features:
 - **PDF Export**: Download reports
 - **Research Management**: Terminate processes or delete records
 
+![Web Interface](./web1.png)
+![Web Interface](./web2.png)
+
 ## Command Line Interface
 
 The CLI version allows you to:
@@ -2,6 +2,14 @@
 
 A powerful AI-powered research assistant that performs deep, iterative analysis using multiple LLMs and web searches. The system can be run locally for privacy or configured to use cloud-based LLMs for enhanced capabilities.
 
+<div align="center">
+<a href="https://www.youtube.com/watch?v=0ISreg9q0p0">
+<img src="https://img.youtube.com/vi/0ISreg9q0p0/0.jpg" alt="Local Deep Research">
+<br>
+<span>▶️ Watch Video</span>
+</a>
+</div>
+
 ## Quick Start
 
 ```bash
@@ -32,7 +40,7 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
 
 - 🔍 **Advanced Research Capabilities**
   - Automated deep research with intelligent follow-up questions
-  - Citation tracking and source verification
+  - Proper inline citation and source verification
   - Multi-iteration analysis for comprehensive coverage
   - Full webpage content analysis (not just snippets)
 
@@ -43,8 +51,8 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
   - Configurable model selection based on needs
 
 - 📊 **Rich Output Options**
-  - Detailed research findings with citations
-  - Comprehensive research reports
+  - Detailed research findings with proper citations
+  - Well-structured comprehensive research reports
   - Quick summaries for rapid insights
   - Source tracking and verification
 
@@ -59,6 +67,12 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
   - **Local RAG search for private documents** - search your own documents with vector embeddings
   - Full webpage content retrieval and intelligent filtering
 
+- 🎓 **Academic & Scientific Integration**
+  - Direct integration with PubMed, arXiv, Wikipedia, Semantic Scholar
+  - Properly formatted citations from academic sources
+  - Report structure suitable for literature reviews
+  - Cross-disciplinary synthesis of information
+
 ## Configuration System
 
 The package automatically creates and manages configuration files in your user directory:
@@ -68,6 +82,8 @@ The package automatically creates and manages configuration files in your user d
 
 ### Default Configuration Files
 
+If you prefer environment variables, please refer to this file: https://github.com/LearningCircuit/local-deep-research/blob/main/docs/env_configuration.md
+
 When you first run the tool, it creates these configuration files:
 
 | File | Purpose |
@@ -140,6 +156,10 @@ The `OPENAI_ENDPOINT` provider can access any service with an OpenAI-compatible
 
 The system includes multiple search engines. Some require API keys:
 
+Use .env in the config folder if .secrets.toml doesn't work.
+
+You can also override other settings via environment variables, e.g. to override the [web] port setting in settings.toml use: **LDR_WEB__PORT=8080**
+
 ```toml
 # Add to .secrets.toml
 SERP_API_KEY = "your-serpapi-key-here" # For Google results via SerpAPI
@@ -192,20 +212,20 @@ You can use local document search in several ways:
 
 ## Available Search Engines
 
-| Engine | Purpose | API Key Required? |
-|--------|---------|-------------------|
-| `auto` | Intelligently selects the best engine | No |
-| `wikipedia` | General knowledge and facts | No |
-| `arxiv` | Scientific papers and research | No |
-| `pubmed` | Medical and biomedical research | No |
-| `semantic_scholar` | Academic literature across all fields | No |
-| `github` | Code repositories and documentation | No (but rate-limited) |
-| `brave` | Web search (privacy-focused) | Yes |
-| `serpapi` | Google search results | Yes |
-| `google_pse` | Custom Google search | Yes |
-| `wayback` | Historical web content | No |
-| `searxng` | Local web search engine | No (requires server) |
-| Any collection name | Search your local documents | No |
+| Engine | Purpose | API Key Required? | Rate Limit |
+|--------|---------|-------------------|------------|
+| `auto` | Intelligently selects the best engine | No | Based on selected engine |
+| `wikipedia` | General knowledge and facts | No | No strict limit |
+| `arxiv` | Scientific papers and research | No | No strict limit |
+| `pubmed` | Medical and biomedical research | No | No strict limit |
+| `semantic_scholar` | Academic literature across all fields | No | 100/5min |
+| `github` | Code repositories and documentation | No | 60/hour (unauthenticated) |
+| `brave` | Web search (privacy-focused) | Yes | Based on plan |
+| `serpapi` | Google search results | Yes | Based on plan |
+| `google_pse` | Custom Google search | Yes | 100/day free tier |
+| `wayback` | Historical web content | No | No strict limit |
+| `searxng` | Local web search engine | No (requires local server) | No limit |
+| Any collection name | Search your local documents | No | No limit |
 
 > **Support Free Knowledge:** If you frequently use the search engines in this tool, please consider making a donation to these organizations. They provide valuable services and rely on user support to maintain their operations:
 > - [Donate to Wikipedia](https://donate.wikimedia.org)
@@ -249,6 +269,9 @@ The web interface offers several features:
 - **PDF Export**: Download reports
 - **Research Management**: Terminate processes or delete records
 
+![Web Interface](./web1.png)
+![Web Interface](./web2.png)
+
 ## Command Line Interface
 
 The CLI version allows you to:
@@ -321,4 +344,4 @@ Contributions are welcome! Please feel free to submit a Pull Request.
 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
 4. Push to the branch (`git push origin feature/AmazingFeature`)
-5. Open a Pull Request
+5. Open a Pull Request
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "local-deep-research"
-version = "0.1.17"
+version = "0.1.19"
 description = "AI-powered research assistant with deep, iterative analysis using LLMs and web searches"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -66,4 +66,4 @@ include-package-data = true
 
 [tool.setuptools.package-data]
 "local_deep_research.web" = ["templates/*", "static/*", "static/**/*"]
-"local_deep_research.defaults" = ["*.toml", "*.py"]
+"local_deep_research.defaults" = ["*.toml", "*.py", "*.template"]
@@ -145,6 +145,19 @@ def init_config_files():
             if not os.path.exists(search_engines_file) and os.path.exists(default_engines):
                 shutil.copyfile(default_engines, search_engines_file)
                 logger.info(f"Created search_engines.toml at {search_engines_file}")
+
+            # Create .env.template if it doesn't exist
+            env_template_file = CONFIG_DIR / ".env.template"
+            if not env_template_file.exists():
+                shutil.copy(defaults_dir / ".env.template", env_template_file)
+                logger.info(f"Created .env.template at {env_template_file}")
+
+            # Optionally create an empty .env file if it doesn't exist
+            env_file = CONFIG_DIR / ".env"
+            if not env_file.exists():
+                with open(env_file, "w") as f:
+                    f.write("# Add your environment variables here\n")
+                logger.info(f"Created empty .env file at {env_file}")
         except Exception as e:
             logger.error(f"Error initializing Windows config files: {e}")
     else:
@@ -183,7 +196,17 @@ def init_config_files():
         if not search_engines_file.exists():
             shutil.copy(defaults_dir / "search_engines.toml", search_engines_file)
             logger.info(f"Created search_engines.toml at {search_engines_file}")
+        env_template_file = CONFIG_DIR / ".env.template"
+        if not env_template_file.exists():
+            shutil.copy(defaults_dir / ".env.template", env_template_file)
+            logger.info(f"Created .env.template at {env_template_file}")
 
+        # Optionally create an empty .env file if it doesn't exist
+        env_file = CONFIG_DIR / ".env"
+        if not env_file.exists():
+            with open(env_file, "w") as f:
+                f.write("# Add your environment variables here\n")
+            logger.info(f"Created empty .env file at {env_file}")
         secrets_file = CONFIG_DIR / ".secrets.toml"
         if not secrets_file.exists():
             with open(secrets_file, "w") as f:
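For context on how the bootstrapped `.env` might be consumed, a hedged sketch follows; the diff only shows the file being created, so the `python-dotenv` call and the config path here are assumptions:

```python
# Hypothetical consumer of the .env file created by init_config_files().
# Assumes the python-dotenv package; the directory mirrors a typical
# CONFIG_DIR location and may differ on your platform.
from pathlib import Path
from dotenv import load_dotenv  # assumption: python-dotenv is installed

config_dir = Path.home() / ".config" / "local_deep_research"  # assumed path
load_dotenv(config_dir / ".env")  # loads KEY=value pairs into os.environ
```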
@@ -0,0 +1,307 @@
+"""
+LLM configuration for Local Deep Research.
+
+This file controls which language models are available and how they're configured.
+You can customize model selection, parameters, and fallbacks here.
+"""
+
+from langchain_anthropic import ChatAnthropic
+from langchain_openai import ChatOpenAI
+from langchain_ollama import ChatOllama
+from langchain_community.llms import VLLM
+from local_deep_research.config import settings
+import os
+import logging
+
+# Initialize environment
+logger = logging.getLogger(__name__)
+
+# Valid provider options
+VALID_PROVIDERS = ["ollama", "openai", "anthropic", "vllm", "openai_endpoint", "lmstudio", "llamacpp", "none"]
+
+# ================================
+# LLM FUNCTIONS
+# ================================
+
+def get_llm(model_name=None, temperature=None, provider=None):
+    """
+    Get LLM instance based on model name and provider.
+
+    Args:
+        model_name: Name of the model to use (if None, uses settings.llm.model)
+        temperature: Model temperature (if None, uses settings.llm.temperature)
+        provider: Provider to use (if None, uses settings.llm.provider)
+
+    Returns:
+        A LangChain LLM instance
+    """
+    # Use settings values for parameters if not provided
+    if model_name is None:
+        model_name = settings.llm.model
+
+    if temperature is None:
+        temperature = settings.llm.temperature
+
+    if provider is None:
+        provider = settings.llm.provider.lower()
+        if provider not in VALID_PROVIDERS:
+            logger.error(f"Invalid provider in settings: {provider}")
+            raise ValueError(f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}")
+
+    # Common parameters for all models
+    common_params = {
+        "temperature": temperature,
+        "max_tokens": settings.llm.max_tokens,
+    }
+
+    # Handle different providers
+    if provider == "anthropic":
+        api_key = settings.get('ANTHROPIC_API_KEY', '')
+        if not api_key:
+            logger.warning("ANTHROPIC_API_KEY not found. Falling back to default model.")
+            return get_fallback_model(temperature)
+
+        return ChatAnthropic(
+            model=model_name, anthropic_api_key=api_key, **common_params
+        )
+
+    elif provider == "openai":
+        api_key = settings.get('OPENAI_API_KEY', '')
+        if not api_key:
+            logger.warning("OPENAI_API_KEY not found. Falling back to default model.")
+            return get_fallback_model(temperature)
+
+        return ChatOpenAI(model=model_name, api_key=api_key, **common_params)
+
+    elif provider == "openai_endpoint":
+        api_key = settings.get('OPENAI_ENDPOINT_API_KEY', '')
+
+        if not api_key:
+            logger.warning("OPENAI_ENDPOINT_API_KEY not found. Falling back to default model.")
+            return get_fallback_model(temperature)
+
+        # Get endpoint URL from settings
+        openai_endpoint_url = settings.llm.openai_endpoint_url
+
+        return ChatOpenAI(
+            model=model_name,
+            api_key=api_key,
+            openai_api_base=openai_endpoint_url,
+            **common_params
+        )
+
+    elif provider == "vllm":
+        try:
+            return VLLM(
+                model=model_name,
+                trust_remote_code=True,
+                max_new_tokens=128,
+                top_k=10,
+                top_p=0.95,
+                temperature=temperature,
+            )
+        except Exception as e:
+            logger.error(f"Error loading VLLM model: {e}")
+            logger.warning("Falling back.")
+            return get_fallback_model(temperature)
+
+    elif provider == "ollama":
+        try:
+            # Use the configurable Ollama base URL
+            base_url = settings.get('OLLAMA_BASE_URL', settings.llm.get('ollama_base_url', 'http://localhost:11434'))
+            return ChatOllama(model=model_name, base_url=base_url, **common_params)
+        except Exception as e:
+            logger.error(f"Error loading Ollama model: {e}")
+            return get_fallback_model(temperature)
+
+    elif provider == "lmstudio":
+        try:
+            # Import LM Studio package
+            import lmstudio
+            from langchain_core.language_models import BaseLLM
+
+            # Get LM Studio URL from settings
+            lmstudio_url = settings.llm.get('lmstudio_url', "http://localhost:1234")
+
+            # Create LM Studio LLM instance
+            model = lmstudio.llm(model_name)
+
+            # Return a LangChain compatible wrapper
+            class LMStudioLLM(BaseLLM):
+                def _call(self, prompt, stop=None, **kwargs):
+                    result = model.complete(prompt, temperature=temperature)
+                    return result.completion
+
+                @property
+                def _identifying_params(self):
+                    return {"model_name": model_name}
+
+                @property
+                def _llm_type(self):
+                    return "lmstudio"
+
+            return LMStudioLLM()
+        except ImportError:
+            logger.error("LM Studio package not installed. Run 'pip install lmstudio'")
+            raise ImportError("LM Studio package not installed. Run 'pip install lmstudio'")
+
+    elif provider == "llamacpp":
+        try:
+            # Import LlamaCpp
+            from langchain_community.llms import LlamaCpp
+
+            # Get LlamaCpp model path from settings
+            model_path = settings.llm.get('llamacpp_model_path', "")
+            if not model_path:
+                logger.error("llamacpp_model_path not set in settings")
+                raise ValueError("llamacpp_model_path not set in settings.toml")
+
+            # Get additional LlamaCpp parameters
+            n_gpu_layers = settings.llm.get('llamacpp_n_gpu_layers', 1)
+            n_batch = settings.llm.get('llamacpp_n_batch', 512)
+            f16_kv = settings.llm.get('llamacpp_f16_kv', True)
+
+            # Create LlamaCpp instance
+            return LlamaCpp(
+                model_path=model_path,
+                temperature=temperature,
+                max_tokens=settings.llm.max_tokens,
+                n_gpu_layers=n_gpu_layers,
+                n_batch=n_batch,
+                f16_kv=f16_kv,
+                verbose=True
+            )
+        except ImportError:
+            logger.error("LlamaCpp package not installed. Run 'pip install llama-cpp-python'")
+            raise ImportError("LlamaCpp package not installed. Run 'pip install llama-cpp-python'")
+
+    else:
+        return get_fallback_model(temperature)
+
+def get_fallback_model(temperature=None):
+    """Create a dummy model for when no providers are available"""
+    from langchain_community.llms.fake import FakeListLLM
+    return FakeListLLM(
+        responses=["No language models are available. Please install Ollama or set up API keys."]
+    )
+
+# ================================
+# COMPATIBILITY FUNCTIONS
+# ================================
+
+def get_available_provider_types():
+    """Return available model providers"""
+    providers = {}
+
+    if is_ollama_available():
+        providers["ollama"] = "Ollama (local models)"
+
+    if is_openai_available():
+        providers["openai"] = "OpenAI API"
+
+    if is_anthropic_available():
+        providers["anthropic"] = "Anthropic API"
+
+    if is_openai_endpoint_available():
+        providers["openai_endpoint"] = "OpenAI-compatible Endpoint"
+
+    if is_lmstudio_available():
+        providers["lmstudio"] = "LM Studio (local models)"
+
+    if is_llamacpp_available():
+        providers["llamacpp"] = "LlamaCpp (local models)"
+
+    # Check for VLLM capability
+    try:
+        import torch
+        import transformers
+        providers["vllm"] = "VLLM (local models)"
+    except ImportError:
+        pass
+
+    # Default fallback
+    if not providers:
+        providers["none"] = "No model providers available"
+
+    return providers
+
+# ================================
+# HELPER FUNCTIONS
+# ================================
+
+def is_openai_available():
+    """Check if OpenAI is available"""
+    try:
+        api_key = settings.get('OPENAI_API_KEY', '')
+        return bool(api_key)
+    except:
+        return False
+
+def is_anthropic_available():
+    """Check if Anthropic is available"""
+    try:
+        api_key = settings.get('ANTHROPIC_API_KEY', '')
+        return bool(api_key)
+    except:
+        return False
+
+def is_openai_endpoint_available():
+    """Check if OpenAI endpoint is available"""
+    try:
+        api_key = settings.get('OPENAI_ENDPOINT_API_KEY', '')
+        return bool(api_key)
+    except:
+        return False
+
+def is_ollama_available():
+    """Check if Ollama is running"""
+    try:
+        import requests
+        base_url = settings.get('OLLAMA_BASE_URL', settings.llm.get('ollama_base_url', 'http://localhost:11434'))
+        response = requests.get(f"{base_url}/api/tags", timeout=1.0)
+        return response.status_code == 200
+    except:
+        return False
+
+def is_vllm_available():
+    """Check if VLLM capability is available"""
+    try:
+        import torch
+        import transformers
+        return True
+    except ImportError:
+        return False
+
+def is_lmstudio_available():
+    """Check if LM Studio is available"""
+    try:
+        import lmstudio
+        import requests
+        lmstudio_url = settings.llm.get('lmstudio_url', 'http://localhost:1234')
+        # Try to connect to check if running
+        response = requests.get(f"{lmstudio_url}/health", timeout=1.0)
+        return response.status_code == 200
+    except:
+        return False
+
+def is_llamacpp_available():
+    """Check if LlamaCpp is available and configured"""
+    try:
+        from langchain_community.llms import LlamaCpp
+        model_path = settings.llm.get('llamacpp_model_path', '')
+        return bool(model_path) and os.path.exists(model_path)
+    except:
+        return False
+
+def get_available_providers():
+    """Get dictionary of available providers"""
+    return get_available_provider_types()
+
+# Log which providers are available
+AVAILABLE_PROVIDERS = get_available_providers()
+logger.info(f"Available providers: {list(AVAILABLE_PROVIDERS.keys())}")
+
+# Check if selected provider is available
+selected_provider = settings.llm.provider.lower()
+if selected_provider not in AVAILABLE_PROVIDERS and selected_provider != "none":
+    logger.warning(f"Selected provider {selected_provider} is not available.")
@@ -1,6 +1,3 @@
-# Default local document collections configuration
-# Each collection functions as an independent search engine
-
 # Project Documents Collection
 [project_docs]
 name = "Project Documents"
  name = "Project Documents"
@@ -15,6 +12,9 @@ max_filtered_results = 5
15
12
  chunk_size = 1000
16
13
  chunk_overlap = 200
17
14
  cache_dir = "__CACHE_DIR__/local_search/project_docs"
15
+ strengths = ["project documentation", "specifications", "internal documents"]
16
+ weaknesses = ["no external information", "limited to organizational knowledge"]
17
+ reliability = 0.9
18
18
 
19
19
  # Research Papers Collection
20
20
  [research_papers]
@@ -30,6 +30,9 @@ max_filtered_results = 5
30
30
  chunk_size = 800
31
31
  chunk_overlap = 150
32
32
  cache_dir = "__CACHE_DIR__/local_search/research_papers"
33
+ strengths = ["academic research", "scientific papers", "scholarly content"]
34
+ weaknesses = ["potentially outdated", "limited to collected papers"]
35
+ reliability = 0.85
33
36
 
34
37
  # Personal Notes Collection
35
38
  [personal_notes]
@@ -44,4 +47,7 @@ max_results = 30
44
47
  max_filtered_results = 10
45
48
  chunk_size = 500
46
49
  chunk_overlap = 100
47
- cache_dir = "__CACHE_DIR__/local_search/personal_notes"
50
+ cache_dir = "__CACHE_DIR__/local_search/personal_notes"
51
+ strengths = ["personal knowledge", "notes", "private documents"]
52
+ weaknesses = ["subjective content", "informal information"]
53
+ reliability = 0.75
@@ -1,23 +1,41 @@
 
 # Main configuration for Local Deep Research
 
+[web]
+port = 5000
+host = "0.0.0.0"
+debug = true
+
+[llm]
+# LLM provider (one of: ollama, openai, anthropic, vllm, openai_endpoint, lmstudio, llamacpp)
+provider = "ollama"
+# Model name
+model = "gemma3:12b"
+# Temperature
+temperature = 0.7
+# Maximum tokens
+max_tokens = 30000
+# OpenAI-compatible endpoint URL
+openai_endpoint_url = "https://openrouter.ai/api/v1"
+# LM Studio URL (default: http://localhost:1234)
+lmstudio_url = "http://localhost:1234"
+# LlamaCpp model path
+llamacpp_model_path = ""
+# LlamaCpp parameters
+llamacpp_n_gpu_layers = 1
+llamacpp_n_batch = 512
+llamacpp_f16_kv = true
+
 [general]
 # Directory for research outputs (relative to user data directory)
 output_dir = "research_outputs"
-
 # Knowledge accumulation approach (NONE, QUESTION, or ITERATION)
 knowledge_accumulation = "ITERATION"
-
 # Maximum context size for knowledge accumulation
 knowledge_accumulation_context_limit = 2000000
-
 # Enable fact checking (experimental, works better with large LLMs)
 enable_fact_checking = false
 
-[web]
-port = 5000
-host = "0.0.0.0"
-debug = true
 
 [search]
 # Search tool to use (auto, wikipedia, arxiv, duckduckgo, serp, google_pse, etc.)
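Finally, a small sketch of reading the new `[llm]` block with the standard-library TOML parser (Python 3.11+); the package itself loads this file through its own config layer, so this is purely illustrative:

```python
# Sketch: parse the new [llm] section from main.toml with stdlib tomllib.
# The path below points at the file inside the sdist and is illustrative.
import tomllib  # Python 3.11+

with open("src/local_deep_research/defaults/main.toml", "rb") as f:
    cfg = tomllib.load(f)

llm = cfg["llm"]
print(llm["provider"], llm["model"], llm["max_tokens"])  # ollama gemma3:12b 30000
```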