local-deep-research 0.1.16__py3-none-any.whl → 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. local_deep_research/citation_handler.py +0 -2
  2. local_deep_research/config.py +1 -4
  3. local_deep_research/defaults/llm_config.py +2 -2
  4. local_deep_research/defaults/main.toml +3 -3
  5. local_deep_research/report_generator.py +1 -5
  6. local_deep_research/search_system.py +1 -1
  7. local_deep_research/utilties/search_utilities.py +3 -4
  8. local_deep_research/web/static/css/styles.css +43 -0
  9. local_deep_research/web_search_engines/engines/full_search.py +9 -8
  10. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -14
  11. local_deep_research/web_search_engines/engines/search_engine_brave.py +10 -9
  12. local_deep_research/web_search_engines/engines/search_engine_ddg.py +4 -2
  13. local_deep_research/web_search_engines/engines/search_engine_local.py +1 -1
  14. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +9 -8
  15. local_deep_research/web_search_engines/search_engine_base.py +1 -1
  16. local_deep_research/web_search_engines/search_engine_factory.py +11 -2
  17. local_deep_research-0.1.18.dist-info/METADATA +402 -0
  18. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.18.dist-info}/RECORD +22 -24
  19. local_deep_research/local_collections.py +0 -141
  20. local_deep_research/web_search_engines/full_search.py +0 -254
  21. local_deep_research-0.1.16.dist-info/METADATA +0 -346
  22. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.18.dist-info}/WHEEL +0 -0
  23. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.18.dist-info}/entry_points.txt +0 -0
  24. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.18.dist-info}/licenses/LICENSE +0 -0
  25. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.18.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,402 @@
1
+ Metadata-Version: 2.4
2
+ Name: local-deep-research
3
+ Version: 0.1.18
4
+ Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
5
+ Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 LearningCircuit
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/LearningCircuit/local-deep-research
29
+ Project-URL: Bug Tracker, https://github.com/LearningCircuit/local-deep-research/issues
30
+ Classifier: Programming Language :: Python :: 3
31
+ Classifier: License :: OSI Approved :: MIT License
32
+ Classifier: Operating System :: OS Independent
33
+ Requires-Python: >=3.8
34
+ Description-Content-Type: text/markdown
35
+ License-File: LICENSE
36
+ Requires-Dist: langchain>=0.3.18
37
+ Requires-Dist: langchain-community>=0.3.17
38
+ Requires-Dist: langchain-core>=0.3.34
39
+ Requires-Dist: langchain-ollama>=0.2.3
40
+ Requires-Dist: langchain-openai>=0.3.5
41
+ Requires-Dist: langchain_anthropic>=0.3.7
42
+ Requires-Dist: duckduckgo_search>=7.3.2
43
+ Requires-Dist: python-dateutil>=2.9.0
44
+ Requires-Dist: typing_extensions>=4.12.2
45
+ Requires-Dist: justext
46
+ Requires-Dist: playwright
47
+ Requires-Dist: beautifulsoup4
48
+ Requires-Dist: flask>=2.0.1
49
+ Requires-Dist: flask-cors>=3.0.10
50
+ Requires-Dist: flask-socketio>=5.1.1
51
+ Requires-Dist: sqlalchemy>=1.4.23
52
+ Requires-Dist: wikipedia
53
+ Requires-Dist: arxiv>=1.4.3
54
+ Requires-Dist: pypdf
55
+ Requires-Dist: sentence-transformers
56
+ Requires-Dist: faiss-cpu
57
+ Requires-Dist: pydantic>=2.0.0
58
+ Requires-Dist: pydantic-settings>=2.0.0
59
+ Requires-Dist: toml>=0.10.2
60
+ Requires-Dist: platformdirs>=3.0.0
61
+ Requires-Dist: dynaconf
62
+ Requires-Dist: requests>=2.28.0
63
+ Requires-Dist: tiktoken>=0.4.0
64
+ Requires-Dist: xmltodict>=0.13.0
65
+ Requires-Dist: lxml>=4.9.2
66
+ Requires-Dist: pdfplumber>=0.9.0
67
+ Requires-Dist: unstructured>=0.10.0
68
+ Dynamic: license-file
69
+
70
+ # Local Deep Research
71
+
72
+ An AI-powered research assistant that performs deep, iterative analysis using multiple LLMs and web searches. The system can run entirely locally for privacy or be configured to use cloud-based LLMs for enhanced capabilities.
73
+
74
+ ## Quick Start
75
+
76
+ ```bash
77
+ # Install the package
78
+ pip install local-deep-research
79
+
80
+ # Install required browser automation tools
81
+ playwright install
82
+
83
+ # For local models, install Ollama
84
+ # Download from https://ollama.ai and then pull a model
85
+ ollama pull gemma3:12b
86
+ ```
87
+
88
+ Then run:
89
+
90
+ ```bash
91
+ # Start the web interface (recommended)
92
+ ldr-web # (OR python -m local_deep_research.web.app)
93
+
94
+ # OR run the command line version
95
+ ldr # (OR python -m local_deep_research.main)
96
+ ```
97
+
98
+ Access the web interface at `http://127.0.0.1:5000` in your browser.
99
+
100
+ ## Features
101
+
102
+ - 🔍 **Advanced Research Capabilities**
103
+ - Automated deep research with intelligent follow-up questions
104
+ - Proper inline citation and source verification
105
+ - Multi-iteration analysis for comprehensive coverage
106
+ - Full webpage content analysis (not just snippets)
107
+
108
+ - 🤖 **Flexible LLM Support**
109
+ - Local AI processing with Ollama models
110
+ - Cloud LLM support (Claude, GPT)
111
+ - Supports all Langchain models
112
+ - Configurable model selection based on needs
113
+
114
+ - 📊 **Rich Output Options**
115
+ - Detailed research findings with proper citations
116
+ - Well-structured comprehensive research reports
117
+ - Quick summaries for rapid insights
118
+ - Source tracking and verification
119
+
120
+ - 🔒 **Privacy-Focused**
121
+ - Runs entirely on your machine when using local models
122
+ - Configurable search settings
123
+ - Transparent data handling
124
+
125
+ - 🌐 **Enhanced Search Integration**
126
+ - **Auto-selection of search sources**: The "auto" search engine intelligently analyzes your query and selects the most appropriate search engine
127
+ - Multiple search engines including Wikipedia, arXiv, PubMed, Semantic Scholar, and more
128
+ - **Local RAG search for private documents** - search your own documents with vector embeddings
129
+ - Full webpage content retrieval and intelligent filtering
130
+
131
+ - 🎓 **Academic & Scientific Integration**
132
+ - Direct integration with PubMed, arXiv, Wikipedia, Semantic Scholar
133
+ - Properly formatted citations from academic sources
134
+ - Report structure suitable for literature reviews
135
+ - Cross-disciplinary synthesis of information
136
+
137
+ ## Configuration System
138
+
139
+ The package automatically creates and manages configuration files in your user directory:
140
+
141
+ - **Windows**: `Documents\LearningCircuit\local-deep-research\config\`
142
+ - **Linux/Mac**: `~/.config/local_deep_research/config/`
143
+
144
+ ### Default Configuration Files
145
+
146
+ When you first run the tool, it creates these configuration files:
147
+
148
+ | File | Purpose |
149
+ |------|---------|
150
+ | `settings.toml` | General settings for research, web interface, and search |
151
+ | `llm_config.py` | Configure which LLM to use (local or cloud-based) |
152
+ | `search_engines.toml` | Define and configure search engines |
153
+ | `local_collections.toml` | Configure local document collections for RAG |
154
+ | `.secrets.toml` | Store API keys for cloud services |
155
+
156
+ ## Setting Up AI Models
157
+
158
+ The system supports multiple LLM providers:
159
+
160
+ ### Local Models (via Ollama)
161
+
162
+ 1. [Install Ollama](https://ollama.ai)
163
+ 2. Pull a model: `ollama pull gemma3:12b` (recommended model)
164
+ 3. Ollama runs on port 11434 by default
165
+
166
+ ### Cloud Models
167
+
168
+ Edit your `.secrets.toml` file to add API keys:
169
+
170
+ ```toml
171
+ ANTHROPIC_API_KEY = "your-api-key-here" # For Claude models
172
+ OPENAI_API_KEY = "your-openai-key-here" # For GPT models
173
+ OPENAI_ENDPOINT_API_KEY = "your-key-here" # For OpenRouter or similar services
174
+ ```
175
+
176
+ Then edit `llm_config.py` to change the default provider:
177
+
178
+ ```python
179
+ # Set your preferred model provider here
180
+ DEFAULT_PROVIDER = ModelProvider.OLLAMA # Change to your preferred provider
181
+
182
+ # Set your default model name here
183
+ DEFAULT_MODEL = "mistral" # Change to your preferred model
184
+ ```
185
+
186
+ ### Supported LLM Providers
187
+
188
+ The system supports multiple LLM providers:
189
+
190
+ | Provider | Type | Configuration | Notes |
191
+ |----------|------|--------------|-------|
192
+ | `OLLAMA` | Local | No API key needed | Default - install from ollama.ai |
193
+ | `OPENAI` | Cloud | Requires `OPENAI_API_KEY` | GPT models (3.5, 4, 4o) |
194
+ | `ANTHROPIC` | Cloud | Requires `ANTHROPIC_API_KEY` | Claude models (3 Opus, Sonnet, Haiku) |
195
+ | `OPENAI_ENDPOINT` | Cloud | Requires `OPENAI_ENDPOINT_API_KEY` | For any OpenAI-compatible API |
196
+ | `VLLM` | Local | No API key needed | For hosting models via vLLM |
197
+
198
+ You can configure the OpenAI-compatible endpoint URL in `llm_config.py`:
199
+
200
+ ```python
201
+ # For OpenRouter, Together.ai, Azure OpenAI, or any compatible endpoint
202
+ OPENAI_ENDPOINT_URL = "https://openrouter.ai/api/v1"
203
+ ```
204
+
205
+ The `OPENAI_ENDPOINT` provider can access any service with an OpenAI-compatible API, including:
206
+ - OpenRouter (access to hundreds of models)
207
+ - Azure OpenAI
208
+ - Together.ai
209
+ - Groq
210
+ - Anyscale
211
+ - Self-hosted LLM servers with OpenAI compatibility
212
+ - Any other service that implements the OpenAI API specification
213
+
214
+ ## Setting Up Search Engines
215
+
216
+ The system includes multiple search engines. Some require API keys:
217
+
218
+ ```toml
219
+ # Add to .secrets.toml
220
+ SERP_API_KEY = "your-serpapi-key-here" # For Google results via SerpAPI
221
+ GOOGLE_PSE_API_KEY = "your-google-key-here" # For Google Programmable Search
222
+ GOOGLE_PSE_ENGINE_ID = "your-pse-id-here" # For Google Programmable Search
223
+ BRAVE_API_KEY = "your-brave-search-key-here" # For Brave Search
224
+ GUARDIAN_API_KEY = "your-guardian-key-here" # For The Guardian
225
+ ```
226
+
227
+ No API key is required for Wikipedia, arXiv, PubMed, Semantic Scholar, or local collections.
228
+
229
+ ## Local Document Search (RAG)
230
+
231
+ The system can search through your local documents using vector embeddings.
232
+
233
+ ### Setting Up Document Collections
234
+
235
+ 1. Define collections in `local_collections.toml`. Default collections include:
236
+
237
+ ```toml
238
+ [project_docs]
239
+ name = "Project Documents"
240
+ description = "Project documentation and specifications"
241
+ paths = ["@format ${DOCS_DIR}/project_documents"]
242
+ enabled = true
243
+ embedding_model = "all-MiniLM-L6-v2"
244
+ embedding_device = "cpu"
245
+ embedding_model_type = "sentence_transformers"
246
+ max_results = 20
247
+ max_filtered_results = 5
248
+ chunk_size = 1000
249
+ chunk_overlap = 200
250
+ cache_dir = "__CACHE_DIR__/local_search/project_docs"
251
+
252
+ # More collections defined in the file...
253
+ ```
254
+
255
+ 2. Create your document directories:
256
+ - The `${DOCS_DIR}` variable points to a default location in your Documents folder
257
+ - Documents are automatically indexed when the search is first used
258
+
259
+ ### Using Local Search
260
+
261
+ You can use local document search in several ways:
262
+
263
+ 1. **Auto-selection**: Set `tool = "auto"` in the `[search]` section of `settings.toml`
264
+ 2. **Explicit collection**: Set `tool = "project_docs"` to search only that collection
265
+ 3. **All collections**: Set `tool = "local_all"` to search across all collections
266
+ 4. **Query syntax**: Type `collection:project_docs your query` to target a specific collection
267
+
268
+ ## Available Search Engines
269
+
270
+ | Engine | Purpose | API Key Required? | Rate Limit |
271
+ |--------|---------|-------------------|------------|
272
+ | `auto` | Intelligently selects the best engine | No | Based on selected engine |
273
+ | `wikipedia` | General knowledge and facts | No | No strict limit |
274
+ | `arxiv` | Scientific papers and research | No | No strict limit |
275
+ | `pubmed` | Medical and biomedical research | No | No strict limit |
276
+ | `semantic_scholar` | Academic literature across all fields | No | 100/5min |
277
+ | `github` | Code repositories and documentation | No | 60/hour (unauthenticated) |
278
+ | `brave` | Web search (privacy-focused) | Yes | Based on plan |
279
+ | `serpapi` | Google search results | Yes | Based on plan |
280
+ | `google_pse` | Custom Google search | Yes | 100/day free tier |
281
+ | `wayback` | Historical web content | No | No strict limit |
282
+ | `searxng` | Local web search engine | No (requires local server) | No limit |
283
+ | Any collection name | Search your local documents | No | No limit |
284
+
285
+ > **Support Free Knowledge:** If you frequently use the search engines in this tool, please consider making a donation to these organizations. They provide valuable services and rely on user support to maintain their operations:
286
+ > - [Donate to Wikipedia](https://donate.wikimedia.org)
287
+ > - [Support arXiv](https://arxiv.org/about/give)
288
+ > - [Donate to DuckDuckGo](https://duckduckgo.com/donations)
289
+ > - [Support PubMed/NCBI](https://www.nlm.nih.gov/pubs/donations/donations.html)
290
+
291
+ ## Advanced Configuration
292
+
293
+ ### Research Parameters
294
+
295
+ Edit `settings.toml` to customize research parameters:
296
+
297
+ ```toml
298
+ [search]
299
+ # Search tool to use (auto, wikipedia, arxiv, etc.)
300
+ tool = "auto"
301
+
302
+ # Number of research cycles
303
+ iterations = 2
304
+
305
+ # Questions generated per cycle
306
+ questions_per_iteration = 2
307
+
308
+ # Results per search query
309
+ max_results = 50
310
+
311
+ # Results after relevance filtering
312
+ max_filtered_results = 5
313
+
314
+ # More settings available...
315
+ ```
316
+
317
+ ## Web Interface
318
+
319
+ The web interface offers several features:
320
+
321
+ - **Dashboard**: Start and manage research queries
322
+ - **Real-time Updates**: Track research progress
323
+ - **Research History**: Access past queries
324
+ - **PDF Export**: Download reports
325
+ - **Research Management**: Terminate processes or delete records
326
+
327
+ ![Web Interface](./web1.png)
328
+ ![Web Interface](./web2.png)
329
+
330
+ ## Command Line Interface
331
+
332
+ The CLI version allows you to:
333
+
334
+ 1. Choose between a quick summary or detailed report
335
+ 2. Enter your research query
336
+ 3. View results directly in the terminal
337
+ 4. Save reports automatically to the configured output directory
338
+
339
+ ## Development Setup
340
+
341
+ If you want to develop or modify the package, you can install it in development mode:
342
+
343
+ ```bash
344
+ # Clone the repository
345
+ git clone https://github.com/LearningCircuit/local-deep-research.git
346
+ cd local-deep-research
347
+
348
+ # Install in development mode
349
+ pip install -e .
350
+ ```
351
+
352
+ This creates an "editable" installation that uses your local code, so any changes you make are immediately available without reinstalling.
353
+
354
+ You can run the application directly using Python module syntax:
355
+
356
+ ```bash
357
+ # Run the web interface
358
+ python -m local_deep_research.web.app
359
+
360
+ # Run the CLI version
361
+ python -m local_deep_research.main
362
+ ```
363
+
364
+ This approach is useful for development and debugging, as it provides more detailed error messages and allows you to make code changes on the fly.
365
+
366
+ ## Example Research
367
+
368
+ The repository includes complete research examples, such as our [fusion energy research analysis](https://github.com/LearningCircuit/local-deep-research/blob/main/examples/fusion-energy-research-developments.md), that showcase the system's capabilities.
369
+
370
+ ## Community & Support
371
+
372
+ Join our [Discord server](https://discord.gg/2E6gYU2Z) to exchange ideas, discuss usage patterns, and share research approaches.
373
+
374
+ ## License
375
+
376
+ This project is licensed under the MIT License.
377
+
378
+ ## Acknowledgments
379
+
380
+ - Built with [Ollama](https://ollama.ai) for local AI processing
381
+ - Search powered by multiple sources:
382
+ - [Wikipedia](https://www.wikipedia.org/) for factual knowledge (default search engine)
383
+ - [arXiv](https://arxiv.org/) for scientific papers
384
+ - [PubMed](https://pubmed.ncbi.nlm.nih.gov/) for biomedical literature
385
+ - [DuckDuckGo](https://duckduckgo.com) for web search
386
+ - [The Guardian](https://www.theguardian.com/) for journalism
387
+ - [SerpAPI](https://serpapi.com) for Google search results (requires API key)
388
+ - [SearXNG](https://searxng.org/) as a local web search engine
389
+ - [Brave Search](https://search.brave.com/) for privacy-focused web search
390
+ - [Semantic Scholar](https://www.semanticscholar.org/) for academic literature
391
+ - Built on [LangChain](https://github.com/hwchase17/langchain) framework
392
+ - Uses [justext](https://github.com/miso-belica/justext), [Playwright](https://playwright.dev), [FAISS](https://github.com/facebookresearch/faiss), and more
393
+
394
+ ## Contributing
395
+
396
+ Contributions are welcome! Please feel free to submit a Pull Request.
397
+
398
+ 1. Fork the repository
399
+ 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
400
+ 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
401
+ 4. Push to the branch (`git push origin feature/AmazingFeature`)
402
+ 5. Open a Pull Request
@@ -1,23 +1,22 @@
1
1
  local_deep_research/__init__.py,sha256=pfHzjzYc6Szo8VCNLtFZRXyAlEz7CViY7r2fH9O7yms,584
2
- local_deep_research/citation_handler.py,sha256=1Znl5SPkVwOZ3JvFowvaW1uriaw37s1nPYhXbpa2UZ0,4423
3
- local_deep_research/config.py,sha256=lucqOE4KeNm1ynYdcHYWJLE5fJ0QN-1QKZpRcBPsHe8,8674
4
- local_deep_research/local_collections.py,sha256=SB-fdptT7qS0klJUVx_Rs9OgDwafMUgI46984WlZGKI,6076
2
+ local_deep_research/citation_handler.py,sha256=v_fwTy-2XvUuoH3OQRzmBrvaiN7mBk8jbNfySslmt5g,4357
3
+ local_deep_research/config.py,sha256=PAruLZutlrjkGOKrv49hk8U4q9JPWWgAKLiY8Ukpsks,8572
5
4
  local_deep_research/main.py,sha256=uQXtGQ6LtZNd5Qw63D5ke4Q_LjYimouWVSUknVsk3JQ,3645
6
- local_deep_research/report_generator.py,sha256=UOiSw_vPHgtUpI8L9_UaOlpBVBloPB-ilhAo-1d2B9M,8200
7
- local_deep_research/search_system.py,sha256=KNSn_8ciEGfSRR0k0ggIzZF6jqNXoYsCnUmdKgtKO0E,15481
5
+ local_deep_research/report_generator.py,sha256=EvaArnWirMgg42fMzmZeJczoEYujEbJ2ryHHYuuoXx8,8058
6
+ local_deep_research/search_system.py,sha256=yY3BEzX68vdtUcYF9h6lC3yVao0YA_NSBj6W3-RwlKk,15459
8
7
  local_deep_research/defaults/__init__.py,sha256=2Vvlkl-gmP_qPYWegE4JBgummypogl3VXrQ1XzptFDU,1381
9
- local_deep_research/defaults/llm_config.py,sha256=88IGWPPvikSKmAqfqsGovBx2Jac5eh2sBY_LIW624Ik,7910
8
+ local_deep_research/defaults/llm_config.py,sha256=T03pntyNtOk1fvu-RZ-iEoh7L2D2hcICr8usIPpULuo,7870
10
9
  local_deep_research/defaults/local_collections.toml,sha256=_edVWVHrhunMfazjejhJlGPRkHKKIP51qQtNkMgNEiA,1406
11
- local_deep_research/defaults/main.toml,sha256=l_J9JAPhKEp63IsLBO0hQDVimxogEpnrEVnNjiOeUxg,1403
10
+ local_deep_research/defaults/main.toml,sha256=DLhFq88vdE2_psLaWhPV9BWPixqTHvR2Rllaj_rmjJ4,1403
12
11
  local_deep_research/defaults/search_engines.toml,sha256=TYkOqVaZq9JPawz4fIPyGdkAtYa4t8F9H50VY-wv2ak,8101
13
12
  local_deep_research/utilties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
13
  local_deep_research/utilties/enums.py,sha256=TVAZiu9szNbdacfb7whgaQJJlSk7oYByADaAierD4CE,229
15
14
  local_deep_research/utilties/llm_utils.py,sha256=IGv-_gJWqLTpO3_op1NHIwxKaFEzmXhhVYSLTTSMnIA,4522
16
- local_deep_research/utilties/search_utilities.py,sha256=C8ycFd7blcq5vtnd6GxP8dkepZT6EEqHFtT3WYxF0Ck,4151
15
+ local_deep_research/utilties/search_utilities.py,sha256=-wj_-HCotqDUXYcPaKGN1C_t6WH4xubNhem4RE60AuM,4098
17
16
  local_deep_research/utilties/setup_utils.py,sha256=t6GNp7lK1nLPdPNCkYUk82IATGM62vqy8UBD-KqShOs,215
18
17
  local_deep_research/web/__init__.py,sha256=3oHMatNu8r24FBtpojriIVbHYOVSHj4Q-quycMKOuDk,62
19
18
  local_deep_research/web/app.py,sha256=5_VLNdhJOqdgacucglUdS_lVURNgYNbXhK9vME6JmzA,72431
20
- local_deep_research/web/static/css/styles.css,sha256=mW217FfZNW1pzMtlbuXE2fRBJekeIdIoy4m-yXFirj4,23782
19
+ local_deep_research/web/static/css/styles.css,sha256=_26yBV1fKM51Dfv67CxKSbK8aeoYK5Tl7b2TPs5Whuo,24641
21
20
  local_deep_research/web/static/js/app.js,sha256=GPncdWpw2YNTs56JY-0tjTTr9JnX-fIZSZX0agwKZMU,172813
22
21
  local_deep_research/web/templates/api_keys_config.html,sha256=jA8Y-nfUGJ1dTvbw2jK_8xPy2x6UG_5gHpbrTJAex2g,3527
23
22
  local_deep_research/web/templates/collections_config.html,sha256=Dci7KumXBON8rAXRX8TVjgqS-bbht7d6aQiedDUnxQ0,3560
@@ -28,30 +27,29 @@ local_deep_research/web/templates/search_engines_config.html,sha256=z_krznfdhF3e
28
27
  local_deep_research/web/templates/settings.html,sha256=S9A-tdpzMhP2Zw7kp2jxKlwaWtoRil_IHX_a376vXsA,29575
29
28
  local_deep_research/web/templates/settings_dashboard.html,sha256=De-v1KNdVvkXme5i3YZ6sIfU9aAKDc_N-AW9n4PZoso,9109
30
29
  local_deep_research/web_search_engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- local_deep_research/web_search_engines/full_search.py,sha256=3SSTvD12g4pNlZCSGh8jwsyYWpQglgqjADnq8dG1zyI,9756
32
- local_deep_research/web_search_engines/search_engine_base.py,sha256=Knmf45pMYd7hYc9x8jG8gOtMnribsXDcOXGrA50LK3E,8100
33
- local_deep_research/web_search_engines/search_engine_factory.py,sha256=B_QaqoAwnVXCmHNdqGbo94LekWY6wpBw_PWNkI120qE,10728
30
+ local_deep_research/web_search_engines/search_engine_base.py,sha256=QmhfjuHK2deomh8tARghKuYnF-5t3wwBB661odS2VtU,8065
31
+ local_deep_research/web_search_engines/search_engine_factory.py,sha256=Sld6bYTwcyTxgVLx04t00sD7vfJhSHFOl6iiGJ08ZUE,11118
34
32
  local_deep_research/web_search_engines/search_engines_config.py,sha256=bNCuR09NOk5cjnKIgDQfhPipqmvDKeE7WP_6p8LLZf0,1979
35
33
  local_deep_research/web_search_engines/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- local_deep_research/web_search_engines/engines/full_search.py,sha256=mcxS8o7-WmOQc3_H4232adhBHevZfSHWmaOFoia68UU,4711
34
+ local_deep_research/web_search_engines/engines/full_search.py,sha256=BuOz8dX-XocazCG7gGBKFnIY99FZtNFI0-Wq3fhsfp4,4689
37
35
  local_deep_research/web_search_engines/engines/meta_search_engine.py,sha256=Zky4sowCortEaIj1pBU0sKuggXr5izkubgrD7cM8IOQ,11485
38
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py,sha256=WlKQSB1kgdEaLtDj4JwpIiP3eZPustJt4ryJ9wiulT0,15316
39
- local_deep_research/web_search_engines/engines/search_engine_brave.py,sha256=zAofutKEnXb_CYAi7hrptNVrQ15PQoYK_Eg3spcD1Ig,9583
40
- local_deep_research/web_search_engines/engines/search_engine_ddg.py,sha256=-_rWr2uwSoxHLkAV-WrHj_vuoSActr_jzfveI7dE10c,4845
36
+ local_deep_research/web_search_engines/engines/search_engine_arxiv.py,sha256=cf8OzhSzE1zqaiOZ6EFQGy_6hTCJMaTysYd8rs1KJNU,15408
37
+ local_deep_research/web_search_engines/engines/search_engine_brave.py,sha256=J242byUGG5ROQ_bh-mU292_t7Q7m20_9O0r1w5z6d9A,9688
38
+ local_deep_research/web_search_engines/engines/search_engine_ddg.py,sha256=qK2i65dbPtr_ppoKPU_YA0mDqM_sDAvN6ZztvdFjsCk,4910
41
39
  local_deep_research/web_search_engines/engines/search_engine_github.py,sha256=qqipsw2ycjlRbR6mmMmxzGU3LEcFDJJJ7Ez7xUgWjRM,26768
42
40
  local_deep_research/web_search_engines/engines/search_engine_google_pse.py,sha256=YkXvBmgcqTImCxuyy6580SGRAvImGc6SzInXZgo1kNE,11294
43
41
  local_deep_research/web_search_engines/engines/search_engine_guardian.py,sha256=MW4WIwtNAwcpdigNXronyezAxr50EIZTV1NMedrAv2o,23912
44
- local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=uAsNtaFV3AH4ltNpudHIl546jsOKKyGDjUwU5J7gKts,36766
42
+ local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=rfmPiA9DVmjbaB3KQtlq7s6BRMgHRgzP7AhktZNDw2M,36772
45
43
  local_deep_research/web_search_engines/engines/search_engine_local_all.py,sha256=CRNcxBzNd9kanyIJYaUDB7qfXYxVCvd4L2mX8jL73v0,5955
46
44
  local_deep_research/web_search_engines/engines/search_engine_pubmed.py,sha256=MayfzM2R0XoI7cpXlG1XJ1ktfTN_6H-Xs9RmD89UAao,39236
47
45
  local_deep_research/web_search_engines/engines/search_engine_searxng.py,sha256=GMy6qDMSaVBtjWRm48XBu6TjLAy1HfcO2EFTwr8S9rk,18048
48
46
  local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py,sha256=6VMymjFJ7pyV2nv5dRfFofXgg0kG82rkwbICVnNDNH4,23352
49
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=iy-QmT99Tf2cJlfCrPbEhtMB7a_zCKppvlUKi7VBrlE,9118
47
+ local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=XikEYnM-pAaR70VeAJ28lbqpRzCj4bCA9xY29taTV8g,9215
50
48
  local_deep_research/web_search_engines/engines/search_engine_wayback.py,sha256=astAvSLajDZ6rwgthJ3iBcHSWuDSYPO7uilIxaJhXmU,18132
51
49
  local_deep_research/web_search_engines/engines/search_engine_wikipedia.py,sha256=KSGJECbEcxZpVK-PhYsTCtzedSK0l1AjQmvGtx8KBks,9799
52
- local_deep_research-0.1.16.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
53
- local_deep_research-0.1.16.dist-info/METADATA,sha256=KHrqDTRQmo_FAt1KFZLmYlbO0eQKfjqvpHFu_kRUd_w,15151
54
- local_deep_research-0.1.16.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
55
- local_deep_research-0.1.16.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
56
- local_deep_research-0.1.16.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
57
- local_deep_research-0.1.16.dist-info/RECORD,,
50
+ local_deep_research-0.1.18.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
51
+ local_deep_research-0.1.18.dist-info/METADATA,sha256=6F15QvEwfPPqHVEGRUCPEYVXKoR8PvbXOwzM9Vemem8,15013
52
+ local_deep_research-0.1.18.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
53
+ local_deep_research-0.1.18.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
54
+ local_deep_research-0.1.18.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
55
+ local_deep_research-0.1.18.dist-info/RECORD,,
@@ -1,141 +0,0 @@
1
- # local_collections.py
2
- """
3
- Configuration file for local document collections.
4
- Each collection functions as an independent search engine.
5
- """
6
-
7
- import os
8
- from typing import Dict, Any
9
-
10
- # Registry of local document collections
11
- # Each collection appears as a separate search engine in the main configuration
12
- LOCAL_COLLECTIONS = {
13
- # Project Documents Collection
14
- "project_docs": {
15
- "name": "Project Documents",
16
- "description": "Project documentation and specifications",
17
- "paths": [os.path.abspath("./local_search_files/project_documents")],
18
- "enabled": True,
19
- "embedding_model": "all-MiniLM-L6-v2",
20
- "embedding_device": "cpu",
21
- "embedding_model_type": "sentence_transformers",
22
- "max_results": 20,
23
- "max_filtered_results": 5,
24
- "chunk_size": 1000,
25
- "chunk_overlap": 200,
26
- "cache_dir": ".cache/local_search/project_docs"
27
- },
28
-
29
- # Research Papers Collection
30
- "research_papers": {
31
- "name": "Research Papers",
32
- "description": "Academic research papers and articles",
33
- "paths": [os.path.abspath("local_search_files/research_papers")],
34
- "enabled": True,
35
- "embedding_model": "all-MiniLM-L6-v2",
36
- "embedding_device": "cpu",
37
- "embedding_model_type": "sentence_transformers",
38
- "max_results": 20,
39
- "max_filtered_results": 5,
40
- "chunk_size": 800, # Smaller chunks for academic content
41
- "chunk_overlap": 150,
42
- "cache_dir": ".cache/local_search/research_papers"
43
- },
44
-
45
- # Personal Notes Collection
46
- "personal_notes": {
47
- "name": "Personal Notes",
48
- "description": "Personal notes and documents",
49
- "paths": [os.path.abspath("./local_search_files/personal_notes")],
50
- "enabled": True,
51
- "embedding_model": "all-MiniLM-L6-v2",
52
- "embedding_device": "cpu",
53
- "embedding_model_type": "sentence_transformers",
54
- "max_results": 30,
55
- "max_filtered_results": 10,
56
- "chunk_size": 500, # Smaller chunks for notes
57
- "chunk_overlap": 100,
58
- "cache_dir": ".cache/local_search/personal_notes"
59
- }
60
- }
61
-
62
- # Configuration for local search integration
63
- LOCAL_SEARCH_CONFIG = {
64
- # General embedding options
65
- "DEFAULT_EMBEDDING_MODEL": "all-MiniLM-L6-v2",
66
- "DEFAULT_EMBEDDING_DEVICE": "cpu", # "cpu" or "cuda" for GPU acceleration
67
- "DEFAULT_EMBEDDING_MODEL_TYPE": "sentence_transformers", # or "ollama"
68
-
69
- # Ollama settings (only used if model type is "ollama")
70
- # Note: You must run 'ollama pull nomic-embed-text' first if using Ollama for embeddings
71
- "OLLAMA_BASE_URL": "http://localhost:11434",
72
- "OLLAMA_EMBEDDING_MODEL": "nomic-embed-text",
73
-
74
- # Default indexing options
75
- "FORCE_REINDEX": True, # Force reindexing on startup
76
- "CACHE_DIR": ".cache/local_search", # Base directory for cache
77
- }
78
-
79
- def register_local_collections(search_engines_dict: Dict[str, Any]) -> None:
80
- """
81
- Register all enabled local collections as search engines.
82
-
83
- Args:
84
- search_engines_dict: The main search engines dictionary to update
85
- """
86
- for collection_id, collection in LOCAL_COLLECTIONS.items():
87
- print(collection_id, collection)
88
- if collection.get("enabled", True):
89
- # Skip if already defined (don't override)
90
- if collection_id in search_engines_dict:
91
- continue
92
-
93
- # Validate paths exist
94
- paths = collection.get("paths", [])
95
- valid_paths = []
96
- for path in paths:
97
- if os.path.exists(path) and os.path.isdir(path):
98
- valid_paths.append(path)
99
- else:
100
- print(f"Warning: Collection '{collection_id}' contains non-existent folder: {path}")
101
-
102
- # Log warning if no valid paths
103
- if not valid_paths and paths:
104
- print(f"Warning: Collection '{collection_id}' has no valid folders. It will be registered but won't return results.")
105
-
106
- # Create a search engine entry for this collection
107
- search_engines_dict[collection_id] = {
108
- "module_path": "local_deep_research.web_search_engines.engines.search_engine_local",
109
- "class_name": "LocalSearchEngine",
110
- "requires_api_key": False,
111
- "reliability": 0.9, # High reliability for local documents
112
- "strengths": ["personal documents", "offline access",
113
- collection.get("description", "local documents")],
114
- "weaknesses": ["requires indexing", "limited to specific folders"],
115
- "default_params": {
116
- "folder_paths": collection.get("paths", []),
117
- "embedding_model": collection.get(
118
- "embedding_model",
119
- LOCAL_SEARCH_CONFIG["DEFAULT_EMBEDDING_MODEL"]
120
- ),
121
- "embedding_device": collection.get(
122
- "embedding_device",
123
- LOCAL_SEARCH_CONFIG["DEFAULT_EMBEDDING_DEVICE"]
124
- ),
125
- "embedding_model_type": collection.get(
126
- "embedding_model_type",
127
- LOCAL_SEARCH_CONFIG["DEFAULT_EMBEDDING_MODEL_TYPE"]
128
- ),
129
- "chunk_size": collection.get("chunk_size", 1000),
130
- "chunk_overlap": collection.get("chunk_overlap", 200),
131
- "cache_dir": collection.get(
132
- "cache_dir",
133
- f"{LOCAL_SEARCH_CONFIG['CACHE_DIR']}/{collection_id}"
134
- ),
135
- "max_results": collection.get("max_results", 20),
136
- "max_filtered_results": collection.get("max_filtered_results", 5),
137
- "collection_name": collection.get("name", collection_id),
138
- "collection_description": collection.get("description", "")
139
- },
140
- "requires_llm": True
141
- }