local-deep-research 0.1.16__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. local_deep_research/citation_handler.py +0 -2
  2. local_deep_research/config.py +1 -4
  3. local_deep_research/defaults/llm_config.py +2 -2
  4. local_deep_research/defaults/main.toml +3 -3
  5. local_deep_research/report_generator.py +1 -5
  6. local_deep_research/search_system.py +1 -1
  7. local_deep_research/utilties/search_utilities.py +3 -4
  8. local_deep_research/web_search_engines/engines/full_search.py +9 -8
  9. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -14
  10. local_deep_research/web_search_engines/engines/search_engine_brave.py +10 -9
  11. local_deep_research/web_search_engines/engines/search_engine_ddg.py +4 -2
  12. local_deep_research/web_search_engines/engines/search_engine_local.py +1 -1
  13. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +9 -8
  14. local_deep_research/web_search_engines/search_engine_base.py +1 -1
  15. local_deep_research-0.1.17.dist-info/METADATA +393 -0
  16. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/RECORD +20 -22
  17. local_deep_research/local_collections.py +0 -141
  18. local_deep_research/web_search_engines/full_search.py +0 -254
  19. local_deep_research-0.1.16.dist-info/METADATA +0 -346
  20. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/WHEEL +0 -0
  21. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/entry_points.txt +0 -0
  22. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/licenses/LICENSE +0 -0
  23. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,393 @@
1
+ Metadata-Version: 2.4
2
+ Name: local-deep-research
3
+ Version: 0.1.17
4
+ Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
5
+ Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 LearningCircuit
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/LearningCircuit/local-deep-research
29
+ Project-URL: Bug Tracker, https://github.com/LearningCircuit/local-deep-research/issues
30
+ Classifier: Programming Language :: Python :: 3
31
+ Classifier: License :: OSI Approved :: MIT License
32
+ Classifier: Operating System :: OS Independent
33
+ Requires-Python: >=3.8
34
+ Description-Content-Type: text/markdown
35
+ License-File: LICENSE
36
+ Requires-Dist: langchain>=0.3.18
37
+ Requires-Dist: langchain-community>=0.3.17
38
+ Requires-Dist: langchain-core>=0.3.34
39
+ Requires-Dist: langchain-ollama>=0.2.3
40
+ Requires-Dist: langchain-openai>=0.3.5
41
+ Requires-Dist: langchain_anthropic>=0.3.7
42
+ Requires-Dist: duckduckgo_search>=7.3.2
43
+ Requires-Dist: python-dateutil>=2.9.0
44
+ Requires-Dist: typing_extensions>=4.12.2
45
+ Requires-Dist: justext
46
+ Requires-Dist: playwright
47
+ Requires-Dist: beautifulsoup4
48
+ Requires-Dist: flask>=2.0.1
49
+ Requires-Dist: flask-cors>=3.0.10
50
+ Requires-Dist: flask-socketio>=5.1.1
51
+ Requires-Dist: sqlalchemy>=1.4.23
52
+ Requires-Dist: wikipedia
53
+ Requires-Dist: arxiv>=1.4.3
54
+ Requires-Dist: pypdf
55
+ Requires-Dist: sentence-transformers
56
+ Requires-Dist: faiss-cpu
57
+ Requires-Dist: pydantic>=2.0.0
58
+ Requires-Dist: pydantic-settings>=2.0.0
59
+ Requires-Dist: toml>=0.10.2
60
+ Requires-Dist: platformdirs>=3.0.0
61
+ Requires-Dist: dynaconf
62
+ Requires-Dist: requests>=2.28.0
63
+ Requires-Dist: tiktoken>=0.4.0
64
+ Requires-Dist: xmltodict>=0.13.0
65
+ Requires-Dist: lxml>=4.9.2
66
+ Requires-Dist: pdfplumber>=0.9.0
67
+ Requires-Dist: unstructured>=0.10.0
68
+ Dynamic: license-file
69
+
70
+ # Local Deep Research
71
+
72
+ An AI-powered research assistant that performs deep, iterative analysis using multiple LLMs and web searches. The system can be run locally for privacy or configured to use cloud-based LLMs for enhanced capabilities.
73
+
74
+ ## Quick Start
75
+
76
+ ```bash
77
+ # Install the package
78
+ pip install local-deep-research
79
+
80
+ # Install required browser automation tools
81
+ playwright install
82
+
83
+ # For local models, install Ollama
84
+ # Download from https://ollama.ai and then pull a model
85
+ ollama pull gemma3:12b
86
+ ```
87
+
88
+ Then run:
89
+
90
+ ```bash
91
+ # Start the web interface (recommended)
92
+ ldr-web # (OR python -m local_deep_research.web.app)
93
+
94
+ # OR run the command line version
95
+ ldr # (OR python -m local_deep_research.main)
96
+ ```
97
+
98
+ Access the web interface at `http://127.0.0.1:5000` in your browser.
99
+
100
+ ## Features
101
+
102
+ - 🔍 **Advanced Research Capabilities**
103
+ - Automated deep research with intelligent follow-up questions
104
+ - Citation tracking and source verification
105
+ - Multi-iteration analysis for comprehensive coverage
106
+ - Full webpage content analysis (not just snippets)
107
+
108
+ - 🤖 **Flexible LLM Support**
109
+ - Local AI processing with Ollama models
110
+ - Cloud LLM support (Claude, GPT)
111
+ - Supports all Langchain models
112
+ - Configurable model selection based on needs
113
+
114
+ - 📊 **Rich Output Options**
115
+ - Detailed research findings with citations
116
+ - Comprehensive research reports
117
+ - Quick summaries for rapid insights
118
+ - Source tracking and verification
119
+
120
+ - 🔒 **Privacy-Focused**
121
+ - Runs entirely on your machine when using local models
122
+ - Configurable search settings
123
+ - Transparent data handling
124
+
125
+ - 🌐 **Enhanced Search Integration**
126
+ - **Auto-selection of search sources**: The "auto" search engine intelligently analyzes your query and selects the most appropriate search engine
127
+ - Multiple search engines including Wikipedia, arXiv, PubMed, Semantic Scholar, and more
128
+ - **Local RAG search for private documents** - search your own documents with vector embeddings
129
+ - Full webpage content retrieval and intelligent filtering
130
+
131
+ ## Configuration System
132
+
133
+ The package automatically creates and manages configuration files in your user directory:
134
+
135
+ - **Windows**: `Documents\LearningCircuit\local-deep-research\config\`
136
+ - **Linux/Mac**: `~/.config/local_deep_research/config/`
137
+
138
+ ### Default Configuration Files
139
+
140
+ When you first run the tool, it creates these configuration files:
141
+
142
+ | File | Purpose |
143
+ |------|---------|
144
+ | `settings.toml` | General settings for research, web interface, and search |
145
+ | `llm_config.py` | Configure which LLM to use (local or cloud-based) |
146
+ | `search_engines.toml` | Define and configure search engines |
147
+ | `local_collections.toml` | Configure local document collections for RAG |
148
+ | `.secrets.toml` | Store API keys for cloud services |
149
+
150
+ ## Setting Up AI Models
151
+
152
+ The system supports multiple LLM providers:
153
+
154
+ ### Local Models (via Ollama)
155
+
156
+ 1. [Install Ollama](https://ollama.ai)
157
+ 2. Pull a model: `ollama pull gemma3:12b` (recommended model)
158
+ 3. Ollama runs on port 11434 by default
159
+
160
+ ### Cloud Models
161
+
162
+ Edit your `.secrets.toml` file to add API keys:
163
+
164
+ ```toml
165
+ ANTHROPIC_API_KEY = "your-api-key-here" # For Claude models
166
+ OPENAI_API_KEY = "your-openai-key-here" # For GPT models
167
+ OPENAI_ENDPOINT_API_KEY = "your-key-here" # For OpenRouter or similar services
168
+ ```
169
+
170
+ Then edit `llm_config.py` to change the default provider:
171
+
172
+ ```python
173
+ # Set your preferred model provider here
174
+ DEFAULT_PROVIDER = ModelProvider.OLLAMA # Change to your preferred provider
175
+
176
+ # Set your default model name here
177
+ DEFAULT_MODEL = "mistral" # Change to your preferred model
178
+ ```
179
+
180
+ ### Supported LLM Providers
181
+
182
+ The following LLM providers are available:
183
+
184
+ | Provider | Type | Configuration | Notes |
185
+ |----------|------|--------------|-------|
186
+ | `OLLAMA` | Local | No API key needed | Default - install from ollama.ai |
187
+ | `OPENAI` | Cloud | Requires `OPENAI_API_KEY` | GPT models (3.5, 4, 4o) |
188
+ | `ANTHROPIC` | Cloud | Requires `ANTHROPIC_API_KEY` | Claude models (3 Opus, Sonnet, Haiku) |
189
+ | `OPENAI_ENDPOINT` | Cloud | Requires `OPENAI_ENDPOINT_API_KEY` | For any OpenAI-compatible API |
190
+ | `VLLM` | Local | No API key needed | For hosting models via vLLM |
191
+
192
+ You can configure the OpenAI-compatible endpoint URL in `llm_config.py`:
193
+
194
+ ```python
195
+ # For OpenRouter, Together.ai, Azure OpenAI, or any compatible endpoint
196
+ OPENAI_ENDPOINT_URL = "https://openrouter.ai/api/v1"
197
+ ```
198
+
199
+ The `OPENAI_ENDPOINT` provider can access any service with an OpenAI-compatible API, including:
200
+ - OpenRouter (access to hundreds of models)
201
+ - Azure OpenAI
202
+ - Together.ai
203
+ - Groq
204
+ - Anyscale
205
+ - Self-hosted LLM servers with OpenAI compatibility
206
+ - Any other service that implements the OpenAI API specification
207
+
208
+ ## Setting Up Search Engines
209
+
210
+ The system includes multiple search engines. Some require API keys:
211
+
212
+ ```toml
213
+ # Add to .secrets.toml
214
+ SERP_API_KEY = "your-serpapi-key-here" # For Google results via SerpAPI
215
+ GOOGLE_PSE_API_KEY = "your-google-key-here" # For Google Programmable Search
216
+ GOOGLE_PSE_ENGINE_ID = "your-pse-id-here" # For Google Programmable Search
217
+ BRAVE_API_KEY = "your-brave-search-key-here" # For Brave Search
218
+ GUARDIAN_API_KEY = "your-guardian-key-here" # For The Guardian
219
+ ```
220
+
221
+ No API key is required for Wikipedia, arXiv, PubMed, Semantic Scholar, or local collections.
222
+
223
+ ## Local Document Search (RAG)
224
+
225
+ The system can search through your local documents using vector embeddings.
226
+
227
+ ### Setting Up Document Collections
228
+
229
+ 1. Define collections in `local_collections.toml`. Default collections include:
230
+
231
+ ```toml
232
+ [project_docs]
233
+ name = "Project Documents"
234
+ description = "Project documentation and specifications"
235
+ paths = ["@format ${DOCS_DIR}/project_documents"]
236
+ enabled = true
237
+ embedding_model = "all-MiniLM-L6-v2"
238
+ embedding_device = "cpu"
239
+ embedding_model_type = "sentence_transformers"
240
+ max_results = 20
241
+ max_filtered_results = 5
242
+ chunk_size = 1000
243
+ chunk_overlap = 200
244
+ cache_dir = "__CACHE_DIR__/local_search/project_docs"
245
+
246
+ # More collections defined in the file...
247
+ ```
248
+
249
+ 2. Create your document directories:
250
+ - The `${DOCS_DIR}` variable points to a default location in your Documents folder
251
+ - Documents are automatically indexed when the search is first used
252
+
253
+ ### Using Local Search
254
+
255
+ You can use local document search in several ways:
256
+
257
+ 1. **Auto-selection**: Set `tool = "auto"` in the `[search]` section of `settings.toml`
258
+ 2. **Explicit collection**: Set `tool = "project_docs"` to search only that collection
259
+ 3. **All collections**: Set `tool = "local_all"` to search across all collections
260
+ 4. **Query syntax**: Type `collection:project_docs your query` to target a specific collection
261
+
262
+ ## Available Search Engines
263
+
264
+ | Engine | Purpose | API Key Required? |
265
+ |--------|---------|-------------------|
266
+ | `auto` | Intelligently selects the best engine | No |
267
+ | `wikipedia` | General knowledge and facts | No |
268
+ | `arxiv` | Scientific papers and research | No |
269
+ | `pubmed` | Medical and biomedical research | No |
270
+ | `semantic_scholar` | Academic literature across all fields | No |
271
+ | `github` | Code repositories and documentation | No (but rate-limited) |
272
+ | `brave` | Web search (privacy-focused) | Yes |
273
+ | `serpapi` | Google search results | Yes |
274
+ | `google_pse` | Custom Google search | Yes |
275
+ | `wayback` | Historical web content | No |
276
+ | `searxng` | Local web search engine | No (requires server) |
277
+ | Any collection name | Search your local documents | No |
278
+
279
+ > **Support Free Knowledge:** If you frequently use the search engines in this tool, please consider making a donation to these organizations. They provide valuable services and rely on user support to maintain their operations:
280
+ > - [Donate to Wikipedia](https://donate.wikimedia.org)
281
+ > - [Support arXiv](https://arxiv.org/about/give)
282
+ > - [Donate to DuckDuckGo](https://duckduckgo.com/donations)
283
+ > - [Support PubMed/NCBI](https://www.nlm.nih.gov/pubs/donations/donations.html)
284
+
285
+ ## Advanced Configuration
286
+
287
+ ### Research Parameters
288
+
289
+ Edit `settings.toml` to customize research parameters:
290
+
291
+ ```toml
292
+ [search]
293
+ # Search tool to use (auto, wikipedia, arxiv, etc.)
294
+ tool = "auto"
295
+
296
+ # Number of research cycles
297
+ iterations = 2
298
+
299
+ # Questions generated per cycle
300
+ questions_per_iteration = 2
301
+
302
+ # Results per search query
303
+ max_results = 50
304
+
305
+ # Results after relevance filtering
306
+ max_filtered_results = 5
307
+
308
+ # More settings available...
309
+ ```
310
+
311
+ ## Web Interface
312
+
313
+ The web interface offers several features:
314
+
315
+ - **Dashboard**: Start and manage research queries
316
+ - **Real-time Updates**: Track research progress
317
+ - **Research History**: Access past queries
318
+ - **PDF Export**: Download reports
319
+ - **Research Management**: Terminate processes or delete records
320
+
321
+ ## Command Line Interface
322
+
323
+ The CLI version allows you to:
324
+
325
+ 1. Choose between a quick summary or detailed report
326
+ 2. Enter your research query
327
+ 3. View results directly in the terminal
328
+ 4. Save reports automatically to the configured output directory
329
+
330
+ ## Development Setup
331
+
332
+ If you want to develop or modify the package, you can install it in development mode:
333
+
334
+ ```bash
335
+ # Clone the repository
336
+ git clone https://github.com/LearningCircuit/local-deep-research.git
337
+ cd local-deep-research
338
+
339
+ # Install in development mode
340
+ pip install -e .
341
+ ```
342
+
343
+ This creates an "editable" installation that uses your local code, so any changes you make are immediately available without reinstalling.
344
+
345
+ You can run the application directly using Python module syntax:
346
+
347
+ ```bash
348
+ # Run the web interface
349
+ python -m local_deep_research.web.app
350
+
351
+ # Run the CLI version
352
+ python -m local_deep_research.main
353
+ ```
354
+
355
+ This approach is useful for development and debugging, as it provides more detailed error messages and allows you to make code changes on the fly.
356
+
357
+ ## Example Research
358
+
359
+ The repository includes complete research examples like our [fusion energy research analysis](https://github.com/LearningCircuit/local-deep-research/blob/main/examples/fusion-energy-research-developments.md) showcasing the system's capabilities.
360
+
361
+ ## Community & Support
362
+
363
+ Join our [Discord server](https://discord.gg/2E6gYU2Z) to exchange ideas, discuss usage patterns, and share research approaches.
364
+
365
+ ## License
366
+
367
+ This project is licensed under the MIT License.
368
+
369
+ ## Acknowledgments
370
+
371
+ - Built with [Ollama](https://ollama.ai) for local AI processing
372
+ - Search powered by multiple sources:
373
+ - [Wikipedia](https://www.wikipedia.org/) for factual knowledge (default search engine)
374
+ - [arXiv](https://arxiv.org/) for scientific papers
375
+ - [PubMed](https://pubmed.ncbi.nlm.nih.gov/) for biomedical literature
376
+ - [DuckDuckGo](https://duckduckgo.com) for web search
377
+ - [The Guardian](https://www.theguardian.com/) for journalism
378
+ - [SerpAPI](https://serpapi.com) for Google search results (requires API key)
379
+ - [SearXNG](https://searxng.org/) as a local web search engine
380
+ - [Brave Search](https://search.brave.com/) for privacy-focused web search
381
+ - [Semantic Scholar](https://www.semanticscholar.org/) for academic literature
382
+ - Built on [LangChain](https://github.com/hwchase17/langchain) framework
383
+ - Uses [justext](https://github.com/miso-belica/justext), [Playwright](https://playwright.dev), [FAISS](https://github.com/facebookresearch/faiss), and more
384
+
385
+ ## Contributing
386
+
387
+ Contributions are welcome! Please feel free to submit a Pull Request.
388
+
389
+ 1. Fork the repository
390
+ 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
391
+ 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
392
+ 4. Push to the branch (`git push origin feature/AmazingFeature`)
393
+ 5. Open a Pull Request
@@ -1,19 +1,18 @@
1
1
  local_deep_research/__init__.py,sha256=pfHzjzYc6Szo8VCNLtFZRXyAlEz7CViY7r2fH9O7yms,584
2
- local_deep_research/citation_handler.py,sha256=1Znl5SPkVwOZ3JvFowvaW1uriaw37s1nPYhXbpa2UZ0,4423
3
- local_deep_research/config.py,sha256=lucqOE4KeNm1ynYdcHYWJLE5fJ0QN-1QKZpRcBPsHe8,8674
4
- local_deep_research/local_collections.py,sha256=SB-fdptT7qS0klJUVx_Rs9OgDwafMUgI46984WlZGKI,6076
2
+ local_deep_research/citation_handler.py,sha256=v_fwTy-2XvUuoH3OQRzmBrvaiN7mBk8jbNfySslmt5g,4357
3
+ local_deep_research/config.py,sha256=PAruLZutlrjkGOKrv49hk8U4q9JPWWgAKLiY8Ukpsks,8572
5
4
  local_deep_research/main.py,sha256=uQXtGQ6LtZNd5Qw63D5ke4Q_LjYimouWVSUknVsk3JQ,3645
6
- local_deep_research/report_generator.py,sha256=UOiSw_vPHgtUpI8L9_UaOlpBVBloPB-ilhAo-1d2B9M,8200
7
- local_deep_research/search_system.py,sha256=KNSn_8ciEGfSRR0k0ggIzZF6jqNXoYsCnUmdKgtKO0E,15481
5
+ local_deep_research/report_generator.py,sha256=EvaArnWirMgg42fMzmZeJczoEYujEbJ2ryHHYuuoXx8,8058
6
+ local_deep_research/search_system.py,sha256=yY3BEzX68vdtUcYF9h6lC3yVao0YA_NSBj6W3-RwlKk,15459
8
7
  local_deep_research/defaults/__init__.py,sha256=2Vvlkl-gmP_qPYWegE4JBgummypogl3VXrQ1XzptFDU,1381
9
- local_deep_research/defaults/llm_config.py,sha256=88IGWPPvikSKmAqfqsGovBx2Jac5eh2sBY_LIW624Ik,7910
8
+ local_deep_research/defaults/llm_config.py,sha256=T03pntyNtOk1fvu-RZ-iEoh7L2D2hcICr8usIPpULuo,7870
10
9
  local_deep_research/defaults/local_collections.toml,sha256=_edVWVHrhunMfazjejhJlGPRkHKKIP51qQtNkMgNEiA,1406
11
- local_deep_research/defaults/main.toml,sha256=l_J9JAPhKEp63IsLBO0hQDVimxogEpnrEVnNjiOeUxg,1403
10
+ local_deep_research/defaults/main.toml,sha256=DLhFq88vdE2_psLaWhPV9BWPixqTHvR2Rllaj_rmjJ4,1403
12
11
  local_deep_research/defaults/search_engines.toml,sha256=TYkOqVaZq9JPawz4fIPyGdkAtYa4t8F9H50VY-wv2ak,8101
13
12
  local_deep_research/utilties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
13
  local_deep_research/utilties/enums.py,sha256=TVAZiu9szNbdacfb7whgaQJJlSk7oYByADaAierD4CE,229
15
14
  local_deep_research/utilties/llm_utils.py,sha256=IGv-_gJWqLTpO3_op1NHIwxKaFEzmXhhVYSLTTSMnIA,4522
16
- local_deep_research/utilties/search_utilities.py,sha256=C8ycFd7blcq5vtnd6GxP8dkepZT6EEqHFtT3WYxF0Ck,4151
15
+ local_deep_research/utilties/search_utilities.py,sha256=-wj_-HCotqDUXYcPaKGN1C_t6WH4xubNhem4RE60AuM,4098
17
16
  local_deep_research/utilties/setup_utils.py,sha256=t6GNp7lK1nLPdPNCkYUk82IATGM62vqy8UBD-KqShOs,215
18
17
  local_deep_research/web/__init__.py,sha256=3oHMatNu8r24FBtpojriIVbHYOVSHj4Q-quycMKOuDk,62
19
18
  local_deep_research/web/app.py,sha256=5_VLNdhJOqdgacucglUdS_lVURNgYNbXhK9vME6JmzA,72431
@@ -28,30 +27,29 @@ local_deep_research/web/templates/search_engines_config.html,sha256=z_krznfdhF3e
28
27
  local_deep_research/web/templates/settings.html,sha256=S9A-tdpzMhP2Zw7kp2jxKlwaWtoRil_IHX_a376vXsA,29575
29
28
  local_deep_research/web/templates/settings_dashboard.html,sha256=De-v1KNdVvkXme5i3YZ6sIfU9aAKDc_N-AW9n4PZoso,9109
30
29
  local_deep_research/web_search_engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- local_deep_research/web_search_engines/full_search.py,sha256=3SSTvD12g4pNlZCSGh8jwsyYWpQglgqjADnq8dG1zyI,9756
32
- local_deep_research/web_search_engines/search_engine_base.py,sha256=Knmf45pMYd7hYc9x8jG8gOtMnribsXDcOXGrA50LK3E,8100
30
+ local_deep_research/web_search_engines/search_engine_base.py,sha256=QmhfjuHK2deomh8tARghKuYnF-5t3wwBB661odS2VtU,8065
33
31
  local_deep_research/web_search_engines/search_engine_factory.py,sha256=B_QaqoAwnVXCmHNdqGbo94LekWY6wpBw_PWNkI120qE,10728
34
32
  local_deep_research/web_search_engines/search_engines_config.py,sha256=bNCuR09NOk5cjnKIgDQfhPipqmvDKeE7WP_6p8LLZf0,1979
35
33
  local_deep_research/web_search_engines/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- local_deep_research/web_search_engines/engines/full_search.py,sha256=mcxS8o7-WmOQc3_H4232adhBHevZfSHWmaOFoia68UU,4711
34
+ local_deep_research/web_search_engines/engines/full_search.py,sha256=BuOz8dX-XocazCG7gGBKFnIY99FZtNFI0-Wq3fhsfp4,4689
37
35
  local_deep_research/web_search_engines/engines/meta_search_engine.py,sha256=Zky4sowCortEaIj1pBU0sKuggXr5izkubgrD7cM8IOQ,11485
38
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py,sha256=WlKQSB1kgdEaLtDj4JwpIiP3eZPustJt4ryJ9wiulT0,15316
39
- local_deep_research/web_search_engines/engines/search_engine_brave.py,sha256=zAofutKEnXb_CYAi7hrptNVrQ15PQoYK_Eg3spcD1Ig,9583
40
- local_deep_research/web_search_engines/engines/search_engine_ddg.py,sha256=-_rWr2uwSoxHLkAV-WrHj_vuoSActr_jzfveI7dE10c,4845
36
+ local_deep_research/web_search_engines/engines/search_engine_arxiv.py,sha256=cf8OzhSzE1zqaiOZ6EFQGy_6hTCJMaTysYd8rs1KJNU,15408
37
+ local_deep_research/web_search_engines/engines/search_engine_brave.py,sha256=J242byUGG5ROQ_bh-mU292_t7Q7m20_9O0r1w5z6d9A,9688
38
+ local_deep_research/web_search_engines/engines/search_engine_ddg.py,sha256=qK2i65dbPtr_ppoKPU_YA0mDqM_sDAvN6ZztvdFjsCk,4910
41
39
  local_deep_research/web_search_engines/engines/search_engine_github.py,sha256=qqipsw2ycjlRbR6mmMmxzGU3LEcFDJJJ7Ez7xUgWjRM,26768
42
40
  local_deep_research/web_search_engines/engines/search_engine_google_pse.py,sha256=YkXvBmgcqTImCxuyy6580SGRAvImGc6SzInXZgo1kNE,11294
43
41
  local_deep_research/web_search_engines/engines/search_engine_guardian.py,sha256=MW4WIwtNAwcpdigNXronyezAxr50EIZTV1NMedrAv2o,23912
44
- local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=uAsNtaFV3AH4ltNpudHIl546jsOKKyGDjUwU5J7gKts,36766
42
+ local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=rfmPiA9DVmjbaB3KQtlq7s6BRMgHRgzP7AhktZNDw2M,36772
45
43
  local_deep_research/web_search_engines/engines/search_engine_local_all.py,sha256=CRNcxBzNd9kanyIJYaUDB7qfXYxVCvd4L2mX8jL73v0,5955
46
44
  local_deep_research/web_search_engines/engines/search_engine_pubmed.py,sha256=MayfzM2R0XoI7cpXlG1XJ1ktfTN_6H-Xs9RmD89UAao,39236
47
45
  local_deep_research/web_search_engines/engines/search_engine_searxng.py,sha256=GMy6qDMSaVBtjWRm48XBu6TjLAy1HfcO2EFTwr8S9rk,18048
48
46
  local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py,sha256=6VMymjFJ7pyV2nv5dRfFofXgg0kG82rkwbICVnNDNH4,23352
49
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=iy-QmT99Tf2cJlfCrPbEhtMB7a_zCKppvlUKi7VBrlE,9118
47
+ local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=XikEYnM-pAaR70VeAJ28lbqpRzCj4bCA9xY29taTV8g,9215
50
48
  local_deep_research/web_search_engines/engines/search_engine_wayback.py,sha256=astAvSLajDZ6rwgthJ3iBcHSWuDSYPO7uilIxaJhXmU,18132
51
49
  local_deep_research/web_search_engines/engines/search_engine_wikipedia.py,sha256=KSGJECbEcxZpVK-PhYsTCtzedSK0l1AjQmvGtx8KBks,9799
52
- local_deep_research-0.1.16.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
53
- local_deep_research-0.1.16.dist-info/METADATA,sha256=KHrqDTRQmo_FAt1KFZLmYlbO0eQKfjqvpHFu_kRUd_w,15151
54
- local_deep_research-0.1.16.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
55
- local_deep_research-0.1.16.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
56
- local_deep_research-0.1.16.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
57
- local_deep_research-0.1.16.dist-info/RECORD,,
50
+ local_deep_research-0.1.17.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
51
+ local_deep_research-0.1.17.dist-info/METADATA,sha256=2uPvdSw878o2XCdEYAy4LlWCi_TrvwDz3hQBHq5z4CE,14428
52
+ local_deep_research-0.1.17.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
53
+ local_deep_research-0.1.17.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
54
+ local_deep_research-0.1.17.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
55
+ local_deep_research-0.1.17.dist-info/RECORD,,
@@ -1,141 +0,0 @@
1
- # local_collections.py
2
- """
3
- Configuration file for local document collections.
4
- Each collection functions as an independent search engine.
5
- """
6
-
7
- import os
8
- from typing import Dict, Any
9
-
10
- # Registry of local document collections
11
- # Each collection appears as a separate search engine in the main configuration
12
- LOCAL_COLLECTIONS = {
13
- # Project Documents Collection
14
- "project_docs": {
15
- "name": "Project Documents",
16
- "description": "Project documentation and specifications",
17
- "paths": [os.path.abspath("./local_search_files/project_documents")],
18
- "enabled": True,
19
- "embedding_model": "all-MiniLM-L6-v2",
20
- "embedding_device": "cpu",
21
- "embedding_model_type": "sentence_transformers",
22
- "max_results": 20,
23
- "max_filtered_results": 5,
24
- "chunk_size": 1000,
25
- "chunk_overlap": 200,
26
- "cache_dir": ".cache/local_search/project_docs"
27
- },
28
-
29
- # Research Papers Collection
30
- "research_papers": {
31
- "name": "Research Papers",
32
- "description": "Academic research papers and articles",
33
- "paths": [os.path.abspath("local_search_files/research_papers")],
34
- "enabled": True,
35
- "embedding_model": "all-MiniLM-L6-v2",
36
- "embedding_device": "cpu",
37
- "embedding_model_type": "sentence_transformers",
38
- "max_results": 20,
39
- "max_filtered_results": 5,
40
- "chunk_size": 800, # Smaller chunks for academic content
41
- "chunk_overlap": 150,
42
- "cache_dir": ".cache/local_search/research_papers"
43
- },
44
-
45
- # Personal Notes Collection
46
- "personal_notes": {
47
- "name": "Personal Notes",
48
- "description": "Personal notes and documents",
49
- "paths": [os.path.abspath("./local_search_files/personal_notes")],
50
- "enabled": True,
51
- "embedding_model": "all-MiniLM-L6-v2",
52
- "embedding_device": "cpu",
53
- "embedding_model_type": "sentence_transformers",
54
- "max_results": 30,
55
- "max_filtered_results": 10,
56
- "chunk_size": 500, # Smaller chunks for notes
57
- "chunk_overlap": 100,
58
- "cache_dir": ".cache/local_search/personal_notes"
59
- }
60
- }
61
-
62
- # Configuration for local search integration
63
- LOCAL_SEARCH_CONFIG = {
64
- # General embedding options
65
- "DEFAULT_EMBEDDING_MODEL": "all-MiniLM-L6-v2",
66
- "DEFAULT_EMBEDDING_DEVICE": "cpu", # "cpu" or "cuda" for GPU acceleration
67
- "DEFAULT_EMBEDDING_MODEL_TYPE": "sentence_transformers", # or "ollama"
68
-
69
- # Ollama settings (only used if model type is "ollama")
70
- # Note: You must run 'ollama pull nomic-embed-text' first if using Ollama for embeddings
71
- "OLLAMA_BASE_URL": "http://localhost:11434",
72
- "OLLAMA_EMBEDDING_MODEL": "nomic-embed-text",
73
-
74
- # Default indexing options
75
- "FORCE_REINDEX": True, # Force reindexing on startup
76
- "CACHE_DIR": ".cache/local_search", # Base directory for cache
77
- }
78
-
79
- def register_local_collections(search_engines_dict: Dict[str, Any]) -> None:
80
- """
81
- Register all enabled local collections as search engines.
82
-
83
- Args:
84
- search_engines_dict: The main search engines dictionary to update
85
- """
86
- for collection_id, collection in LOCAL_COLLECTIONS.items():
87
- print(collection_id, collection)
88
- if collection.get("enabled", True):
89
- # Skip if already defined (don't override)
90
- if collection_id in search_engines_dict:
91
- continue
92
-
93
- # Validate paths exist
94
- paths = collection.get("paths", [])
95
- valid_paths = []
96
- for path in paths:
97
- if os.path.exists(path) and os.path.isdir(path):
98
- valid_paths.append(path)
99
- else:
100
- print(f"Warning: Collection '{collection_id}' contains non-existent folder: {path}")
101
-
102
- # Log warning if no valid paths
103
- if not valid_paths and paths:
104
- print(f"Warning: Collection '{collection_id}' has no valid folders. It will be registered but won't return results.")
105
-
106
- # Create a search engine entry for this collection
107
- search_engines_dict[collection_id] = {
108
- "module_path": "local_deep_research.web_search_engines.engines.search_engine_local",
109
- "class_name": "LocalSearchEngine",
110
- "requires_api_key": False,
111
- "reliability": 0.9, # High reliability for local documents
112
- "strengths": ["personal documents", "offline access",
113
- collection.get("description", "local documents")],
114
- "weaknesses": ["requires indexing", "limited to specific folders"],
115
- "default_params": {
116
- "folder_paths": collection.get("paths", []),
117
- "embedding_model": collection.get(
118
- "embedding_model",
119
- LOCAL_SEARCH_CONFIG["DEFAULT_EMBEDDING_MODEL"]
120
- ),
121
- "embedding_device": collection.get(
122
- "embedding_device",
123
- LOCAL_SEARCH_CONFIG["DEFAULT_EMBEDDING_DEVICE"]
124
- ),
125
- "embedding_model_type": collection.get(
126
- "embedding_model_type",
127
- LOCAL_SEARCH_CONFIG["DEFAULT_EMBEDDING_MODEL_TYPE"]
128
- ),
129
- "chunk_size": collection.get("chunk_size", 1000),
130
- "chunk_overlap": collection.get("chunk_overlap", 200),
131
- "cache_dir": collection.get(
132
- "cache_dir",
133
- f"{LOCAL_SEARCH_CONFIG['CACHE_DIR']}/{collection_id}"
134
- ),
135
- "max_results": collection.get("max_results", 20),
136
- "max_filtered_results": collection.get("max_filtered_results", 5),
137
- "collection_name": collection.get("name", collection_id),
138
- "collection_description": collection.get("description", "")
139
- },
140
- "requires_llm": True
141
- }