local-deep-research 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/config.py +23 -0
- local_deep_research/defaults/llm_config.py +122 -84
- local_deep_research/defaults/local_collections.toml +10 -4
- local_deep_research/defaults/main.toml +25 -7
- local_deep_research/web/static/css/styles.css +43 -0
- local_deep_research/web/static/sounds/README.md +29 -0
- local_deep_research/web/static/sounds/error.mp3 +0 -0
- local_deep_research/web/static/sounds/success.mp3 +0 -0
- local_deep_research/web_search_engines/engines/meta_search_engine.py +17 -8
- local_deep_research/web_search_engines/engines/search_engine_local.py +61 -21
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +7 -5
- local_deep_research/web_search_engines/search_engine_factory.py +11 -2
- local_deep_research/web_search_engines/search_engines_config.py +24 -7
- {local_deep_research-0.1.17.dist-info → local_deep_research-0.1.19.dist-info}/METADATA +41 -18
- {local_deep_research-0.1.17.dist-info → local_deep_research-0.1.19.dist-info}/RECORD +19 -16
- {local_deep_research-0.1.17.dist-info → local_deep_research-0.1.19.dist-info}/WHEEL +1 -1
- {local_deep_research-0.1.17.dist-info → local_deep_research-0.1.19.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.1.17.dist-info → local_deep_research-0.1.19.dist-info}/licenses/LICENSE +0 -0
- {local_deep_research-0.1.17.dist-info → local_deep_research-0.1.19.dist-info}/top_level.txt +0 -0
local_deep_research/config.py
CHANGED
@@ -145,6 +145,19 @@ def init_config_files():
         if not os.path.exists(search_engines_file) and os.path.exists(default_engines):
             shutil.copyfile(default_engines, search_engines_file)
             logger.info(f"Created search_engines.toml at {search_engines_file}")
+
+        # Create .env.template if it doesn't exist
+        env_template_file = CONFIG_DIR / ".env.template"
+        if not env_template_file.exists():
+            shutil.copy(defaults_dir / ".env.template", env_template_file)
+            logger.info(f"Created .env.template at {env_template_file}")
+
+        # Optionally create an empty .env file if it doesn't exist
+        env_file = CONFIG_DIR / ".env"
+        if not env_file.exists():
+            with open(env_file, "w") as f:
+                f.write("# Add your environment variables here\n")
+            logger.info(f"Created empty .env file at {env_file}")
     except Exception as e:
         logger.error(f"Error initializing Windows config files: {e}")
     else:
@@ -183,7 +196,17 @@ def init_config_files():
     if not search_engines_file.exists():
         shutil.copy(defaults_dir / "search_engines.toml", search_engines_file)
         logger.info(f"Created search_engines.toml at {search_engines_file}")
+    env_template_file = CONFIG_DIR / ".env.template"
+    if not env_template_file.exists():
+        shutil.copy(defaults_dir / ".env.template", env_template_file)
+        logger.info(f"Created .env.template at {env_template_file}")
 
+    # Optionally create an empty .env file if it doesn't exist
+    env_file = CONFIG_DIR / ".env"
+    if not env_file.exists():
+        with open(env_file, "w") as f:
+            f.write("# Add your environment variables here\n")
+        logger.info(f"Created empty .env file at {env_file}")
 
     secrets_file = CONFIG_DIR / ".secrets.toml"
     if not secrets_file.exists():
         with open(secrets_file, "w") as f:
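Both hunks apply the same idempotent bootstrap pattern (once for the Windows branch, once for the generic branch): a default file is copied, or a stub written, into the user config directory only when the target is missing, so re-running initialization never clobbers user edits. A condensed sketch of the pattern — the CONFIG_DIR value here is illustrative only; the real path is platform-specific:

```python
from pathlib import Path
from typing import Optional
import shutil

CONFIG_DIR = Path.home() / ".config" / "local_deep_research"  # illustrative only

def ensure_file(target: Path, source: Optional[Path] = None, stub: str = "") -> None:
    """Copy a default into place, or write a stub, only if target is missing."""
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    if target.exists():
        return  # never overwrite user edits
    if source is not None and source.exists():
        shutil.copy(source, target)
    else:
        target.write_text(stub)

ensure_file(CONFIG_DIR / ".env", stub="# Add your environment variables here\n")
```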
local_deep_research/defaults/llm_config.py
CHANGED
@@ -12,78 +12,50 @@ from langchain_community.llms import VLLM
 from local_deep_research.config import settings
 import os
 import logging
-from enum import Enum, auto
 
 # Initialize environment
 logger = logging.getLogger(__name__)
 
-# Model provider options
-class ModelProvider(Enum):
-    OLLAMA = auto()
-    OPENAI = auto()
-    ANTHROPIC = auto()
-    VLLM = auto()
-    OPENAI_ENDPOINT = auto()
-    NONE = auto()
-
-# ================================
-# USER CONFIGURATION SECTION
-# ================================
-
-# Set your preferred model provider here
-DEFAULT_PROVIDER = ModelProvider.OLLAMA  # Change this to your preferred provider
-
-# Set your default model name here
-DEFAULT_MODEL = "gemma3:12b"  # Your default model
-
-# Set default model parameters
-DEFAULT_TEMPERATURE = 0.7
-MAX_TOKENS = 30000
-
-# Server URLs
-OPENAI_ENDPOINT_URL = "https://openrouter.ai/api/v1"  # For OpenRouter or compatible services
-OLLAMA_BASE_URL = "http://localhost:11434"  # URL for Ollama server
-
-
-
+# Valid provider options
+VALID_PROVIDERS = ["ollama", "openai", "anthropic", "vllm", "openai_endpoint", "lmstudio", "llamacpp", "none"]
 
 # ================================
 # LLM FUNCTIONS
 # ================================
 
-
-
-
-
 def get_llm(model_name=None, temperature=None, provider=None):
     """
     Get LLM instance based on model name and provider.
 
     Args:
-        model_name: Name of the model to use (if None, uses DEFAULT_MODEL)
-        temperature: Model temperature (if None, uses DEFAULT_TEMPERATURE)
-        provider: Provider to use (if None, uses DEFAULT_PROVIDER)
+        model_name: Name of the model to use (if None, uses settings.llm.model)
+        temperature: Model temperature (if None, uses settings.llm.temperature)
+        provider: Provider to use (if None, uses settings.llm.provider)
 
     Returns:
         A LangChain LLM instance
     """
+    # Use settings values for parameters if not provided
     if model_name is None:
-        model_name = DEFAULT_MODEL
+        model_name = settings.llm.model
 
     if temperature is None:
-        temperature = DEFAULT_TEMPERATURE
+        temperature = settings.llm.temperature
 
     if provider is None:
-        provider = DEFAULT_PROVIDER
+        provider = settings.llm.provider.lower()
+        if provider not in VALID_PROVIDERS:
+            logger.error(f"Invalid provider in settings: {provider}")
+            raise ValueError(f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}")
 
     # Common parameters for all models
     common_params = {
         "temperature": temperature,
-        "max_tokens": MAX_TOKENS,
+        "max_tokens": settings.llm.max_tokens,
     }
 
     # Handle different providers
-    if provider == ModelProvider.ANTHROPIC:
+    if provider == "anthropic":
         api_key = settings.get('ANTHROPIC_API_KEY', '')
         if not api_key:
             logger.warning("ANTHROPIC_API_KEY not found. Falling back to default model.")
@@ -93,7 +65,7 @@ def get_llm(model_name=None, temperature=None, provider=None):
             model=model_name, anthropic_api_key=api_key, **common_params
         )
 
-    elif provider == ModelProvider.OPENAI:
+    elif provider == "openai":
         api_key = settings.get('OPENAI_API_KEY', '')
         if not api_key:
             logger.warning("OPENAI_API_KEY not found. Falling back to default model.")
@@ -101,21 +73,24 @@ def get_llm(model_name=None, temperature=None, provider=None):
 
         return ChatOpenAI(model=model_name, api_key=api_key, **common_params)
 
-    elif provider == ModelProvider.OPENAI_ENDPOINT:
-        api_key = settings.OPENAI_ENDPOINT_API_KEY
+    elif provider == "openai_endpoint":
+        api_key = settings.get('OPENAI_ENDPOINT_API_KEY', '')
 
         if not api_key:
             logger.warning("OPENAI_ENDPOINT_API_KEY not found. Falling back to default model.")
             return get_fallback_model(temperature)
 
+        # Get endpoint URL from settings
+        openai_endpoint_url = settings.llm.openai_endpoint_url
+
         return ChatOpenAI(
             model=model_name,
             api_key=api_key,
-            openai_api_base=OPENAI_ENDPOINT_URL,
+            openai_api_base=openai_endpoint_url,
             **common_params
         )
 
-    elif provider == ModelProvider.VLLM:
+    elif provider == "vllm":
         try:
             return VLLM(
                 model=model_name,
@@ -130,19 +105,80 @@ def get_llm(model_name=None, temperature=None, provider=None):
             logger.warning("Falling back.")
             return get_fallback_model(temperature)
 
-    elif provider == ModelProvider.OLLAMA:
+    elif provider == "ollama":
         try:
             # Use the configurable Ollama base URL
-            base_url = settings.get('OLLAMA_BASE_URL', OLLAMA_BASE_URL)
+            base_url = settings.get('OLLAMA_BASE_URL', settings.llm.get('ollama_base_url', 'http://localhost:11434'))
             return ChatOllama(model=model_name, base_url=base_url, **common_params)
         except Exception as e:
             logger.error(f"Error loading Ollama model: {e}")
             return get_fallback_model(temperature)
 
+    elif provider == "lmstudio":
+        try:
+            # Import LM Studio package
+            import lmstudio
+            from langchain_core.language_models import BaseLLM
+
+            # Get LM Studio URL from settings
+            lmstudio_url = settings.llm.get('lmstudio_url', "http://localhost:1234")
+
+            # Create LM Studio LLM instance
+            model = lmstudio.llm(model_name)
+
+            # Return a LangChain compatible wrapper
+            class LMStudioLLM(BaseLLM):
+                def _call(self, prompt, stop=None, **kwargs):
+                    result = model.complete(prompt, temperature=temperature)
+                    return result.completion
+
+                @property
+                def _identifying_params(self):
+                    return {"model_name": model_name}
+
+                @property
+                def _llm_type(self):
+                    return "lmstudio"
+
+            return LMStudioLLM()
+        except ImportError:
+            logger.error("LM Studio package not installed. Run 'pip install lmstudio'")
+            raise ImportError("LM Studio package not installed. Run 'pip install lmstudio'")
+
+    elif provider == "llamacpp":
+        try:
+            # Import LlamaCpp
+            from langchain_community.llms import LlamaCpp
+
+            # Get LlamaCpp model path from settings
+            model_path = settings.llm.get('llamacpp_model_path', "")
+            if not model_path:
+                logger.error("llamacpp_model_path not set in settings")
+                raise ValueError("llamacpp_model_path not set in settings.toml")
+
+            # Get additional LlamaCpp parameters
+            n_gpu_layers = settings.llm.get('llamacpp_n_gpu_layers', 1)
+            n_batch = settings.llm.get('llamacpp_n_batch', 512)
+            f16_kv = settings.llm.get('llamacpp_f16_kv', True)
+
+            # Create LlamaCpp instance
+            return LlamaCpp(
+                model_path=model_path,
+                temperature=temperature,
+                max_tokens=settings.llm.max_tokens,
+                n_gpu_layers=n_gpu_layers,
+                n_batch=n_batch,
+                f16_kv=f16_kv,
+                verbose=True
+            )
+        except ImportError:
+            logger.error("LlamaCpp package not installed. Run 'pip install llama-cpp-python'")
+            raise ImportError("LlamaCpp package not installed. Run 'pip install llama-cpp-python'")
+
     else:
         return get_fallback_model(temperature)
 
-def get_fallback_model(temperature=DEFAULT_TEMPERATURE):
+def get_fallback_model(temperature=None):
     """Create a dummy model for when no providers are available"""
     from langchain_community.llms.fake import FakeListLLM
     return FakeListLLM(
@@ -169,6 +205,12 @@ def get_available_provider_types():
     if is_openai_endpoint_available():
         providers["openai_endpoint"] = "OpenAI-compatible Endpoint"
 
+    if is_lmstudio_available():
+        providers["lmstudio"] = "LM Studio (local models)"
+
+    if is_llamacpp_available():
+        providers["llamacpp"] = "LlamaCpp (local models)"
+
     # Check for VLLM capability
     try:
         import torch
@@ -183,9 +225,6 @@ def get_available_provider_types():
 
     return providers
 
-
-
-
 # ================================
 # HELPER FUNCTIONS
 # ================================
@@ -193,7 +232,7 @@ def get_available_provider_types():
 def is_openai_available():
     """Check if OpenAI is available"""
     try:
-        api_key = settings.OPENAI_API_KEY
+        api_key = settings.get('OPENAI_API_KEY', '')
         return bool(api_key)
     except:
         return False
@@ -201,18 +240,15 @@ def is_openai_available():
 def is_anthropic_available():
     """Check if Anthropic is available"""
     try:
-        api_key = settings.ANTHROPIC_API_KEY
+        api_key = settings.get('ANTHROPIC_API_KEY', '')
         return bool(api_key)
     except:
         return False
 
-
-
 def is_openai_endpoint_available():
     """Check if OpenAI endpoint is available"""
-
     try:
-        api_key = settings.OPENAI_ENDPOINT_API_KEY
+        api_key = settings.get('OPENAI_ENDPOINT_API_KEY', '')
         return bool(api_key)
     except:
         return False
@@ -221,7 +257,7 @@ def is_ollama_available():
     """Check if Ollama is running"""
     try:
         import requests
-        base_url = settings.get('OLLAMA_BASE_URL', OLLAMA_BASE_URL)
+        base_url = settings.get('OLLAMA_BASE_URL', settings.llm.get('ollama_base_url', 'http://localhost:11434'))
         response = requests.get(f"{base_url}/api/tags", timeout=1.0)
         return response.status_code == 200
     except:
@@ -236,34 +272,36 @@ def is_vllm_available():
     except ImportError:
         return False
 
+def is_lmstudio_available():
+    """Check if LM Studio is available"""
+    try:
+        import lmstudio
+        import requests
+        lmstudio_url = settings.llm.get('lmstudio_url', 'http://localhost:1234')
+        # Try to connect to check if running
+        response = requests.get(f"{lmstudio_url}/health", timeout=1.0)
+        return response.status_code == 200
+    except:
+        return False
+
+def is_llamacpp_available():
+    """Check if LlamaCpp is available and configured"""
+    try:
+        from langchain_community.llms import LlamaCpp
+        model_path = settings.llm.get('llamacpp_model_path', '')
+        return bool(model_path) and os.path.exists(model_path)
+    except:
+        return False
+
 def get_available_providers():
     """Get dictionary of available providers"""
-    providers = {}
-
-    if is_ollama_available():
-        providers[ModelProvider.OLLAMA] = "Ollama (local models)"
-
-    if is_openai_available():
-        providers[ModelProvider.OPENAI] = "OpenAI API"
-
-    if is_anthropic_available():
-        providers[ModelProvider.ANTHROPIC] = "Anthropic API"
-
-    if is_openai_endpoint_available():
-        providers[ModelProvider.OPENAI_ENDPOINT] = "OpenAI-compatible Endpoint"
-
-    if is_vllm_available():
-        providers[ModelProvider.VLLM] = "VLLM (local models)"
-
-    if not providers:
-        providers[ModelProvider.NONE] = "No model providers available"
-
-    return providers
+    return get_available_provider_types()
 
 # Log which providers are available
 AVAILABLE_PROVIDERS = get_available_providers()
-logger.info(f"Available providers: {AVAILABLE_PROVIDERS}")
+logger.info(f"Available providers: {list(AVAILABLE_PROVIDERS.keys())}")
 
 # Check if selected provider is available
-if DEFAULT_PROVIDER not in AVAILABLE_PROVIDERS:
-    logger.warning(f"Selected provider {DEFAULT_PROVIDER} is not available.")
+selected_provider = settings.llm.provider.lower()
+if selected_provider not in AVAILABLE_PROVIDERS and selected_provider != "none":
+    logger.warning(f"Selected provider {selected_provider} is not available.")
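The rewrite replaces the in-file constants (`DEFAULT_MODEL`, `MAX_TOKENS`, ...) and the `ModelProvider` enum with values read from the Dynaconf `settings` object and plain provider strings. A minimal usage sketch — the import path is an assumption (at runtime the file is copied into the user config directory, so the effective path may differ), and the last line needs a reachable provider:

```python
from local_deep_research.defaults.llm_config import get_llm, get_available_providers

print(get_available_providers())  # e.g. {"ollama": "Ollama (local models)"}

llm = get_llm()  # falls back to settings.llm.model / .temperature / .provider
llm = get_llm(model_name="mistral", provider="ollama", temperature=0.2)
print(llm.invoke("Summarize FAISS in one sentence."))
```

Note that when `provider` is omitted, an invalid value in settings now raises a ValueError instead of silently falling through to the fake fallback model.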
local_deep_research/defaults/local_collections.toml
CHANGED
@@ -1,6 +1,3 @@
-# Default local document collections configuration
-# Each collection functions as an independent search engine
-
 # Project Documents Collection
 [project_docs]
 name = "Project Documents"
@@ -15,6 +12,9 @@ max_filtered_results = 5
 chunk_size = 1000
 chunk_overlap = 200
 cache_dir = "__CACHE_DIR__/local_search/project_docs"
+strengths = ["project documentation", "specifications", "internal documents"]
+weaknesses = ["no external information", "limited to organizational knowledge"]
+reliability = 0.9
 
 # Research Papers Collection
 [research_papers]
@@ -30,6 +30,9 @@ max_filtered_results = 5
 chunk_size = 800
 chunk_overlap = 150
 cache_dir = "__CACHE_DIR__/local_search/research_papers"
+strengths = ["academic research", "scientific papers", "scholarly content"]
+weaknesses = ["potentially outdated", "limited to collected papers"]
+reliability = 0.85
 
 # Personal Notes Collection
 [personal_notes]
@@ -44,4 +47,7 @@ max_results = 30
 max_filtered_results = 10
 chunk_size = 500
 chunk_overlap = 100
-cache_dir = "__CACHE_DIR__/local_search/personal_notes"
+cache_dir = "__CACHE_DIR__/local_search/personal_notes"
+strengths = ["personal knowledge", "notes", "private documents"]
+weaknesses = ["subjective content", "informal information"]
+reliability = 0.75
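Each collection is an ordinary TOML table, and the three new keys (`strengths`, `weaknesses`, `reliability`) are plain values that feed the per-engine descriptions used by the meta search engine (see meta_search_engine.py further down). A sketch of how a user-defined collection parses — the collection name, path, and values here are illustrative, not part of the release:

```python
import toml  # the project already depends on toml for these files

doc = toml.loads("""
[api_docs]
name = "API Documentation"
paths = ["/home/me/docs/api"]
strengths = ["API reference", "internal specs"]
weaknesses = ["no external information"]
reliability = 0.8
""")

assert doc["api_docs"]["reliability"] == 0.8
assert doc["api_docs"]["strengths"][0] == "API reference"
```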
local_deep_research/defaults/main.toml
CHANGED
@@ -1,23 +1,41 @@
 
 # Main configuration for Local Deep Research
 
+[web]
+port = 5000
+host = "0.0.0.0"
+debug = true
+
+[llm]
+# LLM provider (one of: ollama, openai, anthropic, vllm, openai_endpoint, lmstudio, llamacpp)
+provider = "ollama"
+# Model name
+model = "gemma3:12b"
+# Temperature
+temperature = 0.7
+# Maximum tokens
+max_tokens = 30000
+# OpenAI-compatible endpoint URL
+openai_endpoint_url = "https://openrouter.ai/api/v1"
+# LM Studio URL (default: http://localhost:1234)
+lmstudio_url = "http://localhost:1234"
+# LlamaCpp model path
+llamacpp_model_path = ""
+# LlamaCpp parameters
+llamacpp_n_gpu_layers = 1
+llamacpp_n_batch = 512
+llamacpp_f16_kv = true
+
 [general]
 # Directory for research outputs (relative to user data directory)
 output_dir = "research_outputs"
-
 # Knowledge accumulation approach (NONE, QUESTION, or ITERATION)
 knowledge_accumulation = "ITERATION"
-
 # Maximum context size for knowledge accumulation
 knowledge_accumulation_context_limit = 2000000
-
 # Enable fact checking (experimental, works better with large LLMs)
 enable_fact_checking = false
 
-[web]
-port = 5000
-host = "0.0.0.0"
-debug = true
 
 [search]
 # Search tool to use (auto, wikipedia, arxiv, duckduckgo, serp, google_pse, etc.)
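The new `[llm]` table is what the rewritten llm_config.py reads at call time. A sketch of the access patterns, assuming Dynaconf's usual attribute and dict-style access on the package's `settings` object (values match the defaults above):

```python
from local_deep_research.config import settings  # Dynaconf object; import path assumed

print(settings.llm.provider)    # "ollama"
print(settings.llm.model)       # "gemma3:12b"
print(settings.llm.max_tokens)  # 30000
# dict-style access with a fallback, as used for the optional keys:
print(settings.llm.get("lmstudio_url", "http://localhost:1234"))
print(settings.web.port)        # 5000 — the [web] table moved above [general]
```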
local_deep_research/web/static/css/styles.css
CHANGED
@@ -100,8 +100,18 @@ a:hover {
     transition: all 0.2s;
 }
 
+.sidebar-nav li a {
+    color: inherit;
+    text-decoration: none;
+    width: 100%;
+    display: flex;
+    align-items: center;
+    gap: 0.75rem;
+}
+
 .sidebar-nav li i {
     width: 20px;
+    color: inherit;
 }
 
 .sidebar-nav li:hover {
@@ -115,6 +125,11 @@ a:hover {
     border-left-color: var(--accent-primary);
 }
 
+.sidebar-nav li.active i,
+.sidebar-nav li.active a {
+    color: inherit;
+}
+
 .sidebar-footer {
     padding: 1rem 1.5rem;
     border-top: 1px solid var(--border-color);
@@ -1270,4 +1285,32 @@ textarea:focus, input[type="text"]:focus {
     background-color: var(--accent-primary);
     color: white;
     border-color: var(--accent-primary);
+}
+
+/* Settings card styles */
+.settings-card {
+    background-color: var(--bg-secondary);
+    border-radius: 12px;
+    padding: 1.5rem;
+    transition: all 0.2s;
+    border: 1px solid var(--border-color);
+}
+
+.settings-card:hover {
+    background-color: var(--bg-tertiary);
+    border-color: var(--accent-primary);
+    transform: translateY(-2px);
+    box-shadow: var(--card-shadow);
+}
+
+.settings-card .btn {
+    background-color: var(--bg-tertiary);
+    color: var(--text-primary);
+    border: 1px solid var(--border-color);
+}
+
+.settings-card .btn:hover {
+    background-color: var(--accent-primary);
+    color: white;
+    border-color: var(--accent-primary);
 }
local_deep_research/web/static/sounds/README.md
ADDED
@@ -0,0 +1,29 @@
+# Sound Files for Notifications
+
+This directory contains sound files used for notifications in the Deep Research application.
+
+## Required Files
+
+1. `success.mp3` - Played when research completes successfully
+2. `error.mp3` - Played when research fails or encounters an error
+
+## Sound Sources
+
+You can download copyright-free sound files from these sources:
+
+- [Freesound](https://freesound.org/)
+- [Free Sound Library](https://www.freesoundslibrary.com/)
+
+## Recommended Sounds
+
+### Success Sound
+- [Success Sound by grunz](https://freesound.org/people/grunz/sounds/109662/)
+- Direct download: https://freesound.org/data/previews/109/109662_945474-lq.mp3
+
+### Error Sound
+- [Error Sound by Autistic Lucario](https://freesound.org/people/Autistic%20Lucario/sounds/142608/)
+- Direct download: https://freesound.org/data/previews/142/142608_1840739-lq.mp3
+
+## Usage
+
+The application will automatically use these sounds when research tasks complete or fail, but only when the browser tab is not in focus.
local_deep_research/web/static/sounds/error.mp3
ADDED
Binary file
local_deep_research/web/static/sounds/success.mp3
ADDED
Binary file
local_deep_research/web_search_engines/engines/meta_search_engine.py
CHANGED
@@ -91,14 +91,23 @@ class MetaSearchEngine(BaseSearchEngine):
         if not self.available_engines:
             logger.warning("No search engines available")
             return []
-
-        engine_descriptions = "\n".join([
-            f"- {name.upper()}: Good for {', '.join(SEARCH_ENGINES[name]['strengths'][:3])}. "
-            f"Weaknesses: {', '.join(SEARCH_ENGINES[name]['weaknesses'][:2])}. "
-            f"Reliability: {SEARCH_ENGINES[name]['reliability']*100:.0f}%"
-            for name in self.available_engines
-        ])
-
+        engine_descriptions = []
+        for name in self.available_engines:
+            logger.info(f"Processing search engine: {name}")
+            try:
+                description = f"- {name.upper()}: Good for {', '.join(SEARCH_ENGINES[name]['strengths'][:3])}. " \
+                              f"Weaknesses: {', '.join(SEARCH_ENGINES[name]['weaknesses'][:2])}. " \
+                              f"Reliability: {SEARCH_ENGINES[name]['reliability']*100:.0f}%"
+                engine_descriptions.append(description)
+            except KeyError as e:
+                logger.error(f"Missing key for engine {name}: {e}")
+                # Add a basic description for engines with missing configuration
+                engine_descriptions.append(f"- {name.upper()}: General purpose search engine.")
+            except Exception as e:
+                logger.error(f"Error processing engine {name}: {e}")
+                engine_descriptions.append(f"- {name.upper()}: General purpose search engine.")
+
+        engine_descriptions = "\n".join(engine_descriptions)
 
         prompt = f"""Analyze this search query and rank the available search engines in order of most to least appropriate for answering it.
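Given a well-formed SEARCH_ENGINES entry, the loop renders one description line per engine; the new try/except merely downgrades engines with missing keys to a generic line instead of aborting the ranking. A worked example with illustrative values:

```python
SEARCH_ENGINES = {
    "arxiv": {
        "strengths": ["academic research", "scientific papers", "scholarly content"],
        "weaknesses": ["potentially outdated", "limited to collected papers"],
        "reliability": 0.85,
    }
}
name = "arxiv"
description = f"- {name.upper()}: Good for {', '.join(SEARCH_ENGINES[name]['strengths'][:3])}. " \
              f"Weaknesses: {', '.join(SEARCH_ENGINES[name]['weaknesses'][:2])}. " \
              f"Reliability: {SEARCH_ENGINES[name]['reliability']*100:.0f}%"
print(description)
# - ARXIV: Good for academic research, scientific papers, scholarly content.
#   Weaknesses: potentially outdated, limited to collected papers. Reliability: 85%
```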
local_deep_research/web_search_engines/engines/search_engine_local.py
CHANGED
@@ -10,6 +10,7 @@ import logging
 import re
 import pickle
 
+from faiss import normalize_L2
 from langchain_core.language_models import BaseLLM
 from langchain_community.document_loaders import (
     PyPDFLoader,
@@ -23,6 +24,7 @@ from langchain_community.document_loaders import (
 from langchain_community.document_loaders.base import BaseLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
+from langchain_community.vectorstores.utils import DistanceStrategy
 from langchain_community.embeddings import (
     HuggingFaceEmbeddings,
     OllamaEmbeddings,
@@ -136,7 +138,8 @@ class LocalEmbeddingManager:
             vector_store = FAISS.load_local(
                 str(vector_store_path),
                 self.embeddings,
-                allow_dangerous_deserialization=True
+                allow_dangerous_deserialization=True,
+                normalize_L2=True
             )
 
             # Add this code to show document count
@@ -175,6 +178,10 @@ class LocalEmbeddingManager:
 
     def _get_folder_hash(self, folder_path: str) -> str:
         """Generate a hash for a folder based on its path"""
+        # Strip trailing slashes if we have them.
+        if folder_path.endswith("/"):
+            folder_path = folder_path[:-1]
+
         return hashlib.md5(folder_path.encode()).hexdigest()
 
     def _get_index_path(self, folder_path: str) -> Path:
@@ -205,6 +212,32 @@ class LocalEmbeddingManager:
             return True
 
         return False
+
+    def _check_config_changed(self, folder_path: str) -> bool:
+        """
+        Checks if the embedding configuration for a folder has been changed
+        since it was last indexed.
+        """
+        folder_hash = self._get_folder_hash(folder_path)
+
+        if folder_hash not in self.indexed_folders:
+            # It hasn't been indexed at all. That's a new configuration,
+            # technically.
+            return True
+
+        embedding_config = self.indexed_folders[folder_hash]
+        chunk_size = embedding_config.get("chunk_size", 0)
+        chunk_overlap = embedding_config.get("chunk_overlap", 0)
+        embedding_model = embedding_config.get("embedding_model", "")
+
+        if (chunk_size, chunk_overlap, embedding_model) != (
+            self.chunk_size, self.chunk_overlap, self.embedding_model
+        ):
+            logger.info(
+                "Embedding configuration has changed, re-indexing folder."
+            )
+            return True
+        return False
 
     def get_file_loader(self, file_path: str) -> Optional[BaseLoader]:
         """Get an appropriate document loader for a file based on its extension"""
@@ -257,9 +290,10 @@ class LocalEmbeddingManager:
         folder_str = str(folder_path)
         folder_hash = self._get_folder_hash(folder_str)
         index_path = self._get_index_path(folder_str)
-
+
         # Check if folder needs to be reindexed
-        if not force_reindex and not self._check_folder_modified(folder_str):
+        if (not force_reindex and not self._check_folder_modified(folder_str)
+                and not self._check_config_changed(folder_str)):
             logger.info(f"Folder {folder_path} has not been modified since last indexing")
 
             # Load the vector store from disk if not already loaded
@@ -268,7 +302,8 @@ class LocalEmbeddingManager:
                 self.vector_stores[folder_hash] = FAISS.load_local(
                     str(index_path),
                     self.embeddings,
-                    allow_dangerous_deserialization=True
+                    allow_dangerous_deserialization=True,
+                    normalize_L2=True,
                 )
                 logger.info(f"Loaded index for {folder_path} from disk")
             except Exception as e:
@@ -328,7 +363,11 @@ class LocalEmbeddingManager:
 
         # Create vector store
         logger.info(f"Creating vector store with {len(splits)} chunks")
-        vector_store = FAISS.from_documents(splits, self.embeddings)
+        vector_store = FAISS.from_documents(
+            splits,
+            self.embeddings,
+            normalize_L2=True
+        )
 
         # Save the vector store to disk
         logger.info(f"Saving index to {index_path}")
@@ -421,7 +460,8 @@ class LocalEmbeddingManager:
                 self.vector_stores[folder_hash] = FAISS.load_local(
                     str(index_path),
                     self.embeddings,
-                    allow_dangerous_deserialization=True
+                    allow_dangerous_deserialization=True,
+                    nomalize_L2=True
                 )
             except Exception as e:
                 logger.error(f"Error loading index for {folder_path}: {e}")
@@ -431,14 +471,14 @@ class LocalEmbeddingManager:
         vector_store = self.vector_stores[folder_hash]
 
         try:
-            docs_with_scores = vector_store.similarity_search_with_score(query, k=limit)
+            docs_with_scores = (
+                vector_store.similarity_search_with_relevance_scores(
+                    query,
+                    k=limit
+                )
+            )
 
-            for doc, score in docs_with_scores:
-                # Convert score from distance to similarity (lower distance = higher similarity)
-                # FAISS cosine distance is in [0, 2], where 0 is identical and 2 is opposite
-                # Convert to a similarity score in [0, 1]
-                similarity = 1.0 - (score / 2.0)
-
+            for doc, similarity in docs_with_scores:
                 # Skip results below the threshold
                 if similarity < score_threshold:
                     continue
@@ -491,7 +531,7 @@ class LocalSearchEngine(BaseSearchEngine):
 
     def __init__(
         self,
-        folder_paths: List[str],
+        paths: List[str],
         llm: Optional[BaseLLM] = None,
         max_results: int = 10,
         max_filtered_results: Optional[int] = None,
@@ -509,7 +549,7 @@ class LocalSearchEngine(BaseSearchEngine):
         Initialize the local search engine.
 
         Args:
-            folder_paths: List of folder paths to search in
+            paths: List of folder paths to search in
             llm: Language model for relevance filtering
             max_results: Maximum number of results to return
             max_filtered_results: Maximum results after filtering
@@ -527,21 +567,21 @@ class LocalSearchEngine(BaseSearchEngine):
         super().__init__(llm=llm, max_filtered_results=max_filtered_results)
 
         # Validate folder paths
-        self.folder_paths = folder_paths
+        self.folder_paths = paths
         self.valid_folder_paths = []
-        for path in folder_paths:
+        for path in paths:
             if os.path.exists(path) and os.path.isdir(path):
                 self.valid_folder_paths.append(path)
             else:
                 logger.warning(f"Folder not found or is not a directory: {path}")
 
         # If no valid folders, log a clear message
-        if not self.valid_folder_paths and folder_paths:
-            logger.warning(f"No valid folders found among: {folder_paths}")
+        if not self.valid_folder_paths and paths:
+            logger.warning(f"No valid folders found among: {paths}")
             logger.warning("This search engine will return no results until valid folders are configured")
 
         self.max_results = max_results
-        self.collections = collections or {"default": {"paths": folder_paths, "description": "Default collection"}}
+        self.collections = collections or {"default": {"paths": paths, "description": "Default collection"}}
 
         # Initialize the embedding manager with only valid folders
         self.embedding_manager = LocalEmbeddingManager(
@@ -885,7 +925,7 @@ class LocalSearchEngine(BaseSearchEngine):
         cache_dir = config_dict.get("cache_dir", ".cache/local_search")
 
         return cls(
-            folder_paths=folder_paths,
+            paths=folder_paths,
             collections=collections,
             llm=llm,
             max_results=max_results,
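Two behavioural details in this file are easy to miss. First, `_get_folder_hash` now strips a trailing slash before hashing, so the same folder written with and without a trailing `/` resolves to one index instead of two. Second, `_check_config_changed` forces a re-index whenever the chunking parameters or embedding model differ from what was recorded at index time. A self-contained sketch of the first point:

```python
import hashlib

def folder_hash(folder_path: str) -> str:
    # Strip trailing slashes, as in the hunk above
    if folder_path.endswith("/"):
        folder_path = folder_path[:-1]
    return hashlib.md5(folder_path.encode()).hexdigest()

# Without the strip, these would hash to two different index directories
assert folder_hash("/data/docs") == folder_hash("/data/docs/")
```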
local_deep_research/web_search_engines/engines/search_engine_local_all.py
CHANGED
@@ -4,11 +4,13 @@ Search engine that searches across all local collections
 
 import logging
 from typing import Dict, List, Any, Optional
+
+import toml
 from langchain_core.language_models import BaseLLM
 
 from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
 from local_deep_research.web_search_engines.search_engine_factory import create_search_engine
-from local_deep_research import local_collections
+from local_deep_research.config import LOCAL_COLLECTIONS_FILE
 
 # Setup logging
 logger = logging.getLogger(__name__)
@@ -18,7 +20,7 @@ class LocalAllSearchEngine(BaseSearchEngine):
     Search engine that searches across all local document collections.
     Acts as a meta search engine specifically for local collections.
     """
-
+
     def __init__(
         self,
         llm: Optional[BaseLLM] = None,
@@ -41,9 +43,9 @@ class LocalAllSearchEngine(BaseSearchEngine):
         # Find all local collection search engines
         self.local_engines = {}
         try:
-            all_collections = local_collections.LOCAL_COLLECTIONS
-
-            for collection_id, collection in all_collections.items():
+            local_collections = toml.load(LOCAL_COLLECTIONS_FILE)
+
+            for collection_id, collection in local_collections.items():
                 if not collection.get("enabled", True):
                     continue
 
local_deep_research/web_search_engines/search_engine_factory.py
CHANGED
@@ -6,6 +6,7 @@ from typing import Dict, List, Any, Optional
 
 from .search_engine_base import BaseSearchEngine
 from .search_engines_config import SEARCH_ENGINES, DEFAULT_SEARCH_ENGINE
+from local_deep_research.config import settings
 
 # Setup logging
 logging.basicConfig(level=logging.INFO)
@@ -42,10 +43,18 @@ def create_search_engine(engine_name: str, llm=None, **kwargs) -> Optional[BaseSearchEngine]:
     # Check for API key requirements
     if engine_config.get("requires_api_key", False):
         api_key_env = engine_config.get("api_key_env")
-        api_key = os.getenv(api_key_env) if api_key_env else None
 
+        # First check environment variable
+        api_key = os.getenv(api_key_env)
+
+        # If not found in environment, check Dynaconf settings
+        if not api_key and api_key_env:
+            # Convert env var name to settings path (e.g., BRAVE_API_KEY -> brave_api_key)
+            settings_key = api_key_env.lower()
+            api_key = settings.get(settings_key)
+
         if not api_key:
-            logger.info(f"Required API key for {engine_name} not found in environment variable: {api_key_env}")
+            logger.info(f"Required API key for {engine_name} not found in environment variable: {api_key_env} or settings")
             return None
 
     # Check for LLM requirements
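The factory now resolves required API keys in two steps: the environment variable named by `api_key_env` wins, and the Dynaconf settings object is consulted under the lower-cased name only as a fallback. A condensed sketch of that resolution order (`resolve_api_key` is a hypothetical helper; `settings` stands in for `local_deep_research.config.settings`):

```python
import os
from typing import Optional

def resolve_api_key(api_key_env: Optional[str], settings) -> Optional[str]:
    """Environment variable first, then Dynaconf settings as a fallback."""
    if not api_key_env:
        return None
    api_key = os.getenv(api_key_env)
    if not api_key:
        # e.g. BRAVE_API_KEY -> brave_api_key
        api_key = settings.get(api_key_env.lower())
    return api_key
```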
local_deep_research/web_search_engines/search_engines_config.py
CHANGED
@@ -6,7 +6,7 @@ import logging
 import os
 import toml
 from pathlib import Path
-from local_deep_research.config import CONFIG_DIR
+from local_deep_research.config import CONFIG_DIR, LOCAL_COLLECTIONS_FILE
 
 logger = logging.getLogger(__name__)
 
@@ -41,13 +41,30 @@ if 'auto' in SEARCH_ENGINES and 'meta' not in SEARCH_ENGINES:
     SEARCH_ENGINES['meta'] = SEARCH_ENGINES['auto']
 
 # Register local document collections
-try:
-    from local_deep_research.local_collections import register_local_collections
-    register_local_collections(SEARCH_ENGINES)
-    logger.info(f"Registered local document collections as search engines")
-except ImportError:
-    logger.info("No local collections configuration found. Local document search is disabled.")
 
+if os.path.exists(LOCAL_COLLECTIONS_FILE):
+    try:
+        local_collections_data = toml.load(LOCAL_COLLECTIONS_FILE)
+
+        for collection, config in local_collections_data.items():
+            # Create a new dictionary with required search engine fields
+            engine_config = {
+                "module_path": "local_deep_research.web_search_engines.engines.search_engine_local",
+                "class_name": "LocalSearchEngine",
+                "default_params": config,
+                "requires_llm": True
+            }
+
+            # Copy these specific fields to the top level if they exist
+            for field in ["strengths", "weaknesses", "reliability", "description"]:
+                if field in config:
+                    engine_config[field] = config[field]
+
+            SEARCH_ENGINES[collection] = engine_config
+
+        logger.info(f"Registered local document collections as search engines")
+    except Exception as e:
+        logger.error(f"Error loading local collections from TOML file: {e}")
 # Ensure the meta search engine is still available at the end if it exists
 if 'auto' in SEARCH_ENGINES:
     meta_config = SEARCH_ENGINES["auto"]
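With this change, registering a collection no longer requires a helper module: every table in local_collections.toml is wrapped into a LocalSearchEngine entry. A sketch of what the loop produces for the `[project_docs]` collection shown earlier (the path value is a placeholder):

```python
collection_table = {
    "name": "Project Documents",
    "paths": ["/path/to/your/documents"],  # placeholder path
    "strengths": ["project documentation", "specifications", "internal documents"],
    "weaknesses": ["no external information", "limited to organizational knowledge"],
    "reliability": 0.9,
}

engine_config = {
    "module_path": "local_deep_research.web_search_engines.engines.search_engine_local",
    "class_name": "LocalSearchEngine",
    "default_params": collection_table,  # the raw collection table
    "requires_llm": True,
}
# strengths/weaknesses/reliability are lifted to the top level when present
for field in ["strengths", "weaknesses", "reliability", "description"]:
    if field in collection_table:
        engine_config[field] = collection_table[field]

SEARCH_ENGINES = {"project_docs": engine_config}
```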
{local_deep_research-0.1.17.dist-info → local_deep_research-0.1.19.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: local-deep-research
-Version: 0.1.17
+Version: 0.1.19
 Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
 Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
 License: MIT License
@@ -71,6 +71,14 @@ Dynamic: license-file
 
 A powerful AI-powered research assistant that performs deep, iterative analysis using multiple LLMs and web searches. The system can be run locally for privacy or configured to use cloud-based LLMs for enhanced capabilities.
 
+<div align="center">
+  <a href="https://www.youtube.com/watch?v=0ISreg9q0p0">
+    <img src="https://img.youtube.com/vi/0ISreg9q0p0/0.jpg" alt="Local Deep Research">
+    <br>
+    <span>▶️ Watch Video</span>
+  </a>
+</div>
+
 ## Quick Start
 
 ```bash
@@ -101,7 +109,7 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
 
 - 🔍 **Advanced Research Capabilities**
   - Automated deep research with intelligent follow-up questions
-  -
+  - Proper inline citation and source verification
   - Multi-iteration analysis for comprehensive coverage
   - Full webpage content analysis (not just snippets)
 
@@ -112,8 +120,8 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
   - Configurable model selection based on needs
 
 - 📊 **Rich Output Options**
-  - Detailed research findings with citations
-  -
+  - Detailed research findings with proper citations
+  - Well-structured comprehensive research reports
   - Quick summaries for rapid insights
   - Source tracking and verification
 
@@ -128,6 +136,12 @@ Access the web interface at `http://127.0.0.1:5000` in your browser.
   - **Local RAG search for private documents** - search your own documents with vector embeddings
   - Full webpage content retrieval and intelligent filtering
 
+- 🎓 **Academic & Scientific Integration**
+  - Direct integration with PubMed, arXiv, Wikipedia, Semantic Scholar
+  - Properly formatted citations from academic sources
+  - Report structure suitable for literature reviews
+  - Cross-disciplinary synthesis of information
+
 ## Configuration System
 
 The package automatically creates and manages configuration files in your user directory:
@@ -137,6 +151,8 @@ The package automatically creates and manages configuration files in your user directory:
 
 ### Default Configuration Files
 
+If you prefer environment variables, please refer to this file: https://github.com/LearningCircuit/local-deep-research/blob/main/docs/env_configuration.md
+
 When you first run the tool, it creates these configuration files:
 
 | File | Purpose |
@@ -209,6 +225,10 @@ The `OPENAI_ENDPOINT` provider can access any service with an OpenAI-compatible API:
 
 The system includes multiple search engines. Some require API keys:
 
+Use `.env` in the config folder if `.secrets.toml` doesn't work.
+
+You can also override other settings via environment variables; e.g., to override the [web] port setting in settings.toml, use: **LDR_WEB__PORT=8080**
+
 ```toml
 # Add to .secrets.toml
 SERP_API_KEY = "your-serpapi-key-here"  # For Google results via SerpAPI
@@ -261,20 +281,20 @@ You can use local document search in several ways:
 
 ## Available Search Engines
 
-| Engine | Purpose | API Key Required? |
-|--------|---------|-------------------|
-| `auto` | Intelligently selects the best engine | No |
-| `wikipedia` | General knowledge and facts | No |
-| `arxiv` | Scientific papers and research | No |
-| `pubmed` | Medical and biomedical research | No |
-| `semantic_scholar` | Academic literature across all fields | No |
-| `github` | Code repositories and documentation | No (
-| `brave` | Web search (privacy-focused) | Yes |
-| `serpapi` | Google search results | Yes |
-| `google_pse` | Custom Google search | Yes |
-| `wayback` | Historical web content | No |
-| `searxng` | Local web search engine | No (requires server) |
-| Any collection name | Search your local documents | No |
+| Engine | Purpose | API Key Required? | Rate Limit |
+|--------|---------|-------------------|------------|
+| `auto` | Intelligently selects the best engine | No | Based on selected engine |
+| `wikipedia` | General knowledge and facts | No | No strict limit |
+| `arxiv` | Scientific papers and research | No | No strict limit |
+| `pubmed` | Medical and biomedical research | No | No strict limit |
+| `semantic_scholar` | Academic literature across all fields | No | 100/5min |
+| `github` | Code repositories and documentation | No | 60/hour (unauthenticated) |
+| `brave` | Web search (privacy-focused) | Yes | Based on plan |
+| `serpapi` | Google search results | Yes | Based on plan |
+| `google_pse` | Custom Google search | Yes | 100/day free tier |
+| `wayback` | Historical web content | No | No strict limit |
+| `searxng` | Local web search engine | No (requires local server) | No limit |
+| Any collection name | Search your local documents | No | No limit |
 
 > **Support Free Knowledge:** If you frequently use the search engines in this tool, please consider making a donation to these organizations. They provide valuable services and rely on user support to maintain their operations:
 > - [Donate to Wikipedia](https://donate.wikimedia.org)
@@ -318,6 +338,9 @@ The web interface offers several features:
 - **PDF Export**: Download reports
 - **Research Management**: Terminate processes or delete records
 
+![Web Interface](https://github.com/user-attachments/assets/1f2d9231-4d36-4cf8-b4c9-dfae21a99b99)
+![Web Interface](https://github.com/user-attachments/assets/e3d79f51-197d-4cb9-ba64-e79b6a541bda)
+
 ## Command Line Interface
 
 The CLI version allows you to:
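The `LDR_WEB__PORT` example added to the README follows Dynaconf's environment-variable layering (`LDR_` prefix, `__` as the nesting separator). A minimal sketch, assuming the settings object reads the environment when it is constructed, so the variable must be set before the package is imported:

```python
import os

os.environ["LDR_WEB__PORT"] = "8080"  # must be set before settings are loaded

from local_deep_research.config import settings
print(settings.web.port)  # expected 8080, overriding the TOML default of 5000
```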
{local_deep_research-0.1.17.dist-info → local_deep_research-0.1.19.dist-info}/RECORD
RENAMED
@@ -1,13 +1,13 @@
 local_deep_research/__init__.py,sha256=pfHzjzYc6Szo8VCNLtFZRXyAlEz7CViY7r2fH9O7yms,584
 local_deep_research/citation_handler.py,sha256=v_fwTy-2XvUuoH3OQRzmBrvaiN7mBk8jbNfySslmt5g,4357
-local_deep_research/config.py,sha256=
+local_deep_research/config.py,sha256=3g8-QPMrxoIMjHvyjSJBFUELmAIyOQFHApUnd8p50a8,9881
 local_deep_research/main.py,sha256=uQXtGQ6LtZNd5Qw63D5ke4Q_LjYimouWVSUknVsk3JQ,3645
 local_deep_research/report_generator.py,sha256=EvaArnWirMgg42fMzmZeJczoEYujEbJ2ryHHYuuoXx8,8058
 local_deep_research/search_system.py,sha256=yY3BEzX68vdtUcYF9h6lC3yVao0YA_NSBj6W3-RwlKk,15459
 local_deep_research/defaults/__init__.py,sha256=2Vvlkl-gmP_qPYWegE4JBgummypogl3VXrQ1XzptFDU,1381
-local_deep_research/defaults/llm_config.py,sha256=
-local_deep_research/defaults/local_collections.toml,sha256=
-local_deep_research/defaults/main.toml,sha256=
+local_deep_research/defaults/llm_config.py,sha256=JZe2GWpQvjMZQTuOq24RUbOK0Hz6pKkfXr2uBDKTm7A,10551
+local_deep_research/defaults/local_collections.toml,sha256=zNa03PVnFrZ757JdZOuW6QDxkOc6ep5tG8baGBrMmXM,1778
+local_deep_research/defaults/main.toml,sha256=6Lzbc5sVLxMwu83bLBp_tpYOZgmtThCfPL1L42eTGro,1939
 local_deep_research/defaults/search_engines.toml,sha256=TYkOqVaZq9JPawz4fIPyGdkAtYa4t8F9H50VY-wv2ak,8101
 local_deep_research/utilties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/utilties/enums.py,sha256=TVAZiu9szNbdacfb7whgaQJJlSk7oYByADaAierD4CE,229
@@ -16,8 +16,11 @@ local_deep_research/utilties/search_utilities.py,sha256=-wj_-HCotqDUXYcPaKGN1C_t
 local_deep_research/utilties/setup_utils.py,sha256=t6GNp7lK1nLPdPNCkYUk82IATGM62vqy8UBD-KqShOs,215
 local_deep_research/web/__init__.py,sha256=3oHMatNu8r24FBtpojriIVbHYOVSHj4Q-quycMKOuDk,62
 local_deep_research/web/app.py,sha256=5_VLNdhJOqdgacucglUdS_lVURNgYNbXhK9vME6JmzA,72431
-local_deep_research/web/static/css/styles.css,sha256=
+local_deep_research/web/static/css/styles.css,sha256=_26yBV1fKM51Dfv67CxKSbK8aeoYK5Tl7b2TPs5Whuo,24641
 local_deep_research/web/static/js/app.js,sha256=GPncdWpw2YNTs56JY-0tjTTr9JnX-fIZSZX0agwKZMU,172813
+local_deep_research/web/static/sounds/README.md,sha256=yNfVJIpKoSHSdAEj-lpxkjGy8F-OMStXCiIo1fY5I-0,1003
+local_deep_research/web/static/sounds/error.mp3,sha256=OM3K-pDxkPDCcptqb7c4bIwkHTQa7cLREs4xdYAODPs,3177
+local_deep_research/web/static/sounds/success.mp3,sha256=8EJRxWER-dt6vG6X6GDK3DNb8zoNa_1eDzusYJVcWLI,11818
 local_deep_research/web/templates/api_keys_config.html,sha256=jA8Y-nfUGJ1dTvbw2jK_8xPy2x6UG_5gHpbrTJAex2g,3527
 local_deep_research/web/templates/collections_config.html,sha256=Dci7KumXBON8rAXRX8TVjgqS-bbht7d6aQiedDUnxQ0,3560
 local_deep_research/web/templates/index.html,sha256=IW4cU5NgXVFXF6BxMhLuFzwkte_iYmLo3DQssxuYLZw,17490
@@ -28,28 +31,28 @@ local_deep_research/web/templates/settings.html,sha256=S9A-tdpzMhP2Zw7kp2jxKlwaW
 local_deep_research/web/templates/settings_dashboard.html,sha256=De-v1KNdVvkXme5i3YZ6sIfU9aAKDc_N-AW9n4PZoso,9109
 local_deep_research/web_search_engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/web_search_engines/search_engine_base.py,sha256=QmhfjuHK2deomh8tARghKuYnF-5t3wwBB661odS2VtU,8065
-local_deep_research/web_search_engines/search_engine_factory.py,sha256=
-local_deep_research/web_search_engines/search_engines_config.py,sha256=
+local_deep_research/web_search_engines/search_engine_factory.py,sha256=Sld6bYTwcyTxgVLx04t00sD7vfJhSHFOl6iiGJ08ZUE,11118
+local_deep_research/web_search_engines/search_engines_config.py,sha256=5C0tCmy_Jpv1YHLZLlyS7h5B2XToYcWPAaBDEOsxMo0,2739
 local_deep_research/web_search_engines/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/web_search_engines/engines/full_search.py,sha256=BuOz8dX-XocazCG7gGBKFnIY99FZtNFI0-Wq3fhsfp4,4689
-local_deep_research/web_search_engines/engines/meta_search_engine.py,sha256=
+local_deep_research/web_search_engines/engines/meta_search_engine.py,sha256=0zU_L5thHzAmAx-BDsV6QNnSk10CfJ3quCRGjfC_Ys0,12130
 local_deep_research/web_search_engines/engines/search_engine_arxiv.py,sha256=cf8OzhSzE1zqaiOZ6EFQGy_6hTCJMaTysYd8rs1KJNU,15408
 local_deep_research/web_search_engines/engines/search_engine_brave.py,sha256=J242byUGG5ROQ_bh-mU292_t7Q7m20_9O0r1w5z6d9A,9688
 local_deep_research/web_search_engines/engines/search_engine_ddg.py,sha256=qK2i65dbPtr_ppoKPU_YA0mDqM_sDAvN6ZztvdFjsCk,4910
 local_deep_research/web_search_engines/engines/search_engine_github.py,sha256=qqipsw2ycjlRbR6mmMmxzGU3LEcFDJJJ7Ez7xUgWjRM,26768
 local_deep_research/web_search_engines/engines/search_engine_google_pse.py,sha256=YkXvBmgcqTImCxuyy6580SGRAvImGc6SzInXZgo1kNE,11294
 local_deep_research/web_search_engines/engines/search_engine_guardian.py,sha256=MW4WIwtNAwcpdigNXronyezAxr50EIZTV1NMedrAv2o,23912
-local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=
-local_deep_research/web_search_engines/engines/search_engine_local_all.py,sha256=
+local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=qKgiohPL8oyvpT6S6jSmNFuR_vuNVVVqO7O4gwliLqw,37981
+local_deep_research/web_search_engines/engines/search_engine_local_all.py,sha256=7s7MHuFZTR28bDTxRUj19pzKv7Xzc5SG3yhtGG957eg,5981
 local_deep_research/web_search_engines/engines/search_engine_pubmed.py,sha256=MayfzM2R0XoI7cpXlG1XJ1ktfTN_6H-Xs9RmD89UAao,39236
 local_deep_research/web_search_engines/engines/search_engine_searxng.py,sha256=GMy6qDMSaVBtjWRm48XBu6TjLAy1HfcO2EFTwr8S9rk,18048
 local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py,sha256=6VMymjFJ7pyV2nv5dRfFofXgg0kG82rkwbICVnNDNH4,23352
 local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=XikEYnM-pAaR70VeAJ28lbqpRzCj4bCA9xY29taTV8g,9215
 local_deep_research/web_search_engines/engines/search_engine_wayback.py,sha256=astAvSLajDZ6rwgthJ3iBcHSWuDSYPO7uilIxaJhXmU,18132
 local_deep_research/web_search_engines/engines/search_engine_wikipedia.py,sha256=KSGJECbEcxZpVK-PhYsTCtzedSK0l1AjQmvGtx8KBks,9799
-local_deep_research-0.1.17.dist-info/licenses/LICENSE,sha256=
-local_deep_research-0.1.17.dist-info/METADATA,sha256=
-local_deep_research-0.1.17.dist-info/WHEEL,sha256=
-local_deep_research-0.1.17.dist-info/entry_points.txt,sha256=
-local_deep_research-0.1.17.dist-info/top_level.txt,sha256=
-local_deep_research-0.1.17.dist-info/RECORD,,
+local_deep_research-0.1.19.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
+local_deep_research-0.1.19.dist-info/METADATA,sha256=21FU411naMlwV3BLTyMk-hi2gZbTpTZPbs5eg-Xve0o,15608
+local_deep_research-0.1.19.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
+local_deep_research-0.1.19.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
+local_deep_research-0.1.19.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
+local_deep_research-0.1.19.dist-info/RECORD,,
{local_deep_research-0.1.17.dist-info → local_deep_research-0.1.19.dist-info}/entry_points.txt
RENAMED
File without changes
{local_deep_research-0.1.17.dist-info → local_deep_research-0.1.19.dist-info}/licenses/LICENSE
RENAMED
File without changes
{local_deep_research-0.1.17.dist-info → local_deep_research-0.1.19.dist-info}/top_level.txt
RENAMED
File without changes