local-deep-research 0.3.12__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. local_deep_research/__version__.py +1 -1
  2. local_deep_research/advanced_search_system/filters/base_filter.py +2 -3
  3. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +4 -5
  4. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +298 -0
  5. local_deep_research/advanced_search_system/findings/repository.py +0 -3
  6. local_deep_research/advanced_search_system/strategies/base_strategy.py +1 -2
  7. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +14 -18
  8. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +4 -8
  9. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +5 -6
  10. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -2
  11. local_deep_research/advanced_search_system/strategies/standard_strategy.py +9 -7
  12. local_deep_research/api/benchmark_functions.py +288 -0
  13. local_deep_research/api/research_functions.py +8 -4
  14. local_deep_research/benchmarks/README.md +162 -0
  15. local_deep_research/benchmarks/__init__.py +51 -0
  16. local_deep_research/benchmarks/benchmark_functions.py +353 -0
  17. local_deep_research/benchmarks/cli/__init__.py +16 -0
  18. local_deep_research/benchmarks/cli/benchmark_commands.py +338 -0
  19. local_deep_research/benchmarks/cli.py +347 -0
  20. local_deep_research/benchmarks/comparison/__init__.py +12 -0
  21. local_deep_research/benchmarks/comparison/evaluator.py +768 -0
  22. local_deep_research/benchmarks/datasets/__init__.py +53 -0
  23. local_deep_research/benchmarks/datasets/base.py +295 -0
  24. local_deep_research/benchmarks/datasets/browsecomp.py +116 -0
  25. local_deep_research/benchmarks/datasets/custom_dataset_template.py +98 -0
  26. local_deep_research/benchmarks/datasets/simpleqa.py +74 -0
  27. local_deep_research/benchmarks/datasets/utils.py +116 -0
  28. local_deep_research/benchmarks/datasets.py +31 -0
  29. local_deep_research/benchmarks/efficiency/__init__.py +14 -0
  30. local_deep_research/benchmarks/efficiency/resource_monitor.py +367 -0
  31. local_deep_research/benchmarks/efficiency/speed_profiler.py +214 -0
  32. local_deep_research/benchmarks/evaluators/__init__.py +18 -0
  33. local_deep_research/benchmarks/evaluators/base.py +74 -0
  34. local_deep_research/benchmarks/evaluators/browsecomp.py +83 -0
  35. local_deep_research/benchmarks/evaluators/composite.py +121 -0
  36. local_deep_research/benchmarks/evaluators/simpleqa.py +271 -0
  37. local_deep_research/benchmarks/graders.py +410 -0
  38. local_deep_research/benchmarks/metrics/README.md +80 -0
  39. local_deep_research/benchmarks/metrics/__init__.py +24 -0
  40. local_deep_research/benchmarks/metrics/calculation.py +385 -0
  41. local_deep_research/benchmarks/metrics/reporting.py +155 -0
  42. local_deep_research/benchmarks/metrics/visualization.py +205 -0
  43. local_deep_research/benchmarks/metrics.py +11 -0
  44. local_deep_research/benchmarks/optimization/__init__.py +32 -0
  45. local_deep_research/benchmarks/optimization/api.py +274 -0
  46. local_deep_research/benchmarks/optimization/metrics.py +20 -0
  47. local_deep_research/benchmarks/optimization/optuna_optimizer.py +1163 -0
  48. local_deep_research/benchmarks/runners.py +434 -0
  49. local_deep_research/benchmarks/templates.py +65 -0
  50. local_deep_research/config/llm_config.py +26 -23
  51. local_deep_research/config/search_config.py +1 -5
  52. local_deep_research/defaults/default_settings.json +108 -7
  53. local_deep_research/search_system.py +16 -8
  54. local_deep_research/utilities/db_utils.py +3 -6
  55. local_deep_research/utilities/es_utils.py +441 -0
  56. local_deep_research/utilities/log_utils.py +36 -0
  57. local_deep_research/utilities/search_utilities.py +8 -9
  58. local_deep_research/web/app.py +7 -9
  59. local_deep_research/web/app_factory.py +9 -12
  60. local_deep_research/web/database/migrations.py +8 -5
  61. local_deep_research/web/database/models.py +20 -0
  62. local_deep_research/web/database/schema_upgrade.py +5 -8
  63. local_deep_research/web/models/database.py +15 -18
  64. local_deep_research/web/routes/benchmark_routes.py +427 -0
  65. local_deep_research/web/routes/research_routes.py +13 -17
  66. local_deep_research/web/routes/settings_routes.py +264 -67
  67. local_deep_research/web/services/research_service.py +47 -57
  68. local_deep_research/web/services/settings_manager.py +1 -4
  69. local_deep_research/web/services/settings_service.py +4 -6
  70. local_deep_research/web/static/css/styles.css +12 -0
  71. local_deep_research/web/static/js/components/logpanel.js +164 -155
  72. local_deep_research/web/static/js/components/research.js +44 -3
  73. local_deep_research/web/static/js/components/settings.js +27 -0
  74. local_deep_research/web/static/js/services/socket.js +47 -0
  75. local_deep_research/web_search_engines/default_search_engines.py +38 -0
  76. local_deep_research/web_search_engines/engines/meta_search_engine.py +100 -33
  77. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +31 -17
  78. local_deep_research/web_search_engines/engines/search_engine_brave.py +8 -3
  79. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +343 -0
  80. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +14 -6
  81. local_deep_research/web_search_engines/engines/search_engine_local.py +19 -23
  82. local_deep_research/web_search_engines/engines/search_engine_local_all.py +9 -12
  83. local_deep_research/web_search_engines/engines/search_engine_searxng.py +12 -17
  84. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +8 -4
  85. local_deep_research/web_search_engines/search_engine_base.py +22 -5
  86. local_deep_research/web_search_engines/search_engine_factory.py +32 -11
  87. local_deep_research/web_search_engines/search_engines_config.py +14 -1
  88. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/METADATA +10 -2
  89. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/RECORD +92 -49
  90. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/WHEEL +0 -0
  91. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/entry_points.txt +0 -0
  92. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/licenses/LICENSE +0 -0
local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py
@@ -0,0 +1,343 @@
+ import json
+ import logging
+ from typing import Any, Dict, List, Optional
+
+ from elasticsearch import Elasticsearch
+ from langchain_core.language_models import BaseLLM
+
+ from ...config import search_config
+ from ..search_engine_base import BaseSearchEngine
+
+ logger = logging.getLogger(__name__)
+
+
+ class ElasticsearchSearchEngine(BaseSearchEngine):
+     """Elasticsearch search engine implementation with two-phase approach"""
+
+     def __init__(
+         self,
+         hosts: List[str] = ["http://localhost:9200"],
+         index_name: str = "documents",
+         username: Optional[str] = None,
+         password: Optional[str] = None,
+         api_key: Optional[str] = None,
+         cloud_id: Optional[str] = None,
+         max_results: int = 10,
+         highlight_fields: List[str] = ["content", "title"],
+         search_fields: List[str] = ["content", "title"],
+         filter_query: Optional[Dict[str, Any]] = None,
+         llm: Optional[BaseLLM] = None,
+         max_filtered_results: Optional[int] = None,
+     ):
+         """
+         Initialize the Elasticsearch search engine.
+
+         Args:
+             hosts: List of Elasticsearch hosts
+             index_name: Name of the index to search
+             username: Optional username for authentication
+             password: Optional password for authentication
+             api_key: Optional API key for authentication
+             cloud_id: Optional Elastic Cloud ID
+             max_results: Maximum number of search results
+             highlight_fields: Fields to highlight in search results
+             search_fields: Fields to search in
+             filter_query: Optional filter query in Elasticsearch DSL format
+             llm: Language model for relevance filtering
+             max_filtered_results: Maximum number of results to keep after filtering
+         """
+         # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
+         super().__init__(
+             llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
+         )
+
+         self.index_name = index_name
+         self.highlight_fields = highlight_fields
+         self.search_fields = search_fields
+         self.filter_query = filter_query or {}
+
+         # Initialize the Elasticsearch client
+         es_args = {}
+
+         # Basic authentication
+         if username and password:
+             es_args["basic_auth"] = (username, password)
+
+         # API key authentication
+         if api_key:
+             es_args["api_key"] = api_key
+
+         # Cloud ID for Elastic Cloud
+         if cloud_id:
+             es_args["cloud_id"] = cloud_id
+
+         # Connect to Elasticsearch
+         self.client = Elasticsearch(hosts, **es_args)
+
+         # Verify connection
+         try:
+             info = self.client.info()
+             logger.info(f"Connected to Elasticsearch cluster: {info.get('cluster_name')}")
+             logger.info(f"Elasticsearch version: {info.get('version', {}).get('number')}")
+         except Exception as e:
+             logger.error(f"Failed to connect to Elasticsearch: {str(e)}")
+             raise ConnectionError(f"Could not connect to Elasticsearch: {str(e)}")
+
+     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
+         """
+         Get preview information for Elasticsearch documents.
+
+         Args:
+             query: The search query
+
+         Returns:
+             List of preview dictionaries
+         """
+         logger.info(f"Getting document previews from Elasticsearch with query: {query}")
+
+         try:
+             # Build the search query
+             search_query = {
+                 "query": {
+                     "multi_match": {
+                         "query": query,
+                         "fields": self.search_fields,
+                         "type": "best_fields",
+                         "tie_breaker": 0.3,
+                     }
+                 },
+                 "highlight": {
+                     "fields": {field: {} for field in self.highlight_fields},
+                     "pre_tags": ["<em>"],
+                     "post_tags": ["</em>"],
+                 },
+                 "size": self.max_results,
+             }
+
+             # Add filter if provided
+             if self.filter_query:
+                 search_query["query"] = {
+                     "bool": {
+                         "must": search_query["query"],
+                         "filter": self.filter_query
+                     }
+                 }
+
+             # Execute the search
+             response = self.client.search(
+                 index=self.index_name,
+                 body=search_query,
+             )
+
+             # Process the search results
+             hits = response.get("hits", {}).get("hits", [])
+
+             # Format results as previews with basic information
+             previews = []
+             for hit in hits:
+                 source = hit.get("_source", {})
+                 highlight = hit.get("highlight", {})
+
+                 # Extract highlighted snippets or fall back to original content
+                 snippet = ""
+                 for field in self.highlight_fields:
+                     if field in highlight and highlight[field]:
+                         # Join all highlights for this field
+                         field_snippets = " ... ".join(highlight[field])
+                         snippet += field_snippets + " "
+
+                 # If no highlights, use a portion of the content
+                 if not snippet and "content" in source:
+                     content = source.get("content", "")
+                     snippet = content[:250] + "..." if len(content) > 250 else content
+
+                 # Create preview object
+                 preview = {
+                     "id": hit.get("_id", ""),
+                     "title": source.get("title", "Untitled Document"),
+                     "link": source.get("url", "") or f"elasticsearch://{self.index_name}/{hit.get('_id', '')}",
+                     "snippet": snippet.strip(),
+                     "score": hit.get("_score", 0),
+                     "_index": hit.get("_index", self.index_name),
+                 }
+
+                 previews.append(preview)
+
+             logger.info(f"Found {len(previews)} preview results from Elasticsearch")
+             return previews
+
+         except Exception as e:
+             logger.error(f"Error getting Elasticsearch previews: {str(e)}")
+             return []
+
+     def _get_full_content(
+         self, relevant_items: List[Dict[str, Any]]
+     ) -> List[Dict[str, Any]]:
+         """
+         Get full content for the relevant Elasticsearch documents.
+
+         Args:
+             relevant_items: List of relevant preview dictionaries
+
+         Returns:
+             List of result dictionaries with full content
+         """
+         # Check if we should get full content
+         if (
+             hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
+             and search_config.SEARCH_SNIPPETS_ONLY
+         ):
+             logger.info("Snippet-only mode, skipping full content retrieval")
+             return relevant_items
+
+         logger.info("Getting full content for relevant Elasticsearch documents")
+
+         results = []
+         for item in relevant_items:
+             # Start with the preview data
+             result = item.copy()
+
+             # Get the document ID
+             doc_id = item.get("id")
+             if not doc_id:
+                 # Skip items without ID
+                 logger.warning(f"Skipping item without ID: {item}")
+                 results.append(result)
+                 continue
+
+             try:
+                 # Fetch the full document
+                 doc_response = self.client.get(
+                     index=self.index_name,
+                     id=doc_id,
+                 )
+
+                 # Get the source document
+                 source = doc_response.get("_source", {})
+
+                 # Add full content to the result
+                 result["content"] = source.get("content", result.get("snippet", ""))
+                 result["full_content"] = source.get("content", "")
+
+                 # Add metadata from source
+                 for key, value in source.items():
+                     if key not in result and key not in ["content"]:
+                         result[key] = value
+
+             except Exception as e:
+                 logger.error(f"Error fetching full content for document {doc_id}: {str(e)}")
+                 # Keep the preview data if we can't get the full content
+
+             results.append(result)
+
+         return results
+
+     def search_by_query_string(self, query_string: str) -> List[Dict[str, Any]]:
+         """
+         Perform a search using Elasticsearch Query String syntax.
+
+         Args:
+             query_string: The query in Elasticsearch Query String syntax
+
+         Returns:
+             List of search results
+         """
+         try:
+             # Build the search query
+             search_query = {
+                 "query": {
+                     "query_string": {
+                         "query": query_string,
+                         "fields": self.search_fields,
+                     }
+                 },
+                 "highlight": {
+                     "fields": {field: {} for field in self.highlight_fields},
+                     "pre_tags": ["<em>"],
+                     "post_tags": ["</em>"],
+                 },
+                 "size": self.max_results,
+             }
+
+             # Execute the search
+             response = self.client.search(
+                 index=self.index_name,
+                 body=search_query,
+             )
+
+             # Process and return the results
+             previews = self._process_es_response(response)
+             return self._get_full_content(previews)
+
+         except Exception as e:
+             logger.error(f"Error in query_string search: {str(e)}")
+             return []
+
+     def search_by_dsl(self, query_dsl: Dict[str, Any]) -> List[Dict[str, Any]]:
+         """
+         Perform a search using Elasticsearch DSL (Query Domain Specific Language).
+
+         Args:
+             query_dsl: The query in Elasticsearch DSL format
+
+         Returns:
+             List of search results
+         """
+         try:
+             # Execute the search with the provided DSL
+             response = self.client.search(
+                 index=self.index_name,
+                 body=query_dsl,
+             )
+
+             # Process and return the results
+             previews = self._process_es_response(response)
+             return self._get_full_content(previews)
+
+         except Exception as e:
+             logger.error(f"Error in DSL search: {str(e)}")
+             return []
+
+     def _process_es_response(self, response: Dict[str, Any]) -> List[Dict[str, Any]]:
+         """
+         Process Elasticsearch response into preview dictionaries.
+
+         Args:
+             response: Elasticsearch response dictionary
+
+         Returns:
+             List of preview dictionaries
+         """
+         hits = response.get("hits", {}).get("hits", [])
+
+         # Format results as previews
+         previews = []
+         for hit in hits:
+             source = hit.get("_source", {})
+             highlight = hit.get("highlight", {})
+
+             # Extract highlighted snippets or fall back to original content
+             snippet = ""
+             for field in self.highlight_fields:
+                 if field in highlight and highlight[field]:
+                     field_snippets = " ... ".join(highlight[field])
+                     snippet += field_snippets + " "
+
+             # If no highlights, use a portion of the content
+             if not snippet and "content" in source:
+                 content = source.get("content", "")
+                 snippet = content[:250] + "..." if len(content) > 250 else content
+
+             # Create preview object
+             preview = {
+                 "id": hit.get("_id", ""),
+                 "title": source.get("title", "Untitled Document"),
+                 "link": source.get("url", "") or f"elasticsearch://{self.index_name}/{hit.get('_id', '')}",
+                 "snippet": snippet.strip(),
+                 "score": hit.get("_score", 0),
+                 "_index": hit.get("_index", self.index_name),
+             }
+
+             previews.append(preview)
+
+         return previews
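
The new engine follows the package's two-phase pattern: `_get_previews()` runs a `multi_match` query and builds lightweight previews, and `_get_full_content()` fetches the full documents unless `SEARCH_SNIPPETS_ONLY` is set. A minimal usage sketch, assuming a local Elasticsearch instance and an index whose documents carry `title`, `content`, and optionally `url` fields (host, index name, and queries below are placeholders):

```python
from local_deep_research.web_search_engines.engines.search_engine_elasticsearch import (
    ElasticsearchSearchEngine,
)

# Placeholder connection details; pass an `llm` to enable relevance filtering.
engine = ElasticsearchSearchEngine(
    hosts=["http://localhost:9200"],
    index_name="documents",
    max_results=5,
)

# run() is inherited from BaseSearchEngine and drives the preview -> full-content flow.
for result in engine.run("vector search with FAISS"):
    print(result["title"], result["link"])

# Engine-specific entry points added in this file:
engine.search_by_query_string('title:"deep research" AND content:benchmark')
engine.search_by_dsl({"query": {"match": {"content": "retrieval"}}, "size": 3})
```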
local_deep_research/web_search_engines/engines/search_engine_google_pse.py
@@ -1,5 +1,4 @@
  import logging
- import os
  import random
  import time
  from typing import Any, Dict, List, Optional
@@ -88,17 +87,26 @@ class GooglePSESearchEngine(BaseSearchEngine):
  # Region/Country setting
  self.region = region
 
- # API key and Search Engine ID
- self.api_key = api_key or os.getenv("GOOGLE_PSE_API_KEY")
- self.search_engine_id = search_engine_id or os.getenv("GOOGLE_PSE_ENGINE_ID")
+ # API key and Search Engine ID - check params, env vars, or database
+ from ...utilities.db_utils import get_db_setting
+
+ self.api_key = api_key
+ if not self.api_key:
+ self.api_key = get_db_setting("search.engine.web.google_pse.api_key")
+
+ self.search_engine_id = search_engine_id
+ if not self.search_engine_id:
+ self.search_engine_id = get_db_setting(
+ "search.engine.web.google_pse.engine_id"
+ )
 
  if not self.api_key:
  raise ValueError(
- "Google API key is required. Set it in the GOOGLE_PSE_API_KEY environment variable."
+ "Google API key is required. Set it in the UI settings, use the api_key parameter, or set the GOOGLE_PSE_API_KEY environment variable."
  )
  if not self.search_engine_id:
  raise ValueError(
- "Google Search Engine ID is required. Set it in the GOOGLE_PSE_ENGINE_ID environment variable."
+ "Google Search Engine ID is required. Set it in the UI settings, use the search_engine_id parameter, or set the GOOGLE_PSE_ENGINE_ID environment variable."
  )
 
  # Validate connection and credentials
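
Credential lookup for Google PSE now checks the explicit constructor arguments first and then the settings database via `get_db_setting`; the updated error messages still point to the environment variables as a further option. A hedged sketch of the two call styles this enables (it assumes the credential parameters default to `None`, as the fallback logic implies; the key values are placeholders):

```python
from local_deep_research.web_search_engines.engines.search_engine_google_pse import (
    GooglePSESearchEngine,
)

# Explicit credentials always win.
engine = GooglePSESearchEngine(
    api_key="your-google-pse-api-key",     # placeholder
    search_engine_id="your-engine-id",     # placeholder
)

# Otherwise the engine reads search.engine.web.google_pse.api_key / .engine_id
# from the settings database (i.e. whatever was saved in the web UI) and raises
# ValueError if no source provides a value.
engine = GooglePSESearchEngine()
```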
local_deep_research/web_search_engines/engines/search_engine_local.py
@@ -1,6 +1,5 @@
  import hashlib
  import json
- import logging
  import os
  import time
  import uuid
@@ -29,16 +28,13 @@ from langchain_community.vectorstores import FAISS
  from langchain_core.documents import Document
  from langchain_core.language_models import BaseLLM
  from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from loguru import logger
 
  from ...config import search_config
  from ...utilities.db_utils import get_db_setting
  from ...utilities.url_utils import normalize_url
  from ..search_engine_base import BaseSearchEngine
 
- # Setup logging
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
 
  def _get_file_loader(file_path: str) -> Optional[BaseLoader]:
  """Get an appropriate document loader for a file based on its extension"""
@@ -62,8 +58,8 @@ def _get_file_loader(file_path: str) -> Optional[BaseLoader]:
  # Try the text loader as a fallback for unknown extensions
  logger.warning(f"Unknown file extension for {file_path}, trying TextLoader")
  return TextLoader(str(file_path), encoding="utf-8")
- except Exception as e:
- logger.error(f"Error creating loader for {file_path}: {e}")
+ except Exception:
+ logger.exception(f"Error creating loader for {file_path}")
  return None
 
 
@@ -94,8 +90,8 @@ def _load_document(file_path: Path) -> List[Document]:
  doc.metadata["source"] = str(file_path)
  doc.metadata["filename"] = file_path.name
 
- except Exception as e:
- logger.error(f"Error loading {file_path}: {e}")
+ except Exception:
+ logger.exception(f"Error loading {file_path}")
  return []
 
  return docs
@@ -197,8 +193,8 @@ class LocalEmbeddingManager:
  model_name=self.embedding_model,
  model_kwargs={"device": self.embedding_device},
  )
- except Exception as e:
- logger.error(f"Error initializing embeddings: {e}")
+ except Exception:
+ logger.exception("Error initializing embeddings")
  logger.warning(
  "Falling back to HuggingFaceEmbeddings with all-MiniLM-L6-v2"
  )
@@ -226,8 +222,8 @@ class LocalEmbeddingManager:
  logger.info(f"Loaded index with {doc_count} document chunks")
 
  return vector_store
- except Exception as e:
- logger.error(f"Error loading vector store: {e}")
+ except Exception:
+ logger.exception("Error loading vector store")
  logger.info("Will create a new vector store")
 
  # Create a new vector store
@@ -241,8 +237,8 @@ class LocalEmbeddingManager:
  try:
  with open(index_metadata_path, "r") as f:
  return json.load(f)
- except Exception as e:
- logger.error(f"Error loading index metadata: {e}")
+ except Exception:
+ logger.exception("Error loading index metadata")
 
  return {}
 
@@ -253,8 +249,8 @@ class LocalEmbeddingManager:
  try:
  with open(index_metadata_path, "w") as f:
  json.dump(self.indexed_folders, f, indent=2)
- except Exception as e:
- logger.error(f"Error saving index metadata: {e}")
+ except Exception:
+ logger.exception("Error saving index metadata")
 
  @staticmethod
  def get_folder_hash(folder_path: Path) -> str:
@@ -397,8 +393,8 @@ class LocalEmbeddingManager:
  normalize_L2=True,
  )
  logger.info(f"Loaded index for {folder_path} from disk")
- except Exception as e:
- logger.error(f"Error loading index for {folder_path}: {e}")
+ except Exception:
+ logger.exception(f"Error loading index for {folder_path}")
  # If loading fails, force reindexing
  force_reindex = True
 
@@ -574,8 +570,8 @@ class LocalEmbeddingManager:
  allow_dangerous_deserialization=True,
  normalize_L2=True,
  )
- except Exception as e:
- logger.error(f"Error loading index for {folder_path}: {e}")
+ except Exception:
+ logger.exception(f"Error loading index for {folder_path}")
  continue
 
  # Search in this folder
@@ -599,8 +595,8 @@ class LocalEmbeddingManager:
  }
 
  all_results.append(result)
- except Exception as e:
- logger.error(f"Error searching in {folder_path}: {e}")
+ except Exception:
+ logger.exception(f"Error searching in {folder_path}")
 
  # Sort by similarity (highest first)
  all_results.sort(key=lambda x: x["similarity"], reverse=True)
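
The recurring edit in this file (and in the engines that follow) swaps manual `logger.error(f"...: {e}")` calls for loguru's `logger.exception(...)`, which records the message at error level and appends the active traceback automatically, so the handler no longer needs to bind the exception. A small standalone illustration of the pattern (the function and path are hypothetical):

```python
from loguru import logger


def load_index(path: str) -> None:
    raise FileNotFoundError(path)


try:
    load_index("/tmp/missing-index")
except Exception:
    # No need for `as e` or string interpolation: loguru attaches the traceback.
    logger.exception("Error loading index for /tmp/missing-index")
```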
local_deep_research/web_search_engines/engines/search_engine_local_all.py
@@ -2,19 +2,16 @@
  Search engine that searches across all local collections
  """
 
- import logging
  from typing import Any, Dict, List, Optional, cast
 
  from langchain_core.language_models import BaseLLM
+ from loguru import logger
 
  from ..search_engine_base import BaseSearchEngine
  from ..search_engine_factory import create_search_engine
  from ..search_engines_config import local_search_engines
  from .search_engine_local import LocalSearchEngine
 
- # Setup logging
- logger = logging.getLogger(__name__)
-
 
  class LocalAllSearchEngine(BaseSearchEngine):
  """
@@ -62,9 +59,9 @@ class LocalAllSearchEngine(BaseSearchEngine):
  "name": engine.name,
  "description": engine.description,
  }
- except Exception as e:
- logger.error(
- f"Error creating search engine for collection '{collection_id}': {e}"
+ except Exception:
+ logger.exception(
+ f"Error creating search engine for collection '{collection_id}'"
  )
  except ImportError:
  logger.warning("No local collections configuration found")
@@ -97,8 +94,8 @@ class LocalAllSearchEngine(BaseSearchEngine):
  preview["collection_description"] = engine_info["description"]
 
  all_previews.extend(previews)
- except Exception as e:
- logger.error(f"Error searching collection '{collection_id}': {e}")
+ except Exception:
+ logger.exception(f"Error searching collection '{collection_id}'")
 
  if not all_previews:
  logger.info(f"No local documents found for query: {query}")
@@ -139,9 +136,9 @@ class LocalAllSearchEngine(BaseSearchEngine):
  try:
  results = engine._get_full_content(items)
  all_results.extend(results)
- except Exception as e:
- logger.error(
- f"Error getting full content from collection '{collection_id}': {e}"
+ except Exception:
+ logger.exception(
+ f"Error getting full content from collection '{collection_id}'"
  )
  # Fall back to returning the items without full content
  all_results.extend(items)
local_deep_research/web_search_engines/engines/search_engine_searxng.py
@@ -1,20 +1,16 @@
  import enum
- import logging
  import os
  import time
  from typing import Any, Dict, List, Optional
 
  import requests
  from langchain_core.language_models import BaseLLM
+ from loguru import logger
 
  from ...config import search_config
  from ..search_engine_base import BaseSearchEngine
  from .full_search import FullSearchResults
 
- # Setup logging
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
 
  @enum.unique
  class SafeSearchSetting(enum.IntEnum):
@@ -70,9 +66,8 @@ class SearXNGSearchEngine(BaseSearchEngine):
  llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
  )
 
- self.instance_url = instance_url
  # Validate and normalize the instance URL if provided
- self.instance_url = self.instance_url.rstrip("/")
+ self.instance_url = instance_url.rstrip("/")
  logger.info(f"SearXNG initialized with instance URL: {self.instance_url}")
  try:
  # Make sure it's accessible.
@@ -182,8 +177,8 @@ class SearXNGSearchEngine(BaseSearchEngine):
  self.instance_url, headers=initial_headers, timeout=10
  )
  cookies = initial_response.cookies
- except Exception as e:
- logger.warning(f"Failed to get initial cookies: {e}")
+ except Exception:
+ logger.exception("Failed to get initial cookies")
  cookies = None
 
  params = {
@@ -311,15 +306,15 @@ class SearXNGSearchEngine(BaseSearchEngine):
  except ImportError:
  logger.error("BeautifulSoup not available for HTML parsing")
  return []
- except Exception as e:
- logger.error(f"Error parsing HTML results: {str(e)}")
+ except Exception:
+ logger.exception("Error parsing HTML results")
  return []
  else:
  logger.error(f"SearXNG returned status code {response.status_code}")
  return []
 
- except Exception as e:
- logger.error(f"Error getting SearXNG results: {e}")
+ except Exception:
+ logger.exception("Error getting SearXNG results")
  return []
 
  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
@@ -391,8 +386,8 @@ class SearXNGSearchEngine(BaseSearchEngine):
  results_with_content = self.full_search._get_full_content(relevant_items)
  return results_with_content
 
- except Exception as e:
- logger.error(f"Error retrieving full content: {e}")
+ except Exception:
+ logger.exception("Error retrieving full content")
  return relevant_items
 
  def invoke(self, query: str) -> List[Dict[str, Any]]:
@@ -511,7 +506,7 @@ https://searxng.github.io/searxng/admin/installation.html
  results = super().run(query)
  logger.info(f"SearXNG search completed with {len(results)} results")
  return results
- except Exception as e:
- logger.error(f"Error in SearXNG run method: {str(e)}")
+ except Exception:
+ logger.exception("Error in SearXNG run method")
  # Return empty results on error
  return []
local_deep_research/web_search_engines/engines/search_engine_serpapi.py
@@ -1,5 +1,4 @@
  import logging
- import os
  from typing import Any, Dict, List, Optional
 
  from langchain_community.utilities import SerpAPIWrapper
@@ -64,11 +63,16 @@ class SerpAPISearchEngine(BaseSearchEngine):
  "russian": "ru",
  }
 
- # Get API key
- serpapi_api_key = api_key or os.getenv("SERP_API_KEY")
+ # Get API key - check params, env vars, or database
+ from ...utilities.db_utils import get_db_setting
+
+ serpapi_api_key = api_key
+ if not serpapi_api_key:
+ serpapi_api_key = get_db_setting("search.engine.web.serpapi.api_key")
+
  if not serpapi_api_key:
  raise ValueError(
- "SERP_API_KEY not found. Please provide api_key or set the SERP_API_KEY environment variable."
+ "SerpAPI key not found. Please provide api_key parameter, set the SERP_API_KEY environment variable, or set it in the UI settings."
  )
 
  # Get language code
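
SerpAPI gets the same resolution order introduced for Google PSE above: an explicit `api_key` argument wins, otherwise the key is read from the settings database. A brief sketch of inspecting the stored value directly via the `get_db_setting` helper (treating an unset key as falsy is an assumption based on the `if not serpapi_api_key` check in the hunk above):

```python
from local_deep_research.utilities.db_utils import get_db_setting

stored_key = get_db_setting("search.engine.web.serpapi.api_key")
if not stored_key:
    # SerpAPISearchEngine would raise ValueError in this situation.
    print("No SerpAPI key stored; pass api_key=... or set SERP_API_KEY.")
```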