local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +154 -160
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +87 -45
  41. local_deep_research/search_system.py +153 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1583 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.2.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,42 +1,45 @@
1
- import requests
2
1
  import logging
3
2
  import os
4
- from typing import Dict, List, Any, Optional
5
- from langchain_core.language_models import BaseLLM
6
3
  import time
7
- import json
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import requests
7
+ from langchain_core.language_models import BaseLLM
8
8
 
9
- from web_search_engines.search_engine_base import BaseSearchEngine
10
- from web_search_engines.engines.full_search import FullSearchResults
11
- import config
9
+ from ...config import search_config
10
+ from ..search_engine_base import BaseSearchEngine
11
+ from .full_search import FullSearchResults
12
12
 
13
13
  # Setup logging
14
14
  logging.basicConfig(level=logging.INFO)
15
15
  logger = logging.getLogger(__name__)
16
16
 
17
+
17
18
  class SearXNGSearchEngine(BaseSearchEngine):
18
19
  """
19
20
  SearXNG search engine implementation that requires an instance URL provided via
20
21
  environment variable or configuration. Designed for ethical usage with proper
21
22
  rate limiting and single-instance approach.
22
23
  """
23
-
24
- def __init__(self,
25
- max_results: int = 15,
26
- instance_url: Optional[str] = None, # Can be None if using env var
27
- categories: Optional[List[str]] = None,
28
- engines: Optional[List[str]] = None,
29
- language: str = "en",
30
- safe_search: int = 1,
31
- time_range: Optional[str] = None,
32
- delay_between_requests: float = 2.0,
33
- llm: Optional[BaseLLM] = None,
34
- max_filtered_results: Optional[int] = None,
35
- include_full_content: bool = True,
36
- api_key: Optional[str] = None): # API key is actually the instance URL
24
+
25
+ def __init__(
26
+ self,
27
+ max_results: int = 15,
28
+ instance_url: Optional[str] = None, # Can be None if using env var
29
+ categories: Optional[List[str]] = None,
30
+ engines: Optional[List[str]] = None,
31
+ language: str = "en",
32
+ safe_search: int = 1,
33
+ time_range: Optional[str] = None,
34
+ delay_between_requests: float = 0.0,
35
+ llm: Optional[BaseLLM] = None,
36
+ max_filtered_results: Optional[int] = None,
37
+ include_full_content: bool = True,
38
+ api_key: Optional[str] = None,
39
+ ): # API key is actually the instance URL
37
40
  """
38
41
  Initialize the SearXNG search engine with ethical usage patterns.
39
-
42
+
40
43
  Args:
41
44
  max_results: Maximum number of search results
42
45
  instance_url: URL of your SearXNG instance (preferably self-hosted)
@@ -53,48 +56,62 @@ class SearXNGSearchEngine(BaseSearchEngine):
53
56
  """
54
57
 
55
58
  # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
56
- super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
57
-
59
+ super().__init__(
60
+ llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
61
+ )
62
+
58
63
  # Get instance URL from various sources in priority order:
59
64
  # 1. api_key parameter (which is actually the instance URL)
60
65
  # 2. SEARXNG_INSTANCE environment variable
61
66
  # 3. instance_url parameter
62
67
  # 4. Default to None, which will disable the engine
63
- self.instance_url = api_key or os.getenv("SEARXNG_INSTANCE") or instance_url
64
-
68
+ self.instance_url = api_key or os.getenv("SEARXNG_INSTANCE") or instance_url or "http://localhost:8080"
69
+
65
70
  # Add debug logging for instance URL
66
- logger.info(f"SearXNG init - Instance URL sources: api_key={api_key}, env={os.getenv('SEARXNG_INSTANCE')}, param={instance_url}")
67
-
71
+ logger.info(
72
+ f"SearXNG init - Instance URL sources: api_key={api_key}, env={os.getenv('SEARXNG_INSTANCE')}, param={instance_url}"
73
+ )
74
+
68
75
  # Validate and normalize the instance URL if provided
69
76
  if self.instance_url:
70
- self.instance_url = self.instance_url.rstrip('/')
77
+ self.instance_url = self.instance_url.rstrip("/")
71
78
  self.is_available = True
72
79
  logger.info(f"SearXNG initialized with instance URL: {self.instance_url}")
73
80
  else:
74
81
  self.is_available = False
75
- logger.error("No SearXNG instance URL provided. The engine is disabled. "
76
- "Set SEARXNG_INSTANCE environment variable or provide instance_url parameter.")
77
-
82
+ logger.error(
83
+ "No SearXNG instance URL provided. The engine is disabled. "
84
+ "Set SEARXNG_INSTANCE environment variable or provide instance_url parameter."
85
+ )
86
+
78
87
  # Add debug logging for all parameters
79
- logger.info(f"SearXNG init params: max_results={max_results}, language={language}, "
80
- f"max_filtered_results={max_filtered_results}, is_available={self.is_available}")
81
-
88
+ logger.info(
89
+ f"SearXNG init params: max_results={max_results}, language={language}, "
90
+ f"max_filtered_results={max_filtered_results}, is_available={self.is_available}"
91
+ )
92
+
82
93
  self.max_results = max_results
83
94
  self.categories = categories or ["general"]
84
95
  self.engines = engines
85
96
  self.language = language
86
97
  self.safe_search = safe_search
87
98
  self.time_range = time_range
88
-
89
- self.delay_between_requests = float(os.getenv("SEARXNG_DELAY", delay_between_requests))
90
-
99
+
100
+ self.delay_between_requests = float(
101
+ os.getenv("SEARXNG_DELAY", delay_between_requests)
102
+ )
103
+
91
104
  self.include_full_content = include_full_content
92
-
105
+
93
106
  if self.is_available:
94
107
  self.search_url = f"{self.instance_url}/search"
95
- logger.info(f"SearXNG engine initialized with instance: {self.instance_url}")
96
- logger.info(f"Rate limiting set to {self.delay_between_requests} seconds between requests")
97
-
108
+ logger.info(
109
+ f"SearXNG engine initialized with instance: {self.instance_url}"
110
+ )
111
+ logger.info(
112
+ f"Rate limiting set to {self.delay_between_requests} seconds between requests"
113
+ )
114
+
98
115
  self.full_search = FullSearchResults(
99
116
  llm=llm,
100
117
  web_search=self,
@@ -102,56 +119,63 @@ class SearXNGSearchEngine(BaseSearchEngine):
102
119
  max_results=max_results,
103
120
  region="wt-wt",
104
121
  time="y",
105
- safesearch="Moderate" if safe_search == 1 else "Off" if safe_search == 0 else "Strict"
122
+ safesearch=(
123
+ "Moderate"
124
+ if safe_search == 1
125
+ else "Off" if safe_search == 0 else "Strict"
126
+ ),
106
127
  )
107
-
128
+
108
129
  self.last_request_time = 0
109
-
130
+
110
131
  def _respect_rate_limit(self):
111
132
  """Apply self-imposed rate limiting between requests"""
112
133
  current_time = time.time()
113
134
  time_since_last_request = current_time - self.last_request_time
114
-
115
135
 
116
136
  if time_since_last_request < self.delay_between_requests:
117
137
  wait_time = self.delay_between_requests - time_since_last_request
118
138
  logger.info(f"Rate limiting: waiting {wait_time:.2f} seconds")
119
139
  time.sleep(wait_time)
120
-
140
+
121
141
  self.last_request_time = time.time()
122
-
142
+
123
143
  def _get_search_results(self, query: str) -> List[Dict[str, Any]]:
124
144
  """
125
145
  Get search results from SearXNG with ethical rate limiting.
126
-
146
+
127
147
  Args:
128
148
  query: The search query
129
-
149
+
130
150
  Returns:
131
151
  List of search results from SearXNG
132
152
  """
133
153
  if not self.is_available:
134
- logger.error("SearXNG engine is disabled (no instance URL provided) - cannot run search")
154
+ logger.error(
155
+ "SearXNG engine is disabled (no instance URL provided) - cannot run search"
156
+ )
135
157
  return []
136
-
158
+
137
159
  logger.info(f"SearXNG running search for query: {query}")
138
-
160
+
139
161
  try:
140
162
  self._respect_rate_limit()
141
-
163
+
142
164
  initial_headers = {
143
165
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
144
166
  "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
145
- "Accept-Language": "en-US,en;q=0.9"
167
+ "Accept-Language": "en-US,en;q=0.9",
146
168
  }
147
-
169
+
148
170
  try:
149
- initial_response = requests.get(self.instance_url, headers=initial_headers, timeout=10)
171
+ initial_response = requests.get(
172
+ self.instance_url, headers=initial_headers, timeout=10
173
+ )
150
174
  cookies = initial_response.cookies
151
175
  except Exception as e:
152
176
  logger.warning(f"Failed to get initial cookies: {e}")
153
177
  cookies = None
154
-
178
+
155
179
  params = {
156
180
  "q": query,
157
181
  "categories": ",".join(self.categories),
@@ -159,15 +183,15 @@ class SearXNGSearchEngine(BaseSearchEngine):
159
183
  "format": "html", # Use HTML format instead of JSON
160
184
  "pageno": 1,
161
185
  "safesearch": self.safe_search,
162
- "count": self.max_results
186
+ "count": self.max_results,
163
187
  }
164
-
188
+
165
189
  if self.engines:
166
190
  params["engines"] = ",".join(self.engines)
167
-
191
+
168
192
  if self.time_range:
169
193
  params["time_range"] = self.time_range
170
-
194
+
171
195
  # Browser-like headers
172
196
  headers = {
173
197
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
@@ -175,91 +199,105 @@ class SearXNGSearchEngine(BaseSearchEngine):
175
199
  "Accept-Language": "en-US,en;q=0.9",
176
200
  "Referer": self.instance_url + "/",
177
201
  "Connection": "keep-alive",
178
- "Upgrade-Insecure-Requests": "1"
202
+ "Upgrade-Insecure-Requests": "1",
179
203
  }
180
-
204
+
181
205
  logger.info(f"Sending request to SearXNG instance at {self.instance_url}")
182
206
  response = requests.get(
183
207
  self.search_url,
184
208
  params=params,
185
209
  headers=headers,
186
210
  cookies=cookies,
187
- timeout=15
211
+ timeout=15,
188
212
  )
189
-
213
+
190
214
  if response.status_code == 200:
191
215
  try:
192
216
  from bs4 import BeautifulSoup
193
-
194
- soup = BeautifulSoup(response.text, 'html.parser')
217
+
218
+ soup = BeautifulSoup(response.text, "html.parser")
195
219
  results = []
196
-
197
- result_elements = soup.select('.result-item')
198
-
220
+
221
+ result_elements = soup.select(".result-item")
222
+
199
223
  if not result_elements:
200
- result_elements = soup.select('.result')
201
-
224
+ result_elements = soup.select(".result")
225
+
202
226
  if not result_elements:
203
- result_elements = soup.select('article')
204
-
227
+ result_elements = soup.select("article")
228
+
205
229
  if not result_elements:
206
- logger.debug(f"Classes found in HTML: {[c['class'] for c in soup.select('[class]') if 'class' in c.attrs][:10]}")
230
+ logger.debug(
231
+ f"Classes found in HTML: {[c['class'] for c in soup.select('[class]') if 'class' in c.attrs][:10]}"
232
+ )
207
233
  result_elements = soup.select('div[id^="result"]')
208
-
234
+
209
235
  logger.info(f"Found {len(result_elements)} search result elements")
210
-
236
+
211
237
  for idx, result_element in enumerate(result_elements):
212
238
  if idx >= self.max_results:
213
239
  break
214
-
240
+
215
241
  title_element = (
216
- result_element.select_one('.result-title') or
217
- result_element.select_one('.title') or
218
- result_element.select_one('h3') or
219
- result_element.select_one('a[href]')
242
+ result_element.select_one(".result-title")
243
+ or result_element.select_one(".title")
244
+ or result_element.select_one("h3")
245
+ or result_element.select_one("a[href]")
220
246
  )
221
-
247
+
222
248
  url_element = (
223
- result_element.select_one('.result-url') or
224
- result_element.select_one('.url') or
225
- result_element.select_one('a[href]')
249
+ result_element.select_one(".result-url")
250
+ or result_element.select_one(".url")
251
+ or result_element.select_one("a[href]")
226
252
  )
227
-
253
+
228
254
  content_element = (
229
- result_element.select_one('.result-content') or
230
- result_element.select_one('.content') or
231
- result_element.select_one('.snippet') or
232
- result_element.select_one('p')
255
+ result_element.select_one(".result-content")
256
+ or result_element.select_one(".content")
257
+ or result_element.select_one(".snippet")
258
+ or result_element.select_one("p")
259
+ )
260
+
261
+ title = (
262
+ title_element.get_text(strip=True) if title_element else ""
233
263
  )
234
-
235
- title = title_element.get_text(strip=True) if title_element else ""
236
-
264
+
237
265
  url = ""
238
- if url_element and url_element.has_attr('href'):
239
- url = url_element['href']
266
+ if url_element and url_element.has_attr("href"):
267
+ url = url_element["href"]
240
268
  elif url_element:
241
269
  url = url_element.get_text(strip=True)
242
-
243
- content = content_element.get_text(strip=True) if content_element else ""
244
-
245
- if not url and title_element and title_element.has_attr('href'):
246
- url = title_element['href']
247
-
248
- logger.debug(f"Extracted result {idx}: title={title[:30]}..., url={url[:30]}..., content={content[:30]}...")
249
-
270
+
271
+ content = (
272
+ content_element.get_text(strip=True)
273
+ if content_element
274
+ else ""
275
+ )
276
+
277
+ if not url and title_element and title_element.has_attr("href"):
278
+ url = title_element["href"]
279
+
280
+ logger.debug(
281
+ f"Extracted result {idx}: title={title[:30]}..., url={url[:30]}..., content={content[:30]}..."
282
+ )
283
+
250
284
  # Add to results if we have at least a title or URL
251
285
  if title or url:
252
- results.append({
253
- "title": title,
254
- "url": url,
255
- "content": content,
256
- "engine": "searxng",
257
- "category": "general"
258
- })
259
-
260
- logger.info(f"SearXNG returned {len(results)} results from HTML parsing")
286
+ results.append(
287
+ {
288
+ "title": title,
289
+ "url": url,
290
+ "content": content,
291
+ "engine": "searxng",
292
+ "category": "general",
293
+ }
294
+ )
295
+
296
+ logger.info(
297
+ f"SearXNG returned {len(results)} results from HTML parsing"
298
+ )
261
299
  return results
262
-
300
+
263
301
  except ImportError:
264
302
  logger.error("BeautifulSoup not available for HTML parsing")
265
303
  return []
@@ -269,123 +307,132 @@ class SearXNGSearchEngine(BaseSearchEngine):
269
307
  else:
270
308
  logger.error(f"SearXNG returned status code {response.status_code}")
271
309
  return []
272
-
310
+
273
311
  except Exception as e:
274
312
  logger.error(f"Error getting SearXNG results: {e}")
275
313
  return []
276
-
314
+
277
315
  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
278
316
  """
279
317
  Get preview information for SearXNG search results.
280
-
318
+
281
319
  Args:
282
320
  query: The search query
283
-
321
+
284
322
  Returns:
285
323
  List of preview dictionaries
286
324
  """
287
325
  if not self.is_available:
288
326
  logger.warning("SearXNG engine is disabled (no instance URL provided)")
289
327
  return []
290
-
328
+
291
329
  logger.info(f"Getting SearXNG previews for query: {query}")
292
-
330
+
293
331
  results = self._get_search_results(query)
294
-
332
+
295
333
  if not results:
296
334
  logger.warning(f"No SearXNG results found for query: {query}")
297
335
  return []
298
-
336
+
299
337
  previews = []
300
338
  for i, result in enumerate(results):
301
339
  title = result.get("title", "")
302
340
  url = result.get("url", "")
303
341
  content = result.get("content", "")
304
-
342
+
305
343
  preview = {
306
344
  "id": url or f"searxng-result-{i}",
307
345
  "title": title,
308
346
  "link": url,
309
347
  "snippet": content,
310
348
  "engine": result.get("engine", ""),
311
- "category": result.get("category", "")
349
+ "category": result.get("category", ""),
312
350
  }
313
-
351
+
314
352
  previews.append(preview)
315
-
353
+
316
354
  return previews
317
-
318
- def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
355
+
356
+ def _get_full_content(
357
+ self, relevant_items: List[Dict[str, Any]]
358
+ ) -> List[Dict[str, Any]]:
319
359
  """
320
360
  Get full content for the relevant search results.
321
-
361
+
322
362
  Args:
323
363
  relevant_items: List of relevant preview dictionaries
324
-
364
+
325
365
  Returns:
326
366
  List of result dictionaries with full content
327
367
  """
328
368
  if not self.is_available:
329
369
  return relevant_items
330
-
331
- if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
370
+
371
+ if (
372
+ hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
373
+ and search_config.SEARCH_SNIPPETS_ONLY
374
+ ):
332
375
  logger.info("Snippet-only mode, skipping full content retrieval")
333
376
  return relevant_items
334
-
377
+
335
378
  logger.info("Retrieving full webpage content")
336
-
379
+
337
380
  try:
338
381
  results_with_content = self.full_search._get_full_content(relevant_items)
339
382
  return results_with_content
340
-
383
+
341
384
  except Exception as e:
342
385
  logger.error(f"Error retrieving full content: {e}")
343
386
  return relevant_items
344
-
387
+
345
388
  def invoke(self, query: str) -> List[Dict[str, Any]]:
346
389
  """Compatibility method for LangChain tools"""
347
390
  return self.run(query)
348
-
349
- def results(self, query: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
391
+
392
+ def results(
393
+ self, query: str, max_results: Optional[int] = None
394
+ ) -> List[Dict[str, Any]]:
350
395
  """
351
396
  Get search results in a format compatible with other search engines.
352
-
397
+
353
398
  Args:
354
399
  query: The search query
355
400
  max_results: Optional override for maximum results
356
-
401
+
357
402
  Returns:
358
403
  List of search result dictionaries
359
404
  """
360
405
  if not self.is_available:
361
406
  return []
362
-
407
+
363
408
  original_max_results = self.max_results
364
-
409
+
365
410
  try:
366
411
  if max_results is not None:
367
412
  self.max_results = max_results
368
-
413
+
369
414
  results = self._get_search_results(query)
370
-
415
+
371
416
  formatted_results = []
372
417
  for result in results:
373
- formatted_results.append({
374
- "title": result.get("title", ""),
375
- "link": result.get("url", ""),
376
- "snippet": result.get("content", "")
377
- })
378
-
418
+ formatted_results.append(
419
+ {
420
+ "title": result.get("title", ""),
421
+ "link": result.get("url", ""),
422
+ "snippet": result.get("content", ""),
423
+ }
424
+ )
425
+
379
426
  return formatted_results
380
-
427
+
381
428
  finally:
382
429
  self.max_results = original_max_results
383
-
430
+
384
431
  @staticmethod
385
432
  def get_self_hosting_instructions() -> str:
386
433
  """
387
434
  Get instructions for self-hosting a SearXNG instance.
388
-
435
+
389
436
  Returns:
390
437
  String with installation instructions
391
438
  """
@@ -441,15 +488,20 @@ https://searxng.github.io/searxng/admin/installation.html
441
488
  Override BaseSearchEngine run method to add SearXNG-specific error handling.
442
489
  """
443
490
  if not self.is_available:
444
- logger.error("SearXNG run method called but engine is not available (missing instance URL)")
491
+ logger.error(
492
+ "SearXNG run method called but engine is not available (missing instance URL)"
493
+ )
445
494
  return []
446
-
447
- logger.info(f"SearXNG run method called with query: {query}")
448
-
495
+
496
+ logger.info(f"SearXNG search engine running with query: '{query}'")
497
+ logger.info(f"SearXNG instance URL: {self.instance_url}")
498
+
449
499
  try:
450
500
  # Call the parent class's run method
451
- return super().run(query)
501
+ results = super().run(query)
502
+ logger.info(f"SearXNG search completed with {len(results)} results")
503
+ return results
452
504
  except Exception as e:
453
505
  logger.error(f"Error in SearXNG run method: {str(e)}")
454
506
  # Return empty results on error
455
- return []
507
+ return []