local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +154 -160
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +87 -45
  41. local_deep_research/search_system.py +153 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1583 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.2.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,38 +1,41 @@
1
- from typing import Dict, List, Any, Optional
1
+ import logging
2
2
  import os
3
- import requests
4
- import time
5
3
  import random
6
- import logging
7
- from requests.exceptions import RequestException
8
- from urllib.parse import quote_plus
4
+ import time
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ import requests
9
8
  from langchain_core.language_models import BaseLLM
9
+ from requests.exceptions import RequestException
10
10
 
11
- from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
11
+ from ..search_engine_base import BaseSearchEngine
12
12
 
13
13
  # Set up logging
14
14
  logging.basicConfig(level=logging.INFO)
15
15
  logger = logging.getLogger(__name__)
16
16
 
17
+
17
18
  class GooglePSESearchEngine(BaseSearchEngine):
18
19
  """Google Programmable Search Engine implementation"""
19
20
 
20
- def __init__(self,
21
- max_results: int = 10,
22
- region: str = "us",
23
- safe_search: bool = True,
24
- search_language: str = "English",
25
- api_key: Optional[str] = None,
26
- search_engine_id: Optional[str] = None,
27
- llm: Optional[BaseLLM] = None,
28
- include_full_content: bool = False,
29
- max_filtered_results: Optional[int] = None,
30
- max_retries: int = 3,
31
- retry_delay: float = 2.0,
32
- **kwargs):
21
+ def __init__(
22
+ self,
23
+ max_results: int = 10,
24
+ region: str = "us",
25
+ safe_search: bool = True,
26
+ search_language: str = "English",
27
+ api_key: Optional[str] = None,
28
+ search_engine_id: Optional[str] = None,
29
+ llm: Optional[BaseLLM] = None,
30
+ include_full_content: bool = False,
31
+ max_filtered_results: Optional[int] = None,
32
+ max_retries: int = 3,
33
+ retry_delay: float = 2.0,
34
+ **kwargs,
35
+ ):
33
36
  """
34
37
  Initialize the Google Programmable Search Engine.
35
-
38
+
36
39
  Args:
37
40
  max_results: Maximum number of search results
38
41
  region: Region code for search results
@@ -48,17 +51,19 @@ class GooglePSESearchEngine(BaseSearchEngine):
48
51
  **kwargs: Additional parameters (ignored but accepted for compatibility)
49
52
  """
50
53
  # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
51
- super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
54
+ super().__init__(
55
+ llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
56
+ )
52
57
  self.include_full_content = include_full_content
53
-
58
+
54
59
  # Retry configuration
55
60
  self.max_retries = max_retries
56
61
  self.retry_delay = retry_delay
57
-
62
+
58
63
  # Rate limiting - keep track of last request time
59
64
  self.last_request_time = 0
60
65
  self.min_request_interval = 0.5 # Minimum time between requests in seconds
61
-
66
+
62
67
  # Language code mapping
63
68
  language_code_mapping = {
64
69
  "english": "en",
@@ -70,82 +75,86 @@ class GooglePSESearchEngine(BaseSearchEngine):
70
75
  "korean": "ko",
71
76
  "portuguese": "pt",
72
77
  "russian": "ru",
73
- "chinese": "zh-CN"
78
+ "chinese": "zh-CN",
74
79
  }
75
-
80
+
76
81
  # Get language code
77
82
  search_language = search_language.lower()
78
83
  self.language = language_code_mapping.get(search_language, "en")
79
-
84
+
80
85
  # Safe search setting
81
86
  self.safe = "active" if safe_search else "off"
82
-
87
+
83
88
  # Region/Country setting
84
89
  self.region = region
85
-
90
+
86
91
  # API key and Search Engine ID
87
92
  self.api_key = api_key or os.getenv("GOOGLE_PSE_API_KEY")
88
93
  self.search_engine_id = search_engine_id or os.getenv("GOOGLE_PSE_ENGINE_ID")
89
-
94
+
90
95
  if not self.api_key:
91
- raise ValueError("Google API key is required. Set it in the GOOGLE_PSE_API_KEY environment variable.")
96
+ raise ValueError(
97
+ "Google API key is required. Set it in the GOOGLE_PSE_API_KEY environment variable."
98
+ )
92
99
  if not self.search_engine_id:
93
- raise ValueError("Google Search Engine ID is required. Set it in the GOOGLE_PSE_ENGINE_ID environment variable.")
94
-
100
+ raise ValueError(
101
+ "Google Search Engine ID is required. Set it in the GOOGLE_PSE_ENGINE_ID environment variable."
102
+ )
103
+
95
104
  # Validate connection and credentials
96
105
  self._validate_connection()
97
-
106
+
98
107
  def _validate_connection(self):
99
108
  """Test the connection to ensure API key and Search Engine ID are valid"""
100
109
  try:
101
110
  # Make a minimal test query
102
111
  response = self._make_request("test")
103
-
112
+
104
113
  # Check if we got a valid response
105
114
  if response.get("error"):
106
115
  error_msg = response["error"].get("message", "Unknown error")
107
116
  raise ValueError(f"Google PSE API error: {error_msg}")
108
-
117
+
109
118
  # If we get here, the connection is valid
110
119
  logger.info("Google PSE connection validated successfully")
111
120
  return True
112
-
121
+
113
122
  except Exception as e:
114
123
  # Log the error and re-raise
115
124
  logger.error(f"Error validating Google PSE connection: {str(e)}")
116
125
  raise
117
-
126
+
118
127
  def _respect_rate_limit(self):
119
128
  """Ensure we don't exceed rate limits by adding appropriate delay between requests"""
120
129
  current_time = time.time()
121
130
  elapsed = current_time - self.last_request_time
122
-
131
+
123
132
  # If we've made a request recently, wait until the minimum interval has passed
124
133
  if elapsed < self.min_request_interval:
125
134
  sleep_time = self.min_request_interval - elapsed
126
- logger.debug(f"Rate limiting: sleeping for {sleep_time:.2f}s")
135
+ logger.debug("Rate limiting: sleeping for %.2f s", sleep_time)
127
136
  time.sleep(sleep_time)
128
-
137
+
129
138
  # Update the last request time
130
139
  self.last_request_time = time.time()
131
140
 
132
141
  def _make_request(self, query: str, start_index: int = 1) -> Dict:
133
142
  """
134
143
  Make a request to the Google PSE API with retry logic and rate limiting
135
-
144
+
136
145
  Args:
137
146
  query: Search query string
138
147
  start_index: Starting index for pagination
139
-
148
+
140
149
  Returns:
141
150
  JSON response from the API
142
-
151
+
143
152
  Raises:
144
153
  RequestException: If all retry attempts fail
145
154
  """
146
155
  # Base URL for the API
147
156
  url = "https://www.googleapis.com/customsearch/v1"
148
-
157
+
149
158
  # Parameters for the request
150
159
  params = {
151
160
  "key": self.api_key,
@@ -155,127 +164,151 @@ class GooglePSESearchEngine(BaseSearchEngine):
155
164
  "start": start_index,
156
165
  "safe": self.safe,
157
166
  "lr": f"lang_{self.language}",
158
- "gl": self.region
167
+ "gl": self.region,
159
168
  }
160
-
169
+
161
170
  # Implement retry logic with exponential backoff
162
171
  attempt = 0
163
172
  last_exception = None
164
-
173
+
165
174
  while attempt < self.max_retries:
166
175
  try:
167
176
  # Respect rate limits
168
177
  self._respect_rate_limit()
169
-
178
+
170
179
  # Add jitter to retries after the first attempt
171
180
  if attempt > 0:
172
181
  jitter = random.uniform(0.5, 1.5)
173
182
  sleep_time = self.retry_delay * (2 ** (attempt - 1)) * jitter
174
- logger.info(f"Retry attempt {attempt+1}/{self.max_retries} for query '{query}'. Waiting {sleep_time:.2f}s")
183
+ logger.info(
184
+ "Retry attempt %s / %s for query '%s'. Waiting %s s",
185
+ attempt + 1,
186
+ self.max_retries,
187
+ query,
188
+ f"{sleep_time:.2f}",
189
+ )
175
190
  time.sleep(sleep_time)
176
-
191
+
177
192
  # Make the request
178
- logger.debug(f"Making request to Google PSE API: {query} (start_index={start_index})")
193
+ logger.debug(
194
+ "Making request to Google PSE API: %s (start_index=%s)",
195
+ query,
196
+ start_index,
197
+ )
179
198
  response = requests.get(url, params=params, timeout=10)
180
-
199
+
181
200
  # Check for HTTP errors
182
201
  response.raise_for_status()
183
-
202
+
184
203
  # Return the JSON response
185
204
  return response.json()
186
-
205
+
187
206
  except RequestException as e:
188
- logger.warning(f"Request error on attempt {attempt+1}/{self.max_retries}: {str(e)}")
207
+ logger.warning(
208
+ "Request error on attempt %s / %s: %s",
209
+ attempt + 1,
210
+ self.max_retries,
211
+ str(e),
212
+ )
189
213
  last_exception = e
190
214
  except Exception as e:
191
- logger.warning(f"Error on attempt {attempt+1}/{self.max_retries}: {str(e)}")
215
+ logger.warning(
216
+ "Error on attempt %s / %s: %s",
217
+ attempt + 1,
218
+ self.max_retries,
219
+ str(e),
220
+ )
192
221
  last_exception = e
193
-
222
+
194
223
  attempt += 1
195
-
224
+
196
225
  # If we get here, all retries failed
197
226
  error_msg = f"Failed to get response from Google PSE API after {self.max_retries} attempts"
198
227
  logger.error(error_msg)
199
-
228
+
200
229
  if last_exception:
201
230
  raise RequestException(f"{error_msg}: {str(last_exception)}")
202
231
  else:
203
232
  raise RequestException(error_msg)
204
-
233
+
205
234
  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
206
235
  """Get search result previews/snippets"""
207
236
  results = []
208
-
237
+
209
238
  # Google PSE API returns a maximum of 10 results per request
210
239
  # We may need to make multiple requests to get the desired number
211
240
  start_index = 1
212
241
  total_results = 0
213
-
242
+
214
243
  while total_results < self.max_results:
215
244
  try:
216
245
  response = self._make_request(query, start_index)
217
-
246
+
218
247
  # Break if no items
219
248
  if "items" not in response:
220
249
  break
221
-
250
+
222
251
  items = response.get("items", [])
223
-
252
+
224
253
  # Process each result
225
254
  for item in items:
226
255
  title = item.get("title", "")
227
256
  snippet = item.get("snippet", "")
228
257
  url = item.get("link", "")
229
-
258
+
230
259
  # Skip results without URL
231
260
  if not url:
232
261
  continue
233
-
234
- results.append({
235
- "title": title,
236
- "snippet": snippet,
237
- "url": url,
238
- "source": "Google Programmable Search"
239
- })
240
-
262
+
263
+ results.append(
264
+ {
265
+ "title": title,
266
+ "snippet": snippet,
267
+ "url": url,
268
+ "source": "Google Programmable Search",
269
+ }
270
+ )
271
+
241
272
  total_results += 1
242
273
  if total_results >= self.max_results:
243
274
  break
244
-
275
+
245
276
  # Check if there are more results
246
277
  if not items or total_results >= self.max_results:
247
278
  break
248
-
279
+
249
280
  # Update start index for next request
250
281
  start_index += len(items)
251
-
282
+
252
283
  # Add a small delay between multiple requests to be respectful of the API
253
284
  if total_results < self.max_results:
254
285
  time.sleep(self.min_request_interval)
255
-
286
+
256
287
  except Exception as e:
257
- logger.error(f"Error getting search results: {str(e)}")
288
+ logger.error("Error getting search results: %s", str(e))
258
289
  break
259
-
260
- logger.info(f"Retrieved {len(results)} search results for query: '{query}'")
290
+
291
+ logger.info("Retrieved %s search results for query: '%s'", len(results), query)
261
292
  return results
262
-
263
- def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
293
+
294
+ def _get_full_content(
295
+ self, relevant_items: List[Dict[str, Any]]
296
+ ) -> List[Dict[str, Any]]:
264
297
  """Get full content for search results"""
265
298
  # Use the BaseSearchEngine implementation
266
299
  return super()._get_full_content(relevant_items)
267
-
300
+
268
301
  def run(self, query: str) -> List[Dict[str, Any]]:
269
302
  """Run the search engine to get results for a query"""
270
303
  # Get search result previews/snippets
271
304
  search_results = self._get_previews(query)
272
-
305
+
273
306
  # Filter for relevance if we have an LLM and max_filtered_results
274
307
  if self.llm and self.max_filtered_results:
275
308
  search_results = self._filter_for_relevance(query, search_results)
276
-
309
+
277
310
  # Get full content if needed
278
311
  if self.include_full_content:
279
312
  search_results = self._get_full_content(search_results)
280
-
281
- return search_results
313
+
314
+ return search_results