local-deep-research 0.5.9__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (90)
  1. local_deep_research/__version__.py +1 -1
  2. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +11 -1
  3. local_deep_research/advanced_search_system/questions/browsecomp_question.py +32 -6
  4. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +32 -8
  5. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -0
  6. local_deep_research/api/__init__.py +2 -0
  7. local_deep_research/api/research_functions.py +177 -3
  8. local_deep_research/benchmarks/graders.py +150 -5
  9. local_deep_research/benchmarks/models/__init__.py +19 -0
  10. local_deep_research/benchmarks/models/benchmark_models.py +283 -0
  11. local_deep_research/benchmarks/ui/__init__.py +1 -0
  12. local_deep_research/benchmarks/web_api/__init__.py +6 -0
  13. local_deep_research/benchmarks/web_api/benchmark_routes.py +862 -0
  14. local_deep_research/benchmarks/web_api/benchmark_service.py +920 -0
  15. local_deep_research/config/llm_config.py +106 -21
  16. local_deep_research/defaults/default_settings.json +447 -2
  17. local_deep_research/error_handling/report_generator.py +10 -0
  18. local_deep_research/llm/__init__.py +19 -0
  19. local_deep_research/llm/llm_registry.py +155 -0
  20. local_deep_research/metrics/db_models.py +3 -7
  21. local_deep_research/metrics/search_tracker.py +25 -11
  22. local_deep_research/search_system.py +12 -9
  23. local_deep_research/utilities/log_utils.py +23 -10
  24. local_deep_research/utilities/thread_context.py +99 -0
  25. local_deep_research/web/app_factory.py +32 -8
  26. local_deep_research/web/database/benchmark_schema.py +230 -0
  27. local_deep_research/web/database/convert_research_id_to_string.py +161 -0
  28. local_deep_research/web/database/models.py +55 -1
  29. local_deep_research/web/database/schema_upgrade.py +397 -2
  30. local_deep_research/web/database/uuid_migration.py +265 -0
  31. local_deep_research/web/routes/api_routes.py +62 -31
  32. local_deep_research/web/routes/history_routes.py +13 -6
  33. local_deep_research/web/routes/metrics_routes.py +264 -4
  34. local_deep_research/web/routes/research_routes.py +45 -18
  35. local_deep_research/web/routes/route_registry.py +352 -0
  36. local_deep_research/web/routes/settings_routes.py +382 -22
  37. local_deep_research/web/services/research_service.py +22 -29
  38. local_deep_research/web/services/settings_manager.py +53 -0
  39. local_deep_research/web/services/settings_service.py +2 -0
  40. local_deep_research/web/static/css/styles.css +8 -0
  41. local_deep_research/web/static/js/components/detail.js +7 -14
  42. local_deep_research/web/static/js/components/details.js +8 -10
  43. local_deep_research/web/static/js/components/fallback/ui.js +4 -4
  44. local_deep_research/web/static/js/components/history.js +6 -6
  45. local_deep_research/web/static/js/components/logpanel.js +14 -11
  46. local_deep_research/web/static/js/components/progress.js +51 -46
  47. local_deep_research/web/static/js/components/research.js +250 -89
  48. local_deep_research/web/static/js/components/results.js +5 -7
  49. local_deep_research/web/static/js/components/settings.js +32 -26
  50. local_deep_research/web/static/js/components/settings_sync.js +24 -23
  51. local_deep_research/web/static/js/config/urls.js +285 -0
  52. local_deep_research/web/static/js/main.js +8 -8
  53. local_deep_research/web/static/js/research_form.js +267 -12
  54. local_deep_research/web/static/js/services/api.js +18 -18
  55. local_deep_research/web/static/js/services/keyboard.js +8 -8
  56. local_deep_research/web/static/js/services/socket.js +53 -35
  57. local_deep_research/web/static/js/services/ui.js +1 -1
  58. local_deep_research/web/templates/base.html +4 -1
  59. local_deep_research/web/templates/components/custom_dropdown.html +5 -3
  60. local_deep_research/web/templates/components/mobile_nav.html +3 -3
  61. local_deep_research/web/templates/components/sidebar.html +9 -3
  62. local_deep_research/web/templates/pages/benchmark.html +2697 -0
  63. local_deep_research/web/templates/pages/benchmark_results.html +1274 -0
  64. local_deep_research/web/templates/pages/benchmark_simple.html +453 -0
  65. local_deep_research/web/templates/pages/cost_analytics.html +1 -1
  66. local_deep_research/web/templates/pages/metrics.html +212 -39
  67. local_deep_research/web/templates/pages/research.html +8 -6
  68. local_deep_research/web/templates/pages/star_reviews.html +1 -1
  69. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -1
  70. local_deep_research/web_search_engines/engines/search_engine_brave.py +15 -1
  71. local_deep_research/web_search_engines/engines/search_engine_ddg.py +20 -1
  72. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +26 -2
  73. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +15 -1
  74. local_deep_research/web_search_engines/engines/search_engine_retriever.py +192 -0
  75. local_deep_research/web_search_engines/engines/search_engine_tavily.py +307 -0
  76. local_deep_research/web_search_engines/rate_limiting/__init__.py +14 -0
  77. local_deep_research/web_search_engines/rate_limiting/__main__.py +9 -0
  78. local_deep_research/web_search_engines/rate_limiting/cli.py +209 -0
  79. local_deep_research/web_search_engines/rate_limiting/exceptions.py +21 -0
  80. local_deep_research/web_search_engines/rate_limiting/tracker.py +506 -0
  81. local_deep_research/web_search_engines/retriever_registry.py +108 -0
  82. local_deep_research/web_search_engines/search_engine_base.py +161 -43
  83. local_deep_research/web_search_engines/search_engine_factory.py +14 -0
  84. local_deep_research/web_search_engines/search_engines_config.py +20 -0
  85. local_deep_research-0.6.1.dist-info/METADATA +374 -0
  86. {local_deep_research-0.5.9.dist-info → local_deep_research-0.6.1.dist-info}/RECORD +89 -64
  87. local_deep_research-0.5.9.dist-info/METADATA +0 -420
  88. {local_deep_research-0.5.9.dist-info → local_deep_research-0.6.1.dist-info}/WHEEL +0 -0
  89. {local_deep_research-0.5.9.dist-info → local_deep_research-0.6.1.dist-info}/entry_points.txt +0 -0
  90. {local_deep_research-0.5.9.dist-info → local_deep_research-0.6.1.dist-info}/licenses/LICENSE +0 -0
--- /dev/null
+++ local_deep_research/web_search_engines/engines/search_engine_retriever.py
@@ -0,0 +1,192 @@
+"""
+Search engine implementation that wraps any LangChain retriever.
+This allows using vector stores, databases, or any custom retriever as a search source in LDR.
+"""
+
+from typing import Any, Dict, List
+from langchain.schema import BaseRetriever, Document
+from loguru import logger
+
+from ..search_engine_base import BaseSearchEngine
+
+
+class RetrieverSearchEngine(BaseSearchEngine):
+    """
+    Search engine that uses any LangChain retriever.
+
+    This allows users to plug in any LangChain retriever (vector stores,
+    databases, custom implementations) and use it as a search engine in LDR.
+    """
+
+    def __init__(
+        self,
+        retriever: BaseRetriever,
+        max_results: int = 10,
+        name: str = None,
+        **kwargs,
+    ):
+        """
+        Initialize the retriever-based search engine.
+
+        Args:
+            retriever: Any LangChain BaseRetriever instance
+            max_results: Maximum number of results to return
+            name: Display name for this retriever (defaults to retriever class name)
+            **kwargs: Additional parameters passed to parent
+        """
+        super().__init__(max_results=max_results, **kwargs)
+        self.retriever = retriever
+        self.name = name if name is not None else retriever.__class__.__name__
+
+    def run(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Execute search using the LangChain retriever.
+
+        Args:
+            query: Search query
+
+        Returns:
+            List of search results in LDR format
+        """
+        try:
+            # Use the retriever to get relevant documents
+            docs = self.retriever.invoke(query)
+
+            # Convert LangChain documents to LDR search result format
+            results = []
+            for i, doc in enumerate(docs[: self.max_results]):
+                result = self._convert_document_to_result(doc, i)
+                results.append(result)
+
+            logger.info(
+                f"Retriever '{self.name}' returned {len(results)} results for query: {query}"
+            )
+            return results
+
+        except Exception:
+            logger.exception("Error in retriever search")
+            return []
+
+    def _convert_document_to_result(
+        self, doc: Document, index: int
+    ) -> Dict[str, Any]:
+        """
+        Convert a LangChain Document to LDR search result format.
+
+        Args:
+            doc: LangChain Document
+            index: Result index
+
+        Returns:
+            Search result in LDR format
+        """
+        # Extract metadata
+        metadata = doc.metadata or {}
+
+        # Build the result
+        result = {
+            # Required fields for LDR
+            "title": metadata.get("title", f"Document {index + 1}"),
+            "url": metadata.get(
+                "source",
+                metadata.get("url", f"retriever://{self.name}/doc_{index}"),
+            ),
+            "snippet": doc.page_content[:500] if doc.page_content else "",
+            # Optional fields
+            "full_content": doc.page_content,
+            "author": metadata.get("author", ""),
+            "date": metadata.get("date", ""),
+            # Include all metadata for flexibility
+            "metadata": metadata,
+            # Score if available
+            "score": metadata.get("score", 1.0),
+            # Source information
+            "source": self.name,
+            "retriever_type": self.retriever.__class__.__name__,
+        }
+
+        return result
+
+    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Get preview information from the retriever.
+
+        Args:
+            query: Search query
+
+        Returns:
+            List of preview dictionaries
+        """
+        try:
+            # Use the retriever to get relevant documents
+            docs = self.retriever.invoke(query)
+
+            # Convert to preview format
+            previews = []
+            for i, doc in enumerate(docs[: self.max_results]):
+                preview = self._convert_document_to_result(doc, i)
+                previews.append(preview)
+
+            logger.info(
+                f"Retriever '{self.name}' returned {len(previews)} previews for query: {query}"
+            )
+            return previews
+
+        except Exception:
+            logger.exception("Error getting previews from retriever")
+            return []
+
+    def _get_full_content(
+        self, relevant_items: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        """
+        For retrievers, previews already contain full content.
+
+        Args:
+            relevant_items: List of relevant preview dictionaries
+
+        Returns:
+            Same list with full content (already included)
+        """
+        # For retrievers, the preview already contains the full content
+        # Just ensure the 'full_content' field is present
+        for item in relevant_items:
+            if "full_content" not in item and "snippet" in item:
+                item["full_content"] = item["snippet"]
+        return relevant_items
+
+    async def arun(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Async version of search using the retriever.
+
+        Args:
+            query: Search query
+
+        Returns:
+            List of search results in LDR format
+        """
+        try:
+            # Use async retriever if available
+            if hasattr(self.retriever, "aget_relevant_documents"):
+                docs = await self.retriever.aget_relevant_documents(query)
+            else:
+                # Fall back to sync version
+                logger.debug(
+                    f"Retriever '{self.name}' doesn't support async, using sync version"
+                )
+                return self.run(query)
+
+            # Convert documents to results
+            results = []
+            for i, doc in enumerate(docs[: self.max_results]):
+                result = self._convert_document_to_result(doc, i)
+                results.append(result)
+
+            logger.info(
+                f"Retriever '{self.name}' returned {len(results)} async results for query: {query}"
+            )
+            return results
+
+        except Exception:
+            logger.exception("Error in async retriever search")
+            return []
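RetrieverSearchEngine turns any LangChain retriever into an LDR search source by mapping each Document onto the result dictionary built in _convert_document_to_result. A minimal usage sketch, assuming a toy BaseRetriever subclass; the ToyRetriever class, its canned document, and the "team-notes" name are illustrative, not part of the package:

    from typing import List

    from langchain.schema import BaseRetriever, Document

    from local_deep_research.web_search_engines.engines.search_engine_retriever import (
        RetrieverSearchEngine,
    )


    class ToyRetriever(BaseRetriever):
        """Stands in for a vector store; returns one canned document."""

        def _get_relevant_documents(self, query: str) -> List[Document]:
            return [
                Document(
                    page_content=f"Internal notes mentioning {query}.",
                    metadata={"title": "Team notes", "source": "notes://1"},
                )
            ]


    engine = RetrieverSearchEngine(ToyRetriever(), max_results=5, name="team-notes")
    for hit in engine.run("quarterly roadmap"):
        print(hit["title"], hit["url"], hit["snippet"])

Because _get_full_content treats retriever previews as already complete, sources wired in this way skip the separate page-fetch phase that the web engines use.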
--- /dev/null
+++ local_deep_research/web_search_engines/engines/search_engine_tavily.py
@@ -0,0 +1,307 @@
+import logging
+import os
+from typing import Any, Dict, List, Optional
+
+import requests
+from langchain_core.language_models import BaseLLM
+
+from ...config import search_config
+from ..search_engine_base import BaseSearchEngine
+from ..rate_limiting import RateLimitError
+
+logger = logging.getLogger(__name__)
+
+
+class TavilySearchEngine(BaseSearchEngine):
+    """Tavily search engine implementation with two-phase approach"""
+
+    def __init__(
+        self,
+        max_results: int = 10,
+        region: str = "US",
+        time_period: str = "y",
+        safe_search: bool = True,
+        search_language: str = "English",
+        api_key: Optional[str] = None,
+        llm: Optional[BaseLLM] = None,
+        include_full_content: bool = True,
+        max_filtered_results: Optional[int] = None,
+        search_depth: str = "basic",
+        include_domains: Optional[List[str]] = None,
+        exclude_domains: Optional[List[str]] = None,
+        **kwargs,
+    ):
+        """
+        Initialize the Tavily search engine.
+
+        Args:
+            max_results: Maximum number of search results
+            region: Region code for search results (not used by Tavily currently)
+            time_period: Time period for search results (not used by Tavily currently)
+            safe_search: Whether to enable safe search (not used by Tavily currently)
+            search_language: Language for search results (not used by Tavily currently)
+            api_key: Tavily API key (can also be set in TAVILY_API_KEY env)
+            llm: Language model for relevance filtering
+            include_full_content: Whether to include full webpage content in results
+            max_filtered_results: Maximum number of results to keep after filtering
+            search_depth: "basic" or "advanced" - controls search quality vs speed
+            include_domains: List of domains to include in search
+            exclude_domains: List of domains to exclude from search
+            **kwargs: Additional parameters (ignored but accepted for compatibility)
+        """
+        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
+        super().__init__(
+            llm=llm,
+            max_filtered_results=max_filtered_results,
+            max_results=max_results,
+        )
+        self.include_full_content = include_full_content
+        self.search_depth = search_depth
+        self.include_domains = include_domains or []
+        self.exclude_domains = exclude_domains or []
+
+        # Get API key - check params, database, or env vars
+        from ...utilities.db_utils import get_db_setting
+
+        tavily_api_key = api_key
+        if not tavily_api_key:
+            tavily_api_key = get_db_setting("search.engine.web.tavily.api_key")
+
+        if not tavily_api_key:
+            tavily_api_key = os.environ.get("TAVILY_API_KEY")
+
+        if not tavily_api_key:
+            raise ValueError(
+                "Tavily API key not found. Please provide api_key parameter, "
+                "set it in the UI settings, or set TAVILY_API_KEY environment variable."
+            )
+
+        self.api_key = tavily_api_key
+        self.base_url = "https://api.tavily.com"
+
+        # If full content is requested, initialize FullSearchResults
+        if include_full_content:
+            # Import FullSearchResults only if needed
+            try:
+                from .full_search import FullSearchResults
+
+                # Create a simple wrapper for Tavily API calls
+                class TavilyWrapper:
+                    def __init__(self, parent):
+                        self.parent = parent
+
+                    def run(self, query):
+                        return self.parent._get_previews(query)
+
+                self.full_search = FullSearchResults(
+                    llm=llm,
+                    web_search=TavilyWrapper(self),
+                    language=search_language,
+                    max_results=max_results,
+                    region=region,
+                    time=time_period,
+                    safesearch="moderate" if safe_search else "off",
+                )
+            except ImportError:
+                logger.warning(
+                    "Warning: FullSearchResults not available. Full content retrieval disabled."
+                )
+                self.include_full_content = False
+
+    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Get preview information from Tavily Search.
+
+        Args:
+            query: The search query
+
+        Returns:
+            List of preview dictionaries
+        """
+        logger.info("Getting search results from Tavily")
+
+        try:
+            # Prepare the request payload
+            payload = {
+                "api_key": self.api_key,
+                "query": query[:400],  # Limit query length
+                "search_depth": self.search_depth,
+                "max_results": min(20, self.max_results),  # Tavily has a max limit
+                "include_answer": False,  # We don't need the AI answer
+                "include_images": False,  # We don't need images
+                "include_raw_content": self.include_full_content,  # Get content if requested
+            }
+
+            # Add domain filters if specified
+            if self.include_domains:
+                payload["include_domains"] = self.include_domains
+            if self.exclude_domains:
+                payload["exclude_domains"] = self.exclude_domains
+
+            # Make the API request
+            response = requests.post(
+                f"{self.base_url}/search",
+                json=payload,
+                headers={"Content-Type": "application/json"},
+                timeout=30,
+            )
+
+            # Check for errors
+            if response.status_code == 429:
+                raise RateLimitError(
+                    f"Tavily rate limit hit: {response.status_code} - {response.text}"
+                )
+
+            response.raise_for_status()
+
+            # Parse the response
+            data = response.json()
+            results = data.get("results", [])
+
+            # Format results as previews
+            previews = []
+            for i, result in enumerate(results):
+                preview = {
+                    "id": result.get("url", str(i)),  # Use URL as ID
+                    "title": result.get("title", ""),
+                    "link": result.get("url", ""),
+                    "snippet": result.get("content", ""),  # Tavily calls it "content"
+                    "displayed_link": result.get("url", ""),
+                    "position": i,
+                }
+
+                # Store full Tavily result for later
+                preview["_full_result"] = result
+
+                previews.append(preview)
+
+            # Store the previews for potential full content retrieval
+            self._search_results = previews
+
+            return previews
+
+        except RateLimitError:
+            raise  # Re-raise rate limit errors
+        except requests.exceptions.RequestException as e:
+            error_msg = str(e)
+            logger.exception("Error getting Tavily results")
+
+            # Check for rate limit patterns in error message
+            if any(
+                pattern in error_msg.lower()
+                for pattern in [
+                    "429",
+                    "rate limit",
+                    "quota",
+                    "too many requests",
+                ]
+            ):
+                raise RateLimitError(f"Tavily rate limit hit: {error_msg}")
+
+            return []
+        except Exception:
+            logger.exception("Unexpected error getting Tavily results")
+            return []
+
+    def _get_full_content(
+        self, relevant_items: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        """
+        Get full content for the relevant search results.
+        If include_full_content is True and raw content was retrieved,
+        includes it in the results.
+
+        Args:
+            relevant_items: List of relevant preview dictionaries
+
+        Returns:
+            List of result dictionaries with full content if available
+        """
+        # Check if we should get full content
+        if (
+            hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
+            and search_config.SEARCH_SNIPPETS_ONLY
+        ):
+            logger.info("Snippet-only mode, skipping full content retrieval")
+
+            # Return the relevant items with their full Tavily information
+            results = []
+            for item in relevant_items:
+                # Use the full result if available, otherwise use the preview
+                if "_full_result" in item:
+                    result = item["_full_result"]
+                    # Remove temporary field
+                    if "_full_result" in result:
+                        del result["_full_result"]
+                else:
+                    result = item
+
+                results.append(result)
+
+            return results
+
+        # If full content retrieval is enabled
+        if self.include_full_content and hasattr(self, "full_search"):
+            logger.info("Retrieving full webpage content")
+
+            try:
+                # Use FullSearchResults to get full content
+                results_with_content = self.full_search._get_full_content(
+                    relevant_items
+                )
+
+                return results_with_content
+
+            except Exception:
+                logger.exception("Error retrieving full content")
+                # Fall back to returning the items without full content
+
+        # Return items with their full Tavily information
+        results = []
+        for item in relevant_items:
+            # Use the full result if available, otherwise use the preview
+            if "_full_result" in item:
+                result = item["_full_result"].copy()
+
+                # If Tavily provided raw_content, include it
+                if "raw_content" in result and self.include_full_content:
+                    result["content"] = result.get(
+                        "raw_content", result.get("content", "")
+                    )
+
+                # Remove temporary field
+                if "_full_result" in result:
+                    del result["_full_result"]
+            else:
+                result = item.copy()
+                if "_full_result" in result:
+                    del result["_full_result"]
+
+            results.append(result)
+
+        return results
+
+    def run(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Execute a search using Tavily with the two-phase approach.
+
+        Args:
+            query: The search query
+
+        Returns:
+            List of search results
+        """
+        logger.info("---Execute a search using Tavily---")
+
+        # Use the implementation from the parent class which handles all phases
+        results = super().run(query)
+
+        # Clean up
+        if hasattr(self, "_search_results"):
+            del self._search_results
+
+        return results
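TavilySearchEngine resolves its key from the api_key argument, the settings database, or the TAVILY_API_KEY environment variable, and surfaces HTTP 429 as RateLimitError instead of silently returning nothing. A hedged construction sketch; the printed result keys follow the preview format above and should be treated as an assumption, since the base class may reshape results during filtering:

    import os

    from local_deep_research.web_search_engines.engines.search_engine_tavily import (
        TavilySearchEngine,
    )
    from local_deep_research.web_search_engines.rate_limiting import RateLimitError

    engine = TavilySearchEngine(
        api_key=os.environ["TAVILY_API_KEY"],
        max_results=5,
        search_depth="basic",
        include_domains=["arxiv.org"],  # optional domain filter
        include_full_content=False,  # previews only; skip page fetching
    )

    try:
        for hit in engine.run("adaptive rate limiting"):
            print(hit.get("title"), hit.get("url") or hit.get("link"))
    except RateLimitError:
        print("Tavily throttled the request; back off and retry later.")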
--- /dev/null
+++ local_deep_research/web_search_engines/rate_limiting/__init__.py
@@ -0,0 +1,14 @@
+"""
+Adaptive rate limiting module for search engines.
+"""
+
+from .exceptions import RateLimitError, AdaptiveRetryError, RateLimitConfigError
+from .tracker import AdaptiveRateLimitTracker, get_tracker
+
+__all__ = [
+    "RateLimitError",
+    "AdaptiveRetryError",
+    "RateLimitConfigError",
+    "AdaptiveRateLimitTracker",
+    "get_tracker",
+]
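For callers, the useful contract of this package is its exception types. A small sketch of consuming them; note that the meaning of AdaptiveRetryError is an assumption here, since exceptions.py and tracker.py are not shown in this excerpt:

    from local_deep_research.web_search_engines.rate_limiting import (
        AdaptiveRetryError,
        RateLimitError,
    )


    def search_or_empty(engine, query: str) -> list:
        """Run a search, treating throttling as an empty result set."""
        try:
            return engine.run(query)
        except RateLimitError:
            # A single request was throttled upstream (e.g. HTTP 429).
            return []
        except AdaptiveRetryError:
            # Assumption: raised once the adaptive retry budget is exhausted.
            return []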
--- /dev/null
+++ local_deep_research/web_search_engines/rate_limiting/__main__.py
@@ -0,0 +1,9 @@
+"""
+Entry point for rate limiting CLI module.
+Allows running: python -m local_deep_research.web_search_engines.rate_limiting
+"""
+
+from .cli import main
+
+if __name__ == "__main__":
+    main()