local-deep-research 0.5.7__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. local_deep_research/__version__.py +1 -1
  2. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +11 -1
  3. local_deep_research/advanced_search_system/questions/browsecomp_question.py +32 -6
  4. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +33 -8
  5. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -0
  6. local_deep_research/api/__init__.py +2 -0
  7. local_deep_research/api/research_functions.py +177 -3
  8. local_deep_research/benchmarks/graders.py +150 -5
  9. local_deep_research/benchmarks/models/__init__.py +19 -0
  10. local_deep_research/benchmarks/models/benchmark_models.py +283 -0
  11. local_deep_research/benchmarks/ui/__init__.py +1 -0
  12. local_deep_research/benchmarks/web_api/__init__.py +6 -0
  13. local_deep_research/benchmarks/web_api/benchmark_routes.py +862 -0
  14. local_deep_research/benchmarks/web_api/benchmark_service.py +920 -0
  15. local_deep_research/config/llm_config.py +106 -21
  16. local_deep_research/defaults/default_settings.json +448 -3
  17. local_deep_research/error_handling/report_generator.py +10 -0
  18. local_deep_research/llm/__init__.py +19 -0
  19. local_deep_research/llm/llm_registry.py +155 -0
  20. local_deep_research/metrics/db_models.py +3 -7
  21. local_deep_research/metrics/search_tracker.py +25 -11
  22. local_deep_research/report_generator.py +3 -2
  23. local_deep_research/search_system.py +12 -9
  24. local_deep_research/utilities/log_utils.py +23 -10
  25. local_deep_research/utilities/thread_context.py +99 -0
  26. local_deep_research/web/app_factory.py +32 -8
  27. local_deep_research/web/database/benchmark_schema.py +230 -0
  28. local_deep_research/web/database/convert_research_id_to_string.py +161 -0
  29. local_deep_research/web/database/models.py +55 -1
  30. local_deep_research/web/database/schema_upgrade.py +397 -2
  31. local_deep_research/web/database/uuid_migration.py +265 -0
  32. local_deep_research/web/routes/api_routes.py +62 -31
  33. local_deep_research/web/routes/history_routes.py +13 -6
  34. local_deep_research/web/routes/metrics_routes.py +264 -4
  35. local_deep_research/web/routes/research_routes.py +45 -18
  36. local_deep_research/web/routes/route_registry.py +352 -0
  37. local_deep_research/web/routes/settings_routes.py +382 -22
  38. local_deep_research/web/services/research_service.py +22 -29
  39. local_deep_research/web/services/settings_manager.py +53 -0
  40. local_deep_research/web/services/settings_service.py +2 -0
  41. local_deep_research/web/static/css/styles.css +8 -0
  42. local_deep_research/web/static/js/components/detail.js +7 -14
  43. local_deep_research/web/static/js/components/details.js +8 -10
  44. local_deep_research/web/static/js/components/fallback/ui.js +4 -4
  45. local_deep_research/web/static/js/components/history.js +6 -6
  46. local_deep_research/web/static/js/components/logpanel.js +14 -11
  47. local_deep_research/web/static/js/components/progress.js +51 -46
  48. local_deep_research/web/static/js/components/research.js +250 -89
  49. local_deep_research/web/static/js/components/results.js +5 -7
  50. local_deep_research/web/static/js/components/settings.js +32 -26
  51. local_deep_research/web/static/js/components/settings_sync.js +24 -23
  52. local_deep_research/web/static/js/config/urls.js +285 -0
  53. local_deep_research/web/static/js/main.js +8 -8
  54. local_deep_research/web/static/js/research_form.js +267 -12
  55. local_deep_research/web/static/js/services/api.js +18 -18
  56. local_deep_research/web/static/js/services/keyboard.js +8 -8
  57. local_deep_research/web/static/js/services/socket.js +53 -35
  58. local_deep_research/web/static/js/services/ui.js +1 -1
  59. local_deep_research/web/templates/base.html +4 -1
  60. local_deep_research/web/templates/components/custom_dropdown.html +5 -3
  61. local_deep_research/web/templates/components/mobile_nav.html +3 -3
  62. local_deep_research/web/templates/components/sidebar.html +9 -3
  63. local_deep_research/web/templates/pages/benchmark.html +2697 -0
  64. local_deep_research/web/templates/pages/benchmark_results.html +1136 -0
  65. local_deep_research/web/templates/pages/benchmark_simple.html +453 -0
  66. local_deep_research/web/templates/pages/cost_analytics.html +1 -1
  67. local_deep_research/web/templates/pages/metrics.html +212 -39
  68. local_deep_research/web/templates/pages/research.html +8 -6
  69. local_deep_research/web/templates/pages/star_reviews.html +1 -1
  70. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -1
  71. local_deep_research/web_search_engines/engines/search_engine_brave.py +15 -1
  72. local_deep_research/web_search_engines/engines/search_engine_ddg.py +20 -1
  73. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +26 -2
  74. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +15 -1
  75. local_deep_research/web_search_engines/engines/search_engine_retriever.py +192 -0
  76. local_deep_research/web_search_engines/engines/search_engine_tavily.py +307 -0
  77. local_deep_research/web_search_engines/rate_limiting/__init__.py +14 -0
  78. local_deep_research/web_search_engines/rate_limiting/__main__.py +9 -0
  79. local_deep_research/web_search_engines/rate_limiting/cli.py +209 -0
  80. local_deep_research/web_search_engines/rate_limiting/exceptions.py +21 -0
  81. local_deep_research/web_search_engines/rate_limiting/tracker.py +506 -0
  82. local_deep_research/web_search_engines/retriever_registry.py +108 -0
  83. local_deep_research/web_search_engines/search_engine_base.py +161 -43
  84. local_deep_research/web_search_engines/search_engine_factory.py +14 -0
  85. local_deep_research/web_search_engines/search_engines_config.py +20 -0
  86. local_deep_research-0.6.0.dist-info/METADATA +374 -0
  87. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/RECORD +90 -65
  88. local_deep_research-0.5.7.dist-info/METADATA +0 -420
  89. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/WHEEL +0 -0
  90. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/entry_points.txt +0 -0
  91. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -6,10 +6,28 @@ from typing import Any, Dict, List, Optional
6
6
 
7
7
  from langchain_core.language_models import BaseLLM
8
8
  from loguru import logger
9
+ from tenacity import (
10
+ retry,
11
+ stop_after_attempt,
12
+ retry_if_exception_type,
13
+ RetryError,
14
+ )
15
+ from tenacity.wait import wait_base
9
16
 
10
17
  from ..advanced_search_system.filters.base_filter import BaseFilter
11
18
  from ..metrics.search_tracker import get_search_tracker
12
19
  from ..utilities.db_utils import get_db_setting
20
+ from .rate_limiting import RateLimitError, get_tracker
21
+
22
+
23
+ class AdaptiveWait(wait_base):
24
+ """Custom wait strategy that uses adaptive rate limiting."""
25
+
26
+ def __init__(self, get_wait_func):
27
+ self.get_wait_func = get_wait_func
28
+
29
+ def __call__(self, retry_state):
30
+ return self.get_wait_func()
13
31
 
14
32
 
15
33
  class BaseSearchEngine(ABC):
@@ -59,6 +77,12 @@ class BaseSearchEngine(ABC):
59
77
  1, int(max_results)
60
78
  ) # Ensure it's a positive integer
61
79
 
80
+ # Rate limiting attributes
81
+ self.engine_type = self.__class__.__name__
82
+ self.rate_tracker = get_tracker()
83
+ self._last_wait_time = None
84
+ self._last_results_count = 0
85
+
62
86
  @property
63
87
  def max_filtered_results(self) -> int:
64
88
  """Get the maximum number of filtered results."""
@@ -84,7 +108,32 @@ class BaseSearchEngine(ABC):
84
108
  value = 10
85
109
  self._max_results = max(1, int(value))
86
110
 
87
- def run(self, query: str) -> List[Dict[str, Any]]:
111
+ def _get_adaptive_wait(self) -> float:
112
+ """Get adaptive wait time from tracker."""
113
+ wait_time = self.rate_tracker.get_wait_time(self.engine_type)
114
+ self._last_wait_time = wait_time
115
+ logger.debug(
116
+ f"{self.engine_type} waiting {wait_time:.2f}s before retry"
117
+ )
118
+ return wait_time
119
+
120
+ def _record_retry_outcome(self, retry_state) -> None:
121
+ """Record outcome after retry completes."""
122
+ success = (
123
+ not retry_state.outcome.failed if retry_state.outcome else False
124
+ )
125
+ self.rate_tracker.record_outcome(
126
+ self.engine_type,
127
+ self._last_wait_time or 0,
128
+ success,
129
+ retry_state.attempt_number,
130
+ error_type="RateLimitError" if not success else None,
131
+ search_result_count=self._last_results_count if success else 0,
132
+ )
133
+
134
+ def run(
135
+ self, query: str, research_context: Dict[str, Any] = None
136
+ ) -> List[Dict[str, Any]]:
88
137
  """
89
138
  Run the search engine with a given query, retrieving and filtering results.
90
139
  This implements a two-phase retrieval approach:
@@ -100,6 +149,19 @@ class BaseSearchEngine(ABC):
100
149
  """
101
150
  # Track search call for metrics
102
151
  tracker = get_search_tracker()
152
+
153
+ # For thread-safe context propagation: if we have research_context parameter, use it
154
+ # Otherwise, try to inherit from current thread context (normal case)
155
+ # This allows strategies running in threads to explicitly pass context when needed
156
+ current_context = tracker._get_research_context()
157
+ if research_context:
158
+ # Explicit context provided - use it and set it for this thread
159
+ tracker.set_research_context(research_context)
160
+ elif not current_context.get("research_id"):
161
+ # No context in current thread and none provided - try to get from main thread
162
+ # This handles the case where we're in a worker thread without context
163
+ pass # Will use empty context, research_id will be None
164
+
103
165
  engine_name = self.__class__.__name__.replace(
104
166
  "SearchEngine", ""
105
167
  ).lower()
@@ -109,58 +171,114 @@ class BaseSearchEngine(ABC):
109
171
  error_message = None
110
172
  results_count = 0
111
173
 
112
- try:
113
- # Step 1: Get preview information for items
114
- previews = self._get_previews(query)
115
- if not previews:
116
- logger.info(
117
- f"Search engine {self.__class__.__name__} returned no preview results for query: {query}"
118
- )
119
- results_count = 0
120
- return []
174
+ # Define the core search function with retry logic
175
+ if self.rate_tracker.enabled:
176
+ # Rate limiting enabled - use retry with adaptive wait
177
+ @retry(
178
+ stop=stop_after_attempt(3),
179
+ wait=AdaptiveWait(lambda: self._get_adaptive_wait()),
180
+ retry=retry_if_exception_type((RateLimitError,)),
181
+ after=self._record_retry_outcome,
182
+ reraise=True,
183
+ )
184
+ def _run_with_retry():
185
+ nonlocal success, error_message, results_count
186
+ return _execute_search()
187
+ else:
188
+ # Rate limiting disabled - run without retry
189
+ def _run_with_retry():
190
+ nonlocal success, error_message, results_count
191
+ return _execute_search()
192
+
193
+ def _execute_search():
194
+ nonlocal success, error_message, results_count
195
+
196
+ try:
197
+ # Step 1: Get preview information for items
198
+ previews = self._get_previews(query)
199
+ if not previews:
200
+ logger.info(
201
+ f"Search engine {self.__class__.__name__} returned no preview results for query: {query}"
202
+ )
203
+ results_count = 0
204
+ return []
121
205
 
122
- for preview_filter in self._preview_filters:
123
- previews = preview_filter.filter_results(previews, query)
206
+ for preview_filter in self._preview_filters:
207
+ previews = preview_filter.filter_results(previews, query)
124
208
 
125
- # Step 2: Filter previews for relevance with LLM
126
- # TEMPORARILY DISABLED: Skip LLM relevance filtering
127
- filtered_items = previews
128
- logger.info(
129
- f"LLM relevance filtering disabled - returning all {len(previews)} previews"
130
- )
209
+ # Step 2: Filter previews for relevance with LLM
210
+ # TEMPORARILY DISABLED: Skip LLM relevance filtering
211
+ filtered_items = previews
212
+ logger.info(
213
+ f"LLM relevance filtering disabled - returning all {len(previews)} previews"
214
+ )
131
215
 
132
- # # Original filtering code (disabled):
133
- # filtered_items = self._filter_for_relevance(previews, query)
134
- # if not filtered_items:
135
- # logger.info(
136
- # f"All preview results were filtered out as irrelevant for query: {query}"
137
- # )
138
- # # Do not fall back to previews, return empty list instead
139
- # results_count = 0
140
- # return []
141
-
142
- # Step 3: Get full content for filtered items
143
- # Import config inside the method to avoid circular import
144
-
145
- if get_db_setting("search.snippets_only", True):
146
- logger.info("Returning snippet-only results as per config")
147
- results = filtered_items
148
- else:
149
- results = self._get_full_content(filtered_items)
216
+ # Step 3: Get full content for filtered items
217
+ if get_db_setting("search.snippets_only", True):
218
+ logger.info("Returning snippet-only results as per config")
219
+ results = filtered_items
220
+ else:
221
+ results = self._get_full_content(filtered_items)
222
+
223
+ for content_filter in self._content_filters:
224
+ results = content_filter.filter_results(results, query)
225
+
226
+ results_count = len(results)
227
+ self._last_results_count = results_count
228
+
229
+ # Record success if we get here and rate limiting is enabled
230
+ if (
231
+ self.rate_tracker.enabled
232
+ and self._last_wait_time is not None
233
+ ):
234
+ self.rate_tracker.record_outcome(
235
+ self.engine_type,
236
+ self._last_wait_time,
237
+ success=True,
238
+ retry_count=1, # First attempt succeeded
239
+ search_result_count=results_count,
240
+ )
150
241
 
151
- for content_filter in self._content_filters:
152
- results = content_filter.filter_results(results, query)
242
+ return results
153
243
 
154
- results_count = len(results)
155
- return results
244
+ except RateLimitError:
245
+ # Only re-raise if rate limiting is enabled
246
+ if self.rate_tracker.enabled:
247
+ raise
248
+ else:
249
+ # If rate limiting is disabled, treat as regular error
250
+ success = False
251
+ error_message = "Rate limit hit but rate limiting disabled"
252
+ logger.warning(
253
+ f"Rate limit hit on {self.__class__.__name__} but rate limiting is disabled"
254
+ )
255
+ results_count = 0
256
+ return []
257
+ except Exception as e:
258
+ # Other errors - don't retry
259
+ success = False
260
+ error_message = str(e)
261
+ logger.exception(
262
+ f"Search engine {self.__class__.__name__} failed"
263
+ )
264
+ results_count = 0
265
+ return []
156
266
 
267
+ try:
268
+ return _run_with_retry()
269
+ except RetryError as e:
270
+ # All retries exhausted
271
+ success = False
272
+ error_message = f"Rate limited after all retries: {e}"
273
+ logger.exception(
274
+ f"{self.__class__.__name__} failed after all retries"
275
+ )
276
+ return []
157
277
  except Exception as e:
158
278
  success = False
159
279
  error_message = str(e)
160
- logger.error(f"Search engine {self.__class__.__name__} failed: {e}")
161
- results_count = 0
280
+ logger.exception(f"Search engine {self.__class__.__name__} error")
162
281
  return []
163
-
164
282
  finally:
165
283
  # Record search metrics
166
284
  response_time_ms = int((time.time() - start_time) * 1000)
@@ -8,6 +8,7 @@ from loguru import logger
8
8
  from ..utilities.db_utils import get_db_setting
9
9
  from .search_engine_base import BaseSearchEngine
10
10
  from .search_engines_config import default_search_engine, search_config
11
+ from .retriever_registry import retriever_registry
11
12
 
12
13
 
13
14
  def create_search_engine(
@@ -24,6 +25,18 @@ def create_search_engine(
24
25
  Returns:
25
26
  Initialized search engine instance or None if creation failed
26
27
  """
28
+ # Check if this is a registered retriever first
29
+ retriever = retriever_registry.get(engine_name)
30
+ if retriever:
31
+ logger.info(f"Using registered LangChain retriever: {engine_name}")
32
+ from .engines.search_engine_retriever import RetrieverSearchEngine
33
+
34
+ return RetrieverSearchEngine(
35
+ retriever=retriever,
36
+ name=engine_name,
37
+ max_results=kwargs.get("max_results", 10),
38
+ )
39
+
27
40
  # If engine name not found, use default
28
41
  if engine_name not in search_config():
29
42
  logger.warning(
@@ -313,6 +326,7 @@ def get_search(
313
326
  logger.info(
314
327
  f"Creating search engine for tool: {search_tool} with params: {params.keys()}"
315
328
  )
329
+
316
330
  engine = create_search_engine(search_tool, **params)
317
331
 
318
332
  # Add debugging to check if engine is None
@@ -57,6 +57,26 @@ def search_config() -> Dict[str, Any]:
57
57
  search_engines = _extract_per_engine_config(config_data)
58
58
  search_engines["auto"] = get_db_setting("search.engine.auto", {})
59
59
 
60
+ # Add registered retrievers as available search engines
61
+ from .retriever_registry import retriever_registry
62
+
63
+ for name in retriever_registry.list_registered():
64
+ search_engines[name] = {
65
+ "module_path": ".engines.search_engine_retriever",
66
+ "class_name": "RetrieverSearchEngine",
67
+ "requires_api_key": False,
68
+ "requires_llm": False,
69
+ "description": f"LangChain retriever: {name}",
70
+ "strengths": [
71
+ "Domain-specific knowledge",
72
+ "No rate limits",
73
+ "Fast retrieval",
74
+ ],
75
+ "weaknesses": ["Limited to indexed content"],
76
+ "supports_full_search": True,
77
+ "is_retriever": True, # Mark as retriever for identification
78
+ }
79
+
60
80
  logger.info(
61
81
  f"Loaded {len(search_engines)} search engines from configuration file"
62
82
  )
@@ -0,0 +1,374 @@
1
+ Metadata-Version: 2.1
2
+ Name: local-deep-research
3
+ Version: 0.6.0
4
+ Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
5
+ Author-Email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>, djpetti <djpetti@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 LearningCircuit
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Classifier: Programming Language :: Python :: 3
29
+ Classifier: License :: OSI Approved :: MIT License
30
+ Classifier: Operating System :: OS Independent
31
+ Project-URL: Homepage, https://github.com/LearningCircuit/local-deep-research
32
+ Project-URL: Bug Tracker, https://github.com/LearningCircuit/local-deep-research/issues
33
+ Requires-Python: >=3.10
34
+ Requires-Dist: langchain>=0.3.18
35
+ Requires-Dist: langchain-community>=0.3.17
36
+ Requires-Dist: langchain-core>=0.3.34
37
+ Requires-Dist: langchain-ollama>=0.2.3
38
+ Requires-Dist: langchain-openai>=0.3.5
39
+ Requires-Dist: langchain-anthropic>=0.3.13
40
+ Requires-Dist: duckduckgo_search>=7.3.2
41
+ Requires-Dist: python-dateutil>=2.9.0
42
+ Requires-Dist: typing_extensions>=4.12.2
43
+ Requires-Dist: justext
44
+ Requires-Dist: playwright
45
+ Requires-Dist: beautifulsoup4
46
+ Requires-Dist: flask>=3.1.0
47
+ Requires-Dist: flask-cors>=3.0.10
48
+ Requires-Dist: flask-socketio>=5.1.1
49
+ Requires-Dist: sqlalchemy>=1.4.23
50
+ Requires-Dist: wikipedia
51
+ Requires-Dist: arxiv>=1.4.3
52
+ Requires-Dist: pypdf
53
+ Requires-Dist: sentence-transformers
54
+ Requires-Dist: faiss-cpu
55
+ Requires-Dist: pydantic>=2.0.0
56
+ Requires-Dist: pydantic-settings>=2.0.0
57
+ Requires-Dist: toml>=0.10.2
58
+ Requires-Dist: platformdirs>=3.0.0
59
+ Requires-Dist: dynaconf
60
+ Requires-Dist: requests>=2.28.0
61
+ Requires-Dist: tiktoken>=0.4.0
62
+ Requires-Dist: xmltodict>=0.13.0
63
+ Requires-Dist: lxml>=4.9.2
64
+ Requires-Dist: pdfplumber>=0.9.0
65
+ Requires-Dist: unstructured>=0.10.0
66
+ Requires-Dist: google-search-results
67
+ Requires-Dist: importlib-resources>=6.5.2
68
+ Requires-Dist: setuptools>=78.1.0
69
+ Requires-Dist: flask-wtf>=1.2.2
70
+ Requires-Dist: optuna>=4.3.0
71
+ Requires-Dist: elasticsearch==8.14.0
72
+ Requires-Dist: methodtools>=0.4.7
73
+ Requires-Dist: loguru>=0.7.3
74
+ Requires-Dist: cachetools>=5.5.2
75
+ Requires-Dist: matplotlib>=3.10.3
76
+ Requires-Dist: pandas>=2.2.3
77
+ Requires-Dist: plotly>=6.0.1
78
+ Requires-Dist: kaleido==0.2.1
79
+ Requires-Dist: aiohttp>=3.9.0
80
+ Requires-Dist: tenacity>=8.0.0
81
+ Description-Content-Type: text/markdown
82
+
83
+ # Local Deep Research
84
+
85
+ <div align="center">
86
+
87
+ [![GitHub stars](https://img.shields.io/github/stars/LearningCircuit/local-deep-research?style=for-the-badge)](https://github.com/LearningCircuit/local-deep-research/stargazers)
88
+ [![Docker Pulls](https://img.shields.io/docker/pulls/localdeepresearch/local-deep-research?style=for-the-badge)](https://hub.docker.com/r/localdeepresearch/local-deep-research)
89
+ [![PyPI Downloads](https://img.shields.io/pypi/dm/local-deep-research?style=for-the-badge)](https://pypi.org/project/local-deep-research/)
90
+
91
+ [![Tests](https://img.shields.io/github/actions/workflow/status/LearningCircuit/local-deep-research/tests.yml?branch=main&style=for-the-badge&label=Tests)](https://github.com/LearningCircuit/local-deep-research/actions/workflows/tests.yml)
92
+ [![CodeQL](https://img.shields.io/github/actions/workflow/status/LearningCircuit/local-deep-research/codeql.yml?branch=main&style=for-the-badge&label=CodeQL)](https://github.com/LearningCircuit/local-deep-research/security/code-scanning)
93
+
94
+ [![Discord](https://img.shields.io/discord/1352043059562680370?style=for-the-badge&logo=discord)](https://discord.gg/ttcqQeFcJ3)
95
+ [![Reddit](https://img.shields.io/badge/Reddit-r/LocalDeepResearch-FF4500?style=for-the-badge&logo=reddit)](https://www.reddit.com/r/LocalDeepResearch/)
96
+
97
+
98
+ **AI-powered research assistant for deep, iterative research**
99
+
100
+ *Performs deep, iterative research using multiple LLMs and search engines with proper citations*
101
+ </div>
102
+ ## 🚀 What is Local Deep Research?
103
+
104
+ LDR is an AI research assistant that performs systematic research by:
105
+
106
+ - **Breaking down complex questions** into focused sub-queries
107
+ - **Searching multiple sources** in parallel (web, academic papers, local documents)
108
+ - **Verifying information** across sources for accuracy
109
+ - **Creating comprehensive reports** with proper citations
110
+
111
+ It aims to help researchers, students, and professionals find accurate information quickly while maintaining transparency about sources.
112
+
113
+ ## 🎯 Why Choose LDR?
114
+
115
+ - **Privacy-Focused**: Run entirely locally with Ollama + SearXNG
116
+ - **Flexible**: Use any LLM, any search engine, any vector store
117
+ - **Comprehensive**: Multiple research modes from quick summaries to detailed reports
118
+ - **Transparent**: Track costs and performance with built-in analytics
119
+ - **Open Source**: MIT licensed with an active community
120
+
121
+ ## ✨ Key Features
122
+
123
+ ### 🔍 Research Modes
124
+ - **Quick Summary** - Get answers in 30 seconds to 3 minutes with citations
125
+ - **Detailed Research** - Comprehensive analysis with structured findings
126
+ - **Report Generation** - Professional reports with sections and table of contents
127
+ - **Document Analysis** - Search your private documents with AI
128
+
129
+ ### 🛠️ Advanced Capabilities
130
+ - **[LangChain Integration](docs/LANGCHAIN_RETRIEVER_INTEGRATION.md)** - Use any vector store as a search engine
131
+ - **[REST API](docs/api-quickstart.md)** - Language-agnostic HTTP access
132
+ - **[Benchmarking](docs/BENCHMARKING.md)** - Test and optimize your configuration
133
+ - **[Analytics Dashboard](docs/analytics-dashboard.md)** - Track costs, performance, and usage metrics
134
+ - **Real-time Updates** - WebSocket support for live research progress
135
+ - **Export Options** - Download results as PDF or Markdown
136
+ - **Research History** - Save, search, and revisit past research
137
+ - **Adaptive Rate Limiting** - Intelligent retry system that learns optimal wait times
138
+ - **Keyboard Shortcuts** - Navigate efficiently (ESC, Ctrl+Shift+1-5)
139
+
140
+ ### 🌐 Search Sources
141
+
142
+ #### Free Search Engines
143
+ - **Academic**: arXiv, PubMed, Semantic Scholar
144
+ - **General**: Wikipedia, SearXNG, DuckDuckGo
145
+ - **Technical**: GitHub, Elasticsearch
146
+ - **Historical**: Wayback Machine
147
+ - **News**: The Guardian
148
+
149
+ #### Premium Search Engines
150
+ - **Tavily** - AI-powered search
151
+ - **Google** - Via SerpAPI or Programmable Search Engine
152
+ - **Brave Search** - Privacy-focused web search
153
+
154
+ #### Custom Sources
155
+ - **Local Documents** - Search your files with AI
156
+ - **LangChain Retrievers** - Any vector store or database
157
+ - **Meta Search** - Combine multiple engines intelligently
158
+
159
+ [Full Search Engines Guide →](docs/search-engines.md)
160
+
161
+ ## ⚡ Quick Start
162
+
163
+ ### Option 1: Docker (Quickstart — not for MAC/ARM)
164
+
165
+ ```bash
166
+ # Step 1: Pull and run SearXNG for optimal search results
167
+ docker run -d -p 8080:8080 --name searxng searxng/searxng
168
+
169
+ # Step 2: Pull and run Local Deep Research (Please build your own docker on ARM)
170
+ docker run -d -p 5000:5000 --name local-deep-research --volume 'deep-research:/install/.venv/lib/python3.13/site-packages/data/' localdeepresearch/local-deep-research
171
+ ```
172
+
173
+ ### Option 2: Docker Compose (Recommended)
174
+
175
+ LDR uses Docker Compose to bundle the web app and all its dependencies so
176
+ you can get up and running quickly.
177
+
178
+ #### Option 2a: DIY docker-compose
179
+ See [docker-compose.yml](./docker-compose.yml) for a docker-compose file with reasonable defaults to get up and running with ollama, searxng, and local deep research all running locally.
180
+
181
+ Things you may want/need to configure:
182
+ * Ollama GPU driver
183
+ * Ollama context length (depends on available VRAM)
184
+ * Ollama keep alive (duration model will stay loaded into VRAM and idle before getting unloaded automatically)
185
+ * Deep Research model (depends on available VRAM and preference)
186
+
187
+ #### Option 2b: Use Cookie Cutter to tailor a docker-compose to your needs:
188
+
189
+ ##### Prerequisites
190
+
191
+ - [Docker](https://docs.docker.com/engine/install/)
192
+ - [Docker Compose](https://docs.docker.com/compose/install/)
193
+ - `cookiecutter`: Run `pip install --user cookiecutter`
194
+
195
+ Clone the repository:
196
+
197
+ ```bash
198
+ git clone https://github.com/LearningCircuit/local-deep-research.git
199
+ cd local-deep-research
200
+ ```
201
+
202
+ ### Configuring with Docker Compose
203
+
204
+ Cookiecutter will interactively guide you through the process of creating a
205
+ `docker-compose` configuration that meets your specific needs. This is the
206
+ recommended approach if you are not very familiar with Docker.
207
+
208
+ In the LDR repository, run the following command
209
+ to generate the compose file:
210
+
211
+ ```bash
212
+ cookiecutter cookiecutter-docker/
213
+ docker compose -f docker-compose.default.yml up
214
+ ```
215
+
216
+ [Docker Compose Guide →](docs/docker-compose-guide.md)
217
+
218
+ ### Option 3: Python Package
219
+
220
+ ```bash
221
+ # Step 1: Install the package
222
+ pip install local-deep-research
223
+
224
+ # Step 2: Setup SearXNG for best results
225
+ docker pull searxng/searxng
226
+ docker run -d -p 8080:8080 --name searxng searxng/searxng
227
+
228
+ # Step 3: Install Ollama from https://ollama.ai
229
+
230
+ # Step 4: Download a model
231
+ ollama pull gemma3:12b
232
+
233
+ # Step 5: Start the web interface
234
+ python -m local_deep_research.web.app
235
+ ```
236
+
237
+ [Full Installation Guide →](https://github.com/LearningCircuit/local-deep-research/wiki/Installation)
238
+
239
+ ## 💻 Usage Examples
240
+
241
+ ### Python API
242
+ ```python
243
+ from local_deep_research.api import quick_summary
244
+
245
+ # Simple usage
246
+ result = quick_summary("What are the latest advances in quantum computing?")
247
+ print(result["summary"])
248
+
249
+ # Advanced usage with custom configuration
250
+ result = quick_summary(
251
+ query="Impact of AI on healthcare",
252
+ search_tool="searxng",
253
+ search_strategy="focused-iteration",
254
+ iterations=2
255
+ )
256
+ ```
257
+
258
+ ### HTTP API
259
+ ```bash
260
+ curl -X POST http://localhost:5000/api/v1/quick_summary \
261
+ -H "Content-Type: application/json" \
262
+ -d '{"query": "Explain CRISPR gene editing"}'
263
+ ```
264
+
265
+ [More Examples →](examples/api_usage/)
266
+
267
+ ### Command Line Tools
268
+
269
+ ```bash
270
+ # Run benchmarks from CLI
271
+ python -m local_deep_research.benchmarks --dataset simpleqa --examples 50
272
+
273
+ # Manage rate limiting
274
+ python -m local_deep_research.web_search_engines.rate_limiting status
275
+ python -m local_deep_research.web_search_engines.rate_limiting reset
276
+ ```
277
+
278
+ ## 🔗 Enterprise Integration
279
+
280
+ Connect LDR to your existing knowledge base:
281
+
282
+ ```python
283
+ from local_deep_research.api import quick_summary
284
+
285
+ # Use your existing LangChain retriever
286
+ result = quick_summary(
287
+ query="What are our deployment procedures?",
288
+ retrievers={"company_kb": your_retriever},
289
+ search_tool="company_kb"
290
+ )
291
+ ```
292
+
293
+ Works with: FAISS, Chroma, Pinecone, Weaviate, Elasticsearch, and any LangChain-compatible retriever.
294
+
295
+ [Integration Guide →](docs/LANGCHAIN_RETRIEVER_INTEGRATION.md)
296
+
297
+ ## 📊 Performance & Analytics
298
+
299
+ ### Benchmark Results
300
+ Early experiments on small SimpleQA dataset samples:
301
+
302
+ | Configuration | Accuracy | Notes |
303
+ |--------------|----------|--------|
304
+ | gpt-4.1-mini + SearXNG + focused_iteration | 90-95% | Limited sample size |
305
+ | gpt-4.1-mini + Tavily | Up to 95% | Limited sample size |
306
+ | gemini-2.0-flash-001 + SearXNG | 82% | Single test run |
307
+
308
+ Note: These are preliminary results from initial testing. Performance varies significantly based on query types, model versions, and configurations. [Run your own benchmarks →](docs/BENCHMARKING.md)
309
+
310
+ ### Built-in Analytics Dashboard
311
+ Track costs, performance, and usage with detailed metrics. [Learn more →](docs/analytics-dashboard.md)
312
+
313
+ ## 🤖 Supported LLMs
314
+
315
+ ### Local Models (via Ollama)
316
+ - Llama 3, Mistral, Gemma, DeepSeek
317
+ - LLM processing stays local (search queries still go to web)
318
+ - No API costs
319
+
320
+ ### Cloud Models
321
+ - OpenAI (GPT-4, GPT-3.5)
322
+ - Anthropic (Claude 3)
323
+ - Google (Gemini)
324
+ - 100+ models via OpenRouter
325
+
326
+ [Model Setup →](docs/env_configuration.md)
327
+
328
+ ## 📚 Documentation
329
+
330
+ ### Getting Started
331
+ - [Installation Guide](https://github.com/LearningCircuit/local-deep-research/wiki/Installation)
332
+ - [Frequently Asked Questions](docs/faq.md)
333
+ - [API Quickstart](docs/api-quickstart.md)
334
+ - [Configuration Guide](docs/env_configuration.md)
335
+
336
+ ### Core Features
337
+ - [All Features Guide](docs/features.md)
338
+ - [Search Engines Guide](docs/search-engines.md)
339
+ - [Analytics Dashboard](docs/analytics-dashboard.md)
340
+
341
+ ### Advanced Features
342
+ - [LangChain Integration](docs/LANGCHAIN_RETRIEVER_INTEGRATION.md)
343
+ - [Benchmarking System](docs/BENCHMARKING.md)
344
+ - [Elasticsearch Setup](docs/elasticsearch_search_engine.md)
345
+ - [SearXNG Setup](docs/SearXNG-Setup.md)
346
+
347
+ ### Development
348
+ - [Docker Compose Guide](docs/docker-compose-guide.md)
349
+ - [Development Guide](docs/developing.md)
350
+ - [Security Guide](docs/security/CODEQL_GUIDE.md)
351
+ - [Release Guide](docs/RELEASE_GUIDE.md)
352
+
353
+ ### Examples & Tutorials
354
+ - [API Examples](examples/api_usage/)
355
+ - [Benchmark Examples](examples/benchmarks/)
356
+ - [Optimization Examples](examples/optimization/)
357
+
358
+ ## 🤝 Community & Support
359
+
360
+ - [Discord](https://discord.gg/ttcqQeFcJ3) - Get help and share research techniques
361
+ - [Reddit](https://www.reddit.com/r/LocalDeepResearch/) - Updates and showcases
362
+ - [GitHub Issues](https://github.com/LearningCircuit/local-deep-research/issues) - Bug reports
363
+
364
+ ## 🚀 Contributing
365
+
366
+ We welcome contributions! See our [Contributing Guide](CONTRIBUTING.md) to get started.
367
+
368
+ ## 📄 License
369
+
370
+ MIT License - see [LICENSE](LICENSE) file.
371
+
372
+ Built with: [LangChain](https://github.com/hwchase17/langchain), [Ollama](https://ollama.ai), [SearXNG](https://searxng.org/), [FAISS](https://github.com/facebookresearch/faiss)
373
+
374
+ > **Support Free Knowledge:** Consider donating to [Wikipedia](https://donate.wikimedia.org), [arXiv](https://arxiv.org/about/give), or [PubMed](https://www.nlm.nih.gov/pubs/donations/donations.html).