local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +5 -3
  149. local_deep_research/web/database/models.py +51 -2
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +51 -61
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +227 -41
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +310 -103
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.0.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -45,7 +45,9 @@ class SerpAPISearchEngine(BaseSearchEngine):
45
45
  """
46
46
  # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
47
47
  super().__init__(
48
- llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
48
+ llm=llm,
49
+ max_filtered_results=max_filtered_results,
50
+ max_results=max_results,
49
51
  )
50
52
  self.include_full_content = include_full_content
51
53
 
@@ -68,7 +70,9 @@ class SerpAPISearchEngine(BaseSearchEngine):
68
70
 
69
71
  serpapi_api_key = api_key
70
72
  if not serpapi_api_key:
71
- serpapi_api_key = get_db_setting("search.engine.web.serpapi.api_key")
73
+ serpapi_api_key = get_db_setting(
74
+ "search.engine.web.serpapi.api_key"
75
+ )
72
76
 
73
77
  if not serpapi_api_key:
74
78
  raise ValueError(
@@ -126,13 +130,17 @@ class SerpAPISearchEngine(BaseSearchEngine):
126
130
 
127
131
  try:
128
132
  # Get search results from SerpAPI
129
- organic_results = self.engine.results(query).get("organic_results", [])
133
+ organic_results = self.engine.results(query).get(
134
+ "organic_results", []
135
+ )
130
136
 
131
137
  # Format results as previews
132
138
  previews = []
133
139
  for result in organic_results:
134
140
  preview = {
135
- "id": result.get("position", len(previews)), # Use position as ID
141
+ "id": result.get(
142
+ "position", len(previews)
143
+ ), # Use position as ID
136
144
  "title": result.get("title", ""),
137
145
  "link": result.get("link", ""),
138
146
  "snippet": result.get("snippet", ""),
@@ -43,7 +43,9 @@ class WaybackSearchEngine(BaseSearchEngine):
43
43
  """
44
44
  # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
45
45
  super().__init__(
46
- llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
46
+ llm=llm,
47
+ max_filtered_results=max_filtered_results,
48
+ max_results=max_results,
47
49
  )
48
50
  self.max_snapshots_per_url = max_snapshots_per_url
49
51
  self.language = language
@@ -79,7 +81,9 @@ class WaybackSearchEngine(BaseSearchEngine):
79
81
  return [f"http://{query}"]
80
82
 
81
83
  # For non-URL queries, use DuckDuckGo to find relevant URLs
82
- logger.info("Query is not a URL, using DuckDuckGo to find relevant URLs")
84
+ logger.info(
85
+ "Query is not a URL, using DuckDuckGo to find relevant URLs"
86
+ )
83
87
  try:
84
88
  # Import DuckDuckGo search engine
85
89
  from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
@@ -91,9 +95,13 @@ class WaybackSearchEngine(BaseSearchEngine):
91
95
  results = ddg.results(query, url_search_limit)
92
96
 
93
97
  # Extract URLs from results
94
- ddg_urls = [result.get("link") for result in results if result.get("link")]
98
+ ddg_urls = [
99
+ result.get("link") for result in results if result.get("link")
100
+ ]
95
101
  if ddg_urls:
96
- logger.info(f"Found {len(ddg_urls)} URLs from DuckDuckGo search")
102
+ logger.info(
103
+ f"Found {len(ddg_urls)} URLs from DuckDuckGo search"
104
+ )
97
105
  return ddg_urls
98
106
  except Exception as e:
99
107
  logger.error(f"Error using DuckDuckGo for URL discovery: {e}")
@@ -185,12 +193,16 @@ class WaybackSearchEngine(BaseSearchEngine):
185
193
  snapshot = dict(zip(headers, item))
186
194
  timestamp = snapshot.get("timestamp", "")
187
195
 
188
- wayback_url = f"https://web.archive.org/web/{timestamp}/{url}"
196
+ wayback_url = (
197
+ f"https://web.archive.org/web/{timestamp}/{url}"
198
+ )
189
199
 
190
200
  snapshots.append(
191
201
  {
192
202
  "timestamp": timestamp,
193
- "formatted_date": self._format_timestamp(timestamp),
203
+ "formatted_date": self._format_timestamp(
204
+ timestamp
205
+ ),
194
206
  "url": wayback_url,
195
207
  "original_url": url,
196
208
  "available": True,
@@ -265,8 +277,12 @@ class WaybackSearchEngine(BaseSearchEngine):
265
277
  if not html or not html.strip():
266
278
  return ""
267
279
  try:
268
- paragraphs = justext.justext(html, justext.get_stoplist(self.language))
269
- cleaned = "\n".join([p.text for p in paragraphs if not p.is_boilerplate])
280
+ paragraphs = justext.justext(
281
+ html, justext.get_stoplist(self.language)
282
+ )
283
+ cleaned = "\n".join(
284
+ [p.text for p in paragraphs if not p.is_boilerplate]
285
+ )
270
286
  return cleaned
271
287
  except Exception as e:
272
288
  logger.error(f"Error removing boilerplate: {e}")
@@ -471,7 +487,10 @@ class WaybackSearchEngine(BaseSearchEngine):
471
487
  response = requests.get(self.available_api, params={"url": url})
472
488
  data = response.json()
473
489
 
474
- if "archived_snapshots" in data and "closest" in data["archived_snapshots"]:
490
+ if (
491
+ "archived_snapshots" in data
492
+ and "closest" in data["archived_snapshots"]
493
+ ):
475
494
  snapshot = data["archived_snapshots"]["closest"]
476
495
  timestamp = snapshot["timestamp"]
477
496
  wayback_url = snapshot["url"]
@@ -491,7 +510,9 @@ class WaybackSearchEngine(BaseSearchEngine):
491
510
  not hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
492
511
  or not search_config.SEARCH_SNIPPETS_ONLY
493
512
  ):
494
- raw_html, full_content = self._get_wayback_content(wayback_url)
513
+ raw_html, full_content = self._get_wayback_content(
514
+ wayback_url
515
+ )
495
516
  result["raw_html"] = raw_html
496
517
  result["full_content"] = full_content
497
518
 
@@ -38,7 +38,9 @@ class WikipediaSearchEngine(BaseSearchEngine):
38
38
  """
39
39
  # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
40
40
  super().__init__(
41
- llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
41
+ llm=llm,
42
+ max_filtered_results=max_filtered_results,
43
+ max_results=max_results,
42
44
  )
43
45
  self.include_content = include_content
44
46
  self.sentences = sentences
@@ -113,6 +115,7 @@ class WikipediaSearchEngine(BaseSearchEngine):
113
115
  "title": title,
114
116
  "snippet": summary,
115
117
  "link": f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}",
118
+ "source": "Wikipedia",
116
119
  }
117
120
 
118
121
  previews.append(preview)
@@ -128,7 +131,9 @@ class WikipediaSearchEngine(BaseSearchEngine):
128
131
  logger.error(f"Unexpected error for '{title}': {e}")
129
132
  continue
130
133
 
131
- logger.info(f"Successfully created {len(previews)} previews from Wikipedia")
134
+ logger.info(
135
+ f"Successfully created {len(previews)} previews from Wikipedia"
136
+ )
132
137
  return previews
133
138
 
134
139
  except Exception as e:
@@ -176,6 +181,7 @@ class WikipediaSearchEngine(BaseSearchEngine):
176
181
  "title": page.title,
177
182
  "link": page.url,
178
183
  "snippet": item.get("snippet", ""), # Keep existing snippet
184
+ "source": "Wikipedia",
179
185
  }
180
186
 
181
187
  # Add additional information
@@ -218,7 +224,9 @@ class WikipediaSearchEngine(BaseSearchEngine):
218
224
  """
219
225
  sentences = sentences or self.sentences
220
226
  try:
221
- return wikipedia.summary(title, sentences=sentences, auto_suggest=False)
227
+ return wikipedia.summary(
228
+ title, sentences=sentences, auto_suggest=False
229
+ )
222
230
  except wikipedia.exceptions.DisambiguationError as e:
223
231
  if e.options and len(e.options) > 0:
224
232
  return wikipedia.summary(
@@ -250,6 +258,7 @@ class WikipediaSearchEngine(BaseSearchEngine):
250
258
  "title": page.title,
251
259
  "link": page.url,
252
260
  "snippet": self.get_summary(title, self.sentences),
261
+ "source": "Wikipedia",
253
262
  }
254
263
 
255
264
  # Add additional information if requested
@@ -1,4 +1,5 @@
1
1
  import json
2
+ import time
2
3
  from abc import ABC, abstractmethod
3
4
  from datetime import datetime
4
5
  from typing import Any, Dict, List, Optional
@@ -7,7 +8,8 @@ from langchain_core.language_models import BaseLLM
7
8
  from loguru import logger
8
9
 
9
10
  from ..advanced_search_system.filters.base_filter import BaseFilter
10
- from ..config import search_config
11
+ from ..metrics.search_tracker import get_search_tracker
12
+ from ..utilities.db_utils import get_db_setting
11
13
 
12
14
 
13
15
  class BaseSearchEngine(ABC):
@@ -50,8 +52,12 @@ class BaseSearchEngine(ABC):
50
52
  self._content_filters = []
51
53
 
52
54
  self.llm = llm # LLM for relevance filtering
53
- self._max_filtered_results = int(max_filtered_results) # Ensure it's an integer
54
- self._max_results = max(1, int(max_results)) # Ensure it's a positive integer
55
+ self._max_filtered_results = int(
56
+ max_filtered_results
57
+ ) # Ensure it's an integer
58
+ self._max_results = max(
59
+ 1, int(max_results)
60
+ ) # Ensure it's a positive integer
55
61
 
56
62
  @property
57
63
  def max_filtered_results(self) -> int:
@@ -92,44 +98,80 @@ class BaseSearchEngine(ABC):
92
98
  Returns:
93
99
  List of search results with full content (if available)
94
100
  """
95
- # Ensure we're measuring time correctly for citation tracking
101
+ # Track search call for metrics
102
+ tracker = get_search_tracker()
103
+ engine_name = self.__class__.__name__.replace(
104
+ "SearchEngine", ""
105
+ ).lower()
106
+ start_time = time.time()
96
107
 
97
- # Step 1: Get preview information for items
98
- previews = self._get_previews(query)
99
- if not previews:
100
- logger.info(
101
- f"Search engine {self.__class__.__name__} returned no preview results for query: {query}"
102
- )
103
- return []
108
+ success = True
109
+ error_message = None
110
+ results_count = 0
111
+
112
+ try:
113
+ # Step 1: Get preview information for items
114
+ previews = self._get_previews(query)
115
+ if not previews:
116
+ logger.info(
117
+ f"Search engine {self.__class__.__name__} returned no preview results for query: {query}"
118
+ )
119
+ results_count = 0
120
+ return []
104
121
 
105
- for preview_filter in self._preview_filters:
106
- previews = preview_filter.filter_results(previews, query)
122
+ for preview_filter in self._preview_filters:
123
+ previews = preview_filter.filter_results(previews, query)
107
124
 
108
- # Step 2: Filter previews for relevance with LLM
109
- filtered_items = self._filter_for_relevance(previews, query)
110
- if not filtered_items:
125
+ # Step 2: Filter previews for relevance with LLM
126
+ # TEMPORARILY DISABLED: Skip LLM relevance filtering
127
+ filtered_items = previews
111
128
  logger.info(
112
- f"All preview results were filtered out as irrelevant for query: {query}"
129
+ f"LLM relevance filtering disabled - returning all {len(previews)} previews"
113
130
  )
114
- # Do not fall back to previews, return empty list instead
115
- return []
116
131
 
117
- # Step 3: Get full content for filtered items
118
- # Import config inside the method to avoid circular import
132
+ # # Original filtering code (disabled):
133
+ # filtered_items = self._filter_for_relevance(previews, query)
134
+ # if not filtered_items:
135
+ # logger.info(
136
+ # f"All preview results were filtered out as irrelevant for query: {query}"
137
+ # )
138
+ # # Do not fall back to previews, return empty list instead
139
+ # results_count = 0
140
+ # return []
141
+
142
+ # Step 3: Get full content for filtered items
143
+ # Import config inside the method to avoid circular import
144
+
145
+ if get_db_setting("search.snippets_only", True):
146
+ logger.info("Returning snippet-only results as per config")
147
+ results = filtered_items
148
+ else:
149
+ results = self._get_full_content(filtered_items)
150
+
151
+ for content_filter in self._content_filters:
152
+ results = content_filter.filter_results(results, query)
119
153
 
120
- if (
121
- hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
122
- and search_config.SEARCH_SNIPPETS_ONLY
123
- ):
124
- logger.info("Returning snippet-only results as per config")
125
- results = filtered_items
126
- else:
127
- results = self._get_full_content(filtered_items)
154
+ results_count = len(results)
155
+ return results
128
156
 
129
- for content_filter in self._content_filters:
130
- results = content_filter.filter_results(results, query)
157
+ except Exception as e:
158
+ success = False
159
+ error_message = str(e)
160
+ logger.error(f"Search engine {self.__class__.__name__} failed: {e}")
161
+ results_count = 0
162
+ return []
131
163
 
132
- return results
164
+ finally:
165
+ # Record search metrics
166
+ response_time_ms = int((time.time() - start_time) * 1000)
167
+ tracker.record_search(
168
+ engine_name=engine_name,
169
+ query=query,
170
+ results_count=results_count,
171
+ response_time_ms=response_time_ms,
172
+ success=success,
173
+ error_message=error_message,
174
+ )
133
175
 
134
176
  def invoke(self, query: str) -> List[Dict[str, Any]]:
135
177
  """Compatibility method for LangChain tools"""
@@ -229,7 +271,9 @@ Respond with ONLY the JSON array, no other text."""
229
271
  if idx < len(previews):
230
272
  ranked_results.append(previews[idx])
231
273
  else:
232
- logger.warning(f"Index {idx} out of range, skipping")
274
+ logger.warning(
275
+ f"Index {idx} out of range, skipping"
276
+ )
233
277
 
234
278
  # Limit to max_filtered_results if specified
235
279
  if (
@@ -244,14 +288,18 @@ Respond with ONLY the JSON array, no other text."""
244
288
  return ranked_results
245
289
 
246
290
  except json.JSONDecodeError as e:
247
- logger.warning(f"Failed to parse JSON from LLM response: {e}")
291
+ logger.warning(
292
+ f"Failed to parse JSON from LLM response: {e}"
293
+ )
248
294
  logger.debug(f"Problematic JSON text: {array_text}")
249
295
  return []
250
296
  else:
251
297
  logger.warning(
252
298
  "Could not find JSON array in response, returning original previews"
253
299
  )
254
- logger.debug(f"Response text without JSON array: {response_text}")
300
+ logger.debug(
301
+ f"Response text without JSON array: {response_text}"
302
+ )
255
303
  return previews[: min(5, len(previews))]
256
304
 
257
305
  except Exception:
@@ -56,14 +56,18 @@ def create_search_engine(
56
56
  api_key = engine_config.get("api_key")
57
57
 
58
58
  if not api_key:
59
- logger.info(f"Required API key for {engine_name} not found in settings.")
59
+ logger.info(
60
+ f"Required API key for {engine_name} not found in settings."
61
+ )
60
62
  return None
61
63
 
62
64
  # Set the engine-specific environment variable if needed
63
65
  # This is to support engines that directly check environment variables
64
66
  if engine_name == "brave" and not os.getenv("BRAVE_API_KEY"):
65
67
  os.environ["BRAVE_API_KEY"] = api_key
66
- logger.info("Set BRAVE_API_KEY environment variable from database setting")
68
+ logger.info(
69
+ "Set BRAVE_API_KEY environment variable from database setting"
70
+ )
67
71
 
68
72
  # Check for LLM requirements
69
73
  if engine_config.get("requires_llm", False) and not llm:
@@ -149,7 +153,9 @@ def _create_full_search_wrapper(
149
153
  class_name = engine_config.get("full_search_class")
150
154
 
151
155
  if not module_path or not class_name:
152
- logger.warning(f"Full search configuration missing for {engine_name}")
156
+ logger.warning(
157
+ f"Full search configuration missing for {engine_name}"
158
+ )
153
159
  return base_engine
154
160
 
155
161
  # Import the full search class
@@ -163,10 +169,15 @@ def _create_full_search_wrapper(
163
169
  ] # Skip 'self'
164
170
 
165
171
  # Extract relevant parameters for the full search wrapper
166
- wrapper_params = {k: v for k, v in params.items() if k in wrapper_init_params}
172
+ wrapper_params = {
173
+ k: v for k, v in params.items() if k in wrapper_init_params
174
+ }
167
175
 
168
176
  # Special case for SerpAPI which needs the API key directly
169
- if engine_name == "serpapi" and "serpapi_api_key" in wrapper_init_params:
177
+ if (
178
+ engine_name == "serpapi"
179
+ and "serpapi_api_key" in wrapper_init_params
180
+ ):
170
181
  serpapi_api_key = os.getenv("SERP_API_KEY")
171
182
  if serpapi_api_key:
172
183
  wrapper_params["serpapi_api_key"] = serpapi_api_key
@@ -196,7 +207,9 @@ def _create_full_search_wrapper(
196
207
  if not brave_api_key:
197
208
  from ..utilities.db_utils import get_db_setting
198
209
 
199
- brave_api_key = get_db_setting("search.engine.web.brave.api_key")
210
+ brave_api_key = get_db_setting(
211
+ "search.engine.web.brave.api_key"
212
+ )
200
213
 
201
214
  if brave_api_key:
202
215
  wrapper_params["api_key"] = brave_api_key
@@ -236,7 +249,9 @@ def _create_full_search_wrapper(
236
249
  return full_search
237
250
 
238
251
  except Exception:
239
- logger.exception(f"Failed to create full search wrapper for {engine_name}")
252
+ logger.exception(
253
+ f"Failed to create full search wrapper for {engine_name}"
254
+ )
240
255
  return base_engine
241
256
 
242
257
 
@@ -307,7 +322,9 @@ def get_search(
307
322
  )
308
323
  else:
309
324
  engine_type = type(engine).__name__
310
- logger.info(f"Successfully created search engine of type: {engine_type}")
325
+ logger.info(
326
+ f"Successfully created search engine of type: {engine_type}"
327
+ )
311
328
  # Check if the engine has run method
312
329
  if hasattr(engine, "run"):
313
330
  logger.info(f"Engine has 'run' method: {getattr(engine, 'run')}")
@@ -12,7 +12,9 @@ from ..utilities.db_utils import get_db_setting
12
12
  from .default_search_engines import get_default_elasticsearch_config
13
13
 
14
14
 
15
- def _extract_per_engine_config(raw_config: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
15
+ def _extract_per_engine_config(
16
+ raw_config: Dict[str, Any],
17
+ ) -> Dict[str, Dict[str, Any]]:
16
18
  """
17
19
  Converts the "flat" configuration loaded from the settings database into
18
20
  individual settings dictionaries for each engine.
@@ -55,7 +57,9 @@ def search_config() -> Dict[str, Any]:
55
57
  search_engines = _extract_per_engine_config(config_data)
56
58
  search_engines["auto"] = get_db_setting("search.engine.auto", {})
57
59
 
58
- logger.info(f"Loaded {len(search_engines)} search engines from configuration file")
60
+ logger.info(
61
+ f"Loaded {len(search_engines)} search engines from configuration file"
62
+ )
59
63
  logger.info(f"\n {', '.join(sorted(search_engines.keys()))} \n")
60
64
 
61
65
  # Add alias for 'auto' if it exists
@@ -141,7 +145,9 @@ def local_search_engines() -> List[str]:
141
145
  local_collections_data.pop("local_all", None)
142
146
  # Remove disabled collections.
143
147
  local_collections_data = {
144
- k: v for k, v in local_collections_data.items() if v.get("enabled", True)
148
+ k: v
149
+ for k, v in local_collections_data.items()
150
+ if v.get("enabled", True)
145
151
  }
146
152
 
147
153
  enabled_collections = list(local_collections_data.keys())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: local-deep-research
3
- Version: 0.4.4
3
+ Version: 0.5.0
4
4
  Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
5
5
  Author-Email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
6
6
  License: MIT License
@@ -71,10 +71,12 @@ Requires-Dist: optuna>=4.3.0
71
71
  Requires-Dist: elasticsearch==8.14.0
72
72
  Requires-Dist: methodtools>=0.4.7
73
73
  Requires-Dist: loguru>=0.7.3
74
+ Requires-Dist: cachetools>=5.5.2
74
75
  Requires-Dist: matplotlib>=3.10.3
75
76
  Requires-Dist: pandas>=2.2.3
76
77
  Requires-Dist: plotly>=6.0.1
77
78
  Requires-Dist: kaleido==0.2.1
79
+ Requires-Dist: aiohttp>=3.9.0
78
80
  Description-Content-Type: text/markdown
79
81
 
80
82
  # Local Deep Research
@@ -400,6 +402,10 @@ For more information and examples of what Local Deep Research can produce:
400
402
  - [Reddit](https://www.reddit.com/r/LocalDeepResearch/): Announcements, updates, and community showcase
401
403
  - [GitHub Issues](https://github.com/LearningCircuit/local-deep-research/issues): Bug reports and feature requests
402
404
 
405
+ ## 🚀 Contributing
406
+
407
+ We welcome contributions! Whether you're fixing bugs, adding features, or improving documentation, we'd love to have you as part of our community. Please see our [Contributing Guide](CONTRIBUTING.md) for guidelines on how to get started.
408
+
403
409
  ## 📄 License & Acknowledgments
404
410
 
405
411
  This project is licensed under the MIT License.