local-deep-research 0.5.9__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. local_deep_research/__version__.py +1 -1
  2. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +11 -1
  3. local_deep_research/advanced_search_system/questions/browsecomp_question.py +32 -6
  4. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +32 -8
  5. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -0
  6. local_deep_research/api/__init__.py +2 -0
  7. local_deep_research/api/research_functions.py +177 -3
  8. local_deep_research/benchmarks/graders.py +150 -5
  9. local_deep_research/benchmarks/models/__init__.py +19 -0
  10. local_deep_research/benchmarks/models/benchmark_models.py +283 -0
  11. local_deep_research/benchmarks/ui/__init__.py +1 -0
  12. local_deep_research/benchmarks/web_api/__init__.py +6 -0
  13. local_deep_research/benchmarks/web_api/benchmark_routes.py +862 -0
  14. local_deep_research/benchmarks/web_api/benchmark_service.py +920 -0
  15. local_deep_research/config/llm_config.py +106 -21
  16. local_deep_research/defaults/default_settings.json +447 -2
  17. local_deep_research/error_handling/report_generator.py +10 -0
  18. local_deep_research/llm/__init__.py +19 -0
  19. local_deep_research/llm/llm_registry.py +155 -0
  20. local_deep_research/metrics/db_models.py +3 -7
  21. local_deep_research/metrics/search_tracker.py +25 -11
  22. local_deep_research/search_system.py +12 -9
  23. local_deep_research/utilities/log_utils.py +23 -10
  24. local_deep_research/utilities/thread_context.py +99 -0
  25. local_deep_research/web/app_factory.py +32 -8
  26. local_deep_research/web/database/benchmark_schema.py +230 -0
  27. local_deep_research/web/database/convert_research_id_to_string.py +161 -0
  28. local_deep_research/web/database/models.py +55 -1
  29. local_deep_research/web/database/schema_upgrade.py +397 -2
  30. local_deep_research/web/database/uuid_migration.py +265 -0
  31. local_deep_research/web/routes/api_routes.py +62 -31
  32. local_deep_research/web/routes/history_routes.py +13 -6
  33. local_deep_research/web/routes/metrics_routes.py +264 -4
  34. local_deep_research/web/routes/research_routes.py +45 -18
  35. local_deep_research/web/routes/route_registry.py +352 -0
  36. local_deep_research/web/routes/settings_routes.py +382 -22
  37. local_deep_research/web/services/research_service.py +22 -29
  38. local_deep_research/web/services/settings_manager.py +53 -0
  39. local_deep_research/web/services/settings_service.py +2 -0
  40. local_deep_research/web/static/css/styles.css +8 -0
  41. local_deep_research/web/static/js/components/detail.js +7 -14
  42. local_deep_research/web/static/js/components/details.js +8 -10
  43. local_deep_research/web/static/js/components/fallback/ui.js +4 -4
  44. local_deep_research/web/static/js/components/history.js +6 -6
  45. local_deep_research/web/static/js/components/logpanel.js +14 -11
  46. local_deep_research/web/static/js/components/progress.js +51 -46
  47. local_deep_research/web/static/js/components/research.js +250 -89
  48. local_deep_research/web/static/js/components/results.js +5 -7
  49. local_deep_research/web/static/js/components/settings.js +32 -26
  50. local_deep_research/web/static/js/components/settings_sync.js +24 -23
  51. local_deep_research/web/static/js/config/urls.js +285 -0
  52. local_deep_research/web/static/js/main.js +8 -8
  53. local_deep_research/web/static/js/research_form.js +267 -12
  54. local_deep_research/web/static/js/services/api.js +18 -18
  55. local_deep_research/web/static/js/services/keyboard.js +8 -8
  56. local_deep_research/web/static/js/services/socket.js +53 -35
  57. local_deep_research/web/static/js/services/ui.js +1 -1
  58. local_deep_research/web/templates/base.html +4 -1
  59. local_deep_research/web/templates/components/custom_dropdown.html +5 -3
  60. local_deep_research/web/templates/components/mobile_nav.html +3 -3
  61. local_deep_research/web/templates/components/sidebar.html +9 -3
  62. local_deep_research/web/templates/pages/benchmark.html +2697 -0
  63. local_deep_research/web/templates/pages/benchmark_results.html +1136 -0
  64. local_deep_research/web/templates/pages/benchmark_simple.html +453 -0
  65. local_deep_research/web/templates/pages/cost_analytics.html +1 -1
  66. local_deep_research/web/templates/pages/metrics.html +212 -39
  67. local_deep_research/web/templates/pages/research.html +8 -6
  68. local_deep_research/web/templates/pages/star_reviews.html +1 -1
  69. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -1
  70. local_deep_research/web_search_engines/engines/search_engine_brave.py +15 -1
  71. local_deep_research/web_search_engines/engines/search_engine_ddg.py +20 -1
  72. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +26 -2
  73. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +15 -1
  74. local_deep_research/web_search_engines/engines/search_engine_retriever.py +192 -0
  75. local_deep_research/web_search_engines/engines/search_engine_tavily.py +307 -0
  76. local_deep_research/web_search_engines/rate_limiting/__init__.py +14 -0
  77. local_deep_research/web_search_engines/rate_limiting/__main__.py +9 -0
  78. local_deep_research/web_search_engines/rate_limiting/cli.py +209 -0
  79. local_deep_research/web_search_engines/rate_limiting/exceptions.py +21 -0
  80. local_deep_research/web_search_engines/rate_limiting/tracker.py +506 -0
  81. local_deep_research/web_search_engines/retriever_registry.py +108 -0
  82. local_deep_research/web_search_engines/search_engine_base.py +161 -43
  83. local_deep_research/web_search_engines/search_engine_factory.py +14 -0
  84. local_deep_research/web_search_engines/search_engines_config.py +20 -0
  85. local_deep_research-0.6.0.dist-info/METADATA +374 -0
  86. {local_deep_research-0.5.9.dist-info → local_deep_research-0.6.0.dist-info}/RECORD +89 -64
  87. local_deep_research-0.5.9.dist-info/METADATA +0 -420
  88. {local_deep_research-0.5.9.dist-info → local_deep_research-0.6.0.dist-info}/WHEEL +0 -0
  89. {local_deep_research-0.5.9.dist-info → local_deep_research-0.6.0.dist-info}/entry_points.txt +0 -0
  90. {local_deep_research-0.5.9.dist-info → local_deep_research-0.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -1 +1 @@
1
- __version__ = "0.5.9"
1
+ __version__ = "0.6.0"
@@ -236,6 +236,11 @@ class ProgressiveExplorer:
236
236
  """Execute searches in parallel and return results."""
237
237
  results = []
238
238
 
239
+ # Import context preservation utility
240
+ from ...utilities.thread_context import (
241
+ create_context_preserving_wrapper,
242
+ )
243
+
239
244
  def search_query(query):
240
245
  try:
241
246
  search_results = self.search_engine.run(query)
@@ -244,11 +249,16 @@ class ProgressiveExplorer:
244
249
  logger.error(f"Error searching '{query}': {str(e)}")
245
250
  return (query, [])
246
251
 
252
+ # Create context-preserving wrapper for the search function
253
+ context_aware_search = create_context_preserving_wrapper(search_query)
254
+
247
255
  # Run searches in parallel
248
256
  with concurrent.futures.ThreadPoolExecutor(
249
257
  max_workers=max_workers
250
258
  ) as executor:
251
- futures = [executor.submit(search_query, q) for q in queries]
259
+ futures = [
260
+ executor.submit(context_aware_search, q) for q in queries
261
+ ]
252
262
  for future in concurrent.futures.as_completed(futures):
253
263
  results.append(future.result())
254
264
 
@@ -144,15 +144,25 @@ DESCRIPTORS: [entity1], [entity2], ...
144
144
  # 1. Original query (always include)
145
145
  searches.append(query)
146
146
 
147
+ # If only 1 question requested, return just the original query
148
+ if num_questions <= 1:
149
+ return searches[:1]
150
+
147
151
  # 2. Domain exploration searches (combine key entities)
148
- if entities["names"]:
152
+ if entities["names"] and len(searches) < num_questions:
149
153
  for name in entities["names"][:2]: # Top 2 names
154
+ if len(searches) >= num_questions:
155
+ break
150
156
  searches.append(f"{name}")
151
- if entities["descriptors"]:
157
+ if entities["descriptors"] and len(searches) < num_questions:
152
158
  searches.append(f"{name} {entities['descriptors'][0]}")
153
159
 
154
160
  # 3. Temporal searches if years are important
155
- if entities["temporal"] and len(entities["temporal"]) <= 10:
161
+ if (
162
+ entities["temporal"]
163
+ and len(entities["temporal"]) <= 10
164
+ and len(searches) < num_questions
165
+ ):
156
166
  # For small year ranges, search each year with a key term
157
167
  key_term = (
158
168
  entities["names"][0]
@@ -162,14 +172,18 @@ DESCRIPTORS: [entity1], [entity2], ...
162
172
  else ""
163
173
  )
164
174
  for year in entities["temporal"][:5]: # Limit to 5 years initially
175
+ if len(searches) >= num_questions:
176
+ break
165
177
  if key_term:
166
178
  searches.append(f"{key_term} {year}")
167
179
 
168
180
  # 4. Location-based searches
169
- if entities["locations"]:
181
+ if entities["locations"] and len(searches) < num_questions:
170
182
  for location in entities["locations"][:2]:
183
+ if len(searches) >= num_questions:
184
+ break
171
185
  searches.append(f"{location}")
172
- if entities["descriptors"]:
186
+ if entities["descriptors"] and len(searches) < num_questions:
173
187
  searches.append(f"{location} {entities['descriptors'][0]}")
174
188
 
175
189
  # Remove duplicates and limit to requested number
@@ -179,6 +193,8 @@ DESCRIPTORS: [entity1], [entity2], ...
179
193
  if s.lower() not in seen:
180
194
  seen.add(s.lower())
181
195
  unique_searches.append(s)
196
+ if len(unique_searches) >= num_questions:
197
+ break
182
198
 
183
199
  return unique_searches[:num_questions]
184
200
 
@@ -238,21 +254,26 @@ Format: One search per line
238
254
  if line:
239
255
  searches.append(line)
240
256
 
241
- # Ensure we have enough searches
257
+ # Ensure we have enough searches, but respect the limit
242
258
  while len(searches) < num_questions:
243
259
  # Generate combinations programmatically
244
260
  if iteration <= 5 and entities["temporal"]:
245
261
  # Continue with year-based searches
262
+ added_any = False
246
263
  for year in entities["temporal"]:
247
264
  if not self._was_searched(year, questions_by_iteration):
248
265
  base_term = (
249
266
  entities["names"][0] if entities["names"] else ""
250
267
  )
251
268
  searches.append(f"{base_term} {year}".strip())
269
+ added_any = True
252
270
  if len(searches) >= num_questions:
253
271
  break
272
+ if not added_any:
273
+ break # No more year searches to add
254
274
  else:
255
275
  # Combine multiple constraints
276
+ added_any = False
256
277
  if entities["names"] and entities["descriptors"]:
257
278
  for name in entities["names"]:
258
279
  for desc in entities["descriptors"]:
@@ -261,8 +282,13 @@ Format: One search per line
261
282
  combo, questions_by_iteration
262
283
  ):
263
284
  searches.append(combo)
285
+ added_any = True
264
286
  if len(searches) >= num_questions:
265
287
  break
288
+ if len(searches) >= num_questions:
289
+ break
290
+ if not added_any:
291
+ break # No more combinations to add
266
292
 
267
293
  return searches[:num_questions]
268
294
 
@@ -53,9 +53,9 @@ class FocusedIterationStrategy(BaseSearchStrategy):
53
53
  search=None,
54
54
  citation_handler=None,
55
55
  all_links_of_system=None,
56
- max_iterations: int = 8, # OPTIMAL FOR SIMPLEQA: 96.51% accuracy achieved
56
+ max_iterations: int = 8, # OPTIMAL FOR SIMPLEQA: 90%+ accuracy achieved
57
57
  questions_per_iteration: int = 5, # OPTIMAL FOR SIMPLEQA: proven config
58
- use_browsecomp_optimization: bool = True, # Can be False for pure SimpleQA
58
+ use_browsecomp_optimization: bool = True, # True for 90%+ accuracy with forced_answer handler
59
59
  ):
60
60
  """Initialize with components optimized for focused iteration."""
61
61
  super().__init__(all_links_of_system)
@@ -63,9 +63,9 @@ class FocusedIterationStrategy(BaseSearchStrategy):
63
63
  self.model = model or get_llm()
64
64
  self.progress_callback = None
65
65
 
66
- # Configuration
67
- self.max_iterations = max_iterations
68
- self.questions_per_iteration = questions_per_iteration
66
+ # Configuration - ensure these are integers
67
+ self.max_iterations = int(max_iterations)
68
+ self.questions_per_iteration = int(questions_per_iteration)
69
69
  self.use_browsecomp_optimization = use_browsecomp_optimization
70
70
 
71
71
  # Initialize specialized components
@@ -158,9 +158,11 @@ class FocusedIterationStrategy(BaseSearchStrategy):
158
158
  questions_by_iteration=self.questions_by_iteration,
159
159
  )
160
160
 
161
- # Always include original query in first iteration
161
+ # Always include original query in first iteration, but respect question limit
162
162
  if iteration == 1 and query not in questions:
163
163
  questions = [query] + questions
164
+ # Trim to respect questions_per_iteration limit
165
+ questions = questions[: self.questions_per_iteration]
164
166
 
165
167
  self.questions_by_iteration[iteration] = questions
166
168
  logger.info(f"Iteration {iteration} questions: {questions}")
@@ -357,6 +359,11 @@ class FocusedIterationStrategy(BaseSearchStrategy):
357
359
  """Execute searches in parallel (like source-based strategy)."""
358
360
  all_results = []
359
361
 
362
+ # Import context preservation utility
363
+ from ...utilities.thread_context import (
364
+ create_context_preserving_wrapper,
365
+ )
366
+
360
367
  def search_question(q):
361
368
  try:
362
369
  result = self.search.run(q)
@@ -365,11 +372,18 @@ class FocusedIterationStrategy(BaseSearchStrategy):
365
372
  logger.error(f"Error searching '{q}': {str(e)}")
366
373
  return {"question": q, "results": [], "error": str(e)}
367
374
 
375
+ # Create context-preserving wrapper for the search function
376
+ context_aware_search = create_context_preserving_wrapper(
377
+ search_question
378
+ )
379
+
368
380
  # Run searches in parallel
369
381
  with concurrent.futures.ThreadPoolExecutor(
370
382
  max_workers=len(queries)
371
383
  ) as executor:
372
- futures = [executor.submit(search_question, q) for q in queries]
384
+ futures = [
385
+ executor.submit(context_aware_search, q) for q in queries
386
+ ]
373
387
 
374
388
  for future in concurrent.futures.as_completed(futures):
375
389
  result_dict = future.result()
@@ -385,6 +399,11 @@ class FocusedIterationStrategy(BaseSearchStrategy):
385
399
  completed_searches = 0
386
400
  total_searches = len(queries)
387
401
 
402
+ # Import context preservation utility
403
+ from ...utilities.thread_context import (
404
+ create_context_preserving_wrapper,
405
+ )
406
+
388
407
  def search_question_with_progress(q):
389
408
  nonlocal completed_searches
390
409
  try:
@@ -440,12 +459,17 @@ class FocusedIterationStrategy(BaseSearchStrategy):
440
459
  "result_count": 0,
441
460
  }
442
461
 
462
+ # Create context-preserving wrapper for the search function
463
+ context_aware_search_with_progress = create_context_preserving_wrapper(
464
+ search_question_with_progress
465
+ )
466
+
443
467
  # Run searches in parallel
444
468
  with concurrent.futures.ThreadPoolExecutor(
445
469
  max_workers=min(len(queries), 5)
446
470
  ) as executor:
447
471
  futures = [
448
- executor.submit(search_question_with_progress, q)
472
+ executor.submit(context_aware_search_with_progress, q)
449
473
  for q in queries
450
474
  ]
451
475
 
@@ -8,6 +8,7 @@ from ...config.llm_config import get_llm
8
8
  from ...config.search_config import get_search
9
9
  from ...utilities.db_utils import get_db_setting
10
10
  from ...utilities.threading_utils import thread_context, thread_with_app_context
11
+ from ...utilities.thread_context import preserve_research_context
11
12
  from ..filters.cross_engine_filter import CrossEngineFilter
12
13
  from ..findings.repository import FindingsRepository
13
14
  from ..questions.atomic_fact_question import AtomicFactQuestionGenerator
@@ -211,6 +212,7 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
211
212
 
212
213
  # Function for thread pool
213
214
  @thread_with_app_context
215
+ @preserve_research_context
214
216
  def search_question(q):
215
217
  try:
216
218
  result = self.search.run(q)
@@ -5,12 +5,14 @@ API module for programmatic access to Local Deep Research functionality.
5
5
 
6
6
  from .research_functions import (
7
7
  analyze_documents,
8
+ detailed_research,
8
9
  generate_report,
9
10
  quick_summary,
10
11
  )
11
12
 
12
13
  __all__ = [
13
14
  "quick_summary",
15
+ "detailed_research",
14
16
  "generate_report",
15
17
  "analyze_documents",
16
18
  ]
@@ -3,7 +3,8 @@ API module for Local Deep Research.
3
3
  Provides programmatic access to search and research capabilities.
4
4
  """
5
5
 
6
- from typing import Any, Callable, Dict, Optional
6
+ from datetime import datetime
7
+ from typing import Any, Callable, Dict, Optional, Union
7
8
 
8
9
  from loguru import logger
9
10
 
@@ -24,6 +25,8 @@ def _init_search_system(
24
25
  search_strategy: str = "source_based",
25
26
  iterations: int = 1,
26
27
  questions_per_iteration: int = 1,
28
+ retrievers: Optional[Dict[str, Any]] = None,
29
+ llms: Optional[Dict[str, Any]] = None,
27
30
  ) -> AdvancedSearchSystem:
28
31
  """
29
32
  Initializes the advanced search system with specified parameters. This function sets up
@@ -43,11 +46,30 @@ def _init_search_system(
43
46
  iterations: Number of research cycles to perform
44
47
  questions_per_iteration: Number of questions to generate per cycle
45
48
  search_strategy: The name of the search strategy to use.
49
+ retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
50
+ llms: Optional dictionary of {name: llm} pairs to use as language models
46
51
 
47
52
  Returns:
48
53
  AdvancedSearchSystem: An instance of the configured AdvancedSearchSystem.
49
54
 
50
55
  """
56
+ # Register retrievers if provided
57
+ if retrievers:
58
+ from ..web_search_engines.retriever_registry import retriever_registry
59
+
60
+ retriever_registry.register_multiple(retrievers)
61
+ logger.info(
62
+ f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
63
+ )
64
+
65
+ # Register LLMs if provided
66
+ if llms:
67
+ from ..llm import register_llm
68
+
69
+ for name, llm_instance in llms.items():
70
+ register_llm(name, llm_instance)
71
+ logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")
72
+
51
73
  # Get language model with custom temperature
52
74
  llm = get_llm(
53
75
  temperature=temperature,
@@ -84,6 +106,9 @@ def _init_search_system(
84
106
 
85
107
  def quick_summary(
86
108
  query: str,
109
+ research_id: Optional[Union[int, str]] = None,
110
+ retrievers: Optional[Dict[str, Any]] = None,
111
+ llms: Optional[Dict[str, Any]] = None,
87
112
  **kwargs: Any,
88
113
  ) -> Dict[str, Any]:
89
114
  """
@@ -91,6 +116,9 @@ def quick_summary(
91
116
 
92
117
  Args:
93
118
  query: The research query to analyze
119
+ research_id: Optional research ID (int or UUID string) for tracking metrics
120
+ retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
121
+ llms: Optional dictionary of {name: llm} pairs to use as language models
94
122
  **kwargs: Configuration for the search system. Will be forwarded to
95
123
  `_init_search_system()`.
96
124
 
@@ -103,7 +131,46 @@ def quick_summary(
103
131
  """
104
132
  logger.info("Generating quick summary for query: %s", query)
105
133
 
106
- system = _init_search_system(**kwargs)
134
+ # Generate a research_id if none provided
135
+ if research_id is None:
136
+ import uuid
137
+
138
+ research_id = str(uuid.uuid4())
139
+ logger.debug(f"Generated research_id: {research_id}")
140
+
141
+ # Register retrievers if provided
142
+ if retrievers:
143
+ from ..web_search_engines.retriever_registry import retriever_registry
144
+
145
+ retriever_registry.register_multiple(retrievers)
146
+ logger.info(
147
+ f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
148
+ )
149
+
150
+ # Register LLMs if provided
151
+ if llms:
152
+ from ..llm import register_llm
153
+
154
+ for name, llm_instance in llms.items():
155
+ register_llm(name, llm_instance)
156
+ logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")
157
+
158
+ # Set search context with research_id
159
+ from ..metrics.search_tracker import set_search_context
160
+
161
+ search_context = {
162
+ "research_id": research_id, # Pass UUID or integer directly
163
+ "research_query": query,
164
+ "research_mode": kwargs.get("research_mode", "quick"),
165
+ "research_phase": "init",
166
+ "search_iteration": 0,
167
+ "search_engine_selected": kwargs.get("search_tool"),
168
+ }
169
+ set_search_context(search_context)
170
+
171
+ # Remove research_mode from kwargs before passing to _init_search_system
172
+ init_kwargs = {k: v for k, v in kwargs.items() if k != "research_mode"}
173
+ system = _init_search_system(llms=llms, **init_kwargs)
107
174
 
108
175
  # Perform the search and analysis
109
176
  results = system.analyze_topic(query)
@@ -130,6 +197,8 @@ def generate_report(
130
197
  output_file: Optional[str] = None,
131
198
  progress_callback: Optional[Callable] = None,
132
199
  searches_per_section: int = 2,
200
+ retrievers: Optional[Dict[str, Any]] = None,
201
+ llms: Optional[Dict[str, Any]] = None,
133
202
  **kwargs: Any,
134
203
  ) -> Dict[str, Any]:
135
204
  """
@@ -141,6 +210,8 @@ def generate_report(
141
210
  progress_callback: Optional callback function to receive progress updates
142
211
  searches_per_section: The number of searches to perform for each
143
212
  section in the report.
213
+ retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
214
+ llms: Optional dictionary of {name: llm} pairs to use as language models
144
215
 
145
216
  Returns:
146
217
  Dictionary containing the research report with keys:
@@ -149,7 +220,24 @@ def generate_report(
149
220
  """
150
221
  logger.info("Generating comprehensive research report for query: %s", query)
151
222
 
152
- system = _init_search_system(**kwargs)
223
+ # Register retrievers if provided
224
+ if retrievers:
225
+ from ..web_search_engines.retriever_registry import retriever_registry
226
+
227
+ retriever_registry.register_multiple(retrievers)
228
+ logger.info(
229
+ f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
230
+ )
231
+
232
+ # Register LLMs if provided
233
+ if llms:
234
+ from ..llm import register_llm
235
+
236
+ for name, llm_instance in llms.items():
237
+ register_llm(name, llm_instance)
238
+ logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")
239
+
240
+ system = _init_search_system(retrievers=retrievers, llms=llms, **kwargs)
153
241
 
154
242
  # Set progress callback if provided
155
243
  if progress_callback:
@@ -175,6 +263,92 @@ def generate_report(
175
263
  return report
176
264
 
177
265
 
266
+ def detailed_research(
267
+ query: str,
268
+ research_id: Optional[Union[int, str]] = None,
269
+ retrievers: Optional[Dict[str, Any]] = None,
270
+ llms: Optional[Dict[str, Any]] = None,
271
+ **kwargs: Any,
272
+ ) -> Dict[str, Any]:
273
+ """
274
+ Perform detailed research with comprehensive analysis.
275
+
276
+ Similar to generate_report but returns structured data instead of markdown.
277
+
278
+ Args:
279
+ query: The research query to analyze
280
+ research_id: Optional research ID (int or UUID string) for tracking metrics
281
+ retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
282
+ llms: Optional dictionary of {name: llm} pairs to use as language models
283
+ **kwargs: Configuration for the search system
284
+
285
+ Returns:
286
+ Dictionary containing detailed research results
287
+ """
288
+ logger.info("Performing detailed research for query: %s", query)
289
+
290
+ # Generate a research_id if none provided
291
+ if research_id is None:
292
+ import uuid
293
+
294
+ research_id = str(uuid.uuid4())
295
+ logger.debug(f"Generated research_id: {research_id}")
296
+
297
+ # Register retrievers if provided
298
+ if retrievers:
299
+ from ..web_search_engines.retriever_registry import retriever_registry
300
+
301
+ retriever_registry.register_multiple(retrievers)
302
+ logger.info(
303
+ f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
304
+ )
305
+
306
+ # Register LLMs if provided
307
+ if llms:
308
+ from ..llm import register_llm
309
+
310
+ for name, llm_instance in llms.items():
311
+ register_llm(name, llm_instance)
312
+ logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")
313
+
314
+ # Set search context
315
+ from ..metrics.search_tracker import set_search_context
316
+
317
+ search_context = {
318
+ "research_id": research_id,
319
+ "research_query": query,
320
+ "research_mode": "detailed",
321
+ "research_phase": "init",
322
+ "search_iteration": 0,
323
+ "search_engine_selected": kwargs.get("search_tool"),
324
+ }
325
+ set_search_context(search_context)
326
+
327
+ # Initialize system
328
+ system = _init_search_system(retrievers=retrievers, llms=llms, **kwargs)
329
+
330
+ # Perform detailed research
331
+ results = system.analyze_topic(query)
332
+
333
+ # Return comprehensive results
334
+ return {
335
+ "query": query,
336
+ "research_id": research_id,
337
+ "summary": results.get("current_knowledge", ""),
338
+ "findings": results.get("findings", []),
339
+ "iterations": results.get("iterations", 0),
340
+ "questions": results.get("questions", {}),
341
+ "formatted_findings": results.get("formatted_findings", ""),
342
+ "sources": results.get("all_links_of_system", []),
343
+ "metadata": {
344
+ "timestamp": datetime.now().isoformat(),
345
+ "search_tool": kwargs.get("search_tool", "auto"),
346
+ "iterations_requested": kwargs.get("iterations", 1),
347
+ "strategy": kwargs.get("search_strategy", "source_based"),
348
+ },
349
+ }
350
+
351
+
178
352
  def analyze_documents(
179
353
  query: str,
180
354
  collection_name: str,
@@ -65,15 +65,16 @@ def get_evaluation_llm(custom_config: Optional[Dict[str, Any]] = None):
65
65
 
66
66
  # Check if we're using openai_endpoint but don't have an API key configured
67
67
  if filtered_config.get("provider") == "openai_endpoint":
68
- # Try to get API key from environment or config
69
- import os
68
+ # Try to get API key from database settings first, then environment
69
+ from ..utilities.db_utils import get_db_setting
70
+
71
+ api_key = get_db_setting("llm.openai_endpoint.api_key")
70
72
 
71
- api_key = os.getenv("OPENAI_ENDPOINT_API_KEY")
72
73
  if not api_key:
73
74
  logger.warning(
74
75
  "Using openai_endpoint provider but no API key found. "
75
- "Set the OPENAI_ENDPOINT_API_KEY environment variable or "
76
- "specify api_key in the evaluation_config."
76
+ "Set the llm.openai_endpoint.api_key setting in the database or "
77
+ "LDR_LLM_OPENAI_ENDPOINT_API_KEY environment variable."
77
78
  )
78
79
  # Try to fall back to LDR's config if API key not explicitly provided
79
80
  # The get_llm function will handle this case
@@ -117,6 +118,150 @@ def extract_answer_from_response(
117
118
  }
118
119
 
119
120
 
121
+ def grade_single_result(
122
+ result_data: Dict[str, Any],
123
+ dataset_type: str = "simpleqa",
124
+ evaluation_config: Optional[Dict[str, Any]] = None,
125
+ ) -> Dict[str, Any]:
126
+ """
127
+ Grade a single benchmark result using LLM.
128
+
129
+ Args:
130
+ result_data: Dictionary containing result data with keys: id, problem, correct_answer, response, extracted_answer
131
+ dataset_type: Type of dataset
132
+ evaluation_config: Optional custom config for evaluation LLM
133
+
134
+ Returns:
135
+ Dictionary with grading results
136
+ """
137
+ # Get evaluation LLM
138
+ evaluation_llm = get_evaluation_llm(evaluation_config)
139
+
140
+ # Select appropriate template
141
+ template = (
142
+ BROWSECOMP_GRADER_TEMPLATE
143
+ if dataset_type.lower() == "browsecomp"
144
+ else SIMPLEQA_GRADER_TEMPLATE
145
+ )
146
+
147
+ question = result_data.get("problem", "")
148
+ correct_answer = result_data.get("correct_answer", "")
149
+ response = result_data.get("response", "")
150
+
151
+ logger.info(f"Grading single result: {question[:50]}...")
152
+
153
+ # Format grading prompt
154
+ grading_prompt = template.format(
155
+ question=question, correct_answer=correct_answer, response=response
156
+ )
157
+
158
+ try:
159
+ # Grade using LLM
160
+ if hasattr(evaluation_llm, "invoke") and callable(
161
+ evaluation_llm.invoke
162
+ ):
163
+ if hasattr(evaluation_llm, "chat_messages"):
164
+ # Handle ChatOpenAI and similar models that use messages
165
+ grading_response = evaluation_llm.invoke(
166
+ [HumanMessage(content=grading_prompt)]
167
+ ).content
168
+ else:
169
+ # Handle other LLM types
170
+ grading_response = evaluation_llm.invoke(grading_prompt)
171
+ if hasattr(grading_response, "content"):
172
+ grading_response = grading_response.content
173
+ else:
174
+ # Fallback for other LLM interfaces
175
+ grading_response = str(evaluation_llm(grading_prompt))
176
+
177
+ # Extract grading information using regex
178
+ if dataset_type.lower() == "browsecomp":
179
+ # BrowseComp-specific extraction
180
+ extracted_answer_match = re.search(
181
+ r"extracted_final_answer:\s*(.*?)(?:\n|$)", grading_response
182
+ )
183
+ extracted_answer = (
184
+ extracted_answer_match.group(1).strip()
185
+ if extracted_answer_match
186
+ else "None"
187
+ )
188
+
189
+ reasoning_match = re.search(
190
+ r"reasoning:\s*(.*?)(?:\n\n|\ncorrect:|\Z)",
191
+ grading_response,
192
+ re.DOTALL,
193
+ )
194
+ reasoning = (
195
+ reasoning_match.group(1).strip() if reasoning_match else ""
196
+ )
197
+
198
+ correct_match = re.search(
199
+ r"correct:\s*(yes|no)", grading_response, re.IGNORECASE
200
+ )
201
+ is_correct = (
202
+ (correct_match.group(1).lower() == "yes")
203
+ if correct_match
204
+ else False
205
+ )
206
+
207
+ confidence_match = re.search(
208
+ r"confidence:\s*(\d+)", grading_response
209
+ )
210
+ confidence = (
211
+ confidence_match.group(1) if confidence_match else "100"
212
+ )
213
+ else:
214
+ # SimpleQA extraction
215
+ extracted_answer_match = re.search(
216
+ r"Extracted Answer:\s*(.*?)(?:\n|$)", grading_response
217
+ )
218
+ extracted_answer = (
219
+ extracted_answer_match.group(1).strip()
220
+ if extracted_answer_match
221
+ else "None"
222
+ )
223
+
224
+ reasoning_match = re.search(
225
+ r"Reasoning:\s*(.*?)(?:\nCorrect:|\Z)",
226
+ grading_response,
227
+ re.DOTALL,
228
+ )
229
+ reasoning = (
230
+ reasoning_match.group(1).strip() if reasoning_match else ""
231
+ )
232
+
233
+ correct_match = re.search(
234
+ r"Correct:\s*(yes|no)", grading_response, re.IGNORECASE
235
+ )
236
+ is_correct = (
237
+ (correct_match.group(1).lower() == "yes")
238
+ if correct_match
239
+ else False
240
+ )
241
+
242
+ confidence = "100" # SimpleQA doesn't have confidence
243
+
244
+ # Format graded result
245
+ graded_result = {
246
+ "extracted_by_grader": extracted_answer,
247
+ "reasoning": reasoning,
248
+ "is_correct": is_correct,
249
+ "graded_confidence": confidence,
250
+ "grader_response": grading_response,
251
+ }
252
+
253
+ return graded_result
254
+
255
+ except Exception as e:
256
+ logger.error(f"Error grading single result: {str(e)}")
257
+ return {
258
+ "grading_error": str(e),
259
+ "is_correct": False,
260
+ "graded_confidence": "0",
261
+ "grader_response": f"Grading failed: {str(e)}",
262
+ }
263
+
264
+
120
265
  def grade_results(
121
266
  results_file: str,
122
267
  output_file: str,