local-deep-research 0.5.7__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. local_deep_research/__version__.py +1 -1
  2. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +11 -1
  3. local_deep_research/advanced_search_system/questions/browsecomp_question.py +32 -6
  4. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +33 -8
  5. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -0
  6. local_deep_research/api/__init__.py +2 -0
  7. local_deep_research/api/research_functions.py +177 -3
  8. local_deep_research/benchmarks/graders.py +150 -5
  9. local_deep_research/benchmarks/models/__init__.py +19 -0
  10. local_deep_research/benchmarks/models/benchmark_models.py +283 -0
  11. local_deep_research/benchmarks/ui/__init__.py +1 -0
  12. local_deep_research/benchmarks/web_api/__init__.py +6 -0
  13. local_deep_research/benchmarks/web_api/benchmark_routes.py +862 -0
  14. local_deep_research/benchmarks/web_api/benchmark_service.py +920 -0
  15. local_deep_research/config/llm_config.py +106 -21
  16. local_deep_research/defaults/default_settings.json +448 -3
  17. local_deep_research/error_handling/report_generator.py +10 -0
  18. local_deep_research/llm/__init__.py +19 -0
  19. local_deep_research/llm/llm_registry.py +155 -0
  20. local_deep_research/metrics/db_models.py +3 -7
  21. local_deep_research/metrics/search_tracker.py +25 -11
  22. local_deep_research/report_generator.py +3 -2
  23. local_deep_research/search_system.py +12 -9
  24. local_deep_research/utilities/log_utils.py +23 -10
  25. local_deep_research/utilities/thread_context.py +99 -0
  26. local_deep_research/web/app_factory.py +32 -8
  27. local_deep_research/web/database/benchmark_schema.py +230 -0
  28. local_deep_research/web/database/convert_research_id_to_string.py +161 -0
  29. local_deep_research/web/database/models.py +55 -1
  30. local_deep_research/web/database/schema_upgrade.py +397 -2
  31. local_deep_research/web/database/uuid_migration.py +265 -0
  32. local_deep_research/web/routes/api_routes.py +62 -31
  33. local_deep_research/web/routes/history_routes.py +13 -6
  34. local_deep_research/web/routes/metrics_routes.py +264 -4
  35. local_deep_research/web/routes/research_routes.py +45 -18
  36. local_deep_research/web/routes/route_registry.py +352 -0
  37. local_deep_research/web/routes/settings_routes.py +382 -22
  38. local_deep_research/web/services/research_service.py +22 -29
  39. local_deep_research/web/services/settings_manager.py +53 -0
  40. local_deep_research/web/services/settings_service.py +2 -0
  41. local_deep_research/web/static/css/styles.css +8 -0
  42. local_deep_research/web/static/js/components/detail.js +7 -14
  43. local_deep_research/web/static/js/components/details.js +8 -10
  44. local_deep_research/web/static/js/components/fallback/ui.js +4 -4
  45. local_deep_research/web/static/js/components/history.js +6 -6
  46. local_deep_research/web/static/js/components/logpanel.js +14 -11
  47. local_deep_research/web/static/js/components/progress.js +51 -46
  48. local_deep_research/web/static/js/components/research.js +250 -89
  49. local_deep_research/web/static/js/components/results.js +5 -7
  50. local_deep_research/web/static/js/components/settings.js +32 -26
  51. local_deep_research/web/static/js/components/settings_sync.js +24 -23
  52. local_deep_research/web/static/js/config/urls.js +285 -0
  53. local_deep_research/web/static/js/main.js +8 -8
  54. local_deep_research/web/static/js/research_form.js +267 -12
  55. local_deep_research/web/static/js/services/api.js +18 -18
  56. local_deep_research/web/static/js/services/keyboard.js +8 -8
  57. local_deep_research/web/static/js/services/socket.js +53 -35
  58. local_deep_research/web/static/js/services/ui.js +1 -1
  59. local_deep_research/web/templates/base.html +4 -1
  60. local_deep_research/web/templates/components/custom_dropdown.html +5 -3
  61. local_deep_research/web/templates/components/mobile_nav.html +3 -3
  62. local_deep_research/web/templates/components/sidebar.html +9 -3
  63. local_deep_research/web/templates/pages/benchmark.html +2697 -0
  64. local_deep_research/web/templates/pages/benchmark_results.html +1136 -0
  65. local_deep_research/web/templates/pages/benchmark_simple.html +453 -0
  66. local_deep_research/web/templates/pages/cost_analytics.html +1 -1
  67. local_deep_research/web/templates/pages/metrics.html +212 -39
  68. local_deep_research/web/templates/pages/research.html +8 -6
  69. local_deep_research/web/templates/pages/star_reviews.html +1 -1
  70. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -1
  71. local_deep_research/web_search_engines/engines/search_engine_brave.py +15 -1
  72. local_deep_research/web_search_engines/engines/search_engine_ddg.py +20 -1
  73. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +26 -2
  74. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +15 -1
  75. local_deep_research/web_search_engines/engines/search_engine_retriever.py +192 -0
  76. local_deep_research/web_search_engines/engines/search_engine_tavily.py +307 -0
  77. local_deep_research/web_search_engines/rate_limiting/__init__.py +14 -0
  78. local_deep_research/web_search_engines/rate_limiting/__main__.py +9 -0
  79. local_deep_research/web_search_engines/rate_limiting/cli.py +209 -0
  80. local_deep_research/web_search_engines/rate_limiting/exceptions.py +21 -0
  81. local_deep_research/web_search_engines/rate_limiting/tracker.py +506 -0
  82. local_deep_research/web_search_engines/retriever_registry.py +108 -0
  83. local_deep_research/web_search_engines/search_engine_base.py +161 -43
  84. local_deep_research/web_search_engines/search_engine_factory.py +14 -0
  85. local_deep_research/web_search_engines/search_engines_config.py +20 -0
  86. local_deep_research-0.6.0.dist-info/METADATA +374 -0
  87. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/RECORD +90 -65
  88. local_deep_research-0.5.7.dist-info/METADATA +0 -420
  89. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/WHEEL +0 -0
  90. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/entry_points.txt +0 -0
  91. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -1 +1 @@
1
- __version__ = "0.5.7"
1
+ __version__ = "0.6.0"
@@ -236,6 +236,11 @@ class ProgressiveExplorer:
236
236
  """Execute searches in parallel and return results."""
237
237
  results = []
238
238
 
239
+ # Import context preservation utility
240
+ from ...utilities.thread_context import (
241
+ create_context_preserving_wrapper,
242
+ )
243
+
239
244
  def search_query(query):
240
245
  try:
241
246
  search_results = self.search_engine.run(query)
@@ -244,11 +249,16 @@ class ProgressiveExplorer:
244
249
  logger.error(f"Error searching '{query}': {str(e)}")
245
250
  return (query, [])
246
251
 
252
+ # Create context-preserving wrapper for the search function
253
+ context_aware_search = create_context_preserving_wrapper(search_query)
254
+
247
255
  # Run searches in parallel
248
256
  with concurrent.futures.ThreadPoolExecutor(
249
257
  max_workers=max_workers
250
258
  ) as executor:
251
- futures = [executor.submit(search_query, q) for q in queries]
259
+ futures = [
260
+ executor.submit(context_aware_search, q) for q in queries
261
+ ]
252
262
  for future in concurrent.futures.as_completed(futures):
253
263
  results.append(future.result())
254
264
 
@@ -144,15 +144,25 @@ DESCRIPTORS: [entity1], [entity2], ...
144
144
  # 1. Original query (always include)
145
145
  searches.append(query)
146
146
 
147
+ # If only 1 question requested, return just the original query
148
+ if num_questions <= 1:
149
+ return searches[:1]
150
+
147
151
  # 2. Domain exploration searches (combine key entities)
148
- if entities["names"]:
152
+ if entities["names"] and len(searches) < num_questions:
149
153
  for name in entities["names"][:2]: # Top 2 names
154
+ if len(searches) >= num_questions:
155
+ break
150
156
  searches.append(f"{name}")
151
- if entities["descriptors"]:
157
+ if entities["descriptors"] and len(searches) < num_questions:
152
158
  searches.append(f"{name} {entities['descriptors'][0]}")
153
159
 
154
160
  # 3. Temporal searches if years are important
155
- if entities["temporal"] and len(entities["temporal"]) <= 10:
161
+ if (
162
+ entities["temporal"]
163
+ and len(entities["temporal"]) <= 10
164
+ and len(searches) < num_questions
165
+ ):
156
166
  # For small year ranges, search each year with a key term
157
167
  key_term = (
158
168
  entities["names"][0]
@@ -162,14 +172,18 @@ DESCRIPTORS: [entity1], [entity2], ...
162
172
  else ""
163
173
  )
164
174
  for year in entities["temporal"][:5]: # Limit to 5 years initially
175
+ if len(searches) >= num_questions:
176
+ break
165
177
  if key_term:
166
178
  searches.append(f"{key_term} {year}")
167
179
 
168
180
  # 4. Location-based searches
169
- if entities["locations"]:
181
+ if entities["locations"] and len(searches) < num_questions:
170
182
  for location in entities["locations"][:2]:
183
+ if len(searches) >= num_questions:
184
+ break
171
185
  searches.append(f"{location}")
172
- if entities["descriptors"]:
186
+ if entities["descriptors"] and len(searches) < num_questions:
173
187
  searches.append(f"{location} {entities['descriptors'][0]}")
174
188
 
175
189
  # Remove duplicates and limit to requested number
@@ -179,6 +193,8 @@ DESCRIPTORS: [entity1], [entity2], ...
179
193
  if s.lower() not in seen:
180
194
  seen.add(s.lower())
181
195
  unique_searches.append(s)
196
+ if len(unique_searches) >= num_questions:
197
+ break
182
198
 
183
199
  return unique_searches[:num_questions]
184
200
 
@@ -238,21 +254,26 @@ Format: One search per line
238
254
  if line:
239
255
  searches.append(line)
240
256
 
241
- # Ensure we have enough searches
257
+ # Ensure we have enough searches, but respect the limit
242
258
  while len(searches) < num_questions:
243
259
  # Generate combinations programmatically
244
260
  if iteration <= 5 and entities["temporal"]:
245
261
  # Continue with year-based searches
262
+ added_any = False
246
263
  for year in entities["temporal"]:
247
264
  if not self._was_searched(year, questions_by_iteration):
248
265
  base_term = (
249
266
  entities["names"][0] if entities["names"] else ""
250
267
  )
251
268
  searches.append(f"{base_term} {year}".strip())
269
+ added_any = True
252
270
  if len(searches) >= num_questions:
253
271
  break
272
+ if not added_any:
273
+ break # No more year searches to add
254
274
  else:
255
275
  # Combine multiple constraints
276
+ added_any = False
256
277
  if entities["names"] and entities["descriptors"]:
257
278
  for name in entities["names"]:
258
279
  for desc in entities["descriptors"]:
@@ -261,8 +282,13 @@ Format: One search per line
261
282
  combo, questions_by_iteration
262
283
  ):
263
284
  searches.append(combo)
285
+ added_any = True
264
286
  if len(searches) >= num_questions:
265
287
  break
288
+ if len(searches) >= num_questions:
289
+ break
290
+ if not added_any:
291
+ break # No more combinations to add
266
292
 
267
293
  return searches[:num_questions]
268
294
 
@@ -53,9 +53,9 @@ class FocusedIterationStrategy(BaseSearchStrategy):
53
53
  search=None,
54
54
  citation_handler=None,
55
55
  all_links_of_system=None,
56
- max_iterations: int = 8, # OPTIMAL FOR SIMPLEQA: 96.51% accuracy achieved
56
+ max_iterations: int = 8, # OPTIMAL FOR SIMPLEQA: 90%+ accuracy achieved
57
57
  questions_per_iteration: int = 5, # OPTIMAL FOR SIMPLEQA: proven config
58
- use_browsecomp_optimization: bool = True, # Can be False for pure SimpleQA
58
+ use_browsecomp_optimization: bool = True, # True for 90%+ accuracy with forced_answer handler
59
59
  ):
60
60
  """Initialize with components optimized for focused iteration."""
61
61
  super().__init__(all_links_of_system)
@@ -63,9 +63,9 @@ class FocusedIterationStrategy(BaseSearchStrategy):
63
63
  self.model = model or get_llm()
64
64
  self.progress_callback = None
65
65
 
66
- # Configuration
67
- self.max_iterations = max_iterations
68
- self.questions_per_iteration = questions_per_iteration
66
+ # Configuration - ensure these are integers
67
+ self.max_iterations = int(max_iterations)
68
+ self.questions_per_iteration = int(questions_per_iteration)
69
69
  self.use_browsecomp_optimization = use_browsecomp_optimization
70
70
 
71
71
  # Initialize specialized components
@@ -158,9 +158,11 @@ class FocusedIterationStrategy(BaseSearchStrategy):
158
158
  questions_by_iteration=self.questions_by_iteration,
159
159
  )
160
160
 
161
- # Always include original query in first iteration
161
+ # Always include original query in first iteration, but respect question limit
162
162
  if iteration == 1 and query not in questions:
163
163
  questions = [query] + questions
164
+ # Trim to respect questions_per_iteration limit
165
+ questions = questions[: self.questions_per_iteration]
164
166
 
165
167
  self.questions_by_iteration[iteration] = questions
166
168
  logger.info(f"Iteration {iteration} questions: {questions}")
@@ -247,6 +249,7 @@ class FocusedIterationStrategy(BaseSearchStrategy):
247
249
 
248
250
  # Accumulate all results (no filtering!)
249
251
  self.all_search_results.extend(iteration_results)
252
+ self.all_links_of_system.extend(iteration_results)
250
253
 
251
254
  # Update progress
252
255
  self._update_progress(
@@ -356,6 +359,11 @@ class FocusedIterationStrategy(BaseSearchStrategy):
356
359
  """Execute searches in parallel (like source-based strategy)."""
357
360
  all_results = []
358
361
 
362
+ # Import context preservation utility
363
+ from ...utilities.thread_context import (
364
+ create_context_preserving_wrapper,
365
+ )
366
+
359
367
  def search_question(q):
360
368
  try:
361
369
  result = self.search.run(q)
@@ -364,11 +372,18 @@ class FocusedIterationStrategy(BaseSearchStrategy):
364
372
  logger.error(f"Error searching '{q}': {str(e)}")
365
373
  return {"question": q, "results": [], "error": str(e)}
366
374
 
375
+ # Create context-preserving wrapper for the search function
376
+ context_aware_search = create_context_preserving_wrapper(
377
+ search_question
378
+ )
379
+
367
380
  # Run searches in parallel
368
381
  with concurrent.futures.ThreadPoolExecutor(
369
382
  max_workers=len(queries)
370
383
  ) as executor:
371
- futures = [executor.submit(search_question, q) for q in queries]
384
+ futures = [
385
+ executor.submit(context_aware_search, q) for q in queries
386
+ ]
372
387
 
373
388
  for future in concurrent.futures.as_completed(futures):
374
389
  result_dict = future.result()
@@ -384,6 +399,11 @@ class FocusedIterationStrategy(BaseSearchStrategy):
384
399
  completed_searches = 0
385
400
  total_searches = len(queries)
386
401
 
402
+ # Import context preservation utility
403
+ from ...utilities.thread_context import (
404
+ create_context_preserving_wrapper,
405
+ )
406
+
387
407
  def search_question_with_progress(q):
388
408
  nonlocal completed_searches
389
409
  try:
@@ -439,12 +459,17 @@ class FocusedIterationStrategy(BaseSearchStrategy):
439
459
  "result_count": 0,
440
460
  }
441
461
 
462
+ # Create context-preserving wrapper for the search function
463
+ context_aware_search_with_progress = create_context_preserving_wrapper(
464
+ search_question_with_progress
465
+ )
466
+
442
467
  # Run searches in parallel
443
468
  with concurrent.futures.ThreadPoolExecutor(
444
469
  max_workers=min(len(queries), 5)
445
470
  ) as executor:
446
471
  futures = [
447
- executor.submit(search_question_with_progress, q)
472
+ executor.submit(context_aware_search_with_progress, q)
448
473
  for q in queries
449
474
  ]
450
475
 
@@ -8,6 +8,7 @@ from ...config.llm_config import get_llm
8
8
  from ...config.search_config import get_search
9
9
  from ...utilities.db_utils import get_db_setting
10
10
  from ...utilities.threading_utils import thread_context, thread_with_app_context
11
+ from ...utilities.thread_context import preserve_research_context
11
12
  from ..filters.cross_engine_filter import CrossEngineFilter
12
13
  from ..findings.repository import FindingsRepository
13
14
  from ..questions.atomic_fact_question import AtomicFactQuestionGenerator
@@ -211,6 +212,7 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
211
212
 
212
213
  # Function for thread pool
213
214
  @thread_with_app_context
215
+ @preserve_research_context
214
216
  def search_question(q):
215
217
  try:
216
218
  result = self.search.run(q)
@@ -5,12 +5,14 @@ API module for programmatic access to Local Deep Research functionality.
5
5
 
6
6
  from .research_functions import (
7
7
  analyze_documents,
8
+ detailed_research,
8
9
  generate_report,
9
10
  quick_summary,
10
11
  )
11
12
 
12
13
  __all__ = [
13
14
  "quick_summary",
15
+ "detailed_research",
14
16
  "generate_report",
15
17
  "analyze_documents",
16
18
  ]
@@ -3,7 +3,8 @@ API module for Local Deep Research.
3
3
  Provides programmatic access to search and research capabilities.
4
4
  """
5
5
 
6
- from typing import Any, Callable, Dict, Optional
6
+ from datetime import datetime
7
+ from typing import Any, Callable, Dict, Optional, Union
7
8
 
8
9
  from loguru import logger
9
10
 
@@ -24,6 +25,8 @@ def _init_search_system(
24
25
  search_strategy: str = "source_based",
25
26
  iterations: int = 1,
26
27
  questions_per_iteration: int = 1,
28
+ retrievers: Optional[Dict[str, Any]] = None,
29
+ llms: Optional[Dict[str, Any]] = None,
27
30
  ) -> AdvancedSearchSystem:
28
31
  """
29
32
  Initializes the advanced search system with specified parameters. This function sets up
@@ -43,11 +46,30 @@ def _init_search_system(
43
46
  iterations: Number of research cycles to perform
44
47
  questions_per_iteration: Number of questions to generate per cycle
45
48
  search_strategy: The name of the search strategy to use.
49
+ retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
50
+ llms: Optional dictionary of {name: llm} pairs to use as language models
46
51
 
47
52
  Returns:
48
53
  AdvancedSearchSystem: An instance of the configured AdvancedSearchSystem.
49
54
 
50
55
  """
56
+ # Register retrievers if provided
57
+ if retrievers:
58
+ from ..web_search_engines.retriever_registry import retriever_registry
59
+
60
+ retriever_registry.register_multiple(retrievers)
61
+ logger.info(
62
+ f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
63
+ )
64
+
65
+ # Register LLMs if provided
66
+ if llms:
67
+ from ..llm import register_llm
68
+
69
+ for name, llm_instance in llms.items():
70
+ register_llm(name, llm_instance)
71
+ logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")
72
+
51
73
  # Get language model with custom temperature
52
74
  llm = get_llm(
53
75
  temperature=temperature,
@@ -84,6 +106,9 @@ def _init_search_system(
84
106
 
85
107
  def quick_summary(
86
108
  query: str,
109
+ research_id: Optional[Union[int, str]] = None,
110
+ retrievers: Optional[Dict[str, Any]] = None,
111
+ llms: Optional[Dict[str, Any]] = None,
87
112
  **kwargs: Any,
88
113
  ) -> Dict[str, Any]:
89
114
  """
@@ -91,6 +116,9 @@ def quick_summary(
91
116
 
92
117
  Args:
93
118
  query: The research query to analyze
119
+ research_id: Optional research ID (int or UUID string) for tracking metrics
120
+ retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
121
+ llms: Optional dictionary of {name: llm} pairs to use as language models
94
122
  **kwargs: Configuration for the search system. Will be forwarded to
95
123
  `_init_search_system()`.
96
124
 
@@ -103,7 +131,46 @@ def quick_summary(
103
131
  """
104
132
  logger.info("Generating quick summary for query: %s", query)
105
133
 
106
- system = _init_search_system(**kwargs)
134
+ # Generate a research_id if none provided
135
+ if research_id is None:
136
+ import uuid
137
+
138
+ research_id = str(uuid.uuid4())
139
+ logger.debug(f"Generated research_id: {research_id}")
140
+
141
+ # Register retrievers if provided
142
+ if retrievers:
143
+ from ..web_search_engines.retriever_registry import retriever_registry
144
+
145
+ retriever_registry.register_multiple(retrievers)
146
+ logger.info(
147
+ f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
148
+ )
149
+
150
+ # Register LLMs if provided
151
+ if llms:
152
+ from ..llm import register_llm
153
+
154
+ for name, llm_instance in llms.items():
155
+ register_llm(name, llm_instance)
156
+ logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")
157
+
158
+ # Set search context with research_id
159
+ from ..metrics.search_tracker import set_search_context
160
+
161
+ search_context = {
162
+ "research_id": research_id, # Pass UUID or integer directly
163
+ "research_query": query,
164
+ "research_mode": kwargs.get("research_mode", "quick"),
165
+ "research_phase": "init",
166
+ "search_iteration": 0,
167
+ "search_engine_selected": kwargs.get("search_tool"),
168
+ }
169
+ set_search_context(search_context)
170
+
171
+ # Remove research_mode from kwargs before passing to _init_search_system
172
+ init_kwargs = {k: v for k, v in kwargs.items() if k != "research_mode"}
173
+ system = _init_search_system(llms=llms, **init_kwargs)
107
174
 
108
175
  # Perform the search and analysis
109
176
  results = system.analyze_topic(query)
@@ -130,6 +197,8 @@ def generate_report(
130
197
  output_file: Optional[str] = None,
131
198
  progress_callback: Optional[Callable] = None,
132
199
  searches_per_section: int = 2,
200
+ retrievers: Optional[Dict[str, Any]] = None,
201
+ llms: Optional[Dict[str, Any]] = None,
133
202
  **kwargs: Any,
134
203
  ) -> Dict[str, Any]:
135
204
  """
@@ -141,6 +210,8 @@ def generate_report(
141
210
  progress_callback: Optional callback function to receive progress updates
142
211
  searches_per_section: The number of searches to perform for each
143
212
  section in the report.
213
+ retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
214
+ llms: Optional dictionary of {name: llm} pairs to use as language models
144
215
 
145
216
  Returns:
146
217
  Dictionary containing the research report with keys:
@@ -149,7 +220,24 @@ def generate_report(
149
220
  """
150
221
  logger.info("Generating comprehensive research report for query: %s", query)
151
222
 
152
- system = _init_search_system(**kwargs)
223
+ # Register retrievers if provided
224
+ if retrievers:
225
+ from ..web_search_engines.retriever_registry import retriever_registry
226
+
227
+ retriever_registry.register_multiple(retrievers)
228
+ logger.info(
229
+ f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
230
+ )
231
+
232
+ # Register LLMs if provided
233
+ if llms:
234
+ from ..llm import register_llm
235
+
236
+ for name, llm_instance in llms.items():
237
+ register_llm(name, llm_instance)
238
+ logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")
239
+
240
+ system = _init_search_system(retrievers=retrievers, llms=llms, **kwargs)
153
241
 
154
242
  # Set progress callback if provided
155
243
  if progress_callback:
@@ -175,6 +263,92 @@ def generate_report(
175
263
  return report
176
264
 
177
265
 
266
+ def detailed_research(
267
+ query: str,
268
+ research_id: Optional[Union[int, str]] = None,
269
+ retrievers: Optional[Dict[str, Any]] = None,
270
+ llms: Optional[Dict[str, Any]] = None,
271
+ **kwargs: Any,
272
+ ) -> Dict[str, Any]:
273
+ """
274
+ Perform detailed research with comprehensive analysis.
275
+
276
+ Similar to generate_report but returns structured data instead of markdown.
277
+
278
+ Args:
279
+ query: The research query to analyze
280
+ research_id: Optional research ID (int or UUID string) for tracking metrics
281
+ retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
282
+ llms: Optional dictionary of {name: llm} pairs to use as language models
283
+ **kwargs: Configuration for the search system
284
+
285
+ Returns:
286
+ Dictionary containing detailed research results
287
+ """
288
+ logger.info("Performing detailed research for query: %s", query)
289
+
290
+ # Generate a research_id if none provided
291
+ if research_id is None:
292
+ import uuid
293
+
294
+ research_id = str(uuid.uuid4())
295
+ logger.debug(f"Generated research_id: {research_id}")
296
+
297
+ # Register retrievers if provided
298
+ if retrievers:
299
+ from ..web_search_engines.retriever_registry import retriever_registry
300
+
301
+ retriever_registry.register_multiple(retrievers)
302
+ logger.info(
303
+ f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
304
+ )
305
+
306
+ # Register LLMs if provided
307
+ if llms:
308
+ from ..llm import register_llm
309
+
310
+ for name, llm_instance in llms.items():
311
+ register_llm(name, llm_instance)
312
+ logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")
313
+
314
+ # Set search context
315
+ from ..metrics.search_tracker import set_search_context
316
+
317
+ search_context = {
318
+ "research_id": research_id,
319
+ "research_query": query,
320
+ "research_mode": "detailed",
321
+ "research_phase": "init",
322
+ "search_iteration": 0,
323
+ "search_engine_selected": kwargs.get("search_tool"),
324
+ }
325
+ set_search_context(search_context)
326
+
327
+ # Initialize system
328
+ system = _init_search_system(retrievers=retrievers, llms=llms, **kwargs)
329
+
330
+ # Perform detailed research
331
+ results = system.analyze_topic(query)
332
+
333
+ # Return comprehensive results
334
+ return {
335
+ "query": query,
336
+ "research_id": research_id,
337
+ "summary": results.get("current_knowledge", ""),
338
+ "findings": results.get("findings", []),
339
+ "iterations": results.get("iterations", 0),
340
+ "questions": results.get("questions", {}),
341
+ "formatted_findings": results.get("formatted_findings", ""),
342
+ "sources": results.get("all_links_of_system", []),
343
+ "metadata": {
344
+ "timestamp": datetime.now().isoformat(),
345
+ "search_tool": kwargs.get("search_tool", "auto"),
346
+ "iterations_requested": kwargs.get("iterations", 1),
347
+ "strategy": kwargs.get("search_strategy", "source_based"),
348
+ },
349
+ }
350
+
351
+
178
352
  def analyze_documents(
179
353
  query: str,
180
354
  collection_name: str,