local-deep-research 0.5.7__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in those public registries.
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +11 -1
- local_deep_research/advanced_search_system/questions/browsecomp_question.py +32 -6
- local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +33 -8
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -0
- local_deep_research/api/__init__.py +2 -0
- local_deep_research/api/research_functions.py +177 -3
- local_deep_research/benchmarks/graders.py +150 -5
- local_deep_research/benchmarks/models/__init__.py +19 -0
- local_deep_research/benchmarks/models/benchmark_models.py +283 -0
- local_deep_research/benchmarks/ui/__init__.py +1 -0
- local_deep_research/benchmarks/web_api/__init__.py +6 -0
- local_deep_research/benchmarks/web_api/benchmark_routes.py +862 -0
- local_deep_research/benchmarks/web_api/benchmark_service.py +920 -0
- local_deep_research/config/llm_config.py +106 -21
- local_deep_research/defaults/default_settings.json +448 -3
- local_deep_research/error_handling/report_generator.py +10 -0
- local_deep_research/llm/__init__.py +19 -0
- local_deep_research/llm/llm_registry.py +155 -0
- local_deep_research/metrics/db_models.py +3 -7
- local_deep_research/metrics/search_tracker.py +25 -11
- local_deep_research/report_generator.py +3 -2
- local_deep_research/search_system.py +12 -9
- local_deep_research/utilities/log_utils.py +23 -10
- local_deep_research/utilities/thread_context.py +99 -0
- local_deep_research/web/app_factory.py +32 -8
- local_deep_research/web/database/benchmark_schema.py +230 -0
- local_deep_research/web/database/convert_research_id_to_string.py +161 -0
- local_deep_research/web/database/models.py +55 -1
- local_deep_research/web/database/schema_upgrade.py +397 -2
- local_deep_research/web/database/uuid_migration.py +265 -0
- local_deep_research/web/routes/api_routes.py +62 -31
- local_deep_research/web/routes/history_routes.py +13 -6
- local_deep_research/web/routes/metrics_routes.py +264 -4
- local_deep_research/web/routes/research_routes.py +45 -18
- local_deep_research/web/routes/route_registry.py +352 -0
- local_deep_research/web/routes/settings_routes.py +382 -22
- local_deep_research/web/services/research_service.py +22 -29
- local_deep_research/web/services/settings_manager.py +53 -0
- local_deep_research/web/services/settings_service.py +2 -0
- local_deep_research/web/static/css/styles.css +8 -0
- local_deep_research/web/static/js/components/detail.js +7 -14
- local_deep_research/web/static/js/components/details.js +8 -10
- local_deep_research/web/static/js/components/fallback/ui.js +4 -4
- local_deep_research/web/static/js/components/history.js +6 -6
- local_deep_research/web/static/js/components/logpanel.js +14 -11
- local_deep_research/web/static/js/components/progress.js +51 -46
- local_deep_research/web/static/js/components/research.js +250 -89
- local_deep_research/web/static/js/components/results.js +5 -7
- local_deep_research/web/static/js/components/settings.js +32 -26
- local_deep_research/web/static/js/components/settings_sync.js +24 -23
- local_deep_research/web/static/js/config/urls.js +285 -0
- local_deep_research/web/static/js/main.js +8 -8
- local_deep_research/web/static/js/research_form.js +267 -12
- local_deep_research/web/static/js/services/api.js +18 -18
- local_deep_research/web/static/js/services/keyboard.js +8 -8
- local_deep_research/web/static/js/services/socket.js +53 -35
- local_deep_research/web/static/js/services/ui.js +1 -1
- local_deep_research/web/templates/base.html +4 -1
- local_deep_research/web/templates/components/custom_dropdown.html +5 -3
- local_deep_research/web/templates/components/mobile_nav.html +3 -3
- local_deep_research/web/templates/components/sidebar.html +9 -3
- local_deep_research/web/templates/pages/benchmark.html +2697 -0
- local_deep_research/web/templates/pages/benchmark_results.html +1136 -0
- local_deep_research/web/templates/pages/benchmark_simple.html +453 -0
- local_deep_research/web/templates/pages/cost_analytics.html +1 -1
- local_deep_research/web/templates/pages/metrics.html +212 -39
- local_deep_research/web/templates/pages/research.html +8 -6
- local_deep_research/web/templates/pages/star_reviews.html +1 -1
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -1
- local_deep_research/web_search_engines/engines/search_engine_brave.py +15 -1
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +20 -1
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +26 -2
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +15 -1
- local_deep_research/web_search_engines/engines/search_engine_retriever.py +192 -0
- local_deep_research/web_search_engines/engines/search_engine_tavily.py +307 -0
- local_deep_research/web_search_engines/rate_limiting/__init__.py +14 -0
- local_deep_research/web_search_engines/rate_limiting/__main__.py +9 -0
- local_deep_research/web_search_engines/rate_limiting/cli.py +209 -0
- local_deep_research/web_search_engines/rate_limiting/exceptions.py +21 -0
- local_deep_research/web_search_engines/rate_limiting/tracker.py +506 -0
- local_deep_research/web_search_engines/retriever_registry.py +108 -0
- local_deep_research/web_search_engines/search_engine_base.py +161 -43
- local_deep_research/web_search_engines/search_engine_factory.py +14 -0
- local_deep_research/web_search_engines/search_engines_config.py +20 -0
- local_deep_research-0.6.0.dist-info/METADATA +374 -0
- {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/RECORD +90 -65
- local_deep_research-0.5.7.dist-info/METADATA +0 -420
- {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/WHEEL +0 -0
- {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/licenses/LICENSE +0 -0
local_deep_research/__version__.py

```diff
@@ -1 +1 @@
-__version__ = "0.5.7"
+__version__ = "0.6.0"
```
local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py

```diff
@@ -236,6 +236,11 @@ class ProgressiveExplorer:
         """Execute searches in parallel and return results."""
         results = []
 
+        # Import context preservation utility
+        from ...utilities.thread_context import (
+            create_context_preserving_wrapper,
+        )
+
         def search_query(query):
             try:
                 search_results = self.search_engine.run(query)
@@ -244,11 +249,16 @@ class ProgressiveExplorer:
                 logger.error(f"Error searching '{query}': {str(e)}")
                 return (query, [])
 
+        # Create context-preserving wrapper for the search function
+        context_aware_search = create_context_preserving_wrapper(search_query)
+
         # Run searches in parallel
         with concurrent.futures.ThreadPoolExecutor(
             max_workers=max_workers
         ) as executor:
-            futures = [
+            futures = [
+                executor.submit(context_aware_search, q) for q in queries
+            ]
             for future in concurrent.futures.as_completed(futures):
                 results.append(future.result())
 
```
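The `create_context_preserving_wrapper` helper comes from the new `utilities/thread_context.py` module (+99 lines), whose body is not shown in this diff. As a rough, hypothetical sketch of the pattern the name suggests — propagating the submitting thread's context variables (research id, query, phase) into `ThreadPoolExecutor` workers — it could look something like the following; the real implementation may differ. The `@preserve_research_context` decorator that appears later in `source_based_strategy.py` presumably wraps the same mechanism in decorator form.

```python
# Hypothetical sketch of a context-preserving wrapper built on contextvars.
# This is NOT the actual code of utilities/thread_context.py, only an
# illustration of the pattern the name suggests.
import contextvars
import functools
from typing import Any, Callable


def create_context_preserving_wrapper(func: Callable[..., Any]) -> Callable[..., Any]:
    """Capture the caller's context variables so worker threads see the same
    research context as the thread that submitted the work."""
    ctx = contextvars.copy_context()

    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        # Run in a fresh copy of the captured context so parallel workers
        # do not re-enter the same Context object (which would raise).
        return ctx.copy().run(func, *args, **kwargs)

    return wrapper
```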
local_deep_research/advanced_search_system/questions/browsecomp_question.py

```diff
@@ -144,15 +144,25 @@ DESCRIPTORS: [entity1], [entity2], ...
         # 1. Original query (always include)
         searches.append(query)
 
+        # If only 1 question requested, return just the original query
+        if num_questions <= 1:
+            return searches[:1]
+
         # 2. Domain exploration searches (combine key entities)
-        if entities["names"]:
+        if entities["names"] and len(searches) < num_questions:
             for name in entities["names"][:2]:  # Top 2 names
+                if len(searches) >= num_questions:
+                    break
                 searches.append(f"{name}")
-                if entities["descriptors"]:
+                if entities["descriptors"] and len(searches) < num_questions:
                     searches.append(f"{name} {entities['descriptors'][0]}")
 
         # 3. Temporal searches if years are important
-        if entities["temporal"] and len(entities["temporal"]) <= 10:
+        if (
+            entities["temporal"]
+            and len(entities["temporal"]) <= 10
+            and len(searches) < num_questions
+        ):
             # For small year ranges, search each year with a key term
             key_term = (
                 entities["names"][0]
@@ -162,14 +172,18 @@ DESCRIPTORS: [entity1], [entity2], ...
                 else ""
             )
             for year in entities["temporal"][:5]:  # Limit to 5 years initially
+                if len(searches) >= num_questions:
+                    break
                 if key_term:
                     searches.append(f"{key_term} {year}")
 
         # 4. Location-based searches
-        if entities["locations"]:
+        if entities["locations"] and len(searches) < num_questions:
             for location in entities["locations"][:2]:
+                if len(searches) >= num_questions:
+                    break
                 searches.append(f"{location}")
-                if entities["descriptors"]:
+                if entities["descriptors"] and len(searches) < num_questions:
                     searches.append(f"{location} {entities['descriptors'][0]}")
 
         # Remove duplicates and limit to requested number
@@ -179,6 +193,8 @@ DESCRIPTORS: [entity1], [entity2], ...
             if s.lower() not in seen:
                 seen.add(s.lower())
                 unique_searches.append(s)
+                if len(unique_searches) >= num_questions:
+                    break
 
         return unique_searches[:num_questions]
 
@@ -238,21 +254,26 @@ Format: One search per line
             if line:
                 searches.append(line)
 
-        # Ensure we have enough searches
+        # Ensure we have enough searches, but respect the limit
         while len(searches) < num_questions:
             # Generate combinations programmatically
             if iteration <= 5 and entities["temporal"]:
                 # Continue with year-based searches
+                added_any = False
                 for year in entities["temporal"]:
                     if not self._was_searched(year, questions_by_iteration):
                         base_term = (
                             entities["names"][0] if entities["names"] else ""
                         )
                         searches.append(f"{base_term} {year}".strip())
+                        added_any = True
                     if len(searches) >= num_questions:
                         break
+                if not added_any:
+                    break  # No more year searches to add
             else:
                 # Combine multiple constraints
+                added_any = False
                 if entities["names"] and entities["descriptors"]:
                     for name in entities["names"]:
                         for desc in entities["descriptors"]:
@@ -261,8 +282,13 @@ Format: One search per line
                                 combo, questions_by_iteration
                             ):
                                 searches.append(combo)
+                                added_any = True
                             if len(searches) >= num_questions:
                                 break
+                        if len(searches) >= num_questions:
+                            break
+                if not added_any:
+                    break  # No more combinations to add
 
         return searches[:num_questions]
 
```
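The net effect of these changes is that every branch of search generation now stops as soon as `num_questions` searches have been collected, and the final de-duplication pass enforces the same cap. A standalone illustration of that dedup-and-cap pattern (a hypothetical helper for clarity, not code from the package) is shown below.

```python
# Hypothetical standalone helper illustrating the dedup-and-cap pattern
# applied in the diff above; not part of local_deep_research itself.
def dedup_and_cap(candidates: list[str], num_questions: int) -> list[str]:
    seen: set[str] = set()
    unique: list[str] = []
    for candidate in candidates:
        key = candidate.lower()
        if key not in seen:
            seen.add(key)
            unique.append(candidate)
            if len(unique) >= num_questions:
                break  # stop as soon as the requested number is reached
    return unique


assert dedup_and_cap(["AI", "ai", "ML", "NLP"], 2) == ["AI", "ML"]
```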
local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py

```diff
@@ -53,9 +53,9 @@ class FocusedIterationStrategy(BaseSearchStrategy):
         search=None,
         citation_handler=None,
         all_links_of_system=None,
-        max_iterations: int = 8,  # OPTIMAL FOR SIMPLEQA:
+        max_iterations: int = 8,  # OPTIMAL FOR SIMPLEQA: 90%+ accuracy achieved
         questions_per_iteration: int = 5,  # OPTIMAL FOR SIMPLEQA: proven config
-        use_browsecomp_optimization: bool = True,  #
+        use_browsecomp_optimization: bool = True,  # True for 90%+ accuracy with forced_answer handler
     ):
         """Initialize with components optimized for focused iteration."""
         super().__init__(all_links_of_system)
@@ -63,9 +63,9 @@ class FocusedIterationStrategy(BaseSearchStrategy):
         self.model = model or get_llm()
         self.progress_callback = None
 
-        # Configuration
-        self.max_iterations = max_iterations
-        self.questions_per_iteration = questions_per_iteration
+        # Configuration - ensure these are integers
+        self.max_iterations = int(max_iterations)
+        self.questions_per_iteration = int(questions_per_iteration)
         self.use_browsecomp_optimization = use_browsecomp_optimization
 
         # Initialize specialized components
@@ -158,9 +158,11 @@ class FocusedIterationStrategy(BaseSearchStrategy):
                 questions_by_iteration=self.questions_by_iteration,
             )
 
-            # Always include original query in first iteration
+            # Always include original query in first iteration, but respect question limit
             if iteration == 1 and query not in questions:
                 questions = [query] + questions
+                # Trim to respect questions_per_iteration limit
+                questions = questions[: self.questions_per_iteration]
 
             self.questions_by_iteration[iteration] = questions
             logger.info(f"Iteration {iteration} questions: {questions}")
@@ -247,6 +249,7 @@ class FocusedIterationStrategy(BaseSearchStrategy):
 
             # Accumulate all results (no filtering!)
             self.all_search_results.extend(iteration_results)
+            self.all_links_of_system.extend(iteration_results)
 
             # Update progress
             self._update_progress(
@@ -356,6 +359,11 @@ class FocusedIterationStrategy(BaseSearchStrategy):
         """Execute searches in parallel (like source-based strategy)."""
         all_results = []
 
+        # Import context preservation utility
+        from ...utilities.thread_context import (
+            create_context_preserving_wrapper,
+        )
+
         def search_question(q):
             try:
                 result = self.search.run(q)
@@ -364,11 +372,18 @@ class FocusedIterationStrategy(BaseSearchStrategy):
                 logger.error(f"Error searching '{q}': {str(e)}")
                 return {"question": q, "results": [], "error": str(e)}
 
+        # Create context-preserving wrapper for the search function
+        context_aware_search = create_context_preserving_wrapper(
+            search_question
+        )
+
         # Run searches in parallel
         with concurrent.futures.ThreadPoolExecutor(
             max_workers=len(queries)
         ) as executor:
-            futures = [
+            futures = [
+                executor.submit(context_aware_search, q) for q in queries
+            ]
 
             for future in concurrent.futures.as_completed(futures):
                 result_dict = future.result()
@@ -384,6 +399,11 @@ class FocusedIterationStrategy(BaseSearchStrategy):
         completed_searches = 0
         total_searches = len(queries)
 
+        # Import context preservation utility
+        from ...utilities.thread_context import (
+            create_context_preserving_wrapper,
+        )
+
         def search_question_with_progress(q):
             nonlocal completed_searches
             try:
@@ -439,12 +459,17 @@ class FocusedIterationStrategy(BaseSearchStrategy):
                     "result_count": 0,
                 }
 
+        # Create context-preserving wrapper for the search function
+        context_aware_search_with_progress = create_context_preserving_wrapper(
+            search_question_with_progress
+        )
+
         # Run searches in parallel
         with concurrent.futures.ThreadPoolExecutor(
             max_workers=min(len(queries), 5)
         ) as executor:
             futures = [
-                executor.submit(
+                executor.submit(context_aware_search_with_progress, q)
                 for q in queries
             ]
 
```
local_deep_research/advanced_search_system/strategies/source_based_strategy.py

```diff
@@ -8,6 +8,7 @@ from ...config.llm_config import get_llm
 from ...config.search_config import get_search
 from ...utilities.db_utils import get_db_setting
 from ...utilities.threading_utils import thread_context, thread_with_app_context
+from ...utilities.thread_context import preserve_research_context
 from ..filters.cross_engine_filter import CrossEngineFilter
 from ..findings.repository import FindingsRepository
 from ..questions.atomic_fact_question import AtomicFactQuestionGenerator
@@ -211,6 +212,7 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
 
         # Function for thread pool
         @thread_with_app_context
+        @preserve_research_context
         def search_question(q):
             try:
                 result = self.search.run(q)
```
local_deep_research/api/__init__.py

```diff
@@ -5,12 +5,14 @@ API module for programmatic access to Local Deep Research functionality.
 
 from .research_functions import (
     analyze_documents,
+    detailed_research,
     generate_report,
     quick_summary,
 )
 
 __all__ = [
     "quick_summary",
+    "detailed_research",
     "generate_report",
     "analyze_documents",
 ]
```
local_deep_research/api/research_functions.py

```diff
@@ -3,7 +3,8 @@ API module for Local Deep Research.
 Provides programmatic access to search and research capabilities.
 """
 
-from typing import Any, Callable, Dict, Optional
+from datetime import datetime
+from typing import Any, Callable, Dict, Optional, Union
 
 from loguru import logger
 
@@ -24,6 +25,8 @@ def _init_search_system(
     search_strategy: str = "source_based",
     iterations: int = 1,
     questions_per_iteration: int = 1,
+    retrievers: Optional[Dict[str, Any]] = None,
+    llms: Optional[Dict[str, Any]] = None,
 ) -> AdvancedSearchSystem:
     """
     Initializes the advanced search system with specified parameters. This function sets up
@@ -43,11 +46,30 @@ def _init_search_system(
         iterations: Number of research cycles to perform
         questions_per_iteration: Number of questions to generate per cycle
         search_strategy: The name of the search strategy to use.
+        retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
+        llms: Optional dictionary of {name: llm} pairs to use as language models
 
     Returns:
         AdvancedSearchSystem: An instance of the configured AdvancedSearchSystem.
 
     """
+    # Register retrievers if provided
+    if retrievers:
+        from ..web_search_engines.retriever_registry import retriever_registry
+
+        retriever_registry.register_multiple(retrievers)
+        logger.info(
+            f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
+        )
+
+    # Register LLMs if provided
+    if llms:
+        from ..llm import register_llm
+
+        for name, llm_instance in llms.items():
+            register_llm(name, llm_instance)
+        logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")
+
     # Get language model with custom temperature
     llm = get_llm(
         temperature=temperature,
@@ -84,6 +106,9 @@ def _init_search_system(
 
 def quick_summary(
     query: str,
+    research_id: Optional[Union[int, str]] = None,
+    retrievers: Optional[Dict[str, Any]] = None,
+    llms: Optional[Dict[str, Any]] = None,
     **kwargs: Any,
 ) -> Dict[str, Any]:
     """
@@ -91,6 +116,9 @@ def quick_summary(
 
     Args:
         query: The research query to analyze
+        research_id: Optional research ID (int or UUID string) for tracking metrics
+        retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
+        llms: Optional dictionary of {name: llm} pairs to use as language models
         **kwargs: Configuration for the search system. Will be forwarded to
             `_init_search_system()`.
 
@@ -103,7 +131,46 @@ def quick_summary(
     """
     logger.info("Generating quick summary for query: %s", query)
 
-    system = _init_search_system(**kwargs)
+    # Generate a research_id if none provided
+    if research_id is None:
+        import uuid
+
+        research_id = str(uuid.uuid4())
+        logger.debug(f"Generated research_id: {research_id}")
+
+    # Register retrievers if provided
+    if retrievers:
+        from ..web_search_engines.retriever_registry import retriever_registry
+
+        retriever_registry.register_multiple(retrievers)
+        logger.info(
+            f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
+        )
+
+    # Register LLMs if provided
+    if llms:
+        from ..llm import register_llm
+
+        for name, llm_instance in llms.items():
+            register_llm(name, llm_instance)
+        logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")
+
+    # Set search context with research_id
+    from ..metrics.search_tracker import set_search_context
+
+    search_context = {
+        "research_id": research_id,  # Pass UUID or integer directly
+        "research_query": query,
+        "research_mode": kwargs.get("research_mode", "quick"),
+        "research_phase": "init",
+        "search_iteration": 0,
+        "search_engine_selected": kwargs.get("search_tool"),
+    }
+    set_search_context(search_context)
+
+    # Remove research_mode from kwargs before passing to _init_search_system
+    init_kwargs = {k: v for k, v in kwargs.items() if k != "research_mode"}
+    system = _init_search_system(llms=llms, **init_kwargs)
 
     # Perform the search and analysis
     results = system.analyze_topic(query)
@@ -130,6 +197,8 @@ def generate_report(
     output_file: Optional[str] = None,
     progress_callback: Optional[Callable] = None,
     searches_per_section: int = 2,
+    retrievers: Optional[Dict[str, Any]] = None,
+    llms: Optional[Dict[str, Any]] = None,
     **kwargs: Any,
 ) -> Dict[str, Any]:
     """
@@ -141,6 +210,8 @@ def generate_report(
         progress_callback: Optional callback function to receive progress updates
         searches_per_section: The number of searches to perform for each
             section in the report.
+        retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
+        llms: Optional dictionary of {name: llm} pairs to use as language models
 
     Returns:
         Dictionary containing the research report with keys:
@@ -149,7 +220,24 @@ def generate_report(
     """
     logger.info("Generating comprehensive research report for query: %s", query)
 
-    system = _init_search_system(**kwargs)
+    # Register retrievers if provided
+    if retrievers:
+        from ..web_search_engines.retriever_registry import retriever_registry
+
+        retriever_registry.register_multiple(retrievers)
+        logger.info(
+            f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
+        )
+
+    # Register LLMs if provided
+    if llms:
+        from ..llm import register_llm
+
+        for name, llm_instance in llms.items():
+            register_llm(name, llm_instance)
+        logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")
+
+    system = _init_search_system(retrievers=retrievers, llms=llms, **kwargs)
 
     # Set progress callback if provided
     if progress_callback:
@@ -175,6 +263,92 @@ def generate_report(
     return report
 
 
+def detailed_research(
+    query: str,
+    research_id: Optional[Union[int, str]] = None,
+    retrievers: Optional[Dict[str, Any]] = None,
+    llms: Optional[Dict[str, Any]] = None,
+    **kwargs: Any,
+) -> Dict[str, Any]:
+    """
+    Perform detailed research with comprehensive analysis.
+
+    Similar to generate_report but returns structured data instead of markdown.
+
+    Args:
+        query: The research query to analyze
+        research_id: Optional research ID (int or UUID string) for tracking metrics
+        retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
+        llms: Optional dictionary of {name: llm} pairs to use as language models
+        **kwargs: Configuration for the search system
+
+    Returns:
+        Dictionary containing detailed research results
+    """
+    logger.info("Performing detailed research for query: %s", query)
+
+    # Generate a research_id if none provided
+    if research_id is None:
+        import uuid
+
+        research_id = str(uuid.uuid4())
+        logger.debug(f"Generated research_id: {research_id}")
+
+    # Register retrievers if provided
+    if retrievers:
+        from ..web_search_engines.retriever_registry import retriever_registry
+
+        retriever_registry.register_multiple(retrievers)
+        logger.info(
+            f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
+        )
+
+    # Register LLMs if provided
+    if llms:
+        from ..llm import register_llm
+
+        for name, llm_instance in llms.items():
+            register_llm(name, llm_instance)
+        logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")
+
+    # Set search context
+    from ..metrics.search_tracker import set_search_context
+
+    search_context = {
+        "research_id": research_id,
+        "research_query": query,
+        "research_mode": "detailed",
+        "research_phase": "init",
+        "search_iteration": 0,
+        "search_engine_selected": kwargs.get("search_tool"),
+    }
+    set_search_context(search_context)
+
+    # Initialize system
+    system = _init_search_system(retrievers=retrievers, llms=llms, **kwargs)
+
+    # Perform detailed research
+    results = system.analyze_topic(query)
+
+    # Return comprehensive results
+    return {
+        "query": query,
+        "research_id": research_id,
+        "summary": results.get("current_knowledge", ""),
+        "findings": results.get("findings", []),
+        "iterations": results.get("iterations", 0),
+        "questions": results.get("questions", {}),
+        "formatted_findings": results.get("formatted_findings", ""),
+        "sources": results.get("all_links_of_system", []),
+        "metadata": {
+            "timestamp": datetime.now().isoformat(),
+            "search_tool": kwargs.get("search_tool", "auto"),
+            "iterations_requested": kwargs.get("iterations", 1),
+            "strategy": kwargs.get("search_strategy", "source_based"),
+        },
+    }
+
+
 def analyze_documents(
     query: str,
     collection_name: str,
```