local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +7 -0
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
- local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
- local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
- local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
- local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
- local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
- local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
- local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
- local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
- local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
- local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
- local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
- local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
- local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
- local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
- local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
- local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
- local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
- local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
- local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
- local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
- local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
- local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
- local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
- local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
- local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
- local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
- local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
- local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
- local_deep_research/advanced_search_system/findings/repository.py +54 -17
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
- local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
- local_deep_research/advanced_search_system/questions/__init__.py +16 -0
- local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
- local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
- local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
- local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
- local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
- local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
- local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
- local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
- local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
- local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
- local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
- local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
- local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
- local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
- local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
- local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
- local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
- local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
- local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
- local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
- local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
- local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
- local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
- local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
- local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
- local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
- local_deep_research/api/benchmark_functions.py +6 -2
- local_deep_research/api/research_functions.py +10 -4
- local_deep_research/benchmarks/__init__.py +9 -7
- local_deep_research/benchmarks/benchmark_functions.py +6 -2
- local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
- local_deep_research/benchmarks/cli.py +38 -13
- local_deep_research/benchmarks/comparison/__init__.py +4 -2
- local_deep_research/benchmarks/comparison/evaluator.py +316 -239
- local_deep_research/benchmarks/datasets/__init__.py +1 -1
- local_deep_research/benchmarks/datasets/base.py +91 -72
- local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
- local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
- local_deep_research/benchmarks/datasets/utils.py +48 -29
- local_deep_research/benchmarks/datasets.py +4 -11
- local_deep_research/benchmarks/efficiency/__init__.py +8 -4
- local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
- local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
- local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
- local_deep_research/benchmarks/evaluators/composite.py +6 -2
- local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
- local_deep_research/benchmarks/graders.py +32 -10
- local_deep_research/benchmarks/metrics/README.md +1 -1
- local_deep_research/benchmarks/metrics/calculation.py +25 -10
- local_deep_research/benchmarks/metrics/reporting.py +7 -3
- local_deep_research/benchmarks/metrics/visualization.py +42 -23
- local_deep_research/benchmarks/metrics.py +1 -1
- local_deep_research/benchmarks/optimization/__init__.py +3 -1
- local_deep_research/benchmarks/optimization/api.py +7 -1
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
- local_deep_research/benchmarks/runners.py +48 -15
- local_deep_research/citation_handler.py +65 -92
- local_deep_research/citation_handlers/__init__.py +15 -0
- local_deep_research/citation_handlers/base_citation_handler.py +70 -0
- local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
- local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
- local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
- local_deep_research/config/llm_config.py +271 -169
- local_deep_research/config/search_config.py +14 -5
- local_deep_research/defaults/__init__.py +0 -1
- local_deep_research/metrics/__init__.py +13 -0
- local_deep_research/metrics/database.py +58 -0
- local_deep_research/metrics/db_models.py +115 -0
- local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
- local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
- local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
- local_deep_research/metrics/migrate_research_ratings.py +31 -0
- local_deep_research/metrics/models.py +61 -0
- local_deep_research/metrics/pricing/__init__.py +12 -0
- local_deep_research/metrics/pricing/cost_calculator.py +237 -0
- local_deep_research/metrics/pricing/pricing_cache.py +143 -0
- local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
- local_deep_research/metrics/query_utils.py +51 -0
- local_deep_research/metrics/search_tracker.py +380 -0
- local_deep_research/metrics/token_counter.py +1078 -0
- local_deep_research/migrate_db.py +3 -1
- local_deep_research/report_generator.py +22 -8
- local_deep_research/search_system.py +390 -9
- local_deep_research/test_migration.py +15 -5
- local_deep_research/utilities/db_utils.py +7 -4
- local_deep_research/utilities/es_utils.py +115 -104
- local_deep_research/utilities/llm_utils.py +15 -5
- local_deep_research/utilities/log_utils.py +151 -0
- local_deep_research/utilities/search_cache.py +387 -0
- local_deep_research/utilities/search_utilities.py +14 -6
- local_deep_research/utilities/threading_utils.py +92 -0
- local_deep_research/utilities/url_utils.py +6 -0
- local_deep_research/web/api.py +347 -0
- local_deep_research/web/app.py +13 -17
- local_deep_research/web/app_factory.py +71 -66
- local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
- local_deep_research/web/database/migrations.py +20 -3
- local_deep_research/web/database/models.py +74 -25
- local_deep_research/web/database/schema_upgrade.py +49 -29
- local_deep_research/web/models/database.py +63 -83
- local_deep_research/web/routes/api_routes.py +56 -22
- local_deep_research/web/routes/benchmark_routes.py +4 -1
- local_deep_research/web/routes/globals.py +22 -0
- local_deep_research/web/routes/history_routes.py +71 -46
- local_deep_research/web/routes/metrics_routes.py +1155 -0
- local_deep_research/web/routes/research_routes.py +192 -54
- local_deep_research/web/routes/settings_routes.py +156 -55
- local_deep_research/web/services/research_service.py +412 -251
- local_deep_research/web/services/resource_service.py +36 -11
- local_deep_research/web/services/settings_manager.py +55 -17
- local_deep_research/web/services/settings_service.py +12 -4
- local_deep_research/web/services/socket_service.py +295 -188
- local_deep_research/web/static/css/custom_dropdown.css +180 -0
- local_deep_research/web/static/css/styles.css +39 -1
- local_deep_research/web/static/js/components/detail.js +633 -267
- local_deep_research/web/static/js/components/details.js +751 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
- local_deep_research/web/static/js/components/fallback/ui.js +23 -23
- local_deep_research/web/static/js/components/history.js +76 -76
- local_deep_research/web/static/js/components/logpanel.js +61 -13
- local_deep_research/web/static/js/components/progress.js +13 -2
- local_deep_research/web/static/js/components/research.js +99 -12
- local_deep_research/web/static/js/components/results.js +239 -106
- local_deep_research/web/static/js/main.js +40 -40
- local_deep_research/web/static/js/services/audio.js +1 -1
- local_deep_research/web/static/js/services/formatting.js +11 -11
- local_deep_research/web/static/js/services/keyboard.js +157 -0
- local_deep_research/web/static/js/services/pdf.js +80 -80
- local_deep_research/web/static/sounds/README.md +1 -1
- local_deep_research/web/templates/base.html +1 -0
- local_deep_research/web/templates/components/log_panel.html +7 -1
- local_deep_research/web/templates/components/mobile_nav.html +1 -1
- local_deep_research/web/templates/components/sidebar.html +3 -0
- local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
- local_deep_research/web/templates/pages/details.html +325 -24
- local_deep_research/web/templates/pages/history.html +1 -1
- local_deep_research/web/templates/pages/metrics.html +1929 -0
- local_deep_research/web/templates/pages/progress.html +2 -2
- local_deep_research/web/templates/pages/research.html +53 -17
- local_deep_research/web/templates/pages/results.html +12 -1
- local_deep_research/web/templates/pages/star_reviews.html +803 -0
- local_deep_research/web/utils/formatters.py +9 -3
- local_deep_research/web_search_engines/default_search_engines.py +5 -3
- local_deep_research/web_search_engines/engines/full_search.py +8 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
- local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
- local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
- local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
- local_deep_research/web_search_engines/search_engine_base.py +83 -35
- local_deep_research/web_search_engines/search_engine_factory.py +25 -8
- local_deep_research/web_search_engines/search_engines_config.py +9 -3
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
- local_deep_research-0.5.2.dist-info/RECORD +265 -0
- local_deep_research-0.4.4.dist-info/RECORD +0 -177
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
local_deep_research/web_search_engines/engines/search_engine_serpapi.py

@@ -45,7 +45,9 @@ class SerpAPISearchEngine(BaseSearchEngine):
         """
         # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
         super().__init__(
-            llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
+            llm=llm,
+            max_filtered_results=max_filtered_results,
+            max_results=max_results,
         )
         self.include_full_content = include_full_content

@@ -68,7 +70,9 @@ class SerpAPISearchEngine(BaseSearchEngine):

         serpapi_api_key = api_key
         if not serpapi_api_key:
-            serpapi_api_key = get_db_setting("search.engine.web.serpapi.api_key")
+            serpapi_api_key = get_db_setting(
+                "search.engine.web.serpapi.api_key"
+            )

         if not serpapi_api_key:
             raise ValueError(

@@ -126,13 +130,17 @@ class SerpAPISearchEngine(BaseSearchEngine):

         try:
             # Get search results from SerpAPI
-            organic_results = self.engine.results(query).get("organic_results", [])
+            organic_results = self.engine.results(query).get(
+                "organic_results", []
+            )

             # Format results as previews
             previews = []
             for result in organic_results:
                 preview = {
-                    "id": result.get("position", len(previews)),  # Use position as ID
+                    "id": result.get(
+                        "position", len(previews)
+                    ),  # Use position as ID
                     "title": result.get("title", ""),
                     "link": result.get("link", ""),
                     "snippet": result.get("snippet", ""),
local_deep_research/web_search_engines/engines/search_engine_wayback.py

@@ -43,7 +43,9 @@ class WaybackSearchEngine(BaseSearchEngine):
         """
         # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
         super().__init__(
-            llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
+            llm=llm,
+            max_filtered_results=max_filtered_results,
+            max_results=max_results,
         )
         self.max_snapshots_per_url = max_snapshots_per_url
         self.language = language

@@ -79,7 +81,9 @@ class WaybackSearchEngine(BaseSearchEngine):
             return [f"http://{query}"]

         # For non-URL queries, use DuckDuckGo to find relevant URLs
-        logger.info("Query is not a URL, using DuckDuckGo to find relevant URLs")
+        logger.info(
+            "Query is not a URL, using DuckDuckGo to find relevant URLs"
+        )
         try:
             # Import DuckDuckGo search engine
             from langchain_community.utilities import DuckDuckGoSearchAPIWrapper

@@ -91,9 +95,13 @@ class WaybackSearchEngine(BaseSearchEngine):
             results = ddg.results(query, url_search_limit)

             # Extract URLs from results
-            ddg_urls = [result.get("link") for result in results if result.get("link")]
+            ddg_urls = [
+                result.get("link") for result in results if result.get("link")
+            ]
             if ddg_urls:
-                logger.info(f"Found {len(ddg_urls)} URLs from DuckDuckGo search")
+                logger.info(
+                    f"Found {len(ddg_urls)} URLs from DuckDuckGo search"
+                )
                 return ddg_urls
         except Exception as e:
             logger.error(f"Error using DuckDuckGo for URL discovery: {e}")

@@ -185,12 +193,16 @@ class WaybackSearchEngine(BaseSearchEngine):
                 snapshot = dict(zip(headers, item))
                 timestamp = snapshot.get("timestamp", "")

-                wayback_url = f"https://web.archive.org/web/{timestamp}/{url}"
+                wayback_url = (
+                    f"https://web.archive.org/web/{timestamp}/{url}"
+                )

                 snapshots.append(
                     {
                         "timestamp": timestamp,
-                        "formatted_date": self._format_timestamp(timestamp),
+                        "formatted_date": self._format_timestamp(
+                            timestamp
+                        ),
                         "url": wayback_url,
                         "original_url": url,
                         "available": True,

@@ -265,8 +277,12 @@ class WaybackSearchEngine(BaseSearchEngine):
         if not html or not html.strip():
             return ""
         try:
-            paragraphs = justext.justext(html, justext.get_stoplist(self.language))
-            cleaned = "\n".join([p.text for p in paragraphs if not p.is_boilerplate])
+            paragraphs = justext.justext(
+                html, justext.get_stoplist(self.language)
+            )
+            cleaned = "\n".join(
+                [p.text for p in paragraphs if not p.is_boilerplate]
+            )
             return cleaned
         except Exception as e:
             logger.error(f"Error removing boilerplate: {e}")

@@ -471,7 +487,10 @@ class WaybackSearchEngine(BaseSearchEngine):
             response = requests.get(self.available_api, params={"url": url})
             data = response.json()

-            if "archived_snapshots" in data and "closest" in data["archived_snapshots"]:
+            if (
+                "archived_snapshots" in data
+                and "closest" in data["archived_snapshots"]
+            ):
                 snapshot = data["archived_snapshots"]["closest"]
                 timestamp = snapshot["timestamp"]
                 wayback_url = snapshot["url"]

@@ -491,7 +510,9 @@ class WaybackSearchEngine(BaseSearchEngine):
                 not hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
                 or not search_config.SEARCH_SNIPPETS_ONLY
             ):
-                raw_html, full_content = self._get_wayback_content(wayback_url)
+                raw_html, full_content = self._get_wayback_content(
+                    wayback_url
+                )
                 result["raw_html"] = raw_html
                 result["full_content"] = full_content
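The last two Wayback hunks check archive.org's availability data for the `archived_snapshots`/`closest` fields before working with a `web.archive.org/web/<timestamp>/<url>` link. A minimal standalone sketch of that lookup, assuming `self.available_api` points at the public `https://archive.org/wayback/available` endpoint (the endpoint constant and the `closest_snapshot` helper are assumptions; the field names come from the hunks above):

```python
from typing import Optional

import requests

AVAILABLE_API = "https://archive.org/wayback/available"  # assumed endpoint


def closest_snapshot(url: str) -> Optional[dict]:
    """Return the closest archived snapshot for a URL, or None if none exists."""
    data = requests.get(AVAILABLE_API, params={"url": url}, timeout=10).json()
    if "archived_snapshots" in data and "closest" in data["archived_snapshots"]:
        snapshot = data["archived_snapshots"]["closest"]
        # The snapshot URL already has the web.archive.org/web/<timestamp>/<url> form.
        return {"timestamp": snapshot["timestamp"], "url": snapshot["url"]}
    return None


if __name__ == "__main__":
    print(closest_snapshot("example.com"))
```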
local_deep_research/web_search_engines/engines/search_engine_wikipedia.py

@@ -38,7 +38,9 @@ class WikipediaSearchEngine(BaseSearchEngine):
         """
         # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
         super().__init__(
-            llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
+            llm=llm,
+            max_filtered_results=max_filtered_results,
+            max_results=max_results,
         )
         self.include_content = include_content
         self.sentences = sentences

@@ -113,6 +115,7 @@ class WikipediaSearchEngine(BaseSearchEngine):
                     "title": title,
                     "snippet": summary,
                     "link": f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}",
+                    "source": "Wikipedia",
                 }

                 previews.append(preview)

@@ -128,7 +131,9 @@ class WikipediaSearchEngine(BaseSearchEngine):
                     logger.error(f"Unexpected error for '{title}': {e}")
                     continue

-            logger.info(f"Successfully created {len(previews)} previews from Wikipedia")
+            logger.info(
+                f"Successfully created {len(previews)} previews from Wikipedia"
+            )
             return previews

         except Exception as e:

@@ -176,6 +181,7 @@ class WikipediaSearchEngine(BaseSearchEngine):
                 "title": page.title,
                 "link": page.url,
                 "snippet": item.get("snippet", ""),  # Keep existing snippet
+                "source": "Wikipedia",
             }

             # Add additional information

@@ -218,7 +224,9 @@ class WikipediaSearchEngine(BaseSearchEngine):
         """
         sentences = sentences or self.sentences
         try:
-            return wikipedia.summary(title, sentences=sentences, auto_suggest=False)
+            return wikipedia.summary(
+                title, sentences=sentences, auto_suggest=False
+            )
         except wikipedia.exceptions.DisambiguationError as e:
             if e.options and len(e.options) > 0:
                 return wikipedia.summary(

@@ -250,6 +258,7 @@ class WikipediaSearchEngine(BaseSearchEngine):
                 "title": page.title,
                 "link": page.url,
                 "snippet": self.get_summary(title, self.sentences),
+                "source": "Wikipedia",
             }

             # Add additional information if requested
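The `@@ -218` hunk disables auto-suggestion and catches `DisambiguationError` from the third-party `wikipedia` package. A minimal sketch of that pattern, assuming the fallback simply retries with one of the disambiguation options (the original fallback branch is only partially visible above):

```python
import wikipedia  # pip install wikipedia


def get_summary(title: str, sentences: int = 3) -> str:
    """Summarize a page without auto-suggest, falling back on disambiguation."""
    try:
        return wikipedia.summary(title, sentences=sentences, auto_suggest=False)
    except wikipedia.exceptions.DisambiguationError as e:
        if e.options:
            # Assumption: retry with the first suggested option.
            return wikipedia.summary(
                e.options[0], sentences=sentences, auto_suggest=False
            )
        raise


if __name__ == "__main__":
    print(get_summary("Mercury (planet)", sentences=2))
```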
local_deep_research/web_search_engines/search_engine_base.py

@@ -1,4 +1,5 @@
 import json
+import time
 from abc import ABC, abstractmethod
 from datetime import datetime
 from typing import Any, Dict, List, Optional

@@ -7,7 +8,8 @@ from langchain_core.language_models import BaseLLM
 from loguru import logger

 from ..advanced_search_system.filters.base_filter import BaseFilter
-from ..
+from ..metrics.search_tracker import get_search_tracker
+from ..utilities.db_utils import get_db_setting


 class BaseSearchEngine(ABC):

@@ -50,8 +52,12 @@ class BaseSearchEngine(ABC):
         self._content_filters = []

         self.llm = llm  # LLM for relevance filtering
-        self._max_filtered_results = int(max_filtered_results)  # Ensure it's an integer
-
+        self._max_filtered_results = int(
+            max_filtered_results
+        )  # Ensure it's an integer
+        self._max_results = max(
+            1, int(max_results)
+        )  # Ensure it's a positive integer

     @property
     def max_filtered_results(self) -> int:

@@ -92,44 +98,80 @@ class BaseSearchEngine(ABC):
         Returns:
             List of search results with full content (if available)
         """
-        #
+        # Track search call for metrics
+        tracker = get_search_tracker()
+        engine_name = self.__class__.__name__.replace(
+            "SearchEngine", ""
+        ).lower()
+        start_time = time.time()

-
-
-
-
-
-
-
+        success = True
+        error_message = None
+        results_count = 0
+
+        try:
+            # Step 1: Get preview information for items
+            previews = self._get_previews(query)
+            if not previews:
+                logger.info(
+                    f"Search engine {self.__class__.__name__} returned no preview results for query: {query}"
+                )
+                results_count = 0
+                return []

-
-
+            for preview_filter in self._preview_filters:
+                previews = preview_filter.filter_results(previews, query)

-
-
-
+            # Step 2: Filter previews for relevance with LLM
+            # TEMPORARILY DISABLED: Skip LLM relevance filtering
+            filtered_items = previews
             logger.info(
-                f"
+                f"LLM relevance filtering disabled - returning all {len(previews)} previews"
             )
-            # Do not fall back to previews, return empty list instead
-            return []

-
-
+            # # Original filtering code (disabled):
+            # filtered_items = self._filter_for_relevance(previews, query)
+            # if not filtered_items:
+            #     logger.info(
+            #         f"All preview results were filtered out as irrelevant for query: {query}"
+            #     )
+            #     # Do not fall back to previews, return empty list instead
+            #     results_count = 0
+            #     return []
+
+            # Step 3: Get full content for filtered items
+            # Import config inside the method to avoid circular import
+
+            if get_db_setting("search.snippets_only", True):
+                logger.info("Returning snippet-only results as per config")
+                results = filtered_items
+            else:
+                results = self._get_full_content(filtered_items)
+
+            for content_filter in self._content_filters:
+                results = content_filter.filter_results(results, query)

-
-
-                and search_config.SEARCH_SNIPPETS_ONLY
-            ):
-                logger.info("Returning snippet-only results as per config")
-                results = filtered_items
-            else:
-                results = self._get_full_content(filtered_items)
+            results_count = len(results)
+            return results

-
-
+        except Exception as e:
+            success = False
+            error_message = str(e)
+            logger.error(f"Search engine {self.__class__.__name__} failed: {e}")
+            results_count = 0
+            return []

-
+        finally:
+            # Record search metrics
+            response_time_ms = int((time.time() - start_time) * 1000)
+            tracker.record_search(
+                engine_name=engine_name,
+                query=query,
+                results_count=results_count,
+                response_time_ms=response_time_ms,
+                success=success,
+                error_message=error_message,
+            )

     def invoke(self, query: str) -> List[Dict[str, Any]]:
         """Compatibility method for LangChain tools"""

@@ -229,7 +271,9 @@ Respond with ONLY the JSON array, no other text."""
                 if idx < len(previews):
                     ranked_results.append(previews[idx])
                 else:
-                    logger.warning(f"Index {idx} out of range, skipping")
+                    logger.warning(
+                        f"Index {idx} out of range, skipping"
+                    )

             # Limit to max_filtered_results if specified
             if (

@@ -244,14 +288,18 @@ Respond with ONLY the JSON array, no other text."""
             return ranked_results

         except json.JSONDecodeError as e:
-            logger.warning(f"Failed to parse JSON from LLM response: {e}")
+            logger.warning(
+                f"Failed to parse JSON from LLM response: {e}"
+            )
             logger.debug(f"Problematic JSON text: {array_text}")
             return []
         else:
             logger.warning(
                 "Could not find JSON array in response, returning original previews"
             )
-            logger.debug(f"Response text without JSON array: {response_text}")
+            logger.debug(
+                f"Response text without JSON array: {response_text}"
+            )
             return previews[: min(5, len(previews))]

         except Exception:
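The largest change in `search_engine_base.py` wraps `run()` in timing and error bookkeeping and reports it through `get_search_tracker().record_search(...)`. A minimal sketch of that wrapper pattern with the tracker stubbed out (the `_StubSearchTracker` class and `run_with_metrics` helper are illustrative only; the `record_search` keyword arguments are taken from the hunk above):

```python
import time
from typing import Any, Callable, Dict, List


class _StubSearchTracker:
    """Stand-in for the tracker returned by get_search_tracker()."""

    def record_search(self, **kwargs: Any) -> None:
        print("search metrics:", kwargs)


def run_with_metrics(
    engine_name: str, query: str, search_fn: Callable[[str], List[Dict[str, Any]]]
) -> List[Dict[str, Any]]:
    """Run a search callable and record timing/result metrics, as the new run() does."""
    tracker = _StubSearchTracker()
    start_time = time.time()
    success, error_message, results_count = True, None, 0
    try:
        results = search_fn(query)
        results_count = len(results)
        return results
    except Exception as e:
        success, error_message = False, str(e)
        return []
    finally:
        tracker.record_search(
            engine_name=engine_name,
            query=query,
            results_count=results_count,
            response_time_ms=int((time.time() - start_time) * 1000),
            success=success,
            error_message=error_message,
        )


if __name__ == "__main__":
    run_with_metrics("wikipedia", "test query", lambda q: [{"title": q}])
```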
local_deep_research/web_search_engines/search_engine_factory.py

@@ -56,14 +56,18 @@ def create_search_engine(
         api_key = engine_config.get("api_key")

         if not api_key:
-            logger.info(f"Required API key for {engine_name} not found in settings.")
+            logger.info(
+                f"Required API key for {engine_name} not found in settings."
+            )
             return None

         # Set the engine-specific environment variable if needed
         # This is to support engines that directly check environment variables
         if engine_name == "brave" and not os.getenv("BRAVE_API_KEY"):
             os.environ["BRAVE_API_KEY"] = api_key
-            logger.info("Set BRAVE_API_KEY environment variable from database setting")
+            logger.info(
+                "Set BRAVE_API_KEY environment variable from database setting"
+            )

     # Check for LLM requirements
     if engine_config.get("requires_llm", False) and not llm:

@@ -149,7 +153,9 @@ def _create_full_search_wrapper(
     class_name = engine_config.get("full_search_class")

     if not module_path or not class_name:
-        logger.warning(f"Full search configuration missing for {engine_name}")
+        logger.warning(
+            f"Full search configuration missing for {engine_name}"
+        )
         return base_engine

     # Import the full search class

@@ -163,10 +169,15 @@ def _create_full_search_wrapper(
         ]  # Skip 'self'

         # Extract relevant parameters for the full search wrapper
-        wrapper_params = {k: v for k, v in params.items() if k in wrapper_init_params}
+        wrapper_params = {
+            k: v for k, v in params.items() if k in wrapper_init_params
+        }

         # Special case for SerpAPI which needs the API key directly
-        if engine_name == "serpapi" and "serpapi_api_key" in wrapper_init_params:
+        if (
+            engine_name == "serpapi"
+            and "serpapi_api_key" in wrapper_init_params
+        ):
             serpapi_api_key = os.getenv("SERP_API_KEY")
             if serpapi_api_key:
                 wrapper_params["serpapi_api_key"] = serpapi_api_key

@@ -196,7 +207,9 @@ def _create_full_search_wrapper(
             if not brave_api_key:
                 from ..utilities.db_utils import get_db_setting

-                brave_api_key = get_db_setting("search.engine.web.brave.api_key")
+                brave_api_key = get_db_setting(
+                    "search.engine.web.brave.api_key"
+                )

             if brave_api_key:
                 wrapper_params["api_key"] = brave_api_key

@@ -236,7 +249,9 @@ def _create_full_search_wrapper(
         return full_search

     except Exception:
-        logger.exception(f"Failed to create full search wrapper for {engine_name}")
+        logger.exception(
+            f"Failed to create full search wrapper for {engine_name}"
+        )
         return base_engine


@@ -307,7 +322,9 @@ def get_search(
         )
     else:
         engine_type = type(engine).__name__
-        logger.info(f"Successfully created search engine of type: {engine_type}")
+        logger.info(
+            f"Successfully created search engine of type: {engine_type}"
+        )
         # Check if the engine has run method
         if hasattr(engine, "run"):
             logger.info(f"Engine has 'run' method: {getattr(engine, 'run')}")
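Several factory hunks share one pattern: take an API key from the environment if present, otherwise fall back to the settings database via `get_db_setting()`. A rough sketch of that fallback with `get_db_setting` stubbed out (the stub mapping and the `resolve_api_key` helper are illustrative; the setting keys appear in the hunks above):

```python
import os
from typing import Optional


def get_db_setting(key: str, default: Optional[str] = None) -> Optional[str]:
    """Stand-in for local_deep_research.utilities.db_utils.get_db_setting."""
    fake_settings = {"search.engine.web.brave.api_key": "db-stored-key"}
    return fake_settings.get(key, default)


def resolve_api_key(env_var: str, setting_key: str) -> Optional[str]:
    """Prefer the environment variable, then fall back to the settings database."""
    api_key = os.getenv(env_var)
    if not api_key:
        api_key = get_db_setting(setting_key)
    return api_key


if __name__ == "__main__":
    print(resolve_api_key("BRAVE_API_KEY", "search.engine.web.brave.api_key"))
```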
local_deep_research/web_search_engines/search_engines_config.py

@@ -12,7 +12,9 @@ from ..utilities.db_utils import get_db_setting
 from .default_search_engines import get_default_elasticsearch_config


-def _extract_per_engine_config(raw_config: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
+def _extract_per_engine_config(
+    raw_config: Dict[str, Any],
+) -> Dict[str, Dict[str, Any]]:
     """
     Converts the "flat" configuration loaded from the settings database into
     individual settings dictionaries for each engine.

@@ -55,7 +57,9 @@ def search_config() -> Dict[str, Any]:
     search_engines = _extract_per_engine_config(config_data)
     search_engines["auto"] = get_db_setting("search.engine.auto", {})

-    logger.info(f"Loaded {len(search_engines)} search engines from configuration file")
+    logger.info(
+        f"Loaded {len(search_engines)} search engines from configuration file"
+    )
     logger.info(f"\n {', '.join(sorted(search_engines.keys()))} \n")

     # Add alias for 'auto' if it exists

@@ -141,7 +145,9 @@ def local_search_engines() -> List[str]:
     local_collections_data.pop("local_all", None)
     # Remove disabled collections.
     local_collections_data = {
-        k: v for k, v in local_collections_data.items() if v.get("enabled", True)
+        k: v
+        for k, v in local_collections_data.items()
+        if v.get("enabled", True)
     }

     enabled_collections = list(local_collections_data.keys())
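Per its docstring, `_extract_per_engine_config()` turns the flat settings loaded from the database into one dictionary per engine. The diff does not show its body, so this is only a rough sketch of that kind of conversion, assuming dotted keys whose first component names the engine (the key shapes in the example are hypothetical):

```python
from typing import Any, Dict


def extract_per_engine_config(raw_config: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Group flat dotted keys into one settings dict per engine (illustrative only)."""
    per_engine: Dict[str, Dict[str, Any]] = {}
    for key, value in raw_config.items():
        engine, _, setting = key.partition(".")
        if setting:
            per_engine.setdefault(engine, {})[setting] = value
    return per_engine


if __name__ == "__main__":
    flat = {
        "serpapi.api_key": "xyz",  # hypothetical key shape
        "serpapi.requires_llm": True,
        "wikipedia.max_results": 20,
    }
    print(extract_per_engine_config(flat))
```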
{local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: local-deep-research
-Version: 0.4.4
+Version: 0.5.2
 Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
 Author-Email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
 License: MIT License

@@ -71,10 +71,12 @@ Requires-Dist: optuna>=4.3.0
 Requires-Dist: elasticsearch==8.14.0
 Requires-Dist: methodtools>=0.4.7
 Requires-Dist: loguru>=0.7.3
+Requires-Dist: cachetools>=5.5.2
 Requires-Dist: matplotlib>=3.10.3
 Requires-Dist: pandas>=2.2.3
 Requires-Dist: plotly>=6.0.1
 Requires-Dist: kaleido==0.2.1
+Requires-Dist: aiohttp>=3.9.0
 Description-Content-Type: text/markdown

 # Local Deep Research

@@ -400,6 +402,10 @@ For more information and examples of what Local Deep Research can produce:
 - [Reddit](https://www.reddit.com/r/LocalDeepResearch/): Announcements, updates, and community showcase
 - [GitHub Issues](https://github.com/LearningCircuit/local-deep-research/issues): Bug reports and feature requests

+## 🚀 Contributing
+
+We welcome contributions! Whether you're fixing bugs, adding features, or improving documentation, we'd love to have you as part of our community. Please see our [Contributing Guide](CONTRIBUTING.md) for guidelines on how to get started.
+
 ## 📄 License & Acknowledgments

 This project is licensed under the MIT License.