local-deep-research 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +24 -0
- local_deep_research/citation_handler.py +113 -0
- local_deep_research/config.py +166 -0
- local_deep_research/defaults/__init__.py +44 -0
- local_deep_research/defaults/llm_config.py +269 -0
- local_deep_research/defaults/local_collections.toml +47 -0
- local_deep_research/defaults/main.toml +57 -0
- local_deep_research/defaults/search_engines.toml +244 -0
- local_deep_research/local_collections.py +141 -0
- local_deep_research/main.py +113 -0
- local_deep_research/report_generator.py +206 -0
- local_deep_research/search_system.py +241 -0
- local_deep_research/utilties/__init__.py +0 -0
- local_deep_research/utilties/enums.py +9 -0
- local_deep_research/utilties/llm_utils.py +116 -0
- local_deep_research/utilties/search_utilities.py +115 -0
- local_deep_research/utilties/setup_utils.py +6 -0
- local_deep_research/web/__init__.py +2 -0
- local_deep_research/web/app.py +1209 -0
- local_deep_research/web/static/css/styles.css +1008 -0
- local_deep_research/web/static/js/app.js +2078 -0
- local_deep_research/web/templates/api_keys_config.html +82 -0
- local_deep_research/web/templates/collections_config.html +90 -0
- local_deep_research/web/templates/index.html +312 -0
- local_deep_research/web/templates/llm_config.html +120 -0
- local_deep_research/web/templates/main_config.html +89 -0
- local_deep_research/web/templates/search_engines_config.html +154 -0
- local_deep_research/web/templates/settings.html +519 -0
- local_deep_research/web/templates/settings_dashboard.html +207 -0
- local_deep_research/web_search_engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/full_search.py +128 -0
- local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
- local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
- local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
- local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
- local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
- local_deep_research/web_search_engines/full_search.py +254 -0
- local_deep_research/web_search_engines/search_engine_base.py +197 -0
- local_deep_research/web_search_engines/search_engine_factory.py +233 -0
- local_deep_research/web_search_engines/search_engines_config.py +54 -0
- local_deep_research-0.1.0.dist-info/LICENSE +21 -0
- local_deep_research-0.1.0.dist-info/METADATA +328 -0
- local_deep_research-0.1.0.dist-info/RECORD +56 -0
- local_deep_research-0.1.0.dist-info/WHEEL +5 -0
- local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
- local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,123 @@
|
|
1
|
+
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
|
2
|
+
from typing import Dict, List, Any, Optional
|
3
|
+
from langchain_core.language_models import BaseLLM
|
4
|
+
|
5
|
+
from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
|
6
|
+
from local_deep_research.web_search_engines.engines.full_search import FullSearchResults # Import the FullSearchResults class
|
7
|
+
|
8
|
+
class DuckDuckGoSearchEngine(BaseSearchEngine):
    """Search engine backed by DuckDuckGo, split into a preview and a content phase.

    ``_get_previews`` gathers title/snippet/link records for a query, and
    ``_get_full_content`` optionally expands the selected records through
    ``FullSearchResults``. The sequencing of the two phases is delegated to
    ``BaseSearchEngine.run`` (defined outside this file).
    """

    def __init__(self,
                 max_results: int = 10,
                 region: str = "us",
                 safe_search: bool = True,
                 llm: Optional[BaseLLM] = None,
                 language: str = "English",
                 include_full_content: bool = False):
        """
        Build the DuckDuckGo wrapper and, when applicable, the full-content helper.

        Args:
            max_results: Upper bound on the number of results requested.
            region: Region code forwarded to the DuckDuckGo wrapper.
            safe_search: True selects "moderate" filtering, False switches it off.
            llm: Language model passed to the base class and to FullSearchResults.
            language: Language forwarded to FullSearchResults.
            include_full_content: Ask for full page content; this only takes
                effect when ``llm`` is provided as well.
        """
        super().__init__(llm=llm)

        self.max_results = max_results
        self.region = region
        self.safe_search = safe_search
        self.language = language
        self.include_full_content = include_full_content

        # The wrapper is handed the lowercase spelling of the safe-search level.
        wrapper_safesearch = "moderate" if safe_search else "off"
        self.engine = DuckDuckGoSearchAPIWrapper(
            region=region,
            max_results=max_results,
            safesearch=wrapper_safesearch,
        )

        # ``full_search`` is intentionally left undefined unless both the flag
        # and an LLM are present; _get_full_content probes for it with hasattr.
        if not (include_full_content and llm):
            return

        self.full_search = FullSearchResults(
            llm=llm,
            web_search=self.engine,
            language=language,
            max_results=max_results,
            region=region,
            time="y",
            # FullSearchResults is given the capitalised spelling.
            safesearch="Moderate" if safe_search else "Off",
        )

    def run(self, query: str) -> List[Dict[str, Any]]:
        """
        Announce the search on stdout, then hand off to the base-class pipeline.

        The two-phase flow itself is implemented by ``BaseSearchEngine.run``.
        NOTE(review): the parent is expected to honour the SEARCH_SNIPPETS_ONLY
        and SKIP_RELEVANCE_FILTER config parameters; that logic is not visible
        in this file, so confirm against the base class.

        Args:
            query: The search query

        Returns:
            Whatever the base-class ``run`` returns for ``query``
        """
        print("---Execute a search using DuckDuckGo---")

        outcome = super().run(query)
        return outcome

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Fetch preview records (no page bodies) for a query.

        Any exception raised while querying or converting the results is
        reported on stdout and turned into an empty list (best-effort lookup).

        Args:
            query: The search query

        Returns:
            One dictionary per hit with 'id', 'title', 'snippet' and 'link'
            keys, or an empty list when the wrapper does not return a list
            or an error occurs
        """
        try:
            raw_hits = self.engine.results(query, max_results=self.max_results)

            if isinstance(raw_hits, list):
                return [
                    {
                        "id": hit.get("link"),  # the URL doubles as the ID for DDG
                        "title": hit.get("title", ""),
                        "snippet": hit.get("snippet", ""),
                        "link": hit.get("link", ""),
                    }
                    for hit in raw_hits
                ]

            # Anything other than a list is treated as "no results".
            return []

        except Exception as e:
            print(f"Error getting DuckDuckGo previews: {e}")
            return []

    def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Expand the selected previews into full-content results when possible.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            The output of ``FullSearchResults._get_full_content`` when the
            helper was created in ``__init__``; otherwise ``relevant_items``
            is returned as-is
        """
        # No helper was configured: pass the previews through untouched.
        if not hasattr(self, 'full_search'):
            return relevant_items

        return self.full_search._get_full_content(relevant_items)
|