local-deep-research 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +24 -0
- local_deep_research/citation_handler.py +113 -0
- local_deep_research/config.py +166 -0
- local_deep_research/defaults/__init__.py +44 -0
- local_deep_research/defaults/llm_config.py +269 -0
- local_deep_research/defaults/local_collections.toml +47 -0
- local_deep_research/defaults/main.toml +57 -0
- local_deep_research/defaults/search_engines.toml +244 -0
- local_deep_research/local_collections.py +141 -0
- local_deep_research/main.py +113 -0
- local_deep_research/report_generator.py +206 -0
- local_deep_research/search_system.py +241 -0
- local_deep_research/utilties/__init__.py +0 -0
- local_deep_research/utilties/enums.py +9 -0
- local_deep_research/utilties/llm_utils.py +116 -0
- local_deep_research/utilties/search_utilities.py +115 -0
- local_deep_research/utilties/setup_utils.py +6 -0
- local_deep_research/web/__init__.py +2 -0
- local_deep_research/web/app.py +1209 -0
- local_deep_research/web/static/css/styles.css +1008 -0
- local_deep_research/web/static/js/app.js +2078 -0
- local_deep_research/web/templates/api_keys_config.html +82 -0
- local_deep_research/web/templates/collections_config.html +90 -0
- local_deep_research/web/templates/index.html +312 -0
- local_deep_research/web/templates/llm_config.html +120 -0
- local_deep_research/web/templates/main_config.html +89 -0
- local_deep_research/web/templates/search_engines_config.html +154 -0
- local_deep_research/web/templates/settings.html +519 -0
- local_deep_research/web/templates/settings_dashboard.html +207 -0
- local_deep_research/web_search_engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/full_search.py +128 -0
- local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
- local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
- local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
- local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
- local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
- local_deep_research/web_search_engines/full_search.py +254 -0
- local_deep_research/web_search_engines/search_engine_base.py +197 -0
- local_deep_research/web_search_engines/search_engine_factory.py +233 -0
- local_deep_research/web_search_engines/search_engines_config.py +54 -0
- local_deep_research-0.1.0.dist-info/LICENSE +21 -0
- local_deep_research-0.1.0.dist-info/METADATA +328 -0
- local_deep_research-0.1.0.dist-info/RECORD +56 -0
- local_deep_research-0.1.0.dist-info/WHEEL +5 -0
- local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
- local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,153 @@
|
|
1
|
+
"""
|
2
|
+
Search engine that searches across all local collections
|
3
|
+
"""
|
4
|
+
|
5
|
+
import logging
|
6
|
+
from typing import Dict, List, Any, Optional
|
7
|
+
from langchain_core.language_models import BaseLLM
|
8
|
+
|
9
|
+
from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
|
10
|
+
from local_deep_research.web_search_engines.search_engine_factory import create_search_engine
|
11
|
+
from local_deep_research import config
|
12
|
+
|
13
|
+
# Setup logging
|
14
|
+
logger = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
class LocalAllSearchEngine(BaseSearchEngine):
    """
    Search engine that searches across all local document collections.

    Acts as a meta search engine specifically for local collections: one
    engine is created per enabled collection, the previews from all of them
    are merged and ranked by similarity, and full-content retrieval is
    delegated back to the engine of the collection each preview came from.
    """

    def __init__(
        self,
        llm: Optional[BaseLLM] = None,
        max_results: int = 10,
        max_filtered_results: Optional[int] = None,
        **kwargs
    ):
        """
        Initialize the local all-collections search engine.

        Args:
            llm: Language model for relevance filtering
            max_results: Maximum number of merged previews returned
            max_filtered_results: Maximum results after filtering
            **kwargs: Accepted for call-site compatibility; they are not
                currently forwarded to the per-collection engines.
        """
        # Initialize the base search engine
        super().__init__(llm=llm, max_filtered_results=max_filtered_results)

        self.max_results = max_results

        # Maps collection_id -> {"engine", "name", "description"}
        self.local_engines = {}

        try:
            collections = self._load_collections()

            for collection_id, collection in collections.items():
                # Collections are enabled unless explicitly switched off
                if not collection.get("enabled", True):
                    continue

                # Create a search engine for this collection; a failure here
                # only disables that one collection.
                try:
                    engine = create_search_engine(
                        collection_id,
                        llm=llm,
                        max_filtered_results=max_filtered_results
                    )

                    if engine:
                        self.local_engines[collection_id] = {
                            "engine": engine,
                            "name": collection.get("name", collection_id),
                            "description": collection.get("description", "")
                        }
                except Exception as e:
                    logger.error(f"Error creating search engine for collection '{collection_id}': {e}")
        except ImportError:
            logger.warning("No local collections configuration found")

    @staticmethod
    def _load_collections() -> Dict[str, Any]:
        """
        Import and return the LOCAL_COLLECTIONS mapping.

        Raises:
            ImportError: If neither the packaged nor the top-level
                ``local_collections`` module provides LOCAL_COLLECTIONS.
        """
        try:
            # Package-qualified path, consistent with the other absolute
            # imports in this module.
            # NOTE(review): assumes local_deep_research.local_collections
            # exposes LOCAL_COLLECTIONS - confirm against that module.
            from local_deep_research.local_collections import LOCAL_COLLECTIONS
        except ImportError:
            # Fall back to the original lookup: a top-level
            # ``local_collections`` module on sys.path.
            from local_collections import LOCAL_COLLECTIONS
        return LOCAL_COLLECTIONS

    @staticmethod
    def _similarity_score(preview: Dict[str, Any]) -> float:
        """Return the preview's similarity as a float, or 0.0 if missing/invalid."""
        try:
            return float(preview.get("similarity", 0))
        except (TypeError, ValueError):
            # None or a non-numeric value must not abort the whole search
            return 0.0

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information for documents from all local collections.

        Each preview is tagged in place with ``collection_id``,
        ``collection_name`` and ``collection_description`` so that
        ``_get_full_content`` can route it back to the right engine.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries, sorted by descending similarity
            and truncated to ``max_results``
        """
        logger.info(f"Searching across all local collections for query: {query}")

        all_previews = []

        # Get previews from each local search engine; one failing collection
        # must not prevent the others from contributing results.
        for collection_id, engine_info in self.local_engines.items():
            engine = engine_info["engine"]
            try:
                # Treat a None return as "no results" rather than an error
                previews = engine._get_previews(query) or []

                # Add collection info to each preview
                for preview in previews:
                    preview["collection_id"] = collection_id
                    preview["collection_name"] = engine_info["name"]
                    preview["collection_description"] = engine_info["description"]

                all_previews.extend(previews)
            except Exception as e:
                logger.error(f"Error searching collection '{collection_id}': {e}")

        if not all_previews:
            logger.info(f"No local documents found for query: {query}")
            return []

        # Sort by similarity score if available (missing/invalid scores sort last)
        all_previews.sort(key=self._similarity_score, reverse=True)

        # Limit to max_results
        return all_previews[:self.max_results]

    def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant documents.
        Delegates to the appropriate collection's search engine.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content. Items whose
            collection is unknown, or whose engine failed, are returned
            unchanged (without full content).
        """
        # Group items by collection
        items_by_collection: Dict[str, List[Dict[str, Any]]] = {}
        for item in relevant_items:
            collection_id = item.get("collection_id")
            if collection_id and collection_id in self.local_engines:
                items_by_collection.setdefault(collection_id, []).append(item)

        # Process each collection's items with its own engine
        all_results = []
        for collection_id, items in items_by_collection.items():
            engine = self.local_engines[collection_id]["engine"]
            try:
                results = engine._get_full_content(items)
                all_results.extend(results)
            except Exception as e:
                logger.error(f"Error getting full content from collection '{collection_id}': {e}")
                # Fall back to returning the items without full content
                all_results.extend(items)

        # Add any items that weren't processed. Items are matched by their
        # "id" when they have one; items without an "id" cannot be matched
        # against engine output, so they count as processed if they were
        # handed to an engine above.
        processed_ids = {result["id"] for result in all_results if "id" in result}
        dispatched = {
            id(item)
            for items in items_by_collection.values()
            for item in items
        }
        for item in relevant_items:
            if "id" in item:
                unprocessed = item["id"] not in processed_ids
            else:
                unprocessed = id(item) not in dispatched
            if unprocessed:
                all_results.append(item)

        return all_results
|