local-deep-research 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- local_deep_research/__init__.py +10 -1
- local_deep_research/api/__init__.py +20 -0
- local_deep_research/api/research_functions.py +330 -0
- local_deep_research/config.py +12 -1
- local_deep_research/defaults/.env.template +8 -7
- local_deep_research/defaults/llm_config.py +90 -49
- local_deep_research/web_search_engines/search_engine_factory.py +4 -1
- {local_deep_research-0.1.23.dist-info → local_deep_research-0.1.25.dist-info}/METADATA +58 -19
- {local_deep_research-0.1.23.dist-info → local_deep_research-0.1.25.dist-info}/RECORD +13 -11
- {local_deep_research-0.1.23.dist-info → local_deep_research-0.1.25.dist-info}/WHEEL +0 -0
- {local_deep_research-0.1.23.dist-info → local_deep_research-0.1.25.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.1.23.dist-info → local_deep_research-0.1.25.dist-info}/licenses/LICENSE +0 -0
- {local_deep_research-0.1.23.dist-info → local_deep_research-0.1.25.dist-info}/top_level.txt +0 -0
local_deep_research/__init__.py
CHANGED
@@ -15,10 +15,19 @@ from .search_system import AdvancedSearchSystem
 from .report_generator import IntegratedReportGenerator
 from .config import get_llm, get_search
 
+# Import API functions
+from .api import quick_summary, generate_report, analyze_documents
+from .api import get_available_search_engines, get_available_collections
+
 # Export it
 __all__ = [
     "AdvancedSearchSystem",
     "IntegratedReportGenerator",
     "get_llm",
-    "get_search"
+    "get_search",
+    "quick_summary",
+    "generate_report",
+    "analyze_documents",
+    "get_available_search_engines",
+    "get_available_collections"
 ]
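The practical effect of this hunk is that the five API functions become importable from the package root. A minimal usage sketch, assuming the package is installed and a default LLM provider is configured:

```python
# Sketch using the new top-level exports from the diff above;
# a working LLM/search configuration is assumed.
from local_deep_research import get_available_search_engines, quick_summary

# List the search engines this installation knows about.
for name, description in get_available_search_engines().items():
    print(f"{name}: {description}")

# Run a single research cycle and print the result.
result = quick_summary("history of the transistor", iterations=1)
print(result["summary"])
```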
local_deep_research/api/__init__.py
ADDED
@@ -0,0 +1,20 @@
+# src/local_deep_research/api/__init__.py
+"""
+API module for programmatic access to Local Deep Research functionality.
+"""
+
+from .research_functions import (
+    quick_summary,
+    generate_report,
+    analyze_documents,
+    get_available_search_engines,
+    get_available_collections
+)
+
+__all__ = [
+    "quick_summary",
+    "generate_report",
+    "analyze_documents",
+    "get_available_search_engines",
+    "get_available_collections"
+]
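Because the subpackage re-exports the same names, the functions can also be imported through the explicit `api` path, which some call sites may prefer. A brief sketch:

```python
# Equivalent, more explicit import path through the api subpackage.
from local_deep_research.api import get_available_collections

# Inspect the local document collections configured in local_collections.toml.
for name, config in get_available_collections().items():
    print(name, config)
```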
local_deep_research/api/research_functions.py
ADDED
@@ -0,0 +1,330 @@
+"""
+API module for Local Deep Research.
+Provides programmatic access to search and research capabilities.
+"""
+
+from typing import Dict, List, Optional, Union, Any, Callable
+import logging
+import os
+import traceback
+import toml
+from ..search_system import AdvancedSearchSystem
+from ..report_generator import IntegratedReportGenerator
+from ..config import get_llm, get_search, settings
+from ..utilties.search_utilities import remove_think_tags
+
+logger = logging.getLogger(__name__)
+
+def quick_summary(
+    query: str,
+    search_tool: Optional[str] = None,
+    iterations: int = 1,
+    questions_per_iteration: int = 1,
+    max_results: int = 20,
+    max_filtered_results: int = 5,
+    region: str = "us",
+    time_period: str = "y",
+    safe_search: bool = True,
+    temperature: float = 0.7,
+    progress_callback: Optional[Callable] = None,
+) -> Dict[str, Any]:
+    """
+    Generate a quick research summary for a given query.
+
+    Args:
+        query: The research query to analyze
+        search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
+        iterations: Number of research cycles to perform
+        questions_per_iteration: Number of questions to generate per cycle
+        max_results: Maximum number of search results to consider
+        max_filtered_results: Maximum results after relevance filtering
+        region: Search region/locale
+        time_period: Time period for search results (d=day, w=week, m=month, y=year)
+        safe_search: Whether to enable safe search
+        temperature: LLM temperature for generation
+        progress_callback: Optional callback function to receive progress updates
+
+    Returns:
+        Dictionary containing the research results with keys:
+        - 'summary': The generated summary text
+        - 'findings': List of detailed findings from each search
+        - 'iterations': Number of iterations performed
+        - 'questions': Questions generated during research
+    """
+    logger.info(f"Generating quick summary for query: {query}")
+
+
+    # Get language model with custom temperature
+    llm = get_llm(temperature=temperature)
+
+    # Create search system with custom parameters
+    system = AdvancedSearchSystem()
+
+    # Override default settings with user-provided values
+    system.max_iterations = iterations
+    system.questions_per_iteration = questions_per_iteration
+    system.model = llm  # Ensure the model is directly attached to the system
+
+    # Set the search engine if specified
+    if search_tool:
+        search_engine = get_search(search_tool)
+        if search_engine:
+            system.search = search_engine
+        else:
+            logger.warning(f"Could not create search engine '{search_tool}', using default.")
+
+    # Set progress callback if provided
+    if progress_callback:
+        system.set_progress_callback(progress_callback)
+
+    # Perform the search and analysis
+    results = system.analyze_topic(query)
+
+    # Extract the summary from the current knowledge
+    if results and "current_knowledge" in results:
+        summary = results["current_knowledge"]
+    else:
+        summary = "Unable to generate summary for the query."
+
+    # Prepare the return value
+    return {
+        "summary": summary,
+        "findings": results.get("findings", []),
+        "iterations": results.get("iterations", 0),
+        "questions": results.get("questions", {}),
+        "formatted_findings": results.get("formatted_findings", ""),
+        "sources": results.get("all_links_of_system", [])
+    }
+
+
+def generate_report(
+    query: str,
+    search_tool: Optional[str] = None,
+    iterations: int = 2,
+    questions_per_iteration: int = 2,
+    searches_per_section: int = 2,
+    max_results: int = 50,
+    max_filtered_results: int = 5,
+    region: str = "us",
+    time_period: str = "y",
+    safe_search: bool = True,
+    temperature: float = 0.7,
+    output_file: Optional[str] = None,
+    progress_callback: Optional[Callable] = None,
+) -> Dict[str, Any]:
+    """
+    Generate a comprehensive, structured research report for a given query.
+
+    Args:
+        query: The research query to analyze
+        search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
+        iterations: Number of research cycles to perform
+        questions_per_iteration: Number of questions to generate per cycle
+        searches_per_section: Number of searches to perform per report section
+        max_results: Maximum number of search results to consider
+        max_filtered_results: Maximum results after relevance filtering
+        region: Search region/locale
+        time_period: Time period for search results (d=day, w=week, m=month, y=year)
+        safe_search: Whether to enable safe search
+        temperature: LLM temperature for generation
+        output_file: Optional path to save report markdown file
+        progress_callback: Optional callback function to receive progress updates
+
+    Returns:
+        Dictionary containing the research report with keys:
+        - 'content': The full report content in markdown format
+        - 'metadata': Report metadata including generated timestamp and query
+    """
+    logger.info(f"Generating comprehensive research report for query: {query}")
+
+
+    # Get language model with custom temperature
+    llm = get_llm(temperature=temperature)
+
+    # Create search system with custom parameters
+    system = AdvancedSearchSystem()
+
+    # Override default settings with user-provided values
+    system.max_iterations = iterations
+    system.questions_per_iteration = questions_per_iteration
+    system.model = llm  # Ensure the model is directly attached to the system
+
+    # Set the search engine if specified
+    if search_tool:
+        search_engine = get_search(
+            search_tool,
+            llm_instance=llm,
+            max_results=max_results,
+            max_filtered_results=max_filtered_results,
+            region=region,
+            time_period=time_period,
+            safe_search=safe_search
+        )
+        if search_engine:
+            system.search = search_engine
+        else:
+            logger.warning(f"Could not create search engine '{search_tool}', using default.")
+
+    # Set progress callback if provided
+    if progress_callback:
+        system.set_progress_callback(progress_callback)
+
+    # Perform the initial research
+    initial_findings = system.analyze_topic(query)
+
+    # Generate the structured report
+    report_generator = IntegratedReportGenerator(searches_per_section=searches_per_section)
+    report_generator.model = llm  # Ensure the model is set on the report generator too
+    report = report_generator.generate_report(initial_findings, query)
+
+    # Save report to file if path is provided
+    if output_file and report and "content" in report:
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(report["content"])
+        logger.info(f"Report saved to {output_file}")
+        report["file_path"] = output_file
+    return report
+
+
+
+def analyze_documents(
+    query: str,
+    collection_name: str,
+    max_results: int = 10,
+    temperature: float = 0.7,
+    force_reindex: bool = False,
+    output_file: Optional[str] = None,
+) -> Dict[str, Any]:
+    """
+    Search and analyze documents in a specific local collection.
+
+    Args:
+        query: The search query
+        collection_name: Name of the local document collection to search
+        max_results: Maximum number of results to return
+        temperature: LLM temperature for summary generation
+        force_reindex: Whether to force reindexing the collection
+        output_file: Optional path to save analysis results to a file
+
+    Returns:
+        Dictionary containing:
+        - 'summary': Summary of the findings
+        - 'documents': List of matching documents with content and metadata
+    """
+    logger.info(f"Analyzing documents in collection '{collection_name}' for query: {query}")
+
+
+    # Get language model with custom temperature
+    llm = get_llm(temperature=temperature)
+
+    # Get search engine for the specified collection
+    search = get_search(collection_name, llm_instance=llm)
+
+    if not search:
+        return {
+            "summary": f"Error: Collection '{collection_name}' not found or not properly configured.",
+            "documents": []
+        }
+
+    # Set max results
+    search.max_results = max_results
+
+    # Force reindex if requested
+    if force_reindex and hasattr(search, 'embedding_manager'):
+        for folder_path in search.folder_paths:
+            search.embedding_manager.index_folder(folder_path, force_reindex=True)
+
+    # Perform the search
+    results = search.run(query)
+
+    if not results:
+        return {
+            "summary": f"No documents found in collection '{collection_name}' for query: '{query}'",
+            "documents": []
+        }
+
+    # Get LLM to generate a summary of the results
+
+    docs_text = "\n\n".join([f"Document {i+1}: {doc.get('content', doc.get('snippet', ''))[:1000]}"
+                             for i, doc in enumerate(results[:5])])  # Limit to first 5 docs and 1000 chars each
+
+    summary_prompt = f"""Analyze these document excerpts related to the query: "{query}"
+
+{docs_text}
+
+Provide a concise summary of the key information found in these documents related to the query.
+"""
+
+    summary_response = llm.invoke(summary_prompt)
+    if hasattr(summary_response, 'content'):
+        summary = remove_think_tags(summary_response.content)
+    else:
+        summary = str(summary_response)
+
+    # Create result dictionary
+    analysis_result = {
+        "summary": summary,
+        "documents": results,
+        "collection": collection_name,
+        "document_count": len(results)
+    }
+
+    # Save to file if requested
+    if output_file:
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(f"# Document Analysis: {query}\n\n")
+            f.write(f"## Summary\n\n{summary}\n\n")
+            f.write(f"## Documents Found: {len(results)}\n\n")
+
+            for i, doc in enumerate(results):
+                f.write(f"### Document {i+1}: {doc.get('title', 'Untitled')}\n\n")
+                f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
+                f.write(f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n")
+                f.write("---\n\n")
+
+        analysis_result["file_path"] = output_file
+        logger.info(f"Analysis saved to {output_file}")
+
+    return analysis_result
+
+def get_available_search_engines() -> Dict[str, str]:
+    """
+    Get a dictionary of available search engines.
+
+    Returns:
+        Dictionary mapping engine names to descriptions
+    """
+
+    from ..web_search_engines.search_engine_factory import get_available_engines
+    engines = get_available_engines()
+
+    # Add some descriptions for common engines
+    descriptions = {
+        "auto": "Automatic selection based on query type",
+        "wikipedia": "Wikipedia articles and general knowledge",
+        "arxiv": "Scientific papers and research",
+        "pubmed": "Medical and biomedical literature",
+        "semantic_scholar": "Academic papers across all fields",
+        "github": "Code repositories and technical documentation",
+        "local_all": "All local document collections"
+    }
+
+    return {engine: descriptions.get(engine, "Search engine") for engine in engines}
+
+
+def get_available_collections() -> Dict[str, Dict[str, Any]]:
+    """
+    Get a dictionary of available local document collections.
+
+    Returns:
+        Dictionary mapping collection names to their configuration
+    """
+
+
+    from ..config import LOCAL_COLLECTIONS_FILE
+
+    if os.path.exists(LOCAL_COLLECTIONS_FILE):
+        collections = toml.load(LOCAL_COLLECTIONS_FILE)
+        return collections
+
+    return {}
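Taken together, the module gives three research entry points plus two discovery helpers. A usage sketch built from the signatures above; the exact progress-callback signature is not shown in this diff, so a permissive `*args/**kwargs` callback is assumed, and "project_docs" is an illustrative collection name:

```python
# Sketch based on the function signatures in research_functions.py above.
from local_deep_research import analyze_documents, generate_report, quick_summary

def on_progress(*args, **kwargs):
    # Callback signature is not shown in this diff; accept anything.
    print("progress:", args, kwargs)

# Quick, single-pass summary with progress reporting.
summary = quick_summary(
    query="small modular reactors",
    search_tool="wikipedia",
    iterations=1,
    progress_callback=on_progress,
)
print(summary["sources"])

# Structured report, also written to disk because output_file is set.
report = generate_report(query="small modular reactors", output_file="smr_report.md")
print(report.get("file_path"))

# Query a local collection defined in local_collections.toml.
analysis = analyze_documents(query="deployment checklist", collection_name="project_docs")
print(analysis["document_count"])
```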
local_deep_research/config.py
CHANGED
@@ -6,10 +6,12 @@ from platformdirs import user_documents_dir
 import os
 # Setup logging
 logger = logging.getLogger(__name__)
+from dotenv import load_dotenv
+import platform
 
 # Get config directory
 def get_config_dir():
-
+
 
     if platform.system() == "Windows":
         # Windows: Use Documents directory
@@ -32,7 +34,16 @@ SEARCH_ENGINES_FILE = CONFIG_DIR / "search_engines.toml"
 
 LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"
 
+# Load the .env file explicitly
+# Load the .env file explicitly
+config_dir = get_config_dir()
+env_file = config_dir / ".env"
 
+if env_file.exists():
+    logger.info(f"Loading environment variables from: {env_file}")
+    load_dotenv(dotenv_path=env_file)
+else:
+    logger.warning(f"Warning: .env file not found at {env_file}")
 # Set environment variable for Dynaconf to use
 docs_base = Path(user_documents_dir()) / "local_deep_research"
 os.environ["DOCS_DIR"] = str(docs_base)
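For context, `load_dotenv` from python-dotenv reads KEY=VALUE pairs into the process environment and, by default, does not override variables that are already set, so exported shell variables still take precedence over the file. A standalone sketch of the same load-from-config-dir pattern; the directory shown is illustrative, since the real path comes from `get_config_dir()` and varies by platform:

```python
# Standalone sketch of the pattern above, with an illustrative config path.
import logging
from pathlib import Path

from dotenv import load_dotenv

logger = logging.getLogger(__name__)

config_dir = Path.home() / ".config" / "local_deep_research"  # illustrative
env_file = config_dir / ".env"
if env_file.exists():
    load_dotenv(dotenv_path=env_file)  # existing os.environ entries win by default
else:
    logger.warning(f".env file not found at {env_file}")
```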
local_deep_research/defaults/.env.template
CHANGED
@@ -1,11 +1,12 @@
 # API Keys
-#
-#
-#
-#
-#
-#
+# LDR_OPENAI_ENDPOINT_API_KEY=your-api-key-here
+# LDR_ANTHROPIC_API_KEY=your-api-key-here
+# LDR_OPENAI_API_KEY=your-openai-key-here
+# LDR_SERP_API_KEY=your-api-key-here
+# LDR_GUARDIAN_API_KEY=your-api-key-here
+# LDR_GOOGLE_PSE_API_KEY=your-google-api-key-here
+# LDR_GOOGLE_PSE_ENGINE_ID=your-programmable-search-engine-id-here
 
 # SearXNG Configuration, add at least SEARXNG_INSTANCE to .env file to use this search engine
 # SEARXNG_INSTANCE = "http://localhost:8080"
-# SEARXNG_DELAY = 2.0
+# SEARXNG_DELAY = 2.0
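The notable change here is the `LDR_` prefix on every key: per the llm_config.py and search_engine_factory.py hunks below, API keys are now also looked up under `LDR_`-prefixed environment variable names, and the README examples further use Dynaconf's double-underscore convention (`LDR_LLM__MODEL` maps onto the nested `settings.llm.model`). A hedged sketch of configuring via the environment instead of the template file; all values are placeholders:

```python
# Sketch: set keys in the environment before importing the package,
# mirroring the template above. Values are placeholders.
import os

os.environ["LDR_ANTHROPIC_API_KEY"] = "sk-..."            # LDR_-prefixed key lookup
os.environ["SEARXNG_INSTANCE"] = "http://localhost:8080"  # SearXNG endpoint
os.environ["LDR_LLM__MODEL"] = "mistral"                  # Dynaconf nesting -> settings.llm.model

import local_deep_research  # config.py loads the .env and environment at import time
```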
local_deep_research/defaults/llm_config.py
CHANGED
@@ -9,6 +9,7 @@ from langchain_anthropic import ChatAnthropic
 from langchain_openai import ChatOpenAI
 from langchain_ollama import ChatOllama
 from langchain_community.llms import VLLM
+from local_deep_research.utilties.search_utilities import remove_think_tags
 from local_deep_research.config import settings
 import os
 import logging
@@ -23,6 +24,8 @@ VALID_PROVIDERS = ["ollama", "openai", "anthropic", "vllm", "openai_endpoint", "
 # LLM FUNCTIONS
 # ================================
 
+
+
 def get_llm(model_name=None, temperature=None, provider=None):
     """
     Get LLM instance based on model name and provider.
@@ -33,7 +36,7 @@ def get_llm(model_name=None, temperature=None, provider=None):
         provider: Provider to use (if None, uses settings.llm.provider)
 
     Returns:
-        A LangChain LLM instance
+        A LangChain LLM instance with automatic think-tag removal
     """
     # Use settings values for parameters if not provided
     if model_name is None:
@@ -56,31 +59,42 @@ def get_llm(model_name=None, temperature=None, provider=None):
 
     # Handle different providers
     if provider == "anthropic":
-
+        api_key_name = 'ANTHROPIC_API_KEY'
+        api_key = settings.get(api_key_name, '')
         if not api_key:
-            api_key = os.getenv(
+            api_key = os.getenv(api_key_name)
+        if not api_key:
+            api_key = os.getenv("LDR_" + api_key_name)
         if not api_key:
             logger.warning("ANTHROPIC_API_KEY not found. Falling back to default model.")
             return get_fallback_model(temperature)
 
-
+        llm = ChatAnthropic(
             model=model_name, anthropic_api_key=api_key, **common_params
         )
+        return wrap_llm_without_think_tags(llm)
 
     elif provider == "openai":
-
+        api_key_name = 'OPENAI_API_KEY'
+        api_key = settings.get(api_key_name, '')
         if not api_key:
-            api_key = os.getenv(
+            api_key = os.getenv(api_key_name)
+        if not api_key:
+            api_key = os.getenv("LDR_" + api_key_name)
         if not api_key:
             logger.warning("OPENAI_API_KEY not found. Falling back to default model.")
             return get_fallback_model(temperature)
 
-
+        llm = ChatOpenAI(model=model_name, api_key=api_key, **common_params)
+        return wrap_llm_without_think_tags(llm)
 
     elif provider == "openai_endpoint":
-
+        api_key_name = 'OPENAI_ENDPOINT_API_KEY'
+        api_key = settings.get(api_key_name, '')
         if not api_key:
-            api_key = os.getenv(
+            api_key = os.getenv(api_key_name)
+        if not api_key:
+            api_key = os.getenv("LDR_" + api_key_name)
         if not api_key:
             logger.warning("OPENAI_ENDPOINT_API_KEY not found. Falling back to default model.")
             return get_fallback_model(temperature)
@@ -88,16 +102,17 @@ def get_llm(model_name=None, temperature=None, provider=None):
         # Get endpoint URL from settings
        openai_endpoint_url = settings.llm.openai_endpoint_url
 
-
+        llm = ChatOpenAI(
             model=model_name,
             api_key=api_key,
             openai_api_base=openai_endpoint_url,
             **common_params
         )
+        return wrap_llm_without_think_tags(llm)
 
     elif provider == "vllm":
         try:
-
+            llm = VLLM(
                 model=model_name,
                 trust_remote_code=True,
                 max_new_tokens=128,
@@ -105,6 +120,7 @@ def get_llm(model_name=None, temperature=None, provider=None):
                 top_p=0.95,
                 temperature=temperature,
             )
+            return wrap_llm_without_think_tags(llm)
         except Exception as e:
             logger.error(f"Error loading VLLM model: {e}")
             logger.warning("Falling back.")
@@ -114,54 +130,54 @@ def get_llm(model_name=None, temperature=None, provider=None):
         try:
             # Use the configurable Ollama base URL
             base_url = settings.get('OLLAMA_BASE_URL', settings.llm.get('ollama_base_url', 'http://localhost:11434'))
-
+            llm = ChatOllama(model=model_name, base_url=base_url, **common_params)
+            return wrap_llm_without_think_tags(llm)
         except Exception as e:
             logger.error(f"Error loading Ollama model: {e}")
             return get_fallback_model(temperature)
 
     elif provider == "lmstudio":
-
-
-
-
-
-
-
-
-
-
-
-
+        # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
+        lmstudio_url = settings.llm.get('lmstudio_url', "http://localhost:1234")
+
+        llm = ChatOpenAI(
+            model=model_name,
+            api_key="lm-studio",  # LM Studio doesn't require a real API key
+            base_url=f"{lmstudio_url}/v1",  # Use the configured URL with /v1 endpoint
+            temperature=temperature,
+            max_tokens=settings.llm.max_tokens
+        )
+        return wrap_llm_without_think_tags(llm)
 
     elif provider == "llamacpp":
-
-
-
-
-
-
-
-
-            raise ValueError("llamacpp_model_path not set in settings.toml")
-
-        # Get additional LlamaCpp parameters
-        n_gpu_layers = settings.llm.get('llamacpp_n_gpu_layers', 1)
-        n_batch = settings.llm.get('llamacpp_n_batch', 512)
-        f16_kv = settings.llm.get('llamacpp_f16_kv', True)
+        # Import LlamaCpp
+        from langchain_community.llms import LlamaCpp
+
+        # Get LlamaCpp model path from settings
+        model_path = settings.llm.get('llamacpp_model_path', "")
+        if not model_path:
+            logger.error("llamacpp_model_path not set in settings")
+            raise ValueError("llamacpp_model_path not set in settings.toml")
 
-
-
-
-
-
-
-
-
-
-
+        # Get additional LlamaCpp parameters
+        n_gpu_layers = settings.llm.get('llamacpp_n_gpu_layers', 1)
+        n_batch = settings.llm.get('llamacpp_n_batch', 512)
+        f16_kv = settings.llm.get('llamacpp_f16_kv', True)
+
+        # Create LlamaCpp instance
+        llm = LlamaCpp(
+            model_path=model_path,
+            temperature=temperature,
+            max_tokens=settings.llm.max_tokens,
+            n_gpu_layers=n_gpu_layers,
+            n_batch=n_batch,
+            f16_kv=f16_kv,
+            verbose=True
+        )
+        return wrap_llm_without_think_tags(llm)
 
     else:
-        return get_fallback_model(temperature)
+        return wrap_llm_without_think_tags(get_fallback_model(temperature))
 
 def get_fallback_model(temperature=None):
     """Create a dummy model for when no providers are available"""
@@ -174,6 +190,31 @@ def get_fallback_model(temperature=None):
 # COMPATIBILITY FUNCTIONS
 # ================================
 
+def wrap_llm_without_think_tags(llm):
+    """Create a wrapper class that processes LLM outputs with remove_think_tags"""
+
+
+    class ProcessingLLMWrapper:
+        def __init__(self, base_llm):
+            self.base_llm = base_llm
+
+        def invoke(self, *args, **kwargs):
+            response = self.base_llm.invoke(*args, **kwargs)
+
+            # Process the response content if it has a content attribute
+            if hasattr(response, 'content'):
+                response.content = remove_think_tags(response.content)
+            elif isinstance(response, str):
+                response = remove_think_tags(response)
+
+            return response
+
+        # Pass through any other attributes to the base LLM
+        def __getattr__(self, name):
+            return getattr(self.base_llm, name)
+
+    return ProcessingLLMWrapper(llm)
+
 def get_available_provider_types():
     """Return available model providers"""
     providers = {}
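The wrapper exists because reasoning models (for example `deepseek-r1` served through Ollama) emit `<think>…</think>` blocks before their final answer; routing every `invoke` result through `remove_think_tags` strips them once, centrally, while the `__getattr__` passthrough keeps the wrapper duck-type compatible with the underlying LangChain model. The helper's implementation is not shown in this diff; a minimal regex-based sketch of what such a function typically does:

```python
# Minimal sketch of a think-tag stripper; the package's actual
# remove_think_tags implementation is not shown in this diff.
import re

def remove_think_tags(text: str) -> str:
    # Drop <think>...</think> reasoning blocks, then trim whitespace.
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()

print(remove_think_tags("<think>chain of thought</think>Final answer."))
# -> "Final answer."
```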
local_deep_research/web_search_engines/search_engine_factory.py
CHANGED
@@ -46,7 +46,10 @@ def create_search_engine(engine_name: str, llm=None, **kwargs) -> Optional[BaseS
 
         # First check environment variable
         api_key = os.getenv(api_key_env)
-
+        if not api_key:
+            api_key = os.getenv("LDR_" + api_key_env)
+
+
         # If not found in environment, check Dynaconf settings
         if not api_key and api_key_env:
             # Convert env var name to settings path (e.g., BRAVE_API_KEY -> brave_api_key)
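The same bare-name-then-`LDR_`-prefix lookup now appears in both llm_config.py and this factory. Extracted as a helper for clarity; this is an illustrative sketch, not a function that exists in the package:

```python
# Illustrative helper reproducing the two-step lookup from the diff above;
# the package itself inlines this logic rather than defining a helper.
import os
from typing import Optional

def getenv_with_ldr_prefix(name: str) -> Optional[str]:
    # Prefer the bare variable; fall back to the LDR_-prefixed form.
    return os.getenv(name) or os.getenv("LDR_" + name)

# BRAVE_API_KEY is the example used in the factory's own comment.
api_key = getenv_with_ldr_prefix("BRAVE_API_KEY")
```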
{local_deep_research-0.1.23.dist-info → local_deep_research-0.1.25.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: local-deep-research
-Version: 0.1.23
+Version: 0.1.25
 Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
 Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
 License: MIT License
@@ -65,6 +65,7 @@ Requires-Dist: xmltodict>=0.13.0
 Requires-Dist: lxml>=4.9.2
 Requires-Dist: pdfplumber>=0.9.0
 Requires-Dist: unstructured>=0.10.0
+Requires-Dist: google-search-results
 Dynamic: license-file
 
 # Local Deep Research
@@ -79,7 +80,15 @@ A powerful AI-powered research assistant that performs deep, iterative analysis
 </a>
 </div>
 
-##
+## Windows Installation
+
+Download the [Windows Installer](https://github.com/LearningCircuit/local-deep-research/releases/download/v0.1.0/LocalDeepResearch_Setup.exe) for easy one-click installation.
+
+**Requires Ollama or other model preinstalled.**
+Download from https://ollama.ai and then pull a model
+ollama pull gemma3:12b
+
+## Quick Start (not required if installed with windows installer)
 
 ```bash
 # Install the package
@@ -105,6 +114,53 @@ ldr # (OR python -m local_deep_research.main)
 
 Access the web interface at `http://127.0.0.1:5000` in your browser.
 
+## Docker Support
+
+Build the image first if you haven't already
+```bash
+docker build -t local-deep-research .
+```
+
+Quick Docker Run
+
+```bash
+# Run with default settings (connects to Ollama running on the host)
+docker run --network=host \
+  -e LDR_LLM__PROVIDER="ollama" \
+  -e LDR_LLM__MODEL="mistral" \
+  local-deep-research
+```
+
+For comprehensive Docker setup information, see:
+- [Docker Usage Guide](https://github.com/LearningCircuit/local-deep-research/blob/main/docs/docker-usage-readme.md)
+- [Docker Compose Guide](https://github.com/LearningCircuit/local-deep-research/blob/main/docs/docker-compose-guide.md)
+
+## Programmatic Access
+
+Local Deep Research now provides a simple API for programmatic access to its research capabilities:
+
+```python
+import os
+# Set environment variables to control the LLM
+os.environ["LDR_LLM__MODEL"] = "mistral"  # Specify model name
+
+from local_deep_research import quick_summary, generate_report, analyze_documents
+
+# Generate a quick research summary with custom parameters
+results = quick_summary(
+    query="advances in fusion energy",
+    search_tool="auto",            # Auto-select the best search engine
+    iterations=1,                  # Single research cycle for speed
+    questions_per_iteration=2,     # Generate 2 follow-up questions
+    max_results=30,                # Consider up to 30 search results
+    temperature=0.7                # Control creativity of generation
+)
+print(results["summary"])
+```
+
+These functions provide flexible options for customizing the search parameters, iterations, and output formats. For more examples, see the [programmatic access tutorial](https://github.com/LearningCircuit/local-deep-research/blob/main/examples/programmatic_access.ipynb).
+
+
 ## Features
 
 - 🔍 **Advanced Research Capabilities**
@@ -286,23 +342,6 @@ You can use local document search in several ways:
 3. **All collections**: Set `tool = "local_all"` to search across all collections
 4. **Query syntax**: Type `collection:project_docs your query` to target a specific collection
 
-## Docker Support
-
-Local Deep Research can run in Docker containers for easy deployment across environments.
-
-### Quick Docker Run
-
-```bash
-# Run with default settings (connects to Ollama running on the host)
-docker run --network=host \
-  -e LDR_LLM__PROVIDER="ollama" \
-  -e LDR_LLM__MODEL="mistral" \
-  local-deep-research
-```
-
-For comprehensive Docker setup information, see:
-- [Docker Usage Guide](https://github.com/LearningCircuit/local-deep-research/blob/main/docs/docker-usage-readme.md)
-- [Docker Compose Guide](https://github.com/LearningCircuit/local-deep-research/blob/main/docs/docker-compose-guide.md)
 
 ## Advanced Configuration
{local_deep_research-0.1.23.dist-info → local_deep_research-0.1.25.dist-info}/RECORD
RENAMED
@@ -1,12 +1,14 @@
-local_deep_research/__init__.py,sha256=
+local_deep_research/__init__.py,sha256=Grde0sFEYyCXrPCfxd-9b9v1M6OurrzQbVYRmKQ9E7w,886
 local_deep_research/citation_handler.py,sha256=v_fwTy-2XvUuoH3OQRzmBrvaiN7mBk8jbNfySslmt5g,4357
-local_deep_research/config.py,sha256=
+local_deep_research/config.py,sha256=n6TSkNtdie86Sc71jjnejwK_hBIDpJahNZwbiDEfzXg,10233
 local_deep_research/main.py,sha256=uQXtGQ6LtZNd5Qw63D5ke4Q_LjYimouWVSUknVsk3JQ,3645
 local_deep_research/report_generator.py,sha256=EvaArnWirMgg42fMzmZeJczoEYujEbJ2ryHHYuuoXx8,8058
 local_deep_research/search_system.py,sha256=yY3BEzX68vdtUcYF9h6lC3yVao0YA_NSBj6W3-RwlKk,15459
-local_deep_research/
+local_deep_research/api/__init__.py,sha256=H0WGFSohUR0T2QswtWngPZWoMYPs9VWQTQYaivAlrJU,440
+local_deep_research/api/research_functions.py,sha256=Z23wZYsB1x2ivdFYJ9uqIqCAwjR2RdOff7Bq30DxQYU,12099
+local_deep_research/defaults/.env.template,sha256=SI8WDMFrj-yANlnfd6jJ4fLYke7zSzCd9Ukk_HpyM88,500
 local_deep_research/defaults/__init__.py,sha256=2Vvlkl-gmP_qPYWegE4JBgummypogl3VXrQ1XzptFDU,1381
-local_deep_research/defaults/llm_config.py,sha256=
+local_deep_research/defaults/llm_config.py,sha256=1KiW9k8kmsUD5u9VgEdgWZBNMmK1BA0ZxoGbuC2spAk,11652
 local_deep_research/defaults/local_collections.toml,sha256=zNa03PVnFrZ757JdZOuW6QDxkOc6ep5tG8baGBrMmXM,1778
 local_deep_research/defaults/main.toml,sha256=6Lzbc5sVLxMwu83bLBp_tpYOZgmtThCfPL1L42eTGro,1939
 local_deep_research/defaults/search_engines.toml,sha256=g0-qrw10oMgW74z_lYpPDkGwMje25mvalfY1EJ0nL3g,8134
@@ -32,7 +34,7 @@ local_deep_research/web/templates/settings.html,sha256=S9A-tdpzMhP2Zw7kp2jxKlwaW
 local_deep_research/web/templates/settings_dashboard.html,sha256=De-v1KNdVvkXme5i3YZ6sIfU9aAKDc_N-AW9n4PZoso,9109
 local_deep_research/web_search_engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/web_search_engines/search_engine_base.py,sha256=QmhfjuHK2deomh8tARghKuYnF-5t3wwBB661odS2VtU,8065
-local_deep_research/web_search_engines/search_engine_factory.py,sha256=
+local_deep_research/web_search_engines/search_engine_factory.py,sha256=8REYoRdDWvB6XLhBym8rqzuULX28VQ-UKWNcRA5tLTQ,11189
 local_deep_research/web_search_engines/search_engines_config.py,sha256=5C0tCmy_Jpv1YHLZLlyS7h5B2XToYcWPAaBDEOsxMo0,2739
 local_deep_research/web_search_engines/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/web_search_engines/engines/full_search.py,sha256=BuOz8dX-XocazCG7gGBKFnIY99FZtNFI0-Wq3fhsfp4,4689
@@ -51,9 +53,9 @@ local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py
 local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=XikEYnM-pAaR70VeAJ28lbqpRzCj4bCA9xY29taTV8g,9215
 local_deep_research/web_search_engines/engines/search_engine_wayback.py,sha256=astAvSLajDZ6rwgthJ3iBcHSWuDSYPO7uilIxaJhXmU,18132
 local_deep_research/web_search_engines/engines/search_engine_wikipedia.py,sha256=KSGJECbEcxZpVK-PhYsTCtzedSK0l1AjQmvGtx8KBks,9799
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
+local_deep_research-0.1.25.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
+local_deep_research-0.1.25.dist-info/METADATA,sha256=sVEzW1cEvbnt0d-FtGmnZLqzf7_D1cF8PWC13bxbmBM,17711
+local_deep_research-0.1.25.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+local_deep_research-0.1.25.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
+local_deep_research-0.1.25.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
+local_deep_research-0.1.25.dist-info/RECORD,,
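For readers unfamiliar with wheel internals: each RECORD row above is a CSV triple of file path, `sha256=`-prefixed digest (urlsafe base64 of the raw SHA-256, with padding stripped), and file size in bytes; the RECORD entry itself leaves its own hash and size fields empty. A hypothetical spot-check of one row against an unpacked wheel:

```python
# Hypothetical spot-check of a single RECORD row against a file on disk.
import base64
import hashlib

def record_digest(path: str) -> str:
    # RECORD stores urlsafe base64 of the raw sha256 digest, '=' padding stripped.
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# Compare against the row shown above:
# local_deep_research/citation_handler.py,sha256=v_fwTy-2XvUuoH3OQRzmBrvaiN7mBk8jbNfySslmt5g,4357
print(record_digest("local_deep_research/citation_handler.py"))
```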
{local_deep_research-0.1.23.dist-info → local_deep_research-0.1.25.dist-info}/WHEEL
RENAMED
File without changes
{local_deep_research-0.1.23.dist-info → local_deep_research-0.1.25.dist-info}/entry_points.txt
RENAMED
File without changes
{local_deep_research-0.1.23.dist-info → local_deep_research-0.1.25.dist-info}/licenses/LICENSE
RENAMED
File without changes
{local_deep_research-0.1.23.dist-info → local_deep_research-0.1.25.dist-info}/top_level.txt
RENAMED
File without changes