local-deep-research 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,10 +15,19 @@ from .search_system import AdvancedSearchSystem
  from .report_generator import IntegratedReportGenerator
  from .config import get_llm, get_search

+ # Import API functions
+ from .api import quick_summary, generate_report, analyze_documents
+ from .api import get_available_search_engines, get_available_collections
+
  # Export it
  __all__ = [
      "AdvancedSearchSystem",
      "IntegratedReportGenerator",
      "get_llm",
-     "get_search"
+     "get_search",
+     "quick_summary",
+     "generate_report",
+     "analyze_documents",
+     "get_available_search_engines",
+     "get_available_collections"
  ]
@@ -0,0 +1,20 @@
+ # src/local_deep_research/api/__init__.py
+ """
+ API module for programmatic access to Local Deep Research functionality.
+ """
+
+ from .research_functions import (
+     quick_summary,
+     generate_report,
+     analyze_documents,
+     get_available_search_engines,
+     get_available_collections
+ )
+
+ __all__ = [
+     "quick_summary",
+     "generate_report",
+     "analyze_documents",
+     "get_available_search_engines",
+     "get_available_collections"
+ ]
@@ -0,0 +1,330 @@
+ """
+ API module for Local Deep Research.
+ Provides programmatic access to search and research capabilities.
+ """
+
+ from typing import Dict, List, Optional, Union, Any, Callable
+ import logging
+ import os
+ import traceback
+ import toml
+ from ..search_system import AdvancedSearchSystem
+ from ..report_generator import IntegratedReportGenerator
+ from ..config import get_llm, get_search, settings
+ from ..utilties.search_utilities import remove_think_tags
+
+ logger = logging.getLogger(__name__)
+
+ def quick_summary(
+     query: str,
+     search_tool: Optional[str] = None,
+     iterations: int = 1,
+     questions_per_iteration: int = 1,
+     max_results: int = 20,
+     max_filtered_results: int = 5,
+     region: str = "us",
+     time_period: str = "y",
+     safe_search: bool = True,
+     temperature: float = 0.7,
+     progress_callback: Optional[Callable] = None,
+ ) -> Dict[str, Any]:
+     """
+     Generate a quick research summary for a given query.
+
+     Args:
+         query: The research query to analyze
+         search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
+         iterations: Number of research cycles to perform
+         questions_per_iteration: Number of questions to generate per cycle
+         max_results: Maximum number of search results to consider
+         max_filtered_results: Maximum results after relevance filtering
+         region: Search region/locale
+         time_period: Time period for search results (d=day, w=week, m=month, y=year)
+         safe_search: Whether to enable safe search
+         temperature: LLM temperature for generation
+         progress_callback: Optional callback function to receive progress updates
+
+     Returns:
+         Dictionary containing the research results with keys:
+         - 'summary': The generated summary text
+         - 'findings': List of detailed findings from each search
+         - 'iterations': Number of iterations performed
+         - 'questions': Questions generated during research
+     """
+     logger.info(f"Generating quick summary for query: {query}")
+
+
+     # Get language model with custom temperature
+     llm = get_llm(temperature=temperature)
+
+     # Create search system with custom parameters
+     system = AdvancedSearchSystem()
+
+     # Override default settings with user-provided values
+     system.max_iterations = iterations
+     system.questions_per_iteration = questions_per_iteration
+     system.model = llm  # Ensure the model is directly attached to the system
+
+     # Set the search engine if specified
+     if search_tool:
+         search_engine = get_search(search_tool)
+         if search_engine:
+             system.search = search_engine
+         else:
+             logger.warning(f"Could not create search engine '{search_tool}', using default.")
+
+     # Set progress callback if provided
+     if progress_callback:
+         system.set_progress_callback(progress_callback)
+
+     # Perform the search and analysis
+     results = system.analyze_topic(query)
+
+     # Extract the summary from the current knowledge
+     if results and "current_knowledge" in results:
+         summary = results["current_knowledge"]
+     else:
+         summary = "Unable to generate summary for the query."
+
+     # Prepare the return value
+     return {
+         "summary": summary,
+         "findings": results.get("findings", []),
+         "iterations": results.get("iterations", 0),
+         "questions": results.get("questions", {}),
+         "formatted_findings": results.get("formatted_findings", ""),
+         "sources": results.get("all_links_of_system", [])
+     }
+
+
+ def generate_report(
+     query: str,
+     search_tool: Optional[str] = None,
+     iterations: int = 2,
+     questions_per_iteration: int = 2,
+     searches_per_section: int = 2,
+     max_results: int = 50,
+     max_filtered_results: int = 5,
+     region: str = "us",
+     time_period: str = "y",
+     safe_search: bool = True,
+     temperature: float = 0.7,
+     output_file: Optional[str] = None,
+     progress_callback: Optional[Callable] = None,
+ ) -> Dict[str, Any]:
+     """
+     Generate a comprehensive, structured research report for a given query.
+
+     Args:
+         query: The research query to analyze
+         search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
+         iterations: Number of research cycles to perform
+         questions_per_iteration: Number of questions to generate per cycle
+         searches_per_section: Number of searches to perform per report section
+         max_results: Maximum number of search results to consider
+         max_filtered_results: Maximum results after relevance filtering
+         region: Search region/locale
+         time_period: Time period for search results (d=day, w=week, m=month, y=year)
+         safe_search: Whether to enable safe search
+         temperature: LLM temperature for generation
+         output_file: Optional path to save report markdown file
+         progress_callback: Optional callback function to receive progress updates
+
+     Returns:
+         Dictionary containing the research report with keys:
+         - 'content': The full report content in markdown format
+         - 'metadata': Report metadata including generated timestamp and query
+     """
+     logger.info(f"Generating comprehensive research report for query: {query}")
+
+
+     # Get language model with custom temperature
+     llm = get_llm(temperature=temperature)
+
+     # Create search system with custom parameters
+     system = AdvancedSearchSystem()
+
+     # Override default settings with user-provided values
+     system.max_iterations = iterations
+     system.questions_per_iteration = questions_per_iteration
+     system.model = llm  # Ensure the model is directly attached to the system
+
+     # Set the search engine if specified
+     if search_tool:
+         search_engine = get_search(
+             search_tool,
+             llm_instance=llm,
+             max_results=max_results,
+             max_filtered_results=max_filtered_results,
+             region=region,
+             time_period=time_period,
+             safe_search=safe_search
+         )
+         if search_engine:
+             system.search = search_engine
+         else:
+             logger.warning(f"Could not create search engine '{search_tool}', using default.")
+
+     # Set progress callback if provided
+     if progress_callback:
+         system.set_progress_callback(progress_callback)
+
+     # Perform the initial research
+     initial_findings = system.analyze_topic(query)
+
+     # Generate the structured report
+     report_generator = IntegratedReportGenerator(searches_per_section=searches_per_section)
+     report_generator.model = llm  # Ensure the model is set on the report generator too
+     report = report_generator.generate_report(initial_findings, query)
+
+     # Save report to file if path is provided
+     if output_file and report and "content" in report:
+         with open(output_file, "w", encoding="utf-8") as f:
+             f.write(report["content"])
+         logger.info(f"Report saved to {output_file}")
+         report["file_path"] = output_file
+     return report
+
+
+
+ def analyze_documents(
+     query: str,
+     collection_name: str,
+     max_results: int = 10,
+     temperature: float = 0.7,
+     force_reindex: bool = False,
+     output_file: Optional[str] = None,
+ ) -> Dict[str, Any]:
+     """
+     Search and analyze documents in a specific local collection.
+
+     Args:
+         query: The search query
+         collection_name: Name of the local document collection to search
+         max_results: Maximum number of results to return
+         temperature: LLM temperature for summary generation
+         force_reindex: Whether to force reindexing the collection
+         output_file: Optional path to save analysis results to a file
+
+     Returns:
+         Dictionary containing:
+         - 'summary': Summary of the findings
+         - 'documents': List of matching documents with content and metadata
+     """
+     logger.info(f"Analyzing documents in collection '{collection_name}' for query: {query}")
+
+
+     # Get language model with custom temperature
+     llm = get_llm(temperature=temperature)
+
+     # Get search engine for the specified collection
+     search = get_search(collection_name, llm_instance=llm)
+
+     if not search:
+         return {
+             "summary": f"Error: Collection '{collection_name}' not found or not properly configured.",
+             "documents": []
+         }
+
+     # Set max results
+     search.max_results = max_results
+
+     # Force reindex if requested
+     if force_reindex and hasattr(search, 'embedding_manager'):
+         for folder_path in search.folder_paths:
+             search.embedding_manager.index_folder(folder_path, force_reindex=True)
+
+     # Perform the search
+     results = search.run(query)
+
+     if not results:
+         return {
+             "summary": f"No documents found in collection '{collection_name}' for query: '{query}'",
+             "documents": []
+         }
+
+     # Get LLM to generate a summary of the results
+
+     docs_text = "\n\n".join([f"Document {i+1}: {doc.get('content', doc.get('snippet', ''))[:1000]}"
+                              for i, doc in enumerate(results[:5])])  # Limit to first 5 docs and 1000 chars each
+
+     summary_prompt = f"""Analyze these document excerpts related to the query: "{query}"
+
+     {docs_text}
+
+     Provide a concise summary of the key information found in these documents related to the query.
+     """
+
+     summary_response = llm.invoke(summary_prompt)
+     if hasattr(summary_response, 'content'):
+         summary = remove_think_tags(summary_response.content)
+     else:
+         summary = str(summary_response)
+
+     # Create result dictionary
+     analysis_result = {
+         "summary": summary,
+         "documents": results,
+         "collection": collection_name,
+         "document_count": len(results)
+     }
+
+     # Save to file if requested
+     if output_file:
+         with open(output_file, "w", encoding="utf-8") as f:
+             f.write(f"# Document Analysis: {query}\n\n")
+             f.write(f"## Summary\n\n{summary}\n\n")
+             f.write(f"## Documents Found: {len(results)}\n\n")
+
+             for i, doc in enumerate(results):
+                 f.write(f"### Document {i+1}: {doc.get('title', 'Untitled')}\n\n")
+                 f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
+                 f.write(f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n")
+                 f.write("---\n\n")
+
+         analysis_result["file_path"] = output_file
+         logger.info(f"Analysis saved to {output_file}")
+
+     return analysis_result
+
+ def get_available_search_engines() -> Dict[str, str]:
+     """
+     Get a dictionary of available search engines.
+
+     Returns:
+         Dictionary mapping engine names to descriptions
+     """
+
+     from ..web_search_engines.search_engine_factory import get_available_engines
+     engines = get_available_engines()
+
+     # Add some descriptions for common engines
+     descriptions = {
+         "auto": "Automatic selection based on query type",
+         "wikipedia": "Wikipedia articles and general knowledge",
+         "arxiv": "Scientific papers and research",
+         "pubmed": "Medical and biomedical literature",
+         "semantic_scholar": "Academic papers across all fields",
+         "github": "Code repositories and technical documentation",
+         "local_all": "All local document collections"
+     }
+
+     return {engine: descriptions.get(engine, "Search engine") for engine in engines}
+
+
+ def get_available_collections() -> Dict[str, Dict[str, Any]]:
+     """
+     Get a dictionary of available local document collections.
+
+     Returns:
+         Dictionary mapping collection names to their configuration
+     """
+
+
+     from ..config import LOCAL_COLLECTIONS_FILE
+
+     if os.path.exists(LOCAL_COLLECTIONS_FILE):
+         collections = toml.load(LOCAL_COLLECTIONS_FILE)
+         return collections
+
+     return {}
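A minimal usage sketch of the new report and document-analysis entry points (illustrative only, not part of the package diff; the query strings, the `project_docs` collection name, and the output paths are placeholders):

```python
# Assumes the package is installed and an LLM provider is already configured.
from local_deep_research.api import generate_report, analyze_documents

# Build a structured markdown report and save it to disk.
report = generate_report(
    query="status of perovskite solar cell commercialization",
    search_tool="auto",      # let the factory pick a search engine
    iterations=2,
    output_file="perovskite_report.md",
)
print(report.get("content", "")[:500])

# Search a local document collection; "project_docs" is a hypothetical collection name.
analysis = analyze_documents(
    query="deployment checklist",
    collection_name="project_docs",
    max_results=10,
    output_file="project_docs_analysis.md",
)
print(analysis["summary"])
```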
@@ -6,10 +6,12 @@ from platformdirs import user_documents_dir
  import os
  # Setup logging
  logger = logging.getLogger(__name__)
+ from dotenv import load_dotenv
+ import platform

  # Get config directory
  def get_config_dir():
-     import platform
+

      if platform.system() == "Windows":
          # Windows: Use Documents directory
@@ -32,7 +34,16 @@ SEARCH_ENGINES_FILE = CONFIG_DIR / "search_engines.toml"

  LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"

+ # Load the .env file explicitly
+ # Load the .env file explicitly
+ config_dir = get_config_dir()
+ env_file = config_dir / ".env"

+ if env_file.exists():
+     logger.info(f"Loading environment variables from: {env_file}")
+     load_dotenv(dotenv_path=env_file)
+ else:
+     logger.warning(f"Warning: .env file not found at {env_file}")
  # Set environment variable for Dynaconf to use
  docs_base = Path(user_documents_dir()) / "local_deep_research"
  os.environ["DOCS_DIR"] = str(docs_base)
@@ -1,11 +1,12 @@
  # API Keys
- # ANTHROPIC_API_KEY=your-api-key-here
- # OPENAI_API_KEY=your-openai-key-here
- # SERP_API_KEY=your-api-key-here
- # GUARDIAN_API_KEY=your-api-key-here
- # GOOGLE_PSE_API_KEY=your-google-api-key-here
- # GOOGLE_PSE_ENGINE_ID=your-programmable-search-engine-id-here
+ # LDR_OPENAI_ENDPOINT_API_KEY=your-api-key-here
+ # LDR_ANTHROPIC_API_KEY=your-api-key-here
+ # LDR_OPENAI_API_KEY=your-openai-key-here
+ # LDR_SERP_API_KEY=your-api-key-here
+ # LDR_GUARDIAN_API_KEY=your-api-key-here
+ # LDR_GOOGLE_PSE_API_KEY=your-google-api-key-here
+ # LDR_GOOGLE_PSE_ENGINE_ID=your-programmable-search-engine-id-here

  # SearXNG Configuration, add at least SEARXNG_INSTANCE to .env file to use this search engine
  # SEARXNG_INSTANCE = "http://localhost:8080"
- # SEARXNG_DELAY = 2.0
+ # SEARXNG_DELAY = 2.0
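The renamed keys above carry an `LDR_` prefix; the llm_config and search-engine-factory hunks further down fall back to this prefixed form when the bare name is not set, so either spelling works. A small illustrative sketch (the key value is a placeholder):

```python
# Provide an API key via the new LDR_-prefixed environment variable before
# importing the package, so the provider lookup in llm_config can find it.
import os

os.environ["LDR_ANTHROPIC_API_KEY"] = "your-api-key-here"  # placeholder value

from local_deep_research import get_llm

llm = get_llm(provider="anthropic")  # model name resolved from settings
```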
@@ -9,6 +9,7 @@ from langchain_anthropic import ChatAnthropic
  from langchain_openai import ChatOpenAI
  from langchain_ollama import ChatOllama
  from langchain_community.llms import VLLM
+ from local_deep_research.utilties.search_utilities import remove_think_tags
  from local_deep_research.config import settings
  import os
  import logging
@@ -23,6 +24,8 @@ VALID_PROVIDERS = ["ollama", "openai", "anthropic", "vllm", "openai_endpoint", "
  # LLM FUNCTIONS
  # ================================

+
+
  def get_llm(model_name=None, temperature=None, provider=None):
      """
      Get LLM instance based on model name and provider.
@@ -33,7 +36,7 @@ def get_llm(model_name=None, temperature=None, provider=None):
          provider: Provider to use (if None, uses settings.llm.provider)

      Returns:
-         A LangChain LLM instance
+         A LangChain LLM instance with automatic think-tag removal
      """
      # Use settings values for parameters if not provided
      if model_name is None:
@@ -56,31 +59,42 @@ def get_llm(model_name=None, temperature=None, provider=None):

      # Handle different providers
      if provider == "anthropic":
-         api_key = settings.get('ANTHROPIC_API_KEY', '')
+         api_key_name = 'ANTHROPIC_API_KEY'
+         api_key = settings.get(api_key_name, '')
          if not api_key:
-             api_key = os.getenv('ANTHROPIC_API_KEY')
+             api_key = os.getenv(api_key_name)
+         if not api_key:
+             api_key = os.getenv("LDR_" + api_key_name)
          if not api_key:
              logger.warning("ANTHROPIC_API_KEY not found. Falling back to default model.")
              return get_fallback_model(temperature)

-         return ChatAnthropic(
+         llm = ChatAnthropic(
              model=model_name, anthropic_api_key=api_key, **common_params
          )
+         return wrap_llm_without_think_tags(llm)

      elif provider == "openai":
-         api_key = settings.get('OPENAI_API_KEY', '')
+         api_key_name = 'OPENAI_API_KEY'
+         api_key = settings.get(api_key_name, '')
          if not api_key:
-             api_key = os.getenv('OPENAI_API_KEY')
+             api_key = os.getenv(api_key_name)
+         if not api_key:
+             api_key = os.getenv("LDR_" + api_key_name)
          if not api_key:
              logger.warning("OPENAI_API_KEY not found. Falling back to default model.")
              return get_fallback_model(temperature)

-         return ChatOpenAI(model=model_name, api_key=api_key, **common_params)
+         llm = ChatOpenAI(model=model_name, api_key=api_key, **common_params)
+         return wrap_llm_without_think_tags(llm)

      elif provider == "openai_endpoint":
-         api_key = settings.get('OPENAI_ENDPOINT_API_KEY', '')
+         api_key_name = 'OPENAI_ENDPOINT_API_KEY'
+         api_key = settings.get(api_key_name, '')
          if not api_key:
-             api_key = os.getenv('OPENAI_ENDPOINT_API_KEY')
+             api_key = os.getenv(api_key_name)
+         if not api_key:
+             api_key = os.getenv("LDR_" + api_key_name)
          if not api_key:
              logger.warning("OPENAI_ENDPOINT_API_KEY not found. Falling back to default model.")
              return get_fallback_model(temperature)
@@ -88,16 +102,17 @@ def get_llm(model_name=None, temperature=None, provider=None):
          # Get endpoint URL from settings
          openai_endpoint_url = settings.llm.openai_endpoint_url

-         return ChatOpenAI(
+         llm = ChatOpenAI(
              model=model_name,
              api_key=api_key,
              openai_api_base=openai_endpoint_url,
              **common_params
          )
+         return wrap_llm_without_think_tags(llm)

      elif provider == "vllm":
          try:
-             return VLLM(
+             llm = VLLM(
                  model=model_name,
                  trust_remote_code=True,
                  max_new_tokens=128,
@@ -105,6 +120,7 @@ def get_llm(model_name=None, temperature=None, provider=None):
                  top_p=0.95,
                  temperature=temperature,
              )
+             return wrap_llm_without_think_tags(llm)
          except Exception as e:
              logger.error(f"Error loading VLLM model: {e}")
              logger.warning("Falling back.")
@@ -114,54 +130,54 @@ def get_llm(model_name=None, temperature=None, provider=None):
          try:
              # Use the configurable Ollama base URL
              base_url = settings.get('OLLAMA_BASE_URL', settings.llm.get('ollama_base_url', 'http://localhost:11434'))
-             return ChatOllama(model=model_name, base_url=base_url, **common_params)
+             llm = ChatOllama(model=model_name, base_url=base_url, **common_params)
+             return wrap_llm_without_think_tags(llm)
          except Exception as e:
              logger.error(f"Error loading Ollama model: {e}")
              return get_fallback_model(temperature)

      elif provider == "lmstudio":
-
-         # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
-         lmstudio_url = settings.llm.get('lmstudio_url', "http://localhost:1234")
-
-         return ChatOpenAI(
-             model=model_name,
-             api_key="lm-studio",  # LM Studio doesn't require a real API key
-             base_url=f"{lmstudio_url}/v1",  # Use the configured URL with /v1 endpoint
-             temperature=temperature,
-             max_tokens=settings.llm.max_tokens
-         )
-
+         # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
+         lmstudio_url = settings.llm.get('lmstudio_url', "http://localhost:1234")
+
+         llm = ChatOpenAI(
+             model=model_name,
+             api_key="lm-studio",  # LM Studio doesn't require a real API key
+             base_url=f"{lmstudio_url}/v1",  # Use the configured URL with /v1 endpoint
+             temperature=temperature,
+             max_tokens=settings.llm.max_tokens
+         )
+         return wrap_llm_without_think_tags(llm)

      elif provider == "llamacpp":
-
-         # Import LlamaCpp
-         from langchain_community.llms import LlamaCpp
-
-         # Get LlamaCpp model path from settings
-         model_path = settings.llm.get('llamacpp_model_path', "")
-         if not model_path:
-             logger.error("llamacpp_model_path not set in settings")
-             raise ValueError("llamacpp_model_path not set in settings.toml")
-
-         # Get additional LlamaCpp parameters
-         n_gpu_layers = settings.llm.get('llamacpp_n_gpu_layers', 1)
-         n_batch = settings.llm.get('llamacpp_n_batch', 512)
-         f16_kv = settings.llm.get('llamacpp_f16_kv', True)
+         # Import LlamaCpp
+         from langchain_community.llms import LlamaCpp
+
+         # Get LlamaCpp model path from settings
+         model_path = settings.llm.get('llamacpp_model_path', "")
+         if not model_path:
+             logger.error("llamacpp_model_path not set in settings")
+             raise ValueError("llamacpp_model_path not set in settings.toml")

-         # Create LlamaCpp instance
-         return LlamaCpp(
-             model_path=model_path,
-             temperature=temperature,
-             max_tokens=settings.llm.max_tokens,
-             n_gpu_layers=n_gpu_layers,
-             n_batch=n_batch,
-             f16_kv=f16_kv,
-             verbose=True
-         )
+         # Get additional LlamaCpp parameters
+         n_gpu_layers = settings.llm.get('llamacpp_n_gpu_layers', 1)
+         n_batch = settings.llm.get('llamacpp_n_batch', 512)
+         f16_kv = settings.llm.get('llamacpp_f16_kv', True)
+
+         # Create LlamaCpp instance
+         llm = LlamaCpp(
+             model_path=model_path,
+             temperature=temperature,
+             max_tokens=settings.llm.max_tokens,
+             n_gpu_layers=n_gpu_layers,
+             n_batch=n_batch,
+             f16_kv=f16_kv,
+             verbose=True
+         )
+         return wrap_llm_without_think_tags(llm)

      else:
-         return get_fallback_model(temperature)
+         return wrap_llm_without_think_tags(get_fallback_model(temperature))

  def get_fallback_model(temperature=None):
      """Create a dummy model for when no providers are available"""
@@ -174,6 +190,31 @@ def get_fallback_model(temperature=None):
  # COMPATIBILITY FUNCTIONS
  # ================================

+ def wrap_llm_without_think_tags(llm):
+     """Create a wrapper class that processes LLM outputs with remove_think_tags"""
+
+
+     class ProcessingLLMWrapper:
+         def __init__(self, base_llm):
+             self.base_llm = base_llm
+
+         def invoke(self, *args, **kwargs):
+             response = self.base_llm.invoke(*args, **kwargs)
+
+             # Process the response content if it has a content attribute
+             if hasattr(response, 'content'):
+                 response.content = remove_think_tags(response.content)
+             elif isinstance(response, str):
+                 response = remove_think_tags(response)
+
+             return response
+
+         # Pass through any other attributes to the base LLM
+         def __getattr__(self, name):
+             return getattr(self.base_llm, name)
+
+     return ProcessingLLMWrapper(llm)
+
  def get_available_provider_types():
      """Return available model providers"""
      providers = {}
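With the wrapper above in place, every provider branch returns its model through `wrap_llm_without_think_tags`, so callers get cleaned text without changing their own code. A minimal usage sketch (the prompt text is illustrative, not taken from this diff):

```python
# The object returned by get_llm() behaves like the underlying LangChain model,
# but its invoke() strips <think>...</think> blocks before returning.
from local_deep_research import get_llm

llm = get_llm(temperature=0.2)  # provider and model resolved from settings
response = llm.invoke("In one sentence, what is retrieval-augmented generation?")

# Chat models return a message object with .content; plain LLMs return a string.
text = response.content if hasattr(response, "content") else str(response)
print(text)
```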
@@ -46,7 +46,10 @@ def create_search_engine(engine_name: str, llm=None, **kwargs) -> Optional[BaseS

      # First check environment variable
      api_key = os.getenv(api_key_env)
-
+     if not api_key:
+         api_key = os.getenv("LDR_" + api_key_env)
+
+
      # If not found in environment, check Dynaconf settings
      if not api_key and api_key_env:
          # Convert env var name to settings path (e.g., BRAVE_API_KEY -> brave_api_key)
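The factory now applies the same two-step lookup to search-engine keys: the engine's documented variable first, then the `LDR_`-prefixed form. An illustrative sketch (the `serpapi` engine name and the key value are assumptions, not taken from this diff):

```python
# Either spelling of the key satisfies the factory's environment lookup.
import os

os.environ.setdefault("LDR_SERP_API_KEY", "your-serpapi-key")  # placeholder value

from local_deep_research import get_search

search = get_search("serpapi")  # engine name assumed; see search_engines.toml for the canonical list
if search:
    results = search.run("local deep research")
```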
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: local-deep-research
- Version: 0.1.23
+ Version: 0.1.25
  Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
  Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
  License: MIT License
@@ -65,6 +65,7 @@ Requires-Dist: xmltodict>=0.13.0
  Requires-Dist: lxml>=4.9.2
  Requires-Dist: pdfplumber>=0.9.0
  Requires-Dist: unstructured>=0.10.0
+ Requires-Dist: google-search-results
  Dynamic: license-file

  # Local Deep Research
@@ -79,7 +80,15 @@ A powerful AI-powered research assistant that performs deep, iterative analysis
  </a>
  </div>

- ## Quick Start
+ ## Windows Installation
+
+ Download the [Windows Installer](https://github.com/LearningCircuit/local-deep-research/releases/download/v0.1.0/LocalDeepResearch_Setup.exe) for easy one-click installation.
+
+ **Requires Ollama or other model preinstalled.**
+ Download from https://ollama.ai and then pull a model
+ ollama pull gemma3:12b
+
+ ## Quick Start (not required if installed with windows installer)

  ```bash
  # Install the package
@@ -105,6 +114,53 @@ ldr # (OR python -m local_deep_research.main)

  Access the web interface at `http://127.0.0.1:5000` in your browser.

+ ## Docker Support
+
+ Build the image first if you haven't already
+ ```bash
+ docker build -t local-deep-research .
+ ```
+
+ Quick Docker Run
+
+ ```bash
+ # Run with default settings (connects to Ollama running on the host)
+ docker run --network=host \
+     -e LDR_LLM__PROVIDER="ollama" \
+     -e LDR_LLM__MODEL="mistral" \
+     local-deep-research
+ ```
+
+ For comprehensive Docker setup information, see:
+ - [Docker Usage Guide](https://github.com/LearningCircuit/local-deep-research/blob/main/docs/docker-usage-readme.md)
+ - [Docker Compose Guide](https://github.com/LearningCircuit/local-deep-research/blob/main/docs/docker-compose-guide.md)
+
+ ## Programmatic Access
+
+ Local Deep Research now provides a simple API for programmatic access to its research capabilities:
+
+ ```python
+ import os
+ # Set environment variables to control the LLM
+ os.environ["LDR_LLM__MODEL"] = "mistral"  # Specify model name
+
+ from local_deep_research import quick_summary, generate_report, analyze_documents
+
+ # Generate a quick research summary with custom parameters
+ results = quick_summary(
+     query="advances in fusion energy",
+     search_tool="auto",          # Auto-select the best search engine
+     iterations=1,                # Single research cycle for speed
+     questions_per_iteration=2,   # Generate 2 follow-up questions
+     max_results=30,              # Consider up to 30 search results
+     temperature=0.7              # Control creativity of generation
+ )
+ print(results["summary"])
+ ```
+
+ These functions provide flexible options for customizing the search parameters, iterations, and output formats. For more examples, see the [programmatic access tutorial](https://github.com/LearningCircuit/local-deep-research/blob/main/examples/programmatic_access.ipynb).
+
+
  ## Features

  - 🔍 **Advanced Research Capabilities**
@@ -286,23 +342,6 @@ You can use local document search in several ways:
  3. **All collections**: Set `tool = "local_all"` to search across all collections
  4. **Query syntax**: Type `collection:project_docs your query` to target a specific collection

- ## Docker Support
-
- Local Deep Research can run in Docker containers for easy deployment across environments.
-
- ### Quick Docker Run
-
- ```bash
- # Run with default settings (connects to Ollama running on the host)
- docker run --network=host \
-     -e LDR_LLM__PROVIDER="ollama" \
-     -e LDR_LLM__MODEL="mistral" \
-     local-deep-research
- ```
-
- For comprehensive Docker setup information, see:
- - [Docker Usage Guide](https://github.com/LearningCircuit/local-deep-research/blob/main/docs/docker-usage-readme.md)
- - [Docker Compose Guide](https://github.com/LearningCircuit/local-deep-research/blob/main/docs/docker-compose-guide.md)

  ## Advanced Configuration

@@ -1,12 +1,14 @@
- local_deep_research/__init__.py,sha256=pfHzjzYc6Szo8VCNLtFZRXyAlEz7CViY7r2fH9O7yms,584
+ local_deep_research/__init__.py,sha256=Grde0sFEYyCXrPCfxd-9b9v1M6OurrzQbVYRmKQ9E7w,886
  local_deep_research/citation_handler.py,sha256=v_fwTy-2XvUuoH3OQRzmBrvaiN7mBk8jbNfySslmt5g,4357
- local_deep_research/config.py,sha256=3g8-QPMrxoIMjHvyjSJBFUELmAIyOQFHApUnd8p50a8,9881
+ local_deep_research/config.py,sha256=n6TSkNtdie86Sc71jjnejwK_hBIDpJahNZwbiDEfzXg,10233
  local_deep_research/main.py,sha256=uQXtGQ6LtZNd5Qw63D5ke4Q_LjYimouWVSUknVsk3JQ,3645
  local_deep_research/report_generator.py,sha256=EvaArnWirMgg42fMzmZeJczoEYujEbJ2ryHHYuuoXx8,8058
  local_deep_research/search_system.py,sha256=yY3BEzX68vdtUcYF9h6lC3yVao0YA_NSBj6W3-RwlKk,15459
- local_deep_research/defaults/.env.template,sha256=U4B_InwGZl4IVuAdbY_u0nKN_akHtebMBwUU_e_eljc,427
+ local_deep_research/api/__init__.py,sha256=H0WGFSohUR0T2QswtWngPZWoMYPs9VWQTQYaivAlrJU,440
+ local_deep_research/api/research_functions.py,sha256=Z23wZYsB1x2ivdFYJ9uqIqCAwjR2RdOff7Bq30DxQYU,12099
+ local_deep_research/defaults/.env.template,sha256=SI8WDMFrj-yANlnfd6jJ4fLYke7zSzCd9Ukk_HpyM88,500
  local_deep_research/defaults/__init__.py,sha256=2Vvlkl-gmP_qPYWegE4JBgummypogl3VXrQ1XzptFDU,1381
- local_deep_research/defaults/llm_config.py,sha256=Ql0euemgLw_Uwg5g05sA1SkVzAYK7O_ZAnnBi3rsAi4,10095
+ local_deep_research/defaults/llm_config.py,sha256=1KiW9k8kmsUD5u9VgEdgWZBNMmK1BA0ZxoGbuC2spAk,11652
  local_deep_research/defaults/local_collections.toml,sha256=zNa03PVnFrZ757JdZOuW6QDxkOc6ep5tG8baGBrMmXM,1778
  local_deep_research/defaults/main.toml,sha256=6Lzbc5sVLxMwu83bLBp_tpYOZgmtThCfPL1L42eTGro,1939
  local_deep_research/defaults/search_engines.toml,sha256=g0-qrw10oMgW74z_lYpPDkGwMje25mvalfY1EJ0nL3g,8134
@@ -32,7 +34,7 @@ local_deep_research/web/templates/settings.html,sha256=S9A-tdpzMhP2Zw7kp2jxKlwaW
  local_deep_research/web/templates/settings_dashboard.html,sha256=De-v1KNdVvkXme5i3YZ6sIfU9aAKDc_N-AW9n4PZoso,9109
  local_deep_research/web_search_engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  local_deep_research/web_search_engines/search_engine_base.py,sha256=QmhfjuHK2deomh8tARghKuYnF-5t3wwBB661odS2VtU,8065
- local_deep_research/web_search_engines/search_engine_factory.py,sha256=Sld6bYTwcyTxgVLx04t00sD7vfJhSHFOl6iiGJ08ZUE,11118
+ local_deep_research/web_search_engines/search_engine_factory.py,sha256=8REYoRdDWvB6XLhBym8rqzuULX28VQ-UKWNcRA5tLTQ,11189
  local_deep_research/web_search_engines/search_engines_config.py,sha256=5C0tCmy_Jpv1YHLZLlyS7h5B2XToYcWPAaBDEOsxMo0,2739
  local_deep_research/web_search_engines/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  local_deep_research/web_search_engines/engines/full_search.py,sha256=BuOz8dX-XocazCG7gGBKFnIY99FZtNFI0-Wq3fhsfp4,4689
@@ -51,9 +53,9 @@ local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py
  local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=XikEYnM-pAaR70VeAJ28lbqpRzCj4bCA9xY29taTV8g,9215
  local_deep_research/web_search_engines/engines/search_engine_wayback.py,sha256=astAvSLajDZ6rwgthJ3iBcHSWuDSYPO7uilIxaJhXmU,18132
  local_deep_research/web_search_engines/engines/search_engine_wikipedia.py,sha256=KSGJECbEcxZpVK-PhYsTCtzedSK0l1AjQmvGtx8KBks,9799
- local_deep_research-0.1.23.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
- local_deep_research-0.1.23.dist-info/METADATA,sha256=2QywVLwHQyMbLAekWM2r37YKtXNVRnOdV_eXgE2_Sl0,16181
- local_deep_research-0.1.23.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- local_deep_research-0.1.23.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
- local_deep_research-0.1.23.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
- local_deep_research-0.1.23.dist-info/RECORD,,
+ local_deep_research-0.1.25.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
+ local_deep_research-0.1.25.dist-info/METADATA,sha256=sVEzW1cEvbnt0d-FtGmnZLqzf7_D1cF8PWC13bxbmBM,17711
+ local_deep_research-0.1.25.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ local_deep_research-0.1.25.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
+ local_deep_research-0.1.25.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
+ local_deep_research-0.1.25.dist-info/RECORD,,