local-deep-research 0.1.22__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,10 +15,19 @@ from .search_system import AdvancedSearchSystem
15
15
  from .report_generator import IntegratedReportGenerator
16
16
  from .config import get_llm, get_search
17
17
 
18
+ # Import API functions
19
+ from .api import quick_summary, generate_report, analyze_documents
20
+ from .api import get_available_search_engines, get_available_collections
21
+
18
22
  # Export it
19
23
  __all__ = [
20
24
  "AdvancedSearchSystem",
21
25
  "IntegratedReportGenerator",
22
26
  "get_llm",
23
- "get_search"
27
+ "get_search",
28
+ "quick_summary",
29
+ "generate_report",
30
+ "analyze_documents",
31
+ "get_available_search_engines",
32
+ "get_available_collections"
24
33
  ]
@@ -0,0 +1,20 @@
1
+ # src/local_deep_research/api/__init__.py
2
+ """
3
+ API module for programmatic access to Local Deep Research functionality.
4
+ """
5
+
6
+ from .research_functions import (
7
+ quick_summary,
8
+ generate_report,
9
+ analyze_documents,
10
+ get_available_search_engines,
11
+ get_available_collections
12
+ )
13
+
14
+ __all__ = [
15
+ "quick_summary",
16
+ "generate_report",
17
+ "analyze_documents",
18
+ "get_available_search_engines",
19
+ "get_available_collections"
20
+ ]
@@ -0,0 +1,330 @@
1
+ """
2
+ API module for Local Deep Research.
3
+ Provides programmatic access to search and research capabilities.
4
+ """
5
+
6
+ from typing import Dict, List, Optional, Union, Any, Callable
7
+ import logging
8
+ import os
9
+ import traceback
10
+ import toml
11
+ from ..search_system import AdvancedSearchSystem
12
+ from ..report_generator import IntegratedReportGenerator
13
+ from ..config import get_llm, get_search, settings
14
+ from ..utilties.search_utilities import remove_think_tags
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
def quick_summary(
    query: str,
    search_tool: Optional[str] = None,
    iterations: int = 1,
    questions_per_iteration: int = 1,
    max_results: int = 20,
    max_filtered_results: int = 5,
    region: str = "us",
    time_period: str = "y",
    safe_search: bool = True,
    temperature: float = 0.7,
    progress_callback: Optional[Callable] = None,
) -> Dict[str, Any]:
    """
    Generate a quick research summary for a given query.

    Args:
        query: The research query to analyze
        search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
        iterations: Number of research cycles to perform
        questions_per_iteration: Number of questions to generate per cycle
        max_results: Maximum number of search results to consider
        max_filtered_results: Maximum results after relevance filtering
        region: Search region/locale
        time_period: Time period for search results (d=day, w=week, m=month, y=year)
        safe_search: Whether to enable safe search
        temperature: LLM temperature for generation
        progress_callback: Optional callback function to receive progress updates

    Returns:
        Dictionary containing the research results with keys:
            - 'summary': The generated summary text
            - 'findings': List of detailed findings from each search
            - 'iterations': Number of iterations performed
            - 'questions': Questions generated during research
            - 'formatted_findings': Pre-formatted findings text
            - 'sources': Links collected by the search system
    """
    logger.info(f"Generating quick summary for query: {query}")

    # Get language model with custom temperature
    llm = get_llm(temperature=temperature)

    # Create search system and override default settings with user-provided values
    system = AdvancedSearchSystem()
    system.max_iterations = iterations
    system.questions_per_iteration = questions_per_iteration
    system.model = llm  # Ensure the model is directly attached to the system

    # Set the search engine if specified.
    # BUGFIX: the search parameters (max_results, max_filtered_results, region,
    # time_period, safe_search) were previously accepted but never forwarded to
    # get_search(); pass them through, consistent with generate_report().
    if search_tool:
        search_engine = get_search(
            search_tool,
            llm_instance=llm,
            max_results=max_results,
            max_filtered_results=max_filtered_results,
            region=region,
            time_period=time_period,
            safe_search=safe_search,
        )
        if search_engine:
            system.search = search_engine
        else:
            logger.warning(f"Could not create search engine '{search_tool}', using default.")

    # Set progress callback if provided
    if progress_callback:
        system.set_progress_callback(progress_callback)

    # Perform the search and analysis.
    # BUGFIX: guard against analyze_topic() returning None — the original code
    # handled that for 'summary' but then crashed on results.get(...) below.
    results = system.analyze_topic(query) or {}

    # Extract the summary from the current knowledge
    if "current_knowledge" in results:
        summary = results["current_knowledge"]
    else:
        summary = "Unable to generate summary for the query."

    # Prepare the return value
    return {
        "summary": summary,
        "findings": results.get("findings", []),
        "iterations": results.get("iterations", 0),
        "questions": results.get("questions", {}),
        "formatted_findings": results.get("formatted_findings", ""),
        "sources": results.get("all_links_of_system", [])
    }
98
+
99
+
100
def generate_report(
    query: str,
    search_tool: Optional[str] = None,
    iterations: int = 2,
    questions_per_iteration: int = 2,
    searches_per_section: int = 2,
    max_results: int = 50,
    max_filtered_results: int = 5,
    region: str = "us",
    time_period: str = "y",
    safe_search: bool = True,
    temperature: float = 0.7,
    output_file: Optional[str] = None,
    progress_callback: Optional[Callable] = None,
) -> Dict[str, Any]:
    """
    Generate a comprehensive, structured research report for a given query.

    Args:
        query: The research query to analyze
        search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
        iterations: Number of research cycles to perform
        questions_per_iteration: Number of questions to generate per cycle
        searches_per_section: Number of searches to perform per report section
        max_results: Maximum number of search results to consider
        max_filtered_results: Maximum results after relevance filtering
        region: Search region/locale
        time_period: Time period for search results (d=day, w=week, m=month, y=year)
        safe_search: Whether to enable safe search
        temperature: LLM temperature for generation
        output_file: Optional path to save report markdown file
        progress_callback: Optional callback function to receive progress updates

    Returns:
        Dictionary containing the research report with keys:
            - 'content': The full report content in markdown format
            - 'metadata': Report metadata including generated timestamp and query
    """
    logger.info(f"Generating comprehensive research report for query: {query}")

    # Build the LLM once so it can be shared by the search system,
    # the search engine, and the report generator.
    model = get_llm(temperature=temperature)

    # Research system configured from the caller-supplied parameters.
    research_system = AdvancedSearchSystem()
    research_system.max_iterations = iterations
    research_system.questions_per_iteration = questions_per_iteration
    research_system.model = model  # attach the model directly to the system

    # Swap in the requested search engine, if any; fall back to the
    # system default when construction fails.
    if search_tool:
        engine = get_search(
            search_tool,
            llm_instance=model,
            max_results=max_results,
            max_filtered_results=max_filtered_results,
            region=region,
            time_period=time_period,
            safe_search=safe_search
        )
        if engine:
            research_system.search = engine
        else:
            logger.warning(f"Could not create search engine '{search_tool}', using default.")

    # Wire up progress reporting when the caller asked for it.
    if progress_callback:
        research_system.set_progress_callback(progress_callback)

    # First pass: iterative topic research.
    initial_findings = research_system.analyze_topic(query)

    # Second pass: turn the findings into a structured report.
    generator = IntegratedReportGenerator(searches_per_section=searches_per_section)
    generator.model = model  # the report generator needs the model too
    report = generator.generate_report(initial_findings, query)

    # Optionally persist the markdown content to disk.
    if output_file and report and "content" in report:
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(report["content"])
        logger.info(f"Report saved to {output_file}")
        report["file_path"] = output_file
    return report
187
+
188
+
189
+
190
def analyze_documents(
    query: str,
    collection_name: str,
    max_results: int = 10,
    temperature: float = 0.7,
    force_reindex: bool = False,
    output_file: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Search and analyze documents in a specific local collection.

    Args:
        query: The search query
        collection_name: Name of the local document collection to search
        max_results: Maximum number of results to return
        temperature: LLM temperature for summary generation
        force_reindex: Whether to force reindexing the collection
        output_file: Optional path to save analysis results to a file

    Returns:
        Dictionary containing:
            - 'summary': Summary of the findings
            - 'documents': List of matching documents with content and metadata
            On success it also includes 'collection', 'document_count', and
            'file_path' (when output_file was given).
    """
    logger.info(f"Analyzing documents in collection '{collection_name}' for query: {query}")

    # Get language model with custom temperature
    llm = get_llm(temperature=temperature)

    # Get search engine for the specified collection
    search = get_search(collection_name, llm_instance=llm)

    if not search:
        return {
            "summary": f"Error: Collection '{collection_name}' not found or not properly configured.",
            "documents": []
        }

    # Set max results
    search.max_results = max_results

    # Force reindex if requested.
    # ROBUSTNESS: also verify folder_paths exists before iterating — the
    # original guarded only embedding_manager and would raise AttributeError
    # on engines that expose one without the other.
    if force_reindex and hasattr(search, 'embedding_manager') and hasattr(search, 'folder_paths'):
        for folder_path in search.folder_paths:
            search.embedding_manager.index_folder(folder_path, force_reindex=True)

    # Perform the search
    results = search.run(query)

    if not results:
        return {
            "summary": f"No documents found in collection '{collection_name}' for query: '{query}'",
            "documents": []
        }

    # Build a prompt from the top documents so the LLM can summarize them.
    # Limit to the first 5 docs and 1000 chars each to bound prompt size.
    docs_text = "\n\n".join([f"Document {i+1}: {doc.get('content', doc.get('snippet', ''))[:1000]}"
                            for i, doc in enumerate(results[:5])])

    summary_prompt = f"""Analyze these document excerpts related to the query: "{query}"

    {docs_text}

    Provide a concise summary of the key information found in these documents related to the query.
    """

    summary_response = llm.invoke(summary_prompt)
    # LangChain-style responses carry text in .content; fall back to str()
    # for plain-string model outputs.
    if hasattr(summary_response, 'content'):
        summary = remove_think_tags(summary_response.content)
    else:
        summary = str(summary_response)

    # Create result dictionary
    analysis_result = {
        "summary": summary,
        "documents": results,
        "collection": collection_name,
        "document_count": len(results)
    }

    # Save a markdown rendering of the analysis to file if requested
    if output_file:
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(f"# Document Analysis: {query}\n\n")
            f.write(f"## Summary\n\n{summary}\n\n")
            f.write(f"## Documents Found: {len(results)}\n\n")

            for i, doc in enumerate(results):
                f.write(f"### Document {i+1}: {doc.get('title', 'Untitled')}\n\n")
                f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
                f.write(f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n")
                f.write("---\n\n")

        analysis_result["file_path"] = output_file
        logger.info(f"Analysis saved to {output_file}")

    return analysis_result
289
+
290
def get_available_search_engines() -> Dict[str, str]:
    """
    Get a dictionary of available search engines.

    Returns:
        Dictionary mapping engine names to descriptions
    """
    from ..web_search_engines.search_engine_factory import get_available_engines

    # Human-readable blurbs for the engines we know about; any engine not
    # listed here gets a generic label.
    known_descriptions = {
        "auto": "Automatic selection based on query type",
        "wikipedia": "Wikipedia articles and general knowledge",
        "arxiv": "Scientific papers and research",
        "pubmed": "Medical and biomedical literature",
        "semantic_scholar": "Academic papers across all fields",
        "github": "Code repositories and technical documentation",
        "local_all": "All local document collections"
    }

    available = get_available_engines()
    return {name: known_descriptions.get(name, "Search engine") for name in available}
313
+
314
+
315
def get_available_collections() -> Dict[str, Dict[str, Any]]:
    """
    Get a dictionary of available local document collections.

    Returns:
        Dictionary mapping collection names to their configuration; empty
        when no collections file exists.
    """
    from ..config import LOCAL_COLLECTIONS_FILE

    # No collections file means no collections are configured.
    if not os.path.exists(LOCAL_COLLECTIONS_FILE):
        return {}
    return toml.load(LOCAL_COLLECTIONS_FILE)
@@ -0,0 +1,11 @@
1
+ # API Keys
2
+ # ANTHROPIC_API_KEY=your-api-key-here
3
+ # OPENAI_API_KEY=your-openai-key-here
4
+ # SERP_API_KEY=your-api-key-here
5
+ # GUARDIAN_API_KEY=your-api-key-here
6
+ # GOOGLE_PSE_API_KEY=your-google-api-key-here
7
+ # GOOGLE_PSE_ENGINE_ID=your-programmable-search-engine-id-here
8
+
9
+ # SearXNG configuration. Add at least SEARXNG_INSTANCE to your .env file to enable this search engine.
10
+ # SEARXNG_INSTANCE = "http://localhost:8080"
11
+ # SEARXNG_DELAY = 2.0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: local-deep-research
3
- Version: 0.1.22
3
+ Version: 0.1.24
4
4
  Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
5
5
  Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
6
6
  License: MIT License
@@ -105,6 +105,28 @@ ldr # (OR python -m local_deep_research.main)
105
105
 
106
106
  Access the web interface at `http://127.0.0.1:5000` in your browser.
107
107
 
108
+ ## Docker Support
109
+
110
+ ### Build the image first if you haven't already
111
+ ```bash
112
+ docker build -t local-deep-research .
113
+ ```
114
+
115
+ ### Quick Docker Run
116
+
117
+ ```bash
118
+ # Run with default settings (connects to Ollama running on the host)
119
+ docker run --network=host \
120
+ -e LDR_LLM__PROVIDER="ollama" \
121
+ -e LDR_LLM__MODEL="mistral" \
122
+ local-deep-research
123
+ ```
124
+
125
+ For comprehensive Docker setup information, see:
126
+ - [Docker Usage Guide](https://github.com/LearningCircuit/local-deep-research/blob/main/docs/docker-usage-readme.md)
127
+ - [Docker Compose Guide](https://github.com/LearningCircuit/local-deep-research/blob/main/docs/docker-compose-guide.md)
128
+
129
+
108
130
  ## Features
109
131
 
110
132
  - 🔍 **Advanced Research Capabilities**
@@ -286,23 +308,27 @@ You can use local document search in several ways:
286
308
  3. **All collections**: Set `tool = "local_all"` to search across all collections
287
309
  4. **Query syntax**: Type `collection:project_docs your query` to target a specific collection
288
310
 
289
- ## Docker Support
311
+ ## Programmatic Access
290
312
 
291
- Local Deep Research can run in Docker containers for easy deployment across environments.
313
+ Local Deep Research now provides a simple API for programmatic access to its research capabilities:
292
314
 
293
- ### Quick Docker Run
315
+ ```python
316
+ from local_deep_research import quick_summary, generate_report
294
317
 
295
- ```bash
296
- # Run with default settings (connects to Ollama running on the host)
297
- docker run --network=host \
298
- -e LDR_LLM__PROVIDER="ollama" \
299
- -e LDR_LLM__MODEL="mistral" \
300
- local-deep-research
318
+ # Generate a quick research summary
319
+ results = quick_summary("advances in fusion energy")
320
+ print(results["summary"])
321
+
322
+ # Create a comprehensive structured report
323
+ report = generate_report("impact of quantum computing on cryptography")
324
+ print(report["content"])
325
+
326
+ # Analyze documents in a local collection
327
+ from local_deep_research import analyze_documents
328
+ docs = analyze_documents("renewable energy", "research_papers")
301
329
  ```
302
330
 
303
- For comprehensive Docker setup information, see:
304
- - [Docker Usage Guide](https://github.com/LearningCircuit/local-deep-research/blob/main/docs/docker-usage-readme.md)
305
- - [Docker Compose Guide](https://github.com/LearningCircuit/local-deep-research/blob/main/docs/docker-compose-guide.md)
331
+ These functions provide flexible options for customizing the search parameters, iterations, and output formats. For more examples, see the [programmatic access tutorial](https://github.com/LearningCircuit/local-deep-research/blob/programmatic-access/examples/programmatic_access.ipynb).
306
332
 
307
333
  ## Advanced Configuration
308
334
 
@@ -1,9 +1,12 @@
1
- local_deep_research/__init__.py,sha256=pfHzjzYc6Szo8VCNLtFZRXyAlEz7CViY7r2fH9O7yms,584
1
+ local_deep_research/__init__.py,sha256=Grde0sFEYyCXrPCfxd-9b9v1M6OurrzQbVYRmKQ9E7w,886
2
2
  local_deep_research/citation_handler.py,sha256=v_fwTy-2XvUuoH3OQRzmBrvaiN7mBk8jbNfySslmt5g,4357
3
3
  local_deep_research/config.py,sha256=3g8-QPMrxoIMjHvyjSJBFUELmAIyOQFHApUnd8p50a8,9881
4
4
  local_deep_research/main.py,sha256=uQXtGQ6LtZNd5Qw63D5ke4Q_LjYimouWVSUknVsk3JQ,3645
5
5
  local_deep_research/report_generator.py,sha256=EvaArnWirMgg42fMzmZeJczoEYujEbJ2ryHHYuuoXx8,8058
6
6
  local_deep_research/search_system.py,sha256=yY3BEzX68vdtUcYF9h6lC3yVao0YA_NSBj6W3-RwlKk,15459
7
+ local_deep_research/api/__init__.py,sha256=H0WGFSohUR0T2QswtWngPZWoMYPs9VWQTQYaivAlrJU,440
8
+ local_deep_research/api/research_functions.py,sha256=Z23wZYsB1x2ivdFYJ9uqIqCAwjR2RdOff7Bq30DxQYU,12099
9
+ local_deep_research/defaults/.env.template,sha256=U4B_InwGZl4IVuAdbY_u0nKN_akHtebMBwUU_e_eljc,427
7
10
  local_deep_research/defaults/__init__.py,sha256=2Vvlkl-gmP_qPYWegE4JBgummypogl3VXrQ1XzptFDU,1381
8
11
  local_deep_research/defaults/llm_config.py,sha256=Ql0euemgLw_Uwg5g05sA1SkVzAYK7O_ZAnnBi3rsAi4,10095
9
12
  local_deep_research/defaults/local_collections.toml,sha256=zNa03PVnFrZ757JdZOuW6QDxkOc6ep5tG8baGBrMmXM,1778
@@ -50,9 +53,9 @@ local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py
50
53
  local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=XikEYnM-pAaR70VeAJ28lbqpRzCj4bCA9xY29taTV8g,9215
51
54
  local_deep_research/web_search_engines/engines/search_engine_wayback.py,sha256=astAvSLajDZ6rwgthJ3iBcHSWuDSYPO7uilIxaJhXmU,18132
52
55
  local_deep_research/web_search_engines/engines/search_engine_wikipedia.py,sha256=KSGJECbEcxZpVK-PhYsTCtzedSK0l1AjQmvGtx8KBks,9799
53
- local_deep_research-0.1.22.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
54
- local_deep_research-0.1.22.dist-info/METADATA,sha256=_kjb5M093i9x4yhJ1cQ198P1bnDJg-atHCc2otwcrc0,16181
55
- local_deep_research-0.1.22.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
56
- local_deep_research-0.1.22.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
57
- local_deep_research-0.1.22.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
58
- local_deep_research-0.1.22.dist-info/RECORD,,
56
+ local_deep_research-0.1.24.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
57
+ local_deep_research-0.1.24.dist-info/METADATA,sha256=mM-b8LezrBKyR-VjOYE0lMqoKzEwfvID2kMYgUJH9Z4,17096
58
+ local_deep_research-0.1.24.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
59
+ local_deep_research-0.1.24.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
60
+ local_deep_research-0.1.24.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
61
+ local_deep_research-0.1.24.dist-info/RECORD,,