local-deep-research 0.2.3__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +1 -1
- local_deep_research/__version__.py +1 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +5 -1
- local_deep_research/advanced_search_system/strategies/base_strategy.py +5 -2
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +23 -16
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +13 -6
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +4 -3
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +57 -62
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +8 -4
- local_deep_research/api/research_functions.py +0 -46
- local_deep_research/citation_handler.py +2 -5
- local_deep_research/config/llm_config.py +25 -68
- local_deep_research/config/search_config.py +8 -21
- local_deep_research/defaults/default_settings.json +3996 -0
- local_deep_research/search_system.py +34 -31
- local_deep_research/utilities/db_utils.py +22 -3
- local_deep_research/utilities/search_utilities.py +10 -7
- local_deep_research/web/app.py +3 -23
- local_deep_research/web/app_factory.py +1 -25
- local_deep_research/web/database/migrations.py +20 -418
- local_deep_research/web/routes/settings_routes.py +75 -364
- local_deep_research/web/services/research_service.py +43 -43
- local_deep_research/web/services/settings_manager.py +108 -315
- local_deep_research/web/services/settings_service.py +3 -56
- local_deep_research/web/static/js/components/research.js +1 -1
- local_deep_research/web/static/js/components/settings.js +16 -4
- local_deep_research/web/static/js/research_form.js +106 -0
- local_deep_research/web/templates/pages/research.html +3 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +56 -21
- local_deep_research/web_search_engines/engines/search_engine_local.py +11 -2
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +7 -11
- local_deep_research/web_search_engines/search_engine_factory.py +12 -64
- local_deep_research/web_search_engines/search_engines_config.py +123 -64
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/METADATA +16 -1
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/RECORD +38 -39
- local_deep_research/config/config_files.py +0 -245
- local_deep_research/defaults/local_collections.toml +0 -53
- local_deep_research/defaults/main.toml +0 -80
- local_deep_research/defaults/search_engines.toml +0 -291
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/WHEEL +0 -0
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,53 +0,0 @@
|
|
1
|
-
# Project Documents Collection
|
2
|
-
[project_docs]
|
3
|
-
name = "Project Documents"
|
4
|
-
description = "Project documentation and specifications"
|
5
|
-
paths = ["@format ${DOCS_DIR}/project_documents"]
|
6
|
-
enabled = true
|
7
|
-
embedding_model = "all-MiniLM-L6-v2"
|
8
|
-
embedding_device = "cpu"
|
9
|
-
embedding_model_type = "sentence_transformers"
|
10
|
-
max_results = 20
|
11
|
-
max_filtered_results = 5
|
12
|
-
chunk_size = 1000
|
13
|
-
chunk_overlap = 200
|
14
|
-
cache_dir = "__CACHE_DIR__/local_search/project_docs"
|
15
|
-
strengths = ["project documentation", "specifications", "internal documents"]
|
16
|
-
weaknesses = ["no external information", "limited to organizational knowledge"]
|
17
|
-
reliability = 0.9
|
18
|
-
|
19
|
-
# Research Papers Collection
|
20
|
-
[research_papers]
|
21
|
-
name = "Research Papers"
|
22
|
-
description = "Academic research papers and articles"
|
23
|
-
paths = ["@format ${DOCS_DIR}/research_papers"]
|
24
|
-
enabled = true
|
25
|
-
embedding_model = "all-MiniLM-L6-v2"
|
26
|
-
embedding_device = "cpu"
|
27
|
-
embedding_model_type = "sentence_transformers"
|
28
|
-
max_results = 20
|
29
|
-
max_filtered_results = 5
|
30
|
-
chunk_size = 800
|
31
|
-
chunk_overlap = 150
|
32
|
-
cache_dir = "__CACHE_DIR__/local_search/research_papers"
|
33
|
-
strengths = ["academic research", "scientific papers", "scholarly content"]
|
34
|
-
weaknesses = ["potentially outdated", "limited to collected papers"]
|
35
|
-
reliability = 0.85
|
36
|
-
|
37
|
-
# Personal Notes Collection
|
38
|
-
[personal_notes]
|
39
|
-
name = "Personal Notes"
|
40
|
-
description = "Personal notes and documents"
|
41
|
-
paths = ["@format ${DOCS_DIR}/personal_notes"]
|
42
|
-
enabled = true
|
43
|
-
embedding_model = "all-MiniLM-L6-v2"
|
44
|
-
embedding_device = "cpu"
|
45
|
-
embedding_model_type = "sentence_transformers"
|
46
|
-
max_results = 30
|
47
|
-
max_filtered_results = 10
|
48
|
-
chunk_size = 500
|
49
|
-
chunk_overlap = 100
|
50
|
-
cache_dir = "__CACHE_DIR__/local_search/personal_notes"
|
51
|
-
strengths = ["personal knowledge", "notes", "private documents"]
|
52
|
-
weaknesses = ["subjective content", "informal information"]
|
53
|
-
reliability = 0.75
|
@@ -1,80 +0,0 @@
|
|
1
|
-
|
2
|
-
# Main configuration for Local Deep Research
|
3
|
-
|
4
|
-
[web]
|
5
|
-
port = 5000
|
6
|
-
host = "0.0.0.0"
|
7
|
-
debug = true
|
8
|
-
|
9
|
-
[llm]
|
10
|
-
# LLM provider (one of: ollama, openai, anthropic, vllm, openai_endpoint, lmstudio, llamacpp)
|
11
|
-
provider = "ollama"
|
12
|
-
# Model name
|
13
|
-
model = "gemma3:12b"
|
14
|
-
# Temperature
|
15
|
-
temperature = 0.7
|
16
|
-
# Maximum tokens
|
17
|
-
max_tokens = 30000
|
18
|
-
# OpenAI-compatible endpoint URL
|
19
|
-
openai_endpoint_url = "https://openrouter.ai/api/v1"
|
20
|
-
# LM Studio URL (default: http://localhost:1234)
|
21
|
-
lmstudio_url = "http://localhost:1234"
|
22
|
-
# LlamaCpp model path
|
23
|
-
llamacpp_model_path = ""
|
24
|
-
# LlamaCpp parameters
|
25
|
-
llamacpp_n_gpu_layers = 1
|
26
|
-
llamacpp_n_batch = 512
|
27
|
-
llamacpp_f16_kv = true
|
28
|
-
|
29
|
-
[general]
|
30
|
-
# Directory for research outputs (relative to user data directory)
|
31
|
-
output_dir = "research_outputs"
|
32
|
-
# Knowledge accumulation approach (NONE, QUESTION, or ITERATION)
|
33
|
-
knowledge_accumulation = "ITERATION"
|
34
|
-
# Maximum context size for knowledge accumulation
|
35
|
-
knowledge_accumulation_context_limit = 2000000
|
36
|
-
# Enable fact checking (experimental, works better with large LLMs)
|
37
|
-
enable_fact_checking = false
|
38
|
-
|
39
|
-
|
40
|
-
[search]
|
41
|
-
# Search tool to use (auto, wikipedia, arxiv, duckduckgo, serpapi, google_pse,etc.)
|
42
|
-
# "auto" intelligently selects based on query content (recommended)
|
43
|
-
# "local_all" searches only local document collections
|
44
|
-
tool = "auto"
|
45
|
-
|
46
|
-
# Number of research cycles
|
47
|
-
iterations = 2
|
48
|
-
|
49
|
-
# Questions generated per cycle
|
50
|
-
questions_per_iteration = 2
|
51
|
-
|
52
|
-
# Searches per report section
|
53
|
-
searches_per_section = 2
|
54
|
-
|
55
|
-
# Results per search query
|
56
|
-
max_results = 50
|
57
|
-
|
58
|
-
# Results after relevance filtering
|
59
|
-
max_filtered_results = 5
|
60
|
-
|
61
|
-
# Search region
|
62
|
-
region = "us"
|
63
|
-
|
64
|
-
# Time period (d=day, w=week, m=month, y=year)
|
65
|
-
time_period = "y"
|
66
|
-
|
67
|
-
# Enable safe search
|
68
|
-
safe_search = true
|
69
|
-
|
70
|
-
# Search language
|
71
|
-
search_language = "English"
|
72
|
-
|
73
|
-
# Return only snippets, not full content (faster but less detailed)
|
74
|
-
snippets_only = true
|
75
|
-
|
76
|
-
# Skip relevance filtering (return all results)
|
77
|
-
skip_relevance_filter = false
|
78
|
-
|
79
|
-
# Check URL quality
|
80
|
-
quality_check_urls = true
|
@@ -1,291 +0,0 @@
|
|
1
|
-
# Search Engines Configuration for Local Deep Research
|
2
|
-
# This file defines all available search engines and their properties
|
3
|
-
|
4
|
-
[wikipedia]
|
5
|
-
module_path = ".engines.search_engine_wikipedia"
|
6
|
-
class_name = "WikipediaSearchEngine"
|
7
|
-
requires_api_key = false
|
8
|
-
reliability = 0.95
|
9
|
-
strengths = [
|
10
|
-
"factual information", "general knowledge", "definitions",
|
11
|
-
"historical facts", "biographies", "overview information"
|
12
|
-
]
|
13
|
-
weaknesses = ["recent events", "specialized academic topics", "product comparisons"]
|
14
|
-
|
15
|
-
[wikipedia.default_params]
|
16
|
-
max_results = 20
|
17
|
-
include_content = true
|
18
|
-
|
19
|
-
[arxiv]
|
20
|
-
module_path = ".engines.search_engine_arxiv"
|
21
|
-
class_name = "ArXivSearchEngine"
|
22
|
-
requires_api_key = false
|
23
|
-
reliability = 0.9
|
24
|
-
strengths = [
|
25
|
-
"scientific papers", "academic research", "physics", "computer science",
|
26
|
-
"mathematics", "statistics", "machine learning", "preprints"
|
27
|
-
]
|
28
|
-
weaknesses = ["non-academic topics", "consumer products", "news", "general information"]
|
29
|
-
|
30
|
-
[arxiv.default_params]
|
31
|
-
max_results = 20
|
32
|
-
sort_by = "relevance"
|
33
|
-
sort_order = "descending"
|
34
|
-
|
35
|
-
[pubmed]
|
36
|
-
module_path = ".engines.search_engine_pubmed"
|
37
|
-
class_name = "PubMedSearchEngine"
|
38
|
-
requires_api_key = false
|
39
|
-
api_key_env = "NCBI_API_KEY"
|
40
|
-
reliability = 0.98
|
41
|
-
strengths = [
|
42
|
-
"biomedical literature", "medical research", "clinical studies",
|
43
|
-
"life sciences", "health information", "scientific papers"
|
44
|
-
]
|
45
|
-
weaknesses = [
|
46
|
-
"non-medical topics", "very recent papers may be missing",
|
47
|
-
"limited to published research"
|
48
|
-
]
|
49
|
-
requires_llm = true
|
50
|
-
|
51
|
-
[pubmed.default_params]
|
52
|
-
max_results = 20
|
53
|
-
get_abstracts = true
|
54
|
-
get_full_text = false
|
55
|
-
full_text_limit = 3
|
56
|
-
days_limit = 0
|
57
|
-
optimize_queries = true
|
58
|
-
|
59
|
-
[github]
|
60
|
-
module_path = ".engines.search_engine_github"
|
61
|
-
class_name = "GitHubSearchEngine"
|
62
|
-
requires_api_key = false
|
63
|
-
reliability = 0.99
|
64
|
-
strengths = [
|
65
|
-
"code repositories", "software documentation", "open source projects",
|
66
|
-
"programming issues", "developer information", "technical documentation"
|
67
|
-
]
|
68
|
-
weaknesses = ["non-technical content", "content outside GitHub", "rate limits without API key"]
|
69
|
-
supports_full_search = true
|
70
|
-
|
71
|
-
[github.default_params]
|
72
|
-
max_results = 15
|
73
|
-
search_type = "repositories"
|
74
|
-
include_readme = true
|
75
|
-
include_issues = false
|
76
|
-
|
77
|
-
[serpapi]
|
78
|
-
module_path = ".engines.search_engine_serpapi"
|
79
|
-
class_name = "SerpAPISearchEngine"
|
80
|
-
requires_api_key = true
|
81
|
-
api_key_env = "SERP_API_KEY"
|
82
|
-
reliability = 0.6
|
83
|
-
strengths = [
|
84
|
-
"comprehensive web search", "product information", "reviews",
|
85
|
-
"recent content", "news", "broad coverage"
|
86
|
-
]
|
87
|
-
weaknesses = ["requires API key with usage limits", "not specialized for academic content"]
|
88
|
-
supports_full_search = true
|
89
|
-
full_search_module = "local_deep_research.web_search_engines.engines.full_serp_search_results_old"
|
90
|
-
full_search_class = "FullSerpAPISearchResults"
|
91
|
-
|
92
|
-
[serpapi.default_params]
|
93
|
-
region = "us"
|
94
|
-
time_period = "y"
|
95
|
-
safe_search = true
|
96
|
-
search_language = "English"
|
97
|
-
|
98
|
-
[searxng]
|
99
|
-
module_path = ".engines.search_engine_searxng"
|
100
|
-
class_name = "SearXNGSearchEngine"
|
101
|
-
requires_api_key = false
|
102
|
-
api_key_env = "SEARXNG_INSTANCE"
|
103
|
-
reliability = 0.9
|
104
|
-
strengths = [
|
105
|
-
"privacy-focused", "metasearch engine", "self-hosted",
|
106
|
-
"no tracking", "configurable", "multiple engines in one"
|
107
|
-
]
|
108
|
-
weaknesses = [
|
109
|
-
"requires self-hosting", "depends on other search engines",
|
110
|
-
"may be rate limited by underlying engines"
|
111
|
-
]
|
112
|
-
supports_full_search = true
|
113
|
-
full_search_module = ".engines.full_search"
|
114
|
-
full_search_class = "FullSearchResults"
|
115
|
-
|
116
|
-
[searxng.default_params]
|
117
|
-
max_results = 15
|
118
|
-
categories = ["general"]
|
119
|
-
language = "en"
|
120
|
-
safe_search = 1
|
121
|
-
delay_between_requests = 0.0
|
122
|
-
include_full_content = true
|
123
|
-
|
124
|
-
[google_pse]
|
125
|
-
module_path = ".engines.search_engine_google_pse"
|
126
|
-
class_name = "GooglePSESearchEngine"
|
127
|
-
requires_api_key = true
|
128
|
-
api_key_env = "GOOGLE_PSE_API_KEY"
|
129
|
-
reliability = 0.9
|
130
|
-
strengths = [
|
131
|
-
"custom search scope", "high-quality results", "domain-specific search",
|
132
|
-
"configurable search experience", "control over search index"
|
133
|
-
]
|
134
|
-
weaknesses = [
|
135
|
-
"requires API key with usage limits",
|
136
|
-
"limited to 10,000 queries/day on free tier",
|
137
|
-
"requires search engine configuration in Google Control Panel"
|
138
|
-
]
|
139
|
-
supports_full_search = true
|
140
|
-
full_search_module = ".engines.full_search"
|
141
|
-
full_search_class = "FullSearchResults"
|
142
|
-
|
143
|
-
[google_pse.default_params]
|
144
|
-
region = "us"
|
145
|
-
safe_search = true
|
146
|
-
search_language = "English"
|
147
|
-
|
148
|
-
[brave]
|
149
|
-
module_path = ".engines.search_engine_brave"
|
150
|
-
class_name = "BraveSearchEngine"
|
151
|
-
requires_api_key = true
|
152
|
-
api_key_env = "BRAVE_API_KEY"
|
153
|
-
reliability = 0.7
|
154
|
-
strengths = [
|
155
|
-
"privacy-focused web search", "product information", "reviews",
|
156
|
-
"recent content", "news", "broad coverage"
|
157
|
-
]
|
158
|
-
weaknesses = ["requires API key with usage limits", "smaller index than Google"]
|
159
|
-
supports_full_search = true
|
160
|
-
full_search_module = "local_deep_research.web_search_engines.engines.full_search"
|
161
|
-
full_search_class = "FullSearchResults"
|
162
|
-
|
163
|
-
[brave.default_params]
|
164
|
-
region = "US"
|
165
|
-
time_period = "y"
|
166
|
-
safe_search = true
|
167
|
-
search_language = "English"
|
168
|
-
|
169
|
-
[wayback]
|
170
|
-
module_path = ".engines.search_engine_wayback"
|
171
|
-
class_name = "WaybackSearchEngine"
|
172
|
-
requires_api_key = false
|
173
|
-
reliability = 0.5
|
174
|
-
strengths = [
|
175
|
-
"historical web content", "archived websites", "content verification",
|
176
|
-
"deleted or changed web pages", "website evolution tracking"
|
177
|
-
]
|
178
|
-
weaknesses = [
|
179
|
-
"limited to previously archived content", "may miss recent changes",
|
180
|
-
"archiving quality varies"
|
181
|
-
]
|
182
|
-
supports_full_search = true
|
183
|
-
|
184
|
-
[wayback.default_params]
|
185
|
-
max_results = 15
|
186
|
-
max_snapshots_per_url = 3
|
187
|
-
closest_only = false
|
188
|
-
language = "English"
|
189
|
-
|
190
|
-
[auto]
|
191
|
-
module_path = ".engines.meta_search_engine"
|
192
|
-
class_name = "MetaSearchEngine"
|
193
|
-
requires_api_key = false
|
194
|
-
reliability = 0.85
|
195
|
-
strengths = [
|
196
|
-
"intelligent engine selection", "adaptable to query type",
|
197
|
-
"fallback capabilities"
|
198
|
-
]
|
199
|
-
weaknesses = ["slightly slower due to LLM analysis"]
|
200
|
-
requires_llm = true
|
201
|
-
|
202
|
-
[auto.default_params]
|
203
|
-
use_api_key_services = true
|
204
|
-
max_engines_to_try = 3
|
205
|
-
|
206
|
-
[local_all]
|
207
|
-
module_path = ".engines.search_engine_local_all"
|
208
|
-
class_name = "LocalAllSearchEngine"
|
209
|
-
requires_api_key = false
|
210
|
-
reliability = 0.85
|
211
|
-
strengths = ["searches all local collections", "personal documents", "offline access"]
|
212
|
-
weaknesses = ["may return too many results", "requires indexing"]
|
213
|
-
requires_llm = true
|
214
|
-
|
215
|
-
#[semantic_scholar]
|
216
|
-
#module_path = ".engines.search_engine_semantic_scholar"
|
217
|
-
#class_name = "SemanticScholarSearchEngine"
|
218
|
-
#requires_api_key = false
|
219
|
-
#api_key_env = "S2_API_KEY"
|
220
|
-
#reliability = 0.87
|
221
|
-
#strengths = [
|
222
|
-
# "comprehensive scientific literature",
|
223
|
-
# "extensive citation network",
|
224
|
-
# "AI-generated summaries (TLDRs)",
|
225
|
-
# "academic paper metadata",
|
226
|
-
# "cross-disciplinary coverage",
|
227
|
-
# "200M+ papers across all fields",
|
228
|
-
# "usable without API key"
|
229
|
-
#]
|
230
|
-
#weaknesses = [
|
231
|
-
# "rate limited (1000 requests/day) without API key",
|
232
|
-
# "limited to academic content"
|
233
|
-
#]
|
234
|
-
#supports_full_search = true
|
235
|
-
#requires_llm = false
|
236
|
-
|
237
|
-
#[semantic_scholar.default_params]
|
238
|
-
#max_results = 20
|
239
|
-
#get_abstracts = true
|
240
|
-
#get_tldr = true
|
241
|
-
#get_references = false
|
242
|
-
#get_citations = false
|
243
|
-
#get_embeddings = false
|
244
|
-
#citation_limit = 10
|
245
|
-
#reference_limit = 10
|
246
|
-
#optimize_queries = true
|
247
|
-
#max_retries = 5
|
248
|
-
#retry_backoff_factor = 1.0
|
249
|
-
|
250
|
-
# Default search engine to use if none specified
|
251
|
-
DEFAULT_SEARCH_ENGINE = "wikipedia"
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
# Additional search engines can be added below
|
258
|
-
# Uncomment and modify these templates as needed
|
259
|
-
|
260
|
-
# [duckduckgo]
|
261
|
-
# module_path = ".engines.search_engine_ddg"
|
262
|
-
# class_name = "DuckDuckGoSearchEngine"
|
263
|
-
# requires_api_key = false
|
264
|
-
# reliability = 0.4
|
265
|
-
# strengths = [
|
266
|
-
# "web search", "product information", "reviews", "recent information",
|
267
|
-
# "news", "general queries", "broad coverage"
|
268
|
-
# ]
|
269
|
-
# weaknesses = ["inconsistent due to rate limits", "not specialized for academic content"]
|
270
|
-
# supports_full_search = true
|
271
|
-
# full_search_module = "local_deep_research.web_search_engines.engines.full_search"
|
272
|
-
# full_search_class = "FullSearchResults"
|
273
|
-
#
|
274
|
-
# [duckduckgo.default_params]
|
275
|
-
# region = "us"
|
276
|
-
# safe_search = true
|
277
|
-
|
278
|
-
# [guardian]
|
279
|
-
# module_path = ".engines.search_engine_guardian"
|
280
|
-
# class_name = "GuardianSearchEngine"
|
281
|
-
# requires_api_key = true
|
282
|
-
# api_key_env = "GUARDIAN_API_KEY"
|
283
|
-
# reliability = 0.5
|
284
|
-
# strengths = [
|
285
|
-
# "news articles", "current events", "opinion pieces", "journalism",
|
286
|
-
# "UK and global news", "political analysis"
|
287
|
-
# ]
|
288
|
-
# weaknesses = ["primarily focused on news", "limited historical content pre-1999"]
|
289
|
-
#
|
290
|
-
# [guardian.default_params]
|
291
|
-
# order_by = "relevance"
|
File without changes
|
{local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/entry_points.txt
RENAMED
File without changes
|
{local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/licenses/LICENSE
RENAMED
File without changes
|