local-deep-research 0.2.3__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. local_deep_research/__init__.py +1 -1
  2. local_deep_research/__version__.py +1 -0
  3. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +5 -1
  4. local_deep_research/advanced_search_system/strategies/base_strategy.py +5 -2
  5. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +23 -16
  6. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +13 -6
  7. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +4 -3
  8. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +57 -62
  9. local_deep_research/advanced_search_system/strategies/standard_strategy.py +8 -4
  10. local_deep_research/api/research_functions.py +0 -46
  11. local_deep_research/citation_handler.py +2 -5
  12. local_deep_research/config/llm_config.py +25 -68
  13. local_deep_research/config/search_config.py +8 -21
  14. local_deep_research/defaults/default_settings.json +3996 -0
  15. local_deep_research/search_system.py +34 -31
  16. local_deep_research/utilities/db_utils.py +22 -3
  17. local_deep_research/utilities/search_utilities.py +10 -7
  18. local_deep_research/web/app.py +3 -23
  19. local_deep_research/web/app_factory.py +1 -25
  20. local_deep_research/web/database/migrations.py +20 -418
  21. local_deep_research/web/routes/settings_routes.py +75 -364
  22. local_deep_research/web/services/research_service.py +43 -43
  23. local_deep_research/web/services/settings_manager.py +108 -315
  24. local_deep_research/web/services/settings_service.py +3 -56
  25. local_deep_research/web/static/js/components/research.js +1 -1
  26. local_deep_research/web/static/js/components/settings.js +16 -4
  27. local_deep_research/web/static/js/research_form.js +106 -0
  28. local_deep_research/web/templates/pages/research.html +3 -2
  29. local_deep_research/web_search_engines/engines/meta_search_engine.py +56 -21
  30. local_deep_research/web_search_engines/engines/search_engine_local.py +11 -2
  31. local_deep_research/web_search_engines/engines/search_engine_local_all.py +7 -11
  32. local_deep_research/web_search_engines/search_engine_factory.py +12 -64
  33. local_deep_research/web_search_engines/search_engines_config.py +123 -64
  34. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/METADATA +16 -1
  35. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/RECORD +38 -39
  36. local_deep_research/config/config_files.py +0 -245
  37. local_deep_research/defaults/local_collections.toml +0 -53
  38. local_deep_research/defaults/main.toml +0 -80
  39. local_deep_research/defaults/search_engines.toml +0 -291
  40. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/WHEEL +0 -0
  41. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/entry_points.txt +0 -0
  42. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,53 +0,0 @@
1
- # Project Documents Collection
2
- [project_docs]
3
- name = "Project Documents"
4
- description = "Project documentation and specifications"
5
- paths = ["@format ${DOCS_DIR}/project_documents"]
6
- enabled = true
7
- embedding_model = "all-MiniLM-L6-v2"
8
- embedding_device = "cpu"
9
- embedding_model_type = "sentence_transformers"
10
- max_results = 20
11
- max_filtered_results = 5
12
- chunk_size = 1000
13
- chunk_overlap = 200
14
- cache_dir = "__CACHE_DIR__/local_search/project_docs"
15
- strengths = ["project documentation", "specifications", "internal documents"]
16
- weaknesses = ["no external information", "limited to organizational knowledge"]
17
- reliability = 0.9
18
-
19
- # Research Papers Collection
20
- [research_papers]
21
- name = "Research Papers"
22
- description = "Academic research papers and articles"
23
- paths = ["@format ${DOCS_DIR}/research_papers"]
24
- enabled = true
25
- embedding_model = "all-MiniLM-L6-v2"
26
- embedding_device = "cpu"
27
- embedding_model_type = "sentence_transformers"
28
- max_results = 20
29
- max_filtered_results = 5
30
- chunk_size = 800
31
- chunk_overlap = 150
32
- cache_dir = "__CACHE_DIR__/local_search/research_papers"
33
- strengths = ["academic research", "scientific papers", "scholarly content"]
34
- weaknesses = ["potentially outdated", "limited to collected papers"]
35
- reliability = 0.85
36
-
37
- # Personal Notes Collection
38
- [personal_notes]
39
- name = "Personal Notes"
40
- description = "Personal notes and documents"
41
- paths = ["@format ${DOCS_DIR}/personal_notes"]
42
- enabled = true
43
- embedding_model = "all-MiniLM-L6-v2"
44
- embedding_device = "cpu"
45
- embedding_model_type = "sentence_transformers"
46
- max_results = 30
47
- max_filtered_results = 10
48
- chunk_size = 500
49
- chunk_overlap = 100
50
- cache_dir = "__CACHE_DIR__/local_search/personal_notes"
51
- strengths = ["personal knowledge", "notes", "private documents"]
52
- weaknesses = ["subjective content", "informal information"]
53
- reliability = 0.75
@@ -1,80 +0,0 @@
1
-
2
- # Main configuration for Local Deep Research
3
-
4
- [web]
5
- port = 5000
6
- host = "0.0.0.0"
7
- debug = true
8
-
9
- [llm]
10
- # LLM provider (one of: ollama, openai, anthropic, vllm, openai_endpoint, lmstudio, llamacpp)
11
- provider = "ollama"
12
- # Model name
13
- model = "gemma3:12b"
14
- # Temperature
15
- temperature = 0.7
16
- # Maximum tokens
17
- max_tokens = 30000
18
- # OpenAI-compatible endpoint URL
19
- openai_endpoint_url = "https://openrouter.ai/api/v1"
20
- # LM Studio URL (default: http://localhost:1234)
21
- lmstudio_url = "http://localhost:1234"
22
- # LlamaCpp model path
23
- llamacpp_model_path = ""
24
- # LlamaCpp parameters
25
- llamacpp_n_gpu_layers = 1
26
- llamacpp_n_batch = 512
27
- llamacpp_f16_kv = true
28
-
29
- [general]
30
- # Directory for research outputs (relative to user data directory)
31
- output_dir = "research_outputs"
32
- # Knowledge accumulation approach (NONE, QUESTION, or ITERATION)
33
- knowledge_accumulation = "ITERATION"
34
- # Maximum context size for knowledge accumulation
35
- knowledge_accumulation_context_limit = 2000000
36
- # Enable fact checking (experimental, works better with large LLMs)
37
- enable_fact_checking = false
38
-
39
-
40
- [search]
41
- # Search tool to use (auto, wikipedia, arxiv, duckduckgo, serpapi, google_pse,etc.)
42
- # "auto" intelligently selects based on query content (recommended)
43
- # "local_all" searches only local document collections
44
- tool = "auto"
45
-
46
- # Number of research cycles
47
- iterations = 2
48
-
49
- # Questions generated per cycle
50
- questions_per_iteration = 2
51
-
52
- # Searches per report section
53
- searches_per_section = 2
54
-
55
- # Results per search query
56
- max_results = 50
57
-
58
- # Results after relevance filtering
59
- max_filtered_results = 5
60
-
61
- # Search region
62
- region = "us"
63
-
64
- # Time period (d=day, w=week, m=month, y=year)
65
- time_period = "y"
66
-
67
- # Enable safe search
68
- safe_search = true
69
-
70
- # Search language
71
- search_language = "English"
72
-
73
- # Return only snippets, not full content (faster but less detailed)
74
- snippets_only = true
75
-
76
- # Skip relevance filtering (return all results)
77
- skip_relevance_filter = false
78
-
79
- # Check URL quality
80
- quality_check_urls = true
@@ -1,291 +0,0 @@
1
- # Search Engines Configuration for Local Deep Research
2
- # This file defines all available search engines and their properties
3
-
4
- [wikipedia]
5
- module_path = ".engines.search_engine_wikipedia"
6
- class_name = "WikipediaSearchEngine"
7
- requires_api_key = false
8
- reliability = 0.95
9
- strengths = [
10
- "factual information", "general knowledge", "definitions",
11
- "historical facts", "biographies", "overview information"
12
- ]
13
- weaknesses = ["recent events", "specialized academic topics", "product comparisons"]
14
-
15
- [wikipedia.default_params]
16
- max_results = 20
17
- include_content = true
18
-
19
- [arxiv]
20
- module_path = ".engines.search_engine_arxiv"
21
- class_name = "ArXivSearchEngine"
22
- requires_api_key = false
23
- reliability = 0.9
24
- strengths = [
25
- "scientific papers", "academic research", "physics", "computer science",
26
- "mathematics", "statistics", "machine learning", "preprints"
27
- ]
28
- weaknesses = ["non-academic topics", "consumer products", "news", "general information"]
29
-
30
- [arxiv.default_params]
31
- max_results = 20
32
- sort_by = "relevance"
33
- sort_order = "descending"
34
-
35
- [pubmed]
36
- module_path = ".engines.search_engine_pubmed"
37
- class_name = "PubMedSearchEngine"
38
- requires_api_key = false
39
- api_key_env = "NCBI_API_KEY"
40
- reliability = 0.98
41
- strengths = [
42
- "biomedical literature", "medical research", "clinical studies",
43
- "life sciences", "health information", "scientific papers"
44
- ]
45
- weaknesses = [
46
- "non-medical topics", "very recent papers may be missing",
47
- "limited to published research"
48
- ]
49
- requires_llm = true
50
-
51
- [pubmed.default_params]
52
- max_results = 20
53
- get_abstracts = true
54
- get_full_text = false
55
- full_text_limit = 3
56
- days_limit = 0
57
- optimize_queries = true
58
-
59
- [github]
60
- module_path = ".engines.search_engine_github"
61
- class_name = "GitHubSearchEngine"
62
- requires_api_key = false
63
- reliability = 0.99
64
- strengths = [
65
- "code repositories", "software documentation", "open source projects",
66
- "programming issues", "developer information", "technical documentation"
67
- ]
68
- weaknesses = ["non-technical content", "content outside GitHub", "rate limits without API key"]
69
- supports_full_search = true
70
-
71
- [github.default_params]
72
- max_results = 15
73
- search_type = "repositories"
74
- include_readme = true
75
- include_issues = false
76
-
77
- [serpapi]
78
- module_path = ".engines.search_engine_serpapi"
79
- class_name = "SerpAPISearchEngine"
80
- requires_api_key = true
81
- api_key_env = "SERP_API_KEY"
82
- reliability = 0.6
83
- strengths = [
84
- "comprehensive web search", "product information", "reviews",
85
- "recent content", "news", "broad coverage"
86
- ]
87
- weaknesses = ["requires API key with usage limits", "not specialized for academic content"]
88
- supports_full_search = true
89
- full_search_module = "local_deep_research.web_search_engines.engines.full_serp_search_results_old"
90
- full_search_class = "FullSerpAPISearchResults"
91
-
92
- [serpapi.default_params]
93
- region = "us"
94
- time_period = "y"
95
- safe_search = true
96
- search_language = "English"
97
-
98
- [searxng]
99
- module_path = ".engines.search_engine_searxng"
100
- class_name = "SearXNGSearchEngine"
101
- requires_api_key = false
102
- api_key_env = "SEARXNG_INSTANCE"
103
- reliability = 0.9
104
- strengths = [
105
- "privacy-focused", "metasearch engine", "self-hosted",
106
- "no tracking", "configurable", "multiple engines in one"
107
- ]
108
- weaknesses = [
109
- "requires self-hosting", "depends on other search engines",
110
- "may be rate limited by underlying engines"
111
- ]
112
- supports_full_search = true
113
- full_search_module = ".engines.full_search"
114
- full_search_class = "FullSearchResults"
115
-
116
- [searxng.default_params]
117
- max_results = 15
118
- categories = ["general"]
119
- language = "en"
120
- safe_search = 1
121
- delay_between_requests = 0.0
122
- include_full_content = true
123
-
124
- [google_pse]
125
- module_path = ".engines.search_engine_google_pse"
126
- class_name = "GooglePSESearchEngine"
127
- requires_api_key = true
128
- api_key_env = "GOOGLE_PSE_API_KEY"
129
- reliability = 0.9
130
- strengths = [
131
- "custom search scope", "high-quality results", "domain-specific search",
132
- "configurable search experience", "control over search index"
133
- ]
134
- weaknesses = [
135
- "requires API key with usage limits",
136
- "limited to 10,000 queries/day on free tier",
137
- "requires search engine configuration in Google Control Panel"
138
- ]
139
- supports_full_search = true
140
- full_search_module = ".engines.full_search"
141
- full_search_class = "FullSearchResults"
142
-
143
- [google_pse.default_params]
144
- region = "us"
145
- safe_search = true
146
- search_language = "English"
147
-
148
- [brave]
149
- module_path = ".engines.search_engine_brave"
150
- class_name = "BraveSearchEngine"
151
- requires_api_key = true
152
- api_key_env = "BRAVE_API_KEY"
153
- reliability = 0.7
154
- strengths = [
155
- "privacy-focused web search", "product information", "reviews",
156
- "recent content", "news", "broad coverage"
157
- ]
158
- weaknesses = ["requires API key with usage limits", "smaller index than Google"]
159
- supports_full_search = true
160
- full_search_module = "local_deep_research.web_search_engines.engines.full_search"
161
- full_search_class = "FullSearchResults"
162
-
163
- [brave.default_params]
164
- region = "US"
165
- time_period = "y"
166
- safe_search = true
167
- search_language = "English"
168
-
169
- [wayback]
170
- module_path = ".engines.search_engine_wayback"
171
- class_name = "WaybackSearchEngine"
172
- requires_api_key = false
173
- reliability = 0.5
174
- strengths = [
175
- "historical web content", "archived websites", "content verification",
176
- "deleted or changed web pages", "website evolution tracking"
177
- ]
178
- weaknesses = [
179
- "limited to previously archived content", "may miss recent changes",
180
- "archiving quality varies"
181
- ]
182
- supports_full_search = true
183
-
184
- [wayback.default_params]
185
- max_results = 15
186
- max_snapshots_per_url = 3
187
- closest_only = false
188
- language = "English"
189
-
190
- [auto]
191
- module_path = ".engines.meta_search_engine"
192
- class_name = "MetaSearchEngine"
193
- requires_api_key = false
194
- reliability = 0.85
195
- strengths = [
196
- "intelligent engine selection", "adaptable to query type",
197
- "fallback capabilities"
198
- ]
199
- weaknesses = ["slightly slower due to LLM analysis"]
200
- requires_llm = true
201
-
202
- [auto.default_params]
203
- use_api_key_services = true
204
- max_engines_to_try = 3
205
-
206
- [local_all]
207
- module_path = ".engines.search_engine_local_all"
208
- class_name = "LocalAllSearchEngine"
209
- requires_api_key = false
210
- reliability = 0.85
211
- strengths = ["searches all local collections", "personal documents", "offline access"]
212
- weaknesses = ["may return too many results", "requires indexing"]
213
- requires_llm = true
214
-
215
- #[semantic_scholar]
216
- #module_path = ".engines.search_engine_semantic_scholar"
217
- #class_name = "SemanticScholarSearchEngine"
218
- #requires_api_key = false
219
- #api_key_env = "S2_API_KEY"
220
- #reliability = 0.87
221
- #strengths = [
222
- # "comprehensive scientific literature",
223
- # "extensive citation network",
224
- # "AI-generated summaries (TLDRs)",
225
- # "academic paper metadata",
226
- # "cross-disciplinary coverage",
227
- # "200M+ papers across all fields",
228
- # "usable without API key"
229
- #]
230
- #weaknesses = [
231
- # "rate limited (1000 requests/day) without API key",
232
- # "limited to academic content"
233
- #]
234
- #supports_full_search = true
235
- #requires_llm = false
236
-
237
- #[semantic_scholar.default_params]
238
- #max_results = 20
239
- #get_abstracts = true
240
- #get_tldr = true
241
- #get_references = false
242
- #get_citations = false
243
- #get_embeddings = false
244
- #citation_limit = 10
245
- #reference_limit = 10
246
- #optimize_queries = true
247
- #max_retries = 5
248
- #retry_backoff_factor = 1.0
249
-
250
- # Default search engine to use if none specified
251
- DEFAULT_SEARCH_ENGINE = "wikipedia"
252
-
253
-
254
-
255
-
256
-
257
- # Additional search engines can be added below
258
- # Uncomment and modify these templates as needed
259
-
260
- # [duckduckgo]
261
- # module_path = ".engines.search_engine_ddg"
262
- # class_name = "DuckDuckGoSearchEngine"
263
- # requires_api_key = false
264
- # reliability = 0.4
265
- # strengths = [
266
- # "web search", "product information", "reviews", "recent information",
267
- # "news", "general queries", "broad coverage"
268
- # ]
269
- # weaknesses = ["inconsistent due to rate limits", "not specialized for academic content"]
270
- # supports_full_search = true
271
- # full_search_module = "local_deep_research.web_search_engines.engines.full_search"
272
- # full_search_class = "FullSearchResults"
273
- #
274
- # [duckduckgo.default_params]
275
- # region = "us"
276
- # safe_search = true
277
-
278
- # [guardian]
279
- # module_path = ".engines.search_engine_guardian"
280
- # class_name = "GuardianSearchEngine"
281
- # requires_api_key = true
282
- # api_key_env = "GUARDIAN_API_KEY"
283
- # reliability = 0.5
284
- # strengths = [
285
- # "news articles", "current events", "opinion pieces", "journalism",
286
- # "UK and global news", "political analysis"
287
- # ]
288
- # weaknesses = ["primarily focused on news", "limited historical content pre-1999"]
289
- #
290
- # [guardian.default_params]
291
- # order_by = "relevance"