local-deep-research 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41):
  1. local_deep_research/__init__.py +1 -1
  2. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +5 -1
  3. local_deep_research/advanced_search_system/strategies/base_strategy.py +5 -2
  4. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +23 -16
  5. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +13 -6
  6. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +4 -3
  7. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +57 -62
  8. local_deep_research/advanced_search_system/strategies/standard_strategy.py +8 -4
  9. local_deep_research/api/research_functions.py +0 -46
  10. local_deep_research/citation_handler.py +2 -5
  11. local_deep_research/config/llm_config.py +25 -68
  12. local_deep_research/config/search_config.py +8 -21
  13. local_deep_research/defaults/default_settings.json +3814 -0
  14. local_deep_research/search_system.py +34 -31
  15. local_deep_research/utilities/db_utils.py +22 -3
  16. local_deep_research/utilities/search_utilities.py +10 -7
  17. local_deep_research/web/app.py +3 -23
  18. local_deep_research/web/app_factory.py +1 -25
  19. local_deep_research/web/database/migrations.py +20 -418
  20. local_deep_research/web/routes/settings_routes.py +75 -364
  21. local_deep_research/web/services/research_service.py +43 -43
  22. local_deep_research/web/services/settings_manager.py +108 -315
  23. local_deep_research/web/services/settings_service.py +3 -56
  24. local_deep_research/web/static/js/components/research.js +1 -1
  25. local_deep_research/web/static/js/components/settings.js +16 -4
  26. local_deep_research/web/static/js/research_form.js +106 -0
  27. local_deep_research/web/templates/pages/research.html +3 -2
  28. local_deep_research/web_search_engines/engines/meta_search_engine.py +13 -18
  29. local_deep_research/web_search_engines/engines/search_engine_local.py +11 -2
  30. local_deep_research/web_search_engines/engines/search_engine_local_all.py +7 -11
  31. local_deep_research/web_search_engines/search_engine_factory.py +12 -64
  32. local_deep_research/web_search_engines/search_engines_config.py +123 -64
  33. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/METADATA +16 -1
  34. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/RECORD +37 -39
  35. local_deep_research/config/config_files.py +0 -245
  36. local_deep_research/defaults/local_collections.toml +0 -53
  37. local_deep_research/defaults/main.toml +0 -80
  38. local_deep_research/defaults/search_engines.toml +0 -291
  39. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/WHEEL +0 -0
  40. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/entry_points.txt +0 -0
  41. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -984,7 +984,7 @@
984
984
  // Only run this for the main settings dashboard
985
985
  if (!settingsContent) return;
986
986
 
987
- fetch('/research/settings/all_settings')
987
+ fetch('/research/settings/api')
988
988
  .then(response => response.json())
989
989
  .then(data => {
990
990
  if (data.status === 'success') {
@@ -1089,7 +1089,6 @@
1089
1089
  'max_results',
1090
1090
  'quality_check_urls',
1091
1091
  'questions_per_iteration',
1092
- 'research_iterations',
1093
1092
  'region',
1094
1093
  'search_engine',
1095
1094
  'searches_per_section',
@@ -1124,7 +1123,7 @@
1124
1123
  const prioritySettings = {
1125
1124
  'app': ['enable_web', 'enable_notifications', 'web_interface', 'theme', 'default_theme', 'dark_mode', 'debug', 'host', 'port'],
1126
1125
  'llm': ['provider', 'model', 'temperature', 'max_tokens', 'api_key', 'openai_endpoint_url', 'lmstudio_url', 'llamacpp_model_path'],
1127
- 'search': ['tool', 'search_engine', 'iterations', 'questions_per_iteration', 'research_iterations', 'max_results', 'region'],
1126
+ 'search': ['tool', 'iterations', 'questions_per_iteration', 'max_results', 'region', 'search_engine'],
1128
1127
  'report': ['enable_fact_checking', 'knowledge_accumulation', 'output_dir', 'detailed_citations']
1129
1128
  };
1130
1129
 
@@ -1152,6 +1151,11 @@
1152
1151
  return false;
1153
1152
  }
1154
1153
 
1154
+ // Filter out settings that are not marked as visible.
1155
+ if (!setting.visible) {
1156
+ return false;
1157
+ }
1158
+
1155
1159
  // If we're on a specific tab, only show settings for that tab
1156
1160
  if (tab !== 'all') {
1157
1161
  // Only show settings in tab-specific lists for that tab
@@ -3091,7 +3095,15 @@
3091
3095
  * Process settings to handle object values
3092
3096
  */
3093
3097
  function processSettings(settings) {
3094
- return settings.map(setting => {
3098
+ // Convert to a list.
3099
+ const settingsList = [];
3100
+ for (const key in settings) {
3101
+ const setting = settings[key];
3102
+ setting["key"] = key
3103
+ settingsList.push(setting);
3104
+ }
3105
+
3106
+ return settingsList.map(setting => {
3095
3107
  const processedSetting = {...setting};
3096
3108
 
3097
3109
  // Convert object values to JSON strings for display
@@ -0,0 +1,106 @@
1
+ /**
2
+ * Research form handling with settings management
3
+ */
4
+
5
+ document.addEventListener('DOMContentLoaded', function() {
6
+ // Initialize the research form
7
+ console.log('DOM loaded, initializing research form');
8
+ initResearchForm();
9
+ });
10
+
11
+ /**
12
+ * Initialize the research form with values from settings
13
+ */
14
+ function initResearchForm() {
15
+ console.log('Initializing research form...');
16
+ // Get form elements
17
+ const iterationsInput = document.getElementById('iterations');
18
+ const questionsInput = document.getElementById('questions_per_iteration');
19
+
20
+ // Fetch all settings at once (more efficient)
21
+ fetch('/research/settings/api')
22
+ .then(response => {
23
+ if (!response.ok) {
24
+ throw new Error('Failed to fetch settings');
25
+ }
26
+ return response.json();
27
+ })
28
+ .then(data => {
29
+ console.log('Loaded settings:', data);
30
+ if (data && data.status === 'success' && data.settings) {
31
+ // Find our specific settings
32
+ const settings = data.settings;
33
+
34
+ // Look for the iterations setting
35
+ for (const key in settings) {
36
+ const setting = settings[key];
37
+ if (key === 'search.iterations') {
38
+ console.log('Found iterations setting:', setting.value);
39
+ iterationsInput.value = setting.value;
40
+ }
41
+
42
+ if (key === 'search.questions_per_iteration') {
43
+ console.log('Found questions setting:', setting.value);
44
+ questionsInput.value = setting.value;
45
+ }
46
+ }
47
+ }
48
+ })
49
+ .catch(error => {
50
+ console.warn('Error loading research settings:', error);
51
+ // Form will use default values if settings can't be loaded
52
+ });
53
+
54
+ // Add our settings saving to the form submission process
55
+ patchFormSubmitHandler();
56
+ }
57
+
58
+ /**
59
+ * Patch the existing form submit handler to include our settings saving functionality
60
+ */
61
+ function patchFormSubmitHandler() {
62
+ // Get the form element
63
+ const form = document.getElementById('research-form');
64
+ if (!form) return;
65
+
66
+ // Monitor for form submissions using the capture phase to run before other handlers
67
+ form.addEventListener('submit', function(event) {
68
+ // Save research settings first, before the main form handler processes the submission
69
+ saveResearchSettings();
70
+
71
+ // Let the event continue normally to the other handlers
72
+ }, true); // true enables capture phase
73
+ }
74
+
75
+ /**
76
+ * Save research settings to the database
77
+ */
78
+ function saveResearchSettings() {
79
+ const iterations = document.getElementById('iterations').value;
80
+ const questions = document.getElementById('questions_per_iteration').value;
81
+
82
+ console.log('Saving research settings:', { iterations, questions });
83
+
84
+ // Get CSRF token
85
+ const csrfToken = document.querySelector('meta[name="csrf-token"]').getAttribute('content');
86
+
87
+ // Save settings
88
+ fetch('/research/settings/save_all_settings', {
89
+ method: 'POST',
90
+ headers: {
91
+ 'Content-Type': 'application/json',
92
+ 'X-CSRFToken': csrfToken
93
+ },
94
+ body: JSON.stringify({
95
+ 'search.iterations': parseInt(iterations),
96
+ 'search.questions_per_iteration': parseInt(questions)
97
+ })
98
+ })
99
+ .then(response => response.json())
100
+ .then(data => {
101
+ console.log('Settings saved:', data);
102
+ })
103
+ .catch(error => {
104
+ console.warn('Error saving research settings:', error);
105
+ });
106
+ }
@@ -99,9 +99,9 @@
99
99
  </div>
100
100
 
101
101
  <div class="form-row">
102
- <!-- Research Iterations -->
102
+ <!-- Search Iterations -->
103
103
  <div class="form-group half">
104
- <label for="iterations">Research Iterations</label>
104
+ <label for="iterations">Search Iterations</label>
105
105
  <input type="number" id="iterations" name="iterations" class="form-control" min="1" max="5" value="2">
106
106
  <span class="input-help">Number of research cycles to perform</span>
107
107
  </div>
@@ -136,4 +136,5 @@
136
136
  {% block component_scripts %}
137
137
  <script src="{{ url_for('research.serve_static', path='js/components/custom_dropdown.js') }}"></script>
138
138
  <script src="{{ url_for('research.serve_static', path='js/components/research.js') }}"></script>
139
+ <script src="{{ url_for('research.serve_static', path='js/research_form.js') }}"></script>
139
140
  {% endblock %}
@@ -1,12 +1,11 @@
1
1
  import logging
2
- import os
3
2
  from typing import Any, Dict, List, Optional
4
3
 
5
- from ...config import search_config
4
+ from ...utilities.db_utils import get_db_setting
6
5
  from ...web.services.socket_service import emit_socket_event
7
6
  from ..search_engine_base import BaseSearchEngine
8
7
  from ..search_engine_factory import create_search_engine
9
- from ..search_engines_config import SEARCH_ENGINES
8
+ from ..search_engines_config import search_config
10
9
  from .search_engine_wikipedia import WikipediaSearchEngine
11
10
 
12
11
  # Setup logging
@@ -69,7 +68,7 @@ class MetaSearchEngine(BaseSearchEngine):
69
68
  """Get list of available engines, excluding 'meta' and 'auto'"""
70
69
  # Filter out 'meta' and 'auto' and check API key availability
71
70
  available = []
72
- for name, config_ in SEARCH_ENGINES.items():
71
+ for name, config_ in search_config().items():
73
72
  if name in ["meta", "auto"]:
74
73
  continue
75
74
 
@@ -77,15 +76,14 @@ class MetaSearchEngine(BaseSearchEngine):
77
76
  continue
78
77
 
79
78
  if config_.get("requires_api_key", False):
80
- api_key_env = config_.get("api_key_env")
81
- api_key = os.getenv(api_key_env) if api_key_env else None
79
+ api_key = config_.get("api_key")
82
80
  if not api_key:
83
81
  continue
84
82
 
85
83
  available.append(name)
86
84
 
87
85
  # Make sure we have at least one engine available
88
- if not available and "wikipedia" in SEARCH_ENGINES:
86
+ if not available and "wikipedia" in search_config():
89
87
  available.append("wikipedia")
90
88
 
91
89
  return available
@@ -109,7 +107,7 @@ class MetaSearchEngine(BaseSearchEngine):
109
107
  # Return engines sorted by reliability
110
108
  return sorted(
111
109
  self.available_engines,
112
- key=lambda x: SEARCH_ENGINES.get(x, {}).get("reliability", 0),
110
+ key=lambda x: search_config().get(x, {}).get("reliability", 0),
113
111
  reverse=True,
114
112
  )
115
113
 
@@ -117,14 +115,14 @@ class MetaSearchEngine(BaseSearchEngine):
117
115
  engines_info = []
118
116
  for engine_name in self.available_engines:
119
117
  try:
120
- if engine_name in SEARCH_ENGINES:
121
- strengths = SEARCH_ENGINES[engine_name].get(
118
+ if engine_name in search_config():
119
+ strengths = search_config()[engine_name].get(
122
120
  "strengths", "General search"
123
121
  )
124
- weaknesses = SEARCH_ENGINES[engine_name].get(
122
+ weaknesses = search_config()[engine_name].get(
125
123
  "weaknesses", "None specified"
126
124
  )
127
- description = SEARCH_ENGINES[engine_name].get(
125
+ description = search_config()[engine_name].get(
128
126
  "description", engine_name
129
127
  )
130
128
  engines_info.append(
@@ -167,7 +165,7 @@ Example output: wikipedia,arxiv,github"""
167
165
  if not valid_engines:
168
166
  valid_engines = sorted(
169
167
  self.available_engines,
170
- key=lambda x: SEARCH_ENGINES.get(x, {}).get("reliability", 0),
168
+ key=lambda x: search_config().get(x, {}).get("reliability", 0),
171
169
  reverse=True,
172
170
  )
173
171
 
@@ -177,7 +175,7 @@ Example output: wikipedia,arxiv,github"""
177
175
  # Fall back to reliability-based ordering
178
176
  return sorted(
179
177
  self.available_engines,
180
- key=lambda x: SEARCH_ENGINES.get(x, {}).get("reliability", 0),
178
+ key=lambda x: search_config().get(x, {}).get("reliability", 0),
181
179
  reverse=True,
182
180
  )
183
181
 
@@ -276,10 +274,7 @@ Example output: wikipedia,arxiv,github"""
276
274
  List of result dictionaries with full content
277
275
  """
278
276
  # Check if we should get full content
279
- if (
280
- hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
281
- and search_config.SEARCH_SNIPPETS_ONLY
282
- ):
277
+ if get_db_setting("search.snippets_only", True):
283
278
  logger.info("Snippet-only mode, skipping full content retrieval")
284
279
  return relevant_items
285
280
 
@@ -31,6 +31,7 @@ from langchain_core.language_models import BaseLLM
31
31
  from langchain_text_splitters import RecursiveCharacterTextSplitter
32
32
 
33
33
  from ...config import search_config
34
+ from ...utilities.db_utils import get_db_setting
34
35
  from ..search_engine_base import BaseSearchEngine
35
36
 
36
37
  # Setup logging
@@ -168,8 +169,8 @@ class LocalEmbeddingManager:
168
169
  if self.embedding_model_type == "ollama":
169
170
  # Use Ollama for embeddings
170
171
  if not self.ollama_base_url:
171
- self.ollama_base_url = os.getenv(
172
- "OLLAMA_BASE_URL", "http://localhost:11434"
172
+ self.ollama_base_url = get_db_setting(
173
+ "llm.ollama.url", "http://localhost:11434"
173
174
  )
174
175
 
175
176
  logger.info(
@@ -642,6 +643,8 @@ class LocalSearchEngine(BaseSearchEngine):
642
643
  chunk_overlap: int = 200,
643
644
  cache_dir: str = ".cache/local_search",
644
645
  collections: Optional[Dict[str, Dict[str, Any]]] = None,
646
+ name: str = "",
647
+ description: str = "",
645
648
  ):
646
649
  """
647
650
  Initialize the local search engine.
@@ -660,10 +663,16 @@ class LocalSearchEngine(BaseSearchEngine):
660
663
  chunk_overlap: Overlap between chunks
661
664
  cache_dir: Directory to store embedding cache and index
662
665
  collections: Dictionary of named collections with paths and descriptions
666
+ name: Human-readable name of the collection we are searching.
667
+ description: Human-readable description of the collection we are
668
+ searching.
663
669
  """
664
670
  # Initialize the base search engine
665
671
  super().__init__(llm=llm, max_filtered_results=max_filtered_results)
666
672
 
673
+ self.name = name
674
+ self.description = description
675
+
667
676
  # Validate folder paths
668
677
  self.folder_paths = paths
669
678
  self.valid_folder_paths = []
@@ -3,14 +3,14 @@ Search engine that searches across all local collections
3
3
  """
4
4
 
5
5
  import logging
6
- from typing import Any, Dict, List, Optional
6
+ from typing import Any, Dict, List, Optional, cast
7
7
 
8
- import toml
9
8
  from langchain_core.language_models import BaseLLM
10
9
 
11
- from ...config.config_files import LOCAL_COLLECTIONS_FILE
12
10
  from ..search_engine_base import BaseSearchEngine
13
11
  from ..search_engine_factory import create_search_engine
12
+ from ..search_engines_config import local_search_engines
13
+ from .search_engine_local import LocalSearchEngine
14
14
 
15
15
  # Setup logging
16
16
  logger = logging.getLogger(__name__)
@@ -46,12 +46,7 @@ class LocalAllSearchEngine(BaseSearchEngine):
46
46
  # Find all local collection search engines
47
47
  self.local_engines = {}
48
48
  try:
49
- local_collections = toml.load(LOCAL_COLLECTIONS_FILE)
50
-
51
- for collection_id, collection in local_collections.items():
52
- if not collection.get("enabled", True):
53
- continue
54
-
49
+ for collection_id in local_search_engines():
55
50
  # Create a search engine for this collection
56
51
  try:
57
52
  engine = create_search_engine(
@@ -59,12 +54,13 @@ class LocalAllSearchEngine(BaseSearchEngine):
59
54
  llm=llm,
60
55
  max_filtered_results=max_filtered_results,
61
56
  )
57
+ engine = cast(LocalSearchEngine, engine)
62
58
 
63
59
  if engine:
64
60
  self.local_engines[collection_id] = {
65
61
  "engine": engine,
66
- "name": collection.get("name", collection_id),
67
- "description": collection.get("description", ""),
62
+ "name": engine.name,
63
+ "description": engine.description,
68
64
  }
69
65
  except Exception as e:
70
66
  logger.error(
@@ -2,11 +2,11 @@ import importlib
2
2
  import inspect
3
3
  import logging
4
4
  import os
5
- from typing import Any, Dict, List, Optional, Union
5
+ from typing import Any, Dict, Optional
6
6
 
7
7
  from ..utilities.db_utils import get_db_setting
8
8
  from .search_engine_base import BaseSearchEngine
9
- from .search_engines_config import DEFAULT_SEARCH_ENGINE, SEARCH_ENGINES
9
+ from .search_engines_config import default_search_engine, search_config
10
10
 
11
11
  # Setup logging
12
12
  logging.basicConfig(level=logging.INFO)
@@ -28,42 +28,31 @@ def create_search_engine(
28
28
  Initialized search engine instance or None if creation failed
29
29
  """
30
30
  # If engine name not found, use default
31
- if engine_name not in SEARCH_ENGINES:
31
+ if engine_name not in search_config():
32
32
  logger.warning(
33
- f"Search engine '{engine_name}' not found, using default: {DEFAULT_SEARCH_ENGINE}"
33
+ f"Search engine '{engine_name}' not found, using default: "
34
+ f"{default_search_engine()}"
34
35
  )
35
- engine_name = DEFAULT_SEARCH_ENGINE
36
+ engine_name = default_search_engine()
36
37
 
37
38
  # Get engine configuration
38
- engine_config = SEARCH_ENGINES[engine_name]
39
- from ..config.config_files import settings
39
+ engine_config = search_config()[engine_name]
40
40
 
41
41
  # Set default max_results from config if not provided in kwargs
42
42
  if "max_results" not in kwargs:
43
- max_results = get_db_setting("search.max_results", settings.search.max_results)
43
+ max_results = get_db_setting("search.max_results", 10)
44
44
  if max_results is None:
45
45
  max_results = 20
46
46
  kwargs["max_results"] = max_results
47
47
 
48
48
  # Check for API key requirements
49
49
  if engine_config.get("requires_api_key", False):
50
- api_key_env = engine_config.get("api_key_env")
51
-
52
- # First check environment variable
53
- api_key = os.getenv(api_key_env)
50
+ api_key = os.getenv(f"LDR_{engine_name.upper()}_API_KEY")
54
51
  if not api_key:
55
- api_key = os.getenv("LDR_" + api_key_env)
56
-
57
- # If not found in environment, check Dynaconf settings
58
- if not api_key and api_key_env:
59
- # Convert env var name to settings path (e.g., BRAVE_API_KEY -> brave_api_key)
60
- settings_key = api_key_env.lower()
61
- api_key = settings.get(settings_key)
52
+ api_key = engine_config.get("api_key")
62
53
 
63
54
  if not api_key:
64
- logger.info(
65
- f"Required API key for {engine_name} not found in environment variable: {api_key_env} or settings"
66
- )
55
+ logger.info(f"Required API key for {engine_name} not found in settings.")
67
56
  return None
68
57
 
69
58
  # Check for LLM requirements
@@ -139,7 +128,7 @@ def _create_full_search_wrapper(
139
128
  ) -> Optional[BaseSearchEngine]:
140
129
  """Create a full search wrapper for the base engine if supported"""
141
130
  try:
142
- engine_config = SEARCH_ENGINES[engine_name]
131
+ engine_config = search_config()[engine_name]
143
132
 
144
133
  # Get full search class details
145
134
  module_path = engine_config.get("full_search_module")
@@ -232,47 +221,6 @@ def _create_full_search_wrapper(
232
221
  return base_engine
233
222
 
234
223
 
235
- def get_available_engines(
236
- include_api_key_services: bool = True,
237
- ) -> Union[Dict[str, str], List[str]]:
238
- """
239
- Get all available search engines from the configuration.
240
-
241
- Args:
242
- include_api_key_services: Whether to include engines that require API keys
243
-
244
- Returns:
245
- Dictionary of engine names mapped to descriptions, or a list of engine names
246
- """
247
- try:
248
- # Get engines from SEARCH_ENGINES dict
249
- available_engines = {}
250
-
251
- for name, config in SEARCH_ENGINES.items():
252
- # Skip hidden engines (those that start with _)
253
- if name.startswith("_"):
254
- continue
255
-
256
- # Skip engines that require API keys if requested
257
- if not include_api_key_services and config.get("requires_api_key", False):
258
- continue
259
-
260
- # Add to available engines with display name
261
- strengths = config.get("strengths", [])
262
- description = name.replace("_", " ").title()
263
-
264
- if strengths and len(strengths) > 0:
265
- description += f" - {strengths[0]}"
266
-
267
- available_engines[name] = description
268
-
269
- return available_engines
270
- except Exception as e:
271
- logger.error(f"Error getting available engines: {e}")
272
- # Fall back to list of engines directly from keys
273
- return list(SEARCH_ENGINES.keys())
274
-
275
-
276
224
  def get_search(
277
225
  search_tool: str,
278
226
  llm_instance,