local-deep-research 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. local_deep_research/__init__.py +1 -1
  2. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +5 -1
  3. local_deep_research/advanced_search_system/strategies/base_strategy.py +5 -2
  4. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +23 -16
  5. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +13 -6
  6. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +4 -3
  7. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +57 -62
  8. local_deep_research/advanced_search_system/strategies/standard_strategy.py +8 -4
  9. local_deep_research/api/research_functions.py +0 -46
  10. local_deep_research/citation_handler.py +2 -5
  11. local_deep_research/config/llm_config.py +25 -68
  12. local_deep_research/config/search_config.py +8 -21
  13. local_deep_research/defaults/default_settings.json +3814 -0
  14. local_deep_research/search_system.py +34 -31
  15. local_deep_research/utilities/db_utils.py +22 -3
  16. local_deep_research/utilities/search_utilities.py +10 -7
  17. local_deep_research/web/app.py +3 -23
  18. local_deep_research/web/app_factory.py +1 -25
  19. local_deep_research/web/database/migrations.py +20 -418
  20. local_deep_research/web/routes/settings_routes.py +75 -364
  21. local_deep_research/web/services/research_service.py +43 -43
  22. local_deep_research/web/services/settings_manager.py +108 -315
  23. local_deep_research/web/services/settings_service.py +3 -56
  24. local_deep_research/web/static/js/components/research.js +1 -1
  25. local_deep_research/web/static/js/components/settings.js +16 -4
  26. local_deep_research/web/static/js/research_form.js +106 -0
  27. local_deep_research/web/templates/pages/research.html +3 -2
  28. local_deep_research/web_search_engines/engines/meta_search_engine.py +13 -18
  29. local_deep_research/web_search_engines/engines/search_engine_local.py +11 -2
  30. local_deep_research/web_search_engines/engines/search_engine_local_all.py +7 -11
  31. local_deep_research/web_search_engines/search_engine_factory.py +12 -64
  32. local_deep_research/web_search_engines/search_engines_config.py +123 -64
  33. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/METADATA +16 -1
  34. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/RECORD +37 -39
  35. local_deep_research/config/config_files.py +0 -245
  36. local_deep_research/defaults/local_collections.toml +0 -53
  37. local_deep_research/defaults/main.toml +0 -80
  38. local_deep_research/defaults/search_engines.toml +0 -291
  39. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/WHEEL +0 -0
  40. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/entry_points.txt +0 -0
  41. {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -18,7 +18,6 @@ from .advanced_search_system.strategies.source_based_strategy import (
18
18
  )
19
19
  from .advanced_search_system.strategies.standard_strategy import StandardSearchStrategy
20
20
  from .citation_handler import CitationHandler
21
- from .config.config_files import settings
22
21
  from .config.llm_config import get_llm
23
22
  from .config.search_config import get_search
24
23
  from .utilities.db_utils import get_db_setting
@@ -58,11 +57,12 @@ class AdvancedSearchSystem:
58
57
  self.search = search
59
58
  if search is None:
60
59
  self.search = get_search(llm_instance=self.model)
61
- self.max_iterations = get_db_setting(
62
- "search.iterations", settings.search.iterations
63
- )
60
+
61
+ # Get iterations setting
62
+ self.max_iterations = get_db_setting("search.iterations", 1)
63
+
64
64
  self.questions_per_iteration = get_db_setting(
65
- "search.questions_per_iteration", settings.search.questions_per_iteration
65
+ "search.questions_per_iteration", 3
66
66
  )
67
67
 
68
68
  # Log the strategy name that's being used
@@ -74,11 +74,19 @@ class AdvancedSearchSystem:
74
74
  self.citation_handler = CitationHandler(self.model)
75
75
  self.question_generator = StandardQuestionGenerator(self.model)
76
76
  self.findings_repository = FindingsRepository(self.model)
77
+ # For backward compatibility
78
+ self.questions_by_iteration = list()
79
+ self.progress_callback = lambda _1, _2, _3: None
80
+ self.all_links_of_system = list()
77
81
 
78
82
  # Initialize strategy based on name
79
83
  if strategy_name.lower() == "iterdrag":
80
84
  logger.info("Creating IterDRAGStrategy instance")
81
- self.strategy = IterDRAGStrategy(model=self.model, search=self.search)
85
+ self.strategy = IterDRAGStrategy(
86
+ model=self.model,
87
+ search=self.search,
88
+ all_links_of_system=self.all_links_of_system,
89
+ )
82
90
  elif strategy_name.lower() == "source-based":
83
91
  logger.info("Creating SourceBasedSearchStrategy instance")
84
92
  self.strategy = SourceBasedSearchStrategy(
@@ -86,6 +94,7 @@ class AdvancedSearchSystem:
86
94
  search=self.search,
87
95
  include_text_content=include_text_content,
88
96
  use_cross_engine_filter=use_cross_engine_filter,
97
+ all_links_of_system=self.all_links_of_system,
89
98
  )
90
99
  elif strategy_name.lower() == "parallel":
91
100
  logger.info("Creating ParallelSearchStrategy instance")
@@ -94,22 +103,26 @@ class AdvancedSearchSystem:
94
103
  search=self.search,
95
104
  include_text_content=include_text_content,
96
105
  use_cross_engine_filter=use_cross_engine_filter,
106
+ all_links_of_system=self.all_links_of_system,
97
107
  )
98
108
  elif strategy_name.lower() == "rapid":
99
109
  logger.info("Creating RapidSearchStrategy instance")
100
- self.strategy = RapidSearchStrategy(model=self.model, search=self.search)
110
+ self.strategy = RapidSearchStrategy(
111
+ model=self.model,
112
+ search=self.search,
113
+ all_links_of_system=self.all_links_of_system,
114
+ )
101
115
  else:
102
116
  logger.info("Creating StandardSearchStrategy instance")
103
- self.strategy = StandardSearchStrategy(model=self.model, search=self.search)
117
+ self.strategy = StandardSearchStrategy(
118
+ model=self.model,
119
+ search=self.search,
120
+ all_links_of_system=self.all_links_of_system,
121
+ )
104
122
 
105
123
  # Log the actual strategy class
106
124
  logger.info(f"Created strategy of type: {type(self.strategy).__name__}")
107
125
 
108
- # For backward compatibility
109
- self.questions_by_iteration = {}
110
- self.progress_callback = lambda _1, _2, _3: None
111
- self.all_links_of_system = list()
112
-
113
126
  # Configure the strategy with our attributes
114
127
  if hasattr(self, "progress_callback") and self.progress_callback:
115
128
  self.strategy.set_progress_callback(self.progress_callback)
@@ -163,25 +176,15 @@ class AdvancedSearchSystem:
163
176
  result = self.strategy.analyze_topic(query)
164
177
 
165
178
  # Update our attributes for backward compatibility
166
- if hasattr(self.strategy, "questions_by_iteration"):
167
- self.questions_by_iteration = self.strategy.questions_by_iteration
168
- # Send progress message with search info
169
- self.progress_callback(
170
- f"Processed questions: {self.strategy.questions_by_iteration}",
171
- 2, # Low percentage to show this as an early step
172
- {
173
- "phase": "setup",
174
- "search_info": {
175
- "questions_by_iteration": len(
176
- self.strategy.questions_by_iteration
177
- )
178
- },
179
- },
180
- )
181
- if hasattr(self.strategy, "all_links_of_system"):
182
- self.all_links_of_system = self.strategy.all_links_of_system
179
+
180
+ self.questions_by_iteration = self.strategy.questions_by_iteration.copy()
181
+ # Send progress message with search info
182
+
183
+ # if hasattr(self.strategy, "all_links_of_system"):
184
+ self.all_links_of_system.extend(self.strategy.all_links_of_system)
183
185
 
184
186
  # Include the search system instance for access to citations
185
187
  result["search_system"] = self
186
-
188
+ result["all_links_of_system"] = self.all_links_of_system
189
+ result["questions_by_iteration"] = self.questions_by_iteration
187
190
  return result
@@ -1,11 +1,12 @@
1
1
  import logging
2
2
  import os
3
3
  from functools import cache
4
+ from typing import Any, Dict
4
5
 
5
6
  from sqlalchemy import create_engine
6
7
  from sqlalchemy.orm import Session, sessionmaker
7
8
 
8
- from ..web.services.settings_manager import SettingsManager
9
+ from ..web.services.settings_manager import SettingsManager, check_env_setting
9
10
 
10
11
  logger = logging.getLogger(__name__)
11
12
 
@@ -36,8 +37,24 @@ def get_settings_manager() -> SettingsManager:
36
37
  return SettingsManager(db_session=get_db_session())
37
38
 
38
39
 
39
- def get_db_setting(key, default_value=None):
40
- """Get a setting from the database with fallback to default value"""
40
+ def get_db_setting(
41
+ key: str, default_value: Any | None = None, check_env: bool = True
42
+ ) -> str | Dict[str, Any] | None:
43
+ """
44
+ Get a setting from the database with fallback to default value
45
+
46
+ Args:
47
+ key: The setting key.
48
+ default_value: If the setting is not found, it will return this instead.
49
+ check_env: If true, it will check the corresponding environment
50
+ variable before checking the DB and return that if it is set.
51
+
52
+ """
53
+ if check_env:
54
+ env_value = check_env_setting(key)
55
+ if env_value is not None:
56
+ return env_value
57
+
41
58
  try:
42
59
  # Get settings manager which handles database access
43
60
  value = get_settings_manager().get_setting(key)
@@ -46,4 +63,6 @@ def get_db_setting(key, default_value=None):
46
63
  return value
47
64
  except Exception as e:
48
65
  logger.error(f"Error getting setting {key} from database: {e}")
66
+
67
+ logger.warning(f"Could not find setting '{key}' in the database.")
49
68
  return default_value
@@ -43,21 +43,22 @@ def extract_links_from_search_results(search_results: List[Dict]) -> List[Dict]:
43
43
  return links
44
44
 
45
45
 
46
- def format_links(links: List[Dict]) -> str:
47
- formatted_links = format_links_to_markdown(links)
48
- return formatted_links
49
-
50
-
51
46
  def format_links_to_markdown(all_links: List[Dict]) -> str:
52
47
  formatted_text = ""
48
+ logger.info(f"Formatting {len(all_links)} links to markdown...")
49
+
53
50
  if all_links:
54
51
 
55
52
  # Group links by URL and collect all their indices
56
53
  url_to_indices = {}
57
54
  for link in all_links:
58
55
  url = link.get("url")
56
+ if url is None:
57
+ url = link.get("link")
59
58
  index = link.get("index", "")
59
+ # logger.info(f"URL \n {str(url)} ")
60
60
  if url:
61
+
61
62
  if url not in url_to_indices:
62
63
  url_to_indices[url] = []
63
64
  url_to_indices[url].append(index)
@@ -66,6 +67,8 @@ def format_links_to_markdown(all_links: List[Dict]) -> str:
66
67
  seen_urls = set() # Initialize the set here
67
68
  for link in all_links:
68
69
  url = link.get("url")
70
+ if url is None:
71
+ url = link.get("link")
69
72
  title = link.get("title", "Untitled")
70
73
  if url and url not in seen_urls:
71
74
  # Get all indices for this URL
@@ -211,7 +214,7 @@ def format_findings(
211
214
  links = extract_links_from_search_results(search_results)
212
215
  if links:
213
216
  formatted_text += "### SOURCES USED IN THIS SECTION:\n"
214
- formatted_text += format_links(links) + "\n\n"
217
+ formatted_text += format_links_to_markdown(links) + "\n\n"
215
218
  except Exception as link_err:
216
219
  logger.error(
217
220
  f"Error processing search results/links for finding {idx}: {link_err}"
@@ -238,5 +241,5 @@ def print_search_results(search_results):
238
241
  formatted_text = ""
239
242
  links = extract_links_from_search_results(search_results)
240
243
  if links:
241
- formatted_text = format_links(links=links)
244
+ formatted_text = format_links_to_markdown(links=links)
242
245
  logger.info(formatted_text)
@@ -2,7 +2,6 @@ import logging
2
2
  import os
3
3
  import sys
4
4
 
5
- from ..config.config_files import settings
6
5
  from ..setup_data_dir import setup_data_dir
7
6
  from ..utilities.db_utils import get_db_setting
8
7
  from .app_factory import create_app
@@ -92,28 +91,9 @@ def main():
92
91
  print("Please run migration manually.")
93
92
 
94
93
  # Get web server settings with defaults
95
- port = get_db_setting("web.port", settings.web.port)
96
- host = get_db_setting("web.host", settings.web.host)
97
- debug = get_db_setting("web.debug", settings.web.debug)
98
-
99
- # Check for OpenAI availability but don't import it unless necessary
100
- try:
101
- api_key = os.environ.get("OPENAI_API_KEY")
102
- if api_key:
103
- try:
104
- # Only try to import if we have an API key
105
- import openai
106
-
107
- openai.api_key = api_key
108
- logger.info("OpenAI integration is available")
109
- except ImportError:
110
- logger.info("OpenAI package not installed, integration disabled")
111
- else:
112
- logger.info(
113
- "OPENAI_API_KEY not found in environment variables, OpenAI integration disabled"
114
- )
115
- except Exception as e:
116
- logger.error(f"Error checking OpenAI availability: {e}")
94
+ port = get_db_setting("web.port", 5000)
95
+ host = get_db_setting("web.host", "0.0.0.0")
96
+ debug = get_db_setting("web.debug", True)
117
97
 
118
98
  logger.info(f"Starting web server on {host}:{port} (debug: {debug})")
119
99
  socketio.run(app, debug=debug, host=host, port=port, allow_unsafe_werkzeug=True)
@@ -165,29 +165,6 @@ def register_blueprints(app):
165
165
  api_bp, url_prefix="/research/api"
166
166
  ) # Register API blueprint with prefix
167
167
 
168
- # Configure settings paths
169
- # Import config inside the function to avoid circular dependencies
170
- def configure_settings_routes():
171
- try:
172
- from ..config.config_files import SEARCH_ENGINES_FILE, get_config_dir
173
- from .routes.settings_routes import set_config_paths
174
-
175
- CONFIG_DIR = get_config_dir() / "config"
176
- MAIN_CONFIG_FILE = CONFIG_DIR / "settings.toml"
177
- LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"
178
-
179
- set_config_paths(
180
- CONFIG_DIR,
181
- SEARCH_ENGINES_FILE,
182
- MAIN_CONFIG_FILE,
183
- LOCAL_COLLECTIONS_FILE,
184
- )
185
- except Exception as e:
186
- logger.error(f"Error configuring settings routes: {e}")
187
-
188
- # Call this after all blueprints are registered
189
- configure_settings_routes()
190
-
191
168
  # Add root route redirect
192
169
  @app.route("/")
193
170
  def root_index():
@@ -260,7 +237,7 @@ def create_database(app):
260
237
  from sqlalchemy import create_engine
261
238
  from sqlalchemy.orm import scoped_session, sessionmaker
262
239
 
263
- from .database.migrations import run_migrations, setup_predefined_settings
240
+ from .database.migrations import run_migrations
264
241
  from .database.models import Base
265
242
 
266
243
  # Configure SQLite to use URI mode, which allows for relative file paths
@@ -281,7 +258,6 @@ def create_database(app):
281
258
 
282
259
  # Run migrations and setup predefined settings
283
260
  run_migrations(engine, app.db_session)
284
- setup_predefined_settings(app.db_session)
285
261
 
286
262
  # Add teardown context
287
263
  @app.teardown_appcontext