local-deep-research 0.2.3__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +1 -1
- local_deep_research/__version__.py +1 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +5 -1
- local_deep_research/advanced_search_system/strategies/base_strategy.py +5 -2
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +23 -16
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +13 -6
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +4 -3
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +57 -62
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +8 -4
- local_deep_research/api/research_functions.py +0 -46
- local_deep_research/citation_handler.py +2 -5
- local_deep_research/config/llm_config.py +25 -68
- local_deep_research/config/search_config.py +8 -21
- local_deep_research/defaults/default_settings.json +3996 -0
- local_deep_research/search_system.py +34 -31
- local_deep_research/utilities/db_utils.py +22 -3
- local_deep_research/utilities/search_utilities.py +10 -7
- local_deep_research/web/app.py +3 -23
- local_deep_research/web/app_factory.py +1 -25
- local_deep_research/web/database/migrations.py +20 -418
- local_deep_research/web/routes/settings_routes.py +75 -364
- local_deep_research/web/services/research_service.py +43 -43
- local_deep_research/web/services/settings_manager.py +108 -315
- local_deep_research/web/services/settings_service.py +3 -56
- local_deep_research/web/static/js/components/research.js +1 -1
- local_deep_research/web/static/js/components/settings.js +16 -4
- local_deep_research/web/static/js/research_form.js +106 -0
- local_deep_research/web/templates/pages/research.html +3 -2
- local_deep_research/web_search_engines/engines/meta_search_engine.py +56 -21
- local_deep_research/web_search_engines/engines/search_engine_local.py +11 -2
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +7 -11
- local_deep_research/web_search_engines/search_engine_factory.py +12 -64
- local_deep_research/web_search_engines/search_engines_config.py +123 -64
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/METADATA +16 -1
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/RECORD +38 -39
- local_deep_research/config/config_files.py +0 -245
- local_deep_research/defaults/local_collections.toml +0 -53
- local_deep_research/defaults/main.toml +0 -80
- local_deep_research/defaults/search_engines.toml +0 -291
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/WHEEL +0 -0
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.2.3.dist-info → local_deep_research-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -18,7 +18,6 @@ from .advanced_search_system.strategies.source_based_strategy import (
|
|
18
18
|
)
|
19
19
|
from .advanced_search_system.strategies.standard_strategy import StandardSearchStrategy
|
20
20
|
from .citation_handler import CitationHandler
|
21
|
-
from .config.config_files import settings
|
22
21
|
from .config.llm_config import get_llm
|
23
22
|
from .config.search_config import get_search
|
24
23
|
from .utilities.db_utils import get_db_setting
|
@@ -58,11 +57,12 @@ class AdvancedSearchSystem:
|
|
58
57
|
self.search = search
|
59
58
|
if search is None:
|
60
59
|
self.search = get_search(llm_instance=self.model)
|
61
|
-
|
62
|
-
|
63
|
-
)
|
60
|
+
|
61
|
+
# Get iterations setting
|
62
|
+
self.max_iterations = get_db_setting("search.iterations", 1)
|
63
|
+
|
64
64
|
self.questions_per_iteration = get_db_setting(
|
65
|
-
"search.questions_per_iteration",
|
65
|
+
"search.questions_per_iteration", 3
|
66
66
|
)
|
67
67
|
|
68
68
|
# Log the strategy name that's being used
|
@@ -74,11 +74,19 @@ class AdvancedSearchSystem:
|
|
74
74
|
self.citation_handler = CitationHandler(self.model)
|
75
75
|
self.question_generator = StandardQuestionGenerator(self.model)
|
76
76
|
self.findings_repository = FindingsRepository(self.model)
|
77
|
+
# For backward compatibility
|
78
|
+
self.questions_by_iteration = list()
|
79
|
+
self.progress_callback = lambda _1, _2, _3: None
|
80
|
+
self.all_links_of_system = list()
|
77
81
|
|
78
82
|
# Initialize strategy based on name
|
79
83
|
if strategy_name.lower() == "iterdrag":
|
80
84
|
logger.info("Creating IterDRAGStrategy instance")
|
81
|
-
self.strategy = IterDRAGStrategy(
|
85
|
+
self.strategy = IterDRAGStrategy(
|
86
|
+
model=self.model,
|
87
|
+
search=self.search,
|
88
|
+
all_links_of_system=self.all_links_of_system,
|
89
|
+
)
|
82
90
|
elif strategy_name.lower() == "source-based":
|
83
91
|
logger.info("Creating SourceBasedSearchStrategy instance")
|
84
92
|
self.strategy = SourceBasedSearchStrategy(
|
@@ -86,6 +94,7 @@ class AdvancedSearchSystem:
|
|
86
94
|
search=self.search,
|
87
95
|
include_text_content=include_text_content,
|
88
96
|
use_cross_engine_filter=use_cross_engine_filter,
|
97
|
+
all_links_of_system=self.all_links_of_system,
|
89
98
|
)
|
90
99
|
elif strategy_name.lower() == "parallel":
|
91
100
|
logger.info("Creating ParallelSearchStrategy instance")
|
@@ -94,22 +103,26 @@ class AdvancedSearchSystem:
|
|
94
103
|
search=self.search,
|
95
104
|
include_text_content=include_text_content,
|
96
105
|
use_cross_engine_filter=use_cross_engine_filter,
|
106
|
+
all_links_of_system=self.all_links_of_system,
|
97
107
|
)
|
98
108
|
elif strategy_name.lower() == "rapid":
|
99
109
|
logger.info("Creating RapidSearchStrategy instance")
|
100
|
-
self.strategy = RapidSearchStrategy(
|
110
|
+
self.strategy = RapidSearchStrategy(
|
111
|
+
model=self.model,
|
112
|
+
search=self.search,
|
113
|
+
all_links_of_system=self.all_links_of_system,
|
114
|
+
)
|
101
115
|
else:
|
102
116
|
logger.info("Creating StandardSearchStrategy instance")
|
103
|
-
self.strategy = StandardSearchStrategy(
|
117
|
+
self.strategy = StandardSearchStrategy(
|
118
|
+
model=self.model,
|
119
|
+
search=self.search,
|
120
|
+
all_links_of_system=self.all_links_of_system,
|
121
|
+
)
|
104
122
|
|
105
123
|
# Log the actual strategy class
|
106
124
|
logger.info(f"Created strategy of type: {type(self.strategy).__name__}")
|
107
125
|
|
108
|
-
# For backward compatibility
|
109
|
-
self.questions_by_iteration = {}
|
110
|
-
self.progress_callback = lambda _1, _2, _3: None
|
111
|
-
self.all_links_of_system = list()
|
112
|
-
|
113
126
|
# Configure the strategy with our attributes
|
114
127
|
if hasattr(self, "progress_callback") and self.progress_callback:
|
115
128
|
self.strategy.set_progress_callback(self.progress_callback)
|
@@ -163,25 +176,15 @@ class AdvancedSearchSystem:
|
|
163
176
|
result = self.strategy.analyze_topic(query)
|
164
177
|
|
165
178
|
# Update our attributes for backward compatibility
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
{
|
173
|
-
"phase": "setup",
|
174
|
-
"search_info": {
|
175
|
-
"questions_by_iteration": len(
|
176
|
-
self.strategy.questions_by_iteration
|
177
|
-
)
|
178
|
-
},
|
179
|
-
},
|
180
|
-
)
|
181
|
-
if hasattr(self.strategy, "all_links_of_system"):
|
182
|
-
self.all_links_of_system = self.strategy.all_links_of_system
|
179
|
+
|
180
|
+
self.questions_by_iteration = self.strategy.questions_by_iteration.copy()
|
181
|
+
# Send progress message with search info
|
182
|
+
|
183
|
+
# if hasattr(self.strategy, "all_links_of_system"):
|
184
|
+
self.all_links_of_system.extend(self.strategy.all_links_of_system)
|
183
185
|
|
184
186
|
# Include the search system instance for access to citations
|
185
187
|
result["search_system"] = self
|
186
|
-
|
188
|
+
result["all_links_of_system"] = self.all_links_of_system
|
189
|
+
result["questions_by_iteration"] = self.questions_by_iteration
|
187
190
|
return result
|
@@ -1,11 +1,12 @@
|
|
1
1
|
import logging
|
2
2
|
import os
|
3
3
|
from functools import cache
|
4
|
+
from typing import Any, Dict
|
4
5
|
|
5
6
|
from sqlalchemy import create_engine
|
6
7
|
from sqlalchemy.orm import Session, sessionmaker
|
7
8
|
|
8
|
-
from ..web.services.settings_manager import SettingsManager
|
9
|
+
from ..web.services.settings_manager import SettingsManager, check_env_setting
|
9
10
|
|
10
11
|
logger = logging.getLogger(__name__)
|
11
12
|
|
@@ -36,8 +37,24 @@ def get_settings_manager() -> SettingsManager:
|
|
36
37
|
return SettingsManager(db_session=get_db_session())
|
37
38
|
|
38
39
|
|
39
|
-
def get_db_setting(
|
40
|
-
|
40
|
+
def get_db_setting(
|
41
|
+
key: str, default_value: Any | None = None, check_env: bool = True
|
42
|
+
) -> str | Dict[str, Any] | None:
|
43
|
+
"""
|
44
|
+
Get a setting from the database with fallback to default value
|
45
|
+
|
46
|
+
Args:
|
47
|
+
key: The setting key.
|
48
|
+
default_value: If the setting is not found, it will return this instead.
|
49
|
+
check_env: If true, it will check the corresponding environment
|
50
|
+
variable before checking the DB and return that if it is set.
|
51
|
+
|
52
|
+
"""
|
53
|
+
if check_env:
|
54
|
+
env_value = check_env_setting(key)
|
55
|
+
if env_value is not None:
|
56
|
+
return env_value
|
57
|
+
|
41
58
|
try:
|
42
59
|
# Get settings manager which handles database access
|
43
60
|
value = get_settings_manager().get_setting(key)
|
@@ -46,4 +63,6 @@ def get_db_setting(key, default_value=None):
|
|
46
63
|
return value
|
47
64
|
except Exception as e:
|
48
65
|
logger.error(f"Error getting setting {key} from database: {e}")
|
66
|
+
|
67
|
+
logger.warning(f"Could not find setting '{key}' in the database.")
|
49
68
|
return default_value
|
@@ -43,21 +43,22 @@ def extract_links_from_search_results(search_results: List[Dict]) -> List[Dict]:
|
|
43
43
|
return links
|
44
44
|
|
45
45
|
|
46
|
-
def format_links(links: List[Dict]) -> str:
|
47
|
-
formatted_links = format_links_to_markdown(links)
|
48
|
-
return formatted_links
|
49
|
-
|
50
|
-
|
51
46
|
def format_links_to_markdown(all_links: List[Dict]) -> str:
|
52
47
|
formatted_text = ""
|
48
|
+
logger.info(f"Formatting {len(all_links)} links to markdown...")
|
49
|
+
|
53
50
|
if all_links:
|
54
51
|
|
55
52
|
# Group links by URL and collect all their indices
|
56
53
|
url_to_indices = {}
|
57
54
|
for link in all_links:
|
58
55
|
url = link.get("url")
|
56
|
+
if url is None:
|
57
|
+
url = link.get("link")
|
59
58
|
index = link.get("index", "")
|
59
|
+
# logger.info(f"URL \n {str(url)} ")
|
60
60
|
if url:
|
61
|
+
|
61
62
|
if url not in url_to_indices:
|
62
63
|
url_to_indices[url] = []
|
63
64
|
url_to_indices[url].append(index)
|
@@ -66,6 +67,8 @@ def format_links_to_markdown(all_links: List[Dict]) -> str:
|
|
66
67
|
seen_urls = set() # Initialize the set here
|
67
68
|
for link in all_links:
|
68
69
|
url = link.get("url")
|
70
|
+
if url is None:
|
71
|
+
url = link.get("link")
|
69
72
|
title = link.get("title", "Untitled")
|
70
73
|
if url and url not in seen_urls:
|
71
74
|
# Get all indices for this URL
|
@@ -211,7 +214,7 @@ def format_findings(
|
|
211
214
|
links = extract_links_from_search_results(search_results)
|
212
215
|
if links:
|
213
216
|
formatted_text += "### SOURCES USED IN THIS SECTION:\n"
|
214
|
-
formatted_text += format_links(links) + "\n\n"
|
217
|
+
formatted_text += format_links_to_markdown(links) + "\n\n"
|
215
218
|
except Exception as link_err:
|
216
219
|
logger.error(
|
217
220
|
f"Error processing search results/links for finding {idx}: {link_err}"
|
@@ -238,5 +241,5 @@ def print_search_results(search_results):
|
|
238
241
|
formatted_text = ""
|
239
242
|
links = extract_links_from_search_results(search_results)
|
240
243
|
if links:
|
241
|
-
formatted_text = format_links(links=links)
|
244
|
+
formatted_text = format_links_to_markdown(links=links)
|
242
245
|
logger.info(formatted_text)
|
local_deep_research/web/app.py
CHANGED
@@ -2,7 +2,6 @@ import logging
|
|
2
2
|
import os
|
3
3
|
import sys
|
4
4
|
|
5
|
-
from ..config.config_files import settings
|
6
5
|
from ..setup_data_dir import setup_data_dir
|
7
6
|
from ..utilities.db_utils import get_db_setting
|
8
7
|
from .app_factory import create_app
|
@@ -92,28 +91,9 @@ def main():
|
|
92
91
|
print("Please run migration manually.")
|
93
92
|
|
94
93
|
# Get web server settings with defaults
|
95
|
-
port = get_db_setting("web.port",
|
96
|
-
host = get_db_setting("web.host",
|
97
|
-
debug = get_db_setting("web.debug",
|
98
|
-
|
99
|
-
# Check for OpenAI availability but don't import it unless necessary
|
100
|
-
try:
|
101
|
-
api_key = os.environ.get("OPENAI_API_KEY")
|
102
|
-
if api_key:
|
103
|
-
try:
|
104
|
-
# Only try to import if we have an API key
|
105
|
-
import openai
|
106
|
-
|
107
|
-
openai.api_key = api_key
|
108
|
-
logger.info("OpenAI integration is available")
|
109
|
-
except ImportError:
|
110
|
-
logger.info("OpenAI package not installed, integration disabled")
|
111
|
-
else:
|
112
|
-
logger.info(
|
113
|
-
"OPENAI_API_KEY not found in environment variables, OpenAI integration disabled"
|
114
|
-
)
|
115
|
-
except Exception as e:
|
116
|
-
logger.error(f"Error checking OpenAI availability: {e}")
|
94
|
+
port = get_db_setting("web.port", 5000)
|
95
|
+
host = get_db_setting("web.host", "0.0.0.0")
|
96
|
+
debug = get_db_setting("web.debug", True)
|
117
97
|
|
118
98
|
logger.info(f"Starting web server on {host}:{port} (debug: {debug})")
|
119
99
|
socketio.run(app, debug=debug, host=host, port=port, allow_unsafe_werkzeug=True)
|
@@ -165,29 +165,6 @@ def register_blueprints(app):
|
|
165
165
|
api_bp, url_prefix="/research/api"
|
166
166
|
) # Register API blueprint with prefix
|
167
167
|
|
168
|
-
# Configure settings paths
|
169
|
-
# Import config inside the function to avoid circular dependencies
|
170
|
-
def configure_settings_routes():
|
171
|
-
try:
|
172
|
-
from ..config.config_files import SEARCH_ENGINES_FILE, get_config_dir
|
173
|
-
from .routes.settings_routes import set_config_paths
|
174
|
-
|
175
|
-
CONFIG_DIR = get_config_dir() / "config"
|
176
|
-
MAIN_CONFIG_FILE = CONFIG_DIR / "settings.toml"
|
177
|
-
LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"
|
178
|
-
|
179
|
-
set_config_paths(
|
180
|
-
CONFIG_DIR,
|
181
|
-
SEARCH_ENGINES_FILE,
|
182
|
-
MAIN_CONFIG_FILE,
|
183
|
-
LOCAL_COLLECTIONS_FILE,
|
184
|
-
)
|
185
|
-
except Exception as e:
|
186
|
-
logger.error(f"Error configuring settings routes: {e}")
|
187
|
-
|
188
|
-
# Call this after all blueprints are registered
|
189
|
-
configure_settings_routes()
|
190
|
-
|
191
168
|
# Add root route redirect
|
192
169
|
@app.route("/")
|
193
170
|
def root_index():
|
@@ -260,7 +237,7 @@ def create_database(app):
|
|
260
237
|
from sqlalchemy import create_engine
|
261
238
|
from sqlalchemy.orm import scoped_session, sessionmaker
|
262
239
|
|
263
|
-
from .database.migrations import run_migrations
|
240
|
+
from .database.migrations import run_migrations
|
264
241
|
from .database.models import Base
|
265
242
|
|
266
243
|
# Configure SQLite to use URI mode, which allows for relative file paths
|
@@ -281,7 +258,6 @@ def create_database(app):
|
|
281
258
|
|
282
259
|
# Run migrations and setup predefined settings
|
283
260
|
run_migrations(engine, app.db_session)
|
284
|
-
setup_predefined_settings(app.db_session)
|
285
261
|
|
286
262
|
# Add teardown context
|
287
263
|
@app.teardown_appcontext
|