local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +154 -160
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +87 -45
- local_deep_research/search_system.py +153 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1583 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
- local_deep_research-0.2.2.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,208 +1,292 @@
|
|
1
|
-
import os
|
2
1
|
import importlib
|
3
2
|
import inspect
|
4
3
|
import logging
|
5
|
-
|
4
|
+
import os
|
5
|
+
from typing import Any, Dict, List, Optional, Union
|
6
6
|
|
7
|
+
from ..utilities.db_utils import get_db_setting
|
7
8
|
from .search_engine_base import BaseSearchEngine
|
8
|
-
from .search_engines_config import
|
9
|
-
from local_deep_research.config import settings
|
9
|
+
from .search_engines_config import DEFAULT_SEARCH_ENGINE, SEARCH_ENGINES
|
10
10
|
|
11
11
|
# Setup logging
|
12
12
|
logging.basicConfig(level=logging.INFO)
|
13
13
|
logger = logging.getLogger(__name__)
|
14
14
|
|
15
15
|
|
16
|
-
def create_search_engine(
|
16
|
+
def create_search_engine(
|
17
|
+
engine_name: str, llm=None, **kwargs
|
18
|
+
) -> Optional[BaseSearchEngine]:
|
17
19
|
"""
|
18
20
|
Create a search engine instance based on the engine name.
|
19
|
-
|
21
|
+
|
20
22
|
Args:
|
21
23
|
engine_name: Name of the search engine to create
|
22
24
|
llm: Language model instance (required for some engines like meta)
|
23
25
|
**kwargs: Additional parameters to override defaults
|
24
|
-
|
26
|
+
|
25
27
|
Returns:
|
26
28
|
Initialized search engine instance or None if creation failed
|
27
29
|
"""
|
28
30
|
# If engine name not found, use default
|
29
31
|
if engine_name not in SEARCH_ENGINES:
|
30
|
-
logger.warning(
|
32
|
+
logger.warning(
|
33
|
+
f"Search engine '{engine_name}' not found, using default: {DEFAULT_SEARCH_ENGINE}"
|
34
|
+
)
|
31
35
|
engine_name = DEFAULT_SEARCH_ENGINE
|
32
|
-
|
36
|
+
|
33
37
|
# Get engine configuration
|
34
38
|
engine_config = SEARCH_ENGINES[engine_name]
|
35
|
-
from
|
36
|
-
|
39
|
+
from ..config.config_files import settings
|
40
|
+
|
37
41
|
# Set default max_results from config if not provided in kwargs
|
38
|
-
if
|
39
|
-
max_results = settings.search.max_results
|
40
|
-
if max_results
|
41
|
-
|
42
|
-
|
42
|
+
if "max_results" not in kwargs:
|
43
|
+
max_results = get_db_setting("search.max_results", settings.search.max_results)
|
44
|
+
if max_results is None:
|
45
|
+
max_results = 20
|
46
|
+
kwargs["max_results"] = max_results
|
47
|
+
|
43
48
|
# Check for API key requirements
|
44
49
|
if engine_config.get("requires_api_key", False):
|
45
50
|
api_key_env = engine_config.get("api_key_env")
|
46
|
-
|
51
|
+
|
47
52
|
# First check environment variable
|
48
53
|
api_key = os.getenv(api_key_env)
|
49
54
|
if not api_key:
|
50
55
|
api_key = os.getenv("LDR_" + api_key_env)
|
51
56
|
|
52
|
-
|
53
57
|
# If not found in environment, check Dynaconf settings
|
54
58
|
if not api_key and api_key_env:
|
55
59
|
# Convert env var name to settings path (e.g., BRAVE_API_KEY -> brave_api_key)
|
56
60
|
settings_key = api_key_env.lower()
|
57
61
|
api_key = settings.get(settings_key)
|
58
|
-
|
62
|
+
|
59
63
|
if not api_key:
|
60
|
-
logger.info(
|
64
|
+
logger.info(
|
65
|
+
f"Required API key for {engine_name} not found in environment variable: {api_key_env} or settings"
|
66
|
+
)
|
61
67
|
return None
|
62
|
-
|
68
|
+
|
63
69
|
# Check for LLM requirements
|
64
70
|
if engine_config.get("requires_llm", False) and not llm:
|
65
|
-
logger.info(
|
71
|
+
logger.info(
|
72
|
+
f"Engine {engine_name} requires an LLM instance but none was provided"
|
73
|
+
)
|
66
74
|
return None
|
67
|
-
|
75
|
+
|
68
76
|
try:
|
69
77
|
# Load the engine class
|
70
78
|
module_path = engine_config["module_path"]
|
71
79
|
class_name = engine_config["class_name"]
|
72
|
-
|
73
|
-
|
80
|
+
|
81
|
+
package = None
|
82
|
+
if module_path.startswith("."):
|
83
|
+
# This is a relative import. Assume it's relative to
|
84
|
+
# `web_search_engines`.
|
85
|
+
package = "local_deep_research.web_search_engines"
|
86
|
+
module = importlib.import_module(module_path, package=package)
|
74
87
|
engine_class = getattr(module, class_name)
|
75
|
-
|
88
|
+
|
76
89
|
# Get the engine class's __init__ parameters to filter out unsupported ones
|
77
90
|
engine_init_signature = inspect.signature(engine_class.__init__)
|
78
91
|
engine_init_params = list(engine_init_signature.parameters.keys())
|
79
|
-
|
92
|
+
|
80
93
|
# Combine default parameters with provided ones
|
81
94
|
all_params = {**engine_config.get("default_params", {}), **kwargs}
|
82
|
-
|
95
|
+
|
83
96
|
# Filter out parameters that aren't accepted by the engine class
|
84
97
|
# Note: 'self' is always the first parameter of instance methods, so we skip it
|
85
|
-
filtered_params = {
|
86
|
-
|
98
|
+
filtered_params = {
|
99
|
+
k: v for k, v in all_params.items() if k in engine_init_params[1:]
|
100
|
+
}
|
101
|
+
|
87
102
|
# Add LLM if required
|
88
103
|
if engine_config.get("requires_llm", False):
|
89
104
|
filtered_params["llm"] = llm
|
90
|
-
|
105
|
+
|
91
106
|
# Add API key if required and not already in filtered_params
|
92
|
-
if
|
107
|
+
if (
|
108
|
+
engine_config.get("requires_api_key", False)
|
109
|
+
and "api_key" not in filtered_params
|
110
|
+
):
|
93
111
|
api_key_env = engine_config.get("api_key_env")
|
94
112
|
if api_key_env:
|
95
113
|
api_key = os.getenv(api_key_env)
|
96
114
|
if api_key:
|
97
115
|
filtered_params["api_key"] = api_key
|
98
|
-
|
99
|
-
logger.info(
|
100
|
-
|
116
|
+
|
117
|
+
logger.info(
|
118
|
+
f"Creating {engine_name} with filtered parameters: {filtered_params.keys()}"
|
119
|
+
)
|
120
|
+
|
101
121
|
# Create the engine instance with filtered parameters
|
102
122
|
engine = engine_class(**filtered_params)
|
103
|
-
|
123
|
+
|
104
124
|
# Check if we need to wrap with full search capabilities
|
105
|
-
if kwargs.get("use_full_search", False) and engine_config.get(
|
125
|
+
if kwargs.get("use_full_search", False) and engine_config.get(
|
126
|
+
"supports_full_search", False
|
127
|
+
):
|
106
128
|
return _create_full_search_wrapper(engine_name, engine, llm, kwargs)
|
107
|
-
|
129
|
+
|
108
130
|
return engine
|
109
|
-
|
131
|
+
|
110
132
|
except Exception as e:
|
111
133
|
logger.info(f"Failed to create search engine '{engine_name}': {str(e)}")
|
112
134
|
return None
|
113
135
|
|
114
136
|
|
115
|
-
def _create_full_search_wrapper(
|
137
|
+
def _create_full_search_wrapper(
|
138
|
+
engine_name: str, base_engine: BaseSearchEngine, llm, params: Dict[str, Any]
|
139
|
+
) -> Optional[BaseSearchEngine]:
|
116
140
|
"""Create a full search wrapper for the base engine if supported"""
|
117
141
|
try:
|
118
142
|
engine_config = SEARCH_ENGINES[engine_name]
|
119
|
-
|
143
|
+
|
120
144
|
# Get full search class details
|
121
145
|
module_path = engine_config.get("full_search_module")
|
122
146
|
class_name = engine_config.get("full_search_class")
|
123
|
-
|
147
|
+
|
124
148
|
if not module_path or not class_name:
|
125
149
|
logger.warning(f"Full search configuration missing for {engine_name}")
|
126
150
|
return base_engine
|
127
|
-
|
151
|
+
|
128
152
|
# Import the full search class
|
129
153
|
module = importlib.import_module(module_path)
|
130
154
|
full_search_class = getattr(module, class_name)
|
131
|
-
|
155
|
+
|
132
156
|
# Get the wrapper's __init__ parameters to filter out unsupported ones
|
133
157
|
wrapper_init_signature = inspect.signature(full_search_class.__init__)
|
134
|
-
wrapper_init_params = list(wrapper_init_signature.parameters.keys())[
|
135
|
-
|
158
|
+
wrapper_init_params = list(wrapper_init_signature.parameters.keys())[
|
159
|
+
1:
|
160
|
+
] # Skip 'self'
|
161
|
+
|
136
162
|
# Extract relevant parameters for the full search wrapper
|
137
163
|
wrapper_params = {k: v for k, v in params.items() if k in wrapper_init_params}
|
138
|
-
|
164
|
+
|
139
165
|
# Special case for SerpAPI which needs the API key directly
|
140
166
|
if engine_name == "serpapi" and "serpapi_api_key" in wrapper_init_params:
|
141
167
|
serpapi_api_key = os.getenv("SERP_API_KEY")
|
142
168
|
if serpapi_api_key:
|
143
169
|
wrapper_params["serpapi_api_key"] = serpapi_api_key
|
144
|
-
|
170
|
+
|
145
171
|
# Map some parameter names to what the wrapper expects
|
146
|
-
if
|
172
|
+
if (
|
173
|
+
"language" in params
|
174
|
+
and "search_language" not in params
|
175
|
+
and "language" in wrapper_init_params
|
176
|
+
):
|
147
177
|
wrapper_params["language"] = params["language"]
|
148
|
-
|
149
|
-
if
|
150
|
-
|
151
|
-
|
178
|
+
|
179
|
+
if (
|
180
|
+
"safesearch" not in wrapper_params
|
181
|
+
and "safe_search" in params
|
182
|
+
and "safesearch" in wrapper_init_params
|
183
|
+
):
|
184
|
+
wrapper_params["safesearch"] = (
|
185
|
+
"active" if params["safe_search"] else "off"
|
186
|
+
)
|
187
|
+
|
152
188
|
# Special case for Brave which needs the API key directly
|
153
189
|
if engine_name == "brave" and "api_key" in wrapper_init_params:
|
154
190
|
brave_api_key = os.getenv("BRAVE_API_KEY")
|
155
191
|
if brave_api_key:
|
156
192
|
wrapper_params["api_key"] = brave_api_key
|
157
|
-
|
193
|
+
|
158
194
|
# Map some parameter names to what the wrapper expects
|
159
|
-
if
|
195
|
+
if (
|
196
|
+
"language" in params
|
197
|
+
and "search_language" not in params
|
198
|
+
and "language" in wrapper_init_params
|
199
|
+
):
|
160
200
|
wrapper_params["language"] = params["language"]
|
161
|
-
|
162
|
-
if
|
163
|
-
|
164
|
-
|
201
|
+
|
202
|
+
if (
|
203
|
+
"safesearch" not in wrapper_params
|
204
|
+
and "safe_search" in params
|
205
|
+
and "safesearch" in wrapper_init_params
|
206
|
+
):
|
207
|
+
wrapper_params["safesearch"] = (
|
208
|
+
"moderate" if params["safe_search"] else "off"
|
209
|
+
)
|
210
|
+
|
165
211
|
# Always include llm if it's a parameter
|
166
212
|
if "llm" in wrapper_init_params:
|
167
213
|
wrapper_params["llm"] = llm
|
168
|
-
|
214
|
+
|
169
215
|
# If the wrapper needs the base engine and has a parameter for it
|
170
216
|
if "web_search" in wrapper_init_params:
|
171
217
|
wrapper_params["web_search"] = base_engine
|
172
|
-
|
173
|
-
logger.debug(
|
174
|
-
|
218
|
+
|
219
|
+
logger.debug(
|
220
|
+
f"Creating full search wrapper for {engine_name} with filtered parameters: {wrapper_params.keys()}"
|
221
|
+
)
|
222
|
+
|
175
223
|
# Create the full search wrapper with filtered parameters
|
176
224
|
full_search = full_search_class(**wrapper_params)
|
177
|
-
|
225
|
+
|
178
226
|
return full_search
|
179
|
-
|
227
|
+
|
180
228
|
except Exception as e:
|
181
|
-
logger.error(
|
229
|
+
logger.error(
|
230
|
+
f"Failed to create full search wrapper for {engine_name}: {str(e)}"
|
231
|
+
)
|
182
232
|
return base_engine
|
183
233
|
|
184
234
|
|
235
|
+
def get_available_engines(
|
236
|
+
include_api_key_services: bool = True,
|
237
|
+
) -> Union[Dict[str, str], List[str]]:
|
238
|
+
"""
|
239
|
+
Get all available search engines from the configuration.
|
240
|
+
|
241
|
+
Args:
|
242
|
+
include_api_key_services: Whether to include engines that require API keys
|
243
|
+
|
244
|
+
Returns:
|
245
|
+
Dictionary of engine names mapped to descriptions, or a list of engine names
|
246
|
+
"""
|
247
|
+
try:
|
248
|
+
# Get engines from SEARCH_ENGINES dict
|
249
|
+
available_engines = {}
|
250
|
+
|
251
|
+
for name, config in SEARCH_ENGINES.items():
|
252
|
+
# Skip hidden engines (those that start with _)
|
253
|
+
if name.startswith("_"):
|
254
|
+
continue
|
255
|
+
|
256
|
+
# Skip engines that require API keys if requested
|
257
|
+
if not include_api_key_services and config.get("requires_api_key", False):
|
258
|
+
continue
|
259
|
+
|
260
|
+
# Add to available engines with display name
|
261
|
+
strengths = config.get("strengths", [])
|
262
|
+
description = name.replace("_", " ").title()
|
185
263
|
|
186
|
-
|
187
|
-
|
188
|
-
|
264
|
+
if strengths and len(strengths) > 0:
|
265
|
+
description += f" - {strengths[0]}"
|
266
|
+
|
267
|
+
available_engines[name] = description
|
268
|
+
|
269
|
+
return available_engines
|
270
|
+
except Exception as e:
|
271
|
+
logger.error(f"Error getting available engines: {e}")
|
272
|
+
# Fall back to list of engines directly from keys
|
189
273
|
return list(SEARCH_ENGINES.keys())
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
274
|
+
|
275
|
+
|
276
|
+
def get_search(
|
277
|
+
search_tool: str,
|
278
|
+
llm_instance,
|
279
|
+
max_results: int = 10,
|
280
|
+
region: str = "us",
|
281
|
+
time_period: str = "y",
|
282
|
+
safe_search: bool = True,
|
283
|
+
search_snippets_only: bool = False,
|
284
|
+
search_language: str = "English",
|
285
|
+
max_filtered_results: Optional[int] = None,
|
286
|
+
):
|
203
287
|
"""
|
204
288
|
Get search tool instance based on the provided parameters.
|
205
|
-
|
289
|
+
|
206
290
|
Args:
|
207
291
|
search_tool: Name of the search engine to use
|
208
292
|
llm_instance: Language model instance
|
@@ -213,7 +297,7 @@ def get_search(search_tool: str, llm_instance,
|
|
213
297
|
search_snippets_only: Whether to return just snippets (vs. full content)
|
214
298
|
search_language: Language for search results
|
215
299
|
max_filtered_results: Maximum number of results to keep after filtering
|
216
|
-
|
300
|
+
|
217
301
|
Returns:
|
218
302
|
Initialized search engine instance
|
219
303
|
"""
|
@@ -222,43 +306,49 @@ def get_search(search_tool: str, llm_instance,
|
|
222
306
|
"max_results": max_results,
|
223
307
|
"llm": llm_instance, # Only used by engines that need it
|
224
308
|
}
|
225
|
-
|
309
|
+
|
226
310
|
# Add max_filtered_results if provided
|
227
311
|
if max_filtered_results is not None:
|
228
312
|
params["max_filtered_results"] = max_filtered_results
|
229
|
-
|
313
|
+
|
230
314
|
# Add engine-specific parameters
|
231
315
|
if search_tool in ["duckduckgo", "serpapi", "google_pse", "brave"]:
|
232
|
-
params.update(
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
316
|
+
params.update(
|
317
|
+
{
|
318
|
+
"region": region,
|
319
|
+
"safe_search": safe_search,
|
320
|
+
"use_full_search": not search_snippets_only,
|
321
|
+
}
|
322
|
+
)
|
323
|
+
|
238
324
|
if search_tool in ["serpapi", "brave", "google_pse"]:
|
239
325
|
params["search_language"] = search_language
|
240
|
-
|
326
|
+
|
241
327
|
if search_tool == "serpapi":
|
242
328
|
params["time_period"] = time_period
|
243
|
-
|
329
|
+
|
244
330
|
# Create and return the search engine
|
245
|
-
logger.info(
|
331
|
+
logger.info(
|
332
|
+
f"Creating search engine for tool: {search_tool} with params: {params.keys()}"
|
333
|
+
)
|
246
334
|
engine = create_search_engine(search_tool, **params)
|
247
|
-
|
335
|
+
|
248
336
|
# Add debugging to check if engine is None
|
249
337
|
if engine is None:
|
250
|
-
logger.error(
|
338
|
+
logger.error(
|
339
|
+
f"Failed to create search engine for {search_tool} - returned None"
|
340
|
+
)
|
251
341
|
else:
|
252
342
|
engine_type = type(engine).__name__
|
253
343
|
logger.info(f"Successfully created search engine of type: {engine_type}")
|
254
344
|
# Check if the engine has run method
|
255
|
-
if hasattr(engine,
|
345
|
+
if hasattr(engine, "run"):
|
256
346
|
logger.info(f"Engine has 'run' method: {getattr(engine, 'run')}")
|
257
347
|
else:
|
258
|
-
logger.error(
|
259
|
-
|
348
|
+
logger.error("Engine does NOT have 'run' method!")
|
349
|
+
|
260
350
|
# For SearxNG, check availability flag
|
261
|
-
if hasattr(engine,
|
351
|
+
if hasattr(engine, "is_available"):
|
262
352
|
logger.info(f"Engine availability flag: {engine.is_available}")
|
263
|
-
|
353
|
+
|
264
354
|
return engine
|
@@ -2,11 +2,13 @@
|
|
2
2
|
Configuration file for search engines.
|
3
3
|
Loads search engine definitions from the user's configuration.
|
4
4
|
"""
|
5
|
+
|
5
6
|
import logging
|
6
7
|
import os
|
8
|
+
|
7
9
|
import toml
|
8
|
-
|
9
|
-
from
|
10
|
+
|
11
|
+
from ..config.config_files import CONFIG_DIR, LOCAL_COLLECTIONS_FILE
|
10
12
|
|
11
13
|
logger = logging.getLogger(__name__)
|
12
14
|
|
@@ -22,23 +24,28 @@ if os.path.exists(SEARCH_ENGINES_FILE):
|
|
22
24
|
try:
|
23
25
|
# Load the TOML file directly
|
24
26
|
config_data = toml.load(SEARCH_ENGINES_FILE)
|
25
|
-
|
27
|
+
|
26
28
|
# Extract search engine definitions
|
27
29
|
for key, value in config_data.items():
|
28
30
|
if key == "DEFAULT_SEARCH_ENGINE":
|
29
31
|
DEFAULT_SEARCH_ENGINE = value
|
30
32
|
elif isinstance(value, dict):
|
31
33
|
SEARCH_ENGINES[key] = value
|
32
|
-
|
33
|
-
logger.info(
|
34
|
+
|
35
|
+
logger.info(
|
36
|
+
f"Loaded {len(SEARCH_ENGINES)} search engines from configuration file"
|
37
|
+
)
|
38
|
+
logger.info(f"\n {', '.join(sorted(SEARCH_ENGINES.keys()))} \n")
|
34
39
|
except Exception as e:
|
35
40
|
logger.error(f"Error loading search engines from TOML file: {e}")
|
36
41
|
else:
|
37
|
-
logger.warning(
|
42
|
+
logger.warning(
|
43
|
+
f"Search engines configuration file not found: {SEARCH_ENGINES_FILE}"
|
44
|
+
)
|
38
45
|
|
39
46
|
# Add alias for 'auto' if it exists
|
40
|
-
if
|
41
|
-
SEARCH_ENGINES[
|
47
|
+
if "auto" in SEARCH_ENGINES and "meta" not in SEARCH_ENGINES:
|
48
|
+
SEARCH_ENGINES["meta"] = SEARCH_ENGINES["auto"]
|
42
49
|
|
43
50
|
# Register local document collections
|
44
51
|
|
@@ -52,20 +59,20 @@ if os.path.exists(LOCAL_COLLECTIONS_FILE):
|
|
52
59
|
"module_path": "local_deep_research.web_search_engines.engines.search_engine_local",
|
53
60
|
"class_name": "LocalSearchEngine",
|
54
61
|
"default_params": config,
|
55
|
-
"requires_llm": True
|
62
|
+
"requires_llm": True,
|
56
63
|
}
|
57
|
-
|
64
|
+
|
58
65
|
# Copy these specific fields to the top level if they exist
|
59
66
|
for field in ["strengths", "weaknesses", "reliability", "description"]:
|
60
67
|
if field in config:
|
61
68
|
engine_config[field] = config[field]
|
62
|
-
|
69
|
+
|
63
70
|
SEARCH_ENGINES[collection] = engine_config
|
64
|
-
|
65
|
-
logger.info(
|
71
|
+
|
72
|
+
logger.info("Registered local document collections as search engines")
|
66
73
|
except Exception as e:
|
67
74
|
logger.error(f"Error loading local collections from TOML file: {e}")
|
68
75
|
# Ensure the meta search engine is still available at the end if it exists
|
69
|
-
if
|
76
|
+
if "auto" in SEARCH_ENGINES:
|
70
77
|
meta_config = SEARCH_ENGINES["auto"]
|
71
|
-
SEARCH_ENGINES["auto"] = meta_config
|
78
|
+
SEARCH_ENGINES["auto"] = meta_config
|