local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +96 -84
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +72 -44
  41. local_deep_research/search_system.py +147 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1592 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.0.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,13 +1,17 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ from datetime import datetime
5
+ from typing import Dict, List
6
+
1
7
  import justext
2
8
  from langchain_community.document_loaders import AsyncChromiumLoader
3
9
  from langchain_community.document_transformers import BeautifulSoupTransformer
4
10
  from langchain_core.language_models import BaseLLM
5
- from typing import List, Dict
6
- import json, os
7
- from .utilties.search_utilities import remove_think_tags
8
- from datetime import datetime
9
- from local_deep_research import config
10
- import logging
11
+
12
+ from ...config.search_config import QUALITY_CHECK_DDG_URLS
13
+ from ...utilities.search_utilities import remove_think_tags
14
+
11
15
  logger = logging.getLogger(__name__)
12
16
 
13
17
 
@@ -15,14 +19,13 @@ class FullSearchResults:
15
19
  def __init__(
16
20
  self,
17
21
  llm: BaseLLM, # Add LLM parameter
18
- web_search: list,
22
+ web_search: list,
19
23
  output_format: str = "list",
20
24
  language: str = "English",
21
25
  max_results: int = 10,
22
26
  region: str = "wt-wt",
23
27
  time: str = "y",
24
- safesearch: str = "Moderate"
25
-
28
+ safesearch: str = "Moderate",
26
29
  ):
27
30
  self.llm = llm
28
31
  self.output_format = output_format
@@ -31,10 +34,9 @@ class FullSearchResults:
31
34
  self.region = region
32
35
  self.time = time
33
36
  self.safesearch = safesearch
34
- self.web_search =web_search
37
+ self.web_search = web_search
35
38
  os.environ["USER_AGENT"] = "Local Deep Research/1.0"
36
39
 
37
-
38
40
  self.bs_transformer = BeautifulSoupTransformer()
39
41
  self.tags_to_extract = ["p", "div", "span"]
40
42
 
@@ -54,7 +56,7 @@ class FullSearchResults:
54
56
  {results}
55
57
 
56
58
  Return a JSON array of indices (0-based) for sources that meet ALL criteria.
57
- ONLY Return a JSON array of indices (0-based) and nothing else. No letters.
59
+ ONLY Return a JSON array of indices (0-based) and nothing else. No letters.
58
60
  Example response: \n[0, 2, 4]\n\n"""
59
61
 
60
62
  try:
@@ -66,7 +68,7 @@ class FullSearchResults:
66
68
  return [r for i, r in enumerate(results) if i in good_indices]
67
69
  except Exception as e:
68
70
  logger.error(f"URL filtering error: {e}")
69
- return []
71
+ return []
70
72
 
71
73
  def remove_boilerplate(self, html: str) -> str:
72
74
  if not html or not html.strip():
@@ -77,13 +79,13 @@ class FullSearchResults:
77
79
 
78
80
  def run(self, query: str):
79
81
  nr_full_text = 0
80
- # Step 1: Get search results
82
+ # Step 1: Get search results
81
83
  search_results = self.web_search.invoke(query)
82
84
  if not isinstance(search_results, list):
83
85
  raise ValueError("Expected the search results in list format.")
84
86
 
85
87
  # Step 2: Filter URLs using LLM
86
- if config.QUALITY_CHECK_DDG_URLS:
88
+ if QUALITY_CHECK_DDG_URLS:
87
89
  filtered_results = self.check_urls(search_results, query)
88
90
  else:
89
91
  filtered_results = search_results
@@ -126,4 +128,4 @@ class FullSearchResults:
126
128
  return self.run(query)
127
129
 
128
130
  def __call__(self, query: str):
129
- return self.invoke(query)
131
+ return self.invoke(query)
@@ -1,12 +1,13 @@
1
1
  import logging
2
2
  import os
3
- from typing import Dict, List, Any, Optional
3
+ from typing import Any, Dict, List, Optional
4
4
 
5
- from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
6
- from local_deep_research.web_search_engines.search_engines_config import SEARCH_ENGINES
7
- from local_deep_research.web_search_engines.search_engine_factory import create_search_engine
8
- from local_deep_research.web_search_engines.engines.search_engine_wikipedia import WikipediaSearchEngine
9
- from local_deep_research import config
5
+ from ...config import search_config
6
+ from ...web.services.socket_service import emit_socket_event
7
+ from ..search_engine_base import BaseSearchEngine
8
+ from ..search_engine_factory import create_search_engine
9
+ from ..search_engines_config import SEARCH_ENGINES
10
+ from .search_engine_wikipedia import WikipediaSearchEngine
10
11
 
11
12
  # Setup logging
12
13
  logging.basicConfig(level=logging.INFO)
@@ -18,17 +19,20 @@ class MetaSearchEngine(BaseSearchEngine):
18
19
  LLM-powered meta search engine that intelligently selects and uses
19
20
  the appropriate search engines based on query analysis
20
21
  """
21
-
22
- def __init__(self,
23
- llm,
24
- max_results: int = 10,
25
- use_api_key_services: bool = True,
26
- max_engines_to_try: int = 3,
27
- max_filtered_results: Optional[int] = None,
28
- **kwargs):
22
+
23
+ def __init__(
24
+ self,
25
+ llm,
26
+ max_results: int = 10,
27
+ use_api_key_services: bool = True,
28
+ max_engines_to_try: int = 3,
29
+ max_filtered_results: Optional[int] = None,
30
+ engine_selection_callback=None,
31
+ **kwargs,
32
+ ):
29
33
  """
30
34
  Initialize the meta search engine.
31
-
35
+
32
36
  Args:
33
37
  llm: Language model instance for query classification and relevance filtering
34
38
  max_results: Maximum number of search results to return
@@ -37,247 +41,294 @@ class MetaSearchEngine(BaseSearchEngine):
37
41
  max_filtered_results: Maximum number of results to keep after filtering
38
42
  **kwargs: Additional parameters (ignored but accepted for compatibility)
39
43
  """
40
- # Initialize the BaseSearchEngine with the LLM and max_filtered_results
41
- super().__init__(llm=llm, max_filtered_results=max_filtered_results)
42
-
43
- self.max_results = max_results
44
+ # Initialize the BaseSearchEngine with the LLM, max_filtered_results, and max_results
45
+ super().__init__(
46
+ llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
47
+ )
48
+
44
49
  self.use_api_key_services = use_api_key_services
45
50
  self.max_engines_to_try = max_engines_to_try
46
-
51
+
47
52
  # Cache for engine instances
48
53
  self.engine_cache = {}
49
-
54
+
50
55
  # Get available engines (excluding 'meta' and 'auto')
51
56
  self.available_engines = self._get_available_engines()
52
- logger.info(f"Meta Search Engine initialized with {len(self.available_engines)} available engines: {', '.join(self.available_engines)}")
53
-
57
+ logger.info(
58
+ f"Meta Search Engine initialized with {len(self.available_engines)} available engines: {', '.join(self.available_engines)}"
59
+ )
60
+
54
61
  # Create a fallback engine in case everything else fails
55
62
  self.fallback_engine = WikipediaSearchEngine(
56
- max_results=max_results,
63
+ max_results=self.max_results,
57
64
  llm=llm,
58
- max_filtered_results=max_filtered_results
65
+ max_filtered_results=max_filtered_results,
59
66
  )
60
-
67
+
61
68
  def _get_available_engines(self) -> List[str]:
62
69
  """Get list of available engines, excluding 'meta' and 'auto'"""
63
70
  # Filter out 'meta' and 'auto' and check API key availability
64
71
  available = []
65
- for name, config in SEARCH_ENGINES.items():
72
+ for name, config_ in SEARCH_ENGINES.items():
66
73
  if name in ["meta", "auto"]:
67
74
  continue
68
-
69
- if config.get("requires_api_key", False) and not self.use_api_key_services:
75
+
76
+ if config_.get("requires_api_key", False) and not self.use_api_key_services:
70
77
  continue
71
-
72
- if config.get("requires_api_key", False):
73
- api_key_env = config.get("api_key_env")
78
+
79
+ if config_.get("requires_api_key", False):
80
+ api_key_env = config_.get("api_key_env")
74
81
  api_key = os.getenv(api_key_env) if api_key_env else None
75
82
  if not api_key:
76
83
  continue
77
-
84
+
78
85
  available.append(name)
79
-
86
+
80
87
  # Make sure we have at least one engine available
81
88
  if not available and "wikipedia" in SEARCH_ENGINES:
82
89
  available.append("wikipedia")
83
-
90
+
84
91
  return available
85
-
92
+
86
93
  def analyze_query(self, query: str) -> List[str]:
87
94
  """
88
- Use the LLM to analyze the query and return a ranked list of
89
- recommended search engines to try
95
+ Analyze the query to determine the best search engines to use.
96
+
97
+ Args:
98
+ query: The search query
99
+
100
+ Returns:
101
+ List of search engine names sorted by suitability
90
102
  """
91
- if not self.available_engines:
92
- logger.warning("No search engines available")
93
- return []
94
- engine_descriptions = []
95
- for name in self.available_engines:
96
- logger.info(f"Processing search engine: {name}")
97
- try:
98
- description = f"- {name.upper()}: Good for {', '.join(SEARCH_ENGINES[name]['strengths'][:3])}. " \
99
- f"Weaknesses: {', '.join(SEARCH_ENGINES[name]['weaknesses'][:2])}. " \
100
- f"Reliability: {SEARCH_ENGINES[name]['reliability']*100:.0f}%"
101
- engine_descriptions.append(description)
102
- except KeyError as e:
103
- logger.error(f"Missing key for engine {name}: {e}")
104
- # Add a basic description for engines with missing configuration
105
- engine_descriptions.append(f"- {name.upper()}: General purpose search engine.")
106
- except Exception as e:
107
- logger.error(f"Error processing engine {name}: {e}")
108
- engine_descriptions.append(f"- {name.upper()}: General purpose search engine.")
103
+ try:
104
+ # Check if the LLM is available to help select engines
105
+ if not self.llm:
106
+ logger.warning(
107
+ "No LLM available for query analysis, using default engines"
108
+ )
109
+ # Return engines sorted by reliability
110
+ return sorted(
111
+ self.available_engines,
112
+ key=lambda x: SEARCH_ENGINES.get(x, {}).get("reliability", 0),
113
+ reverse=True,
114
+ )
109
115
 
110
- engine_descriptions = "\n".join(engine_descriptions)
111
-
112
- prompt = f"""Analyze this search query and rank the available search engines in order of most to least appropriate for answering it.
113
-
114
- Query: "{query}"
116
+ # Create a prompt that outlines the available search engines and their strengths
117
+ engines_info = []
118
+ for engine_name in self.available_engines:
119
+ try:
120
+ if engine_name in SEARCH_ENGINES:
121
+ strengths = SEARCH_ENGINES[engine_name].get(
122
+ "strengths", "General search"
123
+ )
124
+ weaknesses = SEARCH_ENGINES[engine_name].get(
125
+ "weaknesses", "None specified"
126
+ )
127
+ description = SEARCH_ENGINES[engine_name].get(
128
+ "description", engine_name
129
+ )
130
+ engines_info.append(
131
+ f"- {engine_name}: {description}\n Strengths: {strengths}\n Weaknesses: {weaknesses}"
132
+ )
133
+ except KeyError as e:
134
+ logger.error(f"Missing key for engine {engine_name}: {str(e)}")
115
135
 
116
- Available search engines:
117
- {engine_descriptions}
136
+ prompt = f"""You are a search query analyst. Consider this search query:
118
137
 
119
- Consider:
120
- 1. The nature of the query (factual, academic, product-related, news, etc.)
121
- 2. The strengths and weaknesses of each engine
122
- 3. The reliability of each engine
138
+ QUERY: {query}
123
139
 
124
- Return ONLY a comma-separated list of search engine names in your recommended order. Example: "wikipedia,arxiv,duckduckgo"
125
- Do not include any engines that are not listed above. Only return the comma-separated list, nothing else."""
140
+ I have these search engines available:
141
+ {chr(10).join(engines_info)}
126
142
 
127
- # Get response from LLM
128
- try:
143
+ Determine which search engines would be most appropriate for answering this query.
144
+ First analyze the nature of the query (factual, scientific, code-related, etc.)
145
+ Then select the 1-3 most appropriate search engines for this type of query.
146
+
147
+ Output ONLY a comma-separated list of the search engine names in order of most appropriate to least appropriate.
148
+ Example output: wikipedia,arxiv,github"""
149
+
150
+ # Get analysis from LLM
129
151
  response = self.llm.invoke(prompt)
130
- content = response.content.strip()
131
-
132
- # Parse the response into a list of engine names
133
- engine_names = [name.strip().lower() for name in content.split(',')]
134
-
135
- # Filter out any invalid engine names
136
- valid_engines = [name for name in engine_names if name in self.available_engines]
137
-
152
+
153
+ # Handle different response formats
154
+ if hasattr(response, "content"):
155
+ content = response.content.strip()
156
+ else:
157
+ content = str(response).strip()
158
+
159
+ # Extract engine names
160
+ valid_engines = []
161
+ for engine_name in content.split(","):
162
+ cleaned_name = engine_name.strip().lower()
163
+ if cleaned_name in self.available_engines:
164
+ valid_engines.append(cleaned_name)
165
+
138
166
  # If no valid engines were returned, use default order based on reliability
139
167
  if not valid_engines:
140
168
  valid_engines = sorted(
141
- self.available_engines,
142
- key=lambda x: SEARCH_ENGINES[x]["reliability"],
143
- reverse=True
169
+ self.available_engines,
170
+ key=lambda x: SEARCH_ENGINES.get(x, {}).get("reliability", 0),
171
+ reverse=True,
144
172
  )
145
-
173
+
146
174
  return valid_engines
147
175
  except Exception as e:
148
176
  logger.error(f"Error analyzing query with LLM: {str(e)}")
149
177
  # Fall back to reliability-based ordering
150
178
  return sorted(
151
- self.available_engines,
152
- key=lambda x: SEARCH_ENGINES[x]["reliability"],
153
- reverse=True
179
+ self.available_engines,
180
+ key=lambda x: SEARCH_ENGINES.get(x, {}).get("reliability", 0),
181
+ reverse=True,
154
182
  )
155
-
183
+
156
184
  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
157
185
  """
158
186
  Get preview information by selecting the best search engine for this query.
159
-
187
+
160
188
  Args:
161
189
  query: The search query
162
-
190
+
163
191
  Returns:
164
192
  List of preview dictionaries
165
193
  """
166
194
  # Get ranked list of engines for this query
167
195
  ranked_engines = self.analyze_query(query)
168
-
196
+
169
197
  if not ranked_engines:
170
- logger.warning("No suitable search engines found for query, using fallback engine")
198
+ logger.warning(
199
+ "No suitable search engines found for query, using fallback engine"
200
+ )
171
201
  return self.fallback_engine._get_previews(query)
172
-
202
+
173
203
  # Limit the number of engines to try
174
- engines_to_try = ranked_engines[:self.max_engines_to_try]
175
-
176
- logger.info(f"Search plan created. Will try these engines in order: {', '.join(engines_to_try)}")
177
-
204
+ engines_to_try = ranked_engines[: self.max_engines_to_try]
205
+ logger.info(
206
+ f"SEARCH_PLAN: Will try these engines in order: {', '.join(engines_to_try)}"
207
+ )
208
+
178
209
  all_errors = []
179
210
  # Try each engine in order
180
211
  for engine_name in engines_to_try:
181
212
  logger.info(f"Trying search engine: {engine_name}")
182
-
213
+
183
214
  # Get or create the engine instance
184
215
  engine = self._get_engine_instance(engine_name)
185
-
216
+
186
217
  if not engine:
187
218
  logger.warning(f"Failed to initialize {engine_name}, skipping")
188
219
  all_errors.append(f"Failed to initialize {engine_name}")
189
220
  continue
190
-
221
+
191
222
  try:
192
223
  # Get previews from this engine
193
224
  previews = engine._get_previews(query)
194
-
225
+
195
226
  # If search was successful, return results
196
227
  if previews and len(previews) > 0:
197
- logger.info(f"Successfully got {len(previews)} preview results from {engine_name}")
228
+ logger.info(f"ENGINE_SELECTED: {engine_name}")
229
+ logger.info(
230
+ f"Successfully got {len(previews)} preview results from {engine_name}"
231
+ )
198
232
  # Store selected engine for later use
199
233
  self._selected_engine = engine
200
234
  self._selected_engine_name = engine_name
235
+
236
+ # Emit a socket event to inform about the selected engine
237
+ try:
238
+ emit_socket_event(
239
+ "search_engine_selected",
240
+ {"engine": engine_name, "result_count": len(previews)},
241
+ )
242
+ except Exception as socket_error:
243
+ logger.error(
244
+ f"Socket emit error (non-critical): {str(socket_error)}"
245
+ )
246
+
201
247
  return previews
202
-
248
+
203
249
  logger.info(f"{engine_name} returned no previews")
204
250
  all_errors.append(f"{engine_name} returned no previews")
205
-
251
+
206
252
  except Exception as e:
207
253
  error_msg = f"Error getting previews from {engine_name}: {str(e)}"
208
254
  logger.error(error_msg)
209
255
  all_errors.append(error_msg)
210
-
256
+
211
257
  # If we reach here, all engines failed, use fallback
212
- logger.warning(f"All engines failed or returned no preview results: {', '.join(all_errors)}")
258
+ logger.warning(
259
+ f"All engines failed or returned no preview results: {', '.join(all_errors)}"
260
+ )
213
261
  logger.info("Using fallback Wikipedia engine for previews")
214
262
  self._selected_engine = self.fallback_engine
215
263
  self._selected_engine_name = "wikipedia"
216
264
  return self.fallback_engine._get_previews(query)
217
-
218
- def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
265
+
266
+ def _get_full_content(
267
+ self, relevant_items: List[Dict[str, Any]]
268
+ ) -> List[Dict[str, Any]]:
219
269
  """
220
270
  Get full content using the engine that provided the previews.
221
-
271
+
222
272
  Args:
223
273
  relevant_items: List of relevant preview dictionaries
224
-
274
+
225
275
  Returns:
226
276
  List of result dictionaries with full content
227
277
  """
228
278
  # Check if we should get full content
229
- if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
279
+ if (
280
+ hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
281
+ and search_config.SEARCH_SNIPPETS_ONLY
282
+ ):
230
283
  logger.info("Snippet-only mode, skipping full content retrieval")
231
284
  return relevant_items
232
-
285
+
233
286
  logger.info("Getting full content for relevant items")
234
-
287
+
235
288
  # Use the selected engine to get full content
236
- if hasattr(self, '_selected_engine'):
289
+ if hasattr(self, "_selected_engine"):
237
290
  try:
238
291
  logger.info(f"Using {self._selected_engine_name} to get full content")
239
292
  return self._selected_engine._get_full_content(relevant_items)
240
293
  except Exception as e:
241
- logger.error(f"Error getting full content from {self._selected_engine_name}: {str(e)}")
294
+ logger.error(
295
+ f"Error getting full content from {self._selected_engine_name}: {str(e)}"
296
+ )
242
297
  # Fall back to returning relevant items without full content
243
298
  return relevant_items
244
299
  else:
245
- logger.warning("No engine was selected during preview phase, returning relevant items as-is")
300
+ logger.warning(
301
+ "No engine was selected during preview phase, returning relevant items as-is"
302
+ )
246
303
  return relevant_items
247
-
304
+
248
305
  def _get_engine_instance(self, engine_name: str) -> Optional[BaseSearchEngine]:
249
306
  """Get or create an instance of the specified search engine"""
250
307
  # Return cached instance if available
251
308
  if engine_name in self.engine_cache:
252
309
  return self.engine_cache[engine_name]
253
-
310
+
254
311
  # Create a new instance
255
312
  engine = None
256
313
  try:
257
314
  # Only pass parameters that all engines accept
258
- common_params = {
259
- "llm": self.llm,
260
- "max_results": self.max_results
261
- }
262
-
315
+ common_params = {"llm": self.llm, "max_results": self.max_results}
316
+
263
317
  # Add max_filtered_results if specified
264
318
  if self.max_filtered_results is not None:
265
319
  common_params["max_filtered_results"] = self.max_filtered_results
266
-
267
- engine = create_search_engine(
268
- engine_name,
269
- **common_params
270
- )
320
+
321
+ engine = create_search_engine(engine_name, **common_params)
271
322
  except Exception as e:
272
323
  logger.error(f"Error creating engine instance for {engine_name}: {str(e)}")
273
324
  return None
274
-
325
+
275
326
  if engine:
276
327
  # Cache the instance
277
328
  self.engine_cache[engine_name] = engine
278
-
329
+
279
330
  return engine
280
-
331
+
281
332
  def invoke(self, query: str) -> List[Dict[str, Any]]:
282
333
  """Compatibility method for LangChain tools"""
283
- return self.run(query)
334
+ return self.run(query)