local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +154 -160
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +87 -45
  41. local_deep_research/search_system.py +153 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1583 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.2.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,123 +1,158 @@
1
+ import json
2
+ import logging
1
3
  from abc import ABC, abstractmethod
2
- from typing import Dict, List, Any, Optional
3
- from langchain_core.language_models import BaseLLM
4
4
  from datetime import datetime
5
- import json
6
- from local_deep_research.utilties.search_utilities import remove_think_tags
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from langchain_core.language_models import BaseLLM
8
+
9
+ from ..config import search_config
7
10
 
8
- import logging
9
11
  logger = logging.getLogger(__name__)
10
12
 
13
+
11
14
  class BaseSearchEngine(ABC):
12
15
  """
13
16
  Abstract base class for search engines with two-phase retrieval capability.
14
17
  Handles common parameters and implements the two-phase search approach.
15
18
  """
16
-
17
- def __init__(self,
18
- llm: Optional[BaseLLM] = None,
19
- max_filtered_results: Optional[int] = None,
20
- max_results: Optional[int] = 10, # Default value if not provided
21
- **kwargs):
19
+
20
+ def __init__(
21
+ self,
22
+ llm: Optional[BaseLLM] = None,
23
+ max_filtered_results: Optional[int] = None,
24
+ max_results: Optional[int] = 10, # Default value if not provided
25
+ **kwargs,
26
+ ):
22
27
  """
23
28
  Initialize the search engine with common parameters.
24
-
29
+
25
30
  Args:
26
31
  llm: Optional language model for relevance filtering
27
32
  max_filtered_results: Maximum number of results to keep after filtering
28
33
  max_results: Maximum number of search results to return
29
34
  **kwargs: Additional engine-specific parameters
30
35
  """
31
- if max_filtered_results == None: max_filtered_results = 5
32
- self.llm = llm # LLM for relevance filtering
33
- self.max_filtered_results = max_filtered_results # Limit filtered results
34
-
35
- # Ensure max_results is never None and is a positive integer
36
+ if max_filtered_results is None:
37
+ max_filtered_results = 5
36
38
  if max_results is None:
37
- self.max_results = 25 # Default if None
38
- else:
39
- self.max_results = max(1, int(max_results))
40
-
39
+ max_results = 10
40
+
41
+ self.llm = llm # LLM for relevance filtering
42
+ self._max_filtered_results = int(max_filtered_results) # Ensure it's an integer
43
+ self._max_results = max(1, int(max_results)) # Ensure it's a positive integer
44
+
45
+ @property
46
+ def max_filtered_results(self) -> int:
47
+ """Get the maximum number of filtered results."""
48
+ return self._max_filtered_results
49
+
50
+ @max_filtered_results.setter
51
+ def max_filtered_results(self, value: int) -> None:
52
+ """Set the maximum number of filtered results."""
53
+ if value is None:
54
+ value = 5
55
+ logger.warning("Setting max_filtered_results to 5")
56
+ self._max_filtered_results = int(value)
57
+
58
+ @property
59
+ def max_results(self) -> int:
60
+ """Get the maximum number of search results."""
61
+ return self._max_results
62
+
63
+ @max_results.setter
64
+ def max_results(self, value: int) -> None:
65
+ """Set the maximum number of search results."""
66
+ if value is None:
67
+ value = 10
68
+ self._max_results = max(1, int(value))
69
+
41
70
  def run(self, query: str) -> List[Dict[str, Any]]:
42
71
  """
43
72
  Run the search engine with a given query, retrieving and filtering results.
44
- This implements a two-phase retrieval approach:
73
+ This implements a two-phase retrieval approach:
45
74
  1. Get preview information for many results
46
75
  2. Filter the previews for relevance
47
76
  3. Get full content for only the relevant results
48
-
77
+
49
78
  Args:
50
79
  query: The search query
51
-
80
+
52
81
  Returns:
53
82
  List of search results with full content (if available)
54
83
  """
55
84
  # Ensure we're measuring time correctly for citation tracking
56
85
 
57
-
58
86
  # Step 1: Get preview information for items
59
87
  previews = self._get_previews(query)
60
88
  if not previews:
61
- logger.info(f"Search engine {self.__class__.__name__} returned no preview results for query: {query}")
89
+ logger.info(
90
+ f"Search engine {self.__class__.__name__} returned no preview results for query: {query}"
91
+ )
62
92
  return []
63
-
93
+
64
94
  # Step 2: Filter previews for relevance with LLM
65
95
  filtered_items = self._filter_for_relevance(previews, query)
66
96
  if not filtered_items:
67
- logger.info(f"All preview results were filtered out as irrelevant for query: {query}")
97
+ logger.info(
98
+ f"All preview results were filtered out as irrelevant for query: {query}"
99
+ )
68
100
  # Do not fall back to previews, return empty list instead
69
101
  return []
70
-
102
+
71
103
  # Step 3: Get full content for filtered items
72
104
  # Import config inside the method to avoid circular import
73
- from local_deep_research import config
74
- if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
105
+
106
+ if (
107
+ hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
108
+ and search_config.SEARCH_SNIPPETS_ONLY
109
+ ):
75
110
  logger.info("Returning snippet-only results as per config")
76
111
  results = filtered_items
77
112
  else:
78
113
  results = self._get_full_content(filtered_items)
79
-
114
+
80
115
  return results
81
-
116
+
82
117
  def invoke(self, query: str) -> List[Dict[str, Any]]:
83
118
  """Compatibility method for LangChain tools"""
84
119
  return self.run(query)
85
-
86
- def _filter_for_relevance(self, previews: List[Dict[str, Any]], query: str) -> List[Dict[str, Any]]:
120
+
121
+ def _filter_for_relevance(
122
+ self, previews: List[Dict[str, Any]], query: str
123
+ ) -> List[Dict[str, Any]]:
87
124
  """
88
- Filter search results for relevance to the query using an LLM.
89
-
90
- Checks config.SKIP_RELEVANCE_FILTER to determine whether to perform filtering.
91
-
125
+ Filter search results by relevance to the query using the LLM.
126
+
92
127
  Args:
93
- previews: List of search result dictionaries with preview information
128
+ previews: List of preview dictionaries
94
129
  query: The original search query
95
-
130
+
96
131
  Returns:
97
- Filtered list of the most relevant search results
132
+ Filtered list of preview dictionaries
98
133
  """
99
- # Import config inside the method to avoid circular import
100
- from local_deep_research import config
101
-
102
- # Skip filtering if configured to do so or if no LLM is available
103
- if hasattr(config, 'SKIP_RELEVANCE_FILTER') and config.SKIP_RELEVANCE_FILTER:
104
- # Return all previews up to max_filtered_results if no filtering is performed
105
- limit = self.max_filtered_results or 5
106
- return previews[:limit]
107
-
108
- # Default implementation uses LLM if available
109
- if not self.llm or not previews:
110
- # If no LLM available, return all previews as relevant
111
- if self.max_filtered_results and len(previews) > self.max_filtered_results:
112
- return previews[:self.max_filtered_results]
134
+ # If no LLM or too few previews, return all
135
+ if not self.llm or len(previews) <= 1:
113
136
  return previews
114
-
115
- now = datetime.now()
116
- current_time = now.strftime("%Y-%m-%d")
137
+
138
+ # Create a simple context for LLM
139
+ preview_context = []
140
+ for i, preview in enumerate(previews):
141
+ title = preview.get("title", "Untitled").strip()
142
+ snippet = preview.get("snippet", "").strip()
143
+
144
+ # Clean up snippet if too long
145
+ if len(snippet) > 300:
146
+ snippet = snippet[:300] + "..."
147
+
148
+ preview_context.append(f"[{i}] Title: {title}\nSnippet: {snippet}")
149
+
150
+ # Set a reasonable limit on context length
151
+ current_date = datetime.now().strftime("%Y-%m-%d")
117
152
  prompt = f"""Analyze these search results and provide a ranked list of the most relevant ones.
118
153
 
119
154
  IMPORTANT: Evaluate and rank based on these criteria (in order of importance):
120
- 1. Timeliness - current/recent information as of {current_time}
155
+ 1. Timeliness - current/recent information as of {current_date}
121
156
  2. Direct relevance to query: "{query}"
122
157
  3. Source reliability (prefer official sources, established websites)
123
158
  4. Factual accuracy (cross-reference major claims)
@@ -130,67 +165,107 @@ Include ONLY indices that meet ALL criteria, with the most relevant first.
130
165
  Example response: [4, 0, 2]
131
166
 
132
167
  Respond with ONLY the JSON array, no other text."""
133
-
168
+
134
169
  try:
135
170
  # Get LLM's evaluation
136
171
  response = self.llm.invoke(prompt)
137
-
138
- # Extract JSON array from response
139
- response_text = remove_think_tags(response.content)
140
- # Clean up response to handle potential formatting issues
172
+
173
+ # Log the raw response for debugging
174
+ logger.info(f"Raw LLM response for relevance filtering: {response}")
175
+
176
+ # Handle different response formats
177
+ response_text = ""
178
+ if hasattr(response, "content"):
179
+ response_text = response.content
180
+ else:
181
+ response_text = str(response)
182
+
183
+ # Clean up response
141
184
  response_text = response_text.strip()
142
-
143
- # Find the first occurrence of '[' and the last occurrence of ']'
144
- start_idx = response_text.find('[')
145
- end_idx = response_text.rfind(']')
146
-
185
+ logger.debug(f"Cleaned response text: {response_text}")
186
+
187
+ # Find JSON array in response
188
+ start_idx = response_text.find("[")
189
+ end_idx = response_text.rfind("]")
190
+
147
191
  if start_idx >= 0 and end_idx > start_idx:
148
- array_text = response_text[start_idx:end_idx+1]
149
- ranked_indices = json.loads(array_text)
150
-
151
- # Return the results in ranked order
152
- ranked_results = []
153
- for idx in ranked_indices:
154
- if idx < len(previews):
155
- ranked_results.append(previews[idx])
156
-
157
- # Limit to max_filtered_results if specified
158
- if self.max_filtered_results and len(ranked_results) > self.max_filtered_results:
159
- logger.info(f"Limiting filtered results to top {self.max_filtered_results}")
160
- return ranked_results[:self.max_filtered_results]
161
-
162
- return ranked_results
192
+ array_text = response_text[start_idx : end_idx + 1]
193
+ try:
194
+ ranked_indices = json.loads(array_text)
195
+
196
+ # Validate that ranked_indices is a list of integers
197
+ if not isinstance(ranked_indices, list):
198
+ logger.warning(
199
+ "LLM response is not a list, returning empty results"
200
+ )
201
+ return []
202
+
203
+ if not all(isinstance(idx, int) for idx in ranked_indices):
204
+ logger.warning(
205
+ "LLM response contains non-integer indices, returning empty results"
206
+ )
207
+ return []
208
+
209
+ # Return the results in ranked order
210
+ ranked_results = []
211
+ for idx in ranked_indices:
212
+ if idx < len(previews):
213
+ ranked_results.append(previews[idx])
214
+ else:
215
+ logger.warning(f"Index {idx} out of range, skipping")
216
+
217
+ # Limit to max_filtered_results if specified
218
+ if (
219
+ self.max_filtered_results
220
+ and len(ranked_results) > self.max_filtered_results
221
+ ):
222
+ logger.info(
223
+ f"Limiting filtered results to top {self.max_filtered_results}"
224
+ )
225
+ return ranked_results[: self.max_filtered_results]
226
+
227
+ return ranked_results
228
+
229
+ except json.JSONDecodeError as e:
230
+ logger.warning(f"Failed to parse JSON from LLM response: {e}")
231
+ logger.debug(f"Problematic JSON text: {array_text}")
232
+ return []
163
233
  else:
164
- logger.info("Could not find JSON array in response, returning no previews")
165
- return []
166
-
234
+ logger.warning(
235
+ "Could not find JSON array in response, returning original previews"
236
+ )
237
+ logger.debug(f"Response text without JSON array: {response_text}")
238
+ return previews[: min(5, len(previews))]
239
+
167
240
  except Exception as e:
168
- logger.info(f"Relevance filtering error: {e}")
169
- # Fall back to returning all previews (or top N) on error
170
- return[]
171
-
241
+ logger.error(f"Relevance filtering error: {e}", exc_info=True)
242
+ # Fall back to returning top results on error
243
+ return previews[: min(5, len(previews))]
244
+
172
245
  @abstractmethod
173
246
  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
174
247
  """
175
248
  Get preview information (titles, summaries) for initial search results.
176
-
249
+
177
250
  Args:
178
251
  query: The search query
179
-
252
+
180
253
  Returns:
181
254
  List of preview dictionaries with at least 'id', 'title', and 'snippet' keys
182
255
  """
183
256
  pass
184
-
257
+
185
258
  @abstractmethod
186
- def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
259
+ def _get_full_content(
260
+ self, relevant_items: List[Dict[str, Any]]
261
+ ) -> List[Dict[str, Any]]:
187
262
  """
188
263
  Get full content for the relevant items.
189
-
264
+
190
265
  Args:
191
266
  relevant_items: List of relevant preview dictionaries
192
-
267
+
193
268
  Returns:
194
269
  List of result dictionaries with full content
195
270
  """
196
- pass
271
+ pass