local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +154 -160
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +87 -45
  41. local_deep_research/search_system.py +153 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1583 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.2.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
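
Two structural changes in the list above recur throughout the diff below: the misspelled `utilties` package is renamed to `utilities`, and the flat `config.py` module is split into a `config` package (`config_files.py`, `llm_config.py`, `search_config.py`). As an orientation aid, here is a minimal sketch of how import paths move between the two versions — the absolute form of the relative imports used in the diff below; the exact re-exports of `config/__init__.py` are not shown in this diff:

    # 0.1.26 -- flat config module and misspelled utilities package
    from local_deep_research import config
    from local_deep_research.utilties import enums

    # 0.2.2 -- config split into a package; utilities spelling fixed
    from local_deep_research.config import llm_config, search_config
    from local_deep_research.utilities import enums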
--- a/local_deep_research/web_search_engines/engines/search_engine_arxiv.py
+++ b/local_deep_research/web_search_engines/engines/search_engine_arxiv.py
@@ -1,27 +1,32 @@
-from typing import Dict, List, Any, Optional
-from langchain_core.language_models import BaseLLM
+import logging
+from typing import Any, Dict, List, Optional
 
-from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
-from local_deep_research import config
 import arxiv
-import logging
+from langchain_core.language_models import BaseLLM
+
+from ...config import search_config
+from ..search_engine_base import BaseSearchEngine
+
 logger = logging.getLogger(__name__)
 
+
 class ArXivSearchEngine(BaseSearchEngine):
     """arXiv search engine implementation with two-phase approach"""
-
-    def __init__(self,
-                 max_results: int = 10,
-                 sort_by: str = "relevance",
-                 sort_order: str = "descending",
-                 include_full_text: bool = False,
-                 download_dir: Optional[str] = None,
-                 max_full_text: int = 1,
-                 llm: Optional[BaseLLM] = None,
-                 max_filtered_results: Optional[int] = None):  # Added this parameter
+
+    def __init__(
+        self,
+        max_results: int = 10,
+        sort_by: str = "relevance",
+        sort_order: str = "descending",
+        include_full_text: bool = False,
+        download_dir: Optional[str] = None,
+        max_full_text: int = 1,
+        llm: Optional[BaseLLM] = None,
+        max_filtered_results: Optional[int] = None,
+    ):  # Added this parameter
         """
         Initialize the arXiv search engine.
-
+
         Args:
             max_results: Maximum number of search results
             sort_by: Sorting criteria ('relevance', 'lastUpdatedDate', or 'submittedDate')
@@ -33,77 +38,81 @@ class ArXivSearchEngine(BaseSearchEngine):
             max_filtered_results: Maximum number of results to keep after filtering
         """
         # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
-        super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
-        self.max_results=max(self.max_results,25)
+        super().__init__(
+            llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
+        )
+        self.max_results = max(self.max_results, 25)
         self.sort_by = sort_by
         self.sort_order = sort_order
         self.include_full_text = include_full_text
         self.download_dir = download_dir
         self.max_full_text = max_full_text
-
+
         # Map sort parameters to arxiv package parameters
         self.sort_criteria = {
-            'relevance': arxiv.SortCriterion.Relevance,
-            'lastUpdatedDate': arxiv.SortCriterion.LastUpdatedDate,
-            'submittedDate': arxiv.SortCriterion.SubmittedDate
+            "relevance": arxiv.SortCriterion.Relevance,
+            "lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
+            "submittedDate": arxiv.SortCriterion.SubmittedDate,
         }
-
+
         self.sort_directions = {
-            'ascending': arxiv.SortOrder.Ascending,
-            'descending': arxiv.SortOrder.Descending
+            "ascending": arxiv.SortOrder.Ascending,
+            "descending": arxiv.SortOrder.Descending,
         }
 
     def _get_search_results(self, query: str) -> List[Any]:
         """
         Helper method to get search results from arXiv API.
-
+
         Args:
             query: The search query
-
+
         Returns:
             List of arXiv paper objects
         """
         # Configure the search client
-        sort_criteria = self.sort_criteria.get(self.sort_by, arxiv.SortCriterion.Relevance)
-        sort_order = self.sort_directions.get(self.sort_order, arxiv.SortOrder.Descending)
-
-
-
+        sort_criteria = self.sort_criteria.get(
+            self.sort_by, arxiv.SortCriterion.Relevance
+        )
+        sort_order = self.sort_directions.get(
+            self.sort_order, arxiv.SortOrder.Descending
+        )
+
         # Create the search client
         client = arxiv.Client(page_size=self.max_results)
-
+
         # Create the search query
         search = arxiv.Search(
             query=query,
             max_results=self.max_results,
             sort_by=sort_criteria,
-            sort_order=sort_order
+            sort_order=sort_order,
         )
-
+
         # Get the search results
         papers = list(client.results(search))
-
+
         return papers
-
+
     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
         """
         Get preview information for arXiv papers.
-
+
         Args:
             query: The search query
-
+
         Returns:
             List of preview dictionaries
         """
         logger.info("Getting paper previews from arXiv")
-
+
         try:
             # Get search results from arXiv
             papers = self._get_search_results(query)
-
+
             # Store the paper objects for later use
             self._papers = {paper.entry_id: paper for paper in papers}
-
+
             # Format results as previews with basic information
             previews = []
             for paper in papers:
@@ -111,196 +120,268 @@ class ArXivSearchEngine(BaseSearchEngine):
                     "id": paper.entry_id,  # Use entry_id as ID
                     "title": paper.title,
                     "link": paper.entry_id,  # arXiv URL
-                    "snippet": paper.summary[:250] + "..." if len(paper.summary) > 250 else paper.summary,
-                    "authors": [author.name for author in paper.authors[:3]],  # First 3 authors
-                    "published": paper.published.strftime("%Y-%m-%d") if paper.published else None
+                    "snippet": (
+                        paper.summary[:250] + "..."
+                        if len(paper.summary) > 250
+                        else paper.summary
+                    ),
+                    "authors": [
+                        author.name for author in paper.authors[:3]
+                    ],  # First 3 authors
+                    "published": (
+                        paper.published.strftime("%Y-%m-%d")
+                        if paper.published
+                        else None
+                    ),
                 }
-
+
                 previews.append(preview)
-
+
             return previews
-
+
         except Exception as e:
             logger.error(f"Error getting arXiv previews: {e}")
             return []
-
-    def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+
+    def _get_full_content(
+        self, relevant_items: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
         """
         Get full content for the relevant arXiv papers.
         Downloads PDFs and extracts text when include_full_text is True.
         Limits the number of PDFs processed to max_full_text.
-
+
         Args:
             relevant_items: List of relevant preview dictionaries
-
+
         Returns:
             List of result dictionaries with full content
         """
         # Check if we should get full content
-        if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
+        if (
+            hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
+            and search_config.SEARCH_SNIPPETS_ONLY
+        ):
             logger.info("Snippet-only mode, skipping full content retrieval")
             return relevant_items
-
+
         logger.info("Getting full content for relevant arXiv papers")
-
+
         results = []
         pdf_count = 0  # Track number of PDFs processed
-
+
         for item in relevant_items:
             # Start with the preview data
             result = item.copy()
-
+
             # Get the paper ID
             paper_id = item.get("id")
-
+
             # Try to get the full paper from our cache
             paper = None
-            if hasattr(self, '_papers') and paper_id in self._papers:
+            if hasattr(self, "_papers") and paper_id in self._papers:
                 paper = self._papers[paper_id]
-
+
             if paper:
                 # Add complete paper information
-                result.update({
-                    "pdf_url": paper.pdf_url,
-                    "authors": [author.name for author in paper.authors],  # All authors
-                    "published": paper.published.strftime("%Y-%m-%d") if paper.published else None,
-                    "updated": paper.updated.strftime("%Y-%m-%d") if paper.updated else None,
-                    "categories": paper.categories,
-                    "summary": paper.summary,  # Full summary
-                    "comment": paper.comment,
-                    "journal_ref": paper.journal_ref,
-                    "doi": paper.doi
-                })
-
+                result.update(
+                    {
+                        "pdf_url": paper.pdf_url,
+                        "authors": [
+                            author.name for author in paper.authors
+                        ],  # All authors
+                        "published": (
+                            paper.published.strftime("%Y-%m-%d")
+                            if paper.published
+                            else None
+                        ),
+                        "updated": (
+                            paper.updated.strftime("%Y-%m-%d")
+                            if paper.updated
+                            else None
+                        ),
+                        "categories": paper.categories,
+                        "summary": paper.summary,  # Full summary
+                        "comment": paper.comment,
+                        "journal_ref": paper.journal_ref,
+                        "doi": paper.doi,
+                    }
+                )
+
                 # Default to using summary as content
                 result["content"] = paper.summary
                 result["full_content"] = paper.summary
-
+
                 # Download PDF and extract text if requested and within limit
-                if self.include_full_text and self.download_dir and pdf_count < self.max_full_text:
+                if (
+                    self.include_full_text
+                    and self.download_dir
+                    and pdf_count < self.max_full_text
+                ):
                     try:
                         # Download the paper
                         pdf_count += 1  # Increment counter before attempting download
                         paper_path = paper.download_pdf(dirpath=self.download_dir)
                         result["pdf_path"] = str(paper_path)
-
+
                         # Extract text from PDF
                         try:
                             # Try PyPDF2 first
                             try:
                                 import PyPDF2
-                                with open(paper_path, 'rb') as pdf_file:
+
+                                with open(paper_path, "rb") as pdf_file:
                                     pdf_reader = PyPDF2.PdfReader(pdf_file)
                                     pdf_text = ""
                                     for page in pdf_reader.pages:
                                         pdf_text += page.extract_text() + "\n\n"
-
-                                if pdf_text.strip():  # Only use if we got meaningful text
+
+                                if (
+                                    pdf_text.strip()
+                                ):  # Only use if we got meaningful text
                                     result["content"] = pdf_text
                                     result["full_content"] = pdf_text
-                                    logger.info(f"Successfully extracted text from PDF using PyPDF2")
+                                    logger.info(
+                                        "Successfully extracted text from PDF using PyPDF2"
+                                    )
                             except (ImportError, Exception) as e1:
                                 # Fall back to pdfplumber
                                 try:
                                     import pdfplumber
+
                                     with pdfplumber.open(paper_path) as pdf:
                                         pdf_text = ""
                                         for page in pdf.pages:
                                             pdf_text += page.extract_text() + "\n\n"
-
-                                    if pdf_text.strip():  # Only use if we got meaningful text
+
+                                    if (
+                                        pdf_text.strip()
+                                    ):  # Only use if we got meaningful text
                                         result["content"] = pdf_text
                                         result["full_content"] = pdf_text
-                                        logger.info(f"Successfully extracted text from PDF using pdfplumber")
+                                        logger.info(
+                                            "Successfully extracted text from PDF using pdfplumber"
+                                        )
                                 except (ImportError, Exception) as e2:
-                                    logger.error(f"PDF text extraction failed: {str(e1)}, then {str(e2)}")
-                                    logger.error(f"Using paper summary as content instead")
+                                    logger.error(
+                                        f"PDF text extraction failed: {str(e1)}, then {str(e2)}"
+                                    )
+                                    logger.error(
+                                        "Using paper summary as content instead"
+                                    )
                         except Exception as e:
                             logger.error(f"Error extracting text from PDF: {e}")
-                            logger.error(f"Using paper summary as content instead")
+                            logger.error("Using paper summary as content instead")
                     except Exception as e:
                         logger.error(f"Error downloading paper {paper.title}: {e}")
                         result["pdf_path"] = None
                         pdf_count -= 1  # Decrement counter if download fails
-                elif self.include_full_text and self.download_dir and pdf_count >= self.max_full_text:
+                elif (
+                    self.include_full_text
+                    and self.download_dir
+                    and pdf_count >= self.max_full_text
+                ):
                     # Reached PDF limit
-                    logger.info(f"Maximum number of PDFs ({self.max_full_text}) reached. Skipping remaining PDFs.")
+                    logger.info(
+                        f"Maximum number of PDFs ({self.max_full_text}) reached. Skipping remaining PDFs."
+                    )
                     result["content"] = paper.summary
                     result["full_content"] = paper.summary
-
+
             results.append(result)
-
+
         return results
-
+
     def run(self, query: str) -> List[Dict[str, Any]]:
         """
         Execute a search using arXiv with the two-phase approach.
-
+
         Args:
             query: The search query
-
+
         Returns:
             List of search results
         """
         logger.info("---Execute a search using arXiv---")
-
+
         # Use the implementation from the parent class which handles all phases
        results = super().run(query)
-
+
         # Clean up
-        if hasattr(self, '_papers'):
+        if hasattr(self, "_papers"):
             del self._papers
-
+
         return results
-
+
     def get_paper_details(self, arxiv_id: str) -> Dict[str, Any]:
         """
         Get detailed information about a specific arXiv paper.
-
+
         Args:
             arxiv_id: arXiv ID of the paper (e.g., '2101.12345')
-
+
         Returns:
             Dictionary with paper information
         """
         try:
             # Create the search client
             client = arxiv.Client()
-
+
             # Search for the specific paper
             search = arxiv.Search(id_list=[arxiv_id], max_results=1)
-
+
             # Get the paper
             papers = list(client.results(search))
             if not papers:
                 return {}
-
+
             paper = papers[0]
-
+
             # Format result based on config
             result = {
                 "title": paper.title,
                 "link": paper.entry_id,
-                "snippet": paper.summary[:250] + "..." if len(paper.summary) > 250 else paper.summary,
-                "authors": [author.name for author in paper.authors[:3]]  # First 3 authors
+                "snippet": (
+                    paper.summary[:250] + "..."
+                    if len(paper.summary) > 250
+                    else paper.summary
+                ),
+                "authors": [
+                    author.name for author in paper.authors[:3]
+                ],  # First 3 authors
             }
-
+
             # Add full content if not in snippet-only mode
-            if not hasattr(config, 'SEARCH_SNIPPETS_ONLY') or not config.SEARCH_SNIPPETS_ONLY:
-                result.update({
-                    "pdf_url": paper.pdf_url,
-                    "authors": [author.name for author in paper.authors],  # All authors
-                    "published": paper.published.strftime("%Y-%m-%d") if paper.published else None,
-                    "updated": paper.updated.strftime("%Y-%m-%d") if paper.updated else None,
-                    "categories": paper.categories,
-                    "summary": paper.summary,  # Full summary
-                    "comment": paper.comment,
-                    "journal_ref": paper.journal_ref,
-                    "doi": paper.doi,
-                    "content": paper.summary,  # Use summary as content
-                    "full_content": paper.summary  # For consistency
-                })
-
+            if (
+                not hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
+                or not search_config.SEARCH_SNIPPETS_ONLY
+            ):
+                result.update(
+                    {
+                        "pdf_url": paper.pdf_url,
+                        "authors": [
+                            author.name for author in paper.authors
+                        ],  # All authors
+                        "published": (
+                            paper.published.strftime("%Y-%m-%d")
+                            if paper.published
+                            else None
+                        ),
+                        "updated": (
+                            paper.updated.strftime("%Y-%m-%d")
+                            if paper.updated
+                            else None
+                        ),
+                        "categories": paper.categories,
+                        "summary": paper.summary,  # Full summary
+                        "comment": paper.comment,
+                        "journal_ref": paper.journal_ref,
+                        "doi": paper.doi,
+                        "content": paper.summary,  # Use summary as content
+                        "full_content": paper.summary,  # For consistency
+                    }
+                )
+
             # Download PDF if requested
             if self.include_full_text and self.download_dir:
                 try:
@@ -309,57 +390,61 @@ class ArXivSearchEngine(BaseSearchEngine):
                     result["pdf_path"] = str(paper_path)
                 except Exception as e:
                     logger.error(f"Error downloading paper: {e}")
-
+
             return result
-
+
         except Exception as e:
             logger.error(f"Error getting paper details: {e}")
             return {}
-
-    def search_by_author(self, author_name: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
+
+    def search_by_author(
+        self, author_name: str, max_results: Optional[int] = None
+    ) -> List[Dict[str, Any]]:
         """
         Search for papers by a specific author.
-
+
         Args:
             author_name: Name of the author
             max_results: Maximum number of results (defaults to self.max_results)
-
+
         Returns:
             List of papers by the author
         """
         original_max_results = self.max_results
-
+
         try:
             if max_results:
                 self.max_results = max_results
-
-            query = f"au:\"{author_name}\""
+
+            query = f'au:"{author_name}"'
             return self.run(query)
-
+
         finally:
             # Restore original value
             self.max_results = original_max_results
-
-    def search_by_category(self, category: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
+
+    def search_by_category(
+        self, category: str, max_results: Optional[int] = None
+    ) -> List[Dict[str, Any]]:
         """
         Search for papers in a specific arXiv category.
-
+
         Args:
             category: arXiv category (e.g., 'cs.AI', 'physics.optics')
             max_results: Maximum number of results (defaults to self.max_results)
-
+
         Returns:
             List of papers in the category
         """
         original_max_results = self.max_results
-
+
         try:
             if max_results:
                 self.max_results = max_results
-
+
             query = f"cat:{category}"
             return self.run(query)
-
+
         finally:
             # Restore original value
-            self.max_results = original_max_results
+            self.max_results = original_max_results
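
For orientation, a hypothetical usage sketch assembled from the 0.2.2 signatures shown above — the import path follows the file list, the result keys match `_get_previews`, and note that `__init__` floors `max_results` at 25 despite the declared default of 10. How `BaseSearchEngine` filters results when no `llm` is passed is not shown in this diff:

    from local_deep_research.web_search_engines.engines.search_engine_arxiv import (
        ArXivSearchEngine,
    )

    # Constructor defaults as declared in __init__ above; include_full_text=False
    # keeps the engine in snippet/summary mode, so no PDFs are downloaded.
    engine = ArXivSearchEngine(
        sort_by="relevance",
        include_full_text=False,
    )

    # run() delegates to BaseSearchEngine.run(), which drives the two-phase
    # preview -> full-content flow and returns a list of dicts.
    for paper in engine.run("quantum error correction"):
        print(paper["published"], paper["title"], paper["link"])

    # Convenience wrappers shown in the last hunk build fielded queries
    # (au:"..." and cat:...) and route them through run().
    recent = engine.search_by_category("cs.AI", max_results=5)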