paper-search-cli 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. paper_search/__init__.py +3 -0
  2. paper_search/academic_platforms/__init__.py +0 -0
  3. paper_search/academic_platforms/acm.py +113 -0
  4. paper_search/academic_platforms/arxiv.py +157 -0
  5. paper_search/academic_platforms/base.py +54 -0
  6. paper_search/academic_platforms/base_search.py +253 -0
  7. paper_search/academic_platforms/biorxiv.py +144 -0
  8. paper_search/academic_platforms/chemrxiv.py +183 -0
  9. paper_search/academic_platforms/citeseerx.py +407 -0
  10. paper_search/academic_platforms/core.py +470 -0
  11. paper_search/academic_platforms/crossref.py +354 -0
  12. paper_search/academic_platforms/dblp.py +387 -0
  13. paper_search/academic_platforms/doaj.py +476 -0
  14. paper_search/academic_platforms/europepmc.py +430 -0
  15. paper_search/academic_platforms/google_scholar.py +233 -0
  16. paper_search/academic_platforms/hal.py +259 -0
  17. paper_search/academic_platforms/iacr.py +499 -0
  18. paper_search/academic_platforms/ieee.py +107 -0
  19. paper_search/academic_platforms/medrxiv.py +145 -0
  20. paper_search/academic_platforms/oaipmh.py +467 -0
  21. paper_search/academic_platforms/openaire.py +718 -0
  22. paper_search/academic_platforms/openalex.py +188 -0
  23. paper_search/academic_platforms/pmc.py +413 -0
  24. paper_search/academic_platforms/pubmed.py +162 -0
  25. paper_search/academic_platforms/sci_hub.py +178 -0
  26. paper_search/academic_platforms/semantic.py +531 -0
  27. paper_search/academic_platforms/ssrn.py +365 -0
  28. paper_search/academic_platforms/unpaywall.py +227 -0
  29. paper_search/academic_platforms/zenodo.py +271 -0
  30. paper_search/cli.py +227 -0
  31. paper_search/config.py +89 -0
  32. paper_search/engine.py +341 -0
  33. paper_search/paper.py +59 -0
  34. paper_search/utils.py +8 -0
  35. paper_search_cli-1.0.2.dist-info/METADATA +191 -0
  36. paper_search_cli-1.0.2.dist-info/RECORD +39 -0
  37. paper_search_cli-1.0.2.dist-info/WHEEL +4 -0
  38. paper_search_cli-1.0.2.dist-info/entry_points.txt +2 -0
  39. paper_search_cli-1.0.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,3 @@
1
# Importing the package runs load_env_file() once as an import side effect.
# NOTE(review): load_env_file lives in .config; presumably it loads settings
# from an env file — confirm there.
from .config import load_env_file

load_env_file()
File without changes
@@ -0,0 +1,113 @@
1
+ """ACM Digital Library connector — optional, requires API key env.
2
+
3
+ This module is a **skeleton only**. No real ACM DL API requests are made,
4
+ even when the ``PAPER_SEARCH_MCP_ACM_API_KEY`` (or legacy ``ACM_API_KEY``)
5
+ environment variable is configured. Without a key, all methods
6
+ raise :class:`NotImplementedError` with a configuration hint; with a key, they
7
+ raise it with a "not yet implemented" message. The rest of the platform
8
+ therefore continues to work without any paid credentials.
9
+
10
+ Enable usage::
11
+
12
+ export PAPER_SEARCH_MCP_ACM_API_KEY=<your_acm_api_key>
13
+
14
+ .. note::
15
+ ACM recently opened a limited metadata API. Check
16
+ https://libraries.acm.org/digital-library/acm-open for Open Access content
17
+ that does NOT require a key. Full-text/PDF download requires ACM membership
18
+ or institutional access.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import logging
24
+ from typing import List
25
+
26
+ from .base import PaperSource
27
+ from ..paper import Paper
28
+ from ..config import get_env
29
+
30
# Module-level logger for the ACM connector.
logger = logging.getLogger(__name__)

# Message raised by every ACMSearcher operation while no API key is set.
# Built from whole sentences joined by single spaces.
_NOT_CONFIGURED_MSG = " ".join(
    (
        "ACM Digital Library is not configured.",
        "Set PAPER_SEARCH_MCP_ACM_API_KEY (or legacy ACM_API_KEY) environment"
        " variable to enable ACM DL search.",
        "See https://libraries.acm.org/digital-library/acm-open for access options.",
    )
)
38
+
39
+
40
class ACMSearcher(PaperSource):
    """Skeleton connector for ACM Digital Library.

    Instantiating this class without ``PAPER_SEARCH_MCP_ACM_API_KEY``
    (or ``ACM_API_KEY``) set logs a warning but does NOT raise an error.
    Every operation currently raises :class:`NotImplementedError`: with a
    configuration hint when no key is set, and with a "not yet implemented"
    message when a key is present.
    """

    # ACM DL base URL (placeholder — real endpoint TBD once API key is available)
    BASE_URL = "https://dl.acm.org/action/doSearch"

    def __init__(self) -> None:
        """Read the API key from the environment and warn when it is missing."""
        # Only the legacy name is passed here; the warning text below implies
        # the PAPER_SEARCH_MCP_-prefixed variant is honoured as well —
        # presumably get_env resolves it (confirm in config.py).
        self.api_key: str = get_env("ACM_API_KEY", "")
        if not self.api_key:
            logger.warning(
                "ACMSearcher initialised without PAPER_SEARCH_MCP_ACM_API_KEY/ACM_API_KEY. "
                "All calls will raise NotImplementedError until the key is set."
            )

    # ------------------------------------------------------------------
    # Public helpers
    # ------------------------------------------------------------------

    def is_configured(self) -> bool:
        """Return True only when a non-empty ACM API key is available."""
        return bool(self.api_key)

    def _ensure_configured(self) -> None:
        """Raise NotImplementedError with the configuration hint if no key is set."""
        if not self.is_configured():
            raise NotImplementedError(_NOT_CONFIGURED_MSG)

    # ------------------------------------------------------------------
    # PaperSource interface
    # ------------------------------------------------------------------

    def search(self, query: str, max_results: int = 10, **kwargs) -> List[Paper]:  # type: ignore[override]
        """Search ACM Digital Library — requires PAPER_SEARCH_MCP_ACM_API_KEY or ACM_API_KEY.

        Args:
            query: Search query string (currently unused).
            max_results: Maximum number of results (currently unused).
            **kwargs: Source-specific parameters (currently unused).

        Raises:
            NotImplementedError: Always. The message asks for the API key
                when it is not set; otherwise it states that search is not
                implemented yet.
        """
        self._ensure_configured()

        # TODO: implement real ACM DL API call here once key is available
        raise NotImplementedError(
            "ACM DL search is not yet implemented. "
            "Contribute at https://github.com/openags/paper-search-cli."
        )

    def download_pdf(self, paper_id: str, save_path: str = "./downloads") -> str:
        """Download a PDF from ACM DL — requires ACM API key env and institutional access.

        Args:
            paper_id: ACM paper identifier (currently unused).
            save_path: Target directory for the PDF (currently unused).

        Raises:
            NotImplementedError: Always, until key + download logic are implemented.
        """
        self._ensure_configured()

        raise NotImplementedError(
            "ACM DL PDF download is not yet implemented. "
            "Note: full-text access also requires ACM membership or institutional access."
        )

    def read_paper(self, paper_id: str, save_path: str = "./downloads") -> str:
        """Read paper content from ACM DL — requires ACM API key env.

        Args:
            paper_id: ACM paper identifier (currently unused).
            save_path: Directory where the PDF would be stored (currently unused).

        Raises:
            NotImplementedError: Always, until download + extraction are implemented.
        """
        self._ensure_configured()

        raise NotImplementedError(
            "ACM DL paper reading is not yet implemented."
        )
@@ -0,0 +1,157 @@
1
+ # paper_search/academic_platforms/arxiv.py
2
+ from typing import List
3
+ from datetime import datetime
4
+ import requests
5
+ import feedparser
6
+ import time
7
+ from ..paper import Paper
8
+ from ..utils import extract_doi
9
+ from .base import PaperSource
10
+ from PyPDF2 import PdfReader
11
+ import os
12
+
13
class ArxivSearcher(PaperSource):
    """Searcher for arXiv papers.

    Queries the arXiv Atom API, converts feed entries into ``Paper`` objects,
    and can download a paper's PDF and extract its text.
    """
    BASE_URL = "http://export.arxiv.org/api/query"

    # Retry policy used by search(): number of attempts, linear back-off step
    # in seconds, and the HTTP statuses worth retrying.
    _MAX_ATTEMPTS = 3
    _BACKOFF_STEP = 1.5
    _RETRYABLE_STATUS = (429, 500, 502, 503, 504)
    # Timestamp layout of the `published` / `updated` feed fields.
    _DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
    # Timeout (seconds) applied to every HTTP request.
    _TIMEOUT = 30

    def __init__(self):
        """Create an HTTP session with the client's User-Agent and Accept headers."""
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'paper-search-cli/1.0.0 (mailto:openags@example.com)',
            'Accept': 'application/atom+xml, application/xml;q=0.9, */*;q=0.8',
        })

    def search(self, query: str, max_results: int = 10) -> List[Paper]:
        """Search arXiv, newest submissions first.

        Args:
            query: Free-text query, sent as ``all:<query>``.
            max_results: Maximum number of entries requested from the API.

        Returns:
            Parsed papers. Empty when the request ultimately fails; entries
            that cannot be parsed are reported on stdout and skipped.
        """
        params = {
            'search_query': f'all:{query}',
            'max_results': max_results,
            'sortBy': 'submittedDate',
            'sortOrder': 'descending'
        }
        response = self._fetch_feed(params)
        if response is None or response.status_code != 200:
            return []

        feed = feedparser.parse(response.content)
        papers = []
        for entry in feed.entries:
            # Best effort: one malformed entry must not discard the others.
            try:
                papers.append(self._entry_to_paper(entry))
            except Exception as e:
                print(f"Error parsing arXiv entry: {e}")
        return papers

    def _fetch_feed(self, params):
        """Request the feed with retries; return the last response obtained, or None."""
        response = None
        for attempt in range(self._MAX_ATTEMPTS):
            try:
                response = self.session.get(
                    self.BASE_URL, params=params, timeout=self._TIMEOUT
                )
            except requests.RequestException:
                pass  # transport error: retry below
            else:
                if response.status_code not in self._RETRYABLE_STATUS:
                    # Success or a non-retryable status: stop immediately.
                    break
            # Back off before the next attempt; sleeping after the final
            # attempt would only delay the caller.
            if attempt < self._MAX_ATTEMPTS - 1:
                time.sleep((attempt + 1) * self._BACKOFF_STEP)
        return response

    @staticmethod
    def _paper_id_from_entry_id(entry_id: str) -> str:
        """Extract the arXiv identifier from an entry URL.

        Everything after ``/abs/`` is kept so that old-style identifiers
        such as ``hep-th/9901001v1`` retain their archive prefix; without an
        ``/abs/`` segment the last path component is used.
        """
        _, marker, tail = entry_id.partition('/abs/')
        if marker and tail:
            return tail
        return entry_id.split('/')[-1]

    def _entry_to_paper(self, entry) -> Paper:
        """Convert one feedparser entry into a Paper (raises on malformed entries)."""
        authors = [author.name for author in entry.authors]
        published = datetime.strptime(entry.published, self._DATE_FORMAT)
        updated = datetime.strptime(entry.updated, self._DATE_FORMAT)
        pdf_url = next(
            (link.href for link in entry.links if link.type == 'application/pdf'),
            ''
        )

        # DOI sources in priority order: explicit feed fields (feedparser
        # exposes <arxiv:doi> as `arxiv_doi`), then DOIs embedded in the
        # summary or id, and finally the dedicated "doi" link.
        doi = (
            entry.get('doi', '')
            or entry.get('arxiv_doi', '')
            or extract_doi(entry.summary)
            or extract_doi(entry.id)
        )
        for link in entry.links:
            if link.get('title') == 'doi':
                doi = doi or extract_doi(link.href)

        return Paper(
            paper_id=self._paper_id_from_entry_id(entry.id),
            title=entry.title,
            authors=authors,
            abstract=entry.summary,
            url=entry.id,
            pdf_url=pdf_url,
            published_date=published,
            updated_date=updated,
            source='arxiv',
            categories=[tag.term for tag in entry.tags],
            keywords=[],
            doi=doi
        )

    @staticmethod
    def _pdf_path(paper_id: str, save_path: str) -> str:
        """Return the local PDF path for a paper.

        Slashes in old-style identifiers are replaced so that the file stays
        directly inside ``save_path``.
        """
        safe_id = paper_id.replace('/', '_')
        return f"{save_path}/{safe_id}.pdf"

    def download_pdf(self, paper_id: str, save_path: str) -> str:
        """Download the PDF of a paper.

        Args:
            paper_id: arXiv paper ID
            save_path: Directory to save the PDF (created if missing)

        Returns:
            str: Path of the written PDF file

        Raises:
            requests.RequestException: On network failure or an HTTP error
                status (so an error page is never stored as a PDF).
        """
        pdf_url = f"https://arxiv.org/pdf/{paper_id}.pdf"
        response = self.session.get(pdf_url, timeout=self._TIMEOUT)
        response.raise_for_status()
        os.makedirs(save_path, exist_ok=True)
        output_file = self._pdf_path(paper_id, save_path)
        with open(output_file, 'wb') as f:
            f.write(response.content)
        return output_file

    def read_paper(self, paper_id: str, save_path: str = "./downloads") -> str:
        """Read a paper and convert it to text format.

        Args:
            paper_id: arXiv paper ID
            save_path: Directory where the PDF is/will be saved

        Returns:
            str: The extracted text content of the paper, or an empty string
            when the PDF cannot be fetched (HTTP error) or parsed.
        """
        # First ensure we have the PDF
        pdf_path = self._pdf_path(paper_id, save_path)
        if not os.path.exists(pdf_path):
            try:
                pdf_path = self.download_pdf(paper_id, save_path)
            except requests.HTTPError as e:
                # An unavailable PDF keeps the best-effort contract of
                # returning "" instead of raising.
                print(f"Error reading PDF for paper {paper_id}: {e}")
                return ""

        # Read the PDF
        try:
            reader = PdfReader(pdf_path)
            # extract_text() may yield nothing for image-only pages.
            page_texts = [page.extract_text() or "" for page in reader.pages]
            return "\n".join(page_texts).strip()
        except Exception as e:
            print(f"Error reading PDF for paper {paper_id}: {e}")
            return ""
118
+
119
if __name__ == "__main__":
    # Manual smoke test for ArxivSearcher (performs real network requests).
    searcher = ArxivSearcher()

    # Exercise the search path.
    print("Testing search functionality...")
    query = "machine learning"
    max_results = 5
    # Start with an empty list so the `if papers:` checks below stay valid
    # even when the search raises.
    papers = []
    try:
        papers = searcher.search(query, max_results=max_results)
        print(f"Found {len(papers)} papers for query '{query}':")
        for i, paper in enumerate(papers, 1):
            print(f"{i}. {paper.title} (ID: {paper.paper_id})")
    except Exception as e:
        print(f"Error during search: {e}")

    # Exercise the PDF download path.
    if papers:
        print("\nTesting PDF download functionality...")
        paper_id = papers[0].paper_id
        save_path = "./downloads"  # make sure this directory exists
        try:
            os.makedirs(save_path, exist_ok=True)
            pdf_path = searcher.download_pdf(paper_id, save_path)
            print(f"PDF downloaded successfully: {pdf_path}")
        except Exception as e:
            print(f"Error during PDF download: {e}")

    # Exercise the paper reading path.
    if papers:
        print("\nTesting paper reading functionality...")
        paper_id = papers[0].paper_id
        try:
            text_content = searcher.read_paper(paper_id)
            print(f"\nFirst 500 characters of the paper content:")
            print(text_content[:500] + "...")
            print(f"\nTotal length of extracted text: {len(text_content)} characters")
        except Exception as e:
            print(f"Error during paper reading: {e}")
@@ -0,0 +1,54 @@
1
+ """Base class for all academic paper source searchers."""
2
+ from abc import ABC, abstractmethod
3
+ from typing import List
4
+ from ..paper import Paper
5
+
6
+
7
class PaperSource(ABC):
    """Common interface implemented by every academic paper source.

    Subclasses must provide :meth:`search`. PDF download and text
    extraction are optional capabilities whose default implementations
    raise :class:`NotImplementedError`.
    """

    @abstractmethod
    def search(self, query: str, **kwargs) -> List[Paper]:
        """Return the papers that match a query.

        Args:
            query: Search query string.
            **kwargs: Source-specific parameters (e.g., max_results, year).

        Returns:
            List of Paper objects.
        """

    def download_pdf(self, paper_id: str, save_path: str) -> str:
        """Fetch the PDF of a paper and store it on disk.

        Args:
            paper_id: Platform-specific paper identifier.
            save_path: Directory to save the downloaded PDF.

        Returns:
            Path to the saved PDF file.

        Raises:
            NotImplementedError: If the source does not support PDF downloads.
        """
        source_name = self.__class__.__name__
        message = f"{source_name} does not support PDF downloads."
        raise NotImplementedError(message)

    def read_paper(self, paper_id: str, save_path: str = "./downloads") -> str:
        """Obtain a paper's PDF and return its text.

        Args:
            paper_id: Platform-specific paper identifier.
            save_path: Directory where the PDF is/will be saved.

        Returns:
            Extracted text content of the paper.

        Raises:
            NotImplementedError: If the source does not support paper reading.
        """
        source_name = self.__class__.__name__
        message = f"{source_name} does not support reading paper content."
        raise NotImplementedError(message)
@@ -0,0 +1,253 @@
1
+ # paper_search/academic_platforms/base_search.py
2
+ """Searcher for BASE (Bielefeld Academic Search Engine).
3
+
4
+ BASE is one of the world's most voluminous search engines especially for
5
+ academic open access web resources. It provides OAI-PMH access to metadata
6
+ from thousands of repositories.
7
+
8
+ OAI-PMH Endpoint: https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi
9
+ Documentation: https://www.base-search.net/about/en/about_sources_date.php
10
+ """
11
+
12
+ from typing import List, Optional, Dict, Any
13
+ import logging
14
+ from .oaipmh import OAIPMHSearcher
15
+ from ..paper import Paper
16
+
17
# Module-level logger; getLogger(__name__) names it after this module.
logger = logging.getLogger(__name__)
18
+
19
+
20
class BASESearcher(OAIPMHSearcher):
    """Searcher for BASE (Bielefeld Academic Search Engine)."""

    # Set token added when open-access / fulltext-only results are requested.
    _OPEN_ACCESS_SET = 'dcterms:accessRights:open'

    def __init__(self):
        """Initialize BASE searcher with OAI-PMH endpoint."""
        super().__init__(
            base_url="https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi",
            metadata_prefix="oai_dc"
        )
        # Update User-Agent for BASE
        self.session.headers.update({
            'User-Agent': 'paper-search-cli/1.0.0 (BASE OAI-PMH client; https://github.com/openags/paper-search-cli)'
        })

    @classmethod
    def _with_open_access_set(cls, current: Optional[str]) -> str:
        """Return ``current`` with the open-access set token added exactly once.

        Tolerates a missing/None set and never produces a leading space or a
        duplicated token.
        """
        tokens = (current or '').split()
        if cls._OPEN_ACCESS_SET not in tokens:
            tokens.append(cls._OPEN_ACCESS_SET)
        return ' '.join(tokens)

    def search(self, query: str, max_results: int = 10, **kwargs) -> List[Paper]:
        """Search BASE using OAI-PMH with query filtering.

        Args:
            query: Search query string
            max_results: Maximum number of results to return
            **kwargs: Additional parameters:
                - set: OAI-PMH set specification (e.g., 'pubtype:article')
                - from_date: Harvest from date (YYYY-MM-DD)
                - until_date: Harvest until date (YYYY-MM-DD)
                - language: Filter by language (e.g., 'en', 'de')
                - subject: Filter by subject category
                - has_fulltext: Filter for fulltext availability (True/False)
                - open_access: Filter for open access content (True/False)

        Returns:
            List of Paper objects
        """
        # BASE-specific sets: both flags map onto the same open-access set,
        # so the token is added once no matter how many of them are set.
        if kwargs.get('has_fulltext') or kwargs.get('open_access'):
            kwargs['set'] = self._with_open_access_set(kwargs.get('set'))

        # Call parent OAI-PMH search
        papers = super().search(query, max_results, **kwargs)

        # Apply additional BASE-specific filtering, stopping as soon as
        # enough papers have been collected.
        filtered_papers = []
        for paper in papers:
            if self._filter_paper(paper, kwargs):
                filtered_papers.append(paper)
                if len(filtered_papers) >= max_results:
                    break

        # The slice also covers max_results <= 0, where the loop above may
        # already have collected one paper.
        return filtered_papers[:max_results]

    def _filter_paper(self, paper: Paper, filters: Dict[str, Any]) -> bool:
        """Apply BASE-specific filters to paper.

        Args:
            paper: Paper object
            filters: Filter parameters

        Returns:
            True if paper passes all filters
        """
        # Language filter: papers without a known language are rejected.
        if filters.get('language'):
            paper_lang = paper.extra.get('language', '').lower() if paper.extra else ''
            if not paper_lang or paper_lang != filters['language'].lower():
                return False

        # Subject filter: case-insensitive substring match against
        # categories and keywords (either may be missing).
        if filters.get('subject'):
            subject_lower = filters['subject'].lower()
            labels = list(paper.categories or []) + list(paper.keywords or [])
            if not any(subject_lower in label.lower() for label in labels):
                return False

        # Open access filter (already handled in OAI-PMH set)
        # Fulltext filter: require at least some link to the document.
        if filters.get('has_fulltext'):
            if not paper.pdf_url and not paper.url:
                return False

        return True

    def _enrich_paper_from_oai(self, paper: Paper, dc_root):
        """Enrich Paper object with BASE-specific metadata.

        Overrides parent method to extract BASE-specific fields.

        Args:
            paper: Paper object to enrich
            dc_root: Dublin Core XML element
        """
        super()._enrich_paper_from_oai(paper, dc_root)

        # BASE-specific fields
        if not paper.extra:
            paper.extra = {}

        # Extract BASE-specific identifiers (namespaced lookup first, plain
        # tag name as fallback).
        identifiers = dc_root.findall('.//{http://purl.org/dc/elements/1.1/}identifier') or \
            dc_root.findall('identifier')

        for ident_elem in identifiers:
            if ident_elem.text:
                # NOTE(review): the lower-cased text is what gets stored, not
                # only what is matched — confirm this is intended.
                ident_text = ident_elem.text.lower()
                if 'base-search.net' in ident_text:
                    paper.extra['base_id'] = ident_text
                elif 'urn:nbn:' in ident_text:
                    paper.extra['urn'] = ident_text
                elif 'hdl.handle.net' in ident_text:
                    paper.extra['handle'] = ident_text

        # Extract rights information
        rights_elems = dc_root.findall('.//{http://purl.org/dc/elements/1.1/}rights') or \
            dc_root.findall('rights')
        if rights_elems:
            paper.extra['rights'] = [elem.text for elem in rights_elems if elem.text]

        # Extract source repository
        source_elems = dc_root.findall('.//{http://purl.org/dc/elements/1.1/}source') or \
            dc_root.findall('source')
        if source_elems:
            paper.extra['repository'] = source_elems[0].text if source_elems[0].text else ''

        # Try to extract PDF URL from identifiers (original case preserved)
        if not paper.pdf_url:
            for ident_elem in identifiers:
                if ident_elem.text and ident_elem.text.lower().endswith('.pdf'):
                    paper.pdf_url = ident_elem.text
                    break

        # Extract BASE relevance score if available
        # (BASE doesn't provide relevance scores in OAI-PMH, but we might add it from other sources)

    def download_pdf(self, paper_id: str, save_path: str) -> str:
        """Download PDF for a BASE record.

        BASE often provides direct PDF links in metadata.

        Args:
            paper_id: BASE identifier or OAI-PMH identifier
            save_path: Directory to save PDF

        Returns:
            Path to saved PDF file

        Raises:
            ValueError: If no record matches ``paper_id``
            NotImplementedError: If the record has no PDF link
        """
        # Try parent method first (searches for PDF URL)
        try:
            return super().download_pdf(paper_id, save_path)
        except Exception as e:
            logger.warning(f"Parent download failed: {e}")

        # Try alternative approach: search for paper and use first PDF link
        papers = self.search(paper_id, max_results=1)
        if not papers:
            raise ValueError(f"BASE record not found: {paper_id}")

        paper = papers[0]
        if paper.pdf_url:
            import os
            response = self.session.get(paper.pdf_url, timeout=30)
            response.raise_for_status()
            os.makedirs(save_path, exist_ok=True)

            # Create safe filename
            safe_id = paper_id.replace('/', '_').replace(':', '_')
            filename = f"base_{safe_id}.pdf"
            output_file = os.path.join(save_path, filename)

            with open(output_file, 'wb') as f:
                f.write(response.content)

            logger.info(f"Downloaded PDF to {output_file}")
            return output_file

        raise NotImplementedError(
            f"No PDF available for BASE record: {paper_id}"
        )

    def read_paper(self, paper_id: str, save_path: str = "./downloads") -> str:
        """Read paper text from PDF.

        Args:
            paper_id: Paper identifier
            save_path: Directory where PDF is/will be saved

        Returns:
            Extracted text content

        Raises:
            NotImplementedError: If PDF cannot be read
        """
        try:
            return super().read_paper(paper_id, save_path)
        except Exception as e:
            logger.error(f"Error reading BASE paper {paper_id}: {e}")
            # Chain the original error so the root cause stays visible.
            raise NotImplementedError(
                f"Cannot read paper from BASE: {e}"
            ) from e
224
+
225
+
226
if __name__ == "__main__":
    # Manual smoke test for BASESearcher (performs real network requests).
    logging.basicConfig(level=logging.INFO)

    searcher = BASESearcher()

    # Test search
    print("Testing BASE search...")

    # Candidate queries; only the first one is exercised below.
    test_queries = (
        "machine learning",
        "artificial intelligence",
        "data science",
    )
    queries_to_run = test_queries[:1]

    for query in queries_to_run:
        print(f"\nSearching BASE for: '{query}'")
        papers = searcher.search(query, max_results=3)
        print(f"Found {len(papers)} papers")
        i = 0
        for paper in papers:
            has_pdf = 'Yes' if paper.pdf_url else 'No'
            print(f"{i+1}. {paper.title}")
            print(f" Authors: {', '.join(paper.authors[:3])}")
            print(f" Source: {paper.source}")
            print(f" PDF: {has_pdf}")
            print(f" URL: {paper.url}")
            print()
            i += 1