all-in-mcp 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- all_in_mcp/academic_platforms/__init__.py +2 -1
- all_in_mcp/academic_platforms/crossref.py +328 -0
- all_in_mcp/academic_platforms/google_scholar.py +245 -0
- all_in_mcp/server.py +297 -0
- {all_in_mcp-0.2.3.dist-info → all_in_mcp-0.2.5.dist-info}/METADATA +9 -1
- all_in_mcp-0.2.5.dist-info/RECORD +14 -0
- all_in_mcp-0.2.3.dist-info/RECORD +0 -12
- {all_in_mcp-0.2.3.dist-info → all_in_mcp-0.2.5.dist-info}/WHEEL +0 -0
- {all_in_mcp-0.2.3.dist-info → all_in_mcp-0.2.5.dist-info}/entry_points.txt +0 -0
- {all_in_mcp-0.2.3.dist-info → all_in_mcp-0.2.5.dist-info}/licenses/LICENSE +0 -0
all_in_mcp/academic_platforms/__init__.py
CHANGED
@@ -1,6 +1,7 @@
 # all_in_mcp/academic_platforms/__init__.py
 from .base import PaperSource
 from .cryptobib import CryptoBibSearcher
+from .google_scholar import GoogleScholarSearcher
 from .iacr import IACRSearcher

-__all__ = ["CryptoBibSearcher", "IACRSearcher", "PaperSource"]
+__all__ = ["CryptoBibSearcher", "GoogleScholarSearcher", "IACRSearcher", "PaperSource"]
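With the widened `__all__`, the new searcher becomes part of the subpackage's public surface. A minimal import sketch (not part of the diff; it only assumes the package is installed under its published module name `all_in_mcp`):

from all_in_mcp.academic_platforms import GoogleScholarSearcher, PaperSource

searcher = GoogleScholarSearcher()
assert isinstance(searcher, PaperSource)  # both names are re-exported by __init__.py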
all_in_mcp/academic_platforms/crossref.py
ADDED
@@ -0,0 +1,328 @@
+# all_in_mcp/academic_platforms/crossref.py
+import logging
+from datetime import datetime
+from typing import Optional
+from urllib.parse import quote_plus
+
+import httpx
+
+from ..paper import Paper
+from .base import PaperSource
+
+logger = logging.getLogger(__name__)
+
+
+class CrossrefSearcher(PaperSource):
+    """Crossref API paper search implementation"""
+
+    BASE_URL = "https://api.crossref.org"
+    WORKS_ENDPOINT = f"{BASE_URL}/works"
+
+    def __init__(self, email: Optional[str] = None):
+        """
+        Initialize Crossref searcher
+
+        Args:
+            email: Optional email for polite API usage (recommended by Crossref)
+        """
+        self.email = email
+        self.client = httpx.Client(timeout=30.0)
+
+    def _get_headers(self) -> dict:
+        """Get headers for API requests"""
+        headers = {
+            "User-Agent": "all-in-mcp/0.1.0 (https://github.com/user/all-in-mcp)"
+        }
+        if self.email:
+            headers["User-Agent"] += f" (mailto:{self.email})"
+        return headers
+
+    def _parse_date(self, date_parts: list) -> Optional[datetime]:
+        """Parse Crossref date parts into datetime"""
+        if not date_parts or not isinstance(date_parts, list):
+            return None
+
+        try:
+            # Crossref provides date as [[year, month, day]] or [[year, month]] or [[year]]
+            if len(date_parts) > 0 and isinstance(date_parts[0], list):
+                parts = date_parts[0]
+                year = parts[0] if len(parts) > 0 else 1
+                month = parts[1] if len(parts) > 1 else 1
+                day = parts[2] if len(parts) > 2 else 1
+                return datetime(year, month, day)
+        except (ValueError, IndexError, TypeError):
+            pass
+        return None
+
+    def _extract_authors(self, authors_data: list) -> list[str]:
+        """Extract author names from Crossref author data"""
+        authors = []
+        for author in authors_data or []:
+            if isinstance(author, dict):
+                given = author.get("given", "")
+                family = author.get("family", "")
+                if given and family:
+                    authors.append(f"{given} {family}")
+                elif family:
+                    authors.append(family)
+                elif given:
+                    authors.append(given)
+        return authors
+
+    def _parse_work(self, work: dict) -> Optional[Paper]:
+        """Parse a single work from Crossref API response"""
+        try:
+            # Extract basic information
+            title_list = work.get("title", [])
+            title = title_list[0] if title_list else ""
+
+            if not title:
+                return None
+
+            doi = work.get("DOI", "")
+            paper_id = doi or work.get("URL", "")
+
+            # Extract authors
+            authors = self._extract_authors(work.get("author", []))
+
+            # Extract abstract
+            abstract = work.get("abstract", "")
+            if abstract:
+                # Remove HTML tags if present
+                import re
+
+                abstract = re.sub(r"<[^>]+>", "", abstract)
+
+            # Extract publication date
+            published_date = (
+                self._parse_date(work.get("published-print", {}).get("date-parts"))
+                or self._parse_date(work.get("published-online", {}).get("date-parts"))
+                or self._parse_date(work.get("created", {}).get("date-parts"))
+            )
+
+            # Extract URLs
+            url = work.get("URL", "")
+            pdf_url = ""
+
+            # Look for PDF in links
+            links = work.get("link", [])
+            for link in links:
+                if link.get("content-type") == "application/pdf":
+                    pdf_url = link.get("URL", "")
+                    break
+
+            # Extract additional metadata
+            container_title = work.get("container-title", [])
+            journal = container_title[0] if container_title else ""
+
+            volume = work.get("volume", "")
+            issue = work.get("issue", "")
+            pages = work.get("page", "")
+
+            # Extract categories/subjects
+            categories = []
+            subjects = work.get("subject", [])
+            if subjects:
+                categories.extend(subjects)
+
+            # Citation count (if available)
+            citations = work.get("is-referenced-by-count", 0)
+
+            # Build extra metadata
+            extra = {
+                "journal": journal,
+                "volume": volume,
+                "issue": issue,
+                "pages": pages,
+                "type": work.get("type", ""),
+                "publisher": work.get("publisher", ""),
+                "issn": work.get("ISSN", []),
+                "isbn": work.get("ISBN", []),
+            }
+
+            # Remove empty values from extra
+            extra = {k: v for k, v in extra.items() if v}
+
+            return Paper(
+                paper_id=paper_id,
+                title=title,
+                authors=authors,
+                abstract=abstract,
+                doi=doi,
+                published_date=published_date or datetime(1900, 1, 1),
+                pdf_url=pdf_url,
+                url=url,
+                source="crossref",
+                categories=categories,
+                citations=citations,
+                extra=extra,
+            )
+
+        except Exception as e:
+            logger.error(f"Error parsing Crossref work: {e}")
+            return None
+
+    def search(
+        self,
+        query: str,
+        max_results: int = 10,
+        year_min: Optional[int] = None,
+        year_max: Optional[int] = None,
+        sort_by: str = "relevance",
+        **kwargs,
+    ) -> list[Paper]:
+        """
+        Search for papers using Crossref API
+
+        Args:
+            query: Search query string
+            max_results: Maximum number of results to return
+            year_min: Minimum publication year
+            year_max: Maximum publication year
+            sort_by: Sort order (relevance, published, indexed, updated)
+        """
+        if not query.strip():
+            return []
+
+        try:
+            params = {
+                "query": query,
+                "rows": min(max_results, 1000),  # Crossref max is 1000
+                "sort": sort_by,
+                "select": "DOI,title,author,abstract,published-print,published-online,created,URL,container-title,volume,issue,page,subject,is-referenced-by-count,type,publisher,ISSN,ISBN,link",
+            }
+
+            # Add year filters if specified
+            filters = []
+            if year_min:
+                filters.append(f"from-pub-date:{year_min}")
+            if year_max:
+                filters.append(f"until-pub-date:{year_max}")
+
+            if filters:
+                params["filter"] = ",".join(filters)
+
+            response = self.client.get(
+                self.WORKS_ENDPOINT, params=params, headers=self._get_headers()
+            )
+            response.raise_for_status()
+
+            data = response.json()
+            works = data.get("message", {}).get("items", [])
+
+            papers = []
+            for work in works:
+                paper = self._parse_work(work)
+                if paper:
+                    papers.append(paper)
+
+            return papers[:max_results]
+
+        except Exception as e:
+            logger.error(f"Error searching Crossref: {e}")
+            return []
+
+    def download_pdf(self, paper_id: str, save_path: str) -> str:
+        """
+        Download PDF for a paper (limited functionality for Crossref)
+
+        Note: Crossref is primarily a metadata service. PDF downloads
+        depend on publisher policies and may not always be available.
+        """
+        if not paper_id:
+            return "Error: paper_id is required"
+
+        try:
+            # If paper_id is a DOI, try to get work details first
+            if not paper_id.startswith("http"):
+                work_url = f"{self.WORKS_ENDPOINT}/{quote_plus(paper_id)}"
+                response = self.client.get(work_url, headers=self._get_headers())
+                response.raise_for_status()
+
+                work_data = response.json()
+                work = work_data.get("message", {})
+
+                # Look for PDF link
+                links = work.get("link", [])
+                pdf_url = None
+                for link in links:
+                    if link.get("content-type") == "application/pdf":
+                        pdf_url = link.get("URL")
+                        break
+
+                if not pdf_url:
+                    return f"Error: No PDF link found for DOI {paper_id}. Crossref provides metadata; PDFs are hosted by publishers."
+            else:
+                pdf_url = paper_id
+
+            # Attempt to download PDF
+            from pathlib import Path
+
+            save_path_obj = Path(save_path)
+            save_path_obj.mkdir(parents=True, exist_ok=True)
+
+            # Create filename from DOI or URL
+            if paper_id.startswith("10."):
+                filename = (
+                    f"crossref_{paper_id.replace('/', '_').replace('.', '_')}.pdf"
+                )
+            else:
+                filename = f"crossref_paper_{hash(paper_id) % 10000}.pdf"
+
+            file_path = save_path_obj / filename
+
+            pdf_response = self.client.get(pdf_url, headers=self._get_headers())
+            pdf_response.raise_for_status()
+
+            with open(file_path, "wb") as f:
+                f.write(pdf_response.content)
+
+            return str(file_path)
+
+        except Exception as e:
+            return f"Error downloading PDF: {e}"
+
+    def read_paper(self, paper_id: str, save_path: str) -> str:
+        """
+        Read paper text (downloads PDF first if needed)
+
+        Note: Success depends on PDF availability from publishers
+        """
+        if not paper_id:
+            return "Error: paper_id is required"
+
+        try:
+            # First try to download the PDF
+            pdf_path = self.download_pdf(paper_id, save_path)
+
+            if pdf_path.startswith("Error"):
+                return pdf_path
+
+            # Read the PDF using the existing read_pdf function
+            from ..paper import read_pdf
+
+            return read_pdf(pdf_path)
+
+        except Exception as e:
+            return f"Error reading paper: {e}"
+
+    def search_by_doi(self, doi: str) -> Optional[Paper]:
+        """Search for a specific paper by DOI"""
+        try:
+            work_url = f"{self.WORKS_ENDPOINT}/{quote_plus(doi)}"
+            response = self.client.get(work_url, headers=self._get_headers())
+            response.raise_for_status()
+
+            data = response.json()
+            work = data.get("message", {})
+
+            return self._parse_work(work)
+
+        except Exception as e:
+            logger.error(f"Error searching by DOI {doi}: {e}")
+            return None
+
+    def __del__(self):
+        """Clean up HTTP client"""
+        if hasattr(self, "client"):
+            self.client.close()
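CrossrefSearcher is a synchronous client around the public Crossref works endpoint. A minimal usage sketch, not part of the diff; the email, query, and DOI values below are placeholders:

from all_in_mcp.academic_platforms.crossref import CrossrefSearcher

# email is optional; Crossref recommends it for "polite" API usage
searcher = CrossrefSearcher(email="you@example.com")

# keyword search with a year window; rows are capped at Crossref's limit of 1000
papers = searcher.search("post-quantum cryptography", max_results=5, year_min=2020)
for paper in papers:
    print(paper.title, paper.doi, paper.citations)

# direct metadata lookup for a single record (placeholder DOI)
single = searcher.search_by_doi("10.1000/example")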
all_in_mcp/academic_platforms/google_scholar.py
ADDED
@@ -0,0 +1,245 @@
+# all_in_mcp/academic_platforms/google_scholar.py
+import logging
+import random
+import time
+from datetime import datetime
+from typing import Optional
+
+import requests
+from bs4 import BeautifulSoup
+
+from ..paper import Paper
+from .base import PaperSource
+
+logger = logging.getLogger(__name__)
+
+
+class GoogleScholarSearcher(PaperSource):
+    """Google Scholar paper search implementation"""
+
+    SCHOLAR_URL = "https://scholar.google.com/scholar"
+    BROWSERS = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0",
+    ]
+
+    def __init__(self):
+        """Initialize Google Scholar searcher"""
+        self._setup_session()
+
+    def _setup_session(self):
+        """Initialize session with random user agent"""
+        self.session = requests.Session()
+        self.session.headers.update(
+            {
+                "User-Agent": random.choice(self.BROWSERS),
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.9",
+                "Accept-Encoding": "gzip, deflate",
+                "DNT": "1",
+                "Connection": "keep-alive",
+                "Upgrade-Insecure-Requests": "1",
+            }
+        )
+
+    def _extract_year(self, text: str) -> Optional[int]:
+        """Extract publication year from text"""
+        words = text.replace(",", " ").replace("-", " ").split()
+        for word in words:
+            if word.isdigit() and 1900 <= int(word) <= datetime.now().year:
+                return int(word)
+        return None
+
+    def _extract_citations(self, item) -> int:
+        """Extract citation count from paper item"""
+        try:
+            citation_elem = item.find("div", class_="gs_fl")
+            if citation_elem:
+                citation_link = citation_elem.find(
+                    "a", string=lambda text: text and "Cited by" in text
+                )
+                if citation_link:
+                    citation_text = citation_link.get_text()
+                    # Extract number from "Cited by X" text
+                    citation_num = "".join(filter(str.isdigit, citation_text))
+                    return int(citation_num) if citation_num else 0
+            return 0
+        except Exception:
+            return 0
+
+    def _parse_paper(self, item) -> Optional[Paper]:
+        """Parse a single paper entry from HTML"""
+        try:
+            # Extract main paper elements
+            title_elem = item.find("h3", class_="gs_rt")
+            info_elem = item.find("div", class_="gs_a")
+            abstract_elem = item.find("div", class_="gs_rs")
+
+            if not title_elem or not info_elem:
+                return None
+
+            # Process title and URL
+            title_text = title_elem.get_text(strip=True)
+            # Remove common prefixes
+            title = (
+                title_text.replace("[PDF]", "")
+                .replace("[HTML]", "")
+                .replace("[BOOK]", "")
+                .strip()
+            )
+
+            link = title_elem.find("a", href=True)
+            url = link["href"] if link else ""
+
+            # Process author and publication info
+            info_text = info_elem.get_text()
+            info_parts = info_text.split(" - ")
+
+            # Extract authors (usually the first part before the first dash)
+            authors_text = info_parts[0] if info_parts else ""
+            authors = [a.strip() for a in authors_text.split(",") if a.strip()]
+
+            # Extract year from the info text
+            year = self._extract_year(info_text)
+
+            # Extract abstract
+            abstract = abstract_elem.get_text(strip=True) if abstract_elem else ""
+
+            # Extract citations
+            citations = self._extract_citations(item)
+
+            # Generate a paper ID based on the URL or title
+            paper_id = f"gs_{abs(hash(url if url else title))}"
+
+            # Create paper object
+            return Paper(
+                paper_id=paper_id,
+                title=title,
+                authors=authors,
+                abstract=abstract,
+                url=url,
+                pdf_url="",  # Google Scholar doesn't provide direct PDF links
+                published_date=datetime(year, 1, 1) if year else datetime.now(),
+                updated_date=None,
+                source="google_scholar",
+                categories=[],
+                keywords=[],
+                doi="",
+                citations=citations,
+                references=[],
+                extra={"info_text": info_text},
+            )
+        except Exception as e:
+            logger.warning(f"Failed to parse paper: {e}")
+            return None
+
+    def search(self, query: str, max_results: int = 10, **kwargs) -> list[Paper]:
+        """
+        Search Google Scholar for papers
+
+        Args:
+            query: Search query string
+            max_results: Maximum number of results to return
+            **kwargs: Additional search parameters (e.g., year_low, year_high)
+
+        Returns:
+            List of Paper objects
+        """
+        papers = []
+        start = 0
+        results_per_page = min(10, max_results)
+
+        # Extract additional parameters
+        year_low = kwargs.get("year_low")
+        year_high = kwargs.get("year_high")
+
+        while len(papers) < max_results:
+            try:
+                # Construct search parameters
+                params = {
+                    "q": query,
+                    "start": start,
+                    "hl": "en",
+                    "as_sdt": "0,5",  # Include articles and citations
+                    "num": results_per_page,
+                }
+
+                # Add year filters if provided
+                if year_low:
+                    params["as_ylo"] = year_low
+                if year_high:
+                    params["as_yhi"] = year_high
+
+                # Make request with random delay to avoid rate limiting
+                time.sleep(random.uniform(1.0, 3.0))
+                response = self.session.get(self.SCHOLAR_URL, params=params, timeout=30)
+
+                if response.status_code != 200:
+                    logger.error(f"Search failed with status {response.status_code}")
+                    break
+
+                # Parse results
+                soup = BeautifulSoup(response.text, "html.parser")
+                results = soup.find_all("div", class_="gs_ri")
+
+                if not results:
+                    logger.info("No more results found")
+                    break
+
+                # Process each result
+                for item in results:
+                    if len(papers) >= max_results:
+                        break
+
+                    paper = self._parse_paper(item)
+                    if paper:
+                        papers.append(paper)
+
+                start += results_per_page
+
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Network error during search: {e}")
+                break
+            except Exception as e:
+                logger.error(f"Search error: {e}")
+                break
+
+        return papers[:max_results]
+
+    def download_pdf(self, paper_id: str, save_path: str) -> str:
+        """
+        Google Scholar doesn't support direct PDF downloads
+
+        Args:
+            paper_id: Paper identifier
+            save_path: Directory to save the PDF
+
+        Returns:
+            Error message explaining limitation
+
+        Raises:
+            NotImplementedError: Always raises this error
+        """
+        raise NotImplementedError(
+            "Google Scholar doesn't provide direct PDF downloads. "
+            "Please use the paper URL to access the publisher's website."
+        )
+
+    def read_paper(self, paper_id: str, save_path: str = "./downloads") -> str:
+        """
+        Google Scholar doesn't support direct paper reading
+
+        Args:
+            paper_id: Paper identifier
+            save_path: Directory where papers are stored
+
+        Returns:
+            Message indicating the feature is not supported
+        """
+        return (
+            "Google Scholar doesn't support direct paper reading. "
+            "Please use the paper URL to access the full text on the publisher's website."
+        )
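GoogleScholarSearcher scrapes the regular Scholar results page rather than calling an official API, so the random delay and rotating User-Agent strings above are what keep it from being rate-limited; heavy use can still trigger CAPTCHAs. A minimal usage sketch with placeholder query values:

from all_in_mcp.academic_platforms.google_scholar import GoogleScholarSearcher

searcher = GoogleScholarSearcher()
papers = searcher.search("neural networks", max_results=5, year_low=2018, year_high=2024)
for paper in papers:
    print(f"{paper.title} ({paper.published_date.year}) - cited by {paper.citations}")

# download_pdf() always raises NotImplementedError and read_paper() returns an
# explanatory message; follow paper.url to the publisher site for full text.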
all_in_mcp/server.py
CHANGED
@@ -4,6 +4,8 @@ from mcp.server import NotificationOptions, Server
 from mcp.server.models import InitializationOptions

 from .academic_platforms.cryptobib import CryptoBibSearcher
+from .academic_platforms.crossref import CrossrefSearcher
+from .academic_platforms.google_scholar import GoogleScholarSearcher

 # Import searchers
 from .academic_platforms.iacr import IACRSearcher
@@ -14,6 +16,8 @@ server = Server("all-in-mcp")
 # Initialize searchers
 iacr_searcher = IACRSearcher()
 cryptobib_searcher = CryptoBibSearcher(cache_dir="./downloads")
+crossref_searcher = CrossrefSearcher()
+google_scholar_searcher = GoogleScholarSearcher()


 @server.list_tools()
@@ -122,6 +126,103 @@ async def handle_list_tools() -> list[types.Tool]:
                 "required": ["query"],
             },
         ),
+        types.Tool(
+            name="search-google-scholar-papers",
+            description="Search academic papers from Google Scholar",
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "Search query string (e.g., 'machine learning', 'neural networks')",
+                    },
+                    "max_results": {
+                        "type": "integer",
+                        "description": "Maximum number of papers to return (default: 10)",
+                        "default": 10,
+                    },
+                    "year_low": {
+                        "type": "integer",
+                        "description": "Minimum publication year (optional)",
+                    },
+                    "year_high": {
+                        "type": "integer",
+                        "description": "Maximum publication year (optional)",
+                    },
+                },
+                "required": ["query"],
+            },
+        ),
+        types.Tool(
+            name="search-crossref-papers",
+            description="Search academic papers from Crossref database",
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "Search query string (e.g., 'quantum computing', 'machine learning')",
+                    },
+                    "max_results": {
+                        "type": "integer",
+                        "description": "Maximum number of papers to return (default: 10)",
+                        "default": 10,
+                    },
+                    "year_min": {
+                        "type": "integer",
+                        "description": "Minimum publication year (optional)",
+                    },
+                    "year_max": {
+                        "type": "integer",
+                        "description": "Maximum publication year (optional)",
+                    },
+                    "sort_by": {
+                        "type": "string",
+                        "description": "Sort order: relevance, published, indexed, updated (default: relevance)",
+                        "default": "relevance",
+                    },
+                },
+                "required": ["query"],
+            },
+        ),
+        types.Tool(
+            name="download-crossref-paper",
+            description="Download PDF of a Crossref paper (when available from publisher)",
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "paper_id": {
+                        "type": "string",
+                        "description": "DOI or URL of the paper",
+                    },
+                    "save_path": {
+                        "type": "string",
+                        "description": "Directory to save the PDF (default: './downloads')",
+                        "default": "./downloads",
+                    },
+                },
+                "required": ["paper_id"],
+            },
+        ),
+        types.Tool(
+            name="read-crossref-paper",
+            description="Read and extract text content from a Crossref paper PDF",
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "paper_id": {
+                        "type": "string",
+                        "description": "DOI or URL of the paper",
+                    },
+                    "save_path": {
+                        "type": "string",
+                        "description": "Directory where the PDF is/will be saved (default: './downloads')",
+                        "default": "./downloads",
+                    },
+                },
+                "required": ["paper_id"],
+            },
+        ),
         types.Tool(
             name="read-pdf",
             description="Read and extract text content from a PDF file (local or online)",
@@ -337,6 +438,202 @@ async def handle_call_tool(

             return [types.TextContent(type="text", text=result_text)]

+        elif name == "search-google-scholar-papers":
+            query = arguments.get("query", "")
+            max_results = arguments.get("max_results", 10)
+            year_low = arguments.get("year_low")
+            year_high = arguments.get("year_high")
+
+            if not query:
+                return [
+                    types.TextContent(
+                        type="text", text="Error: Query parameter is required"
+                    )
+                ]
+
+            try:
+                papers = google_scholar_searcher.search(
+                    query,
+                    max_results=max_results,
+                    year_low=year_low,
+                    year_high=year_high,
+                )
+
+                if not papers:
+                    year_filter_msg = ""
+                    if year_low or year_high:
+                        year_range = (
+                            f" ({year_low or 'earliest'}-{year_high or 'latest'})"
+                        )
+                        year_filter_msg = f" in year range{year_range}"
+                    return [
+                        types.TextContent(
+                            type="text",
+                            text=f"No papers found for query: {query}{year_filter_msg}",
+                        )
+                    ]
+
+                year_filter_msg = ""
+                if year_low or year_high:
+                    year_range = f" ({year_low or 'earliest'}-{year_high or 'latest'})"
+                    year_filter_msg = f" in year range{year_range}"
+
+                result_text = f"Found {len(papers)} Google Scholar papers for query '{query}'{year_filter_msg}:\n\n"
+                for i, paper in enumerate(papers, 1):
+                    result_text += f"{i}. **{paper.title}**\n"
+                    result_text += f" - Authors: {', '.join(paper.authors)}\n"
+                    if paper.citations > 0:
+                        result_text += f" - Citations: {paper.citations}\n"
+                    if paper.published_date and paper.published_date.year > 1900:
+                        result_text += f" - Year: {paper.published_date.year}\n"
+                    if paper.url:
+                        result_text += f" - URL: {paper.url}\n"
+                    if paper.abstract:
+                        # Truncate abstract for readability
+                        abstract_preview = (
+                            paper.abstract[:300] + "..."
+                            if len(paper.abstract) > 300
+                            else paper.abstract
+                        )
+                        result_text += f" - Abstract: {abstract_preview}\n"
+                    result_text += "\n"
+
+                return [types.TextContent(type="text", text=result_text)]
+
+            except Exception as e:
+                return [
+                    types.TextContent(
+                        type="text", text=f"Error searching Google Scholar: {e!s}"
+                    )
+                ]
+
+        elif name == "search-crossref-papers":
+            query = arguments.get("query", "")
+            max_results = arguments.get("max_results", 10)
+            year_min = arguments.get("year_min")
+            year_max = arguments.get("year_max")
+            sort_by = arguments.get("sort_by", "relevance")
+
+            if not query:
+                return [
+                    types.TextContent(
+                        type="text", text="Error: Query parameter is required"
+                    )
+                ]
+
+            try:
+                papers = crossref_searcher.search(
+                    query,
+                    max_results=max_results,
+                    year_min=year_min,
+                    year_max=year_max,
+                    sort_by=sort_by,
+                )
+
+                if not papers:
+                    year_filter_msg = ""
+                    if year_min or year_max:
+                        year_range = (
+                            f" ({year_min or 'earliest'}-{year_max or 'latest'})"
+                        )
+                        year_filter_msg = f" in year range{year_range}"
+                    return [
+                        types.TextContent(
+                            type="text",
+                            text=f"No papers found for query: {query}{year_filter_msg}",
+                        )
+                    ]
+
+                year_filter_msg = ""
+                if year_min or year_max:
+                    year_range = f" ({year_min or 'earliest'}-{year_max or 'latest'})"
+                    year_filter_msg = f" in year range{year_range}"
+
+                result_text = f"Found {len(papers)} Crossref papers for query '{query}'{year_filter_msg}:\n\n"
+                for i, paper in enumerate(papers, 1):
+                    result_text += f"{i}. **{paper.title}**\n"
+                    result_text += f" - Authors: {', '.join(paper.authors)}\n"
+                    if paper.doi:
+                        result_text += f" - DOI: {paper.doi}\n"
+                    if paper.citations > 0:
+                        result_text += f" - Citations: {paper.citations}\n"
+                    if paper.published_date and paper.published_date.year > 1900:
+                        result_text += f" - Year: {paper.published_date.year}\n"
+                    if paper.extra and paper.extra.get("journal"):
+                        result_text += f" - Journal: {paper.extra['journal']}\n"
+                    if paper.extra and paper.extra.get("volume"):
+                        result_text += f" - Volume: {paper.extra['volume']}\n"
+                    if paper.extra and paper.extra.get("pages"):
+                        result_text += f" - Pages: {paper.extra['pages']}\n"
+                    if paper.url:
+                        result_text += f" - URL: {paper.url}\n"
+                    if paper.abstract:
+                        # Truncate abstract for readability
+                        abstract_preview = (
+                            paper.abstract[:300] + "..."
+                            if len(paper.abstract) > 300
+                            else paper.abstract
+                        )
+                        result_text += f" - Abstract: {abstract_preview}\n"
+                    result_text += "\n"
+
+                return [types.TextContent(type="text", text=result_text)]
+
+            except Exception as e:
+                return [
+                    types.TextContent(
+                        type="text", text=f"Error searching Crossref: {e!s}"
+                    )
+                ]
+
+        elif name == "download-crossref-paper":
+            paper_id = arguments.get("paper_id", "")
+            save_path = arguments.get("save_path", "./downloads")
+
+            if not paper_id:
+                return [
+                    types.TextContent(
+                        type="text", text="Error: paper_id parameter is required"
+                    )
+                ]
+
+            result = crossref_searcher.download_pdf(paper_id, save_path)
+
+            if result.startswith("Error"):
+                return [types.TextContent(type="text", text=result)]
+            else:
+                return [
+                    types.TextContent(
+                        type="text", text=f"PDF downloaded successfully to: {result}"
+                    )
+                ]
+
+        elif name == "read-crossref-paper":
+            paper_id = arguments.get("paper_id", "")
+            save_path = arguments.get("save_path", "./downloads")
+
+            if not paper_id:
+                return [
+                    types.TextContent(
+                        type="text", text="Error: paper_id parameter is required"
+                    )
+                ]
+
+            result = crossref_searcher.read_paper(paper_id, save_path)
+
+            if result.startswith("Error"):
+                return [types.TextContent(type="text", text=result)]
+            else:
+                # Truncate very long text for display
+                if len(result) > 5000:
+                    truncated_result = (
+                        result[:5000]
+                        + f"\n\n... [Text truncated. Full text is {len(result)} characters long]"
+                    )
+                    return [types.TextContent(type="text", text=truncated_result)]
+                else:
+                    return [types.TextContent(type="text", text=result)]
+
         elif name == "read-pdf":
             pdf_source = arguments.get("pdf_source", "")
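The four new tool branches mirror the searcher APIs one-to-one. A rough smoke-test sketch, assuming the `@server.call_tool()` decorator returns the original coroutine so it can be awaited directly; the query values and argument choices are placeholders taken from the inputSchema above:

import asyncio

from all_in_mcp.server import handle_call_tool


async def main():
    # Crossref search with the optional year and sort arguments
    out = await handle_call_tool(
        "search-crossref-papers",
        {"query": "machine learning", "max_results": 3, "year_min": 2020, "sort_by": "relevance"},
    )
    print(out[0].text)

    # Google Scholar search with the year_low/year_high filters
    out = await handle_call_tool(
        "search-google-scholar-papers",
        {"query": "machine learning", "max_results": 3, "year_low": 2020},
    )
    print(out[0].text)


asyncio.run(main())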
{all_in_mcp-0.2.3.dist-info → all_in_mcp-0.2.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: all-in-mcp
-Version: 0.2.3
+Version: 0.2.5
 Summary: An MCP (Model Context Protocol) server providing daily-use utility functions and academic paper search capabilities
 Project-URL: Homepage, https://github.com/jiahaoxiang2000/all-in-mcp
 Project-URL: Repository, https://github.com/jiahaoxiang2000/all-in-mcp
@@ -53,6 +53,7 @@ An MCP (Model Context Protocol) server that provides daily-use utility functions

 - **Academic Research**: IACR ePrint Archive paper search, download, and reading
 - **Bibliography Search**: CryptoBib database search for cryptography papers
+- **Google Scholar**: Search academic papers across disciplines with citation data
 - **PDF Reading**: Read and extract text from local and online PDF files

 ### Paper Search Capabilities
@@ -71,6 +72,13 @@ An MCP (Model Context Protocol) server that provides daily-use utility functions
 - Retrieve structured paper metadata or raw BibTeX entries
 - Support for all major cryptography venues and conferences

+#### Google Scholar
+
+- Search academic papers across multiple disciplines
+- Access to citation counts and publication metadata
+- Broad coverage of academic literature from various sources
+- Year-based filtering for targeted searches
+
 ## Quick Start

 ### Prerequisites
all_in_mcp-0.2.5.dist-info/RECORD
ADDED
@@ -0,0 +1,14 @@
+all_in_mcp/__init__.py,sha256=REDwcbifpuUnsFAhNowIKCZ-8g6irIzUFTI_f8Aunxk,215
+all_in_mcp/paper.py,sha256=vSJyC_ehfZX5-ASYG048z8gaD1LKafFdJvR13iQcJRw,7104
+all_in_mcp/server.py,sha256=DyfOtcVclzh6qgN42lETH_3cUXe2qrBXdCFUfDfQvH0,27740
+all_in_mcp/academic_platforms/__init__.py,sha256=IpI29DMS4_mSmTEa8VkQEiJCl7OyFbswSx7mWSp08P4,285
+all_in_mcp/academic_platforms/base.py,sha256=VYMp8_tnp7YzXKAXLfr7uUxgvJBNKRyC_NT1uVhBOwY,673
+all_in_mcp/academic_platforms/crossref.py,sha256=Lun9-D_MgS9kFlQquktg6NXsG1S6nU24s87Y_sOJc0s,10983
+all_in_mcp/academic_platforms/cryptobib.py,sha256=F9N23eojfyAIjnFDPrJAYOpZ_Vi9iHOqNHGtKC6O16c,17360
+all_in_mcp/academic_platforms/google_scholar.py,sha256=_KLFfIOZeFCGxFOt-nwzm1fgZKMlXOf3HvIjXAYE5cI,8737
+all_in_mcp/academic_platforms/iacr.py,sha256=MUPxFycVS0eMsJok71y12RUqjxbRrCReG33V5ORAbfU,15450
+all_in_mcp-0.2.5.dist-info/METADATA,sha256=PKH2VSeQJzZ7HiVKoh4zmkQntOsOuSDmsM6u0b8eifg,6067
+all_in_mcp-0.2.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+all_in_mcp-0.2.5.dist-info/entry_points.txt,sha256=FbQOtUQzOIfkMNp4qQV1NTU9K4J7C0XGH9wKKhfK1VM,47
+all_in_mcp-0.2.5.dist-info/licenses/LICENSE,sha256=idExTHItK7AC5FVo4H9HKnr6h51Z8BKCEztZPyP8nK8,1062
+all_in_mcp-0.2.5.dist-info/RECORD,,
@@ -1,12 +0,0 @@
|
|
1
|
-
all_in_mcp/__init__.py,sha256=REDwcbifpuUnsFAhNowIKCZ-8g6irIzUFTI_f8Aunxk,215
|
2
|
-
all_in_mcp/paper.py,sha256=vSJyC_ehfZX5-ASYG048z8gaD1LKafFdJvR13iQcJRw,7104
|
3
|
-
all_in_mcp/server.py,sha256=pMGyRbgr_kwC_ZNsxMUwXcoEQ8fW4NZx3Sns7uRRa8I,15140
|
4
|
-
all_in_mcp/academic_platforms/__init__.py,sha256=2KgWMc38NBhRkiLYwqyKi43u-Wm5vWK8i-es3fQFlN0,210
|
5
|
-
all_in_mcp/academic_platforms/base.py,sha256=VYMp8_tnp7YzXKAXLfr7uUxgvJBNKRyC_NT1uVhBOwY,673
|
6
|
-
all_in_mcp/academic_platforms/cryptobib.py,sha256=F9N23eojfyAIjnFDPrJAYOpZ_Vi9iHOqNHGtKC6O16c,17360
|
7
|
-
all_in_mcp/academic_platforms/iacr.py,sha256=MUPxFycVS0eMsJok71y12RUqjxbRrCReG33V5ORAbfU,15450
|
8
|
-
all_in_mcp-0.2.3.dist-info/METADATA,sha256=43FE07lBZ-f92fi1AemtCEQO_IVXyKD1d_keztjtcYI,5750
|
9
|
-
all_in_mcp-0.2.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
10
|
-
all_in_mcp-0.2.3.dist-info/entry_points.txt,sha256=FbQOtUQzOIfkMNp4qQV1NTU9K4J7C0XGH9wKKhfK1VM,47
|
11
|
-
all_in_mcp-0.2.3.dist-info/licenses/LICENSE,sha256=idExTHItK7AC5FVo4H9HKnr6h51Z8BKCEztZPyP8nK8,1062
|
12
|
-
all_in_mcp-0.2.3.dist-info/RECORD,,
|
{all_in_mcp-0.2.3.dist-info → all_in_mcp-0.2.5.dist-info}/WHEEL
File without changes
{all_in_mcp-0.2.3.dist-info → all_in_mcp-0.2.5.dist-info}/entry_points.txt
File without changes
{all_in_mcp-0.2.3.dist-info → all_in_mcp-0.2.5.dist-info}/licenses/LICENSE
File without changes