all-in-mcp 0.1.4__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- all_in_mcp/academic_platforms/__init__.py +5 -0
- all_in_mcp/academic_platforms/cryptobib.py +460 -0
- all_in_mcp/paper.py +137 -0
- all_in_mcp/server.py +176 -4
- {all_in_mcp-0.1.4.dist-info → all_in_mcp-0.2.3.dist-info}/METADATA +14 -2
- all_in_mcp-0.2.3.dist-info/RECORD +12 -0
- all_in_mcp-0.1.4.dist-info/RECORD +0 -11
- {all_in_mcp-0.1.4.dist-info → all_in_mcp-0.2.3.dist-info}/WHEEL +0 -0
- {all_in_mcp-0.1.4.dist-info → all_in_mcp-0.2.3.dist-info}/entry_points.txt +0 -0
- {all_in_mcp-0.1.4.dist-info → all_in_mcp-0.2.3.dist-info}/licenses/LICENSE +0 -0
all_in_mcp/academic_platforms/cryptobib.py ADDED

@@ -0,0 +1,460 @@

```python
# all_in_mcp/academic_platforms/cryptobib.py
import logging
import random
import re
from datetime import datetime
from pathlib import Path
from typing import ClassVar

import requests

from ..paper import Paper
from .base import PaperSource

logger = logging.getLogger(__name__)


class CryptoBibSearcher(PaperSource):
    """CryptoBib (https://cryptobib.di.ens.fr/) bibliography search implementation"""

    CRYPTOBIB_BASE_URL = "https://cryptobib.di.ens.fr"
    CRYPTOBIB_BIB_URL = "https://cryptobib.di.ens.fr/cryptobib/static/files/crypto.bib"
    BROWSERS: ClassVar[list[str]] = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
    ]

    def __init__(self, cache_dir: str = "./downloads"):
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(exist_ok=True)
        self.bib_file_path = self.cache_dir / "crypto.bib"
        self._setup_session()

    def _setup_session(self):
        """Initialize session with random user agent"""
        self.session = requests.Session()
        self.session.headers.update(
            {
                "User-Agent": random.choice(self.BROWSERS),
                "Accept": "text/plain,text/html,application/xhtml+xml",
                "Accept-Language": "en-US,en;q=0.9",
            }
        )

    def _download_bib_file(self, force_download: bool = False) -> bool:
        """
        Download the crypto.bib file if not exists or if force_download is True

        Args:
            force_download: Force download even if file exists

        Returns:
            bool: True if file is ready, False if download failed
        """
        try:
            # Check if file exists and force_download is False
            if self.bib_file_path.exists() and not force_download:
                logger.info(f"Using cached crypto.bib file at {self.bib_file_path}")
                return True

            logger.info("Downloading crypto.bib file from CryptoBib...")
            response = self.session.get(self.CRYPTOBIB_BIB_URL, stream=True)

            if response.status_code != 200:
                logger.error(
                    f"Failed to download crypto.bib: HTTP {response.status_code}"
                )
                return False

            # Download with progress indication
            total_size = int(response.headers.get("content-length", 0))
            downloaded = 0

            with open(self.bib_file_path, "wb") as f:
                for chunk in response.iter_content(
                    chunk_size=1024 * 1024
                ):  # 1MB chunks
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)
                        if total_size > 0:
                            progress = (downloaded / total_size) * 100
                            if downloaded % (1024 * 1024 * 5) == 0:  # Log every 5MB
                                logger.info(f"Download progress: {progress:.1f}%")

            logger.info(f"Successfully downloaded crypto.bib to {self.bib_file_path}")
            return True

        except requests.RequestException as e:
            logger.error(f"Error downloading crypto.bib: {e}")
            return False
        except Exception as e:
            logger.error(f"Unexpected error downloading crypto.bib: {e}")
            return False

    def _parse_bibtex_entry(self, bibtex_text: str) -> Paper | None:
        """Parse a single BibTeX entry into a Paper object"""
        try:
            # Extract entry type and key
            entry_match = re.match(r"@(\w+){([^,]+),", bibtex_text, re.IGNORECASE)
            if not entry_match:
                return None

            entry_type = entry_match.group(1).lower()
            entry_key = entry_match.group(2).strip()

            # Initialize fields
            title = ""
            authors = []
            year = None
            booktitle = ""
            journal = ""
            pages = ""
            doi = ""
            url = ""
            abstract = ""

            # Extract fields using a more robust approach
            # First, normalize the text by removing extra whitespace
            re.sub(r"\s+", " ", bibtex_text)

            # Extract fields with better pattern matching
            field_dict = {}

            # Pattern for quoted fields (handles multi-line)
            quoted_pattern = r'(\w+)\s*=\s*"([^"]*(?:[^"\\]|\\.)*)"'
            for match in re.finditer(quoted_pattern, bibtex_text, re.DOTALL):
                field_name = match.group(1).lower().strip()
                field_value = match.group(2).strip()
                field_dict[field_name] = field_value

            # Pattern for unquoted fields (like numbers)
            unquoted_pattern = r'(\w+)\s*=\s*([^,}\n"]+)'
            for match in re.finditer(unquoted_pattern, bibtex_text):
                field_name = match.group(1).lower().strip()
                field_value = match.group(2).strip()
                # Only add if not already present from quoted pattern
                if field_name not in field_dict:
                    field_dict[field_name] = field_value

            for field_name, field_value in field_dict.items():
                field_name = field_name.lower().strip()
                field_value = field_value.strip()

                if field_name == "title":
                    # Clean up title (remove LaTeX commands)
                    title = re.sub(r"[{}]", "", field_value)
                    title = re.sub(r"\\[a-zA-Z]+", "", title)
                    title = title.strip()

                elif field_name == "author":
                    # Parse authors - handle "and" separator
                    author_text = re.sub(r"[{}]", "", field_value)
                    authors = [
                        author.strip()
                        for author in re.split(r"\s+and\s+", author_text)
                        if author.strip()
                    ]

                elif field_name == "year":
                    try:
                        year = int(field_value)
                    except ValueError:
                        pass

                elif field_name == "booktitle":
                    booktitle = re.sub(r"[{}]", "", field_value)

                elif field_name == "journal":
                    journal = re.sub(r"[{}]", "", field_value)

                elif field_name == "pages":
                    pages = field_value

                elif field_name == "doi":
                    doi = field_value

                elif field_name == "url":
                    url = field_value

                elif field_name == "abstract":
                    abstract = field_value

            # Determine venue (journal or conference)
            venue = journal if journal else booktitle
            categories = [entry_type] if entry_type else []

            # Create published date
            published_date = datetime(year, 1, 1) if year else datetime(1900, 1, 1)

            return Paper(
                paper_id=entry_key,
                title=title,
                authors=authors,
                abstract=abstract,
                url=url,
                pdf_url="",  # CryptoBib doesn't provide PDF URLs
                published_date=published_date,
                updated_date=None,
                source="cryptobib",
                categories=categories,
                keywords=[],
                doi=doi,
                citations=0,
                extra={
                    "bibtex": bibtex_text.strip(),
                    "venue": venue,
                    "pages": pages,
                    "entry_type": entry_type,
                },
            )

        except Exception as e:
            logger.warning(f"Failed to parse BibTeX entry: {e}")
            return None

    def search_bibtex(
        self,
        query: str,
        max_results: int = 10,
        force_download: bool = False,
        year_min: int | None = None,
        year_max: int | None = None,
    ) -> list[str]:
        """
        Search CryptoBib and return raw BibTeX entries

        Args:
            query: Search query string
            max_results: Maximum number of results to return
            force_download: Force download the newest crypto.bib file
            year_min: Minimum publication year (inclusive, optional)
            year_max: Maximum publication year (inclusive, optional)

        Returns:
            List[str]: List of BibTeX entries as strings

        Example:
            >>> searcher = CryptoBibSearcher()
            >>> # Search for recent implementation papers (2020-2024)
            >>> entries = searcher.search_bibtex("implement", max_results=5,
            ...                                  year_min=2020, year_max=2024)
            >>>
            >>> # Search for older RSA papers (1980-2000)
            >>> entries = searcher.search_bibtex("RSA", max_results=10,
            ...                                  year_min=1980, year_max=2000)
        """
        bibtex_entries = []

        try:
            # Ensure we have the bib file locally
            if not self._download_bib_file(force_download=force_download):
                logger.error("Failed to download crypto.bib file")
                return bibtex_entries

            # Search in the local file
            logger.info(f"Searching local crypto.bib file for: {query}")
            current_entry = ""
            in_entry = False
            brace_count = 0

            # Convert query to lowercase for case-insensitive search
            query_lower = query.lower()

            with open(self.bib_file_path, encoding="utf-8") as f:
                for line_num, line in enumerate(f, 1):
                    # Check if this is the start of a new entry
                    if line.strip().startswith("@") and not in_entry:
                        current_entry = line
                        in_entry = True
                        brace_count = line.count("{") - line.count("}")
                    elif in_entry:
                        current_entry += line
                        brace_count += line.count("{") - line.count("}")

                        # Check if entry is complete
                        if brace_count <= 0:
                            # Entry is complete, check if it matches the query
                            if query_lower in current_entry.lower():
                                # Check year range if specified
                                if self._entry_matches_year_range(
                                    current_entry, year_min, year_max
                                ):
                                    bibtex_entries.append(current_entry.strip())
                                    logger.info(
                                        f"Found matching entry {len(bibtex_entries)} at line {line_num}"
                                    )

                                    if len(bibtex_entries) >= max_results:
                                        break

                            # Reset for next entry
                            current_entry = ""
                            in_entry = False
                            brace_count = 0

        except FileNotFoundError:
            logger.error(f"crypto.bib file not found at {self.bib_file_path}")
        except Exception as e:
            logger.error(f"CryptoBib search error: {e}")

        return bibtex_entries[:max_results]

    def search(
        self,
        query: str,
        max_results: int = 10,
        return_bibtex: bool = False,
        force_download: bool = False,
        year_min: int | None = None,
        year_max: int | None = None,
    ) -> list[Paper]:
        """
        Search CryptoBib bibliography

        Args:
            query: Search query string
            max_results: Maximum number of results to return
            return_bibtex: If True, include raw BibTeX in results
            force_download: Force download the newest crypto.bib file
            year_min: Minimum publication year (inclusive, optional)
            year_max: Maximum publication year (inclusive, optional)

        Returns:
            List[Paper]: List of paper objects

        Example:
            >>> searcher = CryptoBibSearcher()
            >>> # Search for recent zero-knowledge papers (2020-2024)
            >>> papers = searcher.search("zero knowledge", max_results=5,
            ...                          year_min=2020, year_max=2024)
            >>>
            >>> # Search for classic RSA papers (1977-1990)
            >>> papers = searcher.search("RSA", max_results=10,
            ...                          year_min=1977, year_max=1990)
        """
        papers = []

        try:
            # Get BibTeX entries
            bibtex_entries = self.search_bibtex(
                query,
                max_results,
                force_download=force_download,
                year_min=year_min,
                year_max=year_max,
            )

            # Parse each entry into Paper objects
            for i, bibtex_text in enumerate(bibtex_entries):
                logger.info(f"Parsing entry {i+1}/{len(bibtex_entries)}")
                paper = self._parse_bibtex_entry(bibtex_text)
                if paper:
                    papers.append(paper)

        except Exception as e:
            logger.error(f"CryptoBib search error: {e}")

        return papers

    def download_pdf(self, paper_id: str, save_path: str) -> str:
        """
        CryptoBib doesn't provide PDF downloads
        """
        return "Error: CryptoBib is a bibliography database and doesn't provide PDF downloads"

    def read_paper(self, paper_id: str, save_path: str = "./downloads") -> str:
        """
        CryptoBib doesn't provide paper content reading
        """
        return "Error: CryptoBib is a bibliography database and doesn't provide paper content"

    def get_bibtex_by_key(
        self, entry_key: str, force_download: bool = False
    ) -> str | None:
        """
        Get a specific BibTeX entry by its key

        Args:
            entry_key: The BibTeX entry key (e.g., "ACISP:LZXSW24")
            force_download: Force download the newest crypto.bib file

        Returns:
            str: The BibTeX entry or None if not found
        """
        try:
            # Ensure we have the bib file locally
            if not self._download_bib_file(force_download=force_download):
                logger.error("Failed to download crypto.bib file")
                return None

            logger.info(f"Searching for BibTeX entry: {entry_key}")
            current_entry = ""
            in_entry = False
            brace_count = 0

            with open(self.bib_file_path, encoding="utf-8") as f:
                for line in f:
                    # Check if this is the start of the entry we're looking for
                    if line.strip().startswith("@") and entry_key in line:
                        current_entry = line
                        in_entry = True
                        brace_count = line.count("{") - line.count("}")
                    elif in_entry:
                        current_entry += line
                        brace_count += line.count("{") - line.count("}")

                        # Check if entry is complete
                        if brace_count <= 0:
                            return current_entry.strip()

        except FileNotFoundError:
            logger.error(f"crypto.bib file not found at {self.bib_file_path}")
        except Exception as e:
            logger.error(f"Error searching for entry {entry_key}: {e}")

        return None

    def _entry_matches_year_range(
        self, bibtex_entry: str, year_min: int | None, year_max: int | None
    ) -> bool:
        """
        Check if a BibTeX entry falls within the specified year range

        Args:
            bibtex_entry: Raw BibTeX entry text
            year_min: Minimum year (inclusive, None means no minimum)
            year_max: Maximum year (inclusive, None means no maximum)

        Returns:
            bool: True if entry is within year range, False otherwise
        """
        # If no year constraints specified, all entries match
        if year_min is None and year_max is None:
            return True

        try:
            # Extract year from the BibTeX entry
            year_match = re.search(
                r'year\s*=\s*(?:["{\s]*)?(\d{4})', bibtex_entry, re.IGNORECASE
            )
            if not year_match:
                # If no year found, exclude from results when year filtering is requested
                return False

            entry_year = int(year_match.group(1))

            # Check minimum year constraint
            if year_min is not None and entry_year < year_min:
                return False

            # Check maximum year constraint
            if year_max is not None and entry_year > year_max:
                return False

            return True

        except (ValueError, AttributeError):
            # If year parsing fails, exclude from results when year filtering is requested
            return False
```
all_in_mcp/paper.py CHANGED

```diff
@@ -1,6 +1,12 @@
 # all_in_mcp/paper.py
+import io
 from dataclasses import dataclass
 from datetime import datetime
+from pathlib import Path
+from urllib.parse import urlparse
+
+import httpx
+from pypdf import PdfReader
 
 
 @dataclass
@@ -62,3 +68,134 @@ class Paper:
             "references": self.references,
             "extra": self.extra,
         }
+
+    def read_content(self) -> str:
+        """
+        Read the full text content of this paper's PDF.
+
+        Returns:
+            str: Extracted text content from the paper's PDF
+
+        Raises:
+            ValueError: If no PDF URL is available
+            Exception: If PDF cannot be read or processed
+        """
+        if not self.pdf_url:
+            raise ValueError("No PDF URL available for this paper")
+
+        return read_pdf(self.pdf_url)
+
+
+def read_pdf(pdf_source: str | Path) -> str:
+    """
+    Extract text content from a PDF file (local or online).
+
+    Args:
+        pdf_source: Path to local PDF file or URL to online PDF
+
+    Returns:
+        str: Extracted text content from the PDF
+
+    Raises:
+        FileNotFoundError: If local file doesn't exist
+        ValueError: If URL is invalid or PDF cannot be processed
+        Exception: For other PDF processing errors
+    """
+    try:
+        if isinstance(pdf_source, str | Path):
+            pdf_source_str = str(pdf_source)
+
+            # Check if it's a URL
+            parsed = urlparse(pdf_source_str)
+            if parsed.scheme in ("http", "https"):
+                # Handle online PDF
+                return _read_pdf_from_url(pdf_source_str)
+            else:
+                # Handle local file
+                return _read_pdf_from_file(Path(pdf_source_str))
+        else:
+            raise ValueError("pdf_source must be a string or Path object")
+
+    except Exception as e:
+        raise Exception(f"Failed to read PDF from {pdf_source}: {e!s}") from e
+
+
+def _read_pdf_from_file(file_path: Path) -> str:
+    """Read PDF from local file path."""
+    if not file_path.exists():
+        raise FileNotFoundError(f"PDF file not found: {file_path}")
+
+    if not file_path.suffix.lower() == ".pdf":
+        raise ValueError(f"File must have .pdf extension: {file_path}")
+
+    try:
+        with open(file_path, "rb") as file:
+            pdf_reader = PdfReader(file)
+            text_content = []
+
+            for page_num, page in enumerate(pdf_reader.pages):
+                try:
+                    page_text = page.extract_text()
+                    if page_text.strip():  # Only add non-empty pages
+                        text_content.append(
+                            f"--- Page {page_num + 1} ---\n{page_text}\n"
+                        )
+                except Exception as e:
+                    text_content.append(
+                        f"--- Page {page_num + 1} (Error reading page: {e!s}) ---\n"
+                    )
+
+            return "\n".join(text_content)
+
+    except Exception as e:
+        raise Exception(f"Error reading PDF file {file_path}: {e!s}") from e
+
+
+def _read_pdf_from_url(url: str) -> str:
+    """Download and read PDF from URL."""
+    try:
+        # Download PDF with proper headers
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
+
+        with httpx.Client(timeout=30.0, headers=headers) as client:
+            response = client.get(url)
+            response.raise_for_status()
+
+            # Check if content is actually a PDF
+            content_type = response.headers.get("content-type", "").lower()
+            if "application/pdf" not in content_type and not url.lower().endswith(
+                ".pdf"
+            ):
+                # Try to detect PDF by content
+                if not response.content.startswith(b"%PDF"):
+                    raise ValueError(f"URL does not point to a valid PDF file: {url}")
+
+            # Read PDF from bytes
+            pdf_bytes = io.BytesIO(response.content)
+            pdf_reader = PdfReader(pdf_bytes)
+            text_content = []
+
+            for page_num, page in enumerate(pdf_reader.pages):
+                try:
+                    page_text = page.extract_text()
+                    if page_text.strip():  # Only add non-empty pages
+                        text_content.append(
+                            f"--- Page {page_num + 1} ---\n{page_text}\n"
+                        )
+                except Exception as e:
+                    text_content.append(
+                        f"--- Page {page_num + 1} (Error reading page: {e!s}) ---\n"
+                    )
+
+            return "\n".join(text_content)
+
+    except httpx.RequestError as e:
+        raise Exception(f"Network error downloading PDF from {url}: {e!s}") from e
+    except httpx.HTTPStatusError as e:
+        raise Exception(
+            f"HTTP error {e.response.status_code} downloading PDF from {url}"
+        ) from e
+    except Exception as e:
+        raise Exception(f"Error processing PDF from URL {url}: {e!s}") from e
```
all_in_mcp/server.py CHANGED

````diff
@@ -1,17 +1,19 @@
-import os
-from typing import List, Dict
 import mcp.server.stdio
 import mcp.types as types
 from mcp.server import NotificationOptions, Server
 from mcp.server.models import InitializationOptions
 
-
+from .academic_platforms.cryptobib import CryptoBibSearcher
+
+# Import searchers
 from .academic_platforms.iacr import IACRSearcher
+from .paper import read_pdf
 
 server = Server("all-in-mcp")
 
-# Initialize
+# Initialize searchers
 iacr_searcher = IACRSearcher()
+cryptobib_searcher = CryptoBibSearcher(cache_dir="./downloads")
 
 
 @server.list_tools()
@@ -83,6 +85,57 @@ async def handle_list_tools() -> list[types.Tool]:
                 "required": ["paper_id"],
             },
         ),
+        types.Tool(
+            name="search-cryptobib-papers",
+            description="Search CryptoBib bibliography database for cryptography papers",
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "Search query string (e.g., 'cryptography', 'lattice', 'homomorphic')",
+                    },
+                    "max_results": {
+                        "type": "integer",
+                        "description": "Maximum number of papers to return (default: 10)",
+                        "default": 10,
+                    },
+                    "return_bibtex": {
+                        "type": "boolean",
+                        "description": "Whether to return raw BibTeX entries (default: False)",
+                        "default": False,
+                    },
+                    "force_download": {
+                        "type": "boolean",
+                        "description": "Force download the newest crypto.bib file (default: False)",
+                        "default": False,
+                    },
+                    "year_min": {
+                        "type": "integer",
+                        "description": "Minimum publication year (inclusive, optional)",
+                    },
+                    "year_max": {
+                        "type": "integer",
+                        "description": "Maximum publication year (inclusive, optional)",
+                    },
+                },
+                "required": ["query"],
+            },
+        ),
+        types.Tool(
+            name="read-pdf",
+            description="Read and extract text content from a PDF file (local or online)",
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "pdf_source": {
+                        "type": "string",
+                        "description": "Path to local PDF file or URL to online PDF",
+                    },
+                },
+                "required": ["pdf_source"],
+            },
+        ),
     ]
 
 
@@ -186,6 +239,125 @@ async def handle_call_tool(
         else:
             return [types.TextContent(type="text", text=result)]
 
+    elif name == "search-cryptobib-papers":
+        query = arguments.get("query", "")
+        max_results = arguments.get("max_results", 10)
+        return_bibtex = arguments.get("return_bibtex", False)
+        force_download = arguments.get("force_download", False)
+        year_min = arguments.get("year_min")
+        year_max = arguments.get("year_max")
+
+        if not query:
+            return [
+                types.TextContent(
+                    type="text", text="Error: Query parameter is required"
+                )
+            ]
+
+        if return_bibtex:
+            # Return raw BibTeX entries
+            bibtex_entries = cryptobib_searcher.search_bibtex(
+                query,
+                max_results,
+                force_download=force_download,
+                year_min=year_min,
+                year_max=year_max,
+            )
+
+            if not bibtex_entries:
+                year_filter_msg = ""
+                if year_min or year_max:
+                    year_range = (
+                        f" ({year_min or 'earliest'}-{year_max or 'latest'})"
+                    )
+                    year_filter_msg = f" in year range{year_range}"
+                return [
+                    types.TextContent(
+                        type="text",
+                        text=f"No BibTeX entries found for query: {query}{year_filter_msg}",
+                    )
+                ]
+
+            year_filter_msg = ""
+            if year_min or year_max:
+                year_range = f" ({year_min or 'earliest'}-{year_max or 'latest'})"
+                year_filter_msg = f" in year range{year_range}"
+            result_text = f"Found {len(bibtex_entries)} BibTeX entries for query '{query}'{year_filter_msg}:\n\n"
+            for i, entry in enumerate(bibtex_entries, 1):
+                result_text += f"Entry {i}:\n```bibtex\n{entry}\n```\n\n"
+
+            return [types.TextContent(type="text", text=result_text)]
+        else:
+            # Return parsed Paper objects
+            papers = cryptobib_searcher.search(
+                query,
+                max_results,
+                force_download=force_download,
+                year_min=year_min,
+                year_max=year_max,
+            )
+
+            if not papers:
+                year_filter_msg = ""
+                if year_min or year_max:
+                    year_range = (
+                        f" ({year_min or 'earliest'}-{year_max or 'latest'})"
+                    )
+                    year_filter_msg = f" in year range{year_range}"
+                return [
+                    types.TextContent(
+                        type="text",
+                        text=f"No papers found for query: {query}{year_filter_msg}",
+                    )
+                ]
+
+            year_filter_msg = ""
+            if year_min or year_max:
+                year_range = f" ({year_min or 'earliest'}-{year_max or 'latest'})"
+                year_filter_msg = f" in year range{year_range}"
+            result_text = f"Found {len(papers)} CryptoBib papers for query '{query}'{year_filter_msg}:\n\n"
+            for i, paper in enumerate(papers, 1):
+                result_text += f"{i}. **{paper.title}**\n"
+                result_text += f"   - Entry Key: {paper.paper_id}\n"
+                result_text += f"   - Authors: {', '.join(paper.authors)}\n"
+                if paper.extra and "venue" in paper.extra:
+                    result_text += f"   - Venue: {paper.extra['venue']}\n"
+                if paper.published_date and paper.published_date.year > 1900:
+                    result_text += f"   - Year: {paper.published_date.year}\n"
+                if paper.doi:
+                    result_text += f"   - DOI: {paper.doi}\n"
+                if paper.extra and "pages" in paper.extra:
+                    result_text += f"   - Pages: {paper.extra['pages']}\n"
+                # Only include BibTeX when explicitly requested
+                if return_bibtex and paper.extra and "bibtex" in paper.extra:
+                    result_text += (
+                        f"   - BibTeX:\n```bibtex\n{paper.extra['bibtex']}\n```\n"
+                    )
+                result_text += "\n"
+
+            return [types.TextContent(type="text", text=result_text)]
+
+    elif name == "read-pdf":
+        pdf_source = arguments.get("pdf_source", "")
+
+        if not pdf_source:
+            return [
+                types.TextContent(
+                    type="text", text="Error: pdf_source parameter is required"
+                )
+            ]
+
+        try:
+            result = read_pdf(pdf_source)
+            return [types.TextContent(type="text", text=result)]
+
+        except Exception as e:
+            return [
+                types.TextContent(
+                    type="text", text=f"Error reading PDF from {pdf_source}: {e!s}"
+                )
+            ]
+
     else:
         raise ValueError(f"Unknown tool: {name}")
 
````
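The two new tools expose the searcher and the PDF reader over MCP. As a sketch of the argument shapes a client would send, here are dictionaries matching the `inputSchema` definitions above (the concrete values are illustrative, not taken from the diff):

```python
# Arguments for the "search-cryptobib-papers" tool.
search_args = {
    "query": "homomorphic",   # required; substring match over raw entries
    "max_results": 5,         # optional, default 10
    "return_bibtex": True,    # optional; True returns raw BibTeX blocks
    "force_download": False,  # optional; True re-fetches crypto.bib
    "year_min": 2018,         # optional, inclusive lower bound
    "year_max": 2024,         # optional, inclusive upper bound
}

# Arguments for the "read-pdf" tool.
read_pdf_args = {"pdf_source": "./downloads/paper.pdf"}  # hypothetical path
```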
{all_in_mcp-0.1.4.dist-info → all_in_mcp-0.2.3.dist-info}/METADATA CHANGED

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: all-in-mcp
-Version: 0.1.4
+Version: 0.2.3
 Summary: An MCP (Model Context Protocol) server providing daily-use utility functions and academic paper search capabilities
 Project-URL: Homepage, https://github.com/jiahaoxiang2000/all-in-mcp
 Project-URL: Repository, https://github.com/jiahaoxiang2000/all-in-mcp
@@ -52,14 +52,25 @@ An MCP (Model Context Protocol) server that provides daily-use utility functions
 ### Daily Utilities
 
 - **Academic Research**: IACR ePrint Archive paper search, download, and reading
+- **Bibliography Search**: CryptoBib database search for cryptography papers
+- **PDF Reading**: Read and extract text from local and online PDF files
 
 ### Paper Search Capabilities
 
+#### IACR ePrint Archive
+
 - Search academic papers from IACR ePrint Archive
 - Download PDF files
 - Extract and read text content from papers
 - Metadata extraction (authors, publication dates, abstracts)
 
+#### CryptoBib Database
+
+- Search comprehensive cryptography bibliography database
+- Access to thousands of cryptographic research papers
+- Retrieve structured paper metadata or raw BibTeX entries
+- Support for all major cryptography venues and conferences
+
 ## Quick Start
 
 ### Prerequisites
@@ -75,7 +86,7 @@ An MCP (Model Context Protocol) server that provides daily-use utility functions
 pip install all-in-mcp
 ```
 
-###
+### Option 2: Install from Source
 
 1. Clone this repository:
 
@@ -201,6 +212,7 @@ Complete documentation is available in the [`docs/`](docs/) directory:
 - **[API Reference](docs/api.md)** - Complete API documentation
 - **[Installation Guide](docs/installation.md)** - Setup instructions
 - **[IACR Integration](docs/iacr.md)** - Academic paper search details
+- **[CryptoBib Integration](docs/cryptobib.md)** - Bibliography database search
 - **[Development Guide](docs/development.md)** - Contributing guidelines
 - **[PyPI Setup Guide](docs/pypi-setup.md)** - Publishing configuration
 - **[Examples](docs/examples.md)** - Usage examples
````
all_in_mcp-0.2.3.dist-info/RECORD ADDED

```diff
@@ -0,0 +1,12 @@
+all_in_mcp/__init__.py,sha256=REDwcbifpuUnsFAhNowIKCZ-8g6irIzUFTI_f8Aunxk,215
+all_in_mcp/paper.py,sha256=vSJyC_ehfZX5-ASYG048z8gaD1LKafFdJvR13iQcJRw,7104
+all_in_mcp/server.py,sha256=pMGyRbgr_kwC_ZNsxMUwXcoEQ8fW4NZx3Sns7uRRa8I,15140
+all_in_mcp/academic_platforms/__init__.py,sha256=2KgWMc38NBhRkiLYwqyKi43u-Wm5vWK8i-es3fQFlN0,210
+all_in_mcp/academic_platforms/base.py,sha256=VYMp8_tnp7YzXKAXLfr7uUxgvJBNKRyC_NT1uVhBOwY,673
+all_in_mcp/academic_platforms/cryptobib.py,sha256=F9N23eojfyAIjnFDPrJAYOpZ_Vi9iHOqNHGtKC6O16c,17360
+all_in_mcp/academic_platforms/iacr.py,sha256=MUPxFycVS0eMsJok71y12RUqjxbRrCReG33V5ORAbfU,15450
+all_in_mcp-0.2.3.dist-info/METADATA,sha256=43FE07lBZ-f92fi1AemtCEQO_IVXyKD1d_keztjtcYI,5750
+all_in_mcp-0.2.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+all_in_mcp-0.2.3.dist-info/entry_points.txt,sha256=FbQOtUQzOIfkMNp4qQV1NTU9K4J7C0XGH9wKKhfK1VM,47
+all_in_mcp-0.2.3.dist-info/licenses/LICENSE,sha256=idExTHItK7AC5FVo4H9HKnr6h51Z8BKCEztZPyP8nK8,1062
+all_in_mcp-0.2.3.dist-info/RECORD,,
```
all_in_mcp-0.1.4.dist-info/RECORD DELETED

```diff
@@ -1,11 +0,0 @@
-all_in_mcp/__init__.py,sha256=REDwcbifpuUnsFAhNowIKCZ-8g6irIzUFTI_f8Aunxk,215
-all_in_mcp/paper.py,sha256=QVH2BQpQT3I14T2IaZs1ZeC-MJVoFNVYZXSs1iHlGLY,2293
-all_in_mcp/server.py,sha256=HHopmtcjoCLlmcf2P3DWlj0pn42fjQ1L0UF44WHK6Mw,7604
-all_in_mcp/academic_platforms/__init__.py,sha256=FbxJcL8B7hP0KjBvtanNuvt6qfla3B_q-KjxdjnxukY,44
-all_in_mcp/academic_platforms/base.py,sha256=VYMp8_tnp7YzXKAXLfr7uUxgvJBNKRyC_NT1uVhBOwY,673
-all_in_mcp/academic_platforms/iacr.py,sha256=MUPxFycVS0eMsJok71y12RUqjxbRrCReG33V5ORAbfU,15450
-all_in_mcp-0.1.4.dist-info/METADATA,sha256=tTuprD0AU0V0qhuDwg4vgQRHEuvga5tS-i29tGVFyOk,5219
-all_in_mcp-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-all_in_mcp-0.1.4.dist-info/entry_points.txt,sha256=FbQOtUQzOIfkMNp4qQV1NTU9K4J7C0XGH9wKKhfK1VM,47
-all_in_mcp-0.1.4.dist-info/licenses/LICENSE,sha256=idExTHItK7AC5FVo4H9HKnr6h51Z8BKCEztZPyP8nK8,1062
-all_in_mcp-0.1.4.dist-info/RECORD,,
```
{all_in_mcp-0.1.4.dist-info → all_in_mcp-0.2.3.dist-info}/WHEEL: File without changes

{all_in_mcp-0.1.4.dist-info → all_in_mcp-0.2.3.dist-info}/entry_points.txt: File without changes

{all_in_mcp-0.1.4.dist-info → all_in_mcp-0.2.3.dist-info}/licenses/LICENSE: File without changes