all-in-mcp 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,4 +3,4 @@ from .base import PaperSource
3
3
  from .cryptobib import CryptoBibSearcher
4
4
  from .iacr import IACRSearcher
5
5
 
6
- __all__ = ["PaperSource", "CryptoBibSearcher", "IACRSearcher"]
6
+ __all__ = ["CryptoBibSearcher", "IACRSearcher", "PaperSource"]
@@ -1,6 +1,5 @@
1
1
  # all_in_mcp/academic_platforms/cryptobib.py
2
2
  import logging
3
- import os
4
3
  import random
5
4
  import re
6
5
  from datetime import datetime
@@ -118,7 +117,7 @@ class CryptoBibSearcher(PaperSource):
118
117
 
119
118
  # Extract fields using a more robust approach
120
119
  # First, normalize the text by removing extra whitespace
121
- normalized_text = re.sub(r"\s+", " ", bibtex_text)
120
+ re.sub(r"\s+", " ", bibtex_text)
122
121
 
123
122
  # Extract fields with better pattern matching
124
123
  field_dict = {}
@@ -263,7 +262,7 @@ class CryptoBibSearcher(PaperSource):
263
262
  # Convert query to lowercase for case-insensitive search
264
263
  query_lower = query.lower()
265
264
 
266
- with open(self.bib_file_path, "r", encoding="utf-8") as f:
265
+ with open(self.bib_file_path, encoding="utf-8") as f:
267
266
  for line_num, line in enumerate(f, 1):
268
267
  # Check if this is the start of a new entry
269
268
  if line.strip().startswith("@") and not in_entry:
@@ -395,10 +394,10 @@ class CryptoBibSearcher(PaperSource):
395
394
  in_entry = False
396
395
  brace_count = 0
397
396
 
398
- with open(self.bib_file_path, "r", encoding="utf-8") as f:
397
+ with open(self.bib_file_path, encoding="utf-8") as f:
399
398
  for line in f:
400
399
  # Check if this is the start of the entry we're looking for
401
- if line.strip().startswith(f"@") and entry_key in line:
400
+ if line.strip().startswith("@") and entry_key in line:
402
401
  current_entry = line
403
402
  in_entry = True
404
403
  brace_count = line.count("{") - line.count("}")
all_in_mcp/paper.py CHANGED
@@ -1,6 +1,12 @@
1
1
  # all_in_mcp/paper.py
2
+ import io
2
3
  from dataclasses import dataclass
3
4
  from datetime import datetime
5
+ from pathlib import Path
6
+ from urllib.parse import urlparse
7
+
8
+ import httpx
9
+ from pypdf import PdfReader
4
10
 
5
11
 
6
12
  @dataclass
@@ -62,3 +68,134 @@ class Paper:
62
68
  "references": self.references,
63
69
  "extra": self.extra,
64
70
  }
71
+
72
def read_content(self) -> str:
    """
    Return the text extracted from the PDF this paper links to.

    Returns:
        str: Text content extracted from the PDF at ``self.pdf_url``

    Raises:
        ValueError: If ``self.pdf_url`` is empty or unset
        Exception: If the PDF cannot be read or processed
    """
    pdf_location = self.pdf_url
    if pdf_location:
        # Delegate download/parsing to the module-level reader
        return read_pdf(pdf_location)

    raise ValueError("No PDF URL available for this paper")
87
+
88
+
89
+ def read_pdf(pdf_source: str | Path) -> str:
90
+ """
91
+ Extract text content from a PDF file (local or online).
92
+
93
+ Args:
94
+ pdf_source: Path to local PDF file or URL to online PDF
95
+
96
+ Returns:
97
+ str: Extracted text content from the PDF
98
+
99
+ Raises:
100
+ FileNotFoundError: If local file doesn't exist
101
+ ValueError: If URL is invalid or PDF cannot be processed
102
+ Exception: For other PDF processing errors
103
+ """
104
+ try:
105
+ if isinstance(pdf_source, str | Path):
106
+ pdf_source_str = str(pdf_source)
107
+
108
+ # Check if it's a URL
109
+ parsed = urlparse(pdf_source_str)
110
+ if parsed.scheme in ("http", "https"):
111
+ # Handle online PDF
112
+ return _read_pdf_from_url(pdf_source_str)
113
+ else:
114
+ # Handle local file
115
+ return _read_pdf_from_file(Path(pdf_source_str))
116
+ else:
117
+ raise ValueError("pdf_source must be a string or Path object")
118
+
119
+ except Exception as e:
120
+ raise Exception(f"Failed to read PDF from {pdf_source}: {e!s}") from e
121
+
122
+
123
+ def _read_pdf_from_file(file_path: Path) -> str:
124
+ """Read PDF from local file path."""
125
+ if not file_path.exists():
126
+ raise FileNotFoundError(f"PDF file not found: {file_path}")
127
+
128
+ if not file_path.suffix.lower() == ".pdf":
129
+ raise ValueError(f"File must have .pdf extension: {file_path}")
130
+
131
+ try:
132
+ with open(file_path, "rb") as file:
133
+ pdf_reader = PdfReader(file)
134
+ text_content = []
135
+
136
+ for page_num, page in enumerate(pdf_reader.pages):
137
+ try:
138
+ page_text = page.extract_text()
139
+ if page_text.strip(): # Only add non-empty pages
140
+ text_content.append(
141
+ f"--- Page {page_num + 1} ---\n{page_text}\n"
142
+ )
143
+ except Exception as e:
144
+ text_content.append(
145
+ f"--- Page {page_num + 1} (Error reading page: {e!s}) ---\n"
146
+ )
147
+
148
+ return "\n".join(text_content)
149
+
150
+ except Exception as e:
151
+ raise Exception(f"Error reading PDF file {file_path}: {e!s}") from e
152
+
153
+
154
+ def _read_pdf_from_url(url: str) -> str:
155
+ """Download and read PDF from URL."""
156
+ try:
157
+ # Download PDF with proper headers
158
+ headers = {
159
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
160
+ }
161
+
162
+ with httpx.Client(timeout=30.0, headers=headers) as client:
163
+ response = client.get(url)
164
+ response.raise_for_status()
165
+
166
+ # Check if content is actually a PDF
167
+ content_type = response.headers.get("content-type", "").lower()
168
+ if "application/pdf" not in content_type and not url.lower().endswith(
169
+ ".pdf"
170
+ ):
171
+ # Try to detect PDF by content
172
+ if not response.content.startswith(b"%PDF"):
173
+ raise ValueError(f"URL does not point to a valid PDF file: {url}")
174
+
175
+ # Read PDF from bytes
176
+ pdf_bytes = io.BytesIO(response.content)
177
+ pdf_reader = PdfReader(pdf_bytes)
178
+ text_content = []
179
+
180
+ for page_num, page in enumerate(pdf_reader.pages):
181
+ try:
182
+ page_text = page.extract_text()
183
+ if page_text.strip(): # Only add non-empty pages
184
+ text_content.append(
185
+ f"--- Page {page_num + 1} ---\n{page_text}\n"
186
+ )
187
+ except Exception as e:
188
+ text_content.append(
189
+ f"--- Page {page_num + 1} (Error reading page: {e!s}) ---\n"
190
+ )
191
+
192
+ return "\n".join(text_content)
193
+
194
+ except httpx.RequestError as e:
195
+ raise Exception(f"Network error downloading PDF from {url}: {e!s}") from e
196
+ except httpx.HTTPStatusError as e:
197
+ raise Exception(
198
+ f"HTTP error {e.response.status_code} downloading PDF from {url}"
199
+ ) from e
200
+ except Exception as e:
201
+ raise Exception(f"Error processing PDF from URL {url}: {e!s}") from e
all_in_mcp/server.py CHANGED
@@ -1,13 +1,13 @@
1
- import os
2
- from typing import List, Dict
3
1
  import mcp.server.stdio
4
2
  import mcp.types as types
5
3
  from mcp.server import NotificationOptions, Server
6
4
  from mcp.server.models import InitializationOptions
7
5
 
6
+ from .academic_platforms.cryptobib import CryptoBibSearcher
7
+
8
8
  # Import searchers
9
9
  from .academic_platforms.iacr import IACRSearcher
10
- from .academic_platforms.cryptobib import CryptoBibSearcher
10
+ from .paper import read_pdf
11
11
 
12
12
  server = Server("all-in-mcp")
13
13
 
@@ -122,6 +122,20 @@ async def handle_list_tools() -> list[types.Tool]:
122
122
  "required": ["query"],
123
123
  },
124
124
  ),
125
+ types.Tool(
126
+ name="read-pdf",
127
+ description="Read and extract text content from a PDF file (local or online)",
128
+ inputSchema={
129
+ "type": "object",
130
+ "properties": {
131
+ "pdf_source": {
132
+ "type": "string",
133
+ "description": "Path to local PDF file or URL to online PDF",
134
+ },
135
+ },
136
+ "required": ["pdf_source"],
137
+ },
138
+ ),
125
139
  ]
126
140
 
127
141
 
@@ -323,6 +337,27 @@ async def handle_call_tool(
323
337
 
324
338
  return [types.TextContent(type="text", text=result_text)]
325
339
 
340
+ elif name == "read-pdf":
341
+ pdf_source = arguments.get("pdf_source", "")
342
+
343
+ if not pdf_source:
344
+ return [
345
+ types.TextContent(
346
+ type="text", text="Error: pdf_source parameter is required"
347
+ )
348
+ ]
349
+
350
+ try:
351
+ result = read_pdf(pdf_source)
352
+ return [types.TextContent(type="text", text=result)]
353
+
354
+ except Exception as e:
355
+ return [
356
+ types.TextContent(
357
+ type="text", text=f"Error reading PDF from {pdf_source}: {e!s}"
358
+ )
359
+ ]
360
+
326
361
  else:
327
362
  raise ValueError(f"Unknown tool: {name}")
328
363
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: all-in-mcp
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: An MCP (Model Context Protocol) server providing daily-use utility functions and academic paper search capabilities
5
5
  Project-URL: Homepage, https://github.com/jiahaoxiang2000/all-in-mcp
6
6
  Project-URL: Repository, https://github.com/jiahaoxiang2000/all-in-mcp
@@ -53,6 +53,7 @@ An MCP (Model Context Protocol) server that provides daily-use utility functions
53
53
 
54
54
  - **Academic Research**: IACR ePrint Archive paper search, download, and reading
55
55
  - **Bibliography Search**: CryptoBib database search for cryptography papers
56
+ - **PDF Reading**: Read and extract text from local and online PDF files
56
57
 
57
58
  ### Paper Search Capabilities
58
59
 
@@ -0,0 +1,12 @@
1
+ all_in_mcp/__init__.py,sha256=REDwcbifpuUnsFAhNowIKCZ-8g6irIzUFTI_f8Aunxk,215
2
+ all_in_mcp/paper.py,sha256=vSJyC_ehfZX5-ASYG048z8gaD1LKafFdJvR13iQcJRw,7104
3
+ all_in_mcp/server.py,sha256=pMGyRbgr_kwC_ZNsxMUwXcoEQ8fW4NZx3Sns7uRRa8I,15140
4
+ all_in_mcp/academic_platforms/__init__.py,sha256=2KgWMc38NBhRkiLYwqyKi43u-Wm5vWK8i-es3fQFlN0,210
5
+ all_in_mcp/academic_platforms/base.py,sha256=VYMp8_tnp7YzXKAXLfr7uUxgvJBNKRyC_NT1uVhBOwY,673
6
+ all_in_mcp/academic_platforms/cryptobib.py,sha256=F9N23eojfyAIjnFDPrJAYOpZ_Vi9iHOqNHGtKC6O16c,17360
7
+ all_in_mcp/academic_platforms/iacr.py,sha256=MUPxFycVS0eMsJok71y12RUqjxbRrCReG33V5ORAbfU,15450
8
+ all_in_mcp-0.2.3.dist-info/METADATA,sha256=43FE07lBZ-f92fi1AemtCEQO_IVXyKD1d_keztjtcYI,5750
9
+ all_in_mcp-0.2.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ all_in_mcp-0.2.3.dist-info/entry_points.txt,sha256=FbQOtUQzOIfkMNp4qQV1NTU9K4J7C0XGH9wKKhfK1VM,47
11
+ all_in_mcp-0.2.3.dist-info/licenses/LICENSE,sha256=idExTHItK7AC5FVo4H9HKnr6h51Z8BKCEztZPyP8nK8,1062
12
+ all_in_mcp-0.2.3.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- all_in_mcp/__init__.py,sha256=REDwcbifpuUnsFAhNowIKCZ-8g6irIzUFTI_f8Aunxk,215
2
- all_in_mcp/paper.py,sha256=QVH2BQpQT3I14T2IaZs1ZeC-MJVoFNVYZXSs1iHlGLY,2293
3
- all_in_mcp/server.py,sha256=CDiHXXMPlNPMLcpnjrZ5zoKrujNxZLryf8ecgtYt-bg,13971
4
- all_in_mcp/academic_platforms/__init__.py,sha256=-Asc2WpmfyvCCF0s-Ni6kcz8dkyyV7n3gjhhD2Oq1BA,210
5
- all_in_mcp/academic_platforms/base.py,sha256=VYMp8_tnp7YzXKAXLfr7uUxgvJBNKRyC_NT1uVhBOwY,673
6
- all_in_mcp/academic_platforms/cryptobib.py,sha256=4vLVNQdWBw6YLHPlw6bJVEGlsoihPE9rUfNCiAdu5Ic,17399
7
- all_in_mcp/academic_platforms/iacr.py,sha256=MUPxFycVS0eMsJok71y12RUqjxbRrCReG33V5ORAbfU,15450
8
- all_in_mcp-0.2.2.dist-info/METADATA,sha256=tHh2ZAZkW_mRnk44Arz06bn0xxyMV4xoAtJKuf7_rXs,5677
9
- all_in_mcp-0.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- all_in_mcp-0.2.2.dist-info/entry_points.txt,sha256=FbQOtUQzOIfkMNp4qQV1NTU9K4J7C0XGH9wKKhfK1VM,47
11
- all_in_mcp-0.2.2.dist-info/licenses/LICENSE,sha256=idExTHItK7AC5FVo4H9HKnr6h51Z8BKCEztZPyP8nK8,1062
12
- all_in_mcp-0.2.2.dist-info/RECORD,,