PyPI - all-in-mcp - Versions diffs - 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

all-in-mcp 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

all_in_mcp/academic_platforms/__init__.py CHANGED Viewed

@@ -3,4 +3,4 @@ from .base import PaperSource
 from .cryptobib import CryptoBibSearcher
 from .iacr import IACRSearcher
-__all__ = ["PaperSource", "CryptoBibSearcher", "IACRSearcher"]
+__all__ = ["CryptoBibSearcher", "IACRSearcher", "PaperSource"]

all_in_mcp/academic_platforms/cryptobib.py CHANGED Viewed

@@ -1,6 +1,5 @@
 # all_in_mcp/academic_platforms/cryptobib.py
 import logging
-import os
 import random
 import re
 from datetime import datetime
@@ -118,7 +117,7 @@ class CryptoBibSearcher(PaperSource):
             # Extract fields using a more robust approach
             # First, normalize the text by removing extra whitespace
-            normalized_text = re.sub(r"\s+", " ", bibtex_text)
+            re.sub(r"\s+", " ", bibtex_text)
             # Extract fields with better pattern matching
             field_dict = {}
@@ -263,7 +262,7 @@ class CryptoBibSearcher(PaperSource):
             # Convert query to lowercase for case-insensitive search
             query_lower = query.lower()
-            with open(self.bib_file_path, "r", encoding="utf-8") as f:
+            with open(self.bib_file_path, encoding="utf-8") as f:
                 for line_num, line in enumerate(f, 1):
                     # Check if this is the start of a new entry
                     if line.strip().startswith("@") and not in_entry:
@@ -395,10 +394,10 @@ class CryptoBibSearcher(PaperSource):
             in_entry = False
             brace_count = 0
-            with open(self.bib_file_path, "r", encoding="utf-8") as f:
+            with open(self.bib_file_path, encoding="utf-8") as f:
                 for line in f:
                     # Check if this is the start of the entry we're looking for
-                    if line.strip().startswith(f"@") and entry_key in line:
+                    if line.strip().startswith("@") and entry_key in line:
                         current_entry = line
                         in_entry = True
                         brace_count = line.count("{") - line.count("}")

all_in_mcp/paper.py CHANGED Viewed

@@ -1,6 +1,12 @@
 # all_in_mcp/paper.py
+import io
 from dataclasses import dataclass
 from datetime import datetime
+from pathlib import Path
+from urllib.parse import urlparse
+import httpx
+from pypdf import PdfReader
 @dataclass
@@ -62,3 +68,134 @@ class Paper:
             "references": self.references,
             "extra": self.extra,
         }
+    def read_content(self) -> str:
+        """
+        Read the full text content of this paper's PDF.
+        Returns:
+            str: Extracted text content from the paper's PDF
+        Raises:
+            ValueError: If no PDF URL is available
+            Exception: If PDF cannot be read or processed
+        """
+        if not self.pdf_url:
+            raise ValueError("No PDF URL available for this paper")
+        return read_pdf(self.pdf_url)
+def read_pdf(pdf_source: str | Path) -> str:
+    """
+    Extract text content from a PDF file (local or online).
+    Args:
+        pdf_source: Path to local PDF file or URL to online PDF
+    Returns:
+        str: Extracted text content from the PDF
+    Raises:
+        FileNotFoundError: If local file doesn't exist
+        ValueError: If URL is invalid or PDF cannot be processed
+        Exception: For other PDF processing errors
+    """
+    try:
+        if isinstance(pdf_source, str | Path):
+            pdf_source_str = str(pdf_source)
+            # Check if it's a URL
+            parsed = urlparse(pdf_source_str)
+            if parsed.scheme in ("http", "https"):
+                # Handle online PDF
+                return _read_pdf_from_url(pdf_source_str)
+            else:
+                # Handle local file
+                return _read_pdf_from_file(Path(pdf_source_str))
+        else:
+            raise ValueError("pdf_source must be a string or Path object")
+    except Exception as e:
+        raise Exception(f"Failed to read PDF from {pdf_source}: {e!s}") from e
+def _read_pdf_from_file(file_path: Path) -> str:
+    """Read PDF from local file path."""
+    if not file_path.exists():
+        raise FileNotFoundError(f"PDF file not found: {file_path}")
+    if not file_path.suffix.lower() == ".pdf":
+        raise ValueError(f"File must have .pdf extension: {file_path}")
+    try:
+        with open(file_path, "rb") as file:
+            pdf_reader = PdfReader(file)
+            text_content = []
+            for page_num, page in enumerate(pdf_reader.pages):
+                try:
+                    page_text = page.extract_text()
+                    if page_text.strip():  # Only add non-empty pages
+                        text_content.append(
+                            f"--- Page {page_num + 1} ---\n{page_text}\n"
+                        )
+                except Exception as e:
+                    text_content.append(
+                        f"--- Page {page_num + 1} (Error reading page: {e!s}) ---\n"
+                    )
+            return "\n".join(text_content)
+    except Exception as e:
+        raise Exception(f"Error reading PDF file {file_path}: {e!s}") from e
+def _read_pdf_from_url(url: str) -> str:
+    """Download and read PDF from URL."""
+    try:
+        # Download PDF with proper headers
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
+        with httpx.Client(timeout=30.0, headers=headers) as client:
+            response = client.get(url)
+            response.raise_for_status()
+            # Check if content is actually a PDF
+            content_type = response.headers.get("content-type", "").lower()
+            if "application/pdf" not in content_type and not url.lower().endswith(
+                ".pdf"
+            ):
+                # Try to detect PDF by content
+                if not response.content.startswith(b"%PDF"):
+                    raise ValueError(f"URL does not point to a valid PDF file: {url}")
+            # Read PDF from bytes
+            pdf_bytes = io.BytesIO(response.content)
+            pdf_reader = PdfReader(pdf_bytes)
+            text_content = []
+            for page_num, page in enumerate(pdf_reader.pages):
+                try:
+                    page_text = page.extract_text()
+                    if page_text.strip():  # Only add non-empty pages
+                        text_content.append(
+                            f"--- Page {page_num + 1} ---\n{page_text}\n"
+                        )
+                except Exception as e:
+                    text_content.append(
+                        f"--- Page {page_num + 1} (Error reading page: {e!s}) ---\n"
+                    )
+            return "\n".join(text_content)
+    except httpx.RequestError as e:
+        raise Exception(f"Network error downloading PDF from {url}: {e!s}") from e
+    except httpx.HTTPStatusError as e:
+        raise Exception(
+            f"HTTP error {e.response.status_code} downloading PDF from {url}"
+        ) from e
+    except Exception as e:
+        raise Exception(f"Error processing PDF from URL {url}: {e!s}") from e

all_in_mcp/server.py CHANGED Viewed

@@ -1,13 +1,13 @@
-import os
-from typing import List, Dict
 import mcp.server.stdio
 import mcp.types as types
 from mcp.server import NotificationOptions, Server
 from mcp.server.models import InitializationOptions
+from .academic_platforms.cryptobib import CryptoBibSearcher
 # Import searchers
 from .academic_platforms.iacr import IACRSearcher
-from .academic_platforms.cryptobib import CryptoBibSearcher
+from .paper import read_pdf
 server = Server("all-in-mcp")
@@ -122,6 +122,20 @@ async def handle_list_tools() -> list[types.Tool]:
                 "required": ["query"],
             },
         ),
+        types.Tool(
+            name="read-pdf",
+            description="Read and extract text content from a PDF file (local or online)",
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "pdf_source": {
+                        "type": "string",
+                        "description": "Path to local PDF file or URL to online PDF",
+                    },
+                },
+                "required": ["pdf_source"],
+            },
+        ),
     ]
@@ -323,6 +337,27 @@ async def handle_call_tool(
                 return [types.TextContent(type="text", text=result_text)]
+        elif name == "read-pdf":
+            pdf_source = arguments.get("pdf_source", "")
+            if not pdf_source:
+                return [
+                    types.TextContent(
+                        type="text", text="Error: pdf_source parameter is required"
+                    )
+                ]
+            try:
+                result = read_pdf(pdf_source)
+                return [types.TextContent(type="text", text=result)]
+            except Exception as e:
+                return [
+                    types.TextContent(
+                        type="text", text=f"Error reading PDF from {pdf_source}: {e!s}"
+                    )
+                ]
         else:
             raise ValueError(f"Unknown tool: {name}")

{all_in_mcp-0.2.2.dist-info → all_in_mcp-0.2.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: all-in-mcp
-Version: 0.2.2
+Version: 0.2.3
 Summary: An MCP (Model Context Protocol) server providing daily-use utility functions and academic paper search capabilities
 Project-URL: Homepage, https://github.com/jiahaoxiang2000/all-in-mcp
 Project-URL: Repository, https://github.com/jiahaoxiang2000/all-in-mcp
@@ -53,6 +53,7 @@ An MCP (Model Context Protocol) server that provides daily-use utility functions
 - **Academic Research**: IACR ePrint Archive paper search, download, and reading
 - **Bibliography Search**: CryptoBib database search for cryptography papers
+- **PDF Reading**: Read and extract text from local and online PDF files
 ### Paper Search Capabilities

all_in_mcp-0.2.3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+all_in_mcp/__init__.py,sha256=REDwcbifpuUnsFAhNowIKCZ-8g6irIzUFTI_f8Aunxk,215
+all_in_mcp/paper.py,sha256=vSJyC_ehfZX5-ASYG048z8gaD1LKafFdJvR13iQcJRw,7104
+all_in_mcp/server.py,sha256=pMGyRbgr_kwC_ZNsxMUwXcoEQ8fW4NZx3Sns7uRRa8I,15140
+all_in_mcp/academic_platforms/__init__.py,sha256=2KgWMc38NBhRkiLYwqyKi43u-Wm5vWK8i-es3fQFlN0,210
+all_in_mcp/academic_platforms/base.py,sha256=VYMp8_tnp7YzXKAXLfr7uUxgvJBNKRyC_NT1uVhBOwY,673
+all_in_mcp/academic_platforms/cryptobib.py,sha256=F9N23eojfyAIjnFDPrJAYOpZ_Vi9iHOqNHGtKC6O16c,17360
+all_in_mcp/academic_platforms/iacr.py,sha256=MUPxFycVS0eMsJok71y12RUqjxbRrCReG33V5ORAbfU,15450
+all_in_mcp-0.2.3.dist-info/METADATA,sha256=43FE07lBZ-f92fi1AemtCEQO_IVXyKD1d_keztjtcYI,5750
+all_in_mcp-0.2.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+all_in_mcp-0.2.3.dist-info/entry_points.txt,sha256=FbQOtUQzOIfkMNp4qQV1NTU9K4J7C0XGH9wKKhfK1VM,47
+all_in_mcp-0.2.3.dist-info/licenses/LICENSE,sha256=idExTHItK7AC5FVo4H9HKnr6h51Z8BKCEztZPyP8nK8,1062
+all_in_mcp-0.2.3.dist-info/RECORD,,

all_in_mcp-0.2.2.dist-info/RECORD DELETED Viewed

@@ -1,12 +0,0 @@
-all_in_mcp/__init__.py,sha256=REDwcbifpuUnsFAhNowIKCZ-8g6irIzUFTI_f8Aunxk,215
-all_in_mcp/paper.py,sha256=QVH2BQpQT3I14T2IaZs1ZeC-MJVoFNVYZXSs1iHlGLY,2293
-all_in_mcp/server.py,sha256=CDiHXXMPlNPMLcpnjrZ5zoKrujNxZLryf8ecgtYt-bg,13971
-all_in_mcp/academic_platforms/__init__.py,sha256=-Asc2WpmfyvCCF0s-Ni6kcz8dkyyV7n3gjhhD2Oq1BA,210
-all_in_mcp/academic_platforms/base.py,sha256=VYMp8_tnp7YzXKAXLfr7uUxgvJBNKRyC_NT1uVhBOwY,673
-all_in_mcp/academic_platforms/cryptobib.py,sha256=4vLVNQdWBw6YLHPlw6bJVEGlsoihPE9rUfNCiAdu5Ic,17399
-all_in_mcp/academic_platforms/iacr.py,sha256=MUPxFycVS0eMsJok71y12RUqjxbRrCReG33V5ORAbfU,15450
-all_in_mcp-0.2.2.dist-info/METADATA,sha256=tHh2ZAZkW_mRnk44Arz06bn0xxyMV4xoAtJKuf7_rXs,5677
-all_in_mcp-0.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-all_in_mcp-0.2.2.dist-info/entry_points.txt,sha256=FbQOtUQzOIfkMNp4qQV1NTU9K4J7C0XGH9wKKhfK1VM,47
-all_in_mcp-0.2.2.dist-info/licenses/LICENSE,sha256=idExTHItK7AC5FVo4H9HKnr6h51Z8BKCEztZPyP8nK8,1062
-all_in_mcp-0.2.2.dist-info/RECORD,,

{all_in_mcp-0.2.2.dist-info → all_in_mcp-0.2.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{all_in_mcp-0.2.2.dist-info → all_in_mcp-0.2.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{all_in_mcp-0.2.2.dist-info → all_in_mcp-0.2.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

all-in-mcp 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

all-in-mcp 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl