PyPI - academia-mcp - Versions diffs - 1.9.2__tar.gz → 1.10.0__tar.gz - Mend

academia-mcp 1.9.2tar.gz → 1.10.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

{academia_mcp-1.9.2 → academia_mcp-1.10.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: academia-mcp
-Version: 1.9.2
+Version: 1.10.0
 Summary: MCP server that provides different tools to search for scientific publications
 Author-email: Ilya Gusev <phoenixilya@gmail.com>
 Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp

{academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/server.py RENAMED Viewed

@@ -38,7 +38,8 @@ from academia_mcp.tools.bitflip import (
     score_research_proposals,
 )
 from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
-from academia_mcp.tools.show_image import show_image
+from academia_mcp.tools.show_image import show_image, describe_image
+from academia_mcp.tools.speech_to_text import speech_to_text
 def configure_uvicorn_style_logging(level: int = logging.INFO) -> None:
@@ -116,11 +117,17 @@ def run(
         server.add_tool(generate_research_proposals)
         server.add_tool(score_research_proposals)
         server.add_tool(document_qa)
+        server.add_tool(describe_image)
         if settings.WORKSPACE_DIR:
             server.add_tool(review_pdf_paper)
     else:
         logger.warning("No OpenRouter API key is set, LLM-related tools will not be available!")
+    if settings.OPENAI_API_KEY:
+        server.add_tool(speech_to_text)
+    else:
+        logger.warning("No OpenAI API key is set, speech_to_text will not be available!")
     if port is None:
         if settings.PORT is not None:
             port = int(settings.PORT)

{academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/settings.py RENAMED Viewed

@@ -11,6 +11,7 @@ class Settings(BaseSettings):
     TAVILY_API_KEY: Optional[str] = None
     EXA_API_KEY: Optional[str] = None
     BRAVE_API_KEY: Optional[str] = None
+    OPENAI_API_KEY: Optional[str] = None
     REVIEW_MODEL_NAME: str = "gpt-5"
     BITFLIP_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
@@ -18,6 +19,7 @@ class Settings(BaseSettings):
     DOCUMENT_QA_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
     DOCUMENT_QA_QUESTION_MAX_LENGTH: int = 10000
     DOCUMENT_QA_DOCUMENT_MAX_LENGTH: int = 200000
+    DESCRIBE_IMAGE_MODEL_NAME: str = "gpt-4.1"
     PORT: int = 5056
     WORKSPACE_DIR: Optional[Path] = None

{academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/__init__.py RENAMED Viewed

@@ -14,7 +14,8 @@ from .web_search import web_search, tavily_web_search, exa_web_search, brave_web
 from .visit_webpage import visit_webpage
 from .bitflip import extract_bitflip_info, generate_research_proposals, score_research_proposals
 from .review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url
-from .show_image import show_image
+from .show_image import show_image, describe_image
+from .speech_to_text import speech_to_text
 __all__ = [
     "arxiv_search",
@@ -42,4 +43,6 @@ __all__ = [
     "download_pdf_paper",
     "read_pdf",
     "show_image",
+    "describe_image",
+    "speech_to_text",
 ]

academia_mcp-1.10.0/academia_mcp/tools/show_image.py ADDED Viewed

@@ -0,0 +1,104 @@
+import base64
+from pathlib import Path
+from io import BytesIO
+from typing import Dict, Optional
+from textwrap import dedent
+import httpx
+from PIL import Image
+from academia_mcp.files import get_workspace_dir
+from academia_mcp.settings import settings
+from academia_mcp.llm import llm_acall, ChatMessage
+# Built-in prompts for describe_image, keyed by its description_type argument.
+# The multi-line prompts start with a backslash-continued line so that every line of
+# the literal shares the same indentation; otherwise dedent() finds no common margin
+# (the first line would have zero indent) and leaves the source indentation in the prompt.
+DESCRIBE_PROMPTS = {
+    "general": "Provide a general description of this image. Focus on the main subjects, colors, and overall scene.",
+    "detailed": dedent(
+        """\
+        Analyze this image in detail. Include:
+        1. Main subjects and their relationships
+        2. Colors, lighting, and composition
+        3. Any text or symbols present
+        4. Context or possible meaning
+        5. Notable details or interesting elements"""
+    ),
+    "chess": dedent(
+        """\
+        Analyze this chess position and provide a detailed description including:
+        1. List of pieces on the board for both white and black
+        2. Whose turn it is to move
+        3. Basic evaluation of the position
+        4. Any immediate tactical opportunities or threats
+        5. Suggested next moves with brief explanations"""
+    ),
+    "text": "Extract and describe any text present in this image. If there are multiple pieces of text, organize them clearly.",
+}
+def show_image(path: str) -> Dict[str, str]:
+    """
+    Reads an image from the specified URL or from the current work directory.
+    Always call this function at the end of the code block.
+    For instance:
+    ```python
+    show_image("https://example.com/image.png")
+    ```
+    Do not print it ever, just return as the last expression.
+    Returns a dictionary with a single "image_base64" key holding the image
+    re-encoded as PNG and base64-encoded.
+    Args:
+        path: Path to file inside current work directory or web URL
+    """
+    if path.startswith("http"):
+        response = httpx.get(path, timeout=10)
+        response.raise_for_status()
+        source = BytesIO(response.content)
+    else:
+        assert settings.WORKSPACE_DIR is not None, "WORKSPACE_DIR is not set"
+        full_path = Path(path)
+        if not full_path.exists():
+            # Fall back to resolving the path relative to the workspace directory.
+            full_path = Path(get_workspace_dir()) / path
+            assert full_path.exists(), f"Image file {path} does not exist"
+        source = str(full_path)
+    buffer_io = BytesIO()
+    # The context manager closes the underlying file once the PNG has been written,
+    # instead of leaving it to garbage collection.
+    with Image.open(source) as image:
+        image.save(buffer_io, format="PNG")
+    img_bytes = buffer_io.getvalue()
+    return {"image_base64": base64.b64encode(img_bytes).decode("utf-8")}
+async def describe_image(
+    path: str, description_type: str = "general", custom_prompt: Optional[str] = None
+) -> str:
+    """
+    Tool to analyze and describe any image using the vision-capable model
+    configured in settings.DESCRIBE_IMAGE_MODEL_NAME.
+    Returns a description of the image based on the requested type.
+    Args:
+        path (str): Web URL of the image or path to the image file.
+        description_type (str): Type of description to generate. Options:
+            - "general": General description of the image
+            - "detailed": Detailed analysis of the image
+            - "chess": Analysis of a chess position
+            - "text": Extract and describe text from the image
+            - "custom": Custom description based on user prompt
+        custom_prompt (str, optional): Prompt to use when description_type is "custom".
+            Ignored for the built-in description types.
+    """
+    # Validate the arguments first so an invalid request fails before the image
+    # is downloaded and re-encoded.
+    assert (
+        description_type in DESCRIBE_PROMPTS or description_type == "custom"
+    ), f"Invalid description type: {description_type}"
+    prompt = DESCRIBE_PROMPTS.get(description_type, custom_prompt)
+    assert prompt and prompt.strip(), "Please provide a non-empty prompt"
+    image_base64 = show_image(path)["image_base64"]
+    content = [
+        {"type": "text", "text": prompt},
+        {
+            "type": "image_url",
+            # show_image always re-encodes the picture as PNG, so the data URL
+            # must declare image/png rather than image/jpeg.
+            "image_url": {"url": f"data:image/png;base64,{image_base64}"},
+        },
+    ]
+    model_name = settings.DESCRIBE_IMAGE_MODEL_NAME
+    response = await llm_acall(
+        model_name=model_name,
+        messages=[ChatMessage(role="user", content=content)],
+    )
+    return response

academia_mcp-1.10.0/academia_mcp/tools/speech_to_text.py ADDED Viewed

@@ -0,0 +1,48 @@
+from pathlib import Path
+from io import BytesIO
+import httpx
+from openai import AsyncOpenAI
+from academia_mcp.files import get_workspace_dir
+from academia_mcp.settings import settings
+async def speech_to_text(audio_path: str, provider: str = "openai") -> str:
+    """
+    Tool to convert speech to text using OpenAI's "gpt-4o-transcribe" model.
+    Returns transcribed text from the audio file.
+    Args:
+        audio_path (str): Web URL of the audio or path to the audio file
+            (an existing path, or a path relative to the workspace directory).
+        provider (str): Provider to use. Currently only "openai" is supported.
+    """
+    AVAILABLE_PROVIDERS = ("openai",)
+    assert (
+        provider in AVAILABLE_PROVIDERS
+    ), f"Invalid provider: {provider}. Available providers: {AVAILABLE_PROVIDERS}"
+    if audio_path.startswith("http"):
+        # Download with the async client so the coroutine does not block the event loop.
+        async with httpx.AsyncClient(timeout=10) as http_client:
+            response = await http_client.get(audio_path)
+        response.raise_for_status()
+        # Strip any query string or fragment before reading the extension, so
+        # "clip.mp3?token=abc" yields "mp3" instead of "mp3?token=abc".
+        url_path = audio_path.split("?", 1)[0].split("#", 1)[0]
+        ext = url_path.rsplit(".", 1)[-1]
+        audio_file = BytesIO(response.content)
+        # The file name (and its extension) is sent along with the upload.
+        audio_file.name = f"audio_file.{ext}"
+    else:
+        full_audio_path = Path(audio_path)
+        if not full_audio_path.exists():
+            # Fall back to resolving the path relative to the workspace directory.
+            full_audio_path = Path(get_workspace_dir()) / audio_path
+            assert full_audio_path.exists(), f"Audio file {audio_path} does not exist"
+        # read_bytes() opens and closes the file; the previous open(...).read()
+        # left the handle to be closed by garbage collection.
+        audio_file = BytesIO(full_audio_path.read_bytes())
+        audio_file.name = full_audio_path.name
+    # provider was validated above; "openai" is the only implemented backend.
+    client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
+    result = await client.audio.transcriptions.create(
+        model="gpt-4o-transcribe",
+        file=audio_file,
+        response_format="text",
+    )
+    return result

{academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/visit_webpage.py RENAMED Viewed

@@ -8,7 +8,8 @@ from academia_mcp.utils import get_with_retries, post_with_retries
 from academia_mcp.settings import settings
 EXA_CONTENTS_URL = "https://api.exa.ai/contents"
-AVAILABLE_PROVIDERS = ("basic", "exa")
+TAVILY_EXTRACT_URL = "https://api.tavily.com/extract"
+AVAILABLE_PROVIDERS = ("basic", "exa", "tavily")
 def _exa_visit_webpage(url: str) -> str:
@@ -22,7 +23,17 @@ def _exa_visit_webpage(url: str) -> str:
     return json.dumps(response.json()["results"][0])
-def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
+def _tavily_visit_webpage(url: str) -> str:
+    """
+    Fetch the page at `url` through the Tavily extract endpoint.
+    Returns the "raw_content" of the first result, serialized as a JSON string.
+    Raises AssertionError when settings.TAVILY_API_KEY is not set.
+    """
+    api_key = settings.TAVILY_API_KEY or ""
+    assert api_key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
+    extract_response = post_with_retries(
+        TAVILY_EXTRACT_URL, payload={"urls": [url]}, api_key=api_key
+    )
+    first_result = extract_response.json()["results"][0]
+    return json.dumps(first_result["raw_content"])
+def visit_webpage(url: str, provider: Optional[str] = "tavily") -> str:
     """
     Visit a webpage and return the content.
@@ -32,7 +43,7 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
     Args:
         url: The URL of the webpage to visit.
-        provider: The provider to use. Available providers: "basic" (default) or "exa".
+        provider: The provider to use. Available providers: "tavily" (default), "exa", or "basic".
     """
     assert (
         provider in AVAILABLE_PROVIDERS
@@ -40,6 +51,10 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
     if provider == "exa" and settings.EXA_API_KEY:
         return _exa_visit_webpage(url)
+    elif provider == "tavily" and settings.TAVILY_API_KEY:
+        return _tavily_visit_webpage(url)
+    else:
+        provider = "basic"
     assert provider == "basic"
     response = get_with_retries(url)

{academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: academia-mcp
-Version: 1.9.2
+Version: 1.10.0
 Summary: MCP server that provides different tools to search for scientific publications
 Author-email: Ilya Gusev <phoenixilya@gmail.com>
 Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp

{academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp.egg-info/SOURCES.txt RENAMED Viewed

@@ -30,6 +30,7 @@ academia_mcp/tools/py.typed
 academia_mcp/tools/review.py
 academia_mcp/tools/s2.py
 academia_mcp/tools/show_image.py
+academia_mcp/tools/speech_to_text.py
 academia_mcp/tools/visit_webpage.py
 academia_mcp/tools/web_search.py
 tests/test_anthology_search.py
@@ -43,5 +44,6 @@ tests/test_latex.py
 tests/test_review.py
 tests/test_s2.py
 tests/test_show_image.py
+tests/test_speech_to_text.py
 tests/test_visit_webpage.py
 tests/test_web_search.py

{academia_mcp-1.9.2 → academia_mcp-1.10.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "academia-mcp"
-version = "1.9.2"
+version = "1.10.0"
 description = "MCP server that provides different tools to search for scientific publications"
 readme = "README.md"
 authors = [

{academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_show_image.py RENAMED Viewed

@@ -3,7 +3,7 @@ from io import BytesIO
 import httpx
 from PIL import Image
-from academia_mcp.tools import show_image
+from academia_mcp.tools import show_image, describe_image
 from academia_mcp.files import get_workspace_dir
@@ -23,3 +23,9 @@ def test_show_image_local(test_image_url: str) -> None:
     assert result is not None
     assert "image_base64" in result
     assert result["image_base64"] is not None
+async def test_describe_image_base(test_image_url: str) -> None:
+    """describe_image with default arguments mentions "Interrogator" for the fixture image."""
+    description = await describe_image(test_image_url)
+    assert description is not None
+    assert "Interrogator" in description

academia_mcp-1.10.0/tests/test_speech_to_text.py ADDED Viewed

@@ -0,0 +1,21 @@
+import httpx
+from academia_mcp.tools import speech_to_text
+from academia_mcp.files import get_workspace_dir
+async def test_speech_to_text_base(test_audio_url: str) -> None:
+    """Transcribing the fixture audio straight from its URL yields the expected phrase."""
+    transcript = await speech_to_text(test_audio_url)
+    assert transcript is not None
+    expected_phrase = "dancing in the masquerade"
+    assert expected_phrase in str(transcript).lower()
+async def test_speech_to_text_local(test_audio_url: str) -> None:
+    """Transcribing a copy of the fixture audio saved in the workspace yields the expected phrase."""
+    download = httpx.get(test_audio_url, timeout=10)
+    download.raise_for_status()
+    # Keep the original extension so the saved copy has the same file type as the URL.
+    file_name = "audio_file." + test_audio_url.split(".")[-1]
+    local_copy = get_workspace_dir() / file_name
+    with open(local_copy, "wb") as out_file:
+        out_file.write(download.content)
+    # Pass only the bare file name: speech_to_text resolves it against the workspace directory.
+    transcript = await speech_to_text(file_name)
+    assert transcript is not None
+    assert "dancing in the masquerade" in str(transcript).lower()

{academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_visit_webpage.py RENAMED Viewed

@@ -18,3 +18,15 @@ def test_visit_webpage_exa() -> None:
 def test_visit_webpage_pdf() -> None:
     content = visit_webpage("https://arxiv.org/pdf/2409.06820")
     assert "A Benchmark for Role-Playing" in content
+def test_visit_webpage_nature() -> None:
+    """visit_webpage with the default provider returns content containing "1002" for a Nature listing page."""
+    listing_url = "https://www.nature.com/nature/articles?page=51&searchType=journalSearch&sort=PubDate&type=article&year=2020"
+    page_text = visit_webpage(listing_url)
+    assert "1002" in page_text
+def test_visit_webpage_github_issue() -> None:
+    """visit_webpage with provider="tavily" returns the issue date for a GitHub issue page."""
+    issue_url = "https://github.com/numpy/numpy/issues/10881"
+    page_text = visit_webpage(issue_url, provider="tavily")
+    assert "on Apr 15, 2018" in str(page_text)

academia_mcp-1.9.2/academia_mcp/tools/show_image.py DELETED Viewed

@@ -1,41 +0,0 @@
-import base64
-from pathlib import Path
-from io import BytesIO
-from typing import Dict
-import httpx
-from PIL import Image
-from academia_mcp.files import get_workspace_dir
-from academia_mcp.settings import settings
-def show_image(path: str) -> Dict[str, str]:
-    """
-    Reads an image from the specified URL or from the current work directory.
-    Always call this function at the end of the code block.
-    For instance:
-    ```python
-    show_image("https://example.com/image.png")
-    ```
-    Do not print it ever, just return as the last expression.
-    Returns an dictionary with a single "image" key.
-    Args:
-        url: Path to file inside current work directory or web URL
-    """
-    if path.startswith("http"):
-        response = httpx.get(path, timeout=10)
-        response.raise_for_status()
-        image = Image.open(BytesIO(response.content))
-    else:
-        assert settings.WORKSPACE_DIR is not None, "WORKSPACE_DIR is not set"
-        full_path = Path(path)
-        if not full_path.exists():
-            full_path = Path(get_workspace_dir()) / path
-            assert full_path.exists(), f"Image file {path} does not exist"
-        image = Image.open(str(full_path))
-    buffer_io = BytesIO()
-    image.save(buffer_io, format="PNG")
-    img_bytes = buffer_io.getvalue()
-    return {"image_base64": base64.b64encode(img_bytes).decode("utf-8")}