PyPI - academia-mcp - Versions diffs - 1.10.8__py3-none-any.whl → 1.11.0__py3-none-any.whl - Mend

academia-mcp 1.10.8__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

academia_mcp/server.py CHANGED Viewed

@@ -63,25 +63,24 @@ def find_free_port() -> int:
     raise RuntimeError("No free port in range 5000-6000 found")
-def run(
-    host: str = "0.0.0.0",
-    port: Optional[int] = None,
-    mount_path: str = "/",
+def create_server(
     streamable_http_path: str = "/mcp",
-    transport: Literal["stdio", "sse", "streamable-http"] = "streamable-http",
+    mount_path: str = "/",
+    stateless_http: bool = True,
     disable_web_search_tools: bool = False,
     disable_llm_tools: bool = False,
-) -> None:
-    configure_uvicorn_style_logging()
+    port: Optional[int] = None,
+    host: str = "0.0.0.0",
+) -> FastMCP:
     server = FastMCP(
         "Academia MCP",
-        stateless_http=True,
+        stateless_http=stateless_http,
         streamable_http_path=streamable_http_path,
         mount_path=mount_path,
     )
     logger = logging.getLogger(__name__)
-    server.add_tool(arxiv_search)
+    server.add_tool(arxiv_search, structured_output=True)
     server.add_tool(arxiv_download)
     server.add_tool(s2_get_citations)
     server.add_tool(s2_get_references)
@@ -140,6 +139,27 @@ def run(
     server.settings.port = port
     server.settings.host = host
+    return server
+def run(
+    host: str = "0.0.0.0",
+    port: Optional[int] = None,
+    mount_path: str = "/",
+    streamable_http_path: str = "/mcp",
+    transport: Literal["stdio", "sse", "streamable-http"] = "streamable-http",
+    disable_web_search_tools: bool = False,
+    disable_llm_tools: bool = False,
+) -> None:
+    configure_uvicorn_style_logging()
+    server = create_server(
+        streamable_http_path=streamable_http_path,
+        mount_path=mount_path,
+        disable_web_search_tools=disable_web_search_tools,
+        disable_llm_tools=disable_llm_tools,
+        port=port,
+        host=host,
+    )
     if transport == "streamable-http":
         # Enable CORS for browser-based clients

academia_mcp/tools/arxiv_search.py CHANGED Viewed

@@ -2,12 +2,12 @@
 # https://github.com/jonatasgrosman/findpapers/blob/master/findpapers/searchers/arxiv_searcher.py
 # https://info.arxiv.org/help/api/user-manual.html
-import json
 import re
 from typing import Optional, List, Dict, Any, Union
 from datetime import datetime, date
 import xmltodict
+from pydantic import BaseModel, Field
 from academia_mcp.utils import get_with_retries
@@ -17,6 +17,25 @@ SORT_BY_OPTIONS = ("relevance", "lastUpdatedDate", "submittedDate")
 SORT_ORDER_OPTIONS = ("ascending", "descending")
+class ArxivSearchEntry(BaseModel):  # type: ignore
+    id: str = Field(description="Paper ID")
+    title: str = Field(description="Paper title")
+    authors: str = Field(description="Authors of the paper")
+    published: str = Field(description="Published date of the paper")
+    updated: str = Field(description="Updated date of the paper")
+    categories: str = Field(description="Categories of the paper")
+    comment: str = Field(description="Comment of the paper")
+    index: int = Field(description="Index of the paper", default=0)
+    abstract: Optional[str] = Field(description="Abstract of the paper", default=None)
+class ArxivSearchResponse(BaseModel):  # type: ignore
+    total_count: int = Field(description="The total number of results")
+    returned_count: int = Field(description="The number of results returned")
+    offset: int = Field(description="The offset of the results")
+    results: List[ArxivSearchEntry] = Field(description="The results, search entries")
 def _format_text_field(text: str) -> str:
     return " ".join([line.strip() for line in text.split() if line.strip()])
@@ -48,17 +67,17 @@ def _format_date(date: str) -> str:
     return dt.strftime("%B %d, %Y")
-def _clean_entry(entry: Dict[str, Any]) -> Dict[str, Any]:
-    return {
-        "id": entry["id"].split("/")[-1],
-        "title": _format_text_field(entry["title"]),
-        "authors": _format_authors(entry["author"]),
-        "abstract": _format_text_field(entry["summary"]),
-        "published": _format_date(entry["published"]),
-        "updated": _format_date(entry["updated"]),
-        "categories": _format_categories(entry.get("category", {})),
-        "comment": _format_text_field(entry.get("arxiv:comment", {}).get("#text", "")),
-    }
+def _clean_entry(entry: Dict[str, Any]) -> ArxivSearchEntry:
+    return ArxivSearchEntry(
+        id=entry["id"].split("/")[-1],
+        title=_format_text_field(entry["title"]),
+        authors=_format_authors(entry["author"]),
+        abstract=_format_text_field(entry["summary"]),
+        published=_format_date(entry["published"]),
+        updated=_format_date(entry["updated"]),
+        categories=_format_categories(entry.get("category", {})),
+        comment=_format_text_field(entry.get("arxiv:comment", {}).get("#text", "")),
+    )
 def _convert_to_yyyymmddtttt(date_str: str) -> str:
@@ -105,22 +124,19 @@ def _format_entries(
     start_index: int,
     include_abstracts: bool,
     total_results: int,
-) -> str:
+) -> ArxivSearchResponse:
     clean_entries: List[Dict[str, Any]] = []
     for entry_num, entry in enumerate(entries):
         clean_entry = _clean_entry(entry)
         if not include_abstracts:
-            clean_entry.pop("abstract")
-        clean_entry["index"] = start_index + entry_num
+            clean_entry.abstract = None
+        clean_entry.index = start_index + entry_num
         clean_entries.append(clean_entry)
-    return json.dumps(
-        {
-            "total_count": total_results,
-            "returned_count": len(entries),
-            "offset": start_index,
-            "results": clean_entries,
-        },
-        ensure_ascii=False,
+    return ArxivSearchResponse(
+        total_count=total_results,
+        returned_count=len(entries),
+        offset=start_index,
+        results=clean_entries,
     )
@@ -133,7 +149,7 @@ def arxiv_search(
     sort_by: Optional[str] = "relevance",
     sort_order: Optional[str] = "descending",
     include_abstracts: Optional[bool] = False,
-) -> str:
+) -> ArxivSearchResponse:
     """
     Search arXiv papers with field-specific queries.
@@ -158,12 +174,6 @@ def arxiv_search(
         all:role OR all:playing OR all:"language model"
         (au:vaswani OR au:"del maestro") ANDNOT ti:attention
-    Returns a JSON object serialized to a string. The structure is:
-    {"total_count": ..., "returned_count": ..., "offset": ..., "results": [...]}
-    Every item in the "results" has the following fields:
-    ("index", "id", "title", "authors", "abstract", "published", "updated", "categories", "comment")
-    Use `json.loads` to deserialize the result if you want to get specific fields.
     Args:
         query: The search query, required.
         offset: The offset to scroll search results. 10 items will be skipped if offset=10. 0 by default.
@@ -211,10 +221,9 @@ def arxiv_search(
     entries = feed.get("entry", [])
     if isinstance(entries, dict):
         entries = [entries]
-    formatted_entries: str = _format_entries(
+    return _format_entries(
         entries,
         start_index=start_index,
         total_results=total_results,
         include_abstracts=include_abstracts,
     )
-    return formatted_entries

academia_mcp/tools/show_image.py CHANGED Viewed

@@ -30,7 +30,20 @@ DESCRIBE_PROMPTS = {
         4. Any immediate tactical opportunities or threats
         5. Suggested next moves with brief explanations"""
     ),
-    "text": "Extract and describe any text present in this image. If there are multiple pieces of text, organize them clearly.",
+    "text": dedent(
+        """You are performing OCR and transcription.
+        Extract ALL text and numbers from the image verbatim.
+        - Preserve original casing, punctuation, symbols, mathematical notation, and whitespace layout when possible.
+        - If layout is multi-column or tabular, reconstruct lines top-to-bottom, left-to-right; use line breaks between blocks.
+        - For any uncertain or low-confidence characters, mark with a '?' and include a note.
+        - After the raw extraction, provide a clean, normalized version (fixing obvious OCR artifacts) as a separate section.
+        Return two sections:
+        [RAW TRANSCRIPTION]
+        ...
+        [NORMALIZED]
+        ...
+        """
+    ),
 }
@@ -44,10 +57,8 @@ def show_image(path: str) -> Dict[str, str]:
     ```
     Do not print it ever, just return as the last expression.
-    Returns an dictionary with a single "image" key.
     Args:
-        url: Path to file inside current work directory or web URL
+        path: Path to file inside current work directory or web URL
     """
     if path.startswith("http"):
         response = httpx.get(path, timeout=10)
@@ -80,7 +91,7 @@ async def describe_image(
             - "general": General description of the image
             - "detailed": Detailed analysis of the image
             - "chess": Analysis of a chess position
-            - "text": Extract and describe text from the image
+            - "text": Extract and describe text or numbers from the image
             - "custom": Custom description based on user prompt
     """
     image_base64 = show_image(path)["image_base64"]
@@ -93,12 +104,16 @@ async def describe_image(
         {"type": "text", "text": prompt},
         {
             "type": "image_url",
-            "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
+            "image_url": {"url": f"data:image/png;base64,{image_base64}"},
         },
     ]
     model_name = settings.DESCRIBE_IMAGE_MODEL_NAME
+    llm_kwargs = {}
+    if description_type in {"text", "chess"}:
+        llm_kwargs["temperature"] = 0.0
     response = await llm_acall(
         model_name=model_name,
         messages=[ChatMessage(role="user", content=content)],
+        **llm_kwargs,
     )
     return response

academia_mcp/tools/visit_webpage.py CHANGED Viewed

@@ -33,12 +33,15 @@ def _tavily_visit_webpage(url: str) -> Dict[str, Any]:
     assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
     payload = {
         "urls": [url],
+        "extract_depth": "advanced",
+        "include_images": True,
     }
     response = post_with_retries(TAVILY_EXTRACT_URL, payload=payload, api_key=key)
     results = response.json()["results"]
     if not results:
         return {"error": ERROR_MESSAGE}
-    return {"text": results[0]["raw_content"]}
+    result = results[0]
+    return {"text": result["raw_content"], "images": result["images"]}
 def _basic_visit_webpage(url: str) -> Dict[str, Any]:

academia_mcp/tools/yt_transcript.py CHANGED Viewed

@@ -13,8 +13,8 @@ def yt_transcript(video_url: str) -> str:
     Args:
         video_url (str): YouTube video URL.
     """
-    if "youtu.be" in video_url:
-        video_id = video_url.strip().split("youtu.be/")[1]
+    if "youtu.be/" in video_url:
+        video_id = video_url.strip().split("youtu.be/")[-1]
     else:
         video_id = video_url.strip().split("v=")[-1]
     video_id = video_id.split("?")[0]
@@ -25,6 +25,9 @@ def yt_transcript(video_url: str) -> str:
             proxy_password=settings.WEBSHARE_PROXY_PASSWORD,
         )
     api = YouTubeTranscriptApi(proxy_config=proxy_config)
-    transcript = api.fetch(video_id)
+    try:
+        transcript = api.fetch(video_id)
+    except Exception as e:
+        return f"Error fetching transcript for video {video_url}: {e}"
     snippets = transcript.snippets
     return "\n".join([f"{int(entry.start)}: {' '.join(entry.text.split())}" for entry in snippets])

{academia_mcp-1.10.8.dist-info → academia_mcp-1.11.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: academia-mcp
-Version: 1.10.8
+Version: 1.11.0
 Summary: MCP server that provides different tools to search for scientific publications
 Author-email: Ilya Gusev <phoenixilya@gmail.com>
 Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp

{academia_mcp-1.10.8.dist-info → academia_mcp-1.11.0.dist-info}/RECORD RENAMED Viewed

@@ -4,7 +4,7 @@ academia_mcp/files.py,sha256=ynIt0XbU1Z7EPWkv_hVX0pGKsLlmjYv-MVJLOfi6yzs,817
 academia_mcp/llm.py,sha256=zpGkuJFf58Ofgys_fi28-47_wJ1a7sIs_yZvI1Si6z0,993
 academia_mcp/pdf.py,sha256=9PlXzHGhb6ay3ldbTdxCcTWvH4TkET3bnb64mgoh9i0,1273
 academia_mcp/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-academia_mcp/server.py,sha256=tZ57YkW2EcW4DRIk87n2PFZkkTjAVsVQ5lphvc1AVA4,5517
+academia_mcp/server.py,sha256=s7rwsNePtz8ZJtzJ5FmvzStWR2ApArxiJROcRwyqrww,6102
 academia_mcp/settings.py,sha256=c5s4dI8V_cWmMED-jKDmHjfdIaBcxwEK4HdHNQ3WUIg,1096
 academia_mcp/utils.py,sha256=lRlb615JJ_0d4gcFpMoBjB6w0xXcde9dFDw0LwYpSPQ,4863
 academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty,sha256=hGcEPCYBJS4vdhWvN_yEaJC4GvT_yDroI94CfY2Oguk,12268
@@ -12,7 +12,7 @@ academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex,sha256=
 academia_mcp/tools/__init__.py,sha256=Z30vULZwUeUX5nDz5wcv0znhAeBtZRa0dvz7vD8SUYE,1555
 academia_mcp/tools/anthology_search.py,sha256=rhFpJZqGLABgr0raDuH0CARBiAJNJtEI4dlMrKNHfDQ,7669
 academia_mcp/tools/arxiv_download.py,sha256=gBY0_Kz0yGtVkLMwn6GrAyfBjovZVgcSMuyy67p65Cw,10474
-academia_mcp/tools/arxiv_search.py,sha256=pzM18qrF3QL03A53w003kE7hQi3s3QKtjgw0m7K88UY,8355
+academia_mcp/tools/arxiv_search.py,sha256=Cb9x6SaHz5vjr7jwdq3U1PtvADfFJa5pq52z-0Rg8t0,8882
 academia_mcp/tools/bitflip.py,sha256=1B-EEcDnJjB9YmvVWsGv_Un19Bkeud9SZDw2TpGTCSg,12184
 academia_mcp/tools/document_qa.py,sha256=Wb2nEEVu9UyPp8ktHWeT9wS2JBle8fb9zRjTNVIDdBE,2463
 academia_mcp/tools/hf_datasets_search.py,sha256=KiBkqT4rXjEN4oc1AWZOPnqN_Go90TQogY5-DUm3LQo,2854
@@ -20,14 +20,14 @@ academia_mcp/tools/latex.py,sha256=B1Leqt1FHY6H3DlUgeYse4LMFpf4-K1FQViXl5MKk8A,6
 academia_mcp/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 academia_mcp/tools/review.py,sha256=Va0lFJJKuk-NvWhKS3UZ-Dnuk7CyuDQ4S1nd70D-ffE,11117
 academia_mcp/tools/s2.py,sha256=QX7-pbetab3Xt_1tvVPU6o5D_NAe9y6jcTGRBK1vwtY,6200
-academia_mcp/tools/show_image.py,sha256=jiJlQ53dbZ0T61OBhCT3IKVvBl9NHc6jHgWLfg5BxiE,3856
+academia_mcp/tools/show_image.py,sha256=DWSnYMTn_dJpGTLL1r_sbX5XsB6p9z-vClApDANz84s,4534
 academia_mcp/tools/speech_to_text.py,sha256=YZzMqdvunzXkpcadP_mYhm6cs4qH1Y_42SfY-7eX4O4,1601
-academia_mcp/tools/visit_webpage.py,sha256=swlFwWRzWc7-AHP2ouRZJScSTA4dHZ32fuJnA2V0lUc,3311
+academia_mcp/tools/visit_webpage.py,sha256=rMmjP2KXo_ElO7NPfUWnYN0EsRJHi2ikShekZR7pCms,3428
 academia_mcp/tools/web_search.py,sha256=VphVztf2jZNT3bPJPJuTdMkKbe2-LIbSV7keKV47lac,8616
-academia_mcp/tools/yt_transcript.py,sha256=NPBVGN-LG_N6yElQYBMEnbNhSjkM3-DPeVIJwlFGqA0,1104
-academia_mcp-1.10.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-academia_mcp-1.10.8.dist-info/METADATA,sha256=Cw-6atEo6S0MAjBKkgsHZxw-GJCcTZWAiOSQGeftarM,6356
-academia_mcp-1.10.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-academia_mcp-1.10.8.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
-academia_mcp-1.10.8.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
-academia_mcp-1.10.8.dist-info/RECORD,,
+academia_mcp/tools/yt_transcript.py,sha256=ilfOpX14moC1bKHbFmOVvZ8-_NxuQQUoQbV28e9FBaE,1217
+academia_mcp-1.11.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+academia_mcp-1.11.0.dist-info/METADATA,sha256=61gqVlxbohEvg2IwsRyDfX2X8u5Dsw-IUrAbm_o6Hdg,6356
+academia_mcp-1.11.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+academia_mcp-1.11.0.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
+academia_mcp-1.11.0.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
+academia_mcp-1.11.0.dist-info/RECORD,,

{academia_mcp-1.10.8.dist-info → academia_mcp-1.11.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{academia_mcp-1.10.8.dist-info → academia_mcp-1.11.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{academia_mcp-1.10.8.dist-info → academia_mcp-1.11.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{academia_mcp-1.10.8.dist-info → academia_mcp-1.11.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

academia-mcp 1.10.8__py3-none-any.whl → 1.11.0__py3-none-any.whl

academia-mcp 1.10.8__py3-none-any.whl → 1.11.0__py3-none-any.whl