academia-mcp 1.11.0__tar.gz → 1.11.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/PKG-INFO +1 -1
  2. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/server.py +12 -12
  3. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/arxiv_download.py +30 -33
  4. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/arxiv_search.py +6 -6
  5. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/bitflip.py +63 -60
  6. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/s2.py +50 -40
  7. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/visit_webpage.py +21 -13
  8. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/web_search.py +42 -35
  9. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/utils.py +2 -0
  10. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp.egg-info/PKG-INFO +1 -1
  11. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/pyproject.toml +2 -1
  12. academia_mcp-1.11.2/tests/test_arxiv_download.py +35 -0
  13. academia_mcp-1.11.2/tests/test_bitflip.py +52 -0
  14. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/tests/test_document_qa.py +1 -1
  15. academia_mcp-1.11.2/tests/test_s2.py +42 -0
  16. academia_mcp-1.11.2/tests/test_server.py +81 -0
  17. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/tests/test_visit_webpage.py +16 -10
  18. academia_mcp-1.11.2/tests/test_web_search.py +59 -0
  19. academia_mcp-1.11.0/tests/test_arxiv_download.py +0 -25
  20. academia_mcp-1.11.0/tests/test_bitflip.py +0 -54
  21. academia_mcp-1.11.0/tests/test_s2.py +0 -44
  22. academia_mcp-1.11.0/tests/test_server.py +0 -35
  23. academia_mcp-1.11.0/tests/test_web_search.py +0 -55
  24. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/LICENSE +0 -0
  25. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/README.md +0 -0
  26. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/__init__.py +0 -0
  27. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/__main__.py +0 -0
  28. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/files.py +0 -0
  29. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
  30. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
  31. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/llm.py +0 -0
  32. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/pdf.py +0 -0
  33. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/py.typed +0 -0
  34. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/settings.py +0 -0
  35. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/__init__.py +0 -0
  36. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/anthology_search.py +0 -0
  37. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/document_qa.py +0 -0
  38. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/hf_datasets_search.py +0 -0
  39. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/latex.py +0 -0
  40. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/py.typed +0 -0
  41. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/review.py +0 -0
  42. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/show_image.py +0 -0
  43. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/speech_to_text.py +0 -0
  44. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/yt_transcript.py +0 -0
  45. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp.egg-info/SOURCES.txt +0 -0
  46. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp.egg-info/dependency_links.txt +0 -0
  47. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp.egg-info/entry_points.txt +0 -0
  48. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp.egg-info/requires.txt +0 -0
  49. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp.egg-info/top_level.txt +0 -0
  50. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/setup.cfg +0 -0
  51. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/tests/test_anthology_search.py +0 -0
  52. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/tests/test_arxiv_search.py +0 -0
  53. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/tests/test_extract_json.py +0 -0
  54. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/tests/test_hf_dataset_search.py +0 -0
  55. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/tests/test_latex.py +0 -0
  56. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/tests/test_review.py +0 -0
  57. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/tests/test_show_image.py +0 -0
  58. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/tests/test_speech_to_text.py +0 -0
  59. {academia_mcp-1.11.0 → academia_mcp-1.11.2}/tests/test_yt_transcript.py +0 -0
{academia_mcp-1.11.0 → academia_mcp-1.11.2}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: academia-mcp
- Version: 1.11.0
+ Version: 1.11.2
  Summary: MCP server that provides different tools to search for scientific publications
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
{academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/server.py
@@ -81,16 +81,16 @@ def create_server(
      logger = logging.getLogger(__name__)

      server.add_tool(arxiv_search, structured_output=True)
-     server.add_tool(arxiv_download)
-     server.add_tool(s2_get_citations)
-     server.add_tool(s2_get_references)
+     server.add_tool(arxiv_download, structured_output=True)
+     server.add_tool(visit_webpage, structured_output=True)
+     server.add_tool(s2_get_citations, structured_output=True)
+     server.add_tool(s2_get_references, structured_output=True)
+     server.add_tool(s2_get_info, structured_output=True)
      server.add_tool(s2_corpus_id_from_arxiv_id)
-     server.add_tool(s2_get_info)
      server.add_tool(hf_datasets_search)
      server.add_tool(anthology_search)
      server.add_tool(get_latex_template)
      server.add_tool(get_latex_templates_list)
-     server.add_tool(visit_webpage)
      server.add_tool(show_image)
      server.add_tool(yt_transcript)

@@ -105,20 +105,20 @@ def create_server(

      if not disable_web_search_tools:
          if settings.TAVILY_API_KEY:
-             server.add_tool(tavily_web_search)
+             server.add_tool(tavily_web_search, structured_output=True)
          if settings.EXA_API_KEY:
-             server.add_tool(exa_web_search)
+             server.add_tool(exa_web_search, structured_output=True)
          if settings.BRAVE_API_KEY:
-             server.add_tool(brave_web_search)
+             server.add_tool(brave_web_search, structured_output=True)
          if settings.EXA_API_KEY or settings.BRAVE_API_KEY or settings.TAVILY_API_KEY:
-             server.add_tool(web_search)
+             server.add_tool(web_search, structured_output=True)
          else:
              logger.warning("No web search tools keys are set, web_search will not be available!")

      if not disable_llm_tools and settings.OPENROUTER_API_KEY:
-         server.add_tool(extract_bitflip_info)
-         server.add_tool(generate_research_proposals)
-         server.add_tool(score_research_proposals)
+         server.add_tool(extract_bitflip_info, structured_output=True)
+         server.add_tool(generate_research_proposals, structured_output=True)
+         server.add_tool(score_research_proposals, structured_output=True)
      server.add_tool(document_qa)
      server.add_tool(describe_image)
      if settings.WORKSPACE_DIR:
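The change above registers tools with structured_output=True, so each tool's Pydantic return type (e.g. DownloadResponse in the next file) is exposed as an output schema instead of a plain string result. A minimal sketch of the registration pattern, assuming the FastMCP class from the MCP Python SDK (the import path and server name are assumptions; only add_tool(..., structured_output=True) appears in this diff):

    # Sketch under assumptions: FastMCP from the MCP Python SDK.
    from mcp.server.fastmcp import FastMCP

    from academia_mcp.tools.arxiv_download import arxiv_download

    server = FastMCP("academia-mcp")  # hypothetical server name
    # With structured_output=True, the tool's Pydantic return annotation becomes
    # an output schema, so clients receive structured content, not a JSON string.
    server.add_tool(arxiv_download, structured_output=True)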
{academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/arxiv_download.py
@@ -3,19 +3,17 @@
  # https://github.com/bytedance/pasa/blob/main/utils.py

  import re
- import json
  import tempfile
  from pathlib import Path
- from typing import Any, List, Optional, Dict
- from dataclasses import dataclass, field
+ from typing import Any, Dict, List, Optional

- import requests
  import bs4
+ import requests
  from markdownify import MarkdownConverter  # type: ignore
+ from pydantic import BaseModel, Field

+ from academia_mcp.pdf import download_pdf, parse_pdf_file
  from academia_mcp.utils import get_with_retries
- from academia_mcp.pdf import parse_pdf_file, download_pdf
-

  HTML_URL = "https://arxiv.org/html/{paper_id}"
  ABS_URL = "https://arxiv.org/abs/{paper_id}"
@@ -28,12 +26,24 @@ SECTION_STOP_WORDS = (
  )


- @dataclass
- class TOCEntry:
+ class DownloadResponse(BaseModel):  # type: ignore
+     title: str = Field(description="Title of the paper")
+     abstract: str = Field(description="Abstract of the paper")
+     toc: str = Field(description="Table of Contents", default="")
+     sections: Optional[List[str]] = Field(description="Sections of the paper", default=None)
+     references: Optional[List[Dict[str, Any]]] = Field(
+         description="Parsed references from the paper", default=None
+     )
+     original_format: str = Field(
+         description="Original format of the paper (pdf or html)", default="html"
+     )
+
+
+ class TOCEntry(BaseModel):  # type: ignore
      level: int
      title: str
      html_id: Optional[str] = None
-     subsections: List["TOCEntry"] = field(default_factory=list)
+     subsections: List["TOCEntry"] = Field(default_factory=list)

      def linearize(self) -> List["TOCEntry"]:
          entries = [self]
@@ -196,7 +206,7 @@ def _parse_citation_metadata(metas: List[str]) -> Dict[str, Any]:
      return result


- def _extract_citations(soup_biblist: bs4.element.Tag) -> List[Dict[str, Any]]:
+ def _extract_references(soup_biblist: bs4.element.Tag) -> List[Dict[str, Any]]:
      extracted = []
      for li in soup_biblist.find_all("li", recursive=False):
          metas = [x.text.strip() for x in li.find_all("span", class_="ltx_bibblock")]
@@ -214,17 +224,17 @@ def _parse_html(paper_id: str) -> Dict[str, Any]:
      article = soup.article
      assert article and isinstance(article, bs4.element.Tag)

-     citations = []
+     references = []
      biblist_tag = article.find(class_="ltx_biblist")
      if biblist_tag and isinstance(biblist_tag, bs4.element.Tag):
-         citations = _extract_citations(biblist_tag)
+         references = _extract_references(biblist_tag)

      toc = _generate_toc(article)
      sections = _build_by_toc(toc, article, url)
      return {
          "toc": toc.to_str(),
          "sections": sections,
-         "citations": citations,
+         "references": references,
          "original_format": "html",
      }

@@ -255,36 +265,24 @@ def _parse_pdf(paper_id: str) -> Dict[str, Any]:
      return {
          "toc": "\n".join([f"Page {page_number}" for page_number in range(1, len(pages) + 1)]),
          "sections": pages,
-         "citations": [],
+         "references": [],
          "original_format": "pdf",
      }


  def arxiv_download(
      paper_id: str,
-     include_citations: Optional[bool] = False,
+     include_references: Optional[bool] = False,
      mode: Optional[str] = "html",
- ) -> str:
+ ) -> DownloadResponse:
      """
      Downloads a paper from Arxiv and converts it to text.
      Use mode = "html" by default.
      Fall back to mode = "pdf" if there are any problems with the HTML version.

-     Returns a JSON with a following structure:
-     {
-         "title": "...",
-         "abstract": "...",
-         "toc": "...",
-         "sections": ["...", ...],
-         "citations": [...]
-     }
-     Use `json.loads` to deserialize the result if you want to get specific fields.
-     For example, `abstract = json.loads(arxiv_download("2409.06820v1"))`
-     The "toc" key contains Table of Contents, that sometimes has indexing for sections.
-
      Args:
          paper_id: ID of the paper on Arxiv. For instance: 2409.06820v1
-         include_citations: include "citations" in the result or not. False by default.
+         include_references: include "references" in the result or not. False by default.
          mode: Which version of paper to use. Options: ["html", "pdf"]. "html" by default.
      """

@@ -297,7 +295,6 @@ def arxiv_download(
      else:
          content = _parse_pdf(paper_id)

-     if not include_citations and "citations" in content:
-         content.pop("citations")
-
-     return json.dumps({**abs_meta, **content}, ensure_ascii=False)
+     if not include_references and "references" in content:
+         content.pop("references")
+     return DownloadResponse(**{**abs_meta, **content})
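With the DownloadResponse model above, callers get typed attribute access instead of json.loads. A minimal consumption sketch (caller code is illustrative, not part of the package; the paper ID comes from the docstring example, and field names come from the model in this diff):

    from academia_mcp.tools.arxiv_download import arxiv_download

    paper = arxiv_download("2409.06820v1")  # DownloadResponse, not a JSON string
    print(paper.title)
    print(paper.abstract)
    print(paper.toc)  # Table of Contents; defaults to "" in the model

    # references are only populated when explicitly requested
    with_refs = arxiv_download("2409.06820v1", include_references=True)
    if with_refs.references:
        print(len(with_refs.references))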
{academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/arxiv_search.py
@@ -3,8 +3,8 @@
  # https://info.arxiv.org/help/api/user-manual.html

  import re
- from typing import Optional, List, Dict, Any, Union
- from datetime import datetime, date
+ from datetime import date, datetime
+ from typing import Any, Dict, List, Optional, Union

  import xmltodict
  from pydantic import BaseModel, Field
@@ -30,10 +30,10 @@ class ArxivSearchEntry(BaseModel):  # type: ignore


  class ArxivSearchResponse(BaseModel):  # type: ignore
-     total_count: int = Field(description="The total number of results")
-     returned_count: int = Field(description="The number of results returned")
-     offset: int = Field(description="The offset of the results")
-     results: List[ArxivSearchEntry] = Field(description="The results, search entries")
+     total_count: int = Field(description="Total number of results")
+     returned_count: int = Field(description="Number of results returned")
+     offset: int = Field(description="Offset for pagination")
+     results: List[ArxivSearchEntry] = Field(description="Search entries")


  def _format_text_field(text: str) -> str:
{academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/bitflip.py
@@ -1,17 +1,18 @@
+ # Based on
  # https://arxiv.org/abs/2504.12976
  # https://web.stanford.edu/class/cs197c/slides/02-literature-search.pdf

  import json
  import random
- from typing import List, Optional, Any, Dict
+ from typing import Any, Dict, List, Optional

- from pydantic import BaseModel
  from datasets import load_dataset  # type: ignore
+ from pydantic import BaseModel, Field

- from academia_mcp.tools.arxiv_download import arxiv_download
- from academia_mcp.utils import extract_json, encode_prompt
- from academia_mcp.llm import llm_acall, ChatMessage
+ from academia_mcp.llm import ChatMessage, llm_acall
  from academia_mcp.settings import settings
+ from academia_mcp.tools.arxiv_download import arxiv_download
+ from academia_mcp.utils import encode_prompt, extract_json


  class ProposalDataset:
@@ -128,7 +129,7 @@ Return only the JSON list of proposals in this exact format:
      "spark": "4-6 word summary",
      "abstract": "An abstract that summarizes the proposal in conference format (approximately 250 words).",
      "experiments": ["...", "..."],
-     "risks_and_limitations": "A list of potential risks and limitations of the proposal."
+     "risks_and_limitations": ["...", "..."]
  },
  ...
]
@@ -177,12 +178,12 @@


  class BitFlipInfo(BaseModel):  # type: ignore
-     bit: str
-     flip: str
-     spark: str
+     bit: str = Field(description="Technical limitation or conventional approach")
+     flip: str = Field(description="Innovative approach or solution")
+     spark: str = Field(description="4-6 word summary")


- async def extract_bitflip_info(arxiv_id: str) -> str:
+ async def extract_bitflip_info(arxiv_id: str) -> BitFlipInfo:
      """
      Extracts the Bit-Flip information from the arXiv paper.

@@ -190,20 +191,12 @@ async def extract_bitflip_info(arxiv_id: str) -> str:
      questioning existing constraints or reapplying techniques to new domains/scales.
      The "Bit" is the prevailing belief, and the "Flip" is the counterargument.

-     Returns a JSON object in this format:
-     {
-         "bit": "Technical limitation or conventional approach, in at least two sentences",
-         "flip": "Innovative approach or solution, in at least two sentences",
-         "spark": "4-6 word summary of the core idea"
-     }
-     Use `json.loads` to deserialize the result if you want to get specific fields.
-
      Args:
          arxiv_id: The arXiv ID of the paper to extract the Bit-Flip information from.
      """
      model_name = settings.BITFLIP_MODEL_NAME
      paper = arxiv_download(arxiv_id)
-     abstract = json.loads(paper)["abstract"]
+     abstract = paper.abstract
      prompt = encode_prompt(EXTRACT_PROMPT, abstract=abstract)
      content = await llm_acall(
          model_name=model_name,
@@ -212,12 +205,31 @@ async def extract_bitflip_info(arxiv_id: str) -> str:
      )
      result = extract_json(content)
      bitflip_info: BitFlipInfo = BitFlipInfo.model_validate(result)
-     return str(bitflip_info.model_dump_json())
+     return bitflip_info
+
+
+ class ResearchProposal(BaseModel):  # type: ignore
+     proposal_id: int = Field(default=0, description="ID of the proposal")
+     flip: str = Field(description="Innovative approach or solution, in at least two sentences")
+     spark: str = Field(description="4-6 word summary")
+     abstract: str = Field(
+         description="An abstract that summarizes the proposal in conference format."
+     )
+     experiments: List[str] = Field(
+         description="A list of experiments that would be conducted to validate the proposal."
+     )
+     risks_and_limitations: List[str] = Field(
+         description="A list of potential risks and limitations of the proposal."
+     )
+
+
+ class GenerateResearchProposalResponse(BaseModel):  # type: ignore
+     proposals: List[ResearchProposal] = Field(description="A list of research proposals")


  async def generate_research_proposals(
      bit: str, num_proposals: int = 3, additional_context: str = ""
- ) -> str:
+ ) -> GenerateResearchProposalResponse:
      """
      Proposes improvement ideas for the Bit.

@@ -225,20 +237,6 @@ async def generate_research_proposals(
          bit: The Bit to propose improvement ideas for. The bit is a technical limitation or conventional approach of some paper.
          num_proposals: The number of proposals to generate.
          additional_context: Additional context to use when proposing the improvement idea.
-
-     Returns a JSON string with a research proposal in this format:
-     [
-         {
-             "proposal_id": ...,
-             "flip": "Innovative approach or solution, in at least two sentences",
-             "spark": "4-6 word summary",
-             "abstract": "An abstract that summarizes the proposal in conference format (approximately 250 words).",
-             "experiments": ["...", "..."],
-             "risks_and_limitations": "A list of potential risks and limitations of the proposal."
-         },
-         ...
-     ]
-     Use `json.loads` to deserialize the result if you want to get specific items.
      """
      model_name = settings.BITFLIP_MODEL_NAME
      max_completion_tokens = int(settings.BITFLIP_MAX_COMPLETION_TOKENS)
@@ -262,46 +260,51 @@ async def generate_research_proposals(
          temperature=1.0,
      )
      result = extract_json(content)
-     for proposal in result:
-         proposal["proposal_id"] = random.randint(0, 1000000)
-     return json.dumps(result, ensure_ascii=False)
+     return GenerateResearchProposalResponse(
+         proposals=[ResearchProposal.model_validate(proposal) for proposal in result]
+     )
+
+
+ class ScoredProposal(BaseModel):  # type: ignore
+     proposal_id: int = Field(default=0, description="ID of the proposal")
+     spark: str = Field(description="4-6 word summary")
+     strengths: List[str] = Field(description="A list of strengths of the proposal")
+     weaknesses: List[str] = Field(description="A list of weaknesses of the proposal")
+     novelty: int = Field(description="Novelty rating from 1 to 4")
+     clarity: int = Field(description="Clarity rating from 1 to 4")
+     significance: int = Field(description="Significance rating from 1 to 4")
+     feasibility: int = Field(description="Feasibility rating from 1 to 4")
+     soundness: int = Field(description="Soundness rating from 1 to 4")
+     overall: int = Field(description="Overall rating from 1 to 10")


- async def score_research_proposals(proposals: str | List[str | Dict[str, Any] | Any]) -> str:
+ class ScoreResearchProposalsResponse(BaseModel):  # type: ignore
+     proposals: List[ScoredProposal] = Field(description="List of scored proposals")
+
+
+ async def score_research_proposals(
+     proposals: str | List[str | Dict[str, Any] | Any],
+ ) -> ScoreResearchProposalsResponse:
      """
      Scores a list of research proposals.
      Use proposals obtained with the `generate_research_proposal` tool.

-     Returns a JSON string with a list of scores in this format:
-     [
-         {
-             "proposal_id": 0,
-             "spark": "...",
-             "strengths": ["...", "..."],
-             "weaknesses": ["...", "..."],
-             "novelty": 2,
-             "clarity": 2,
-             "significance": 2,
-             "feasibility": 2,
-             "soundness": 2,
-             "overall": 5
-         },
-         ...
-     ]
-     Use `json.loads` to deserialize the result if you want to get specific fields.
-
      Args:
          proposals: A list of JSON strings with research proposals.
      """
      model_name = settings.BITFLIP_MODEL_NAME
      if isinstance(proposals, str):
          proposals = json.loads(proposals)
-     assert isinstance(proposals, list), "Proposals should be a list of JSON strings"
-     prompt = encode_prompt(SCORE_PROMPT, proposals=[str(p) for p in proposals])
+     assert isinstance(proposals, list), "Proposals should be a list"
+     if isinstance(proposals, list):
+         proposals = [str(p) for p in proposals]
+     prompt = encode_prompt(SCORE_PROMPT, proposals=proposals)
      content = await llm_acall(
          model_name=model_name,
          messages=[ChatMessage(role="user", content=prompt)],
          temperature=0.0,
      )
      scores = extract_json(content)
-     return json.dumps(scores, ensure_ascii=False)
+     return ScoreResearchProposalsResponse(
+         proposals=[ScoredProposal.model_validate(score) for score in scores]
+     )
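The three bitflip tools now return Pydantic models end to end, so they compose without intermediate json.loads/json.dumps. A hedged usage sketch (caller code only; function and field names are taken from the diff above, the arXiv ID is illustrative):

    import asyncio

    from academia_mcp.tools.bitflip import (
        extract_bitflip_info,
        generate_research_proposals,
        score_research_proposals,
    )

    async def main() -> None:
        info = await extract_bitflip_info("2409.06820v1")  # BitFlipInfo
        generated = await generate_research_proposals(info.bit, num_proposals=3)
        # score_research_proposals still accepts a list; items are stringified internally
        scored = await score_research_proposals(
            [p.model_dump() for p in generated.proposals]
        )
        for s in scored.proposals:
            print(s.spark, s.overall)

    asyncio.run(main())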
{academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/s2.py
@@ -1,9 +1,10 @@
  # Based on
  # https://api.semanticscholar.org/api-docs/graph#tag/Paper-Data/operation/get_graph_get_paper_citations

- import json
  from typing import Optional, List, Dict, Any

+ from pydantic import BaseModel, Field
+
  from academia_mcp.utils import get_with_retries


@@ -13,42 +14,58 @@ REFERENCES_URL_TEMPLATE = "https://api.semanticscholar.org/graph/v1/paper/{paper
  FIELDS = "title,authors,externalIds,venue,citationCount,publicationDate"


+ class S2PaperInfo(BaseModel):  # type: ignore
+     arxiv_id: Optional[str] = Field(description="ArXiv ID of the paper", default=None)
+     external_ids: Optional[Dict[str, Any]] = Field(
+         description="External IDs of the paper.", default=None
+     )
+     title: str = Field(description="Paper title")
+     authors: List[str] = Field(description="Authors of the paper")
+     venue: str = Field(description="Paper venue")
+     citation_count: Optional[int] = Field(description="Paper citation count", default=None)
+     publication_date: Optional[str] = Field(description="Paper publication date", default=None)
+
+
+ class S2SearchResponse(BaseModel):  # type: ignore
+     total_count: int = Field(description="Total number of results.")
+     returned_count: int = Field(description="Number of results returned.")
+     offset: int = Field(description="Offset of the results.")
+     results: List[S2PaperInfo] = Field(description="Search entries")
+
+
  def _format_authors(authors: List[Dict[str, Any]]) -> List[str]:
      return [a["name"] for a in authors]


- def _clean_entry(entry: Dict[str, Any]) -> Dict[str, Any]:
+ def _clean_entry(entry: Dict[str, Any]) -> S2PaperInfo:
      entry = entry["citingPaper"] if "citingPaper" in entry else entry["citedPaper"]
      external_ids = entry.get("externalIds")
      if not external_ids:
          external_ids = dict()
      external_ids.pop("CorpusId", None)
      arxiv_id = external_ids.pop("ArXiv", None)
-     return {
-         "arxiv_id": arxiv_id,
-         "external_ids": external_ids if external_ids else None,
-         "title": entry["title"],
-         "authors": _format_authors(entry["authors"]),
-         "venue": entry.get("venue", ""),
-         "citation_count": entry.get("citationCount", 0),
-         "publication_date": entry.get("publicationDate", ""),
-     }
+     return S2PaperInfo(
+         arxiv_id=arxiv_id,
+         external_ids=external_ids if external_ids else None,
+         title=entry["title"],
+         authors=_format_authors(entry["authors"]),
+         venue=entry.get("venue", ""),
+         citation_count=entry.get("citationCount"),
+         publication_date=entry.get("publicationDate"),
+     )


  def _format_entries(
      entries: List[Dict[str, Any]],
      start_index: int,
      total_results: int,
- ) -> str:
+ ) -> S2SearchResponse:
      clean_entries = [_clean_entry(e) for e in entries]
-     return json.dumps(
-         {
-             "total_count": total_results,
-             "returned_count": len(entries),
-             "offset": start_index,
-             "results": clean_entries,
-         },
-         ensure_ascii=False,
+     return S2SearchResponse(
+         total_count=total_results,
+         returned_count=len(entries),
+         offset=start_index,
+         results=clean_entries,
      )


@@ -56,16 +73,10 @@ def s2_get_citations(
      arxiv_id: str,
      offset: Optional[int] = 0,
      limit: Optional[int] = 50,
- ) -> str:
+ ) -> S2SearchResponse:
      """
      Get all papers that cited a given arXiv paper based on Semantic Scholar info.

-     Returns a JSON object serialized to a string. The structure is:
-     {"total_count": ..., "returned_count": ..., "offset": ..., "results": [...]}
-     Every item in the "results" has the following fields:
-     ("arxiv_id", "external_ids", "title", "authors", "venue", "citation_count", "publication_date")
-     Use `json.loads` to deserialize the result if you want to get specific fields.
-
      Args:
          arxiv_id: The ID of a given arXiv paper.
          offset: The offset to scroll through citations. 10 items will be skipped if offset=10. 0 by default.
@@ -98,16 +109,10 @@ def s2_get_references(
      arxiv_id: str,
      offset: Optional[int] = 0,
      limit: Optional[int] = 50,
- ) -> str:
+ ) -> S2SearchResponse:
      """
      Get all papers that were cited by a given arXiv paper (references) based on Semantic Scholar info.

-     Returns a JSON object serialized to a string. The structure is:
-     {"total_count": ..., "returned_count": ..., "offset": ..., "results": [...]}
-     Every item in the "results" has the following fields:
-     ("arxiv_id", "external_ids", "title", "authors", "venue", "citation_count", "publication_date")
-     Use `json.loads` to deserialize the result if you want to get specific fields.
-
      Args:
          arxiv_id: The ID of a given arXiv paper.
          offset: The offset to scroll through citations. 10 items will be skipped if offset=10. 0 by default.
@@ -144,14 +149,10 @@ def s2_corpus_id_from_arxiv_id(arxiv_id: str) -> int:
      return int(result["externalIds"]["CorpusId"])


- def s2_get_info(arxiv_id: str) -> str:
+ def s2_get_info(arxiv_id: str) -> S2PaperInfo:
      """
      Get the S2 info for a given arXiv ID.

-     Returns a JSON object serialized to a string. The structure is:
-     {"title": ..., "authors": ..., "externalIds": ..., "venue": ..., "citationCount": ..., "publicationDate": ...}
-     Use `json.loads` to deserialize the result if you want to get specific fields.
-
      Args:
          arxiv_id: The ID of a given arXiv paper.
      """
@@ -160,4 +161,13 @@ def s2_get_info(arxiv_id: str) -> str:
      arxiv_id = arxiv_id.split("v")[0]
      paper_url = PAPER_URL_TEMPLATE.format(paper_id=f"arxiv:{arxiv_id}", fields=FIELDS)
      response = get_with_retries(paper_url)
-     return json.dumps(response.json(), ensure_ascii=False)
+     json_data = response.json()
+     return S2PaperInfo(
+         arxiv_id=json_data.get("externalIds", {}).get("ArXiv"),
+         external_ids=json_data.get("externalIds", {}),
+         title=json_data["title"],
+         authors=_format_authors(json_data["authors"]),
+         venue=json_data.get("venue", ""),
+         citation_count=int(json_data.get("citationCount", 0)),
+         publication_date=str(json_data.get("publicationDate", "")),
+     )
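A hedged consumption sketch for the new S2 return types (caller code only; field names come from the S2PaperInfo and S2SearchResponse models above, and the arXiv ID is illustrative):

    from academia_mcp.tools.s2 import s2_get_citations, s2_get_info

    info = s2_get_info("2409.06820")  # S2PaperInfo
    print(info.title, info.citation_count)

    citations = s2_get_citations("2409.06820", offset=0, limit=5)  # S2SearchResponse
    print(citations.total_count, citations.returned_count)
    for entry in citations.results:
        print(entry.title, entry.arxiv_id)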
{academia_mcp-1.11.0 → academia_mcp-1.11.2}/academia_mcp/tools/visit_webpage.py
@@ -1,12 +1,11 @@
  import re
- import json
- from typing import Optional, Dict, Any, cast
+ from typing import Any, Dict, List, Optional

  from markdownify import markdownify  # type: ignore
+ from pydantic import BaseModel, Field

- from academia_mcp.utils import get_with_retries, post_with_retries
  from academia_mcp.settings import settings
- from academia_mcp.utils import sanitize_output
+ from academia_mcp.utils import get_with_retries, post_with_retries, sanitize_output

  EXA_CONTENTS_URL = "https://api.exa.ai/contents"
  TAVILY_EXTRACT_URL = "https://api.tavily.com/extract"
@@ -14,6 +13,16 @@ AVAILABLE_PROVIDERS = ("basic", "exa", "tavily")
  ERROR_MESSAGE = "Failed to get content from the page. Try to use another provider."


+ class VisitWebpageResponse(BaseModel):  # type: ignore
+     id: str = Field(description="ID of the webpage, usually the URL")
+     provider: str = Field(description="Provider used to get the content")
+     text: Optional[str] = Field(description="Text content of the webpage", default=None)
+     images: List[str] = Field(description="Images of the webpage", default_factory=list)
+     error: Optional[str] = Field(
+         description="Error message if the webpage is not found", default=None
+     )
+
+
  def _exa_visit_webpage(url: str) -> Dict[str, Any]:
      key = settings.EXA_API_KEY or ""
      assert key, "Error: EXA_API_KEY is not set and no api_key was provided"
@@ -25,7 +34,7 @@ def _exa_visit_webpage(url: str) -> Dict[str, Any]:
      results = response.json()["results"]
      if not results:
          return {"error": ERROR_MESSAGE}
-     return cast(Dict[str, Any], results[0])
+     return {"text": results[0]["text"]}


  def _tavily_visit_webpage(url: str) -> Dict[str, Any]:
@@ -61,13 +70,9 @@ def _basic_visit_webpage(url: str) -> Dict[str, Any]:
          return {"error": str(e) + "\n" + ERROR_MESSAGE}


- def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
+ def visit_webpage(url: str, provider: Optional[str] = "basic") -> VisitWebpageResponse:
      """
      Visit a webpage and return the content.
-
-     Returns a JSON object serialized to a string. The structure is: {"id": "...", "text": "..."}.
-     If there are errors, the structure is: {"id": "...", "error": "..."}.
-     Use `json.loads` to deserialize the result if you want to get specific fields.
      Try to use both "tavily" and "basic" providers. They might work differently for the same URL.

      Args:
@@ -85,6 +90,9 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
      else:
          result = _basic_visit_webpage(url)

-     result["id"] = url
-     result["provider"] = provider
-     return sanitize_output(json.dumps(result, ensure_ascii=False))
+     result = VisitWebpageResponse(id=url, provider=provider, **result)
+     if result.text:
+         result.text = sanitize_output(result.text)
+     if result.error:
+         result.error = sanitize_output(result.error)
+     return result
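A hedged consumption sketch for the typed webpage result (caller code only; fields come from the VisitWebpageResponse model above, and the URL is illustrative):

    from academia_mcp.tools.visit_webpage import visit_webpage

    page = visit_webpage("https://arxiv.org/abs/2409.06820", provider="basic")
    if page.error:  # errors are a typed field now, not a JSON key
        print(page.error)
    else:
        print((page.text or "")[:200])  # text is Optional and may be None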