academia-mcp 1.9.0__py3-none-any.whl → 1.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
academia_mcp/server.py CHANGED
@@ -10,7 +10,12 @@ from uvicorn.config import LOGGING_CONFIG as UVICORN_LOGGING_CONFIG
10
10
  from academia_mcp.settings import settings
11
11
  from academia_mcp.tools.arxiv_search import arxiv_search
12
12
  from academia_mcp.tools.arxiv_download import arxiv_download
13
- from academia_mcp.tools.s2_citations import s2_get_citations, s2_get_references
13
+ from academia_mcp.tools.s2 import (
14
+ s2_get_citations,
15
+ s2_get_references,
16
+ s2_corpus_id_from_arxiv_id,
17
+ s2_get_info,
18
+ )
14
19
  from academia_mcp.tools.hf_datasets_search import hf_datasets_search
15
20
  from academia_mcp.tools.anthology_search import anthology_search
16
21
  from academia_mcp.tools.document_qa import document_qa
@@ -33,6 +38,7 @@ from academia_mcp.tools.bitflip import (
33
38
  score_research_proposals,
34
39
  )
35
40
  from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
41
+ from academia_mcp.tools.show_image import show_image
36
42
 
37
43
 
38
44
  def configure_uvicorn_style_logging(level: int = logging.INFO) -> None:
@@ -75,11 +81,14 @@ def run(
75
81
  server.add_tool(arxiv_download)
76
82
  server.add_tool(s2_get_citations)
77
83
  server.add_tool(s2_get_references)
84
+ server.add_tool(s2_corpus_id_from_arxiv_id)
85
+ server.add_tool(s2_get_info)
78
86
  server.add_tool(hf_datasets_search)
79
87
  server.add_tool(anthology_search)
80
88
  server.add_tool(get_latex_template)
81
89
  server.add_tool(get_latex_templates_list)
82
90
  server.add_tool(visit_webpage)
91
+ server.add_tool(show_image)
83
92
 
84
93
  if settings.WORKSPACE_DIR:
85
94
  server.add_tool(compile_latex)
@@ -2,7 +2,7 @@ from .arxiv_search import arxiv_search
2
2
  from .anthology_search import anthology_search
3
3
  from .arxiv_download import arxiv_download
4
4
  from .hf_datasets_search import hf_datasets_search
5
- from .s2_citations import s2_get_references, s2_get_citations
5
+ from .s2 import s2_get_references, s2_get_citations, s2_corpus_id_from_arxiv_id, s2_get_info
6
6
  from .document_qa import document_qa
7
7
  from .latex import (
8
8
  compile_latex,
@@ -14,6 +14,7 @@ from .web_search import web_search, tavily_web_search, exa_web_search, brave_web
14
14
  from .visit_webpage import visit_webpage
15
15
  from .bitflip import extract_bitflip_info, generate_research_proposals, score_research_proposals
16
16
  from .review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url
17
+ from .show_image import show_image
17
18
 
18
19
  __all__ = [
19
20
  "arxiv_search",
@@ -21,6 +22,8 @@ __all__ = [
21
22
  "anthology_search",
22
23
  "s2_get_references",
23
24
  "s2_get_citations",
25
+ "s2_corpus_id_from_arxiv_id",
26
+ "s2_get_info",
24
27
  "hf_datasets_search",
25
28
  "document_qa",
26
29
  "compile_latex",
@@ -38,4 +41,5 @@ __all__ = [
38
41
  "review_pdf_paper_by_url",
39
42
  "download_pdf_paper",
40
43
  "read_pdf",
44
+ "show_image",
41
45
  ]
@@ -126,3 +126,38 @@ def s2_get_references(
126
126
  entries = result["data"]
127
127
  total_count = len(result["data"]) + result["offset"]
128
128
  return _format_entries(entries, offset if offset else 0, total_count)
129
+
130
+
131
def s2_corpus_id_from_arxiv_id(arxiv_id: str) -> int:
    """
    Get the S2 Corpus ID for a given arXiv ID.

    Args:
        arxiv_id: The ID of a given arXiv paper.

    Returns:
        The Semantic Scholar Corpus ID as an integer.
    """
    assert isinstance(arxiv_id, str), "Error: Your arxiv_id must be a string"
    # Strip only a trailing version suffix such as "v2". A plain
    # arxiv_id.split("v")[0] would cut at the FIRST "v" and corrupt IDs that
    # contain the letter elsewhere (e.g. old-style "cs.CV/0001001" -> "cs.C").
    head, sep, tail = arxiv_id.rpartition("v")
    if sep and tail.isdigit():
        arxiv_id = head
    paper_url = PAPER_URL_TEMPLATE.format(paper_id=f"arxiv:{arxiv_id}", fields="externalIds")
    response = get_with_retries(paper_url)
    result = response.json()
    return int(result["externalIds"]["CorpusId"])
145
+
146
+
147
def s2_get_info(arxiv_id: str) -> str:
    """
    Get the S2 info for a given arXiv ID.

    Returns a JSON object serialized to a string. The structure is:
    {"title": ..., "authors": ..., "externalIds": ..., "venue": ..., "citationCount": ..., "publicationDate": ...}
    Use `json.loads` to deserialize the result if you want to get specific fields.

    Args:
        arxiv_id: The ID of a given arXiv paper.
    """
    assert isinstance(arxiv_id, str), "Error: Your arxiv_id must be a string"
    # Strip only a trailing version suffix ("v2"); splitting at the first "v"
    # breaks IDs whose identifier itself contains the letter (e.g. "cs.CV/0001001").
    head, sep, tail = arxiv_id.rpartition("v")
    if sep and tail.isdigit():
        arxiv_id = head
    paper_url = PAPER_URL_TEMPLATE.format(paper_id=f"arxiv:{arxiv_id}", fields=FIELDS)
    response = get_with_retries(paper_url)
    return json.dumps(response.json(), ensure_ascii=False)
@@ -0,0 +1,41 @@
1
+ import base64
2
+ from pathlib import Path
3
+ from io import BytesIO
4
+ from typing import Dict
5
+
6
+ import httpx
7
+ from PIL import Image
8
+
9
+ from academia_mcp.files import get_workspace_dir
10
+ from academia_mcp.settings import settings
11
+
12
+
13
def show_image(path: str) -> Dict[str, str]:
    """
    Reads an image from the specified URL or from the current work directory.

    Always call this function at the end of the code block.
    For instance:
    ```python
    show_image("https://example.com/image.png")
    ```
    Do not print it ever, just return as the last expression.

    Returns a dictionary with a single "image_base64" key containing the
    PNG-encoded image as a base64 string.

    Args:
        path: Path to a file inside the current work directory, or a web URL.
    """
    if path.startswith("http"):
        # httpx does not follow redirects by default, and image URLs
        # frequently redirect (CDNs, shorteners), so enable it explicitly.
        response = httpx.get(path, timeout=10, follow_redirects=True)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
    else:
        assert settings.WORKSPACE_DIR is not None, "WORKSPACE_DIR is not set"
        full_path = Path(path)
        if not full_path.exists():
            # Fall back to resolving the path relative to the workspace.
            full_path = Path(get_workspace_dir()) / path
        assert full_path.exists(), f"Image file {path} does not exist"
        image = Image.open(str(full_path))
    # PNG cannot encode every PIL mode (e.g. CMYK from JPEGs would make
    # image.save raise); normalize unsupported modes to RGB first.
    if image.mode not in ("1", "L", "LA", "P", "RGB", "RGBA", "I", "I;16"):
        image = image.convert("RGB")
    buffer_io = BytesIO()
    image.save(buffer_io, format="PNG")
    img_bytes = buffer_io.getvalue()
    return {"image_base64": base64.b64encode(img_bytes).decode("utf-8")}
@@ -0,0 +1,170 @@
1
+ Metadata-Version: 2.4
2
+ Name: academia-mcp
3
+ Version: 1.9.2
4
+ Summary: MCP server that provides different tools to search for scientific publications
5
+ Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
+ Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.12
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: mcp>=1.10.1
14
+ Requires-Dist: xmltodict>=0.14.0
15
+ Requires-Dist: types-xmltodict>=0.14.0
16
+ Requires-Dist: requests>=2.32.0
17
+ Requires-Dist: types-requests>=2.32.0
18
+ Requires-Dist: pypdf>=5.1.0
19
+ Requires-Dist: beautifulsoup4>=4.12.0
20
+ Requires-Dist: types-beautifulsoup4>=4.12.0
21
+ Requires-Dist: markdownify==0.14.1
22
+ Requires-Dist: acl-anthology==0.5.2
23
+ Requires-Dist: markdown==3.7.0
24
+ Requires-Dist: types-markdown==3.7.0.20250322
25
+ Requires-Dist: huggingface-hub>=0.32.4
26
+ Requires-Dist: fire>=0.7.0
27
+ Requires-Dist: openai>=1.97.1
28
+ Requires-Dist: jinja2>=3.1.6
29
+ Requires-Dist: datasets>=4.0.0
30
+ Requires-Dist: pymupdf>=1.26.4
31
+ Requires-Dist: pillow>=11.3.0
32
+ Requires-Dist: pydantic-settings>=2.6.0
33
+ Dynamic: license-file
34
+
35
+ # Academia MCP
36
+
37
+ [![PyPI](https://img.shields.io/pypi/v/academia-mcp?label=PyPI%20package)](https://pypi.org/project/academia-mcp/)
38
+ [![CI](https://github.com/IlyaGusev/academia_mcp/actions/workflows/python.yml/badge.svg)](https://github.com/IlyaGusev/academia_mcp/actions/workflows/python.yml)
39
+ [![License](https://img.shields.io/github/license/IlyaGusev/academia_mcp)](LICENSE)
40
+ [![smithery badge](https://smithery.ai/badge/@IlyaGusev/academia_mcp)](https://smithery.ai/server/@IlyaGusev/academia_mcp)
41
+ [![Verified on MseeP](https://mseep.ai/badge.svg)](https://mseep.ai/app/e818878b-c3a6-4b3d-a5b4-e54dcd1f1fed)
42
+
43
+ MCP server with tools to search, fetch, analyze, and report on scientific papers and datasets.
44
+
45
+ ### Features
46
+ - ArXiv search and download
47
+ - ACL Anthology search
48
+ - Hugging Face datasets search
49
+ - Semantic Scholar citations and references
50
+ - Web search via Exa, Brave, or Tavily
51
+ - Web page crawler, LaTeX compilation, PDF reading
52
+ - Optional LLM-powered tools for document QA and research proposal workflows
53
+
54
+ ### Requirements
55
+ - Python 3.12+
56
+
57
+ ### Install
58
+ - Using pip (end users):
59
+ ```bash
60
+ pip3 install academia-mcp
61
+ ```
62
+
63
+ - For development (uv + Makefile):
64
+ ```bash
65
+ uv venv .venv
66
+ make install
67
+ ```
68
+
69
+ ### Quickstart
70
+ - Run over HTTP (default transport):
71
+ ```bash
72
+ uv run -m academia_mcp --transport streamable-http
73
+ ```
74
+
75
+ - Run over stdio (for local MCP clients like Claude Desktop):
76
+ ```bash
77
+ python -m academia_mcp --transport stdio
78
+ ```
79
+
80
+ Notes:
81
+ - Transports: `stdio`, `sse`, `streamable-http`.
82
+ - `host`/`port` are used for HTTP transports; ignored for `stdio`. Default port is `5056` (or `PORT`).
83
+
84
+ ### Claude Desktop config
85
+ ```json
86
+ {
87
+ "mcpServers": {
88
+ "academia": {
89
+ "command": "python3",
90
+ "args": [
91
+ "-m",
92
+ "academia_mcp",
93
+ "--transport",
94
+ "stdio"
95
+ ]
96
+ }
97
+ }
98
+ }
99
+ ```
100
+
101
+ ### Available tools (one-liners)
102
+ - `arxiv_search`: Query arXiv with field-specific queries and filters.
103
+ - `arxiv_download`: Fetch a paper by ID and convert to structured text (HTML/PDF modes).
104
+ - `anthology_search`: Search ACL Anthology with fielded queries and optional date filtering.
105
+ - `hf_datasets_search`: Find Hugging Face datasets with filters and sorting.
106
+ - `s2_get_citations`: List papers citing a given arXiv paper (Semantic Scholar Graph).
107
+ - `s2_get_references`: List papers referenced by a given arXiv paper.
108
+ - `visit_webpage`: Fetch and normalize a web page.
109
+ - `web_search`: Unified search wrapper; available when at least one of Exa/Brave/Tavily keys is set.
110
+ - `exa_web_search`, `brave_web_search`, `tavily_web_search`: Provider-specific search.
111
+ - `get_latex_templates_list`, `get_latex_template`: Enumerate and fetch built-in LaTeX templates.
112
+ - `compile_latex`: Compile LaTeX to PDF in `WORKSPACE_DIR`.
113
+ - `read_pdf`: Extract text per page from a PDF.
114
+ - `download_pdf_paper`, `review_pdf_paper`: Download and optionally review PDFs (requires LLM + workspace).
115
+ - `document_qa`: Answer questions over provided document chunks (requires LLM).
116
+ - `extract_bitflip_info`, `generate_research_proposals`, `score_research_proposals`: Research proposal helpers (requires LLM).
117
+
118
+ Availability notes:
119
+ - Set `WORKSPACE_DIR` to enable `compile_latex`, `read_pdf`, `download_pdf_paper`, and `review_pdf_paper`.
120
+ - Set `OPENROUTER_API_KEY` to enable LLM tools (`document_qa`, `review_pdf_paper`, and bitflip tools).
121
+ - Set one or more of `EXA_API_KEY`, `BRAVE_API_KEY`, `TAVILY_API_KEY` to enable `web_search` and provider tools.
122
+
123
+ ### Environment variables
124
+ Set as needed depending on which tools you use:
125
+
126
+ - `OPENROUTER_API_KEY`: required for LLM-related tools.
127
+ - `BASE_URL`: override OpenRouter base URL.
128
+ - `DOCUMENT_QA_MODEL_NAME`: override default model for `document_qa`.
129
+ - `BITFLIP_MODEL_NAME`: override default model for bitflip tools.
130
+ - `TAVILY_API_KEY`: enables Tavily in `web_search`.
131
+ - `EXA_API_KEY`: enables Exa in `web_search` and `visit_webpage`.
132
+ - `BRAVE_API_KEY`: enables Brave in `web_search`.
133
+ - `WORKSPACE_DIR`: directory for generated files (PDFs, temp artifacts).
134
+ - `PORT`: HTTP port (default `5056`).
135
+
136
+ You can put these in a `.env` file in the project root.
137
+
138
+ ### Docker
139
+ Build the image:
140
+ ```bash
141
+ docker build -t academia_mcp .
142
+ ```
143
+
144
+ Run the server (HTTP):
145
+ ```bash
146
+ docker run --rm -p 5056:5056 \
147
+ -e PORT=5056 \
148
+ -e OPENROUTER_API_KEY=your_key_here \
149
+ -e WORKSPACE_DIR=/workspace \
150
+ -v "$PWD/workdir:/workspace" \
151
+ academia_mcp
152
+ ```
153
+
154
+ Or use existing image: `phoenix120/academia_mcp`
155
+
156
+ ### Examples
157
+ - [Comprehensive report screencast (YouTube)](https://www.youtube.com/watch?v=4bweqQcN6w8)
158
+ - [Single paper screencast (YouTube)](https://www.youtube.com/watch?v=IAAPMptJ5k8)
159
+
160
+ ### Makefile targets
161
+ - `make install`: install the package in editable mode with uv
162
+ - `make validate`: run black, flake8, and mypy (strict)
163
+ - `make test`: run the test suite with pytest
164
+ - `make publish`: build and publish using uv
165
+
166
+ ### LaTeX/PDF requirements
167
+ Only needed for LaTeX/PDF tools. Ensure a LaTeX distribution is installed and `pdflatex` is on PATH, as well as `latexmk`. On Debian/Ubuntu:
168
+ ```bash
169
+ sudo apt install texlive-latex-base texlive-fonts-recommended texlive-latex-extra texlive-science latexmk
170
+ ```
@@ -4,12 +4,12 @@ academia_mcp/files.py,sha256=ynIt0XbU1Z7EPWkv_hVX0pGKsLlmjYv-MVJLOfi6yzs,817
4
4
  academia_mcp/llm.py,sha256=zpGkuJFf58Ofgys_fi28-47_wJ1a7sIs_yZvI1Si6z0,993
5
5
  academia_mcp/pdf.py,sha256=9PlXzHGhb6ay3ldbTdxCcTWvH4TkET3bnb64mgoh9i0,1273
6
6
  academia_mcp/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- academia_mcp/server.py,sha256=ufNqg_C75ziQFXSZY7rYc41XA6LUBEAmCrX0RyMopjk,4198
7
+ academia_mcp/server.py,sha256=k6o-hxkeowHAgKGXUTWhZ6EI4u1RcSIfA-EFyoRmWZ8,4416
8
8
  academia_mcp/settings.py,sha256=vkhCrglL_OI1W0Me_1vS0WoQwDhpzj_XTfcuOylweYA,907
9
9
  academia_mcp/utils.py,sha256=P9U3RjYzcztE0KxXvJSy5wSBaUg2CM9tpByljYrsrl4,4607
10
10
  academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty,sha256=hGcEPCYBJS4vdhWvN_yEaJC4GvT_yDroI94CfY2Oguk,12268
11
11
  academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex,sha256=Tl1QkHXHRopw9VEfWrD3Layr5JP_0gIzVQjL4KXIWqc,15814
12
- academia_mcp/tools/__init__.py,sha256=Bf76VHYQtRKXsHukdwmxhDVcaVdtlsnMlHe4nxbcUMI,1253
12
+ academia_mcp/tools/__init__.py,sha256=ns0n9MC-TarV-nbkFGCkGptI2Jg-dmb4FU_7l-T_ET8,1390
13
13
  academia_mcp/tools/anthology_search.py,sha256=rhFpJZqGLABgr0raDuH0CARBiAJNJtEI4dlMrKNHfDQ,7669
14
14
  academia_mcp/tools/arxiv_download.py,sha256=gBY0_Kz0yGtVkLMwn6GrAyfBjovZVgcSMuyy67p65Cw,10474
15
15
  academia_mcp/tools/arxiv_search.py,sha256=pzM18qrF3QL03A53w003kE7hQi3s3QKtjgw0m7K88UY,8355
@@ -19,12 +19,13 @@ academia_mcp/tools/hf_datasets_search.py,sha256=KiBkqT4rXjEN4oc1AWZOPnqN_Go90TQo
19
19
  academia_mcp/tools/latex.py,sha256=B1Leqt1FHY6H3DlUgeYse4LMFpf4-K1FQViXl5MKk8A,6144
20
20
  academia_mcp/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  academia_mcp/tools/review.py,sha256=Va0lFJJKuk-NvWhKS3UZ-Dnuk7CyuDQ4S1nd70D-ffE,11117
22
- academia_mcp/tools/s2_citations.py,sha256=XZ3a4rsovAiI_D_kIy0GddRHSjpC5Fa_CS8dmB9Qftg,4902
22
+ academia_mcp/tools/s2.py,sha256=QX7-pbetab3Xt_1tvVPU6o5D_NAe9y6jcTGRBK1vwtY,6200
23
+ academia_mcp/tools/show_image.py,sha256=mbzWpgaZrGjSgUqnfiiPoWLbyAjUS27F04mjOUqb_S8,1371
23
24
  academia_mcp/tools/visit_webpage.py,sha256=uEqZIkMqscZG58Rx2wd6L_OQkGxo5SrzCkoUB55HhC0,2018
24
25
  academia_mcp/tools/web_search.py,sha256=kj3BrPdTVfyTjZ_9Jl2n3YUGzcRZk8diQs6cVSVmPrQ,6293
25
- academia_mcp-1.9.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
26
- academia_mcp-1.9.0.dist-info/METADATA,sha256=GQCmidDARqWtdZuOFww8Sdg5CRCGGOIooeHlS6TIlJ8,3754
27
- academia_mcp-1.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
28
- academia_mcp-1.9.0.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
29
- academia_mcp-1.9.0.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
30
- academia_mcp-1.9.0.dist-info/RECORD,,
26
+ academia_mcp-1.9.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
+ academia_mcp-1.9.2.dist-info/METADATA,sha256=c4NsjDhKqpgL-RKN58OXWdEzAj7Ip1RDAvOJovT0wmI,6310
28
+ academia_mcp-1.9.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
+ academia_mcp-1.9.2.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
30
+ academia_mcp-1.9.2.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
31
+ academia_mcp-1.9.2.dist-info/RECORD,,
@@ -1,122 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: academia-mcp
3
- Version: 1.9.0
4
- Summary: MCP server that provides different tools to search for scientific publications
5
- Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
- Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
7
- Classifier: Programming Language :: Python :: 3
8
- Classifier: License :: OSI Approved :: MIT License
9
- Classifier: Operating System :: OS Independent
10
- Requires-Python: >=3.12
11
- Description-Content-Type: text/markdown
12
- License-File: LICENSE
13
- Requires-Dist: mcp>=1.10.1
14
- Requires-Dist: xmltodict>=0.14.0
15
- Requires-Dist: types-xmltodict>=0.14.0
16
- Requires-Dist: requests>=2.32.0
17
- Requires-Dist: types-requests>=2.32.0
18
- Requires-Dist: pypdf>=5.1.0
19
- Requires-Dist: beautifulsoup4>=4.12.0
20
- Requires-Dist: types-beautifulsoup4>=4.12.0
21
- Requires-Dist: markdownify==0.14.1
22
- Requires-Dist: acl-anthology==0.5.2
23
- Requires-Dist: markdown==3.7.0
24
- Requires-Dist: types-markdown==3.7.0.20250322
25
- Requires-Dist: huggingface-hub>=0.32.4
26
- Requires-Dist: fire>=0.7.0
27
- Requires-Dist: openai>=1.97.1
28
- Requires-Dist: jinja2>=3.1.6
29
- Requires-Dist: datasets>=4.0.0
30
- Requires-Dist: pymupdf>=1.26.4
31
- Requires-Dist: pillow>=11.3.0
32
- Requires-Dist: pydantic-settings>=2.6.0
33
- Dynamic: license-file
34
-
35
- # Academia MCP
36
-
37
- [![PyPI](https://img.shields.io/pypi/v/codearkt?label=PyPI%20package)](https://pypi.org/project/academia-mcp/)
38
- [![CI](https://github.com/IlyaGusev/academia_mcp/actions/workflows/python.yml/badge.svg)](https://github.com/IlyaGusev/academia_mcp/actions/workflows/python.yml)
39
- [![License](https://img.shields.io/github/license/IlyaGusev/academia_mcp)](LICENSE)
40
- [![smithery badge](https://smithery.ai/badge/@IlyaGusev/academia_mcp)](https://smithery.ai/server/@IlyaGusev/academia_mcp)
41
-
42
- A collection of MCP tools related to the search of scientific papers:
43
- - ArXiv search and download
44
- - ACL Anthology search
45
- - HuggingFact datasets search
46
- - Semantic Scholar citation graphs
47
- - Web search: Exa/Brave/Tavily
48
- - Page crawler
49
-
50
- ## Install
51
-
52
- - Using pip (end users):
53
- ```
54
- pip3 install academia-mcp
55
- ```
56
-
57
- - For development (uv + Makefile):
58
- ```
59
- uv venv .venv
60
- make install
61
- ```
62
-
63
- ## Examples
64
- Comprehensive report screencast: https://www.youtube.com/watch?v=4bweqQcN6w8
65
-
66
- Single paper screencast: https://www.youtube.com/watch?v=IAAPMptJ5k8
67
-
68
-
69
- ## Claude Desktop config
70
- ```
71
- {
72
- "mcpServers": {
73
- "academia": {
74
- "command": "python3",
75
- "args": [
76
- "-m",
77
- "academia_mcp",
78
- "--transport",
79
- "stdio"
80
- ]
81
- }
82
- }
83
- }
84
- ```
85
-
86
- ## Running the server (CLI)
87
-
88
- ```
89
- uv run -m academia_mcp --transport streamable-http
90
- ```
91
-
92
- Notes:
93
- - Transports supported: `stdio`, `sse`, `streamable-http`.
94
- - Host/port are used for HTTP transports; for `stdio` they are ignored.
95
-
96
- ## Makefile targets
97
-
98
- - `make install`: install the package in editable mode with uv.
99
- - `make validate`: run black, flake8, and mypy (strict).
100
- - `make test`: run the test suite with pytest.
101
- - `make publish`: build and publish using uv.
102
-
103
- ## Environment variables
104
-
105
- Set as needed depending on which tools you use:
106
-
107
- - `TAVILY_API_KEY`: enables Tavily in `web_search`.
108
- - `EXA_API_KEY`: enables Exa in `web_search` and `visit_webpage`.
109
- - `BRAVE_API_KEY`: enables Brave in `web_search`.
110
- - `OPENROUTER_API_KEY`: required for `document_qa`.
111
- - `BASE_URL`: override OpenRouter base URL for `document_qa` and bitflip tools.
112
- - `DOCUMENT_QA_MODEL_NAME`: override default model for `document_qa`.
113
- - `BITFLIP_MODEL_NAME`: override default model for bitflip tools.
114
- - `WORKSPACE_DIR`: directory for generated files (PDFs, temp artifacts).
115
-
116
- ## md_to_pdf requirements
117
-
118
- The `md_to_pdf` tool invokes `pdflatex`. Ensure a LaTeX distribution is installed and `pdflatex` is on PATH. On Debian/Ubuntu:
119
-
120
- ```
121
- sudo apt install texlive-latex-base texlive-fonts-recommended texlive-latex-extra texlive-science
122
- ```