academia-mcp 1.8.0__tar.gz → 1.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/PKG-INFO +2 -1
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/files.py +3 -2
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/llm.py +4 -3
- academia_mcp-1.9.0/academia_mcp/server.py +126 -0
- academia_mcp-1.9.0/academia_mcp/settings.py +33 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/__init__.py +4 -5
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/bitflip.py +5 -5
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/document_qa.py +4 -6
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/latex.py +9 -18
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/review.py +32 -9
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/visit_webpage.py +4 -4
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/web_search.py +7 -7
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp.egg-info/PKG-INFO +2 -1
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp.egg-info/SOURCES.txt +1 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp.egg-info/requires.txt +1 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/pyproject.toml +3 -1
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/tests/test_latex.py +3 -16
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/tests/test_review.py +7 -1
- academia_mcp-1.8.0/academia_mcp/server.py +0 -103
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/LICENSE +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/README.md +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/__init__.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/__main__.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/pdf.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/py.typed +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/anthology_search.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/arxiv_download.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/arxiv_search.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/hf_datasets_search.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/py.typed +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/tools/s2_citations.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp/utils.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp.egg-info/dependency_links.txt +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp.egg-info/entry_points.txt +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/academia_mcp.egg-info/top_level.txt +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/setup.cfg +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/tests/test_anthology_search.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/tests/test_arxiv_download.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/tests/test_arxiv_search.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/tests/test_bitflip.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/tests/test_document_qa.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/tests/test_extract_json.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/tests/test_hf_dataset_search.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/tests/test_s2_citations.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/tests/test_visit_webpage.py +0 -0
- {academia_mcp-1.8.0 → academia_mcp-1.9.0}/tests/test_web_search.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: academia-mcp
|
3
|
-
Version: 1.8.0
|
3
|
+
Version: 1.9.0
|
4
4
|
Summary: MCP server that provides different tools to search for scientific publications
|
5
5
|
Author-email: Ilya Gusev <phoenixilya@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
|
@@ -29,6 +29,7 @@ Requires-Dist: jinja2>=3.1.6
|
|
29
29
|
Requires-Dist: datasets>=4.0.0
|
30
30
|
Requires-Dist: pymupdf>=1.26.4
|
31
31
|
Requires-Dist: pillow>=11.3.0
|
32
|
+
Requires-Dist: pydantic-settings>=2.6.0
|
32
33
|
Dynamic: license-file
|
33
34
|
|
34
35
|
# Academia MCP
|
@@ -1,7 +1,8 @@
|
|
1
|
-
import os
|
2
1
|
from typing import Optional
|
3
2
|
from pathlib import Path
|
4
3
|
|
4
|
+
from academia_mcp.settings import settings
|
5
|
+
|
5
6
|
DIR_PATH = Path(__file__).parent
|
6
7
|
ROOT_PATH = DIR_PATH.parent
|
7
8
|
DEFAULT_WORKSPACE_DIR_PATH: Path = DIR_PATH / "workdir"
|
@@ -14,7 +15,7 @@ class WorkspaceDirectory:
|
|
14
15
|
@classmethod
|
15
16
|
def get_dir(cls) -> Path:
|
16
17
|
if cls.workspace_dir is None:
|
17
|
-
return Path(
|
18
|
+
return Path(settings.WORKSPACE_DIR)
|
18
19
|
return cls.workspace_dir
|
19
20
|
|
20
21
|
@classmethod
|
@@ -1,10 +1,11 @@
|
|
1
|
-
import os
|
2
1
|
from typing import List, Dict, Any
|
3
2
|
|
4
3
|
from pydantic import BaseModel
|
5
4
|
from openai import AsyncOpenAI
|
6
5
|
from openai.types.chat.chat_completion_message import ChatCompletionMessage
|
7
6
|
|
7
|
+
from academia_mcp.settings import settings
|
8
|
+
|
8
9
|
|
9
10
|
class ChatMessage(BaseModel): # type: ignore
|
10
11
|
role: str
|
@@ -15,9 +16,9 @@ ChatMessages = List[ChatMessage]
|
|
15
16
|
|
16
17
|
|
17
18
|
async def llm_acall(model_name: str, messages: ChatMessages, **kwargs: Any) -> str:
|
18
|
-
key =
|
19
|
+
key = settings.OPENROUTER_API_KEY
|
19
20
|
assert key, "Please set OPENROUTER_API_KEY in the environment variables"
|
20
|
-
base_url =
|
21
|
+
base_url = settings.BASE_URL
|
21
22
|
|
22
23
|
client = AsyncOpenAI(base_url=base_url, api_key=key)
|
23
24
|
response: ChatCompletionMessage = (
|
@@ -0,0 +1,126 @@
|
|
1
|
+
import socket
|
2
|
+
import logging
|
3
|
+
from logging.config import dictConfig
|
4
|
+
from typing import Optional, Literal
|
5
|
+
|
6
|
+
import fire # type: ignore
|
7
|
+
from mcp.server.fastmcp import FastMCP
|
8
|
+
from uvicorn.config import LOGGING_CONFIG as UVICORN_LOGGING_CONFIG
|
9
|
+
|
10
|
+
from academia_mcp.settings import settings
|
11
|
+
from academia_mcp.tools.arxiv_search import arxiv_search
|
12
|
+
from academia_mcp.tools.arxiv_download import arxiv_download
|
13
|
+
from academia_mcp.tools.s2_citations import s2_get_citations, s2_get_references
|
14
|
+
from academia_mcp.tools.hf_datasets_search import hf_datasets_search
|
15
|
+
from academia_mcp.tools.anthology_search import anthology_search
|
16
|
+
from academia_mcp.tools.document_qa import document_qa
|
17
|
+
from academia_mcp.tools.latex import (
|
18
|
+
compile_latex,
|
19
|
+
get_latex_template,
|
20
|
+
get_latex_templates_list,
|
21
|
+
read_pdf,
|
22
|
+
)
|
23
|
+
from academia_mcp.tools.web_search import (
|
24
|
+
web_search,
|
25
|
+
tavily_web_search,
|
26
|
+
exa_web_search,
|
27
|
+
brave_web_search,
|
28
|
+
)
|
29
|
+
from academia_mcp.tools.visit_webpage import visit_webpage
|
30
|
+
from academia_mcp.tools.bitflip import (
|
31
|
+
extract_bitflip_info,
|
32
|
+
generate_research_proposals,
|
33
|
+
score_research_proposals,
|
34
|
+
)
|
35
|
+
from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
|
36
|
+
|
37
|
+
|
38
|
+
def configure_uvicorn_style_logging(level: int = logging.INFO) -> None:
|
39
|
+
config = {**UVICORN_LOGGING_CONFIG}
|
40
|
+
config["disable_existing_loggers"] = False
|
41
|
+
config["root"] = {"handlers": ["default"], "level": logging.getLevelName(level)}
|
42
|
+
dictConfig(config)
|
43
|
+
|
44
|
+
|
45
|
+
def find_free_port() -> int:
|
46
|
+
for port in range(5000, 6001):
|
47
|
+
try:
|
48
|
+
with socket.socket() as s:
|
49
|
+
s.bind(("", port))
|
50
|
+
return port
|
51
|
+
except Exception:
|
52
|
+
continue
|
53
|
+
raise RuntimeError("No free port in range 5000-6000 found")
|
54
|
+
|
55
|
+
|
56
|
+
def run(
|
57
|
+
host: str = "0.0.0.0",
|
58
|
+
port: Optional[int] = None,
|
59
|
+
mount_path: str = "/",
|
60
|
+
streamable_http_path: str = "/mcp",
|
61
|
+
transport: Literal["stdio", "sse", "streamable-http"] = "streamable-http",
|
62
|
+
disable_web_search_tools: bool = False,
|
63
|
+
disable_llm_tools: bool = False,
|
64
|
+
) -> None:
|
65
|
+
configure_uvicorn_style_logging()
|
66
|
+
server = FastMCP(
|
67
|
+
"Academia MCP",
|
68
|
+
stateless_http=True,
|
69
|
+
streamable_http_path=streamable_http_path,
|
70
|
+
mount_path=mount_path,
|
71
|
+
)
|
72
|
+
logger = logging.getLogger(__name__)
|
73
|
+
|
74
|
+
server.add_tool(arxiv_search)
|
75
|
+
server.add_tool(arxiv_download)
|
76
|
+
server.add_tool(s2_get_citations)
|
77
|
+
server.add_tool(s2_get_references)
|
78
|
+
server.add_tool(hf_datasets_search)
|
79
|
+
server.add_tool(anthology_search)
|
80
|
+
server.add_tool(get_latex_template)
|
81
|
+
server.add_tool(get_latex_templates_list)
|
82
|
+
server.add_tool(visit_webpage)
|
83
|
+
|
84
|
+
if settings.WORKSPACE_DIR:
|
85
|
+
server.add_tool(compile_latex)
|
86
|
+
server.add_tool(download_pdf_paper)
|
87
|
+
server.add_tool(read_pdf)
|
88
|
+
else:
|
89
|
+
logger.warning(
|
90
|
+
"WORKSPACE_DIR is not set, compile_latex/download_pdf_paper/read_pdf will not be available!"
|
91
|
+
)
|
92
|
+
|
93
|
+
if not disable_web_search_tools:
|
94
|
+
if settings.TAVILY_API_KEY:
|
95
|
+
server.add_tool(tavily_web_search)
|
96
|
+
if settings.EXA_API_KEY:
|
97
|
+
server.add_tool(exa_web_search)
|
98
|
+
if settings.BRAVE_API_KEY:
|
99
|
+
server.add_tool(brave_web_search)
|
100
|
+
if settings.EXA_API_KEY or settings.BRAVE_API_KEY or settings.TAVILY_API_KEY:
|
101
|
+
server.add_tool(web_search)
|
102
|
+
else:
|
103
|
+
logger.warning("No web search tools keys are set, web_search will not be available!")
|
104
|
+
|
105
|
+
if not disable_llm_tools and settings.OPENROUTER_API_KEY:
|
106
|
+
server.add_tool(extract_bitflip_info)
|
107
|
+
server.add_tool(generate_research_proposals)
|
108
|
+
server.add_tool(score_research_proposals)
|
109
|
+
server.add_tool(document_qa)
|
110
|
+
if settings.WORKSPACE_DIR:
|
111
|
+
server.add_tool(review_pdf_paper)
|
112
|
+
else:
|
113
|
+
logger.warning("No OpenRouter API key is set, LLM-related tools will not be available!")
|
114
|
+
|
115
|
+
if port is None:
|
116
|
+
if settings.PORT is not None:
|
117
|
+
port = int(settings.PORT)
|
118
|
+
else:
|
119
|
+
port = find_free_port()
|
120
|
+
server.settings.port = port
|
121
|
+
server.settings.host = host
|
122
|
+
server.run(transport=transport)
|
123
|
+
|
124
|
+
|
125
|
+
if __name__ == "__main__":
|
126
|
+
fire.Fire(run)
|
@@ -0,0 +1,33 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
5
|
+
|
6
|
+
|
7
|
+
class Settings(BaseSettings):
|
8
|
+
BASE_URL: str = "https://openrouter.ai/api/v1"
|
9
|
+
|
10
|
+
OPENROUTER_API_KEY: str = ""
|
11
|
+
TAVILY_API_KEY: Optional[str] = None
|
12
|
+
EXA_API_KEY: Optional[str] = None
|
13
|
+
BRAVE_API_KEY: Optional[str] = None
|
14
|
+
|
15
|
+
REVIEW_MODEL_NAME: str = "gpt-5"
|
16
|
+
BITFLIP_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
|
17
|
+
BITFLIP_MAX_COMPLETION_TOKENS: int = 16384
|
18
|
+
DOCUMENT_QA_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
|
19
|
+
DOCUMENT_QA_QUESTION_MAX_LENGTH: int = 10000
|
20
|
+
DOCUMENT_QA_DOCUMENT_MAX_LENGTH: int = 200000
|
21
|
+
|
22
|
+
PORT: int = 5056
|
23
|
+
WORKSPACE_DIR: Optional[Path] = None
|
24
|
+
|
25
|
+
model_config = SettingsConfigDict(
|
26
|
+
env_file=".env",
|
27
|
+
env_file_encoding="utf-8",
|
28
|
+
env_prefix="",
|
29
|
+
extra="ignore",
|
30
|
+
)
|
31
|
+
|
32
|
+
|
33
|
+
settings = Settings()
|
@@ -5,8 +5,7 @@ from .hf_datasets_search import hf_datasets_search
|
|
5
5
|
from .s2_citations import s2_get_references, s2_get_citations
|
6
6
|
from .document_qa import document_qa
|
7
7
|
from .latex import (
|
8
|
-
|
9
|
-
compile_latex_from_str,
|
8
|
+
compile_latex,
|
10
9
|
get_latex_template,
|
11
10
|
get_latex_templates_list,
|
12
11
|
read_pdf,
|
@@ -14,7 +13,7 @@ from .latex import (
|
|
14
13
|
from .web_search import web_search, tavily_web_search, exa_web_search, brave_web_search
|
15
14
|
from .visit_webpage import visit_webpage
|
16
15
|
from .bitflip import extract_bitflip_info, generate_research_proposals, score_research_proposals
|
17
|
-
from .review import review_pdf_paper, download_pdf_paper
|
16
|
+
from .review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url
|
18
17
|
|
19
18
|
__all__ = [
|
20
19
|
"arxiv_search",
|
@@ -24,8 +23,7 @@ __all__ = [
|
|
24
23
|
"s2_get_citations",
|
25
24
|
"hf_datasets_search",
|
26
25
|
"document_qa",
|
27
|
-
"compile_latex_from_file",
|
28
|
-
"compile_latex_from_str",
|
26
|
+
"compile_latex",
|
29
27
|
"get_latex_template",
|
30
28
|
"get_latex_templates_list",
|
31
29
|
"web_search",
|
@@ -37,6 +35,7 @@ __all__ = [
|
|
37
35
|
"generate_research_proposals",
|
38
36
|
"score_research_proposals",
|
39
37
|
"review_pdf_paper",
|
38
|
+
"review_pdf_paper_by_url",
|
40
39
|
"download_pdf_paper",
|
41
40
|
"read_pdf",
|
42
41
|
]
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# https://web.stanford.edu/class/cs197c/slides/02-literature-search.pdf
|
3
3
|
|
4
4
|
import json
|
5
|
-
import os
|
6
5
|
import random
|
7
6
|
from typing import List, Optional, Any, Dict
|
8
7
|
|
@@ -12,6 +11,7 @@ from datasets import load_dataset # type: ignore
|
|
12
11
|
from academia_mcp.tools.arxiv_download import arxiv_download
|
13
12
|
from academia_mcp.utils import extract_json, encode_prompt
|
14
13
|
from academia_mcp.llm import llm_acall, ChatMessage
|
14
|
+
from academia_mcp.settings import settings
|
15
15
|
|
16
16
|
|
17
17
|
class ProposalDataset:
|
@@ -201,7 +201,7 @@ async def extract_bitflip_info(arxiv_id: str) -> str:
|
|
201
201
|
Args:
|
202
202
|
arxiv_id: The arXiv ID of the paper to extract the Bit-Flip information from.
|
203
203
|
"""
|
204
|
-
model_name =
|
204
|
+
model_name = settings.BITFLIP_MODEL_NAME
|
205
205
|
paper = arxiv_download(arxiv_id)
|
206
206
|
abstract = json.loads(paper)["abstract"]
|
207
207
|
prompt = encode_prompt(EXTRACT_PROMPT, abstract=abstract)
|
@@ -240,8 +240,8 @@ async def generate_research_proposals(
|
|
240
240
|
]
|
241
241
|
Use `json.loads` to deserialize the result if you want to get specific items.
|
242
242
|
"""
|
243
|
-
model_name =
|
244
|
-
max_completion_tokens = int(
|
243
|
+
model_name = settings.BITFLIP_MODEL_NAME
|
244
|
+
max_completion_tokens = int(settings.BITFLIP_MAX_COMPLETION_TOKENS)
|
245
245
|
examples = ProposalDataset.get_dataset()[:]
|
246
246
|
examples = random.choices(examples, k=2)
|
247
247
|
|
@@ -293,7 +293,7 @@ async def score_research_proposals(proposals: str | List[str | Dict[str, Any] |
|
|
293
293
|
Args:
|
294
294
|
proposals: A list of JSON strings with research proposals.
|
295
295
|
"""
|
296
|
-
model_name =
|
296
|
+
model_name = settings.BITFLIP_MODEL_NAME
|
297
297
|
if isinstance(proposals, str):
|
298
298
|
proposals = json.loads(proposals)
|
299
299
|
assert isinstance(proposals, list), "Proposals should be a list of JSON strings"
|
@@ -1,14 +1,12 @@
|
|
1
|
-
import os
|
2
1
|
import json
|
3
2
|
from typing import List, Any, Dict
|
4
|
-
from dotenv import load_dotenv
|
5
3
|
|
6
4
|
from pydantic import BaseModel
|
7
5
|
|
8
6
|
from academia_mcp.llm import llm_acall
|
9
7
|
from academia_mcp.utils import truncate_content
|
8
|
+
from academia_mcp.settings import settings
|
10
9
|
|
11
|
-
load_dotenv()
|
12
10
|
|
13
11
|
PROMPT = """You are a helpful assistant that answers questions about documents accurately and concisely.
|
14
12
|
Please answer the following questions based solely on the provided document.
|
@@ -65,10 +63,10 @@ async def document_qa(
|
|
65
63
|
document = json.dumps(document)
|
66
64
|
assert document and document.strip(), "Please provide non-empty 'document'"
|
67
65
|
|
68
|
-
question = truncate_content(question,
|
69
|
-
document = truncate_content(document,
|
66
|
+
question = truncate_content(question, settings.DOCUMENT_QA_QUESTION_MAX_LENGTH)
|
67
|
+
document = truncate_content(document, settings.DOCUMENT_QA_DOCUMENT_MAX_LENGTH)
|
70
68
|
|
71
|
-
model_name =
|
69
|
+
model_name = settings.DOCUMENT_QA_MODEL_NAME
|
72
70
|
prompt = PROMPT.format(question=question, document=document)
|
73
71
|
content = await llm_acall(
|
74
72
|
model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
|
@@ -46,7 +46,7 @@ def get_latex_template(template_name: str) -> str:
|
|
46
46
|
return json.dumps({"template": template_path.read_text(), "style": style_path.read_text()})
|
47
47
|
|
48
48
|
|
49
|
-
def compile_latex_from_file(
|
49
|
+
def compile_latex(
|
50
50
|
input_filename: str, output_filename: str = "output.pdf", timeout: int = 60
|
51
51
|
) -> str:
|
52
52
|
"""
|
@@ -63,24 +63,8 @@ def compile_latex_from_file(
|
|
63
63
|
if not input_filename_path.exists():
|
64
64
|
input_filename_path = Path(get_workspace_dir()) / input_filename
|
65
65
|
assert input_filename_path.exists(), f"Input file {input_filename} does not exist"
|
66
|
-
|
67
|
-
latex_code = file.read()
|
68
|
-
return compile_latex_from_str(latex_code, output_filename, timeout)
|
66
|
+
latex_code = input_filename_path.read_text(encoding="utf-8")
|
69
67
|
|
70
|
-
|
71
|
-
def compile_latex_from_str(
|
72
|
-
latex_code: str, output_filename: str = "output.pdf", timeout: int = 60
|
73
|
-
) -> str:
|
74
|
-
"""
|
75
|
-
Compile a latex code.
|
76
|
-
|
77
|
-
Returns a string with the result of the compilation.
|
78
|
-
|
79
|
-
Args:
|
80
|
-
latex_code: The latex code to compile.
|
81
|
-
output_filename: The path to the output pdf file.
|
82
|
-
timeout: The timeout for the compilation. 60 seconds by default.
|
83
|
-
"""
|
84
68
|
if shutil.which("pdflatex") is None:
|
85
69
|
return "pdflatex is not installed or not found in PATH."
|
86
70
|
|
@@ -116,6 +100,13 @@ def compile_latex_from_str(
|
|
116
100
|
except Exception:
|
117
101
|
pass
|
118
102
|
|
103
|
+
try:
|
104
|
+
bib_source_path = input_filename_path.parent / "references.bib"
|
105
|
+
if bib_source_path.exists():
|
106
|
+
shutil.copyfile(bib_source_path, temp_dir_path / "references.bib")
|
107
|
+
except Exception:
|
108
|
+
pass
|
109
|
+
|
119
110
|
try:
|
120
111
|
subprocess.run(
|
121
112
|
[
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import base64
|
2
|
-
import os
|
3
2
|
import uuid
|
3
|
+
import tempfile
|
4
4
|
from io import BytesIO
|
5
5
|
from pathlib import Path
|
6
6
|
from typing import List, Dict, Any
|
@@ -8,6 +8,7 @@ from typing import List, Dict, Any
|
|
8
8
|
from academia_mcp.pdf import parse_pdf_file_to_images, parse_pdf_file, download_pdf
|
9
9
|
from academia_mcp.llm import llm_acall, ChatMessage
|
10
10
|
from academia_mcp.files import get_workspace_dir
|
11
|
+
from academia_mcp.settings import settings
|
11
12
|
|
12
13
|
|
13
14
|
PROMPT = """
|
@@ -138,6 +139,16 @@ Always produce a correct JSON object.
|
|
138
139
|
"""
|
139
140
|
|
140
141
|
|
142
|
+
def _create_pdf_filename(pdf_url: str) -> str:
|
143
|
+
if "arxiv.org/pdf" in pdf_url:
|
144
|
+
pdf_filename = pdf_url.split("/")[-1]
|
145
|
+
else:
|
146
|
+
pdf_filename = str(uuid.uuid4())
|
147
|
+
if not pdf_filename.endswith(".pdf"):
|
148
|
+
pdf_filename += ".pdf"
|
149
|
+
return pdf_filename
|
150
|
+
|
151
|
+
|
141
152
|
def download_pdf_paper(pdf_url: str) -> str:
|
142
153
|
"""
|
143
154
|
Download a pdf file from a url to the workspace directory.
|
@@ -147,13 +158,7 @@ def download_pdf_paper(pdf_url: str) -> str:
|
|
147
158
|
Args:
|
148
159
|
pdf_url: The url of the pdf file.
|
149
160
|
"""
|
150
|
-
|
151
|
-
pdf_filename = pdf_url.split("/")[-1]
|
152
|
-
else:
|
153
|
-
pdf_filename = str(uuid.uuid4())
|
154
|
-
if not pdf_filename.endswith(".pdf"):
|
155
|
-
pdf_filename += ".pdf"
|
156
|
-
|
161
|
+
pdf_filename = _create_pdf_filename(pdf_url)
|
157
162
|
pdf_path = Path(get_workspace_dir()) / pdf_filename
|
158
163
|
download_pdf(pdf_url, pdf_path)
|
159
164
|
return pdf_filename
|
@@ -198,7 +203,7 @@ async def review_pdf_paper(pdf_filename: str) -> str:
|
|
198
203
|
"text": "####\n\nInstructions:\n\n" + PROMPT,
|
199
204
|
}
|
200
205
|
)
|
201
|
-
model_name =
|
206
|
+
model_name = settings.REVIEW_MODEL_NAME
|
202
207
|
llm_response = await llm_acall(
|
203
208
|
model_name=model_name,
|
204
209
|
messages=[
|
@@ -206,3 +211,21 @@ async def review_pdf_paper(pdf_filename: str) -> str:
|
|
206
211
|
],
|
207
212
|
)
|
208
213
|
return llm_response.strip()
|
214
|
+
|
215
|
+
|
216
|
+
async def review_pdf_paper_by_url(pdf_url: str) -> str:
|
217
|
+
"""
|
218
|
+
Review a pdf file with a paper by url.
|
219
|
+
It downloads the pdf file and then reviews it.
|
220
|
+
It parses the pdf file into images and then sends the images to the LLM for review.
|
221
|
+
It can detect different issues with the paper formatting.
|
222
|
+
Returns a proper NeurIPS-style review.
|
223
|
+
|
224
|
+
Args:
|
225
|
+
pdf_url: The url of the pdf file.
|
226
|
+
"""
|
227
|
+
pdf_filename = _create_pdf_filename(pdf_url)
|
228
|
+
with tempfile.TemporaryDirectory(prefix="temp_pdf_") as temp_dir:
|
229
|
+
pdf_path = Path(temp_dir) / pdf_filename
|
230
|
+
download_pdf(pdf_url, pdf_path)
|
231
|
+
return await review_pdf_paper(str(pdf_path))
|
@@ -1,18 +1,18 @@
|
|
1
1
|
import re
|
2
|
-
import os
|
3
2
|
import json
|
4
3
|
from typing import Optional
|
5
4
|
|
6
5
|
from markdownify import markdownify # type: ignore
|
7
6
|
|
8
7
|
from academia_mcp.utils import get_with_retries, post_with_retries
|
8
|
+
from academia_mcp.settings import settings
|
9
9
|
|
10
10
|
EXA_CONTENTS_URL = "https://api.exa.ai/contents"
|
11
11
|
AVAILABLE_PROVIDERS = ("basic", "exa")
|
12
12
|
|
13
13
|
|
14
14
|
def _exa_visit_webpage(url: str) -> str:
|
15
|
-
key =
|
15
|
+
key = settings.EXA_API_KEY or ""
|
16
16
|
assert key, "Error: EXA_API_KEY is not set and no api_key was provided"
|
17
17
|
payload = {
|
18
18
|
"urls": [url],
|
@@ -38,14 +38,14 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
|
|
38
38
|
provider in AVAILABLE_PROVIDERS
|
39
39
|
), f"Invalid provider: {provider}. Available providers: {AVAILABLE_PROVIDERS}"
|
40
40
|
|
41
|
-
if provider == "exa":
|
41
|
+
if provider == "exa" and settings.EXA_API_KEY:
|
42
42
|
return _exa_visit_webpage(url)
|
43
43
|
|
44
44
|
assert provider == "basic"
|
45
45
|
response = get_with_retries(url)
|
46
46
|
content_type = response.headers.get("content-type", "").lower()
|
47
47
|
if not content_type or (not content_type.startswith("text/") and "html" not in content_type):
|
48
|
-
if
|
48
|
+
if settings.EXA_API_KEY:
|
49
49
|
return _exa_visit_webpage(url)
|
50
50
|
return json.dumps(
|
51
51
|
{"id": url, "error": f"Unsupported content-type: {content_type or 'unknown'}"}
|
@@ -1,8 +1,8 @@
|
|
1
|
-
import os
|
2
1
|
import json
|
3
2
|
from typing import Optional
|
4
3
|
|
5
4
|
from academia_mcp.utils import post_with_retries, get_with_retries
|
5
|
+
from academia_mcp.settings import settings
|
6
6
|
|
7
7
|
|
8
8
|
EXA_SEARCH_URL = "https://api.exa.ai/search"
|
@@ -32,9 +32,9 @@ def web_search(
|
|
32
32
|
providers = ("tavily", "brave", "exa")
|
33
33
|
assert provider in providers, "Error: provider must be either 'exa', 'tavily' or 'brave'"
|
34
34
|
|
35
|
-
is_tavily_available =
|
36
|
-
is_exa_available =
|
37
|
-
is_brave_available =
|
35
|
+
is_tavily_available = bool(settings.TAVILY_API_KEY)
|
36
|
+
is_exa_available = bool(settings.EXA_API_KEY)
|
37
|
+
is_brave_available = bool(settings.BRAVE_API_KEY)
|
38
38
|
assert is_tavily_available or is_exa_available or is_brave_available
|
39
39
|
availability = {
|
40
40
|
"tavily": is_tavily_available,
|
@@ -76,7 +76,7 @@ def tavily_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
76
76
|
assert isinstance(limit, int), "Error: limit should be an integer"
|
77
77
|
assert 0 < limit <= 25, "Error: limit should be between 1 and 25"
|
78
78
|
|
79
|
-
key =
|
79
|
+
key = settings.TAVILY_API_KEY or ""
|
80
80
|
assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
|
81
81
|
payload = {
|
82
82
|
"query": query,
|
@@ -112,7 +112,7 @@ def exa_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
112
112
|
assert isinstance(limit, int), "Error: limit should be an integer"
|
113
113
|
assert 0 < limit <= 25, "Error: limit should be between 1 and 25"
|
114
114
|
|
115
|
-
key =
|
115
|
+
key = settings.EXA_API_KEY or ""
|
116
116
|
assert key, "Error: EXA_API_KEY is not set and no api_key was provided"
|
117
117
|
payload = {
|
118
118
|
"query": query,
|
@@ -151,7 +151,7 @@ def brave_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
151
151
|
assert isinstance(limit, int), "Error: limit should be an integer"
|
152
152
|
assert 0 < limit <= 20, "Error: limit should be between 1 and 20"
|
153
153
|
|
154
|
-
key =
|
154
|
+
key = settings.BRAVE_API_KEY or ""
|
155
155
|
assert key, "Error: BRAVE_API_KEY is not set and no api_key was provided"
|
156
156
|
payload = {
|
157
157
|
"q": query,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: academia-mcp
|
3
|
-
Version: 1.8.0
|
3
|
+
Version: 1.9.0
|
4
4
|
Summary: MCP server that provides different tools to search for scientific publications
|
5
5
|
Author-email: Ilya Gusev <phoenixilya@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
|
@@ -29,6 +29,7 @@ Requires-Dist: jinja2>=3.1.6
|
|
29
29
|
Requires-Dist: datasets>=4.0.0
|
30
30
|
Requires-Dist: pymupdf>=1.26.4
|
31
31
|
Requires-Dist: pillow>=11.3.0
|
32
|
+
Requires-Dist: pydantic-settings>=2.6.0
|
32
33
|
Dynamic: license-file
|
33
34
|
|
34
35
|
# Academia MCP
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "academia-mcp"
|
7
|
-
version = "1.8.0"
|
7
|
+
version = "1.9.0"
|
8
8
|
description = "MCP server that provides different tools to search for scientific publications"
|
9
9
|
readme = "README.md"
|
10
10
|
authors = [
|
@@ -36,6 +36,7 @@ dependencies = [
|
|
36
36
|
"datasets>=4.0.0",
|
37
37
|
"pymupdf>=1.26.4",
|
38
38
|
"pillow>=11.3.0",
|
39
|
+
"pydantic-settings>=2.6.0",
|
39
40
|
]
|
40
41
|
|
41
42
|
[dependency-groups]
|
@@ -78,4 +79,5 @@ filterwarnings = [
|
|
78
79
|
"ignore:builtin type SwigPyObject has no __module__ attribute:DeprecationWarning",
|
79
80
|
"ignore:builtin type swigvarlink has no __module__ attribute:DeprecationWarning",
|
80
81
|
"ignore:The 'warn' method is deprecated, use 'warning' instead:DeprecationWarning:acl_anthology.text.texmath",
|
82
|
+
"ignore:Data directory contains a different schema.rnc as this library; you might need to update the data or the acl-anthology library.",
|
81
83
|
]
|
@@ -3,8 +3,7 @@ import tempfile
|
|
3
3
|
from pathlib import Path
|
4
4
|
|
5
5
|
from academia_mcp.tools.latex import (
|
6
|
-
|
7
|
-
compile_latex_from_str,
|
6
|
+
compile_latex,
|
8
7
|
get_latex_template,
|
9
8
|
get_latex_templates_list,
|
10
9
|
read_pdf,
|
@@ -25,12 +24,6 @@ def test_latex_get_latex_template() -> None:
|
|
25
24
|
assert result["style"] is not None
|
26
25
|
|
27
26
|
|
28
|
-
def test_latex_compile_latex_from_str() -> None:
|
29
|
-
template = json.loads(get_latex_template("agents4science_2025"))
|
30
|
-
result = compile_latex_from_str(template["template"], "test.pdf")
|
31
|
-
assert "Compilation successful" in result
|
32
|
-
|
33
|
-
|
34
27
|
def test_latex_compile_latex_from_file() -> None:
|
35
28
|
template = json.loads(get_latex_template("agents4science_2025"))
|
36
29
|
with tempfile.TemporaryDirectory() as temp_dir:
|
@@ -39,7 +32,7 @@ def test_latex_compile_latex_from_file() -> None:
|
|
39
32
|
tex_file_path = temp_dir_path / tex_filename
|
40
33
|
pdf_filename = "test.pdf"
|
41
34
|
tex_file_path.write_text(template["template"], encoding="utf-8")
|
42
|
-
result = compile_latex_from_file(str(tex_file_path), pdf_filename)
|
35
|
+
result = compile_latex(str(tex_file_path), pdf_filename)
|
43
36
|
assert "Compilation successful" in result
|
44
37
|
|
45
38
|
|
@@ -51,13 +44,7 @@ def test_latex_read_pdf() -> None:
|
|
51
44
|
tex_file_path = temp_dir_path / tex_filename
|
52
45
|
pdf_filename = "test.pdf"
|
53
46
|
tex_file_path.write_text(template["template"], encoding="utf-8")
|
54
|
-
|
47
|
+
compile_latex(str(tex_file_path), pdf_filename)
|
55
48
|
read_result = json.loads(read_pdf(pdf_filename))
|
56
49
|
assert read_result
|
57
50
|
assert "Page 1" in read_result[0]
|
58
|
-
|
59
|
-
|
60
|
-
def test_latex_compile_latex_error() -> None:
|
61
|
-
template = json.loads(get_latex_template("agents4science_2025"))
|
62
|
-
result = compile_latex_from_str(template["template"][:100], "test.pdf")
|
63
|
-
print(result)
|
@@ -1,7 +1,13 @@
|
|
1
|
-
from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
|
1
|
+
from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url
|
2
2
|
|
3
3
|
|
4
4
|
async def test_review_pdf_paper() -> None:
|
5
5
|
download_pdf_paper("https://arxiv.org/pdf/2502.01220")
|
6
6
|
review = await review_pdf_paper("2502.01220.pdf")
|
7
7
|
assert review
|
8
|
+
|
9
|
+
|
10
|
+
async def test_review_pdf_paper_by_url() -> None:
|
11
|
+
review = await review_pdf_paper_by_url("https://arxiv.org/pdf/2502.01220")
|
12
|
+
assert review
|
13
|
+
assert "format_issues" in str(review)
|
@@ -1,103 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
import socket
|
3
|
-
from typing import Optional, Literal
|
4
|
-
|
5
|
-
import fire # type: ignore
|
6
|
-
from mcp.server.fastmcp import FastMCP
|
7
|
-
from dotenv import load_dotenv
|
8
|
-
|
9
|
-
from .tools.arxiv_search import arxiv_search
|
10
|
-
from .tools.arxiv_download import arxiv_download
|
11
|
-
from .tools.s2_citations import s2_get_citations, s2_get_references
|
12
|
-
from .tools.hf_datasets_search import hf_datasets_search
|
13
|
-
from .tools.anthology_search import anthology_search
|
14
|
-
from .tools.document_qa import document_qa
|
15
|
-
from .tools.latex import (
|
16
|
-
compile_latex_from_file,
|
17
|
-
compile_latex_from_str,
|
18
|
-
get_latex_template,
|
19
|
-
get_latex_templates_list,
|
20
|
-
read_pdf,
|
21
|
-
)
|
22
|
-
from .tools.web_search import web_search, tavily_web_search, exa_web_search, brave_web_search
|
23
|
-
from .tools.visit_webpage import visit_webpage
|
24
|
-
from .tools.bitflip import (
|
25
|
-
extract_bitflip_info,
|
26
|
-
generate_research_proposals,
|
27
|
-
score_research_proposals,
|
28
|
-
)
|
29
|
-
from .tools.review import review_pdf_paper, download_pdf_paper
|
30
|
-
|
31
|
-
|
32
|
-
load_dotenv()
|
33
|
-
|
34
|
-
|
35
|
-
def find_free_port() -> int:
|
36
|
-
for port in range(5000, 6001):
|
37
|
-
try:
|
38
|
-
with socket.socket() as s:
|
39
|
-
s.bind(("", port))
|
40
|
-
return port
|
41
|
-
except Exception:
|
42
|
-
continue
|
43
|
-
raise RuntimeError("No free port in range 5000-6000 found")
|
44
|
-
|
45
|
-
|
46
|
-
def run(
|
47
|
-
host: str = "0.0.0.0",
|
48
|
-
port: Optional[int] = None,
|
49
|
-
mount_path: str = "/",
|
50
|
-
streamable_http_path: str = "/mcp",
|
51
|
-
transport: Literal["stdio", "sse", "streamable-http"] = "streamable-http",
|
52
|
-
disable_web_search_tools: bool = False,
|
53
|
-
disable_llm_tools: bool = False,
|
54
|
-
) -> None:
|
55
|
-
server = FastMCP(
|
56
|
-
"Academia MCP",
|
57
|
-
stateless_http=True,
|
58
|
-
streamable_http_path=streamable_http_path,
|
59
|
-
mount_path=mount_path,
|
60
|
-
)
|
61
|
-
|
62
|
-
server.add_tool(arxiv_search)
|
63
|
-
server.add_tool(arxiv_download)
|
64
|
-
server.add_tool(s2_get_citations)
|
65
|
-
server.add_tool(s2_get_references)
|
66
|
-
server.add_tool(hf_datasets_search)
|
67
|
-
server.add_tool(anthology_search)
|
68
|
-
server.add_tool(compile_latex_from_file)
|
69
|
-
server.add_tool(compile_latex_from_str)
|
70
|
-
server.add_tool(get_latex_template)
|
71
|
-
server.add_tool(get_latex_templates_list)
|
72
|
-
server.add_tool(visit_webpage)
|
73
|
-
server.add_tool(download_pdf_paper)
|
74
|
-
server.add_tool(read_pdf)
|
75
|
-
|
76
|
-
if not disable_web_search_tools:
|
77
|
-
if os.getenv("TAVILY_API_KEY"):
|
78
|
-
server.add_tool(tavily_web_search)
|
79
|
-
if os.getenv("EXA_API_KEY"):
|
80
|
-
server.add_tool(exa_web_search)
|
81
|
-
if os.getenv("BRAVE_API_KEY"):
|
82
|
-
server.add_tool(brave_web_search)
|
83
|
-
if os.getenv("EXA_API_KEY") or os.getenv("BRAVE_API_KEY") or os.getenv("TAVILY_API_KEY"):
|
84
|
-
server.add_tool(web_search)
|
85
|
-
|
86
|
-
if not disable_llm_tools and os.getenv("OPENROUTER_API_KEY"):
|
87
|
-
server.add_tool(extract_bitflip_info)
|
88
|
-
server.add_tool(generate_research_proposals)
|
89
|
-
server.add_tool(score_research_proposals)
|
90
|
-
server.add_tool(document_qa)
|
91
|
-
server.add_tool(review_pdf_paper)
|
92
|
-
|
93
|
-
if port is None:
|
94
|
-
port = int(os.environ.get("PORT", -1))
|
95
|
-
if port == -1:
|
96
|
-
port = find_free_port()
|
97
|
-
server.settings.port = port
|
98
|
-
server.settings.host = host
|
99
|
-
server.run(transport=transport)
|
100
|
-
|
101
|
-
|
102
|
-
if __name__ == "__main__":
|
103
|
-
fire.Fire(run)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|