academia-mcp 1.4.2__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +3 -3
- academia_mcp/llm.py +1 -4
- academia_mcp/pdf.py +44 -0
- academia_mcp/tools/arxiv_download.py +1 -28
- academia_mcp/tools/bitflip.py +10 -4
- academia_mcp/tools/document_qa.py +3 -1
- academia_mcp/tools/review.py +51 -0
- {academia_mcp-1.4.2.dist-info → academia_mcp-1.5.0.dist-info}/METADATA +3 -1
- {academia_mcp-1.4.2.dist-info → academia_mcp-1.5.0.dist-info}/RECORD +13 -11
- {academia_mcp-1.4.2.dist-info → academia_mcp-1.5.0.dist-info}/WHEEL +0 -0
- {academia_mcp-1.4.2.dist-info → academia_mcp-1.5.0.dist-info}/entry_points.txt +0 -0
- {academia_mcp-1.4.2.dist-info → academia_mcp-1.5.0.dist-info}/licenses/LICENSE +0 -0
- {academia_mcp-1.4.2.dist-info → academia_mcp-1.5.0.dist-info}/top_level.txt +0 -0
@@ -5,11 +5,11 @@
|
|
5
5
|
% before loading agents4science_2025
|
6
6
|
|
7
7
|
% ready for submission
|
8
|
-
\usepackage{agents4science_2025}
|
8
|
+
% \usepackage{agents4science_2025}
|
9
9
|
|
10
10
|
% to compile a preprint version, e.g., for submission to arXiv, add the
|
11
11
|
% [preprint] option:
|
12
|
-
|
12
|
+
\usepackage[preprint]{agents4science_2025}
|
13
13
|
|
14
14
|
% to compile a camera-ready version, add the [final] option, e.g.:
|
15
15
|
% \usepackage[final]{agents4science_2025}
|
@@ -683,4 +683,4 @@ IMPORTANT, please:
|
|
683
683
|
\end{enumerate}
|
684
684
|
|
685
685
|
|
686
|
-
\end{document}
|
686
|
+
\end{document}
|
academia_mcp/llm.py
CHANGED
@@ -14,14 +14,11 @@ class ChatMessage(BaseModel): # type: ignore
|
|
14
14
|
ChatMessages = List[ChatMessage]
|
15
15
|
|
16
16
|
|
17
|
-
async def llm_acall(model_name: str, prompt: str) -> str:
|
17
|
+
async def llm_acall(model_name: str, messages: ChatMessages) -> str:
|
18
18
|
key = os.getenv("OPENROUTER_API_KEY", "")
|
19
19
|
assert key, "Please set OPENROUTER_API_KEY in the environment variables"
|
20
20
|
base_url = os.getenv("BASE_URL", "https://openrouter.ai/api/v1")
|
21
21
|
|
22
|
-
messages: ChatMessages = [
|
23
|
-
ChatMessage(role="user", content=prompt),
|
24
|
-
]
|
25
22
|
client = AsyncOpenAI(base_url=base_url, api_key=key)
|
26
23
|
response: ChatCompletionMessage = (
|
27
24
|
(
|
academia_mcp/pdf.py
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import List
|
3
|
+
|
4
|
+
from pypdf import PdfReader
|
5
|
+
from PIL import Image
|
6
|
+
import pymupdf # type: ignore
|
7
|
+
|
8
|
+
from academia_mcp.utils import get_with_retries
|
9
|
+
|
10
|
+
|
11
|
+
def download_pdf(url: str, output_path: Path) -> None:
|
12
|
+
response = get_with_retries(url)
|
13
|
+
response.raise_for_status()
|
14
|
+
content_type = response.headers.get("content-type")
|
15
|
+
assert content_type
|
16
|
+
assert "application/pdf" in content_type.lower()
|
17
|
+
with open(output_path.resolve(), "wb") as fp:
|
18
|
+
fp.write(response.content)
|
19
|
+
|
20
|
+
|
21
|
+
def parse_pdf_file(pdf_path: Path) -> List[str]:
|
22
|
+
# Why not Marker? Because it is too heavy.
|
23
|
+
reader = PdfReader(str(pdf_path.resolve()))
|
24
|
+
|
25
|
+
pages = []
|
26
|
+
for page_number, page in enumerate(reader.pages, start=1):
|
27
|
+
try:
|
28
|
+
text = page.extract_text()
|
29
|
+
if not text:
|
30
|
+
continue
|
31
|
+
prefix = f"## Page {page_number}\n\n"
|
32
|
+
pages.append(prefix + text)
|
33
|
+
except Exception:
|
34
|
+
continue
|
35
|
+
return pages
|
36
|
+
|
37
|
+
|
38
|
+
def parse_pdf_file_to_images(pdf_path: Path) -> List[Image.Image]:
|
39
|
+
doc = pymupdf.open(str(pdf_path.resolve()))
|
40
|
+
images = []
|
41
|
+
for page in doc:
|
42
|
+
pil_image: Image.Image = page.get_pixmap().pil_image()
|
43
|
+
images.append(pil_image)
|
44
|
+
return images
|
@@ -12,36 +12,9 @@ from dataclasses import dataclass, field
|
|
12
12
|
import requests
|
13
13
|
import bs4
|
14
14
|
from markdownify import MarkdownConverter # type: ignore
|
15
|
-
from pypdf import PdfReader
|
16
15
|
|
17
16
|
from academia_mcp.utils import get_with_retries
|
18
|
-
|
19
|
-
|
20
|
-
def download_pdf(url: str, output_path: Path) -> None:
|
21
|
-
response = get_with_retries(url)
|
22
|
-
response.raise_for_status()
|
23
|
-
content_type = response.headers.get("content-type")
|
24
|
-
assert content_type
|
25
|
-
assert "application/pdf" in content_type.lower()
|
26
|
-
with open(output_path.resolve(), "wb") as fp:
|
27
|
-
fp.write(response.content)
|
28
|
-
|
29
|
-
|
30
|
-
def parse_pdf_file(pdf_path: Path) -> List[str]:
|
31
|
-
# Why not Marker? Because it is too heavy.
|
32
|
-
reader = PdfReader(str(pdf_path.resolve()))
|
33
|
-
|
34
|
-
pages = []
|
35
|
-
for page_number, page in enumerate(reader.pages, start=1):
|
36
|
-
try:
|
37
|
-
text = page.extract_text()
|
38
|
-
if not text:
|
39
|
-
continue
|
40
|
-
prefix = f"## Page {page_number}\n\n"
|
41
|
-
pages.append(prefix + text)
|
42
|
-
except Exception:
|
43
|
-
continue
|
44
|
-
return pages
|
17
|
+
from academia_mcp.pdf import parse_pdf_file, download_pdf
|
45
18
|
|
46
19
|
|
47
20
|
HTML_URL = "https://arxiv.org/html/{paper_id}"
|
academia_mcp/tools/bitflip.py
CHANGED
@@ -11,7 +11,7 @@ from datasets import load_dataset # type: ignore
|
|
11
11
|
|
12
12
|
from academia_mcp.tools.arxiv_download import arxiv_download
|
13
13
|
from academia_mcp.utils import extract_json, encode_prompt
|
14
|
-
from academia_mcp.llm import llm_acall
|
14
|
+
from academia_mcp.llm import llm_acall, ChatMessage
|
15
15
|
|
16
16
|
|
17
17
|
class ProposalDataset:
|
@@ -208,7 +208,9 @@ async def extract_bitflip_info(arxiv_id: str) -> str:
|
|
208
208
|
paper = arxiv_download(arxiv_id)
|
209
209
|
abstract = json.loads(paper)["abstract"]
|
210
210
|
prompt = encode_prompt(EXTRACT_PROMPT, abstract=abstract)
|
211
|
-
content = await llm_acall(
|
211
|
+
content = await llm_acall(
|
212
|
+
model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
|
213
|
+
)
|
212
214
|
result = extract_json(content)
|
213
215
|
bitflip_info: BitFlipInfo = BitFlipInfo.model_validate(result)
|
214
216
|
return str(bitflip_info.model_dump_json())
|
@@ -240,7 +242,9 @@ async def generate_research_proposal(bit: str, additional_context: str = "") ->
|
|
240
242
|
prompt = encode_prompt(
|
241
243
|
IMPROVEMENT_PROMPT, bit=bit, examples=examples, additional_context=additional_context
|
242
244
|
)
|
243
|
-
content = await llm_acall(
|
245
|
+
content = await llm_acall(
|
246
|
+
model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
|
247
|
+
)
|
244
248
|
result = extract_json(content)
|
245
249
|
proposal: Proposal = Proposal.model_validate(result)
|
246
250
|
proposal.proposal_id = random.randint(0, 1000000)
|
@@ -276,7 +280,9 @@ async def score_research_proposals(proposals: List[str]) -> str:
|
|
276
280
|
model_name = os.getenv("BITFLIP_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
|
277
281
|
proposals = [Proposal.model_validate_json(proposal) for proposal in proposals]
|
278
282
|
prompt = encode_prompt(SCORE_PROMPT, proposals=proposals)
|
279
|
-
content = await llm_acall(
|
283
|
+
content = await llm_acall(
|
284
|
+
model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
|
285
|
+
)
|
280
286
|
scores = extract_json(content)
|
281
287
|
final_scores = [ProposalScores.model_validate(score) for score in scores]
|
282
288
|
return json.dumps([s.model_dump() for s in final_scores], ensure_ascii=False)
|
@@ -70,5 +70,7 @@ async def document_qa(
|
|
70
70
|
|
71
71
|
model_name = os.getenv("DOCUMENT_QA_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
|
72
72
|
prompt = PROMPT.format(question=question, document=document)
|
73
|
-
content = await llm_acall(
|
73
|
+
content = await llm_acall(
|
74
|
+
model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
|
75
|
+
)
|
74
76
|
return content.strip()
|
@@ -0,0 +1,51 @@
|
|
1
|
+
import base64
|
2
|
+
from io import BytesIO
|
3
|
+
from pathlib import Path
|
4
|
+
|
5
|
+
from academia_mcp.pdf import parse_pdf_file_to_images
|
6
|
+
from academia_mcp.llm import llm_acall, ChatMessage
|
7
|
+
from academia_mcp.files import get_workspace_dir
|
8
|
+
|
9
|
+
|
10
|
+
PROMPT = """
|
11
|
+
Find problems with the paper formatiing.
|
12
|
+
"""
|
13
|
+
|
14
|
+
|
15
|
+
async def review_pdf(pdf_filename: str) -> str:
|
16
|
+
"""
|
17
|
+
Review a pdf file.
|
18
|
+
|
19
|
+
Args:
|
20
|
+
pdf_path: The path to the pdf file.
|
21
|
+
"""
|
22
|
+
pdf_filename_path = Path(pdf_filename)
|
23
|
+
if not pdf_filename_path.exists():
|
24
|
+
pdf_filename_path = Path(get_workspace_dir()) / pdf_filename
|
25
|
+
|
26
|
+
images = parse_pdf_file_to_images(pdf_filename_path)
|
27
|
+
content_parts = []
|
28
|
+
for image in images:
|
29
|
+
buffer_io = BytesIO()
|
30
|
+
image.save(buffer_io, format="PNG")
|
31
|
+
img_bytes = buffer_io.getvalue()
|
32
|
+
image_base64 = base64.b64encode(img_bytes).decode("utf-8")
|
33
|
+
image_content = {
|
34
|
+
"type": "image_url",
|
35
|
+
"image_url": {"url": f"data:image/png;base64,{image_base64}"},
|
36
|
+
}
|
37
|
+
content_parts.append(image_content)
|
38
|
+
|
39
|
+
content_parts.append(
|
40
|
+
{
|
41
|
+
"type": "text",
|
42
|
+
"text": "Please review the paper and provide a summary of its content.",
|
43
|
+
}
|
44
|
+
)
|
45
|
+
llm_response = await llm_acall(
|
46
|
+
model_name="gpt-4o",
|
47
|
+
messages=[
|
48
|
+
ChatMessage(role="user", content=content_parts),
|
49
|
+
],
|
50
|
+
)
|
51
|
+
return llm_response.strip()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: academia-mcp
|
3
|
-
Version: 1.4.2
|
3
|
+
Version: 1.5.0
|
4
4
|
Summary: MCP server that provides different tools to search for scientific publications
|
5
5
|
Author-email: Ilya Gusev <phoenixilya@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
|
@@ -27,6 +27,8 @@ Requires-Dist: fire>=0.7.0
|
|
27
27
|
Requires-Dist: openai>=1.97.1
|
28
28
|
Requires-Dist: jinja2>=3.1.6
|
29
29
|
Requires-Dist: datasets>=4.0.0
|
30
|
+
Requires-Dist: pymupdf>=1.26.4
|
31
|
+
Requires-Dist: pillow>=11.3.0
|
30
32
|
Dynamic: license-file
|
31
33
|
|
32
34
|
# Academia MCP
|
@@ -1,27 +1,29 @@
|
|
1
1
|
academia_mcp/__init__.py,sha256=2Ru2I5u4cE7DrkkAsibDUEF1K6sYtqppb9VyFrRoQKI,94
|
2
2
|
academia_mcp/__main__.py,sha256=rcmsOtJd3SA82exjrcGBuxuptcoxF8AXI7jNjiVq2BY,59
|
3
3
|
academia_mcp/files.py,sha256=tvt3OPr5q6pAPCZ0XvRHHL9ZWuTXINRZvqjeRFmx5YE,815
|
4
|
-
academia_mcp/llm.py,sha256=
|
4
|
+
academia_mcp/llm.py,sha256=jh-_H3_gNyRsvpFqFx-yWVhGznnXxehFP79inUy4vVQ,995
|
5
|
+
academia_mcp/pdf.py,sha256=9PlXzHGhb6ay3ldbTdxCcTWvH4TkET3bnb64mgoh9i0,1273
|
5
6
|
academia_mcp/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
7
|
academia_mcp/server.py,sha256=FRrPAacAPs1IZ3LRKDFZi7copAqzy_aPGYd2RLsA01U,2974
|
7
8
|
academia_mcp/utils.py,sha256=P9U3RjYzcztE0KxXvJSy5wSBaUg2CM9tpByljYrsrl4,4607
|
8
9
|
academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty,sha256=hGcEPCYBJS4vdhWvN_yEaJC4GvT_yDroI94CfY2Oguk,12268
|
9
|
-
academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex,sha256=
|
10
|
+
academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex,sha256=VxuE1Va7_QtZ87P1iUm8-JOW1e_9tdbSSvhH_hz7Ras,33801
|
10
11
|
academia_mcp/tools/__init__.py,sha256=u_6HkChV2P46zXxGp92s1cTSjkkd42udhCM3BFDYQ_c,1137
|
11
12
|
academia_mcp/tools/anthology_search.py,sha256=rhFpJZqGLABgr0raDuH0CARBiAJNJtEI4dlMrKNHfDQ,7669
|
12
|
-
academia_mcp/tools/arxiv_download.py,sha256=
|
13
|
+
academia_mcp/tools/arxiv_download.py,sha256=gBY0_Kz0yGtVkLMwn6GrAyfBjovZVgcSMuyy67p65Cw,10474
|
13
14
|
academia_mcp/tools/arxiv_search.py,sha256=pzM18qrF3QL03A53w003kE7hQi3s3QKtjgw0m7K88UY,8355
|
14
|
-
academia_mcp/tools/bitflip.py,sha256=
|
15
|
-
academia_mcp/tools/document_qa.py,sha256=
|
15
|
+
academia_mcp/tools/bitflip.py,sha256=Lu2UASqabiMQ-F-s1BTDWMNuIceys9NIOM9M1bpJczk,11118
|
16
|
+
academia_mcp/tools/document_qa.py,sha256=t9mygYQ7AFIAPiha1nZ-y043luQlkTCBdWb_SDnzEsE,2444
|
16
17
|
academia_mcp/tools/hf_datasets_search.py,sha256=KiBkqT4rXjEN4oc1AWZOPnqN_Go90TQogY5-DUm3LQo,2854
|
17
18
|
academia_mcp/tools/latex.py,sha256=bf8VZUgCByzBAMTZCeqrRrmakotext3d3DbtkiOTh1k,5892
|
18
19
|
academia_mcp/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
|
+
academia_mcp/tools/review.py,sha256=YR72fl8pdzPur8TfdQeBZHfjpKjGTl-bngPjhYafsRI,1409
|
19
21
|
academia_mcp/tools/s2_citations.py,sha256=dqrBp76RrX1zH2XzcMAoWBbvbtyhxLeF-xnqOKD_JiM,4852
|
20
22
|
academia_mcp/tools/visit_webpage.py,sha256=OZdqDkVPIbANyFw5o5jIjU5Rr_dolxrGDs63Ud-GmRM,1966
|
21
23
|
academia_mcp/tools/web_search.py,sha256=mobKm4iqKppn8pduZYMzWRo1MQBjkAqmMtrFLI5XY2Y,6296
|
22
|
-
academia_mcp-1.
|
23
|
-
academia_mcp-1.
|
24
|
-
academia_mcp-1.
|
25
|
-
academia_mcp-1.
|
26
|
-
academia_mcp-1.
|
27
|
-
academia_mcp-1.
|
24
|
+
academia_mcp-1.5.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
25
|
+
academia_mcp-1.5.0.dist-info/METADATA,sha256=caqX8xdo4NeS5eUtSWkXYr5Y6kvXJISb4HhUsjZvhRo,3233
|
26
|
+
academia_mcp-1.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
27
|
+
academia_mcp-1.5.0.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
|
28
|
+
academia_mcp-1.5.0.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
|
29
|
+
academia_mcp-1.5.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|