academia-mcp 1.4.2__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,11 +5,11 @@
5
5
  % before loading agents4science_2025
6
6
 
7
7
  % ready for submission
8
- \usepackage{agents4science_2025}
8
+ % \usepackage{agents4science_2025}
9
9
 
10
10
  % to compile a preprint version, e.g., for submission to arXiv, add the
11
11
  % [preprint] option:
12
- % \usepackage[preprint]{agents4science_2025}
12
+ \usepackage[preprint]{agents4science_2025}
13
13
 
14
14
  % to compile a camera-ready version, add the [final] option, e.g.:
15
15
  % \usepackage[final]{agents4science_2025}
@@ -683,4 +683,4 @@ IMPORTANT, please:
683
683
  \end{enumerate}
684
684
 
685
685
 
686
- \end{document}
686
+ \end{document}
academia_mcp/llm.py CHANGED
@@ -14,14 +14,11 @@ class ChatMessage(BaseModel): # type: ignore
14
14
  ChatMessages = List[ChatMessage]
15
15
 
16
16
 
17
- async def llm_acall(model_name: str, prompt: str) -> str:
17
+ async def llm_acall(model_name: str, messages: ChatMessages) -> str:
18
18
  key = os.getenv("OPENROUTER_API_KEY", "")
19
19
  assert key, "Please set OPENROUTER_API_KEY in the environment variables"
20
20
  base_url = os.getenv("BASE_URL", "https://openrouter.ai/api/v1")
21
21
 
22
- messages: ChatMessages = [
23
- ChatMessage(role="user", content=prompt),
24
- ]
25
22
  client = AsyncOpenAI(base_url=base_url, api_key=key)
26
23
  response: ChatCompletionMessage = (
27
24
  (
academia_mcp/pdf.py ADDED
@@ -0,0 +1,44 @@
1
+ from pathlib import Path
2
+ from typing import List
3
+
4
+ from pypdf import PdfReader
5
+ from PIL import Image
6
+ import pymupdf # type: ignore
7
+
8
+ from academia_mcp.utils import get_with_retries
9
+
10
+
11
+ def download_pdf(url: str, output_path: Path) -> None:
12
+ response = get_with_retries(url)
13
+ response.raise_for_status()
14
+ content_type = response.headers.get("content-type")
15
+ assert content_type
16
+ assert "application/pdf" in content_type.lower()
17
+ with open(output_path.resolve(), "wb") as fp:
18
+ fp.write(response.content)
19
+
20
+
21
+ def parse_pdf_file(pdf_path: Path) -> List[str]:
22
+ # Why not Marker? Because it is too heavy.
23
+ reader = PdfReader(str(pdf_path.resolve()))
24
+
25
+ pages = []
26
+ for page_number, page in enumerate(reader.pages, start=1):
27
+ try:
28
+ text = page.extract_text()
29
+ if not text:
30
+ continue
31
+ prefix = f"## Page {page_number}\n\n"
32
+ pages.append(prefix + text)
33
+ except Exception:
34
+ continue
35
+ return pages
36
+
37
+
38
+ def parse_pdf_file_to_images(pdf_path: Path) -> List[Image.Image]:
39
+ doc = pymupdf.open(str(pdf_path.resolve()))
40
+ images = []
41
+ for page in doc:
42
+ pil_image: Image.Image = page.get_pixmap().pil_image()
43
+ images.append(pil_image)
44
+ return images
@@ -12,36 +12,9 @@ from dataclasses import dataclass, field
12
12
  import requests
13
13
  import bs4
14
14
  from markdownify import MarkdownConverter # type: ignore
15
- from pypdf import PdfReader
16
15
 
17
16
  from academia_mcp.utils import get_with_retries
18
-
19
-
20
- def download_pdf(url: str, output_path: Path) -> None:
21
- response = get_with_retries(url)
22
- response.raise_for_status()
23
- content_type = response.headers.get("content-type")
24
- assert content_type
25
- assert "application/pdf" in content_type.lower()
26
- with open(output_path.resolve(), "wb") as fp:
27
- fp.write(response.content)
28
-
29
-
30
- def parse_pdf_file(pdf_path: Path) -> List[str]:
31
- # Why not Marker? Because it is too heavy.
32
- reader = PdfReader(str(pdf_path.resolve()))
33
-
34
- pages = []
35
- for page_number, page in enumerate(reader.pages, start=1):
36
- try:
37
- text = page.extract_text()
38
- if not text:
39
- continue
40
- prefix = f"## Page {page_number}\n\n"
41
- pages.append(prefix + text)
42
- except Exception:
43
- continue
44
- return pages
17
+ from academia_mcp.pdf import parse_pdf_file, download_pdf
45
18
 
46
19
 
47
20
  HTML_URL = "https://arxiv.org/html/{paper_id}"
@@ -11,7 +11,7 @@ from datasets import load_dataset # type: ignore
11
11
 
12
12
  from academia_mcp.tools.arxiv_download import arxiv_download
13
13
  from academia_mcp.utils import extract_json, encode_prompt
14
- from academia_mcp.llm import llm_acall
14
+ from academia_mcp.llm import llm_acall, ChatMessage
15
15
 
16
16
 
17
17
  class ProposalDataset:
@@ -208,7 +208,9 @@ async def extract_bitflip_info(arxiv_id: str) -> str:
208
208
  paper = arxiv_download(arxiv_id)
209
209
  abstract = json.loads(paper)["abstract"]
210
210
  prompt = encode_prompt(EXTRACT_PROMPT, abstract=abstract)
211
- content = await llm_acall(model_name=model_name, prompt=prompt)
211
+ content = await llm_acall(
212
+ model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
213
+ )
212
214
  result = extract_json(content)
213
215
  bitflip_info: BitFlipInfo = BitFlipInfo.model_validate(result)
214
216
  return str(bitflip_info.model_dump_json())
@@ -240,7 +242,9 @@ async def generate_research_proposal(bit: str, additional_context: str = "") ->
240
242
  prompt = encode_prompt(
241
243
  IMPROVEMENT_PROMPT, bit=bit, examples=examples, additional_context=additional_context
242
244
  )
243
- content = await llm_acall(model_name=model_name, prompt=prompt)
245
+ content = await llm_acall(
246
+ model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
247
+ )
244
248
  result = extract_json(content)
245
249
  proposal: Proposal = Proposal.model_validate(result)
246
250
  proposal.proposal_id = random.randint(0, 1000000)
@@ -276,7 +280,9 @@ async def score_research_proposals(proposals: List[str]) -> str:
276
280
  model_name = os.getenv("BITFLIP_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
277
281
  proposals = [Proposal.model_validate_json(proposal) for proposal in proposals]
278
282
  prompt = encode_prompt(SCORE_PROMPT, proposals=proposals)
279
- content = await llm_acall(model_name=model_name, prompt=prompt)
283
+ content = await llm_acall(
284
+ model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
285
+ )
280
286
  scores = extract_json(content)
281
287
  final_scores = [ProposalScores.model_validate(score) for score in scores]
282
288
  return json.dumps([s.model_dump() for s in final_scores], ensure_ascii=False)
@@ -70,5 +70,7 @@ async def document_qa(
70
70
 
71
71
  model_name = os.getenv("DOCUMENT_QA_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
72
72
  prompt = PROMPT.format(question=question, document=document)
73
- content = await llm_acall(model_name=model_name, prompt=prompt)
73
+ content = await llm_acall(
74
+ model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
75
+ )
74
76
  return content.strip()
@@ -0,0 +1,51 @@
1
+ import base64
2
+ from io import BytesIO
3
+ from pathlib import Path
4
+
5
+ from academia_mcp.pdf import parse_pdf_file_to_images
6
+ from academia_mcp.llm import llm_acall, ChatMessage
7
+ from academia_mcp.files import get_workspace_dir
8
+
9
+
10
+ PROMPT = """
11
+ Find problems with the paper formatiing.
12
+ """
13
+
14
+
15
+ async def review_pdf(pdf_filename: str) -> str:
16
+ """
17
+ Review a pdf file.
18
+
19
+ Args:
20
+ pdf_filename: Path to the pdf file; if it does not exist as given, it is looked up inside the workspace directory.
21
+ """
22
+ pdf_filename_path = Path(pdf_filename)
23
+ if not pdf_filename_path.exists():
24
+ pdf_filename_path = Path(get_workspace_dir()) / pdf_filename
25
+
26
+ images = parse_pdf_file_to_images(pdf_filename_path)
27
+ content_parts = []
28
+ for image in images:
29
+ buffer_io = BytesIO()
30
+ image.save(buffer_io, format="PNG")
31
+ img_bytes = buffer_io.getvalue()
32
+ image_base64 = base64.b64encode(img_bytes).decode("utf-8")
33
+ image_content = {
34
+ "type": "image_url",
35
+ "image_url": {"url": f"data:image/png;base64,{image_base64}"},
36
+ }
37
+ content_parts.append(image_content)
38
+
39
+ content_parts.append(
40
+ {
41
+ "type": "text",
42
+ "text": "Please review the paper and provide a summary of its content.",
43
+ }
44
+ )
45
+ llm_response = await llm_acall(
46
+ model_name="gpt-4o",
47
+ messages=[
48
+ ChatMessage(role="user", content=content_parts),
49
+ ],
50
+ )
51
+ return llm_response.strip()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.4.2
3
+ Version: 1.5.0
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -27,6 +27,8 @@ Requires-Dist: fire>=0.7.0
27
27
  Requires-Dist: openai>=1.97.1
28
28
  Requires-Dist: jinja2>=3.1.6
29
29
  Requires-Dist: datasets>=4.0.0
30
+ Requires-Dist: pymupdf>=1.26.4
31
+ Requires-Dist: pillow>=11.3.0
30
32
  Dynamic: license-file
31
33
 
32
34
  # Academia MCP
@@ -1,27 +1,29 @@
1
1
  academia_mcp/__init__.py,sha256=2Ru2I5u4cE7DrkkAsibDUEF1K6sYtqppb9VyFrRoQKI,94
2
2
  academia_mcp/__main__.py,sha256=rcmsOtJd3SA82exjrcGBuxuptcoxF8AXI7jNjiVq2BY,59
3
3
  academia_mcp/files.py,sha256=tvt3OPr5q6pAPCZ0XvRHHL9ZWuTXINRZvqjeRFmx5YE,815
4
- academia_mcp/llm.py,sha256=o84FQNSbjjVSk9DlvFXWsUDiz5IOaavYU6kOqnPEG7E,1071
4
+ academia_mcp/llm.py,sha256=jh-_H3_gNyRsvpFqFx-yWVhGznnXxehFP79inUy4vVQ,995
5
+ academia_mcp/pdf.py,sha256=9PlXzHGhb6ay3ldbTdxCcTWvH4TkET3bnb64mgoh9i0,1273
5
6
  academia_mcp/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
7
  academia_mcp/server.py,sha256=FRrPAacAPs1IZ3LRKDFZi7copAqzy_aPGYd2RLsA01U,2974
7
8
  academia_mcp/utils.py,sha256=P9U3RjYzcztE0KxXvJSy5wSBaUg2CM9tpByljYrsrl4,4607
8
9
  academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty,sha256=hGcEPCYBJS4vdhWvN_yEaJC4GvT_yDroI94CfY2Oguk,12268
9
- academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex,sha256=Nu_nL-3WrnIrPZ03qswhKpKUOXm0Dr8gFmUfASkLMBc,33800
10
+ academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex,sha256=VxuE1Va7_QtZ87P1iUm8-JOW1e_9tdbSSvhH_hz7Ras,33801
10
11
  academia_mcp/tools/__init__.py,sha256=u_6HkChV2P46zXxGp92s1cTSjkkd42udhCM3BFDYQ_c,1137
11
12
  academia_mcp/tools/anthology_search.py,sha256=rhFpJZqGLABgr0raDuH0CARBiAJNJtEI4dlMrKNHfDQ,7669
12
- academia_mcp/tools/arxiv_download.py,sha256=soa9nPDHV1-ZgTLtqh-Fj69WkRsrb1hRKouWDa2ePng,11269
13
+ academia_mcp/tools/arxiv_download.py,sha256=gBY0_Kz0yGtVkLMwn6GrAyfBjovZVgcSMuyy67p65Cw,10474
13
14
  academia_mcp/tools/arxiv_search.py,sha256=pzM18qrF3QL03A53w003kE7hQi3s3QKtjgw0m7K88UY,8355
14
- academia_mcp/tools/bitflip.py,sha256=u0hSOPWbnCDu2EbA_RkueX496SvTKz9QhZcXugshSfI,10949
15
- academia_mcp/tools/document_qa.py,sha256=04pJpiYCg27EFiZhfmTaMjeobu8SMT0Dls7OAFDoH00,2392
15
+ academia_mcp/tools/bitflip.py,sha256=Lu2UASqabiMQ-F-s1BTDWMNuIceys9NIOM9M1bpJczk,11118
16
+ academia_mcp/tools/document_qa.py,sha256=t9mygYQ7AFIAPiha1nZ-y043luQlkTCBdWb_SDnzEsE,2444
16
17
  academia_mcp/tools/hf_datasets_search.py,sha256=KiBkqT4rXjEN4oc1AWZOPnqN_Go90TQogY5-DUm3LQo,2854
17
18
  academia_mcp/tools/latex.py,sha256=bf8VZUgCByzBAMTZCeqrRrmakotext3d3DbtkiOTh1k,5892
18
19
  academia_mcp/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ academia_mcp/tools/review.py,sha256=YR72fl8pdzPur8TfdQeBZHfjpKjGTl-bngPjhYafsRI,1409
19
21
  academia_mcp/tools/s2_citations.py,sha256=dqrBp76RrX1zH2XzcMAoWBbvbtyhxLeF-xnqOKD_JiM,4852
20
22
  academia_mcp/tools/visit_webpage.py,sha256=OZdqDkVPIbANyFw5o5jIjU5Rr_dolxrGDs63Ud-GmRM,1966
21
23
  academia_mcp/tools/web_search.py,sha256=mobKm4iqKppn8pduZYMzWRo1MQBjkAqmMtrFLI5XY2Y,6296
22
- academia_mcp-1.4.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
23
- academia_mcp-1.4.2.dist-info/METADATA,sha256=aGkaWNjsNm7aB6o-mwEEvekh8ybOOuE5vuuIJW2rkog,3172
24
- academia_mcp-1.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
25
- academia_mcp-1.4.2.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
26
- academia_mcp-1.4.2.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
27
- academia_mcp-1.4.2.dist-info/RECORD,,
24
+ academia_mcp-1.5.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
25
+ academia_mcp-1.5.0.dist-info/METADATA,sha256=caqX8xdo4NeS5eUtSWkXYr5Y6kvXJISb4HhUsjZvhRo,3233
26
+ academia_mcp-1.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
27
+ academia_mcp-1.5.0.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
28
+ academia_mcp-1.5.0.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
29
+ academia_mcp-1.5.0.dist-info/RECORD,,