academia-mcp 1.4.2__tar.gz → 1.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/PKG-INFO +3 -1
  2. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +3 -3
  3. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/llm.py +1 -4
  4. academia_mcp-1.5.0/academia_mcp/pdf.py +44 -0
  5. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/tools/arxiv_download.py +1 -28
  6. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/tools/bitflip.py +10 -4
  7. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/tools/document_qa.py +3 -1
  8. academia_mcp-1.5.0/academia_mcp/tools/review.py +51 -0
  9. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp.egg-info/PKG-INFO +3 -1
  10. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp.egg-info/SOURCES.txt +3 -0
  11. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp.egg-info/requires.txt +2 -0
  12. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/pyproject.toml +9 -1
  13. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/tests/test_latex.py +2 -1
  14. academia_mcp-1.5.0/tests/test_review.py +20 -0
  15. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/LICENSE +0 -0
  16. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/README.md +0 -0
  17. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/__init__.py +0 -0
  18. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/__main__.py +0 -0
  19. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/files.py +0 -0
  20. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
  21. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/py.typed +0 -0
  22. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/server.py +0 -0
  23. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/tools/__init__.py +0 -0
  24. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/tools/anthology_search.py +0 -0
  25. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/tools/arxiv_search.py +0 -0
  26. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/tools/hf_datasets_search.py +0 -0
  27. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/tools/latex.py +0 -0
  28. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/tools/py.typed +0 -0
  29. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/tools/s2_citations.py +0 -0
  30. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/tools/visit_webpage.py +0 -0
  31. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/tools/web_search.py +0 -0
  32. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp/utils.py +0 -0
  33. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp.egg-info/dependency_links.txt +0 -0
  34. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp.egg-info/entry_points.txt +0 -0
  35. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/academia_mcp.egg-info/top_level.txt +0 -0
  36. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/setup.cfg +0 -0
  37. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/tests/test_anthology_search.py +0 -0
  38. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/tests/test_arxiv_download.py +0 -0
  39. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/tests/test_arxiv_search.py +0 -0
  40. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/tests/test_bitflip.py +0 -0
  41. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/tests/test_document_qa.py +0 -0
  42. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/tests/test_extract_json.py +0 -0
  43. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/tests/test_hf_dataset_search.py +0 -0
  44. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/tests/test_s2_citations.py +0 -0
  45. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/tests/test_visit_webpage.py +0 -0
  46. {academia_mcp-1.4.2 → academia_mcp-1.5.0}/tests/test_web_search.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.4.2
3
+ Version: 1.5.0
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -27,6 +27,8 @@ Requires-Dist: fire>=0.7.0
27
27
  Requires-Dist: openai>=1.97.1
28
28
  Requires-Dist: jinja2>=3.1.6
29
29
  Requires-Dist: datasets>=4.0.0
30
+ Requires-Dist: pymupdf>=1.26.4
31
+ Requires-Dist: pillow>=11.3.0
30
32
  Dynamic: license-file
31
33
 
32
34
  # Academia MCP
@@ -5,11 +5,11 @@
5
5
  % before loading agents4science_2025
6
6
 
7
7
  % ready for submission
8
- \usepackage{agents4science_2025}
8
+ % \usepackage{agents4science_2025}
9
9
 
10
10
  % to compile a preprint version, e.g., for submission to arXiv, add the
11
11
  % [preprint] option:
12
- % \usepackage[preprint]{agents4science_2025}
12
+ \usepackage[preprint]{agents4science_2025}
13
13
 
14
14
  % to compile a camera-ready version, add the [final] option, e.g.:
15
15
  % \usepackage[final]{agents4science_2025}
@@ -683,4 +683,4 @@ IMPORTANT, please:
683
683
  \end{enumerate}
684
684
 
685
685
 
686
- \end{document}
686
+ \end{document}
@@ -14,14 +14,11 @@ class ChatMessage(BaseModel): # type: ignore
14
14
  ChatMessages = List[ChatMessage]
15
15
 
16
16
 
17
- async def llm_acall(model_name: str, prompt: str) -> str:
17
+ async def llm_acall(model_name: str, messages: ChatMessages) -> str:
18
18
  key = os.getenv("OPENROUTER_API_KEY", "")
19
19
  assert key, "Please set OPENROUTER_API_KEY in the environment variables"
20
20
  base_url = os.getenv("BASE_URL", "https://openrouter.ai/api/v1")
21
21
 
22
- messages: ChatMessages = [
23
- ChatMessage(role="user", content=prompt),
24
- ]
25
22
  client = AsyncOpenAI(base_url=base_url, api_key=key)
26
23
  response: ChatCompletionMessage = (
27
24
  (
@@ -0,0 +1,44 @@
1
+ from pathlib import Path
2
+ from typing import List
3
+
4
+ from pypdf import PdfReader
5
+ from PIL import Image
6
+ import pymupdf # type: ignore
7
+
8
+ from academia_mcp.utils import get_with_retries
9
+
10
+
11
+ def download_pdf(url: str, output_path: Path) -> None:
12
+ response = get_with_retries(url)
13
+ response.raise_for_status()
14
+ content_type = response.headers.get("content-type")
15
+ assert content_type
16
+ assert "application/pdf" in content_type.lower()
17
+ with open(output_path.resolve(), "wb") as fp:
18
+ fp.write(response.content)
19
+
20
+
21
+ def parse_pdf_file(pdf_path: Path) -> List[str]:
22
+ # Why not Marker? Because it is too heavy.
23
+ reader = PdfReader(str(pdf_path.resolve()))
24
+
25
+ pages = []
26
+ for page_number, page in enumerate(reader.pages, start=1):
27
+ try:
28
+ text = page.extract_text()
29
+ if not text:
30
+ continue
31
+ prefix = f"## Page {page_number}\n\n"
32
+ pages.append(prefix + text)
33
+ except Exception:
34
+ continue
35
+ return pages
36
+
37
+
38
+ def parse_pdf_file_to_images(pdf_path: Path) -> List[Image.Image]:
39
+ doc = pymupdf.open(str(pdf_path.resolve()))
40
+ images = []
41
+ for page in doc:
42
+ pil_image: Image.Image = page.get_pixmap().pil_image()
43
+ images.append(pil_image)
44
+ return images
@@ -12,36 +12,9 @@ from dataclasses import dataclass, field
12
12
  import requests
13
13
  import bs4
14
14
  from markdownify import MarkdownConverter # type: ignore
15
- from pypdf import PdfReader
16
15
 
17
16
  from academia_mcp.utils import get_with_retries
18
-
19
-
20
- def download_pdf(url: str, output_path: Path) -> None:
21
- response = get_with_retries(url)
22
- response.raise_for_status()
23
- content_type = response.headers.get("content-type")
24
- assert content_type
25
- assert "application/pdf" in content_type.lower()
26
- with open(output_path.resolve(), "wb") as fp:
27
- fp.write(response.content)
28
-
29
-
30
- def parse_pdf_file(pdf_path: Path) -> List[str]:
31
- # Why not Marker? Because it is too heavy.
32
- reader = PdfReader(str(pdf_path.resolve()))
33
-
34
- pages = []
35
- for page_number, page in enumerate(reader.pages, start=1):
36
- try:
37
- text = page.extract_text()
38
- if not text:
39
- continue
40
- prefix = f"## Page {page_number}\n\n"
41
- pages.append(prefix + text)
42
- except Exception:
43
- continue
44
- return pages
17
+ from academia_mcp.pdf import parse_pdf_file, download_pdf
45
18
 
46
19
 
47
20
  HTML_URL = "https://arxiv.org/html/{paper_id}"
@@ -11,7 +11,7 @@ from datasets import load_dataset # type: ignore
11
11
 
12
12
  from academia_mcp.tools.arxiv_download import arxiv_download
13
13
  from academia_mcp.utils import extract_json, encode_prompt
14
- from academia_mcp.llm import llm_acall
14
+ from academia_mcp.llm import llm_acall, ChatMessage
15
15
 
16
16
 
17
17
  class ProposalDataset:
@@ -208,7 +208,9 @@ async def extract_bitflip_info(arxiv_id: str) -> str:
208
208
  paper = arxiv_download(arxiv_id)
209
209
  abstract = json.loads(paper)["abstract"]
210
210
  prompt = encode_prompt(EXTRACT_PROMPT, abstract=abstract)
211
- content = await llm_acall(model_name=model_name, prompt=prompt)
211
+ content = await llm_acall(
212
+ model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
213
+ )
212
214
  result = extract_json(content)
213
215
  bitflip_info: BitFlipInfo = BitFlipInfo.model_validate(result)
214
216
  return str(bitflip_info.model_dump_json())
@@ -240,7 +242,9 @@ async def generate_research_proposal(bit: str, additional_context: str = "") ->
240
242
  prompt = encode_prompt(
241
243
  IMPROVEMENT_PROMPT, bit=bit, examples=examples, additional_context=additional_context
242
244
  )
243
- content = await llm_acall(model_name=model_name, prompt=prompt)
245
+ content = await llm_acall(
246
+ model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
247
+ )
244
248
  result = extract_json(content)
245
249
  proposal: Proposal = Proposal.model_validate(result)
246
250
  proposal.proposal_id = random.randint(0, 1000000)
@@ -276,7 +280,9 @@ async def score_research_proposals(proposals: List[str]) -> str:
276
280
  model_name = os.getenv("BITFLIP_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
277
281
  proposals = [Proposal.model_validate_json(proposal) for proposal in proposals]
278
282
  prompt = encode_prompt(SCORE_PROMPT, proposals=proposals)
279
- content = await llm_acall(model_name=model_name, prompt=prompt)
283
+ content = await llm_acall(
284
+ model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
285
+ )
280
286
  scores = extract_json(content)
281
287
  final_scores = [ProposalScores.model_validate(score) for score in scores]
282
288
  return json.dumps([s.model_dump() for s in final_scores], ensure_ascii=False)
@@ -70,5 +70,7 @@ async def document_qa(
70
70
 
71
71
  model_name = os.getenv("DOCUMENT_QA_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
72
72
  prompt = PROMPT.format(question=question, document=document)
73
- content = await llm_acall(model_name=model_name, prompt=prompt)
73
+ content = await llm_acall(
74
+ model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
75
+ )
74
76
  return content.strip()
@@ -0,0 +1,51 @@
1
+ import base64
2
+ from io import BytesIO
3
+ from pathlib import Path
4
+
5
+ from academia_mcp.pdf import parse_pdf_file_to_images
6
+ from academia_mcp.llm import llm_acall, ChatMessage
7
+ from academia_mcp.files import get_workspace_dir
8
+
9
+
10
+ PROMPT = """
11
+ Find problems with the paper formatiing.
12
+ """
13
+
14
+
15
+ async def review_pdf(pdf_filename: str) -> str:
16
+ """
17
+ Review a pdf file.
18
+
19
+ Args:
20
+ pdf_path: The path to the pdf file.
21
+ """
22
+ pdf_filename_path = Path(pdf_filename)
23
+ if not pdf_filename_path.exists():
24
+ pdf_filename_path = Path(get_workspace_dir()) / pdf_filename
25
+
26
+ images = parse_pdf_file_to_images(pdf_filename_path)
27
+ content_parts = []
28
+ for image in images:
29
+ buffer_io = BytesIO()
30
+ image.save(buffer_io, format="PNG")
31
+ img_bytes = buffer_io.getvalue()
32
+ image_base64 = base64.b64encode(img_bytes).decode("utf-8")
33
+ image_content = {
34
+ "type": "image_url",
35
+ "image_url": {"url": f"data:image/png;base64,{image_base64}"},
36
+ }
37
+ content_parts.append(image_content)
38
+
39
+ content_parts.append(
40
+ {
41
+ "type": "text",
42
+ "text": "Please review the paper and provide a summary of its content.",
43
+ }
44
+ )
45
+ llm_response = await llm_acall(
46
+ model_name="gpt-4o",
47
+ messages=[
48
+ ChatMessage(role="user", content=content_parts),
49
+ ],
50
+ )
51
+ return llm_response.strip()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.4.2
3
+ Version: 1.5.0
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -27,6 +27,8 @@ Requires-Dist: fire>=0.7.0
27
27
  Requires-Dist: openai>=1.97.1
28
28
  Requires-Dist: jinja2>=3.1.6
29
29
  Requires-Dist: datasets>=4.0.0
30
+ Requires-Dist: pymupdf>=1.26.4
31
+ Requires-Dist: pillow>=11.3.0
30
32
  Dynamic: license-file
31
33
 
32
34
  # Academia MCP
@@ -5,6 +5,7 @@ academia_mcp/__init__.py
5
5
  academia_mcp/__main__.py
6
6
  academia_mcp/files.py
7
7
  academia_mcp/llm.py
8
+ academia_mcp/pdf.py
8
9
  academia_mcp/py.typed
9
10
  academia_mcp/server.py
10
11
  academia_mcp/utils.py
@@ -25,6 +26,7 @@ academia_mcp/tools/document_qa.py
25
26
  academia_mcp/tools/hf_datasets_search.py
26
27
  academia_mcp/tools/latex.py
27
28
  academia_mcp/tools/py.typed
29
+ academia_mcp/tools/review.py
28
30
  academia_mcp/tools/s2_citations.py
29
31
  academia_mcp/tools/visit_webpage.py
30
32
  academia_mcp/tools/web_search.py
@@ -36,6 +38,7 @@ tests/test_document_qa.py
36
38
  tests/test_extract_json.py
37
39
  tests/test_hf_dataset_search.py
38
40
  tests/test_latex.py
41
+ tests/test_review.py
39
42
  tests/test_s2_citations.py
40
43
  tests/test_visit_webpage.py
41
44
  tests/test_web_search.py
@@ -15,3 +15,5 @@ fire>=0.7.0
15
15
  openai>=1.97.1
16
16
  jinja2>=3.1.6
17
17
  datasets>=4.0.0
18
+ pymupdf>=1.26.4
19
+ pillow>=11.3.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "academia-mcp"
7
- version = "1.4.2"
7
+ version = "1.5.0"
8
8
  description = "MCP server that provides different tools to search for scientific publications"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -34,6 +34,8 @@ dependencies = [
34
34
  "openai>=1.97.1",
35
35
  "jinja2>=3.1.6",
36
36
  "datasets>=4.0.0",
37
+ "pymupdf>=1.26.4",
38
+ "pillow>=11.3.0",
37
39
  ]
38
40
 
39
41
  [dependency-groups]
@@ -71,3 +73,9 @@ follow_imports = "skip"
71
73
  [tool.pytest.ini_options]
72
74
  asyncio_mode = "auto"
73
75
  asyncio_default_test_loop_scope = "function"
76
+ filterwarnings = [
77
+ "ignore:builtin type SwigPyPacked has no __module__ attribute:DeprecationWarning",
78
+ "ignore:builtin type SwigPyObject has no __module__ attribute:DeprecationWarning",
79
+ "ignore:builtin type swigvarlink has no __module__ attribute:DeprecationWarning",
80
+ "ignore:The 'warn' method is deprecated, use 'warning' instead:DeprecationWarning:acl_anthology.text.texmath",
81
+ ]
@@ -36,6 +36,7 @@ def test_latex_compile_latex_from_file() -> None:
36
36
  temp_dir_path = Path(temp_dir)
37
37
  tex_filename = "temp.tex"
38
38
  tex_file_path = temp_dir_path / tex_filename
39
+ pdf_filename = "test.pdf"
39
40
  tex_file_path.write_text(template["template"], encoding="utf-8")
40
- result = compile_latex_from_file(str(tex_file_path), "test.pdf")
41
+ result = compile_latex_from_file(str(tex_file_path), pdf_filename)
41
42
  assert "Compilation successful" in result
@@ -0,0 +1,20 @@
1
+ import json
2
+ import tempfile
3
+ from pathlib import Path
4
+
5
+ from academia_mcp.tools.review import review_pdf
6
+ from academia_mcp.tools.latex import compile_latex_from_file, get_latex_template
7
+
8
+
9
+ async def test_review_pdf() -> None:
10
+ template = json.loads(get_latex_template("agents4science_2025"))
11
+ with tempfile.TemporaryDirectory() as temp_dir:
12
+ temp_dir_path = Path(temp_dir)
13
+ tex_filename = "temp.tex"
14
+ tex_file_path = temp_dir_path / tex_filename
15
+ pdf_filename = "test.pdf"
16
+ tex_file_path.write_text(template["template"], encoding="utf-8")
17
+ result = compile_latex_from_file(str(tex_file_path), pdf_filename)
18
+ assert "Compilation successful" in result
19
+ review = await review_pdf(str(pdf_filename))
20
+ print(review)
File without changes
File without changes
File without changes