academia-mcp 1.8.1__tar.gz → 1.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/PKG-INFO +2 -1
  2. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/files.py +3 -2
  3. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/llm.py +4 -3
  4. academia_mcp-1.9.0/academia_mcp/server.py +126 -0
  5. academia_mcp-1.9.0/academia_mcp/settings.py +33 -0
  6. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/__init__.py +2 -1
  7. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/bitflip.py +5 -5
  8. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/document_qa.py +4 -6
  9. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/review.py +32 -9
  10. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/visit_webpage.py +4 -4
  11. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/web_search.py +7 -7
  12. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp.egg-info/PKG-INFO +2 -1
  13. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp.egg-info/SOURCES.txt +1 -0
  14. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp.egg-info/requires.txt +1 -0
  15. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/pyproject.toml +3 -1
  16. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/tests/test_review.py +7 -1
  17. academia_mcp-1.8.1/academia_mcp/server.py +0 -101
  18. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/LICENSE +0 -0
  19. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/README.md +0 -0
  20. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/__init__.py +0 -0
  21. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/__main__.py +0 -0
  22. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
  23. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
  24. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/pdf.py +0 -0
  25. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/py.typed +0 -0
  26. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/anthology_search.py +0 -0
  27. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/arxiv_download.py +0 -0
  28. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/arxiv_search.py +0 -0
  29. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/hf_datasets_search.py +0 -0
  30. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/latex.py +0 -0
  31. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/py.typed +0 -0
  32. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/tools/s2_citations.py +0 -0
  33. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp/utils.py +0 -0
  34. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp.egg-info/dependency_links.txt +0 -0
  35. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp.egg-info/entry_points.txt +0 -0
  36. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/academia_mcp.egg-info/top_level.txt +0 -0
  37. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/setup.cfg +0 -0
  38. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/tests/test_anthology_search.py +0 -0
  39. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/tests/test_arxiv_download.py +0 -0
  40. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/tests/test_arxiv_search.py +0 -0
  41. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/tests/test_bitflip.py +0 -0
  42. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/tests/test_document_qa.py +0 -0
  43. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/tests/test_extract_json.py +0 -0
  44. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/tests/test_hf_dataset_search.py +0 -0
  45. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/tests/test_latex.py +0 -0
  46. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/tests/test_s2_citations.py +0 -0
  47. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/tests/test_visit_webpage.py +0 -0
  48. {academia_mcp-1.8.1 → academia_mcp-1.9.0}/tests/test_web_search.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: academia-mcp
- Version: 1.8.1
+ Version: 1.9.0
  Summary: MCP server that provides different tools to search for scientific publications
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -29,6 +29,7 @@ Requires-Dist: jinja2>=3.1.6
  Requires-Dist: datasets>=4.0.0
  Requires-Dist: pymupdf>=1.26.4
  Requires-Dist: pillow>=11.3.0
+ Requires-Dist: pydantic-settings>=2.6.0
  Dynamic: license-file

  # Academia MCP
academia_mcp/files.py
@@ -1,7 +1,8 @@
- import os
  from typing import Optional
  from pathlib import Path

+ from academia_mcp.settings import settings
+
  DIR_PATH = Path(__file__).parent
  ROOT_PATH = DIR_PATH.parent
  DEFAULT_WORKSPACE_DIR_PATH: Path = DIR_PATH / "workdir"
@@ -14,7 +15,7 @@ class WorkspaceDirectory:
      @classmethod
      def get_dir(cls) -> Path:
          if cls.workspace_dir is None:
-             return Path(os.getenv("WORKSPACE_DIR", DEFAULT_WORKSPACE_DIR_PATH))
+             return Path(settings.WORKSPACE_DIR)
          return cls.workspace_dir

      @classmethod
academia_mcp/llm.py
@@ -1,10 +1,11 @@
- import os
  from typing import List, Dict, Any

  from pydantic import BaseModel
  from openai import AsyncOpenAI
  from openai.types.chat.chat_completion_message import ChatCompletionMessage

+ from academia_mcp.settings import settings
+

  class ChatMessage(BaseModel): # type: ignore
      role: str
@@ -15,9 +16,9 @@ ChatMessages = List[ChatMessage]


  async def llm_acall(model_name: str, messages: ChatMessages, **kwargs: Any) -> str:
-     key = os.getenv("OPENROUTER_API_KEY", "")
+     key = settings.OPENROUTER_API_KEY
      assert key, "Please set OPENROUTER_API_KEY in the environment variables"
-     base_url = os.getenv("BASE_URL", "https://openrouter.ai/api/v1")
+     base_url = settings.BASE_URL

      client = AsyncOpenAI(base_url=base_url, api_key=key)
      response: ChatCompletionMessage = (
academia_mcp-1.9.0/academia_mcp/server.py (new file)
@@ -0,0 +1,126 @@
+ import socket
+ import logging
+ from logging.config import dictConfig
+ from typing import Optional, Literal
+
+ import fire # type: ignore
+ from mcp.server.fastmcp import FastMCP
+ from uvicorn.config import LOGGING_CONFIG as UVICORN_LOGGING_CONFIG
+
+ from academia_mcp.settings import settings
+ from academia_mcp.tools.arxiv_search import arxiv_search
+ from academia_mcp.tools.arxiv_download import arxiv_download
+ from academia_mcp.tools.s2_citations import s2_get_citations, s2_get_references
+ from academia_mcp.tools.hf_datasets_search import hf_datasets_search
+ from academia_mcp.tools.anthology_search import anthology_search
+ from academia_mcp.tools.document_qa import document_qa
+ from academia_mcp.tools.latex import (
+     compile_latex,
+     get_latex_template,
+     get_latex_templates_list,
+     read_pdf,
+ )
+ from academia_mcp.tools.web_search import (
+     web_search,
+     tavily_web_search,
+     exa_web_search,
+     brave_web_search,
+ )
+ from academia_mcp.tools.visit_webpage import visit_webpage
+ from academia_mcp.tools.bitflip import (
+     extract_bitflip_info,
+     generate_research_proposals,
+     score_research_proposals,
+ )
+ from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
+
+
+ def configure_uvicorn_style_logging(level: int = logging.INFO) -> None:
+     config = {**UVICORN_LOGGING_CONFIG}
+     config["disable_existing_loggers"] = False
+     config["root"] = {"handlers": ["default"], "level": logging.getLevelName(level)}
+     dictConfig(config)
+
+
+ def find_free_port() -> int:
+     for port in range(5000, 6001):
+         try:
+             with socket.socket() as s:
+                 s.bind(("", port))
+                 return port
+         except Exception:
+             continue
+     raise RuntimeError("No free port in range 5000-6000 found")
+
+
+ def run(
+     host: str = "0.0.0.0",
+     port: Optional[int] = None,
+     mount_path: str = "/",
+     streamable_http_path: str = "/mcp",
+     transport: Literal["stdio", "sse", "streamable-http"] = "streamable-http",
+     disable_web_search_tools: bool = False,
+     disable_llm_tools: bool = False,
+ ) -> None:
+     configure_uvicorn_style_logging()
+     server = FastMCP(
+         "Academia MCP",
+         stateless_http=True,
+         streamable_http_path=streamable_http_path,
+         mount_path=mount_path,
+     )
+     logger = logging.getLogger(__name__)
+
+     server.add_tool(arxiv_search)
+     server.add_tool(arxiv_download)
+     server.add_tool(s2_get_citations)
+     server.add_tool(s2_get_references)
+     server.add_tool(hf_datasets_search)
+     server.add_tool(anthology_search)
+     server.add_tool(get_latex_template)
+     server.add_tool(get_latex_templates_list)
+     server.add_tool(visit_webpage)
+
+     if settings.WORKSPACE_DIR:
+         server.add_tool(compile_latex)
+         server.add_tool(download_pdf_paper)
+         server.add_tool(read_pdf)
+     else:
+         logger.warning(
+             "WORKSPACE_DIR is not set, compile_latex/download_pdf_paper/read_pdf will not be available!"
+         )
+
+     if not disable_web_search_tools:
+         if settings.TAVILY_API_KEY:
+             server.add_tool(tavily_web_search)
+         if settings.EXA_API_KEY:
+             server.add_tool(exa_web_search)
+         if settings.BRAVE_API_KEY:
+             server.add_tool(brave_web_search)
+         if settings.EXA_API_KEY or settings.BRAVE_API_KEY or settings.TAVILY_API_KEY:
+             server.add_tool(web_search)
+         else:
+             logger.warning("No web search tools keys are set, web_search will not be available!")
+
+     if not disable_llm_tools and settings.OPENROUTER_API_KEY:
+         server.add_tool(extract_bitflip_info)
+         server.add_tool(generate_research_proposals)
+         server.add_tool(score_research_proposals)
+         server.add_tool(document_qa)
+         if settings.WORKSPACE_DIR:
+             server.add_tool(review_pdf_paper)
+     else:
+         logger.warning("No OpenRouter API key is set, LLM-related tools will not be available!")
+
+     if port is None:
+         if settings.PORT is not None:
+             port = int(settings.PORT)
+         else:
+             port = find_free_port()
+     server.settings.port = port
+     server.settings.host = host
+     server.run(transport=transport)
+
+
+ if __name__ == "__main__":
+     fire.Fire(run)
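
For orientation (not part of the diff): the new server module gates tool registration on the settings object and exposes run() through fire, so the same entry point can also be driven directly from Python. A minimal sketch, assuming the package is installed and the relevant API keys are configured:

    from academia_mcp.server import run

    # Registers whichever tools the configured keys allow and serves over stdio
    # instead of the default streamable-http transport.
    run(transport="stdio")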
academia_mcp-1.9.0/academia_mcp/settings.py (new file)
@@ -0,0 +1,33 @@
+ from pathlib import Path
+ from typing import Optional
+
+ from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+ class Settings(BaseSettings):
+     BASE_URL: str = "https://openrouter.ai/api/v1"
+
+     OPENROUTER_API_KEY: str = ""
+     TAVILY_API_KEY: Optional[str] = None
+     EXA_API_KEY: Optional[str] = None
+     BRAVE_API_KEY: Optional[str] = None
+
+     REVIEW_MODEL_NAME: str = "gpt-5"
+     BITFLIP_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
+     BITFLIP_MAX_COMPLETION_TOKENS: int = 16384
+     DOCUMENT_QA_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
+     DOCUMENT_QA_QUESTION_MAX_LENGTH: int = 10000
+     DOCUMENT_QA_DOCUMENT_MAX_LENGTH: int = 200000
+
+     PORT: int = 5056
+     WORKSPACE_DIR: Optional[Path] = None
+
+     model_config = SettingsConfigDict(
+         env_file=".env",
+         env_file_encoding="utf-8",
+         env_prefix="",
+         extra="ignore",
+     )
+
+
+ settings = Settings()
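
For context (not part of the diff): pydantic-settings populates each Settings field from an identically named environment variable or a local .env file, which is what replaces the scattered os.getenv calls removed throughout this release. A minimal sketch with an illustrative value:

    import os

    os.environ["WORKSPACE_DIR"] = "/tmp/academia_workspace"  # illustrative value, not from the diff

    from academia_mcp.settings import settings

    print(settings.WORKSPACE_DIR)  # Path("/tmp/academia_workspace")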
academia_mcp/tools/__init__.py
@@ -13,7 +13,7 @@ from .latex import (
  from .web_search import web_search, tavily_web_search, exa_web_search, brave_web_search
  from .visit_webpage import visit_webpage
  from .bitflip import extract_bitflip_info, generate_research_proposals, score_research_proposals
- from .review import review_pdf_paper, download_pdf_paper
+ from .review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url

  __all__ = [
      "arxiv_search",
@@ -35,6 +35,7 @@ __all__ = [
      "generate_research_proposals",
      "score_research_proposals",
      "review_pdf_paper",
+     "review_pdf_paper_by_url",
      "download_pdf_paper",
      "read_pdf",
  ]
academia_mcp/tools/bitflip.py
@@ -2,7 +2,6 @@
  # https://web.stanford.edu/class/cs197c/slides/02-literature-search.pdf

  import json
- import os
  import random
  from typing import List, Optional, Any, Dict

@@ -12,6 +11,7 @@ from datasets import load_dataset # type: ignore
  from academia_mcp.tools.arxiv_download import arxiv_download
  from academia_mcp.utils import extract_json, encode_prompt
  from academia_mcp.llm import llm_acall, ChatMessage
+ from academia_mcp.settings import settings


  class ProposalDataset:
@@ -201,7 +201,7 @@ async def extract_bitflip_info(arxiv_id: str) -> str:
      Args:
          arxiv_id: The arXiv ID of the paper to extract the Bit-Flip information from.
      """
-     model_name = os.getenv("BITFLIP_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
+     model_name = settings.BITFLIP_MODEL_NAME
      paper = arxiv_download(arxiv_id)
      abstract = json.loads(paper)["abstract"]
      prompt = encode_prompt(EXTRACT_PROMPT, abstract=abstract)
@@ -240,8 +240,8 @@ async def generate_research_proposals(
      ]
      Use `json.loads` to deserialize the result if you want to get specific items.
      """
-     model_name = os.getenv("BITFLIP_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
-     max_completion_tokens = int(os.getenv("BITFLIP_MAX_COMPLETION_TOKENS", 16384))
+     model_name = settings.BITFLIP_MODEL_NAME
+     max_completion_tokens = int(settings.BITFLIP_MAX_COMPLETION_TOKENS)
      examples = ProposalDataset.get_dataset()[:]
      examples = random.choices(examples, k=2)

@@ -293,7 +293,7 @@ async def score_research_proposals(proposals: str | List[str | Dict[str, Any] |
      Args:
          proposals: A list of JSON strings with research proposals.
      """
-     model_name = os.getenv("BITFLIP_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
+     model_name = settings.BITFLIP_MODEL_NAME
      if isinstance(proposals, str):
          proposals = json.loads(proposals)
      assert isinstance(proposals, list), "Proposals should be a list of JSON strings"
academia_mcp/tools/document_qa.py
@@ -1,14 +1,12 @@
- import os
  import json
  from typing import List, Any, Dict
- from dotenv import load_dotenv

  from pydantic import BaseModel

  from academia_mcp.llm import llm_acall
  from academia_mcp.utils import truncate_content
+ from academia_mcp.settings import settings

- load_dotenv()

  PROMPT = """You are a helpful assistant that answers questions about documents accurately and concisely.
  Please answer the following questions based solely on the provided document.
@@ -65,10 +63,10 @@ async def document_qa(
          document = json.dumps(document)
      assert document and document.strip(), "Please provide non-empty 'document'"

-     question = truncate_content(question, 10000)
-     document = truncate_content(document, 200000)
+     question = truncate_content(question, settings.DOCUMENT_QA_QUESTION_MAX_LENGTH)
+     document = truncate_content(document, settings.DOCUMENT_QA_DOCUMENT_MAX_LENGTH)

-     model_name = os.getenv("DOCUMENT_QA_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
+     model_name = settings.DOCUMENT_QA_MODEL_NAME
      prompt = PROMPT.format(question=question, document=document)
      content = await llm_acall(
          model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
academia_mcp/tools/review.py
@@ -1,6 +1,6 @@
  import base64
- import os
  import uuid
+ import tempfile
  from io import BytesIO
  from pathlib import Path
  from typing import List, Dict, Any
@@ -8,6 +8,7 @@ from typing import List, Dict, Any
  from academia_mcp.pdf import parse_pdf_file_to_images, parse_pdf_file, download_pdf
  from academia_mcp.llm import llm_acall, ChatMessage
  from academia_mcp.files import get_workspace_dir
+ from academia_mcp.settings import settings


  PROMPT = """
@@ -138,6 +139,16 @@ Always produce a correct JSON object.
  """


+ def _create_pdf_filename(pdf_url: str) -> str:
+     if "arxiv.org/pdf" in pdf_url:
+         pdf_filename = pdf_url.split("/")[-1]
+     else:
+         pdf_filename = str(uuid.uuid4())
+     if not pdf_filename.endswith(".pdf"):
+         pdf_filename += ".pdf"
+     return pdf_filename
+
+
  def download_pdf_paper(pdf_url: str) -> str:
      """
      Download a pdf file from a url to the workspace directory.
@@ -147,13 +158,7 @@ def download_pdf_paper(pdf_url: str) -> str:
      Args:
          pdf_url: The url of the pdf file.
      """
-     if "arxiv.org/pdf" in pdf_url:
-         pdf_filename = pdf_url.split("/")[-1]
-     else:
-         pdf_filename = str(uuid.uuid4())
-     if not pdf_filename.endswith(".pdf"):
-         pdf_filename += ".pdf"
-
+     pdf_filename = _create_pdf_filename(pdf_url)
      pdf_path = Path(get_workspace_dir()) / pdf_filename
      download_pdf(pdf_url, pdf_path)
      return pdf_filename
@@ -198,7 +203,7 @@ async def review_pdf_paper(pdf_filename: str) -> str:
              "text": "####\n\nInstructions:\n\n" + PROMPT,
          }
      )
-     model_name = os.getenv("REVIEW_MODEL_NAME", "gpt-5")
+     model_name = settings.REVIEW_MODEL_NAME
      llm_response = await llm_acall(
          model_name=model_name,
          messages=[
@@ -206,3 +211,21 @@
          ],
      )
      return llm_response.strip()
+
+
+ async def review_pdf_paper_by_url(pdf_url: str) -> str:
+     """
+     Review a pdf file with a paper by url.
+     It downloads the pdf file and then reviews it.
+     It parses the pdf file into images and then sends the images to the LLM for review.
+     It can detect different issues with the paper formatting.
+     Returns a proper NeurIPS-style review.
+
+     Args:
+         pdf_url: The url of the pdf file.
+     """
+     pdf_filename = _create_pdf_filename(pdf_url)
+     with tempfile.TemporaryDirectory(prefix="temp_pdf_") as temp_dir:
+         pdf_path = Path(temp_dir) / pdf_filename
+         download_pdf(pdf_url, pdf_path)
+         return await review_pdf_paper(str(pdf_path))
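
Usage note (not part of the diff): review_pdf_paper_by_url composes _create_pdf_filename, download_pdf, and review_pdf_paper, writing the PDF to a temporary directory instead of the workspace. A minimal sketch, assuming OPENROUTER_API_KEY is configured; the URL matches the one used in tests/test_review.py:

    import asyncio

    from academia_mcp.tools.review import review_pdf_paper_by_url

    # Downloads the PDF into a temporary directory and returns the review string.
    review = asyncio.run(review_pdf_paper_by_url("https://arxiv.org/pdf/2502.01220"))
    print(review)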
academia_mcp/tools/visit_webpage.py
@@ -1,18 +1,18 @@
  import re
- import os
  import json
  from typing import Optional

  from markdownify import markdownify # type: ignore

  from academia_mcp.utils import get_with_retries, post_with_retries
+ from academia_mcp.settings import settings

  EXA_CONTENTS_URL = "https://api.exa.ai/contents"
  AVAILABLE_PROVIDERS = ("basic", "exa")


  def _exa_visit_webpage(url: str) -> str:
-     key = os.getenv("EXA_API_KEY", "")
+     key = settings.EXA_API_KEY or ""
      assert key, "Error: EXA_API_KEY is not set and no api_key was provided"
      payload = {
          "urls": [url],
@@ -38,14 +38,14 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
          provider in AVAILABLE_PROVIDERS
      ), f"Invalid provider: {provider}. Available providers: {AVAILABLE_PROVIDERS}"

-     if provider == "exa":
+     if provider == "exa" and settings.EXA_API_KEY:
          return _exa_visit_webpage(url)

      assert provider == "basic"
      response = get_with_retries(url)
      content_type = response.headers.get("content-type", "").lower()
      if not content_type or (not content_type.startswith("text/") and "html" not in content_type):
-         if os.getenv("EXA_API_KEY"):
+         if settings.EXA_API_KEY:
              return _exa_visit_webpage(url)
          return json.dumps(
              {"id": url, "error": f"Unsupported content-type: {content_type or 'unknown'}"}
academia_mcp/tools/web_search.py
@@ -1,8 +1,8 @@
- import os
  import json
  from typing import Optional

  from academia_mcp.utils import post_with_retries, get_with_retries
+ from academia_mcp.settings import settings


  EXA_SEARCH_URL = "https://api.exa.ai/search"
@@ -32,9 +32,9 @@ def web_search(
      providers = ("tavily", "brave", "exa")
      assert provider in providers, "Error: provider must be either 'exa', 'tavily' or 'brave'"

-     is_tavily_available = os.getenv("TAVILY_API_KEY") is not None
-     is_exa_available = os.getenv("EXA_API_KEY") is not None
-     is_brave_available = os.getenv("BRAVE_API_KEY") is not None
+     is_tavily_available = bool(settings.TAVILY_API_KEY)
+     is_exa_available = bool(settings.EXA_API_KEY)
+     is_brave_available = bool(settings.BRAVE_API_KEY)
      assert is_tavily_available or is_exa_available or is_brave_available
      availability = {
          "tavily": is_tavily_available,
@@ -76,7 +76,7 @@ def tavily_web_search(query: str, limit: Optional[int] = 20) -> str:
      assert isinstance(limit, int), "Error: limit should be an integer"
      assert 0 < limit <= 25, "Error: limit should be between 1 and 25"

-     key = os.getenv("TAVILY_API_KEY", "")
+     key = settings.TAVILY_API_KEY or ""
      assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
      payload = {
          "query": query,
@@ -112,7 +112,7 @@ def exa_web_search(query: str, limit: Optional[int] = 20) -> str:
      assert isinstance(limit, int), "Error: limit should be an integer"
      assert 0 < limit <= 25, "Error: limit should be between 1 and 25"

-     key = os.getenv("EXA_API_KEY", "")
+     key = settings.EXA_API_KEY or ""
      assert key, "Error: EXA_API_KEY is not set and no api_key was provided"
      payload = {
          "query": query,
@@ -151,7 +151,7 @@ def brave_web_search(query: str, limit: Optional[int] = 20) -> str:
      assert isinstance(limit, int), "Error: limit should be an integer"
      assert 0 < limit <= 20, "Error: limit should be between 1 and 20"

-     key = os.getenv("BRAVE_API_KEY", "")
+     key = settings.BRAVE_API_KEY or ""
      assert key, "Error: BRAVE_API_KEY is not set and no api_key was provided"
      payload = {
          "q": query,
academia_mcp.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: academia-mcp
- Version: 1.8.1
+ Version: 1.9.0
  Summary: MCP server that provides different tools to search for scientific publications
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -29,6 +29,7 @@ Requires-Dist: jinja2>=3.1.6
  Requires-Dist: datasets>=4.0.0
  Requires-Dist: pymupdf>=1.26.4
  Requires-Dist: pillow>=11.3.0
+ Requires-Dist: pydantic-settings>=2.6.0
  Dynamic: license-file

  # Academia MCP
academia_mcp.egg-info/SOURCES.txt
@@ -8,6 +8,7 @@ academia_mcp/llm.py
  academia_mcp/pdf.py
  academia_mcp/py.typed
  academia_mcp/server.py
+ academia_mcp/settings.py
  academia_mcp/utils.py
  academia_mcp.egg-info/PKG-INFO
  academia_mcp.egg-info/SOURCES.txt
academia_mcp.egg-info/requires.txt
@@ -17,3 +17,4 @@ jinja2>=3.1.6
  datasets>=4.0.0
  pymupdf>=1.26.4
  pillow>=11.3.0
+ pydantic-settings>=2.6.0
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "academia-mcp"
- version = "1.8.1"
+ version = "1.9.0"
  description = "MCP server that provides different tools to search for scientific publications"
  readme = "README.md"
  authors = [
@@ -36,6 +36,7 @@ dependencies = [
      "datasets>=4.0.0",
      "pymupdf>=1.26.4",
      "pillow>=11.3.0",
+     "pydantic-settings>=2.6.0",
  ]

  [dependency-groups]
@@ -78,4 +79,5 @@ filterwarnings = [
      "ignore:builtin type SwigPyObject has no __module__ attribute:DeprecationWarning",
      "ignore:builtin type swigvarlink has no __module__ attribute:DeprecationWarning",
      "ignore:The 'warn' method is deprecated, use 'warning' instead:DeprecationWarning:acl_anthology.text.texmath",
+     "ignore:Data directory contains a different schema.rnc as this library; you might need to update the data or the acl-anthology library.",
  ]
tests/test_review.py
@@ -1,7 +1,13 @@
- from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
+ from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url


  async def test_review_pdf_paper() -> None:
      download_pdf_paper("https://arxiv.org/pdf/2502.01220")
      review = await review_pdf_paper("2502.01220.pdf")
      assert review
+
+
+ async def test_review_pdf_paper_by_url() -> None:
+     review = await review_pdf_paper_by_url("https://arxiv.org/pdf/2502.01220")
+     assert review
+     assert "format_issues" in str(review)
academia_mcp-1.8.1/academia_mcp/server.py (removed)
@@ -1,101 +0,0 @@
- import os
- import socket
- from typing import Optional, Literal
-
- import fire # type: ignore
- from mcp.server.fastmcp import FastMCP
- from dotenv import load_dotenv
-
- from .tools.arxiv_search import arxiv_search
- from .tools.arxiv_download import arxiv_download
- from .tools.s2_citations import s2_get_citations, s2_get_references
- from .tools.hf_datasets_search import hf_datasets_search
- from .tools.anthology_search import anthology_search
- from .tools.document_qa import document_qa
- from .tools.latex import (
-     compile_latex,
-     get_latex_template,
-     get_latex_templates_list,
-     read_pdf,
- )
- from .tools.web_search import web_search, tavily_web_search, exa_web_search, brave_web_search
- from .tools.visit_webpage import visit_webpage
- from .tools.bitflip import (
-     extract_bitflip_info,
-     generate_research_proposals,
-     score_research_proposals,
- )
- from .tools.review import review_pdf_paper, download_pdf_paper
-
-
- load_dotenv()
-
-
- def find_free_port() -> int:
-     for port in range(5000, 6001):
-         try:
-             with socket.socket() as s:
-                 s.bind(("", port))
-                 return port
-         except Exception:
-             continue
-     raise RuntimeError("No free port in range 5000-6000 found")
-
-
- def run(
-     host: str = "0.0.0.0",
-     port: Optional[int] = None,
-     mount_path: str = "/",
-     streamable_http_path: str = "/mcp",
-     transport: Literal["stdio", "sse", "streamable-http"] = "streamable-http",
-     disable_web_search_tools: bool = False,
-     disable_llm_tools: bool = False,
- ) -> None:
-     server = FastMCP(
-         "Academia MCP",
-         stateless_http=True,
-         streamable_http_path=streamable_http_path,
-         mount_path=mount_path,
-     )
-
-     server.add_tool(arxiv_search)
-     server.add_tool(arxiv_download)
-     server.add_tool(s2_get_citations)
-     server.add_tool(s2_get_references)
-     server.add_tool(hf_datasets_search)
-     server.add_tool(anthology_search)
-     server.add_tool(compile_latex)
-     server.add_tool(get_latex_template)
-     server.add_tool(get_latex_templates_list)
-     server.add_tool(visit_webpage)
-     server.add_tool(download_pdf_paper)
-     server.add_tool(read_pdf)
-
-     if not disable_web_search_tools:
-         if os.getenv("TAVILY_API_KEY"):
-             server.add_tool(tavily_web_search)
-         if os.getenv("EXA_API_KEY"):
-             server.add_tool(exa_web_search)
-         if os.getenv("BRAVE_API_KEY"):
-             server.add_tool(brave_web_search)
-         if os.getenv("EXA_API_KEY") or os.getenv("BRAVE_API_KEY") or os.getenv("TAVILY_API_KEY"):
-             server.add_tool(web_search)
-
-     if not disable_llm_tools and os.getenv("OPENROUTER_API_KEY"):
-         server.add_tool(extract_bitflip_info)
-         server.add_tool(generate_research_proposals)
-         server.add_tool(score_research_proposals)
-         server.add_tool(document_qa)
-         server.add_tool(review_pdf_paper)
-
-     if port is None:
-         port = int(os.environ.get("PORT", -1))
-         if port == -1:
-             port = find_free_port()
-     server.settings.port = port
-     server.settings.host = host
-     server.run(transport=transport)
-
-
- if __name__ == "__main__":
-     fire.Fire(run)