academia-mcp 1.8.1__tar.gz → 1.9.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. academia_mcp-1.9.1/PKG-INFO +170 -0
  2. academia_mcp-1.9.1/README.md +136 -0
  3. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/files.py +3 -2
  4. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/llm.py +4 -3
  5. academia_mcp-1.9.1/academia_mcp/server.py +133 -0
  6. academia_mcp-1.9.1/academia_mcp/settings.py +33 -0
  7. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/__init__.py +5 -2
  8. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/bitflip.py +5 -5
  9. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/document_qa.py +4 -6
  10. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/review.py +32 -9
  11. academia_mcp-1.8.1/academia_mcp/tools/s2_citations.py → academia_mcp-1.9.1/academia_mcp/tools/s2.py +35 -0
  12. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/visit_webpage.py +4 -4
  13. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/web_search.py +7 -7
  14. academia_mcp-1.9.1/academia_mcp.egg-info/PKG-INFO +170 -0
  15. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp.egg-info/SOURCES.txt +3 -2
  16. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp.egg-info/requires.txt +1 -0
  17. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/pyproject.toml +3 -1
  18. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_review.py +7 -1
  19. academia_mcp-1.8.1/tests/test_s2_citations.py → academia_mcp-1.9.1/tests/test_s2.py +21 -1
  20. academia_mcp-1.8.1/PKG-INFO +0 -121
  21. academia_mcp-1.8.1/README.md +0 -88
  22. academia_mcp-1.8.1/academia_mcp/server.py +0 -101
  23. academia_mcp-1.8.1/academia_mcp.egg-info/PKG-INFO +0 -121
  24. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/LICENSE +0 -0
  25. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/__init__.py +0 -0
  26. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/__main__.py +0 -0
  27. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
  28. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
  29. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/pdf.py +0 -0
  30. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/py.typed +0 -0
  31. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/anthology_search.py +0 -0
  32. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/arxiv_download.py +0 -0
  33. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/arxiv_search.py +0 -0
  34. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/hf_datasets_search.py +0 -0
  35. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/latex.py +0 -0
  36. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/py.typed +0 -0
  37. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/utils.py +0 -0
  38. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp.egg-info/dependency_links.txt +0 -0
  39. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp.egg-info/entry_points.txt +0 -0
  40. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp.egg-info/top_level.txt +0 -0
  41. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/setup.cfg +0 -0
  42. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_anthology_search.py +0 -0
  43. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_arxiv_download.py +0 -0
  44. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_arxiv_search.py +0 -0
  45. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_bitflip.py +0 -0
  46. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_document_qa.py +0 -0
  47. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_extract_json.py +0 -0
  48. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_hf_dataset_search.py +0 -0
  49. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_latex.py +0 -0
  50. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_visit_webpage.py +0 -0
  51. {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_web_search.py +0 -0
@@ -0,0 +1,170 @@
+ Metadata-Version: 2.4
+ Name: academia-mcp
+ Version: 1.9.1
+ Summary: MCP server that provides different tools to search for scientific publications
+ Author-email: Ilya Gusev <phoenixilya@gmail.com>
+ Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.12
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: mcp>=1.10.1
+ Requires-Dist: xmltodict>=0.14.0
+ Requires-Dist: types-xmltodict>=0.14.0
+ Requires-Dist: requests>=2.32.0
+ Requires-Dist: types-requests>=2.32.0
+ Requires-Dist: pypdf>=5.1.0
+ Requires-Dist: beautifulsoup4>=4.12.0
+ Requires-Dist: types-beautifulsoup4>=4.12.0
+ Requires-Dist: markdownify==0.14.1
+ Requires-Dist: acl-anthology==0.5.2
+ Requires-Dist: markdown==3.7.0
+ Requires-Dist: types-markdown==3.7.0.20250322
+ Requires-Dist: huggingface-hub>=0.32.4
+ Requires-Dist: fire>=0.7.0
+ Requires-Dist: openai>=1.97.1
+ Requires-Dist: jinja2>=3.1.6
+ Requires-Dist: datasets>=4.0.0
+ Requires-Dist: pymupdf>=1.26.4
+ Requires-Dist: pillow>=11.3.0
+ Requires-Dist: pydantic-settings>=2.6.0
+ Dynamic: license-file
+
+ # Academia MCP
+
+ [![PyPI](https://img.shields.io/pypi/v/academia-mcp?label=PyPI%20package)](https://pypi.org/project/academia-mcp/)
+ [![CI](https://github.com/IlyaGusev/academia_mcp/actions/workflows/python.yml/badge.svg)](https://github.com/IlyaGusev/academia_mcp/actions/workflows/python.yml)
+ [![License](https://img.shields.io/github/license/IlyaGusev/academia_mcp)](LICENSE)
+ [![smithery badge](https://smithery.ai/badge/@IlyaGusev/academia_mcp)](https://smithery.ai/server/@IlyaGusev/academia_mcp)
+ [![Verified on MseeP](https://mseep.ai/badge.svg)](https://mseep.ai/app/e818878b-c3a6-4b3d-a5b4-e54dcd1f1fed)
+
+ MCP server with tools to search, fetch, analyze, and report on scientific papers and datasets.
+
+ ### Features
+ - ArXiv search and download
+ - ACL Anthology search
+ - Hugging Face datasets search
+ - Semantic Scholar citations and references
+ - Web search via Exa, Brave, or Tavily
+ - Web page crawler, LaTeX compilation, PDF reading
+ - Optional LLM-powered tools for document QA and research proposal workflows
+
+ ### Requirements
+ - Python 3.12+
+
+ ### Install
+ - Using pip (end users):
+ ```bash
+ pip3 install academia-mcp
+ ```
+
+ - For development (uv + Makefile):
+ ```bash
+ uv venv .venv
+ make install
+ ```
+
+ ### Quickstart
+ - Run over HTTP (default transport):
+ ```bash
+ uv run -m academia_mcp --transport streamable-http
+ ```
+
+ - Run over stdio (for local MCP clients like Claude Desktop):
+ ```bash
+ python -m academia_mcp --transport stdio
+ ```
+
+ Notes:
+ - Transports: `stdio`, `sse`, `streamable-http`.
+ - `host`/`port` are used for HTTP transports; ignored for `stdio`. Default port is `5056` (or `PORT`).
+
+ ### Claude Desktop config
+ ```json
+ {
+   "mcpServers": {
+     "academia": {
+       "command": "python3",
+       "args": [
+         "-m",
+         "academia_mcp",
+         "--transport",
+         "stdio"
+       ]
+     }
+   }
+ }
+ ```
+
+ ### Available tools (one-liners)
+ - `arxiv_search`: Query arXiv with field-specific queries and filters.
+ - `arxiv_download`: Fetch a paper by ID and convert to structured text (HTML/PDF modes).
+ - `anthology_search`: Search ACL Anthology with fielded queries and optional date filtering.
+ - `hf_datasets_search`: Find Hugging Face datasets with filters and sorting.
+ - `s2_get_citations`: List papers citing a given arXiv paper (Semantic Scholar Graph).
+ - `s2_get_references`: List papers referenced by a given arXiv paper.
+ - `visit_webpage`: Fetch and normalize a web page.
+ - `web_search`: Unified search wrapper; available when at least one of Exa/Brave/Tavily keys is set.
+ - `exa_web_search`, `brave_web_search`, `tavily_web_search`: Provider-specific search.
+ - `get_latex_templates_list`, `get_latex_template`: Enumerate and fetch built-in LaTeX templates.
+ - `compile_latex`: Compile LaTeX to PDF in `WORKSPACE_DIR`.
+ - `read_pdf`: Extract text per page from a PDF.
+ - `download_pdf_paper`, `review_pdf_paper`: Download and optionally review PDFs (requires LLM + workspace).
+ - `document_qa`: Answer questions over provided document chunks (requires LLM).
+ - `extract_bitflip_info`, `generate_research_proposals`, `score_research_proposals`: Research proposal helpers (requires LLM).
+
+ Availability notes:
+ - Set `WORKSPACE_DIR` to enable `compile_latex`, `read_pdf`, `download_pdf_paper`, and `review_pdf_paper`.
+ - Set `OPENROUTER_API_KEY` to enable LLM tools (`document_qa`, `review_pdf_paper`, and bitflip tools).
+ - Set one or more of `EXA_API_KEY`, `BRAVE_API_KEY`, `TAVILY_API_KEY` to enable `web_search` and provider tools.
+
+ ### Environment variables
+ Set as needed depending on which tools you use:
+
+ - `OPENROUTER_API_KEY`: required for LLM-related tools.
+ - `BASE_URL`: override OpenRouter base URL.
+ - `DOCUMENT_QA_MODEL_NAME`: override default model for `document_qa`.
+ - `BITFLIP_MODEL_NAME`: override default model for bitflip tools.
+ - `TAVILY_API_KEY`: enables Tavily in `web_search`.
+ - `EXA_API_KEY`: enables Exa in `web_search` and `visit_webpage`.
+ - `BRAVE_API_KEY`: enables Brave in `web_search`.
+ - `WORKSPACE_DIR`: directory for generated files (PDFs, temp artifacts).
+ - `PORT`: HTTP port (default `5056`).
+
+ You can put these in a `.env` file in the project root.
+
+ ### Docker
+ Build the image:
+ ```bash
+ docker build -t academia_mcp .
+ ```
+
+ Run the server (HTTP):
+ ```bash
+ docker run --rm -p 5056:5056 \
+   -e PORT=5056 \
+   -e OPENROUTER_API_KEY=your_key_here \
+   -e WORKSPACE_DIR=/workspace \
+   -v "$PWD/workdir:/workspace" \
+   academia_mcp
+ ```
+
+ Or use the existing image: `phoenix120/academia_mcp`
+
+ ### Examples
+ - [Comprehensive report screencast (YouTube)](https://www.youtube.com/watch?v=4bweqQcN6w8)
+ - [Single paper screencast (YouTube)](https://www.youtube.com/watch?v=IAAPMptJ5k8)
+
+ ### Makefile targets
+ - `make install`: install the package in editable mode with uv
+ - `make validate`: run black, flake8, and mypy (strict)
+ - `make test`: run the test suite with pytest
+ - `make publish`: build and publish using uv
+
+ ### LaTeX/PDF requirements
+ Only needed for LaTeX/PDF tools. Ensure a LaTeX distribution is installed and that `pdflatex` and `latexmk` are on PATH. On Debian/Ubuntu:
+ ```bash
+ sudo apt install texlive-latex-base texlive-fonts-recommended texlive-latex-extra texlive-science latexmk
+ ```
@@ -0,0 +1,136 @@
+ # Academia MCP
+
+ [![PyPI](https://img.shields.io/pypi/v/academia-mcp?label=PyPI%20package)](https://pypi.org/project/academia-mcp/)
+ [![CI](https://github.com/IlyaGusev/academia_mcp/actions/workflows/python.yml/badge.svg)](https://github.com/IlyaGusev/academia_mcp/actions/workflows/python.yml)
+ [![License](https://img.shields.io/github/license/IlyaGusev/academia_mcp)](LICENSE)
+ [![smithery badge](https://smithery.ai/badge/@IlyaGusev/academia_mcp)](https://smithery.ai/server/@IlyaGusev/academia_mcp)
+ [![Verified on MseeP](https://mseep.ai/badge.svg)](https://mseep.ai/app/e818878b-c3a6-4b3d-a5b4-e54dcd1f1fed)
+
+ MCP server with tools to search, fetch, analyze, and report on scientific papers and datasets.
+
+ ### Features
+ - ArXiv search and download
+ - ACL Anthology search
+ - Hugging Face datasets search
+ - Semantic Scholar citations and references
+ - Web search via Exa, Brave, or Tavily
+ - Web page crawler, LaTeX compilation, PDF reading
+ - Optional LLM-powered tools for document QA and research proposal workflows
+
+ ### Requirements
+ - Python 3.12+
+
+ ### Install
+ - Using pip (end users):
+ ```bash
+ pip3 install academia-mcp
+ ```
+
+ - For development (uv + Makefile):
+ ```bash
+ uv venv .venv
+ make install
+ ```
+
+ ### Quickstart
+ - Run over HTTP (default transport):
+ ```bash
+ uv run -m academia_mcp --transport streamable-http
+ ```
+
+ - Run over stdio (for local MCP clients like Claude Desktop):
+ ```bash
+ python -m academia_mcp --transport stdio
+ ```
+
+ Notes:
+ - Transports: `stdio`, `sse`, `streamable-http`.
+ - `host`/`port` are used for HTTP transports; ignored for `stdio`. Default port is `5056` (or `PORT`).
+
+ ### Claude Desktop config
+ ```json
+ {
+   "mcpServers": {
+     "academia": {
+       "command": "python3",
+       "args": [
+         "-m",
+         "academia_mcp",
+         "--transport",
+         "stdio"
+       ]
+     }
+   }
+ }
+ ```
+
+ ### Available tools (one-liners)
+ - `arxiv_search`: Query arXiv with field-specific queries and filters.
+ - `arxiv_download`: Fetch a paper by ID and convert to structured text (HTML/PDF modes).
+ - `anthology_search`: Search ACL Anthology with fielded queries and optional date filtering.
+ - `hf_datasets_search`: Find Hugging Face datasets with filters and sorting.
+ - `s2_get_citations`: List papers citing a given arXiv paper (Semantic Scholar Graph).
+ - `s2_get_references`: List papers referenced by a given arXiv paper.
+ - `visit_webpage`: Fetch and normalize a web page.
+ - `web_search`: Unified search wrapper; available when at least one of Exa/Brave/Tavily keys is set.
+ - `exa_web_search`, `brave_web_search`, `tavily_web_search`: Provider-specific search.
+ - `get_latex_templates_list`, `get_latex_template`: Enumerate and fetch built-in LaTeX templates.
+ - `compile_latex`: Compile LaTeX to PDF in `WORKSPACE_DIR`.
+ - `read_pdf`: Extract text per page from a PDF.
+ - `download_pdf_paper`, `review_pdf_paper`: Download and optionally review PDFs (requires LLM + workspace).
+ - `document_qa`: Answer questions over provided document chunks (requires LLM).
+ - `extract_bitflip_info`, `generate_research_proposals`, `score_research_proposals`: Research proposal helpers (requires LLM).
+
+ Availability notes:
+ - Set `WORKSPACE_DIR` to enable `compile_latex`, `read_pdf`, `download_pdf_paper`, and `review_pdf_paper`.
+ - Set `OPENROUTER_API_KEY` to enable LLM tools (`document_qa`, `review_pdf_paper`, and bitflip tools).
+ - Set one or more of `EXA_API_KEY`, `BRAVE_API_KEY`, `TAVILY_API_KEY` to enable `web_search` and provider tools.
+
+ ### Environment variables
+ Set as needed depending on which tools you use:
+
+ - `OPENROUTER_API_KEY`: required for LLM-related tools.
+ - `BASE_URL`: override OpenRouter base URL.
+ - `DOCUMENT_QA_MODEL_NAME`: override default model for `document_qa`.
+ - `BITFLIP_MODEL_NAME`: override default model for bitflip tools.
+ - `TAVILY_API_KEY`: enables Tavily in `web_search`.
+ - `EXA_API_KEY`: enables Exa in `web_search` and `visit_webpage`.
+ - `BRAVE_API_KEY`: enables Brave in `web_search`.
+ - `WORKSPACE_DIR`: directory for generated files (PDFs, temp artifacts).
+ - `PORT`: HTTP port (default `5056`).
+
+ You can put these in a `.env` file in the project root.
+
+ ### Docker
+ Build the image:
+ ```bash
+ docker build -t academia_mcp .
+ ```
+
+ Run the server (HTTP):
+ ```bash
+ docker run --rm -p 5056:5056 \
+   -e PORT=5056 \
+   -e OPENROUTER_API_KEY=your_key_here \
+   -e WORKSPACE_DIR=/workspace \
+   -v "$PWD/workdir:/workspace" \
+   academia_mcp
+ ```
+
+ Or use the existing image: `phoenix120/academia_mcp`
+
+ ### Examples
+ - [Comprehensive report screencast (YouTube)](https://www.youtube.com/watch?v=4bweqQcN6w8)
+ - [Single paper screencast (YouTube)](https://www.youtube.com/watch?v=IAAPMptJ5k8)
+
+ ### Makefile targets
+ - `make install`: install the package in editable mode with uv
+ - `make validate`: run black, flake8, and mypy (strict)
+ - `make test`: run the test suite with pytest
+ - `make publish`: build and publish using uv
+
+ ### LaTeX/PDF requirements
+ Only needed for LaTeX/PDF tools. Ensure a LaTeX distribution is installed and that `pdflatex` and `latexmk` are on PATH. On Debian/Ubuntu:
+ ```bash
+ sudo apt install texlive-latex-base texlive-fonts-recommended texlive-latex-extra texlive-science latexmk
+ ```
@@ -1,7 +1,8 @@
- import os
  from typing import Optional
  from pathlib import Path

+ from academia_mcp.settings import settings
+
  DIR_PATH = Path(__file__).parent
  ROOT_PATH = DIR_PATH.parent
  DEFAULT_WORKSPACE_DIR_PATH: Path = DIR_PATH / "workdir"
@@ -14,7 +15,7 @@ class WorkspaceDirectory:
      @classmethod
      def get_dir(cls) -> Path:
          if cls.workspace_dir is None:
-             return Path(os.getenv("WORKSPACE_DIR", DEFAULT_WORKSPACE_DIR_PATH))
+             return Path(settings.WORKSPACE_DIR)
          return cls.workspace_dir

      @classmethod
@@ -1,10 +1,11 @@
- import os
  from typing import List, Dict, Any

  from pydantic import BaseModel
  from openai import AsyncOpenAI
  from openai.types.chat.chat_completion_message import ChatCompletionMessage

+ from academia_mcp.settings import settings
+

  class ChatMessage(BaseModel):  # type: ignore
      role: str
@@ -15,9 +16,9 @@ ChatMessages = List[ChatMessage]


  async def llm_acall(model_name: str, messages: ChatMessages, **kwargs: Any) -> str:
-     key = os.getenv("OPENROUTER_API_KEY", "")
+     key = settings.OPENROUTER_API_KEY
      assert key, "Please set OPENROUTER_API_KEY in the environment variables"
-     base_url = os.getenv("BASE_URL", "https://openrouter.ai/api/v1")
+     base_url = settings.BASE_URL

      client = AsyncOpenAI(base_url=base_url, api_key=key)
      response: ChatCompletionMessage = (
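The hunk above routes `llm_acall` through the new `settings` object instead of raw environment lookups. A minimal usage sketch, assuming `OPENROUTER_API_KEY` is set and that the model name below (the package's default elsewhere) is available on the configured OpenRouter-compatible endpoint:

```python
import asyncio

from academia_mcp.llm import llm_acall, ChatMessage


async def main() -> None:
    # llm_acall reads OPENROUTER_API_KEY and BASE_URL from settings at call time.
    answer = await llm_acall(
        model_name="deepseek/deepseek-chat-v3-0324",
        messages=[ChatMessage(role="user", content="Summarize the Transformer paper in one sentence.")],
    )
    print(answer)


asyncio.run(main())
```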
@@ -0,0 +1,133 @@
+ import socket
+ import logging
+ from logging.config import dictConfig
+ from typing import Optional, Literal
+
+ import fire  # type: ignore
+ from mcp.server.fastmcp import FastMCP
+ from uvicorn.config import LOGGING_CONFIG as UVICORN_LOGGING_CONFIG
+
+ from academia_mcp.settings import settings
+ from academia_mcp.tools.arxiv_search import arxiv_search
+ from academia_mcp.tools.arxiv_download import arxiv_download
+ from academia_mcp.tools.s2 import (
+     s2_get_citations,
+     s2_get_references,
+     s2_corpus_id_from_arxiv_id,
+     s2_get_info,
+ )
+ from academia_mcp.tools.hf_datasets_search import hf_datasets_search
+ from academia_mcp.tools.anthology_search import anthology_search
+ from academia_mcp.tools.document_qa import document_qa
+ from academia_mcp.tools.latex import (
+     compile_latex,
+     get_latex_template,
+     get_latex_templates_list,
+     read_pdf,
+ )
+ from academia_mcp.tools.web_search import (
+     web_search,
+     tavily_web_search,
+     exa_web_search,
+     brave_web_search,
+ )
+ from academia_mcp.tools.visit_webpage import visit_webpage
+ from academia_mcp.tools.bitflip import (
+     extract_bitflip_info,
+     generate_research_proposals,
+     score_research_proposals,
+ )
+ from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
+
+
+ def configure_uvicorn_style_logging(level: int = logging.INFO) -> None:
+     config = {**UVICORN_LOGGING_CONFIG}
+     config["disable_existing_loggers"] = False
+     config["root"] = {"handlers": ["default"], "level": logging.getLevelName(level)}
+     dictConfig(config)
+
+
+ def find_free_port() -> int:
+     for port in range(5000, 6001):
+         try:
+             with socket.socket() as s:
+                 s.bind(("", port))
+                 return port
+         except Exception:
+             continue
+     raise RuntimeError("No free port in range 5000-6000 found")
+
+
+ def run(
+     host: str = "0.0.0.0",
+     port: Optional[int] = None,
+     mount_path: str = "/",
+     streamable_http_path: str = "/mcp",
+     transport: Literal["stdio", "sse", "streamable-http"] = "streamable-http",
+     disable_web_search_tools: bool = False,
+     disable_llm_tools: bool = False,
+ ) -> None:
+     configure_uvicorn_style_logging()
+     server = FastMCP(
+         "Academia MCP",
+         stateless_http=True,
+         streamable_http_path=streamable_http_path,
+         mount_path=mount_path,
+     )
+     logger = logging.getLogger(__name__)
+
+     server.add_tool(arxiv_search)
+     server.add_tool(arxiv_download)
+     server.add_tool(s2_get_citations)
+     server.add_tool(s2_get_references)
+     server.add_tool(s2_corpus_id_from_arxiv_id)
+     server.add_tool(s2_get_info)
+     server.add_tool(hf_datasets_search)
+     server.add_tool(anthology_search)
+     server.add_tool(get_latex_template)
+     server.add_tool(get_latex_templates_list)
+     server.add_tool(visit_webpage)
+
+     if settings.WORKSPACE_DIR:
+         server.add_tool(compile_latex)
+         server.add_tool(download_pdf_paper)
+         server.add_tool(read_pdf)
+     else:
+         logger.warning(
+             "WORKSPACE_DIR is not set, compile_latex/download_pdf_paper/read_pdf will not be available!"
+         )
+
+     if not disable_web_search_tools:
+         if settings.TAVILY_API_KEY:
+             server.add_tool(tavily_web_search)
+         if settings.EXA_API_KEY:
+             server.add_tool(exa_web_search)
+         if settings.BRAVE_API_KEY:
+             server.add_tool(brave_web_search)
+         if settings.EXA_API_KEY or settings.BRAVE_API_KEY or settings.TAVILY_API_KEY:
+             server.add_tool(web_search)
+         else:
+             logger.warning("No web search tools keys are set, web_search will not be available!")
+
+     if not disable_llm_tools and settings.OPENROUTER_API_KEY:
+         server.add_tool(extract_bitflip_info)
+         server.add_tool(generate_research_proposals)
+         server.add_tool(score_research_proposals)
+         server.add_tool(document_qa)
+         if settings.WORKSPACE_DIR:
+             server.add_tool(review_pdf_paper)
+     else:
+         logger.warning("No OpenRouter API key is set, LLM-related tools will not be available!")
+
+     if port is None:
+         if settings.PORT is not None:
+             port = int(settings.PORT)
+         else:
+             port = find_free_port()
+     server.settings.port = port
+     server.settings.host = host
+     server.run(transport=transport)
+
+
+ if __name__ == "__main__":
+     fire.Fire(run)
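The new `server.py` exposes `run()` through `fire.Fire`, so its keyword arguments double as CLI flags; tools are registered conditionally based on `WORKSPACE_DIR` and the API keys. A minimal sketch of launching the server programmatically under the same defaults (a hypothetical caller script, not part of the package):

```python
from academia_mcp.server import run

if __name__ == "__main__":
    # Streamable HTTP on an explicit port; tool registration still follows the
    # WORKSPACE_DIR / API-key checks shown in the hunk above. run() blocks until stopped.
    run(transport="streamable-http", port=5056)
```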
@@ -0,0 +1,33 @@
+ from pathlib import Path
+ from typing import Optional
+
+ from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+ class Settings(BaseSettings):
+     BASE_URL: str = "https://openrouter.ai/api/v1"
+
+     OPENROUTER_API_KEY: str = ""
+     TAVILY_API_KEY: Optional[str] = None
+     EXA_API_KEY: Optional[str] = None
+     BRAVE_API_KEY: Optional[str] = None
+
+     REVIEW_MODEL_NAME: str = "gpt-5"
+     BITFLIP_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
+     BITFLIP_MAX_COMPLETION_TOKENS: int = 16384
+     DOCUMENT_QA_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
+     DOCUMENT_QA_QUESTION_MAX_LENGTH: int = 10000
+     DOCUMENT_QA_DOCUMENT_MAX_LENGTH: int = 200000
+
+     PORT: int = 5056
+     WORKSPACE_DIR: Optional[Path] = None
+
+     model_config = SettingsConfigDict(
+         env_file=".env",
+         env_file_encoding="utf-8",
+         env_prefix="",
+         extra="ignore",
+     )
+
+
+ settings = Settings()
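The new `Settings` class centralizes configuration; with pydantic-settings, process environment variables take precedence over `.env` entries, which in turn override the defaults declared above. A small sketch of that resolution order (the workspace path is only an illustrative value):

```python
import os

from academia_mcp.settings import Settings

os.environ["WORKSPACE_DIR"] = "/tmp/academia_workdir"  # hypothetical path

config = Settings()  # environment and .env are read at construction time
print(config.WORKSPACE_DIR)  # PosixPath('/tmp/academia_workdir')
print(config.PORT)           # 5056 unless PORT is set in the environment or .env
print(config.BASE_URL)       # https://openrouter.ai/api/v1 by default
```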
@@ -2,7 +2,7 @@ from .arxiv_search import arxiv_search
  from .anthology_search import anthology_search
  from .arxiv_download import arxiv_download
  from .hf_datasets_search import hf_datasets_search
- from .s2_citations import s2_get_references, s2_get_citations
+ from .s2 import s2_get_references, s2_get_citations, s2_corpus_id_from_arxiv_id, s2_get_info
  from .document_qa import document_qa
  from .latex import (
      compile_latex,
@@ -13,7 +13,7 @@ from .latex import (
  from .web_search import web_search, tavily_web_search, exa_web_search, brave_web_search
  from .visit_webpage import visit_webpage
  from .bitflip import extract_bitflip_info, generate_research_proposals, score_research_proposals
- from .review import review_pdf_paper, download_pdf_paper
+ from .review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url

  __all__ = [
      "arxiv_search",
@@ -21,6 +21,8 @@ __all__ = [
      "anthology_search",
      "s2_get_references",
      "s2_get_citations",
+     "s2_corpus_id_from_arxiv_id",
+     "s2_get_info",
      "hf_datasets_search",
      "document_qa",
      "compile_latex",
@@ -35,6 +37,7 @@ __all__ = [
      "generate_research_proposals",
      "score_research_proposals",
      "review_pdf_paper",
+     "review_pdf_paper_by_url",
      "download_pdf_paper",
      "read_pdf",
  ]
@@ -2,7 +2,6 @@
  # https://web.stanford.edu/class/cs197c/slides/02-literature-search.pdf

  import json
- import os
  import random
  from typing import List, Optional, Any, Dict

@@ -12,6 +11,7 @@ from datasets import load_dataset # type: ignore
  from academia_mcp.tools.arxiv_download import arxiv_download
  from academia_mcp.utils import extract_json, encode_prompt
  from academia_mcp.llm import llm_acall, ChatMessage
+ from academia_mcp.settings import settings


  class ProposalDataset:
@@ -201,7 +201,7 @@ async def extract_bitflip_info(arxiv_id: str) -> str:
      Args:
          arxiv_id: The arXiv ID of the paper to extract the Bit-Flip information from.
      """
-     model_name = os.getenv("BITFLIP_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
+     model_name = settings.BITFLIP_MODEL_NAME
      paper = arxiv_download(arxiv_id)
      abstract = json.loads(paper)["abstract"]
      prompt = encode_prompt(EXTRACT_PROMPT, abstract=abstract)
@@ -240,8 +240,8 @@ async def generate_research_proposals(
      ]
      Use `json.loads` to deserialize the result if you want to get specific items.
      """
-     model_name = os.getenv("BITFLIP_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
-     max_completion_tokens = int(os.getenv("BITFLIP_MAX_COMPLETION_TOKENS", 16384))
+     model_name = settings.BITFLIP_MODEL_NAME
+     max_completion_tokens = int(settings.BITFLIP_MAX_COMPLETION_TOKENS)
      examples = ProposalDataset.get_dataset()[:]
      examples = random.choices(examples, k=2)

@@ -293,7 +293,7 @@ async def score_research_proposals(proposals: str | List[str | Dict[str, Any] |
      Args:
          proposals: A list of JSON strings with research proposals.
      """
-     model_name = os.getenv("BITFLIP_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
+     model_name = settings.BITFLIP_MODEL_NAME
      if isinstance(proposals, str):
          proposals = json.loads(proposals)
      assert isinstance(proposals, list), "Proposals should be a list of JSON strings"
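With the model name now coming from `settings.BITFLIP_MODEL_NAME`, the bitflip helpers only need `OPENROUTER_API_KEY` at runtime. A hedged sketch of calling the extraction tool directly (the arXiv ID is an arbitrary example input):

```python
import asyncio

from academia_mcp.tools import extract_bitflip_info

# Returns a JSON string with the extracted Bit-Flip information for the paper.
result_json = asyncio.run(extract_bitflip_info("2106.09685"))
print(result_json)
```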
@@ -1,14 +1,12 @@
- import os
  import json
  from typing import List, Any, Dict
- from dotenv import load_dotenv

  from pydantic import BaseModel

  from academia_mcp.llm import llm_acall
  from academia_mcp.utils import truncate_content
+ from academia_mcp.settings import settings

- load_dotenv()

  PROMPT = """You are a helpful assistant that answers questions about documents accurately and concisely.
  Please answer the following questions based solely on the provided document.
@@ -65,10 +63,10 @@ async def document_qa(
      document = json.dumps(document)
      assert document and document.strip(), "Please provide non-empty 'document'"

-     question = truncate_content(question, 10000)
-     document = truncate_content(document, 200000)
+     question = truncate_content(question, settings.DOCUMENT_QA_QUESTION_MAX_LENGTH)
+     document = truncate_content(document, settings.DOCUMENT_QA_DOCUMENT_MAX_LENGTH)

-     model_name = os.getenv("DOCUMENT_QA_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
+     model_name = settings.DOCUMENT_QA_MODEL_NAME
      prompt = PROMPT.format(question=question, document=document)
      content = await llm_acall(
          model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
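`document_qa` now pulls its model name and truncation limits from settings as well. A minimal sketch of a direct call (the question and document text are placeholder inputs; requires `OPENROUTER_API_KEY`):

```python
import asyncio

from academia_mcp.tools import document_qa

answer = asyncio.run(
    document_qa(
        question="Which datasets does the paper evaluate on?",
        document="...full paper text or concatenated chunks...",
    )
)
print(answer)
```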