academia-mcp 1.8.1__tar.gz → 1.9.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- academia_mcp-1.9.1/PKG-INFO +170 -0
- academia_mcp-1.9.1/README.md +136 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/files.py +3 -2
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/llm.py +4 -3
- academia_mcp-1.9.1/academia_mcp/server.py +133 -0
- academia_mcp-1.9.1/academia_mcp/settings.py +33 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/__init__.py +5 -2
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/bitflip.py +5 -5
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/document_qa.py +4 -6
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/review.py +32 -9
- academia_mcp-1.8.1/academia_mcp/tools/s2_citations.py → academia_mcp-1.9.1/academia_mcp/tools/s2.py +35 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/visit_webpage.py +4 -4
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/web_search.py +7 -7
- academia_mcp-1.9.1/academia_mcp.egg-info/PKG-INFO +170 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp.egg-info/SOURCES.txt +3 -2
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp.egg-info/requires.txt +1 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/pyproject.toml +3 -1
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_review.py +7 -1
- academia_mcp-1.8.1/tests/test_s2_citations.py → academia_mcp-1.9.1/tests/test_s2.py +21 -1
- academia_mcp-1.8.1/PKG-INFO +0 -121
- academia_mcp-1.8.1/README.md +0 -88
- academia_mcp-1.8.1/academia_mcp/server.py +0 -101
- academia_mcp-1.8.1/academia_mcp.egg-info/PKG-INFO +0 -121
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/LICENSE +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/__init__.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/__main__.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/pdf.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/py.typed +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/anthology_search.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/arxiv_download.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/arxiv_search.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/hf_datasets_search.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/latex.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/tools/py.typed +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp/utils.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp.egg-info/dependency_links.txt +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp.egg-info/entry_points.txt +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/academia_mcp.egg-info/top_level.txt +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/setup.cfg +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_anthology_search.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_arxiv_download.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_arxiv_search.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_bitflip.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_document_qa.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_extract_json.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_hf_dataset_search.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_latex.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_visit_webpage.py +0 -0
- {academia_mcp-1.8.1 → academia_mcp-1.9.1}/tests/test_web_search.py +0 -0
@@ -0,0 +1,170 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: academia-mcp
|
3
|
+
Version: 1.9.1
|
4
|
+
Summary: MCP server that provides different tools to search for scientific publications
|
5
|
+
Author-email: Ilya Gusev <phoenixilya@gmail.com>
|
6
|
+
Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
9
|
+
Classifier: Operating System :: OS Independent
|
10
|
+
Requires-Python: >=3.12
|
11
|
+
Description-Content-Type: text/markdown
|
12
|
+
License-File: LICENSE
|
13
|
+
Requires-Dist: mcp>=1.10.1
|
14
|
+
Requires-Dist: xmltodict>=0.14.0
|
15
|
+
Requires-Dist: types-xmltodict>=0.14.0
|
16
|
+
Requires-Dist: requests>=2.32.0
|
17
|
+
Requires-Dist: types-requests>=2.32.0
|
18
|
+
Requires-Dist: pypdf>=5.1.0
|
19
|
+
Requires-Dist: beautifulsoup4>=4.12.0
|
20
|
+
Requires-Dist: types-beautifulsoup4>=4.12.0
|
21
|
+
Requires-Dist: markdownify==0.14.1
|
22
|
+
Requires-Dist: acl-anthology==0.5.2
|
23
|
+
Requires-Dist: markdown==3.7.0
|
24
|
+
Requires-Dist: types-markdown==3.7.0.20250322
|
25
|
+
Requires-Dist: huggingface-hub>=0.32.4
|
26
|
+
Requires-Dist: fire>=0.7.0
|
27
|
+
Requires-Dist: openai>=1.97.1
|
28
|
+
Requires-Dist: jinja2>=3.1.6
|
29
|
+
Requires-Dist: datasets>=4.0.0
|
30
|
+
Requires-Dist: pymupdf>=1.26.4
|
31
|
+
Requires-Dist: pillow>=11.3.0
|
32
|
+
Requires-Dist: pydantic-settings>=2.6.0
|
33
|
+
Dynamic: license-file
|
34
|
+
|
35
|
+
# Academia MCP
|
36
|
+
|
37
|
+
[PyPI](https://pypi.org/project/academia-mcp/)
|
38
|
+
[CI](https://github.com/IlyaGusev/academia_mcp/actions/workflows/python.yml)
|
39
|
+
[License](LICENSE)
|
40
|
+
[Smithery](https://smithery.ai/server/@IlyaGusev/academia_mcp)
|
41
|
+
[MseeP](https://mseep.ai/app/e818878b-c3a6-4b3d-a5b4-e54dcd1f1fed)
|
42
|
+
|
43
|
+
MCP server with tools to search, fetch, analyze, and report on scientific papers and datasets.
|
44
|
+
|
45
|
+
### Features
|
46
|
+
- ArXiv search and download
|
47
|
+
- ACL Anthology search
|
48
|
+
- Hugging Face datasets search
|
49
|
+
- Semantic Scholar citations and references
|
50
|
+
- Web search via Exa, Brave, or Tavily
|
51
|
+
- Web page crawler, LaTeX compilation, PDF reading
|
52
|
+
- Optional LLM-powered tools for document QA and research proposal workflows
|
53
|
+
|
54
|
+
### Requirements
|
55
|
+
- Python 3.12+
|
56
|
+
|
57
|
+
### Install
|
58
|
+
- Using pip (end users):
|
59
|
+
```bash
|
60
|
+
pip3 install academia-mcp
|
61
|
+
```
|
62
|
+
|
63
|
+
- For development (uv + Makefile):
|
64
|
+
```bash
|
65
|
+
uv venv .venv
|
66
|
+
make install
|
67
|
+
```
|
68
|
+
|
69
|
+
### Quickstart
|
70
|
+
- Run over HTTP (default transport):
|
71
|
+
```bash
|
72
|
+
uv run -m academia_mcp --transport streamable-http
|
73
|
+
```
|
74
|
+
|
75
|
+
- Run over stdio (for local MCP clients like Claude Desktop):
|
76
|
+
```bash
|
77
|
+
python -m academia_mcp --transport stdio
|
78
|
+
```
|
79
|
+
|
80
|
+
Notes:
|
81
|
+
- Transports: `stdio`, `sse`, `streamable-http`.
|
82
|
+
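- `host`/`port` are used for the HTTP transports (`sse`, `streamable-http`) and ignored for `stdio`. The port defaults to `5056` and can be overridden with the `PORT` environment variable or the `--port` option.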
|
83
|
+
|
84
|
+
### Claude Desktop config
|
85
|
+
```json
|
86
|
+
{
|
87
|
+
"mcpServers": {
|
88
|
+
"academia": {
|
89
|
+
"command": "python3",
|
90
|
+
"args": [
|
91
|
+
"-m",
|
92
|
+
"academia_mcp",
|
93
|
+
"--transport",
|
94
|
+
"stdio"
|
95
|
+
]
|
96
|
+
}
|
97
|
+
}
|
98
|
+
}
|
99
|
+
```
|
100
|
+
|
101
|
+
### Available tools (one-liners)
|
102
|
+
- `arxiv_search`: Query arXiv with field-specific queries and filters.
|
103
|
+
- `arxiv_download`: Fetch a paper by ID and convert to structured text (HTML/PDF modes).
|
104
|
+
- `anthology_search`: Search ACL Anthology with fielded queries and optional date filtering.
|
105
|
+
- `hf_datasets_search`: Find Hugging Face datasets with filters and sorting.
|
106
|
+
- `s2_get_citations`: List papers citing a given arXiv paper (Semantic Scholar Graph).
|
107
|
+
- `s2_get_references`: List papers referenced by a given arXiv paper.
|
108
|
+
- `visit_webpage`: Fetch and normalize a web page.
|
109
|
+
- `web_search`: Unified search wrapper; available when at least one of Exa/Brave/Tavily keys is set.
|
110
|
+
- `exa_web_search`, `brave_web_search`, `tavily_web_search`: Provider-specific search.
|
111
|
+
- `get_latex_templates_list`, `get_latex_template`: Enumerate and fetch built-in LaTeX templates.
|
112
|
+
- `compile_latex`: Compile LaTeX to PDF in `WORKSPACE_DIR`.
|
113
|
+
- `read_pdf`: Extract text per page from a PDF.
|
114
|
+
- `download_pdf_paper`, `review_pdf_paper`: Download a PDF (requires workspace) and optionally review it (requires LLM + workspace).
|
115
|
+
- `document_qa`: Answer questions over provided document chunks (requires LLM).
|
116
|
+
- `extract_bitflip_info`, `generate_research_proposals`, `score_research_proposals`: Research proposal helpers (requires LLM).
|
117
|
+
|
118
|
+
Availability notes:
|
119
|
+
- Set `WORKSPACE_DIR` to enable `compile_latex`, `read_pdf`, `download_pdf_paper`, and `review_pdf_paper`.
|
120
|
+
- Set `OPENROUTER_API_KEY` to enable LLM tools (`document_qa`, `review_pdf_paper`, and bitflip tools).
|
121
|
+
- Set one or more of `EXA_API_KEY`, `BRAVE_API_KEY`, `TAVILY_API_KEY` to enable `web_search` and provider tools.
|
122
|
+
|
123
|
+
### Environment variables
|
124
|
+
Set as needed depending on which tools you use:
|
125
|
+
|
126
|
+
- `OPENROUTER_API_KEY`: required for LLM-related tools.
|
127
|
+
- `BASE_URL`: override OpenRouter base URL.
|
128
|
+
- `DOCUMENT_QA_MODEL_NAME`: override default model for `document_qa`.
|
129
|
+
- `BITFLIP_MODEL_NAME`: override default model for bitflip tools.
|
130
|
+
- `TAVILY_API_KEY`: enables Tavily in `web_search`.
|
131
|
+
- `EXA_API_KEY`: enables Exa in `web_search` and `visit_webpage`.
|
132
|
+
- `BRAVE_API_KEY`: enables Brave in `web_search`.
|
133
|
+
- `WORKSPACE_DIR`: directory for generated files (PDFs, temp artifacts).
|
134
|
+
- `PORT`: HTTP port (default `5056`).
|
135
|
+
|
136
|
+
You can also put these in a `.env` file in the directory the server is started from (for example, the project root).
|
137
|
+
|
138
|
+
### Docker
|
139
|
+
Build the image:
|
140
|
+
```bash
|
141
|
+
docker build -t academia_mcp .
|
142
|
+
```
|
143
|
+
|
144
|
+
Run the server (HTTP):
|
145
|
+
```bash
|
146
|
+
docker run --rm -p 5056:5056 \
|
147
|
+
-e PORT=5056 \
|
148
|
+
-e OPENROUTER_API_KEY=your_key_here \
|
149
|
+
-e WORKSPACE_DIR=/workspace \
|
150
|
+
-v "$PWD/workdir:/workspace" \
|
151
|
+
academia_mcp
|
152
|
+
```
|
153
|
+
|
154
|
+
Or use existing image: `phoenix120/academia_mcp`
|
155
|
+
|
156
|
+
### Examples
|
157
|
+
- [Comprehensive report screencast (YouTube)](https://www.youtube.com/watch?v=4bweqQcN6w8)
|
158
|
+
- [Single paper screencast (YouTube)](https://www.youtube.com/watch?v=IAAPMptJ5k8)
|
159
|
+
|
160
|
+
### Makefile targets
|
161
|
+
- `make install`: install the package in editable mode with uv
|
162
|
+
- `make validate`: run black, flake8, and mypy (strict)
|
163
|
+
- `make test`: run the test suite with pytest
|
164
|
+
- `make publish`: build and publish using uv
|
165
|
+
|
166
|
+
### LaTeX/PDF requirements
|
167
|
+
Only needed for the LaTeX/PDF tools. Ensure that a LaTeX distribution is installed and that both `pdflatex` and `latexmk` are on your PATH. On Debian/Ubuntu:
|
168
|
+
```bash
|
169
|
+
sudo apt install texlive-latex-base texlive-fonts-recommended texlive-latex-extra texlive-science latexmk
|
170
|
+
```
|
@@ -0,0 +1,136 @@
|
|
1
|
+
# Academia MCP
|
2
|
+
|
3
|
+
[PyPI](https://pypi.org/project/academia-mcp/)
|
4
|
+
[CI](https://github.com/IlyaGusev/academia_mcp/actions/workflows/python.yml)
|
5
|
+
[License](LICENSE)
|
6
|
+
[Smithery](https://smithery.ai/server/@IlyaGusev/academia_mcp)
|
7
|
+
[MseeP](https://mseep.ai/app/e818878b-c3a6-4b3d-a5b4-e54dcd1f1fed)
|
8
|
+
|
9
|
+
MCP server with tools to search, fetch, analyze, and report on scientific papers and datasets.
|
10
|
+
|
11
|
+
### Features
|
12
|
+
- ArXiv search and download
|
13
|
+
- ACL Anthology search
|
14
|
+
- Hugging Face datasets search
|
15
|
+
- Semantic Scholar citations and references
|
16
|
+
- Web search via Exa, Brave, or Tavily
|
17
|
+
- Web page crawler, LaTeX compilation, PDF reading
|
18
|
+
- Optional LLM-powered tools for document QA and research proposal workflows
|
19
|
+
|
20
|
+
### Requirements
|
21
|
+
- Python 3.12+
|
22
|
+
|
23
|
+
### Install
|
24
|
+
- Using pip (end users):
|
25
|
+
```bash
|
26
|
+
pip3 install academia-mcp
|
27
|
+
```
|
28
|
+
|
29
|
+
- For development (uv + Makefile):
|
30
|
+
```bash
|
31
|
+
uv venv .venv
|
32
|
+
make install
|
33
|
+
```
|
34
|
+
|
35
|
+
### Quickstart
|
36
|
+
- Run over HTTP (default transport):
|
37
|
+
```bash
|
38
|
+
uv run -m academia_mcp --transport streamable-http
|
39
|
+
```
|
40
|
+
|
41
|
+
- Run over stdio (for local MCP clients like Claude Desktop):
|
42
|
+
```bash
|
43
|
+
python -m academia_mcp --transport stdio
|
44
|
+
```
|
45
|
+
|
46
|
+
Notes:
|
47
|
+
- Transports: `stdio`, `sse`, `streamable-http`.
|
48
|
+
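- `host`/`port` are used for the HTTP transports (`sse`, `streamable-http`) and ignored for `stdio`. The port defaults to `5056` and can be overridden with the `PORT` environment variable or the `--port` option.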
|
49
|
+
|
50
|
+
### Claude Desktop config
|
51
|
+
```json
|
52
|
+
{
|
53
|
+
"mcpServers": {
|
54
|
+
"academia": {
|
55
|
+
"command": "python3",
|
56
|
+
"args": [
|
57
|
+
"-m",
|
58
|
+
"academia_mcp",
|
59
|
+
"--transport",
|
60
|
+
"stdio"
|
61
|
+
]
|
62
|
+
}
|
63
|
+
}
|
64
|
+
}
|
65
|
+
```
|
66
|
+
|
67
|
+
### Available tools (one-liners)
|
68
|
+
- `arxiv_search`: Query arXiv with field-specific queries and filters.
|
69
|
+
- `arxiv_download`: Fetch a paper by ID and convert to structured text (HTML/PDF modes).
|
70
|
+
- `anthology_search`: Search ACL Anthology with fielded queries and optional date filtering.
|
71
|
+
- `hf_datasets_search`: Find Hugging Face datasets with filters and sorting.
|
72
|
+
- `s2_get_citations`: List papers citing a given arXiv paper (Semantic Scholar Graph).
|
73
|
+
- `s2_get_references`: List papers referenced by a given arXiv paper.
|
74
|
+
- `visit_webpage`: Fetch and normalize a web page.
|
75
|
+
- `web_search`: Unified search wrapper; available when at least one of Exa/Brave/Tavily keys is set.
|
76
|
+
- `exa_web_search`, `brave_web_search`, `tavily_web_search`: Provider-specific search.
|
77
|
+
- `get_latex_templates_list`, `get_latex_template`: Enumerate and fetch built-in LaTeX templates.
|
78
|
+
- `compile_latex`: Compile LaTeX to PDF in `WORKSPACE_DIR`.
|
79
|
+
- `read_pdf`: Extract text per page from a PDF.
|
80
|
+
- `download_pdf_paper`, `review_pdf_paper`: Download a PDF (requires workspace) and optionally review it (requires LLM + workspace).
|
81
|
+
- `document_qa`: Answer questions over provided document chunks (requires LLM).
|
82
|
+
- `extract_bitflip_info`, `generate_research_proposals`, `score_research_proposals`: Research proposal helpers (requires LLM).
|
83
|
+
|
84
|
+
Availability notes:
|
85
|
+
- Set `WORKSPACE_DIR` to enable `compile_latex`, `read_pdf`, `download_pdf_paper`, and `review_pdf_paper`.
|
86
|
+
- Set `OPENROUTER_API_KEY` to enable LLM tools (`document_qa`, `review_pdf_paper`, and bitflip tools).
|
87
|
+
- Set one or more of `EXA_API_KEY`, `BRAVE_API_KEY`, `TAVILY_API_KEY` to enable `web_search` and provider tools.
|
88
|
+
|
89
|
+
### Environment variables
|
90
|
+
Set as needed depending on which tools you use:
|
91
|
+
|
92
|
+
- `OPENROUTER_API_KEY`: required for LLM-related tools.
|
93
|
+
- `BASE_URL`: override OpenRouter base URL.
|
94
|
+
- `DOCUMENT_QA_MODEL_NAME`: override default model for `document_qa`.
|
95
|
+
- `BITFLIP_MODEL_NAME`: override default model for bitflip tools.
|
96
|
+
- `TAVILY_API_KEY`: enables Tavily in `web_search`.
|
97
|
+
- `EXA_API_KEY`: enables Exa in `web_search` and `visit_webpage`.
|
98
|
+
- `BRAVE_API_KEY`: enables Brave in `web_search`.
|
99
|
+
- `WORKSPACE_DIR`: directory for generated files (PDFs, temp artifacts).
|
100
|
+
- `PORT`: HTTP port (default `5056`).
|
101
|
+
|
102
|
+
You can also put these in a `.env` file in the directory the server is started from (for example, the project root).
|
103
|
+
|
104
|
+
### Docker
|
105
|
+
Build the image:
|
106
|
+
```bash
|
107
|
+
docker build -t academia_mcp .
|
108
|
+
```
|
109
|
+
|
110
|
+
Run the server (HTTP):
|
111
|
+
```bash
|
112
|
+
docker run --rm -p 5056:5056 \
|
113
|
+
-e PORT=5056 \
|
114
|
+
-e OPENROUTER_API_KEY=your_key_here \
|
115
|
+
-e WORKSPACE_DIR=/workspace \
|
116
|
+
-v "$PWD/workdir:/workspace" \
|
117
|
+
academia_mcp
|
118
|
+
```
|
119
|
+
|
120
|
+
Or use existing image: `phoenix120/academia_mcp`
|
121
|
+
|
122
|
+
### Examples
|
123
|
+
- [Comprehensive report screencast (YouTube)](https://www.youtube.com/watch?v=4bweqQcN6w8)
|
124
|
+
- [Single paper screencast (YouTube)](https://www.youtube.com/watch?v=IAAPMptJ5k8)
|
125
|
+
|
126
|
+
### Makefile targets
|
127
|
+
- `make install`: install the package in editable mode with uv
|
128
|
+
- `make validate`: run black, flake8, and mypy (strict)
|
129
|
+
- `make test`: run the test suite with pytest
|
130
|
+
- `make publish`: build and publish using uv
|
131
|
+
|
132
|
+
### LaTeX/PDF requirements
|
133
|
+
Only needed for the LaTeX/PDF tools. Ensure that a LaTeX distribution is installed and that both `pdflatex` and `latexmk` are on your PATH. On Debian/Ubuntu:
|
134
|
+
```bash
|
135
|
+
sudo apt install texlive-latex-base texlive-fonts-recommended texlive-latex-extra texlive-science latexmk
|
136
|
+
```
|
@@ -1,7 +1,8 @@
|
|
1
|
-
import os
|
2
1
|
from typing import Optional
|
3
2
|
from pathlib import Path
|
4
3
|
|
4
|
+
from academia_mcp.settings import settings
|
5
|
+
|
5
6
|
DIR_PATH = Path(__file__).parent
|
6
7
|
ROOT_PATH = DIR_PATH.parent
|
7
8
|
DEFAULT_WORKSPACE_DIR_PATH: Path = DIR_PATH / "workdir"
|
@@ -14,7 +15,7 @@ class WorkspaceDirectory:
|
|
14
15
|
@classmethod
|
15
16
|
def get_dir(cls) -> Path:
|
16
17
|
if cls.workspace_dir is None:
|
17
|
-
return Path(
|
18
|
+
return Path(settings.WORKSPACE_DIR)
|
18
19
|
return cls.workspace_dir
|
19
20
|
|
20
21
|
@classmethod
|
@@ -1,10 +1,11 @@
|
|
1
|
-
import os
|
2
1
|
from typing import List, Dict, Any
|
3
2
|
|
4
3
|
from pydantic import BaseModel
|
5
4
|
from openai import AsyncOpenAI
|
6
5
|
from openai.types.chat.chat_completion_message import ChatCompletionMessage
|
7
6
|
|
7
|
+
from academia_mcp.settings import settings
|
8
|
+
|
8
9
|
|
9
10
|
class ChatMessage(BaseModel): # type: ignore
|
10
11
|
role: str
|
@@ -15,9 +16,9 @@ ChatMessages = List[ChatMessage]
|
|
15
16
|
|
16
17
|
|
17
18
|
async def llm_acall(model_name: str, messages: ChatMessages, **kwargs: Any) -> str:
|
18
|
-
key =
|
19
|
+
key = settings.OPENROUTER_API_KEY
|
19
20
|
assert key, "Please set OPENROUTER_API_KEY in the environment variables"
|
20
|
-
base_url =
|
21
|
+
base_url = settings.BASE_URL
|
21
22
|
|
22
23
|
client = AsyncOpenAI(base_url=base_url, api_key=key)
|
23
24
|
response: ChatCompletionMessage = (
|
@@ -0,0 +1,133 @@
|
|
1
|
+
import socket
|
2
|
+
import logging
|
3
|
+
from logging.config import dictConfig
|
4
|
+
from typing import Optional, Literal
|
5
|
+
|
6
|
+
import fire # type: ignore
|
7
|
+
from mcp.server.fastmcp import FastMCP
|
8
|
+
from uvicorn.config import LOGGING_CONFIG as UVICORN_LOGGING_CONFIG
|
9
|
+
|
10
|
+
from academia_mcp.settings import settings
|
11
|
+
from academia_mcp.tools.arxiv_search import arxiv_search
|
12
|
+
from academia_mcp.tools.arxiv_download import arxiv_download
|
13
|
+
from academia_mcp.tools.s2 import (
|
14
|
+
s2_get_citations,
|
15
|
+
s2_get_references,
|
16
|
+
s2_corpus_id_from_arxiv_id,
|
17
|
+
s2_get_info,
|
18
|
+
)
|
19
|
+
from academia_mcp.tools.hf_datasets_search import hf_datasets_search
|
20
|
+
from academia_mcp.tools.anthology_search import anthology_search
|
21
|
+
from academia_mcp.tools.document_qa import document_qa
|
22
|
+
from academia_mcp.tools.latex import (
|
23
|
+
compile_latex,
|
24
|
+
get_latex_template,
|
25
|
+
get_latex_templates_list,
|
26
|
+
read_pdf,
|
27
|
+
)
|
28
|
+
from academia_mcp.tools.web_search import (
|
29
|
+
web_search,
|
30
|
+
tavily_web_search,
|
31
|
+
exa_web_search,
|
32
|
+
brave_web_search,
|
33
|
+
)
|
34
|
+
from academia_mcp.tools.visit_webpage import visit_webpage
|
35
|
+
from academia_mcp.tools.bitflip import (
|
36
|
+
extract_bitflip_info,
|
37
|
+
generate_research_proposals,
|
38
|
+
score_research_proposals,
|
39
|
+
)
|
40
|
+
from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
|
41
|
+
|
42
|
+
|
43
|
+
def configure_uvicorn_style_logging(level: int = logging.INFO) -> None:
|
44
|
+
config = {**UVICORN_LOGGING_CONFIG}
|
45
|
+
config["disable_existing_loggers"] = False
|
46
|
+
config["root"] = {"handlers": ["default"], "level": logging.getLevelName(level)}
|
47
|
+
dictConfig(config)
|
48
|
+
|
49
|
+
|
50
|
+
def find_free_port() -> int:
|
51
|
+
for port in range(5000, 6001):
|
52
|
+
try:
|
53
|
+
with socket.socket() as s:
|
54
|
+
s.bind(("", port))
|
55
|
+
return port
|
56
|
+
except Exception:
|
57
|
+
continue
|
58
|
+
raise RuntimeError("No free port in range 5000-6000 found")
|
59
|
+
|
60
|
+
|
61
|
+
def run(
|
62
|
+
host: str = "0.0.0.0",
|
63
|
+
port: Optional[int] = None,
|
64
|
+
mount_path: str = "/",
|
65
|
+
streamable_http_path: str = "/mcp",
|
66
|
+
transport: Literal["stdio", "sse", "streamable-http"] = "streamable-http",
|
67
|
+
disable_web_search_tools: bool = False,
|
68
|
+
disable_llm_tools: bool = False,
|
69
|
+
) -> None:
|
70
|
+
configure_uvicorn_style_logging()
|
71
|
+
server = FastMCP(
|
72
|
+
"Academia MCP",
|
73
|
+
stateless_http=True,
|
74
|
+
streamable_http_path=streamable_http_path,
|
75
|
+
mount_path=mount_path,
|
76
|
+
)
|
77
|
+
logger = logging.getLogger(__name__)
|
78
|
+
|
79
|
+
server.add_tool(arxiv_search)
|
80
|
+
server.add_tool(arxiv_download)
|
81
|
+
server.add_tool(s2_get_citations)
|
82
|
+
server.add_tool(s2_get_references)
|
83
|
+
server.add_tool(s2_corpus_id_from_arxiv_id)
|
84
|
+
server.add_tool(s2_get_info)
|
85
|
+
server.add_tool(hf_datasets_search)
|
86
|
+
server.add_tool(anthology_search)
|
87
|
+
server.add_tool(get_latex_template)
|
88
|
+
server.add_tool(get_latex_templates_list)
|
89
|
+
server.add_tool(visit_webpage)
|
90
|
+
|
91
|
+
if settings.WORKSPACE_DIR:
|
92
|
+
server.add_tool(compile_latex)
|
93
|
+
server.add_tool(download_pdf_paper)
|
94
|
+
server.add_tool(read_pdf)
|
95
|
+
else:
|
96
|
+
logger.warning(
|
97
|
+
"WORKSPACE_DIR is not set, compile_latex/download_pdf_paper/read_pdf will not be available!"
|
98
|
+
)
|
99
|
+
|
100
|
+
if not disable_web_search_tools:
|
101
|
+
if settings.TAVILY_API_KEY:
|
102
|
+
server.add_tool(tavily_web_search)
|
103
|
+
if settings.EXA_API_KEY:
|
104
|
+
server.add_tool(exa_web_search)
|
105
|
+
if settings.BRAVE_API_KEY:
|
106
|
+
server.add_tool(brave_web_search)
|
107
|
+
if settings.EXA_API_KEY or settings.BRAVE_API_KEY or settings.TAVILY_API_KEY:
|
108
|
+
server.add_tool(web_search)
|
109
|
+
else:
|
110
|
+
logger.warning("No web search tools keys are set, web_search will not be available!")
|
111
|
+
|
112
|
+
if not disable_llm_tools and settings.OPENROUTER_API_KEY:
|
113
|
+
server.add_tool(extract_bitflip_info)
|
114
|
+
server.add_tool(generate_research_proposals)
|
115
|
+
server.add_tool(score_research_proposals)
|
116
|
+
server.add_tool(document_qa)
|
117
|
+
if settings.WORKSPACE_DIR:
|
118
|
+
server.add_tool(review_pdf_paper)
|
119
|
+
else:
|
120
|
+
logger.warning("No OpenRouter API key is set, LLM-related tools will not be available!")
|
121
|
+
|
122
|
+
if port is None:
|
123
|
+
if settings.PORT is not None:
|
124
|
+
port = int(settings.PORT)
|
125
|
+
else:
|
126
|
+
port = find_free_port()
|
127
|
+
server.settings.port = port
|
128
|
+
server.settings.host = host
|
129
|
+
server.run(transport=transport)
|
130
|
+
|
131
|
+
|
132
|
+
if __name__ == "__main__":
|
133
|
+
fire.Fire(run)
|
@@ -0,0 +1,33 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
5
|
+
|
6
|
+
|
7
|
+
class Settings(BaseSettings):
|
8
|
+
BASE_URL: str = "https://openrouter.ai/api/v1"
|
9
|
+
|
10
|
+
OPENROUTER_API_KEY: str = ""
|
11
|
+
TAVILY_API_KEY: Optional[str] = None
|
12
|
+
EXA_API_KEY: Optional[str] = None
|
13
|
+
BRAVE_API_KEY: Optional[str] = None
|
14
|
+
|
15
|
+
REVIEW_MODEL_NAME: str = "gpt-5"
|
16
|
+
BITFLIP_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
|
17
|
+
BITFLIP_MAX_COMPLETION_TOKENS: int = 16384
|
18
|
+
DOCUMENT_QA_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
|
19
|
+
DOCUMENT_QA_QUESTION_MAX_LENGTH: int = 10000
|
20
|
+
DOCUMENT_QA_DOCUMENT_MAX_LENGTH: int = 200000
|
21
|
+
|
22
|
+
PORT: int = 5056
|
23
|
+
WORKSPACE_DIR: Optional[Path] = None
|
24
|
+
|
25
|
+
model_config = SettingsConfigDict(
|
26
|
+
env_file=".env",
|
27
|
+
env_file_encoding="utf-8",
|
28
|
+
env_prefix="",
|
29
|
+
extra="ignore",
|
30
|
+
)
|
31
|
+
|
32
|
+
|
33
|
+
settings = Settings()
|
@@ -2,7 +2,7 @@ from .arxiv_search import arxiv_search
|
|
2
2
|
from .anthology_search import anthology_search
|
3
3
|
from .arxiv_download import arxiv_download
|
4
4
|
from .hf_datasets_search import hf_datasets_search
|
5
|
-
from .
|
5
|
+
from .s2 import s2_get_references, s2_get_citations, s2_corpus_id_from_arxiv_id, s2_get_info
|
6
6
|
from .document_qa import document_qa
|
7
7
|
from .latex import (
|
8
8
|
compile_latex,
|
@@ -13,7 +13,7 @@ from .latex import (
|
|
13
13
|
from .web_search import web_search, tavily_web_search, exa_web_search, brave_web_search
|
14
14
|
from .visit_webpage import visit_webpage
|
15
15
|
from .bitflip import extract_bitflip_info, generate_research_proposals, score_research_proposals
|
16
|
-
from .review import review_pdf_paper, download_pdf_paper
|
16
|
+
from .review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url
|
17
17
|
|
18
18
|
__all__ = [
|
19
19
|
"arxiv_search",
|
@@ -21,6 +21,8 @@ __all__ = [
|
|
21
21
|
"anthology_search",
|
22
22
|
"s2_get_references",
|
23
23
|
"s2_get_citations",
|
24
|
+
"s2_corpus_id_from_arxiv_id",
|
25
|
+
"s2_get_info",
|
24
26
|
"hf_datasets_search",
|
25
27
|
"document_qa",
|
26
28
|
"compile_latex",
|
@@ -35,6 +37,7 @@ __all__ = [
|
|
35
37
|
"generate_research_proposals",
|
36
38
|
"score_research_proposals",
|
37
39
|
"review_pdf_paper",
|
40
|
+
"review_pdf_paper_by_url",
|
38
41
|
"download_pdf_paper",
|
39
42
|
"read_pdf",
|
40
43
|
]
|
@@ -2,7 +2,6 @@
|
|
2
2
|
# https://web.stanford.edu/class/cs197c/slides/02-literature-search.pdf
|
3
3
|
|
4
4
|
import json
|
5
|
-
import os
|
6
5
|
import random
|
7
6
|
from typing import List, Optional, Any, Dict
|
8
7
|
|
@@ -12,6 +11,7 @@ from datasets import load_dataset # type: ignore
|
|
12
11
|
from academia_mcp.tools.arxiv_download import arxiv_download
|
13
12
|
from academia_mcp.utils import extract_json, encode_prompt
|
14
13
|
from academia_mcp.llm import llm_acall, ChatMessage
|
14
|
+
from academia_mcp.settings import settings
|
15
15
|
|
16
16
|
|
17
17
|
class ProposalDataset:
|
@@ -201,7 +201,7 @@ async def extract_bitflip_info(arxiv_id: str) -> str:
|
|
201
201
|
Args:
|
202
202
|
arxiv_id: The arXiv ID of the paper to extract the Bit-Flip information from.
|
203
203
|
"""
|
204
|
-
model_name =
|
204
|
+
model_name = settings.BITFLIP_MODEL_NAME
|
205
205
|
paper = arxiv_download(arxiv_id)
|
206
206
|
abstract = json.loads(paper)["abstract"]
|
207
207
|
prompt = encode_prompt(EXTRACT_PROMPT, abstract=abstract)
|
@@ -240,8 +240,8 @@ async def generate_research_proposals(
|
|
240
240
|
]
|
241
241
|
Use `json.loads` to deserialize the result if you want to get specific items.
|
242
242
|
"""
|
243
|
-
model_name =
|
244
|
-
max_completion_tokens = int(
|
243
|
+
model_name = settings.BITFLIP_MODEL_NAME
|
244
|
+
max_completion_tokens = int(settings.BITFLIP_MAX_COMPLETION_TOKENS)
|
245
245
|
examples = ProposalDataset.get_dataset()[:]
|
246
246
|
examples = random.choices(examples, k=2)
|
247
247
|
|
@@ -293,7 +293,7 @@ async def score_research_proposals(proposals: str | List[str | Dict[str, Any] |
|
|
293
293
|
Args:
|
294
294
|
proposals: A list of JSON strings with research proposals.
|
295
295
|
"""
|
296
|
-
model_name =
|
296
|
+
model_name = settings.BITFLIP_MODEL_NAME
|
297
297
|
if isinstance(proposals, str):
|
298
298
|
proposals = json.loads(proposals)
|
299
299
|
assert isinstance(proposals, list), "Proposals should be a list of JSON strings"
|
@@ -1,14 +1,12 @@
|
|
1
|
-
import os
|
2
1
|
import json
|
3
2
|
from typing import List, Any, Dict
|
4
|
-
from dotenv import load_dotenv
|
5
3
|
|
6
4
|
from pydantic import BaseModel
|
7
5
|
|
8
6
|
from academia_mcp.llm import llm_acall
|
9
7
|
from academia_mcp.utils import truncate_content
|
8
|
+
from academia_mcp.settings import settings
|
10
9
|
|
11
|
-
load_dotenv()
|
12
10
|
|
13
11
|
PROMPT = """You are a helpful assistant that answers questions about documents accurately and concisely.
|
14
12
|
Please answer the following questions based solely on the provided document.
|
@@ -65,10 +63,10 @@ async def document_qa(
|
|
65
63
|
document = json.dumps(document)
|
66
64
|
assert document and document.strip(), "Please provide non-empty 'document'"
|
67
65
|
|
68
|
-
question = truncate_content(question,
|
69
|
-
document = truncate_content(document,
|
66
|
+
question = truncate_content(question, settings.DOCUMENT_QA_QUESTION_MAX_LENGTH)
|
67
|
+
document = truncate_content(document, settings.DOCUMENT_QA_DOCUMENT_MAX_LENGTH)
|
70
68
|
|
71
|
-
model_name =
|
69
|
+
model_name = settings.DOCUMENT_QA_MODEL_NAME
|
72
70
|
prompt = PROMPT.format(question=question, document=document)
|
73
71
|
content = await llm_acall(
|
74
72
|
model_name=model_name, messages=[ChatMessage(role="user", content=prompt)]
|