academia-mcp 1.10.8__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- academia_mcp/server.py +29 -9
- academia_mcp/tools/arxiv_search.py +41 -32
- academia_mcp/tools/show_image.py +21 -6
- academia_mcp/tools/visit_webpage.py +4 -1
- academia_mcp/tools/yt_transcript.py +6 -3
- {academia_mcp-1.10.8.dist-info → academia_mcp-1.11.0.dist-info}/METADATA +1 -1
- {academia_mcp-1.10.8.dist-info → academia_mcp-1.11.0.dist-info}/RECORD +11 -11
- {academia_mcp-1.10.8.dist-info → academia_mcp-1.11.0.dist-info}/WHEEL +0 -0
- {academia_mcp-1.10.8.dist-info → academia_mcp-1.11.0.dist-info}/entry_points.txt +0 -0
- {academia_mcp-1.10.8.dist-info → academia_mcp-1.11.0.dist-info}/licenses/LICENSE +0 -0
- {academia_mcp-1.10.8.dist-info → academia_mcp-1.11.0.dist-info}/top_level.txt +0 -0
academia_mcp/server.py
CHANGED
@@ -63,25 +63,24 @@ def find_free_port() -> int:
|
|
63
63
|
raise RuntimeError("No free port in range 5000-6000 found")
|
64
64
|
|
65
65
|
|
66
|
-
def
|
67
|
-
host: str = "0.0.0.0",
|
68
|
-
port: Optional[int] = None,
|
69
|
-
mount_path: str = "/",
|
66
|
+
def create_server(
|
70
67
|
streamable_http_path: str = "/mcp",
|
71
|
-
|
68
|
+
mount_path: str = "/",
|
69
|
+
stateless_http: bool = True,
|
72
70
|
disable_web_search_tools: bool = False,
|
73
71
|
disable_llm_tools: bool = False,
|
74
|
-
|
75
|
-
|
72
|
+
port: Optional[int] = None,
|
73
|
+
host: str = "0.0.0.0",
|
74
|
+
) -> FastMCP:
|
76
75
|
server = FastMCP(
|
77
76
|
"Academia MCP",
|
78
|
-
stateless_http=
|
77
|
+
stateless_http=stateless_http,
|
79
78
|
streamable_http_path=streamable_http_path,
|
80
79
|
mount_path=mount_path,
|
81
80
|
)
|
82
81
|
logger = logging.getLogger(__name__)
|
83
82
|
|
84
|
-
server.add_tool(arxiv_search)
|
83
|
+
server.add_tool(arxiv_search, structured_output=True)
|
85
84
|
server.add_tool(arxiv_download)
|
86
85
|
server.add_tool(s2_get_citations)
|
87
86
|
server.add_tool(s2_get_references)
|
@@ -140,6 +139,27 @@ def run(
|
|
140
139
|
|
141
140
|
server.settings.port = port
|
142
141
|
server.settings.host = host
|
142
|
+
return server
|
143
|
+
|
144
|
+
|
145
|
+
def run(
|
146
|
+
host: str = "0.0.0.0",
|
147
|
+
port: Optional[int] = None,
|
148
|
+
mount_path: str = "/",
|
149
|
+
streamable_http_path: str = "/mcp",
|
150
|
+
transport: Literal["stdio", "sse", "streamable-http"] = "streamable-http",
|
151
|
+
disable_web_search_tools: bool = False,
|
152
|
+
disable_llm_tools: bool = False,
|
153
|
+
) -> None:
|
154
|
+
configure_uvicorn_style_logging()
|
155
|
+
server = create_server(
|
156
|
+
streamable_http_path=streamable_http_path,
|
157
|
+
mount_path=mount_path,
|
158
|
+
disable_web_search_tools=disable_web_search_tools,
|
159
|
+
disable_llm_tools=disable_llm_tools,
|
160
|
+
port=port,
|
161
|
+
host=host,
|
162
|
+
)
|
143
163
|
|
144
164
|
if transport == "streamable-http":
|
145
165
|
# Enable CORS for browser-based clients
|
@@ -2,12 +2,12 @@
|
|
2
2
|
# https://github.com/jonatasgrosman/findpapers/blob/master/findpapers/searchers/arxiv_searcher.py
|
3
3
|
# https://info.arxiv.org/help/api/user-manual.html
|
4
4
|
|
5
|
-
import json
|
6
5
|
import re
|
7
6
|
from typing import Optional, List, Dict, Any, Union
|
8
7
|
from datetime import datetime, date
|
9
8
|
|
10
9
|
import xmltodict
|
10
|
+
from pydantic import BaseModel, Field
|
11
11
|
|
12
12
|
from academia_mcp.utils import get_with_retries
|
13
13
|
|
@@ -17,6 +17,25 @@ SORT_BY_OPTIONS = ("relevance", "lastUpdatedDate", "submittedDate")
|
|
17
17
|
SORT_ORDER_OPTIONS = ("ascending", "descending")
|
18
18
|
|
19
19
|
|
20
|
+
class ArxivSearchEntry(BaseModel): # type: ignore
|
21
|
+
id: str = Field(description="Paper ID")
|
22
|
+
title: str = Field(description="Paper title")
|
23
|
+
authors: str = Field(description="Authors of the paper")
|
24
|
+
published: str = Field(description="Published date of the paper")
|
25
|
+
updated: str = Field(description="Updated date of the paper")
|
26
|
+
categories: str = Field(description="Categories of the paper")
|
27
|
+
comment: str = Field(description="Comment of the paper")
|
28
|
+
index: int = Field(description="Index of the paper", default=0)
|
29
|
+
abstract: Optional[str] = Field(description="Abstract of the paper", default=None)
|
30
|
+
|
31
|
+
|
32
|
+
class ArxivSearchResponse(BaseModel): # type: ignore
|
33
|
+
total_count: int = Field(description="The total number of results")
|
34
|
+
returned_count: int = Field(description="The number of results returned")
|
35
|
+
offset: int = Field(description="The offset of the results")
|
36
|
+
results: List[ArxivSearchEntry] = Field(description="The results, search entries")
|
37
|
+
|
38
|
+
|
20
39
|
def _format_text_field(text: str) -> str:
|
21
40
|
return " ".join([line.strip() for line in text.split() if line.strip()])
|
22
41
|
|
@@ -48,17 +67,17 @@ def _format_date(date: str) -> str:
|
|
48
67
|
return dt.strftime("%B %d, %Y")
|
49
68
|
|
50
69
|
|
51
|
-
def _clean_entry(entry: Dict[str, Any]) ->
|
52
|
-
return
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
70
|
+
def _clean_entry(entry: Dict[str, Any]) -> ArxivSearchEntry:
|
71
|
+
return ArxivSearchEntry(
|
72
|
+
id=entry["id"].split("/")[-1],
|
73
|
+
title=_format_text_field(entry["title"]),
|
74
|
+
authors=_format_authors(entry["author"]),
|
75
|
+
abstract=_format_text_field(entry["summary"]),
|
76
|
+
published=_format_date(entry["published"]),
|
77
|
+
updated=_format_date(entry["updated"]),
|
78
|
+
categories=_format_categories(entry.get("category", {})),
|
79
|
+
comment=_format_text_field(entry.get("arxiv:comment", {}).get("#text", "")),
|
80
|
+
)
|
62
81
|
|
63
82
|
|
64
83
|
def _convert_to_yyyymmddtttt(date_str: str) -> str:
|
@@ -105,22 +124,19 @@ def _format_entries(
|
|
105
124
|
start_index: int,
|
106
125
|
include_abstracts: bool,
|
107
126
|
total_results: int,
|
108
|
-
) ->
|
127
|
+
) -> ArxivSearchResponse:
|
109
128
|
clean_entries: List[Dict[str, Any]] = []
|
110
129
|
for entry_num, entry in enumerate(entries):
|
111
130
|
clean_entry = _clean_entry(entry)
|
112
131
|
if not include_abstracts:
|
113
|
-
clean_entry.
|
114
|
-
clean_entry
|
132
|
+
clean_entry.abstract = None
|
133
|
+
clean_entry.index = start_index + entry_num
|
115
134
|
clean_entries.append(clean_entry)
|
116
|
-
return
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
"results": clean_entries,
|
122
|
-
},
|
123
|
-
ensure_ascii=False,
|
135
|
+
return ArxivSearchResponse(
|
136
|
+
total_count=total_results,
|
137
|
+
returned_count=len(entries),
|
138
|
+
offset=start_index,
|
139
|
+
results=clean_entries,
|
124
140
|
)
|
125
141
|
|
126
142
|
|
@@ -133,7 +149,7 @@ def arxiv_search(
|
|
133
149
|
sort_by: Optional[str] = "relevance",
|
134
150
|
sort_order: Optional[str] = "descending",
|
135
151
|
include_abstracts: Optional[bool] = False,
|
136
|
-
) ->
|
152
|
+
) -> ArxivSearchResponse:
|
137
153
|
"""
|
138
154
|
Search arXiv papers with field-specific queries.
|
139
155
|
|
@@ -158,12 +174,6 @@ def arxiv_search(
|
|
158
174
|
all:role OR all:playing OR all:"language model"
|
159
175
|
(au:vaswani OR au:"del maestro") ANDNOT ti:attention
|
160
176
|
|
161
|
-
Returns a JSON object serialized to a string. The structure is:
|
162
|
-
{"total_count": ..., "returned_count": ..., "offset": ..., "results": [...]}
|
163
|
-
Every item in the "results" has the following fields:
|
164
|
-
("index", "id", "title", "authors", "abstract", "published", "updated", "categories", "comment")
|
165
|
-
Use `json.loads` to deserialize the result if you want to get specific fields.
|
166
|
-
|
167
177
|
Args:
|
168
178
|
query: The search query, required.
|
169
179
|
offset: The offset to scroll search results. 10 items will be skipped if offset=10. 0 by default.
|
@@ -211,10 +221,9 @@ def arxiv_search(
|
|
211
221
|
entries = feed.get("entry", [])
|
212
222
|
if isinstance(entries, dict):
|
213
223
|
entries = [entries]
|
214
|
-
|
224
|
+
return _format_entries(
|
215
225
|
entries,
|
216
226
|
start_index=start_index,
|
217
227
|
total_results=total_results,
|
218
228
|
include_abstracts=include_abstracts,
|
219
229
|
)
|
220
|
-
return formatted_entries
|
academia_mcp/tools/show_image.py
CHANGED
@@ -30,7 +30,20 @@ DESCRIBE_PROMPTS = {
|
|
30
30
|
4. Any immediate tactical opportunities or threats
|
31
31
|
5. Suggested next moves with brief explanations"""
|
32
32
|
),
|
33
|
-
"text":
|
33
|
+
"text": dedent(
|
34
|
+
"""You are performing OCR and transcription.
|
35
|
+
Extract ALL text and numbers from the image verbatim.
|
36
|
+
- Preserve original casing, punctuation, symbols, mathematical notation, and whitespace layout when possible.
|
37
|
+
- If layout is multi-column or tabular, reconstruct lines top-to-bottom, left-to-right; use line breaks between blocks.
|
38
|
+
- For any uncertain or low-confidence characters, mark with a '?' and include a note.
|
39
|
+
- After the raw extraction, provide a clean, normalized version (fixing obvious OCR artifacts) as a separate section.
|
40
|
+
Return two sections:
|
41
|
+
[RAW TRANSCRIPTION]
|
42
|
+
...
|
43
|
+
[NORMALIZED]
|
44
|
+
...
|
45
|
+
"""
|
46
|
+
),
|
34
47
|
}
|
35
48
|
|
36
49
|
|
@@ -44,10 +57,8 @@ def show_image(path: str) -> Dict[str, str]:
|
|
44
57
|
```
|
45
58
|
Do not print it ever, just return as the last expression.
|
46
59
|
|
47
|
-
Returns an dictionary with a single "image" key.
|
48
|
-
|
49
60
|
Args:
|
50
|
-
|
61
|
+
path: Path to file inside current work directory or web URL
|
51
62
|
"""
|
52
63
|
if path.startswith("http"):
|
53
64
|
response = httpx.get(path, timeout=10)
|
@@ -80,7 +91,7 @@ async def describe_image(
|
|
80
91
|
- "general": General description of the image
|
81
92
|
- "detailed": Detailed analysis of the image
|
82
93
|
- "chess": Analysis of a chess position
|
83
|
-
- "text": Extract and describe text from the image
|
94
|
+
- "text": Extract and describe text or numbers from the image
|
84
95
|
- "custom": Custom description based on user prompt
|
85
96
|
"""
|
86
97
|
image_base64 = show_image(path)["image_base64"]
|
@@ -93,12 +104,16 @@ async def describe_image(
|
|
93
104
|
{"type": "text", "text": prompt},
|
94
105
|
{
|
95
106
|
"type": "image_url",
|
96
|
-
"image_url": {"url": f"data:image/
|
107
|
+
"image_url": {"url": f"data:image/png;base64,{image_base64}"},
|
97
108
|
},
|
98
109
|
]
|
99
110
|
model_name = settings.DESCRIBE_IMAGE_MODEL_NAME
|
111
|
+
llm_kwargs = {}
|
112
|
+
if description_type in {"text", "chess"}:
|
113
|
+
llm_kwargs["temperature"] = 0.0
|
100
114
|
response = await llm_acall(
|
101
115
|
model_name=model_name,
|
102
116
|
messages=[ChatMessage(role="user", content=content)],
|
117
|
+
**llm_kwargs,
|
103
118
|
)
|
104
119
|
return response
|
@@ -33,12 +33,15 @@ def _tavily_visit_webpage(url: str) -> Dict[str, Any]:
|
|
33
33
|
assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
|
34
34
|
payload = {
|
35
35
|
"urls": [url],
|
36
|
+
"extract_depth": "advanced",
|
37
|
+
"include_images": True,
|
36
38
|
}
|
37
39
|
response = post_with_retries(TAVILY_EXTRACT_URL, payload=payload, api_key=key)
|
38
40
|
results = response.json()["results"]
|
39
41
|
if not results:
|
40
42
|
return {"error": ERROR_MESSAGE}
|
41
|
-
|
43
|
+
result = results[0]
|
44
|
+
return {"text": result["raw_content"], "images": result["images"]}
|
42
45
|
|
43
46
|
|
44
47
|
def _basic_visit_webpage(url: str) -> Dict[str, Any]:
|
@@ -13,8 +13,8 @@ def yt_transcript(video_url: str) -> str:
|
|
13
13
|
Args:
|
14
14
|
video_url (str): YouTube video URL.
|
15
15
|
"""
|
16
|
-
if "youtu.be" in video_url:
|
17
|
-
video_id = video_url.strip().split("youtu.be/")[1]
|
16
|
+
if "youtu.be/" in video_url:
|
17
|
+
video_id = video_url.strip().split("youtu.be/")[-1]
|
18
18
|
else:
|
19
19
|
video_id = video_url.strip().split("v=")[-1]
|
20
20
|
video_id = video_id.split("?")[0]
|
@@ -25,6 +25,9 @@ def yt_transcript(video_url: str) -> str:
|
|
25
25
|
proxy_password=settings.WEBSHARE_PROXY_PASSWORD,
|
26
26
|
)
|
27
27
|
api = YouTubeTranscriptApi(proxy_config=proxy_config)
|
28
|
-
|
28
|
+
try:
|
29
|
+
transcript = api.fetch(video_id)
|
30
|
+
except Exception as e:
|
31
|
+
return f"Error fetching transcript for video {video_url}: {e}"
|
29
32
|
snippets = transcript.snippets
|
30
33
|
return "\n".join([f"{int(entry.start)}: {' '.join(entry.text.split())}" for entry in snippets])
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: academia-mcp
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.11.0
|
4
4
|
Summary: MCP server that provides different tools to search for scientific publications
|
5
5
|
Author-email: Ilya Gusev <phoenixilya@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
|
@@ -4,7 +4,7 @@ academia_mcp/files.py,sha256=ynIt0XbU1Z7EPWkv_hVX0pGKsLlmjYv-MVJLOfi6yzs,817
|
|
4
4
|
academia_mcp/llm.py,sha256=zpGkuJFf58Ofgys_fi28-47_wJ1a7sIs_yZvI1Si6z0,993
|
5
5
|
academia_mcp/pdf.py,sha256=9PlXzHGhb6ay3ldbTdxCcTWvH4TkET3bnb64mgoh9i0,1273
|
6
6
|
academia_mcp/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
academia_mcp/server.py,sha256=
|
7
|
+
academia_mcp/server.py,sha256=s7rwsNePtz8ZJtzJ5FmvzStWR2ApArxiJROcRwyqrww,6102
|
8
8
|
academia_mcp/settings.py,sha256=c5s4dI8V_cWmMED-jKDmHjfdIaBcxwEK4HdHNQ3WUIg,1096
|
9
9
|
academia_mcp/utils.py,sha256=lRlb615JJ_0d4gcFpMoBjB6w0xXcde9dFDw0LwYpSPQ,4863
|
10
10
|
academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty,sha256=hGcEPCYBJS4vdhWvN_yEaJC4GvT_yDroI94CfY2Oguk,12268
|
@@ -12,7 +12,7 @@ academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex,sha256=
|
|
12
12
|
academia_mcp/tools/__init__.py,sha256=Z30vULZwUeUX5nDz5wcv0znhAeBtZRa0dvz7vD8SUYE,1555
|
13
13
|
academia_mcp/tools/anthology_search.py,sha256=rhFpJZqGLABgr0raDuH0CARBiAJNJtEI4dlMrKNHfDQ,7669
|
14
14
|
academia_mcp/tools/arxiv_download.py,sha256=gBY0_Kz0yGtVkLMwn6GrAyfBjovZVgcSMuyy67p65Cw,10474
|
15
|
-
academia_mcp/tools/arxiv_search.py,sha256=
|
15
|
+
academia_mcp/tools/arxiv_search.py,sha256=Cb9x6SaHz5vjr7jwdq3U1PtvADfFJa5pq52z-0Rg8t0,8882
|
16
16
|
academia_mcp/tools/bitflip.py,sha256=1B-EEcDnJjB9YmvVWsGv_Un19Bkeud9SZDw2TpGTCSg,12184
|
17
17
|
academia_mcp/tools/document_qa.py,sha256=Wb2nEEVu9UyPp8ktHWeT9wS2JBle8fb9zRjTNVIDdBE,2463
|
18
18
|
academia_mcp/tools/hf_datasets_search.py,sha256=KiBkqT4rXjEN4oc1AWZOPnqN_Go90TQogY5-DUm3LQo,2854
|
@@ -20,14 +20,14 @@ academia_mcp/tools/latex.py,sha256=B1Leqt1FHY6H3DlUgeYse4LMFpf4-K1FQViXl5MKk8A,6
|
|
20
20
|
academia_mcp/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
21
|
academia_mcp/tools/review.py,sha256=Va0lFJJKuk-NvWhKS3UZ-Dnuk7CyuDQ4S1nd70D-ffE,11117
|
22
22
|
academia_mcp/tools/s2.py,sha256=QX7-pbetab3Xt_1tvVPU6o5D_NAe9y6jcTGRBK1vwtY,6200
|
23
|
-
academia_mcp/tools/show_image.py,sha256=
|
23
|
+
academia_mcp/tools/show_image.py,sha256=DWSnYMTn_dJpGTLL1r_sbX5XsB6p9z-vClApDANz84s,4534
|
24
24
|
academia_mcp/tools/speech_to_text.py,sha256=YZzMqdvunzXkpcadP_mYhm6cs4qH1Y_42SfY-7eX4O4,1601
|
25
|
-
academia_mcp/tools/visit_webpage.py,sha256=
|
25
|
+
academia_mcp/tools/visit_webpage.py,sha256=rMmjP2KXo_ElO7NPfUWnYN0EsRJHi2ikShekZR7pCms,3428
|
26
26
|
academia_mcp/tools/web_search.py,sha256=VphVztf2jZNT3bPJPJuTdMkKbe2-LIbSV7keKV47lac,8616
|
27
|
-
academia_mcp/tools/yt_transcript.py,sha256=
|
28
|
-
academia_mcp-1.
|
29
|
-
academia_mcp-1.
|
30
|
-
academia_mcp-1.
|
31
|
-
academia_mcp-1.
|
32
|
-
academia_mcp-1.
|
33
|
-
academia_mcp-1.
|
27
|
+
academia_mcp/tools/yt_transcript.py,sha256=ilfOpX14moC1bKHbFmOVvZ8-_NxuQQUoQbV28e9FBaE,1217
|
28
|
+
academia_mcp-1.11.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
29
|
+
academia_mcp-1.11.0.dist-info/METADATA,sha256=61gqVlxbohEvg2IwsRyDfX2X8u5Dsw-IUrAbm_o6Hdg,6356
|
30
|
+
academia_mcp-1.11.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
31
|
+
academia_mcp-1.11.0.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
|
32
|
+
academia_mcp-1.11.0.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
|
33
|
+
academia_mcp-1.11.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|