academia-mcp 1.10.8__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
academia_mcp/server.py CHANGED
@@ -63,25 +63,24 @@ def find_free_port() -> int:
63
63
  raise RuntimeError("No free port in range 5000-6000 found")
64
64
 
65
65
 
66
- def run(
67
- host: str = "0.0.0.0",
68
- port: Optional[int] = None,
69
- mount_path: str = "/",
66
+ def create_server(
70
67
  streamable_http_path: str = "/mcp",
71
- transport: Literal["stdio", "sse", "streamable-http"] = "streamable-http",
68
+ mount_path: str = "/",
69
+ stateless_http: bool = True,
72
70
  disable_web_search_tools: bool = False,
73
71
  disable_llm_tools: bool = False,
74
- ) -> None:
75
- configure_uvicorn_style_logging()
72
+ port: Optional[int] = None,
73
+ host: str = "0.0.0.0",
74
+ ) -> FastMCP:
76
75
  server = FastMCP(
77
76
  "Academia MCP",
78
- stateless_http=True,
77
+ stateless_http=stateless_http,
79
78
  streamable_http_path=streamable_http_path,
80
79
  mount_path=mount_path,
81
80
  )
82
81
  logger = logging.getLogger(__name__)
83
82
 
84
- server.add_tool(arxiv_search)
83
+ server.add_tool(arxiv_search, structured_output=True)
85
84
  server.add_tool(arxiv_download)
86
85
  server.add_tool(s2_get_citations)
87
86
  server.add_tool(s2_get_references)
@@ -140,6 +139,27 @@ def run(
140
139
 
141
140
  server.settings.port = port
142
141
  server.settings.host = host
142
+ return server
143
+
144
+
145
+ def run(
146
+ host: str = "0.0.0.0",
147
+ port: Optional[int] = None,
148
+ mount_path: str = "/",
149
+ streamable_http_path: str = "/mcp",
150
+ transport: Literal["stdio", "sse", "streamable-http"] = "streamable-http",
151
+ disable_web_search_tools: bool = False,
152
+ disable_llm_tools: bool = False,
153
+ ) -> None:
154
+ configure_uvicorn_style_logging()
155
+ server = create_server(
156
+ streamable_http_path=streamable_http_path,
157
+ mount_path=mount_path,
158
+ disable_web_search_tools=disable_web_search_tools,
159
+ disable_llm_tools=disable_llm_tools,
160
+ port=port,
161
+ host=host,
162
+ )
143
163
 
144
164
  if transport == "streamable-http":
145
165
  # Enable CORS for browser-based clients
@@ -2,12 +2,12 @@
2
2
  # https://github.com/jonatasgrosman/findpapers/blob/master/findpapers/searchers/arxiv_searcher.py
3
3
  # https://info.arxiv.org/help/api/user-manual.html
4
4
 
5
- import json
6
5
  import re
7
6
  from typing import Optional, List, Dict, Any, Union
8
7
  from datetime import datetime, date
9
8
 
10
9
  import xmltodict
10
+ from pydantic import BaseModel, Field
11
11
 
12
12
  from academia_mcp.utils import get_with_retries
13
13
 
@@ -17,6 +17,25 @@ SORT_BY_OPTIONS = ("relevance", "lastUpdatedDate", "submittedDate")
17
17
  SORT_ORDER_OPTIONS = ("ascending", "descending")
18
18
 
19
19
 
20
+ class ArxivSearchEntry(BaseModel): # type: ignore
21
+ id: str = Field(description="Paper ID")
22
+ title: str = Field(description="Paper title")
23
+ authors: str = Field(description="Authors of the paper")
24
+ published: str = Field(description="Published date of the paper")
25
+ updated: str = Field(description="Updated date of the paper")
26
+ categories: str = Field(description="Categories of the paper")
27
+ comment: str = Field(description="Comment of the paper")
28
+ index: int = Field(description="Index of the paper", default=0)
29
+ abstract: Optional[str] = Field(description="Abstract of the paper", default=None)
30
+
31
+
32
+ class ArxivSearchResponse(BaseModel): # type: ignore
33
+ total_count: int = Field(description="The total number of results")
34
+ returned_count: int = Field(description="The number of results returned")
35
+ offset: int = Field(description="The offset of the results")
36
+ results: List[ArxivSearchEntry] = Field(description="The results, search entries")
37
+
38
+
20
39
  def _format_text_field(text: str) -> str:
21
40
  return " ".join([line.strip() for line in text.split() if line.strip()])
22
41
 
@@ -48,17 +67,17 @@ def _format_date(date: str) -> str:
48
67
  return dt.strftime("%B %d, %Y")
49
68
 
50
69
 
51
- def _clean_entry(entry: Dict[str, Any]) -> Dict[str, Any]:
52
- return {
53
- "id": entry["id"].split("/")[-1],
54
- "title": _format_text_field(entry["title"]),
55
- "authors": _format_authors(entry["author"]),
56
- "abstract": _format_text_field(entry["summary"]),
57
- "published": _format_date(entry["published"]),
58
- "updated": _format_date(entry["updated"]),
59
- "categories": _format_categories(entry.get("category", {})),
60
- "comment": _format_text_field(entry.get("arxiv:comment", {}).get("#text", "")),
61
- }
70
+ def _clean_entry(entry: Dict[str, Any]) -> ArxivSearchEntry:
71
+ return ArxivSearchEntry(
72
+ id=entry["id"].split("/")[-1],
73
+ title=_format_text_field(entry["title"]),
74
+ authors=_format_authors(entry["author"]),
75
+ abstract=_format_text_field(entry["summary"]),
76
+ published=_format_date(entry["published"]),
77
+ updated=_format_date(entry["updated"]),
78
+ categories=_format_categories(entry.get("category", {})),
79
+ comment=_format_text_field(entry.get("arxiv:comment", {}).get("#text", "")),
80
+ )
62
81
 
63
82
 
64
83
  def _convert_to_yyyymmddtttt(date_str: str) -> str:
@@ -105,22 +124,19 @@ def _format_entries(
105
124
  start_index: int,
106
125
  include_abstracts: bool,
107
126
  total_results: int,
108
- ) -> str:
127
+ ) -> ArxivSearchResponse:
109
128
  clean_entries: List[Dict[str, Any]] = []
110
129
  for entry_num, entry in enumerate(entries):
111
130
  clean_entry = _clean_entry(entry)
112
131
  if not include_abstracts:
113
- clean_entry.pop("abstract")
114
- clean_entry["index"] = start_index + entry_num
132
+ clean_entry.abstract = None
133
+ clean_entry.index = start_index + entry_num
115
134
  clean_entries.append(clean_entry)
116
- return json.dumps(
117
- {
118
- "total_count": total_results,
119
- "returned_count": len(entries),
120
- "offset": start_index,
121
- "results": clean_entries,
122
- },
123
- ensure_ascii=False,
135
+ return ArxivSearchResponse(
136
+ total_count=total_results,
137
+ returned_count=len(entries),
138
+ offset=start_index,
139
+ results=clean_entries,
124
140
  )
125
141
 
126
142
 
@@ -133,7 +149,7 @@ def arxiv_search(
133
149
  sort_by: Optional[str] = "relevance",
134
150
  sort_order: Optional[str] = "descending",
135
151
  include_abstracts: Optional[bool] = False,
136
- ) -> str:
152
+ ) -> ArxivSearchResponse:
137
153
  """
138
154
  Search arXiv papers with field-specific queries.
139
155
 
@@ -158,12 +174,6 @@ def arxiv_search(
158
174
  all:role OR all:playing OR all:"language model"
159
175
  (au:vaswani OR au:"del maestro") ANDNOT ti:attention
160
176
 
161
- Returns a JSON object serialized to a string. The structure is:
162
- {"total_count": ..., "returned_count": ..., "offset": ..., "results": [...]}
163
- Every item in the "results" has the following fields:
164
- ("index", "id", "title", "authors", "abstract", "published", "updated", "categories", "comment")
165
- Use `json.loads` to deserialize the result if you want to get specific fields.
166
-
167
177
  Args:
168
178
  query: The search query, required.
169
179
  offset: The offset to scroll search results. 10 items will be skipped if offset=10. 0 by default.
@@ -211,10 +221,9 @@ def arxiv_search(
211
221
  entries = feed.get("entry", [])
212
222
  if isinstance(entries, dict):
213
223
  entries = [entries]
214
- formatted_entries: str = _format_entries(
224
+ return _format_entries(
215
225
  entries,
216
226
  start_index=start_index,
217
227
  total_results=total_results,
218
228
  include_abstracts=include_abstracts,
219
229
  )
220
- return formatted_entries
@@ -30,7 +30,20 @@ DESCRIBE_PROMPTS = {
30
30
  4. Any immediate tactical opportunities or threats
31
31
  5. Suggested next moves with brief explanations"""
32
32
  ),
33
- "text": "Extract and describe any text present in this image. If there are multiple pieces of text, organize them clearly.",
33
+ "text": dedent(
34
+ """You are performing OCR and transcription.
35
+ Extract ALL text and numbers from the image verbatim.
36
+ - Preserve original casing, punctuation, symbols, mathematical notation, and whitespace layout when possible.
37
+ - If layout is multi-column or tabular, reconstruct lines top-to-bottom, left-to-right; use line breaks between blocks.
38
+ - For any uncertain or low-confidence characters, mark with a '?' and include a note.
39
+ - After the raw extraction, provide a clean, normalized version (fixing obvious OCR artifacts) as a separate section.
40
+ Return two sections:
41
+ [RAW TRANSCRIPTION]
42
+ ...
43
+ [NORMALIZED]
44
+ ...
45
+ """
46
+ ),
34
47
  }
35
48
 
36
49
 
@@ -44,10 +57,8 @@ def show_image(path: str) -> Dict[str, str]:
44
57
  ```
45
58
  Do not print it ever, just return as the last expression.
46
59
 
47
- Returns an dictionary with a single "image" key.
48
-
49
60
  Args:
50
- url: Path to file inside current work directory or web URL
61
+ path: Path to file inside current work directory or web URL
51
62
  """
52
63
  if path.startswith("http"):
53
64
  response = httpx.get(path, timeout=10)
@@ -80,7 +91,7 @@ async def describe_image(
80
91
  - "general": General description of the image
81
92
  - "detailed": Detailed analysis of the image
82
93
  - "chess": Analysis of a chess position
83
- - "text": Extract and describe text from the image
94
+ - "text": Extract and describe text or numbers from the image
84
95
  - "custom": Custom description based on user prompt
85
96
  """
86
97
  image_base64 = show_image(path)["image_base64"]
@@ -93,12 +104,16 @@ async def describe_image(
93
104
  {"type": "text", "text": prompt},
94
105
  {
95
106
  "type": "image_url",
96
- "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
107
+ "image_url": {"url": f"data:image/png;base64,{image_base64}"},
97
108
  },
98
109
  ]
99
110
  model_name = settings.DESCRIBE_IMAGE_MODEL_NAME
111
+ llm_kwargs = {}
112
+ if description_type in {"text", "chess"}:
113
+ llm_kwargs["temperature"] = 0.0
100
114
  response = await llm_acall(
101
115
  model_name=model_name,
102
116
  messages=[ChatMessage(role="user", content=content)],
117
+ **llm_kwargs,
103
118
  )
104
119
  return response
@@ -33,12 +33,15 @@ def _tavily_visit_webpage(url: str) -> Dict[str, Any]:
33
33
  assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
34
34
  payload = {
35
35
  "urls": [url],
36
+ "extract_depth": "advanced",
37
+ "include_images": True,
36
38
  }
37
39
  response = post_with_retries(TAVILY_EXTRACT_URL, payload=payload, api_key=key)
38
40
  results = response.json()["results"]
39
41
  if not results:
40
42
  return {"error": ERROR_MESSAGE}
41
- return {"text": results[0]["raw_content"]}
43
+ result = results[0]
44
+ return {"text": result["raw_content"], "images": result["images"]}
42
45
 
43
46
 
44
47
  def _basic_visit_webpage(url: str) -> Dict[str, Any]:
@@ -13,8 +13,8 @@ def yt_transcript(video_url: str) -> str:
13
13
  Args:
14
14
  video_url (str): YouTube video URL.
15
15
  """
16
- if "youtu.be" in video_url:
17
- video_id = video_url.strip().split("youtu.be/")[1]
16
+ if "youtu.be/" in video_url:
17
+ video_id = video_url.strip().split("youtu.be/")[-1]
18
18
  else:
19
19
  video_id = video_url.strip().split("v=")[-1]
20
20
  video_id = video_id.split("?")[0]
@@ -25,6 +25,9 @@ def yt_transcript(video_url: str) -> str:
25
25
  proxy_password=settings.WEBSHARE_PROXY_PASSWORD,
26
26
  )
27
27
  api = YouTubeTranscriptApi(proxy_config=proxy_config)
28
- transcript = api.fetch(video_id)
28
+ try:
29
+ transcript = api.fetch(video_id)
30
+ except Exception as e:
31
+ return f"Error fetching transcript for video {video_url}: {e}"
29
32
  snippets = transcript.snippets
30
33
  return "\n".join([f"{int(entry.start)}: {' '.join(entry.text.split())}" for entry in snippets])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.10.8
3
+ Version: 1.11.0
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -4,7 +4,7 @@ academia_mcp/files.py,sha256=ynIt0XbU1Z7EPWkv_hVX0pGKsLlmjYv-MVJLOfi6yzs,817
4
4
  academia_mcp/llm.py,sha256=zpGkuJFf58Ofgys_fi28-47_wJ1a7sIs_yZvI1Si6z0,993
5
5
  academia_mcp/pdf.py,sha256=9PlXzHGhb6ay3ldbTdxCcTWvH4TkET3bnb64mgoh9i0,1273
6
6
  academia_mcp/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- academia_mcp/server.py,sha256=tZ57YkW2EcW4DRIk87n2PFZkkTjAVsVQ5lphvc1AVA4,5517
7
+ academia_mcp/server.py,sha256=s7rwsNePtz8ZJtzJ5FmvzStWR2ApArxiJROcRwyqrww,6102
8
8
  academia_mcp/settings.py,sha256=c5s4dI8V_cWmMED-jKDmHjfdIaBcxwEK4HdHNQ3WUIg,1096
9
9
  academia_mcp/utils.py,sha256=lRlb615JJ_0d4gcFpMoBjB6w0xXcde9dFDw0LwYpSPQ,4863
10
10
  academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty,sha256=hGcEPCYBJS4vdhWvN_yEaJC4GvT_yDroI94CfY2Oguk,12268
@@ -12,7 +12,7 @@ academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex,sha256=
12
12
  academia_mcp/tools/__init__.py,sha256=Z30vULZwUeUX5nDz5wcv0znhAeBtZRa0dvz7vD8SUYE,1555
13
13
  academia_mcp/tools/anthology_search.py,sha256=rhFpJZqGLABgr0raDuH0CARBiAJNJtEI4dlMrKNHfDQ,7669
14
14
  academia_mcp/tools/arxiv_download.py,sha256=gBY0_Kz0yGtVkLMwn6GrAyfBjovZVgcSMuyy67p65Cw,10474
15
- academia_mcp/tools/arxiv_search.py,sha256=pzM18qrF3QL03A53w003kE7hQi3s3QKtjgw0m7K88UY,8355
15
+ academia_mcp/tools/arxiv_search.py,sha256=Cb9x6SaHz5vjr7jwdq3U1PtvADfFJa5pq52z-0Rg8t0,8882
16
16
  academia_mcp/tools/bitflip.py,sha256=1B-EEcDnJjB9YmvVWsGv_Un19Bkeud9SZDw2TpGTCSg,12184
17
17
  academia_mcp/tools/document_qa.py,sha256=Wb2nEEVu9UyPp8ktHWeT9wS2JBle8fb9zRjTNVIDdBE,2463
18
18
  academia_mcp/tools/hf_datasets_search.py,sha256=KiBkqT4rXjEN4oc1AWZOPnqN_Go90TQogY5-DUm3LQo,2854
@@ -20,14 +20,14 @@ academia_mcp/tools/latex.py,sha256=B1Leqt1FHY6H3DlUgeYse4LMFpf4-K1FQViXl5MKk8A,6
20
20
  academia_mcp/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  academia_mcp/tools/review.py,sha256=Va0lFJJKuk-NvWhKS3UZ-Dnuk7CyuDQ4S1nd70D-ffE,11117
22
22
  academia_mcp/tools/s2.py,sha256=QX7-pbetab3Xt_1tvVPU6o5D_NAe9y6jcTGRBK1vwtY,6200
23
- academia_mcp/tools/show_image.py,sha256=jiJlQ53dbZ0T61OBhCT3IKVvBl9NHc6jHgWLfg5BxiE,3856
23
+ academia_mcp/tools/show_image.py,sha256=DWSnYMTn_dJpGTLL1r_sbX5XsB6p9z-vClApDANz84s,4534
24
24
  academia_mcp/tools/speech_to_text.py,sha256=YZzMqdvunzXkpcadP_mYhm6cs4qH1Y_42SfY-7eX4O4,1601
25
- academia_mcp/tools/visit_webpage.py,sha256=swlFwWRzWc7-AHP2ouRZJScSTA4dHZ32fuJnA2V0lUc,3311
25
+ academia_mcp/tools/visit_webpage.py,sha256=rMmjP2KXo_ElO7NPfUWnYN0EsRJHi2ikShekZR7pCms,3428
26
26
  academia_mcp/tools/web_search.py,sha256=VphVztf2jZNT3bPJPJuTdMkKbe2-LIbSV7keKV47lac,8616
27
- academia_mcp/tools/yt_transcript.py,sha256=NPBVGN-LG_N6yElQYBMEnbNhSjkM3-DPeVIJwlFGqA0,1104
28
- academia_mcp-1.10.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
29
- academia_mcp-1.10.8.dist-info/METADATA,sha256=Cw-6atEo6S0MAjBKkgsHZxw-GJCcTZWAiOSQGeftarM,6356
30
- academia_mcp-1.10.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
- academia_mcp-1.10.8.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
32
- academia_mcp-1.10.8.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
33
- academia_mcp-1.10.8.dist-info/RECORD,,
27
+ academia_mcp/tools/yt_transcript.py,sha256=ilfOpX14moC1bKHbFmOVvZ8-_NxuQQUoQbV28e9FBaE,1217
28
+ academia_mcp-1.11.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
29
+ academia_mcp-1.11.0.dist-info/METADATA,sha256=61gqVlxbohEvg2IwsRyDfX2X8u5Dsw-IUrAbm_o6Hdg,6356
30
+ academia_mcp-1.11.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
+ academia_mcp-1.11.0.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
32
+ academia_mcp-1.11.0.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
33
+ academia_mcp-1.11.0.dist-info/RECORD,,