academia-mcp 1.11.3__tar.gz → 1.11.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/PKG-INFO +12 -8
  2. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/README.md +6 -2
  3. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/server.py +3 -1
  4. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/__init__.py +9 -2
  5. academia_mcp-1.11.3/academia_mcp/tools/show_image.py → academia_mcp-1.11.5/academia_mcp/tools/image_processing.py +74 -6
  6. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/s2.py +68 -19
  7. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/utils.py +1 -1
  8. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp.egg-info/PKG-INFO +12 -8
  9. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp.egg-info/SOURCES.txt +2 -2
  10. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp.egg-info/requires.txt +5 -5
  11. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/pyproject.toml +6 -6
  12. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_bitflip.py +0 -15
  13. academia_mcp-1.11.3/tests/test_show_image.py → academia_mcp-1.11.5/tests/test_image_processing.py +6 -0
  14. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_s2.py +29 -0
  15. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_visit_webpage.py +0 -2
  16. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/LICENSE +0 -0
  17. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/__init__.py +0 -0
  18. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/__main__.py +0 -0
  19. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/files.py +0 -0
  20. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
  21. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
  22. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/llm.py +0 -0
  23. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/pdf.py +0 -0
  24. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/py.typed +0 -0
  25. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/settings.py +0 -0
  26. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/anthology_search.py +0 -0
  27. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/arxiv_download.py +0 -0
  28. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/arxiv_search.py +0 -0
  29. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/bitflip.py +0 -0
  30. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/document_qa.py +0 -0
  31. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/hf_datasets_search.py +0 -0
  32. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/latex.py +0 -0
  33. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/py.typed +0 -0
  34. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/review.py +0 -0
  35. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/speech_to_text.py +0 -0
  36. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/visit_webpage.py +0 -0
  37. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/web_search.py +0 -0
  38. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp/tools/yt_transcript.py +0 -0
  39. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp.egg-info/dependency_links.txt +0 -0
  40. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp.egg-info/entry_points.txt +0 -0
  41. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/academia_mcp.egg-info/top_level.txt +0 -0
  42. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/setup.cfg +0 -0
  43. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_anthology_search.py +0 -0
  44. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_arxiv_download.py +0 -0
  45. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_arxiv_search.py +0 -0
  46. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_document_qa.py +0 -0
  47. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_extract_json.py +0 -0
  48. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_hf_dataset_search.py +0 -0
  49. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_latex.py +0 -0
  50. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_review.py +0 -0
  51. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_server.py +0 -0
  52. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_speech_to_text.py +0 -0
  53. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_web_search.py +0 -0
  54. {academia_mcp-1.11.3 → academia_mcp-1.11.5}/tests/test_yt_transcript.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.11.3
3
+ Version: 1.11.5
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -12,16 +12,14 @@ Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
13
  Requires-Dist: mcp>=1.10.1
14
14
  Requires-Dist: xmltodict>=0.14.0
15
- Requires-Dist: types-xmltodict>=0.14.0
16
15
  Requires-Dist: requests>=2.32.0
17
- Requires-Dist: types-requests>=2.32.0
18
16
  Requires-Dist: pypdf>=5.1.0
19
17
  Requires-Dist: beautifulsoup4>=4.12.0
20
- Requires-Dist: types-beautifulsoup4>=4.12.0
21
18
  Requires-Dist: markdownify==0.14.1
19
+ Requires-Dist: types-xmltodict>=0.14.0
20
+ Requires-Dist: types-requests>=2.32.0
21
+ Requires-Dist: types-beautifulsoup4>=4.12.0
22
22
  Requires-Dist: acl-anthology==0.5.2
23
- Requires-Dist: markdown==3.7.0
24
- Requires-Dist: types-markdown==3.7.0.20250322
25
23
  Requires-Dist: huggingface-hub>=0.32.4
26
24
  Requires-Dist: fire>=0.7.0
27
25
  Requires-Dist: openai>=1.97.1
@@ -31,6 +29,8 @@ Requires-Dist: pymupdf>=1.26.4
31
29
  Requires-Dist: pillow>=11.3.0
32
30
  Requires-Dist: pydantic-settings>=2.6.0
33
31
  Requires-Dist: youtube-transcript-api>=1.2.2
32
+ Requires-Dist: paddlepaddle>=3.2.0
33
+ Requires-Dist: paddleocr>=3.2.0
34
34
  Dynamic: license-file
35
35
 
36
36
  # Academia MCP
@@ -70,12 +70,16 @@ make install
70
70
  ### Quickstart
71
71
  - Run over HTTP (default transport):
72
72
  ```bash
73
+ python -m academia_mcp --transport streamable-http
74
+ # OR
73
75
  uv run -m academia_mcp --transport streamable-http
74
76
  ```
75
77
 
76
78
  - Run over stdio (for local MCP clients like Claude Desktop):
77
79
  ```bash
78
80
  python -m academia_mcp --transport stdio
81
+ # OR
82
+ uv run -m academia_mcp --transport stdio
79
83
  ```
80
84
 
81
85
  Notes:
@@ -122,7 +126,7 @@ Availability notes:
122
126
  - Set one or more of `EXA_API_KEY`, `BRAVE_API_KEY`, `TAVILY_API_KEY` to enable `web_search` and provider tools.
123
127
 
124
128
  ### Environment variables
125
- Set as needed depending on which tools you use:
129
+ Set as needed, depending on which tools you use:
126
130
 
127
131
  - `OPENROUTER_API_KEY`: required for LLM-related tools.
128
132
  - `BASE_URL`: override OpenRouter base URL.
@@ -152,7 +156,7 @@ docker run --rm -p 5056:5056 \
152
156
  academia_mcp
153
157
  ```
154
158
 
155
- Or use existing image: `phoenix120/academia_mcp`
159
+ Or use an existing image: [`phoenix120/academia_mcp`](https://hub.docker.com/repository/docker/phoenix120/academia_mcp)
156
160
 
157
161
  ### Examples
158
162
  - [Comprehensive report screencast (YouTube)](https://www.youtube.com/watch?v=4bweqQcN6w8)
@@ -35,12 +35,16 @@ make install
35
35
  ### Quickstart
36
36
  - Run over HTTP (default transport):
37
37
  ```bash
38
+ python -m academia_mcp --transport streamable-http
39
+ # OR
38
40
  uv run -m academia_mcp --transport streamable-http
39
41
  ```
40
42
 
41
43
  - Run over stdio (for local MCP clients like Claude Desktop):
42
44
  ```bash
43
45
  python -m academia_mcp --transport stdio
46
+ # OR
47
+ uv run -m academia_mcp --transport stdio
44
48
  ```
45
49
 
46
50
  Notes:
@@ -87,7 +91,7 @@ Availability notes:
87
91
  - Set one or more of `EXA_API_KEY`, `BRAVE_API_KEY`, `TAVILY_API_KEY` to enable `web_search` and provider tools.
88
92
 
89
93
  ### Environment variables
90
- Set as needed depending on which tools you use:
94
+ Set as needed, depending on which tools you use:
91
95
 
92
96
  - `OPENROUTER_API_KEY`: required for LLM-related tools.
93
97
  - `BASE_URL`: override OpenRouter base URL.
@@ -117,7 +121,7 @@ docker run --rm -p 5056:5056 \
117
121
  academia_mcp
118
122
  ```
119
123
 
120
- Or use existing image: `phoenix120/academia_mcp`
124
+ Or use an existing image: [`phoenix120/academia_mcp`](https://hub.docker.com/repository/docker/phoenix120/academia_mcp)
121
125
 
122
126
  ### Examples
123
127
  - [Comprehensive report screencast (YouTube)](https://www.youtube.com/watch?v=4bweqQcN6w8)
@@ -17,6 +17,7 @@ from academia_mcp.tools.s2 import (
17
17
  s2_get_references,
18
18
  s2_corpus_id_from_arxiv_id,
19
19
  s2_get_info,
20
+ s2_search,
20
21
  )
21
22
  from academia_mcp.tools.hf_datasets_search import hf_datasets_search
22
23
  from academia_mcp.tools.anthology_search import anthology_search
@@ -40,7 +41,7 @@ from academia_mcp.tools.bitflip import (
40
41
  score_research_proposals,
41
42
  )
42
43
  from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
43
- from academia_mcp.tools.show_image import show_image, describe_image
44
+ from academia_mcp.tools.image_processing import show_image, describe_image
44
45
  from academia_mcp.tools.speech_to_text import speech_to_text
45
46
  from academia_mcp.tools.yt_transcript import yt_transcript
46
47
 
@@ -86,6 +87,7 @@ def create_server(
86
87
  server.add_tool(s2_get_citations, structured_output=True)
87
88
  server.add_tool(s2_get_references, structured_output=True)
88
89
  server.add_tool(s2_get_info, structured_output=True)
90
+ server.add_tool(s2_search, structured_output=True)
89
91
  server.add_tool(s2_corpus_id_from_arxiv_id)
90
92
  server.add_tool(hf_datasets_search)
91
93
  server.add_tool(anthology_search)
@@ -2,7 +2,13 @@ from .arxiv_search import arxiv_search
2
2
  from .anthology_search import anthology_search
3
3
  from .arxiv_download import arxiv_download
4
4
  from .hf_datasets_search import hf_datasets_search
5
- from .s2 import s2_get_references, s2_get_citations, s2_corpus_id_from_arxiv_id, s2_get_info
5
+ from .s2 import (
6
+ s2_get_references,
7
+ s2_get_citations,
8
+ s2_corpus_id_from_arxiv_id,
9
+ s2_get_info,
10
+ s2_search,
11
+ )
6
12
  from .document_qa import document_qa
7
13
  from .latex import (
8
14
  compile_latex,
@@ -14,7 +20,7 @@ from .web_search import web_search, tavily_web_search, exa_web_search, brave_web
14
20
  from .visit_webpage import visit_webpage
15
21
  from .bitflip import extract_bitflip_info, generate_research_proposals, score_research_proposals
16
22
  from .review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url
17
- from .show_image import show_image, describe_image
23
+ from .image_processing import show_image, describe_image
18
24
  from .speech_to_text import speech_to_text
19
25
  from .yt_transcript import yt_transcript
20
26
 
@@ -26,6 +32,7 @@ __all__ = [
26
32
  "s2_get_citations",
27
33
  "s2_corpus_id_from_arxiv_id",
28
34
  "s2_get_info",
35
+ "s2_search",
29
36
  "hf_datasets_search",
30
37
  "document_qa",
31
38
  "compile_latex",
@@ -1,16 +1,28 @@
1
+ import asyncio
1
2
  import base64
2
- from pathlib import Path
3
+ import contextlib
4
+ import json
5
+ import logging
6
+ import os
7
+ import threading
3
8
  from io import BytesIO
4
- from typing import Dict, Optional
9
+ from pathlib import Path
5
10
  from textwrap import dedent
11
+ from typing import Dict, List, Optional, Any
6
12
 
7
13
  import httpx
14
+ from paddleocr import PaddleOCR # type: ignore
8
15
  from PIL import Image
16
+ from pydantic import BaseModel
9
17
 
10
18
  from academia_mcp.files import get_workspace_dir
19
+ from academia_mcp.llm import ChatMessage, llm_acall
11
20
  from academia_mcp.settings import settings
12
- from academia_mcp.llm import llm_acall, ChatMessage
13
21
 
22
+ paddlex_logger = logging.getLogger("paddlex")
23
+ paddleocr_logger = logging.getLogger("paddleocr")
24
+ paddlex_logger.setLevel(logging.ERROR)
25
+ paddleocr_logger.setLevel(logging.ERROR)
14
26
 
15
27
  DESCRIBE_PROMPTS = {
16
28
  "general": "Provide a general description of this image. Focus on the main subjects, colors, and overall scene.",
@@ -37,16 +49,64 @@ DESCRIBE_PROMPTS = {
37
49
  - If layout is multi-column or tabular, reconstruct lines top-to-bottom, left-to-right; use line breaks between blocks.
38
50
  - For any uncertain or low-confidence characters, mark with a '?' and include a note.
39
51
  - After the raw extraction, provide a clean, normalized version (fixing obvious OCR artifacts) as a separate section.
40
- Return two sections:
52
+ Return three sections:
53
+ [GENERAL IMAGE DESCRIPTION]
54
+ ...
41
55
  [RAW TRANSCRIPTION]
42
56
  ...
43
- [NORMALIZED]
57
+ [NORMALIZED TRANSCRIPTION]
44
58
  ...
45
59
  """
46
60
  ),
47
61
  }
48
62
 
49
63
 
64
+ class OCRBox(BaseModel): # type: ignore
65
+ poly: List[List[float]]
66
+ text: str
67
+ score: float
68
+
69
+
70
+ class OCRSingleton:
71
+ instance: Optional[PaddleOCR] = None
72
+ lock: threading.Lock = threading.Lock()
73
+
74
+ @classmethod
75
+ def get(cls) -> PaddleOCR:
76
+ if cls.instance is not None:
77
+ return cls.instance
78
+ with cls.lock:
79
+ if cls.instance is None:
80
+ with open(os.devnull, "w") as devnull:
81
+ with contextlib.redirect_stderr(devnull):
82
+ cls.instance = PaddleOCR(
83
+ use_doc_orientation_classify=False,
84
+ use_doc_unwarping=False,
85
+ use_textline_orientation=False,
86
+ )
87
+ return cls.instance
88
+
89
+
90
+ async def _run_ocr(path: str) -> Dict[str, Any]:
91
+ def _sync_ocr(path: str) -> Dict[str, Any]:
92
+ try:
93
+ ocr = OCRSingleton.get()
94
+ with open(os.devnull, "w") as devnull:
95
+ with contextlib.redirect_stderr(devnull):
96
+ result = ocr.predict(input=path)[0]
97
+ rec_texts = result["rec_texts"]
98
+ rec_scores = result["rec_scores"]
99
+ rec_polys = result["rec_polys"]
100
+ except Exception as e:
101
+ return {"error": str(e)}
102
+ items = []
103
+ for poly, text, score in zip(rec_polys, rec_texts, rec_scores):
104
+ items.append(OCRBox(poly=poly, text=text, score=score).model_dump())
105
+ return {"boxes": items}
106
+
107
+ return await asyncio.to_thread(_sync_ocr, path)
108
+
109
+
50
110
  def show_image(path: str) -> Dict[str, str]:
51
111
  """
52
112
  Reads an image from the specified URL or from the current work directory.
@@ -91,7 +151,7 @@ async def describe_image(
91
151
  - "general": General description of the image
92
152
  - "detailed": Detailed analysis of the image
93
153
  - "chess": Analysis of a chess position
94
- - "text": Extract and describe text or numbers from the image
154
+ - "text": Extract and describe text or numbers with an OCR pipeline.
95
155
  - "custom": Custom description based on user prompt
96
156
  """
97
157
  image_base64 = show_image(path)["image_base64"]
@@ -116,4 +176,12 @@ async def describe_image(
116
176
  messages=[ChatMessage(role="user", content=content)],
117
177
  **llm_kwargs,
118
178
  )
179
+ if description_type == "text":
180
+ ocr_response = await _run_ocr(path)
181
+ response = json.dumps(
182
+ {
183
+ "vlm_response": response,
184
+ "ocr_response": ocr_response if ocr_response else [],
185
+ }
186
+ )
119
187
  return response
@@ -8,10 +8,12 @@ from pydantic import BaseModel, Field
8
8
  from academia_mcp.utils import get_with_retries
9
9
 
10
10
 
11
- PAPER_URL_TEMPLATE = "https://api.semanticscholar.org/graph/v1/paper/{paper_id}?fields={fields}"
12
- CITATIONS_URL_TEMPLATE = "https://api.semanticscholar.org/graph/v1/paper/{paper_id}/citations?fields={fields}&offset={offset}&limit={limit}"
13
- REFERENCES_URL_TEMPLATE = "https://api.semanticscholar.org/graph/v1/paper/{paper_id}/references?fields={fields}&offset={offset}&limit={limit}"
14
- FIELDS = "title,authors,externalIds,venue,citationCount,publicationDate"
11
+ BASE_URL = "https://api.semanticscholar.org/graph/v1"
12
+ PAPER_URL_TEMPLATE = "{base_url}/paper/{paper_id}"
13
+ CITATIONS_URL_TEMPLATE = "{base_url}/paper/{paper_id}/citations"
14
+ REFERENCES_URL_TEMPLATE = "{base_url}/paper/{paper_id}/references"
15
+ SEARCH_URL_TEMPLATE = "{base_url}/paper/search"
16
+ FIELDS = "paperId,title,authors,externalIds,venue,citationCount,publicationDate"
15
17
 
16
18
 
17
19
  class S2PaperInfo(BaseModel): # type: ignore
@@ -38,7 +40,10 @@ def _format_authors(authors: List[Dict[str, Any]]) -> List[str]:
38
40
 
39
41
 
40
42
  def _clean_entry(entry: Dict[str, Any]) -> S2PaperInfo:
41
- entry = entry["citingPaper"] if "citingPaper" in entry else entry["citedPaper"]
43
+ if "citingPaper" in entry:
44
+ entry = entry["citingPaper"]
45
+ elif "citedPaper" in entry:
46
+ entry = entry["citedPaper"]
42
47
  external_ids = entry.get("externalIds")
43
48
  if not external_ids:
44
49
  external_ids = dict()
@@ -88,17 +93,17 @@ def s2_get_citations(
88
93
  arxiv_id = arxiv_id.split("v")[0]
89
94
  paper_id = f"arxiv:{arxiv_id}"
90
95
 
91
- url = CITATIONS_URL_TEMPLATE.format(
92
- paper_id=paper_id, fields=FIELDS, offset=offset, limit=limit
93
- )
94
- response = get_with_retries(url)
96
+ url = CITATIONS_URL_TEMPLATE.format(base_url=BASE_URL, paper_id=paper_id)
97
+ payload = {"fields": FIELDS, "offset": offset, "limit": limit}
98
+ response = get_with_retries(url, params=payload)
95
99
  result = response.json()
96
100
  entries = result["data"]
97
101
  total_count = len(result["data"]) + result["offset"]
98
102
 
99
103
  if "next" in result:
100
- paper_url = PAPER_URL_TEMPLATE.format(paper_id=paper_id, fields=FIELDS)
101
- paper_response = get_with_retries(paper_url)
104
+ paper_url = PAPER_URL_TEMPLATE.format(base_url=BASE_URL, paper_id=paper_id)
105
+ payload = {"fields": FIELDS}
106
+ paper_response = get_with_retries(paper_url, params=payload)
102
107
  paper_result = paper_response.json()
103
108
  total_count = paper_result["citationCount"]
104
109
 
@@ -123,10 +128,9 @@ def s2_get_references(
123
128
  arxiv_id = arxiv_id.split("v")[0]
124
129
  paper_id = f"arxiv:{arxiv_id}"
125
130
 
126
- url = REFERENCES_URL_TEMPLATE.format(
127
- paper_id=paper_id, fields=FIELDS, offset=offset, limit=limit
128
- )
129
- response = get_with_retries(url)
131
+ url = REFERENCES_URL_TEMPLATE.format(base_url=BASE_URL, paper_id=paper_id)
132
+ payload = {"fields": FIELDS, "offset": offset, "limit": limit}
133
+ response = get_with_retries(url, params=payload)
130
134
  result = response.json()
131
135
  entries = result["data"]
132
136
  total_count = len(result["data"]) + result["offset"]
@@ -143,8 +147,9 @@ def s2_corpus_id_from_arxiv_id(arxiv_id: str) -> int:
143
147
  assert isinstance(arxiv_id, str), "Error: Your arxiv_id must be a string"
144
148
  if "v" in arxiv_id:
145
149
  arxiv_id = arxiv_id.split("v")[0]
146
- paper_url = PAPER_URL_TEMPLATE.format(paper_id=f"arxiv:{arxiv_id}", fields="externalIds")
147
- response = get_with_retries(paper_url)
150
+ paper_url = PAPER_URL_TEMPLATE.format(base_url=BASE_URL, paper_id=f"arxiv:{arxiv_id}")
151
+ payload = {"fields": "externalIds"}
152
+ response = get_with_retries(paper_url, params=payload)
148
153
  result = response.json()
149
154
  return int(result["externalIds"]["CorpusId"])
150
155
 
@@ -159,8 +164,10 @@ def s2_get_info(arxiv_id: str) -> S2PaperInfo:
159
164
  assert isinstance(arxiv_id, str), "Error: Your arxiv_id must be a string"
160
165
  if "v" in arxiv_id:
161
166
  arxiv_id = arxiv_id.split("v")[0]
162
- paper_url = PAPER_URL_TEMPLATE.format(paper_id=f"arxiv:{arxiv_id}", fields=FIELDS)
163
- response = get_with_retries(paper_url)
167
+ paper_id = f"arxiv:{arxiv_id}"
168
+ payload = {"fields": FIELDS}
169
+ paper_url = PAPER_URL_TEMPLATE.format(base_url=BASE_URL, paper_id=paper_id)
170
+ response = get_with_retries(paper_url, params=payload)
164
171
  json_data = response.json()
165
172
  return S2PaperInfo(
166
173
  arxiv_id=json_data.get("externalIds", {}).get("ArXiv"),
@@ -171,3 +178,45 @@ def s2_get_info(arxiv_id: str) -> S2PaperInfo:
171
178
  citation_count=int(json_data.get("citationCount", 0)),
172
179
  publication_date=str(json_data.get("publicationDate", "")),
173
180
  )
181
+
182
+
183
+ def s2_search(
184
+ query: str,
185
+ offset: int = 0,
186
+ limit: int = 5,
187
+ min_citation_count: int = 0,
188
+ publication_date: Optional[str] = None,
189
+ ) -> S2SearchResponse:
190
+ """
191
+ Search the S2 corpus for a given query.
192
+
193
+ Args:
194
+ query: The query to search for.
195
+ offset: The offset to scroll through results. 10 items will be skipped if offset=10. 0 by default.
196
+ limit: The maximum number of items to return. limit=5 by default.
197
+ min_citation_count: The minimum citation count to return. 0 by default.
198
+ publication_date: Restricts results to the given range of publication dates or years (inclusive).
199
+ Accepts the format <startDate>:<endDate> with each date in YYYY-MM-DD format. None by default.
200
+ """
201
+ url = SEARCH_URL_TEMPLATE.format(base_url=BASE_URL)
202
+ payload = {
203
+ "query": query,
204
+ "offset": offset,
205
+ "limit": limit,
206
+ "minCitationCount": min_citation_count,
207
+ "fields": FIELDS,
208
+ }
209
+ if publication_date:
210
+ payload["publicationDateOrYear"] = publication_date
211
+ response = get_with_retries(url, params=payload, backoff_factor=10.0, num_retries=5)
212
+ result = response.json()
213
+ if "data" not in result:
214
+ return S2SearchResponse(
215
+ total_count=0,
216
+ returned_count=0,
217
+ offset=offset if offset else 0,
218
+ results=[],
219
+ )
220
+ entries = result["data"]
221
+ total_count = result["total"]
222
+ return _format_entries(entries, offset if offset else 0, total_count)
@@ -42,7 +42,7 @@ def post_with_retries(
42
42
  def get_with_retries(
43
43
  url: str,
44
44
  api_key: Optional[str] = None,
45
- timeout: int = 30,
45
+ timeout: int = 60,
46
46
  num_retries: int = 3,
47
47
  backoff_factor: float = 3.0,
48
48
  params: Optional[Dict[str, Any]] = None,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.11.3
3
+ Version: 1.11.5
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -12,16 +12,14 @@ Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
13
  Requires-Dist: mcp>=1.10.1
14
14
  Requires-Dist: xmltodict>=0.14.0
15
- Requires-Dist: types-xmltodict>=0.14.0
16
15
  Requires-Dist: requests>=2.32.0
17
- Requires-Dist: types-requests>=2.32.0
18
16
  Requires-Dist: pypdf>=5.1.0
19
17
  Requires-Dist: beautifulsoup4>=4.12.0
20
- Requires-Dist: types-beautifulsoup4>=4.12.0
21
18
  Requires-Dist: markdownify==0.14.1
19
+ Requires-Dist: types-xmltodict>=0.14.0
20
+ Requires-Dist: types-requests>=2.32.0
21
+ Requires-Dist: types-beautifulsoup4>=4.12.0
22
22
  Requires-Dist: acl-anthology==0.5.2
23
- Requires-Dist: markdown==3.7.0
24
- Requires-Dist: types-markdown==3.7.0.20250322
25
23
  Requires-Dist: huggingface-hub>=0.32.4
26
24
  Requires-Dist: fire>=0.7.0
27
25
  Requires-Dist: openai>=1.97.1
@@ -31,6 +29,8 @@ Requires-Dist: pymupdf>=1.26.4
31
29
  Requires-Dist: pillow>=11.3.0
32
30
  Requires-Dist: pydantic-settings>=2.6.0
33
31
  Requires-Dist: youtube-transcript-api>=1.2.2
32
+ Requires-Dist: paddlepaddle>=3.2.0
33
+ Requires-Dist: paddleocr>=3.2.0
34
34
  Dynamic: license-file
35
35
 
36
36
  # Academia MCP
@@ -70,12 +70,16 @@ make install
70
70
  ### Quickstart
71
71
  - Run over HTTP (default transport):
72
72
  ```bash
73
+ python -m academia_mcp --transport streamable-http
74
+ # OR
73
75
  uv run -m academia_mcp --transport streamable-http
74
76
  ```
75
77
 
76
78
  - Run over stdio (for local MCP clients like Claude Desktop):
77
79
  ```bash
78
80
  python -m academia_mcp --transport stdio
81
+ # OR
82
+ uv run -m academia_mcp --transport stdio
79
83
  ```
80
84
 
81
85
  Notes:
@@ -122,7 +126,7 @@ Availability notes:
122
126
  - Set one or more of `EXA_API_KEY`, `BRAVE_API_KEY`, `TAVILY_API_KEY` to enable `web_search` and provider tools.
123
127
 
124
128
  ### Environment variables
125
- Set as needed depending on which tools you use:
129
+ Set as needed, depending on which tools you use:
126
130
 
127
131
  - `OPENROUTER_API_KEY`: required for LLM-related tools.
128
132
  - `BASE_URL`: override OpenRouter base URL.
@@ -152,7 +156,7 @@ docker run --rm -p 5056:5056 \
152
156
  academia_mcp
153
157
  ```
154
158
 
155
- Or use existing image: `phoenix120/academia_mcp`
159
+ Or use an existing image: [`phoenix120/academia_mcp`](https://hub.docker.com/repository/docker/phoenix120/academia_mcp)
156
160
 
157
161
  ### Examples
158
162
  - [Comprehensive report screencast (YouTube)](https://www.youtube.com/watch?v=4bweqQcN6w8)
@@ -25,11 +25,11 @@ academia_mcp/tools/arxiv_search.py
25
25
  academia_mcp/tools/bitflip.py
26
26
  academia_mcp/tools/document_qa.py
27
27
  academia_mcp/tools/hf_datasets_search.py
28
+ academia_mcp/tools/image_processing.py
28
29
  academia_mcp/tools/latex.py
29
30
  academia_mcp/tools/py.typed
30
31
  academia_mcp/tools/review.py
31
32
  academia_mcp/tools/s2.py
32
- academia_mcp/tools/show_image.py
33
33
  academia_mcp/tools/speech_to_text.py
34
34
  academia_mcp/tools/visit_webpage.py
35
35
  academia_mcp/tools/web_search.py
@@ -41,11 +41,11 @@ tests/test_bitflip.py
41
41
  tests/test_document_qa.py
42
42
  tests/test_extract_json.py
43
43
  tests/test_hf_dataset_search.py
44
+ tests/test_image_processing.py
44
45
  tests/test_latex.py
45
46
  tests/test_review.py
46
47
  tests/test_s2.py
47
48
  tests/test_server.py
48
- tests/test_show_image.py
49
49
  tests/test_speech_to_text.py
50
50
  tests/test_visit_webpage.py
51
51
  tests/test_web_search.py
@@ -1,15 +1,13 @@
1
1
  mcp>=1.10.1
2
2
  xmltodict>=0.14.0
3
- types-xmltodict>=0.14.0
4
3
  requests>=2.32.0
5
- types-requests>=2.32.0
6
4
  pypdf>=5.1.0
7
5
  beautifulsoup4>=4.12.0
8
- types-beautifulsoup4>=4.12.0
9
6
  markdownify==0.14.1
7
+ types-xmltodict>=0.14.0
8
+ types-requests>=2.32.0
9
+ types-beautifulsoup4>=4.12.0
10
10
  acl-anthology==0.5.2
11
- markdown==3.7.0
12
- types-markdown==3.7.0.20250322
13
11
  huggingface-hub>=0.32.4
14
12
  fire>=0.7.0
15
13
  openai>=1.97.1
@@ -19,3 +17,5 @@ pymupdf>=1.26.4
19
17
  pillow>=11.3.0
20
18
  pydantic-settings>=2.6.0
21
19
  youtube-transcript-api>=1.2.2
20
+ paddlepaddle>=3.2.0
21
+ paddleocr>=3.2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "academia-mcp"
7
- version = "1.11.3"
7
+ version = "1.11.5"
8
8
  description = "MCP server that provides different tools to search for scientific publications"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -19,16 +19,14 @@ classifiers = [
19
19
  dependencies = [
20
20
  "mcp>=1.10.1",
21
21
  "xmltodict>=0.14.0",
22
- "types-xmltodict>=0.14.0",
23
22
  "requests>=2.32.0",
24
- "types-requests>=2.32.0",
25
23
  "pypdf>=5.1.0",
26
24
  "beautifulsoup4>=4.12.0",
27
- "types-beautifulsoup4>=4.12.0",
28
25
  "markdownify==0.14.1",
26
+ "types-xmltodict>=0.14.0",
27
+ "types-requests>=2.32.0",
28
+ "types-beautifulsoup4>=4.12.0",
29
29
  "acl-anthology==0.5.2",
30
- "markdown==3.7.0",
31
- "types-markdown==3.7.0.20250322",
32
30
  "huggingface-hub>=0.32.4",
33
31
  "fire>=0.7.0",
34
32
  "openai>=1.97.1",
@@ -38,6 +36,8 @@ dependencies = [
38
36
  "pillow>=11.3.0",
39
37
  "pydantic-settings>=2.6.0",
40
38
  "youtube-transcript-api>=1.2.2",
39
+ "paddlepaddle>=3.2.0",
40
+ "paddleocr>=3.2.0",
41
41
  ]
42
42
 
43
43
  [dependency-groups]
@@ -35,18 +35,3 @@ async def test_bitflip_score_research_proposals_base() -> None:
35
35
  assert scores.proposals[1].strengths is not None
36
36
  assert scores.proposals[0].weaknesses is not None
37
37
  assert scores.proposals[1].weaknesses is not None
38
-
39
-
40
- async def test_bitflip_score_research_proposals_str() -> None:
41
- arxiv_id = "2503.07826"
42
- bit = (await extract_bitflip_info(arxiv_id)).bit
43
- proposals = await generate_research_proposals(bit=bit, num_proposals=2)
44
- scores = await score_research_proposals(proposals)
45
- assert scores.proposals
46
- assert len(scores.proposals) == 2
47
- assert scores.proposals[0].spark is not None
48
- assert scores.proposals[1].spark is not None
49
- assert scores.proposals[0].strengths is not None
50
- assert scores.proposals[1].strengths is not None
51
- assert scores.proposals[0].weaknesses is not None
52
- assert scores.proposals[1].weaknesses is not None
@@ -29,3 +29,9 @@ async def test_describe_image_base(test_image_url: str) -> None:
29
29
  result = await describe_image(test_image_url)
30
30
  assert result is not None
31
31
  assert "Interrogator" in result
32
+
33
+
34
+ async def test_describe_image_text(test_image_url: str) -> None:
35
+ result = await describe_image(test_image_url, description_type="text")
36
+ assert result is not None
37
+ assert '"text": "Interrogator"' in result
@@ -3,6 +3,7 @@ from academia_mcp.tools import (
3
3
  s2_get_references,
4
4
  s2_corpus_id_from_arxiv_id,
5
5
  s2_get_info,
6
+ s2_search,
6
7
  )
7
8
 
8
9
 
@@ -40,3 +41,31 @@ def test_s2_get_info() -> None:
40
41
  assert info.citation_count is not None
41
42
  assert info.publication_date is not None
42
43
  assert info.external_ids["CorpusId"] == 272593138
44
+
45
+
46
+ def test_s2_search_base() -> None:
47
+ result = s2_search("transformers")
48
+ assert result.total_count >= 1
49
+ assert "transformers" in str(result.results).lower()
50
+ assert result.offset == 0
51
+ assert result.returned_count == 5
52
+
53
+
54
+ def test_s2_search_offset() -> None:
55
+ result = s2_search("transformers", offset=10)
56
+ assert result.total_count >= 1
57
+ assert "transformers" in str(result.results).lower()
58
+ assert result.offset == 10
59
+ assert result.returned_count == 5
60
+
61
+
62
+ def test_s2_search_min_citation_count() -> None:
63
+ result = s2_search("transformers", min_citation_count=100000)
64
+ assert result.total_count >= 2 and result.total_count <= 10
65
+
66
+
67
+ def test_s2_search_publication_date() -> None:
68
+ result = s2_search(
69
+ "transformers", min_citation_count=100000, publication_date="2017-01-01:2017-12-31"
70
+ )
71
+ assert result.total_count == 1
@@ -7,7 +7,6 @@ def test_visit_webpage_basic() -> None:
7
7
  assert content.id == "https://example.com/"
8
8
  assert content.provider == "basic"
9
9
  assert "Example Domain" in content.text
10
- assert "illustrative" in content.text
11
10
 
12
11
 
13
12
  def test_visit_webpage_exa() -> None:
@@ -16,7 +15,6 @@ def test_visit_webpage_exa() -> None:
16
15
  assert content.id == "https://example.com/"
17
16
  assert content.provider == "exa"
18
17
  assert "Example Domain" in content.text
19
- assert "illustrative" in content.text
20
18
 
21
19
 
22
20
  def test_visit_webpage_pdf() -> None:
File without changes
File without changes