academia-mcp 1.9.2__tar.gz → 1.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/PKG-INFO +1 -1
  2. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/server.py +8 -1
  3. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/settings.py +2 -0
  4. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/__init__.py +4 -1
  5. academia_mcp-1.10.0/academia_mcp/tools/show_image.py +104 -0
  6. academia_mcp-1.10.0/academia_mcp/tools/speech_to_text.py +48 -0
  7. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/visit_webpage.py +18 -3
  8. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp.egg-info/PKG-INFO +1 -1
  9. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp.egg-info/SOURCES.txt +2 -0
  10. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/pyproject.toml +1 -1
  11. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_show_image.py +7 -1
  12. academia_mcp-1.10.0/tests/test_speech_to_text.py +21 -0
  13. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_visit_webpage.py +12 -0
  14. academia_mcp-1.9.2/academia_mcp/tools/show_image.py +0 -41
  15. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/LICENSE +0 -0
  16. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/README.md +0 -0
  17. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/__init__.py +0 -0
  18. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/__main__.py +0 -0
  19. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/files.py +0 -0
  20. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
  21. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
  22. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/llm.py +0 -0
  23. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/pdf.py +0 -0
  24. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/py.typed +0 -0
  25. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/anthology_search.py +0 -0
  26. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/arxiv_download.py +0 -0
  27. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/arxiv_search.py +0 -0
  28. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/bitflip.py +0 -0
  29. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/document_qa.py +0 -0
  30. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/hf_datasets_search.py +0 -0
  31. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/latex.py +0 -0
  32. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/py.typed +0 -0
  33. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/review.py +0 -0
  34. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/s2.py +0 -0
  35. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/tools/web_search.py +0 -0
  36. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp/utils.py +0 -0
  37. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp.egg-info/dependency_links.txt +0 -0
  38. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp.egg-info/entry_points.txt +0 -0
  39. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp.egg-info/requires.txt +0 -0
  40. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/academia_mcp.egg-info/top_level.txt +0 -0
  41. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/setup.cfg +0 -0
  42. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_anthology_search.py +0 -0
  43. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_arxiv_download.py +0 -0
  44. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_arxiv_search.py +0 -0
  45. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_bitflip.py +0 -0
  46. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_document_qa.py +0 -0
  47. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_extract_json.py +0 -0
  48. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_hf_dataset_search.py +0 -0
  49. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_latex.py +0 -0
  50. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_review.py +0 -0
  51. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_s2.py +0 -0
  52. {academia_mcp-1.9.2 → academia_mcp-1.10.0}/tests/test_web_search.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.9.2
3
+ Version: 1.10.0
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -38,7 +38,8 @@ from academia_mcp.tools.bitflip import (
38
38
  score_research_proposals,
39
39
  )
40
40
  from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
41
- from academia_mcp.tools.show_image import show_image
41
+ from academia_mcp.tools.show_image import show_image, describe_image
42
+ from academia_mcp.tools.speech_to_text import speech_to_text
42
43
 
43
44
 
44
45
  def configure_uvicorn_style_logging(level: int = logging.INFO) -> None:
@@ -116,11 +117,17 @@ def run(
116
117
  server.add_tool(generate_research_proposals)
117
118
  server.add_tool(score_research_proposals)
118
119
  server.add_tool(document_qa)
120
+ server.add_tool(describe_image)
119
121
  if settings.WORKSPACE_DIR:
120
122
  server.add_tool(review_pdf_paper)
121
123
  else:
122
124
  logger.warning("No OpenRouter API key is set, LLM-related tools will not be available!")
123
125
 
126
+ if settings.OPENAI_API_KEY:
127
+ server.add_tool(speech_to_text)
128
+ else:
129
+ logger.warning("No OpenAI API key is set, speech_to_text will not be available!")
130
+
124
131
  if port is None:
125
132
  if settings.PORT is not None:
126
133
  port = int(settings.PORT)
@@ -11,6 +11,7 @@ class Settings(BaseSettings):
11
11
  TAVILY_API_KEY: Optional[str] = None
12
12
  EXA_API_KEY: Optional[str] = None
13
13
  BRAVE_API_KEY: Optional[str] = None
14
+ OPENAI_API_KEY: Optional[str] = None
14
15
 
15
16
  REVIEW_MODEL_NAME: str = "gpt-5"
16
17
  BITFLIP_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
@@ -18,6 +19,7 @@ class Settings(BaseSettings):
18
19
  DOCUMENT_QA_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
19
20
  DOCUMENT_QA_QUESTION_MAX_LENGTH: int = 10000
20
21
  DOCUMENT_QA_DOCUMENT_MAX_LENGTH: int = 200000
22
+ DESCRIBE_IMAGE_MODEL_NAME: str = "gpt-4.1"
21
23
 
22
24
  PORT: int = 5056
23
25
  WORKSPACE_DIR: Optional[Path] = None
@@ -14,7 +14,8 @@ from .web_search import web_search, tavily_web_search, exa_web_search, brave_web
14
14
  from .visit_webpage import visit_webpage
15
15
  from .bitflip import extract_bitflip_info, generate_research_proposals, score_research_proposals
16
16
  from .review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url
17
- from .show_image import show_image
17
+ from .show_image import show_image, describe_image
18
+ from .speech_to_text import speech_to_text
18
19
 
19
20
  __all__ = [
20
21
  "arxiv_search",
@@ -42,4 +43,6 @@ __all__ = [
42
43
  "download_pdf_paper",
43
44
  "read_pdf",
44
45
  "show_image",
46
+ "describe_image",
47
+ "speech_to_text",
45
48
  ]
@@ -0,0 +1,104 @@
1
+ import base64
2
+ from pathlib import Path
3
+ from io import BytesIO
4
+ from typing import Dict, Optional
5
+ from textwrap import dedent
6
+
7
+ import httpx
8
+ from PIL import Image
9
+
10
+ from academia_mcp.files import get_workspace_dir
11
+ from academia_mcp.settings import settings
12
+ from academia_mcp.llm import llm_acall, ChatMessage
13
+
14
+
15
# Prompt templates keyed by the `description_type` values accepted by
# `describe_image` ("custom" is handled separately and has no entry here).
#
# Each multi-line template opens with a line continuation so that its first
# text line carries the same indentation as the following ones: `dedent` only
# removes a margin shared by *every* line, so text placed directly after the
# opening quotes would leave the numbered items indented in the final prompt.
DESCRIBE_PROMPTS = {
    "general": "Provide a general description of this image. Focus on the main subjects, colors, and overall scene.",
    "detailed": dedent(
        """\
        Analyze this image in detail. Include:
        1. Main subjects and their relationships
        2. Colors, lighting, and composition
        3. Any text or symbols present
        4. Context or possible meaning
        5. Notable details or interesting elements"""
    ),
    "chess": dedent(
        """\
        Analyze this chess position and provide a detailed description including:
        1. List of pieces on the board for both white and black
        2. Whose turn it is to move
        3. Basic evaluation of the position
        4. Any immediate tactical opportunities or threats
        5. Suggested next moves with brief explanations"""
    ),
    "text": "Extract and describe any text present in this image. If there are multiple pieces of text, organize them clearly.",
}
35
+
36
+
37
def show_image(path: str) -> Dict[str, str]:
    """
    Reads an image from the specified URL or from the current work directory.
    Always call this function at the end of the code block.
    For instance:
    ```python
    show_image("https://example.com/image.png")
    ```
    Do not print it ever, just return as the last expression.

    Returns a dictionary with a single "image_base64" key that holds the
    image re-encoded as PNG and base64-encoded.

    Args:
        path: Path to file inside current work directory or web URL
    """
    if path.startswith("http"):
        response = httpx.get(path, timeout=10)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
    else:
        assert settings.WORKSPACE_DIR is not None, "WORKSPACE_DIR is not set"
        # Try the path as given first, then fall back to the workspace directory.
        full_path = Path(path)
        if not full_path.exists():
            full_path = Path(get_workspace_dir()) / path
        assert full_path.exists(), f"Image file {path} does not exist"
        image = Image.open(str(full_path))

    # The context manager releases the underlying file handle once encoding is done.
    with image:
        buffer_io = BytesIO()
        try:
            image.save(buffer_io, format="PNG")
        except OSError:
            # PNG cannot store every Pillow mode (e.g. CMYK JPEGs); retry with
            # an RGB copy into a fresh buffer so no partial output is kept.
            buffer_io = BytesIO()
            image.convert("RGB").save(buffer_io, format="PNG")
    img_bytes = buffer_io.getvalue()
    return {"image_base64": base64.b64encode(img_bytes).decode("utf-8")}
67
+
68
+
69
async def describe_image(
    path: str, description_type: str = "general", custom_prompt: Optional[str] = None
) -> str:
    """
    Tool to analyze and describe any image using a vision-capable LLM
    (the model is taken from settings.DESCRIBE_IMAGE_MODEL_NAME).

    Returns a description of the image based on the requested type.

    Args:
        path (str): Path to the image file inside the work directory or a web URL.
        description_type (str): Type of description to generate. Options:
            - "general": General description of the image
            - "detailed": Detailed analysis of the image
            - "chess": Analysis of a chess position
            - "text": Extract and describe text from the image
            - "custom": Custom description based on user prompt
        custom_prompt (str, optional): Prompt to use when description_type is "custom".
            Ignored for every other description type.
    """
    # Validate the cheap arguments first so that a malformed request fails
    # before the image is downloaded or read from disk.
    assert (
        description_type in DESCRIBE_PROMPTS or description_type == "custom"
    ), f"Invalid description type: {description_type}"
    prompt = DESCRIBE_PROMPTS.get(description_type, custom_prompt)
    assert prompt and prompt.strip(), "Please provide a non-empty prompt"

    # NOTE(review): show_image is synchronous, so a remote download blocks the
    # event loop for its duration.
    image_base64 = show_image(path)["image_base64"]
    content = [
        {"type": "text", "text": prompt},
        {
            "type": "image_url",
            # show_image always re-encodes the picture as PNG, so the data URL
            # must advertise that media type.
            "image_url": {"url": f"data:image/png;base64,{image_base64}"},
        },
    ]
    response = await llm_acall(
        model_name=settings.DESCRIBE_IMAGE_MODEL_NAME,
        messages=[ChatMessage(role="user", content=content)],
    )
    return response
@@ -0,0 +1,48 @@
1
+ from pathlib import Path
2
+ from io import BytesIO
3
+
4
+ import httpx
5
+ from openai import AsyncOpenAI
6
+ from academia_mcp.files import get_workspace_dir
7
+
8
+ from academia_mcp.settings import settings
9
+
10
+
11
async def speech_to_text(audio_path: str, provider: str = "openai") -> str:
    """
    Tool to convert speech to text using OpenAI's gpt-4o-transcribe model.

    Returns transcribed text from the audio file.

    Args:
        audio_path (str): Path to the audio file or a web URL.
        provider (str): Provider to use. Currently only "openai" is supported.
    """

    AVAILABLE_PROVIDERS = ("openai",)
    assert (
        provider in AVAILABLE_PROVIDERS
    ), f"Invalid provider: {provider}. Available providers: {AVAILABLE_PROVIDERS}"

    if audio_path.startswith("http"):
        # Use the async client so the download does not block the event loop.
        async with httpx.AsyncClient(timeout=10) as http_client:
            response = await http_client.get(audio_path)
        response.raise_for_status()
        # Take the extension from the URL path only, so query strings and
        # fragments cannot leak into the file name.
        suffix = Path(httpx.URL(audio_path).path).suffix
        audio_file = BytesIO(response.content)
        audio_file.name = f"audio_file{suffix}"
    else:
        # Try the path as given first, then fall back to the workspace directory.
        full_audio_path = Path(audio_path)
        if not full_audio_path.exists():
            full_audio_path = Path(get_workspace_dir()) / audio_path
        assert full_audio_path.exists(), f"Audio file {audio_path} does not exist"
        # read_bytes opens and closes the file itself, leaving no dangling handle.
        audio_file = BytesIO(full_audio_path.read_bytes())
        audio_file.name = full_audio_path.name

    # Dispatch guard: only the OpenAI backend is implemented below.
    assert provider == "openai"
    client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
    result = await client.audio.transcriptions.create(
        model="gpt-4o-transcribe",
        file=audio_file,
        response_format="text",
    )
    return result
@@ -8,7 +8,8 @@ from academia_mcp.utils import get_with_retries, post_with_retries
8
8
  from academia_mcp.settings import settings
9
9
 
10
10
  EXA_CONTENTS_URL = "https://api.exa.ai/contents"
11
- AVAILABLE_PROVIDERS = ("basic", "exa")
11
+ TAVILY_EXTRACT_URL = "https://api.tavily.com/extract"
12
+ AVAILABLE_PROVIDERS = ("basic", "exa", "tavily")
12
13
 
13
14
 
14
15
  def _exa_visit_webpage(url: str) -> str:
@@ -22,7 +23,17 @@ def _exa_visit_webpage(url: str) -> str:
22
23
  return json.dumps(response.json()["results"][0])
23
24
 
24
25
 
25
- def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
26
def _tavily_visit_webpage(url: str) -> str:
    """
    Fetch the content of a page through the Tavily extract endpoint.

    Args:
        url: The URL of the webpage to extract.

    Returns:
        The JSON-encoded "raw_content" of the first extraction result.

    Raises:
        AssertionError: If TAVILY_API_KEY is not configured.
        ValueError: If the response carries no extraction results.
    """
    key = settings.TAVILY_API_KEY or ""
    assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
    payload = {
        "urls": [url],
    }
    response = post_with_retries(TAVILY_EXTRACT_URL, payload=payload, api_key=key)
    data = response.json()
    results = data.get("results") or []
    if not results:
        # Report the failure explicitly instead of an opaque IndexError.
        # NOTE(review): presumably Tavily lists per-URL errors under
        # "failed_results" — confirm against the API reference.
        failed = data.get("failed_results")
        raise ValueError(f"Tavily returned no content for {url}: {failed}")
    return json.dumps(results[0]["raw_content"])
34
+
35
+
36
+ def visit_webpage(url: str, provider: Optional[str] = "tavily") -> str:
26
37
  """
27
38
  Visit a webpage and return the content.
28
39
 
@@ -32,7 +43,7 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
32
43
 
33
44
  Args:
34
45
  url: The URL of the webpage to visit.
35
- provider: The provider to use. Available providers: "basic" (default) or "exa".
46
+ provider: The provider to use. Available providers: "tavily" (default), "exa", or "basic".
36
47
  """
37
48
  assert (
38
49
  provider in AVAILABLE_PROVIDERS
@@ -40,6 +51,10 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
40
51
 
41
52
  if provider == "exa" and settings.EXA_API_KEY:
42
53
  return _exa_visit_webpage(url)
54
+ elif provider == "tavily" and settings.TAVILY_API_KEY:
55
+ return _tavily_visit_webpage(url)
56
+ else:
57
+ provider = "basic"
43
58
 
44
59
  assert provider == "basic"
45
60
  response = get_with_retries(url)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.9.2
3
+ Version: 1.10.0
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -30,6 +30,7 @@ academia_mcp/tools/py.typed
30
30
  academia_mcp/tools/review.py
31
31
  academia_mcp/tools/s2.py
32
32
  academia_mcp/tools/show_image.py
33
+ academia_mcp/tools/speech_to_text.py
33
34
  academia_mcp/tools/visit_webpage.py
34
35
  academia_mcp/tools/web_search.py
35
36
  tests/test_anthology_search.py
@@ -43,5 +44,6 @@ tests/test_latex.py
43
44
  tests/test_review.py
44
45
  tests/test_s2.py
45
46
  tests/test_show_image.py
47
+ tests/test_speech_to_text.py
46
48
  tests/test_visit_webpage.py
47
49
  tests/test_web_search.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "academia-mcp"
7
- version = "1.9.2"
7
+ version = "1.10.0"
8
8
  description = "MCP server that provides different tools to search for scientific publications"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -3,7 +3,7 @@ from io import BytesIO
3
3
  import httpx
4
4
  from PIL import Image
5
5
 
6
- from academia_mcp.tools import show_image
6
+ from academia_mcp.tools import show_image, describe_image
7
7
  from academia_mcp.files import get_workspace_dir
8
8
 
9
9
 
@@ -23,3 +23,9 @@ def test_show_image_local(test_image_url: str) -> None:
23
23
  assert result is not None
24
24
  assert "image_base64" in result
25
25
  assert result["image_base64"] is not None
26
+
27
+
28
async def test_describe_image_base(test_image_url: str) -> None:
    """Describe the fixture image with default arguments and check a keyword."""
    description = await describe_image(test_image_url)
    assert description is not None
    assert "Interrogator" in description
@@ -0,0 +1,21 @@
1
+ import httpx
2
+
3
+ from academia_mcp.tools import speech_to_text
4
+ from academia_mcp.files import get_workspace_dir
5
+
6
+
7
async def test_speech_to_text_base(test_audio_url: str) -> None:
    """Transcribe the fixture audio straight from its URL."""
    transcript = await speech_to_text(test_audio_url)
    assert transcript is not None
    lowered = str(transcript).lower()
    assert "dancing in the masquerade" in lowered
11
+
12
+
13
async def test_speech_to_text_local(test_audio_url: str) -> None:
    """Save the fixture audio into the workspace and transcribe it by relative name."""
    download = httpx.get(test_audio_url, timeout=10)
    download.raise_for_status()

    ext = test_audio_url.split(".")[-1]
    file_name = f"audio_file.{ext}"
    target = get_workspace_dir() / file_name
    with open(target, "wb") as fp:
        fp.write(download.content)

    transcript = await speech_to_text(file_name)
    assert transcript is not None
    lowered = str(transcript).lower()
    assert "dancing in the masquerade" in lowered
@@ -18,3 +18,15 @@ def test_visit_webpage_exa() -> None:
18
18
  def test_visit_webpage_pdf() -> None:
19
19
  content = visit_webpage("https://arxiv.org/pdf/2409.06820")
20
20
  assert "A Benchmark for Role-Playing" in content
21
+
22
+
23
def test_visit_webpage_nature() -> None:
    """Visit a Nature article listing with the default provider and check a marker."""
    page_text = visit_webpage(
        "https://www.nature.com/nature/articles?page=51&searchType=journalSearch&sort=PubDate&type=article&year=2020"
    )
    assert "1002" in page_text
27
+
28
+
29
def test_visit_webpage_github_issue() -> None:
    """Visit a GitHub issue through the "tavily" provider and check its date line."""
    page_text = visit_webpage(
        "https://github.com/numpy/numpy/issues/10881",
        provider="tavily",
    )
    assert "on Apr 15, 2018" in str(page_text)
@@ -1,41 +0,0 @@
1
- import base64
2
- from pathlib import Path
3
- from io import BytesIO
4
- from typing import Dict
5
-
6
- import httpx
7
- from PIL import Image
8
-
9
- from academia_mcp.files import get_workspace_dir
10
- from academia_mcp.settings import settings
11
-
12
-
13
- def show_image(path: str) -> Dict[str, str]:
14
- """
15
- Reads an image from the specified URL or from the current work directory.
16
- Always call this function at the end of the code block.
17
- For instance:
18
- ```python
19
- show_image("https://example.com/image.png")
20
- ```
21
- Do not print it ever, just return as the last expression.
22
-
23
- Returns an dictionary with a single "image" key.
24
- Args:
25
- url: Path to file inside current work directory or web URL
26
- """
27
- if path.startswith("http"):
28
- response = httpx.get(path, timeout=10)
29
- response.raise_for_status()
30
- image = Image.open(BytesIO(response.content))
31
- else:
32
- assert settings.WORKSPACE_DIR is not None, "WORKSPACE_DIR is not set"
33
- full_path = Path(path)
34
- if not full_path.exists():
35
- full_path = Path(get_workspace_dir()) / path
36
- assert full_path.exists(), f"Image file {path} does not exist"
37
- image = Image.open(str(full_path))
38
- buffer_io = BytesIO()
39
- image.save(buffer_io, format="PNG")
40
- img_bytes = buffer_io.getvalue()
41
- return {"image_base64": base64.b64encode(img_bytes).decode("utf-8")}
File without changes
File without changes
File without changes