academia-mcp 1.9.2__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
academia_mcp/server.py CHANGED
@@ -38,7 +38,8 @@ from academia_mcp.tools.bitflip import (
38
38
  score_research_proposals,
39
39
  )
40
40
  from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
41
- from academia_mcp.tools.show_image import show_image
41
+ from academia_mcp.tools.show_image import show_image, describe_image
42
+ from academia_mcp.tools.speech_to_text import speech_to_text
42
43
 
43
44
 
44
45
  def configure_uvicorn_style_logging(level: int = logging.INFO) -> None:
@@ -116,11 +117,17 @@ def run(
116
117
  server.add_tool(generate_research_proposals)
117
118
  server.add_tool(score_research_proposals)
118
119
  server.add_tool(document_qa)
120
+ server.add_tool(describe_image)
119
121
  if settings.WORKSPACE_DIR:
120
122
  server.add_tool(review_pdf_paper)
121
123
  else:
122
124
  logger.warning("No OpenRouter API key is set, LLM-related tools will not be available!")
123
125
 
126
+ if settings.OPENAI_API_KEY:
127
+ server.add_tool(speech_to_text)
128
+ else:
129
+ logger.warning("No OpenAI API key is set, speech_to_text will not be available!")
130
+
124
131
  if port is None:
125
132
  if settings.PORT is not None:
126
133
  port = int(settings.PORT)
academia_mcp/settings.py CHANGED
@@ -11,6 +11,7 @@ class Settings(BaseSettings):
11
11
  TAVILY_API_KEY: Optional[str] = None
12
12
  EXA_API_KEY: Optional[str] = None
13
13
  BRAVE_API_KEY: Optional[str] = None
14
+ OPENAI_API_KEY: Optional[str] = None
14
15
 
15
16
  REVIEW_MODEL_NAME: str = "gpt-5"
16
17
  BITFLIP_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
@@ -18,6 +19,7 @@ class Settings(BaseSettings):
18
19
  DOCUMENT_QA_MODEL_NAME: str = "deepseek/deepseek-chat-v3-0324"
19
20
  DOCUMENT_QA_QUESTION_MAX_LENGTH: int = 10000
20
21
  DOCUMENT_QA_DOCUMENT_MAX_LENGTH: int = 200000
22
+ DESCRIBE_IMAGE_MODEL_NAME: str = "gpt-4.1"
21
23
 
22
24
  PORT: int = 5056
23
25
  WORKSPACE_DIR: Optional[Path] = None
@@ -14,7 +14,8 @@ from .web_search import web_search, tavily_web_search, exa_web_search, brave_web
14
14
  from .visit_webpage import visit_webpage
15
15
  from .bitflip import extract_bitflip_info, generate_research_proposals, score_research_proposals
16
16
  from .review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url
17
- from .show_image import show_image
17
+ from .show_image import show_image, describe_image
18
+ from .speech_to_text import speech_to_text
18
19
 
19
20
  __all__ = [
20
21
  "arxiv_search",
@@ -42,4 +43,6 @@ __all__ = [
42
43
  "download_pdf_paper",
43
44
  "read_pdf",
44
45
  "show_image",
46
+ "describe_image",
47
+ "speech_to_text",
45
48
  ]
@@ -1,13 +1,37 @@
1
1
  import base64
2
2
  from pathlib import Path
3
3
  from io import BytesIO
4
- from typing import Dict
4
+ from typing import Dict, Optional
5
+ from textwrap import dedent
5
6
 
6
7
  import httpx
7
8
  from PIL import Image
8
9
 
9
10
  from academia_mcp.files import get_workspace_dir
10
11
  from academia_mcp.settings import settings
12
+ from academia_mcp.llm import llm_acall, ChatMessage
13
+
14
+
15
+ DESCRIBE_PROMPTS = {
16
+ "general": "Provide a general description of this image. Focus on the main subjects, colors, and overall scene.",
17
+ "detailed": dedent(
18
+ """Analyze this image in detail. Include:
19
+ 1. Main subjects and their relationships
20
+ 2. Colors, lighting, and composition
21
+ 3. Any text or symbols present
22
+ 4. Context or possible meaning
23
+ 5. Notable details or interesting elements"""
24
+ ),
25
+ "chess": dedent(
26
+ """Analyze this chess position and provide a detailed description including:
27
+ 1. List of pieces on the board for both white and black
28
+ 2. Whose turn it is to move
29
+ 3. Basic evaluation of the position
30
+ 4. Any immediate tactical opportunities or threats
31
+ 5. Suggested next moves with brief explanations"""
32
+ ),
33
+ "text": "Extract and describe any text present in this image. If there are multiple pieces of text, organize them clearly.",
34
+ }
11
35
 
12
36
 
13
37
  def show_image(path: str) -> Dict[str, str]:
@@ -21,6 +45,7 @@ def show_image(path: str) -> Dict[str, str]:
21
45
  Do not print it ever, just return as the last expression.
22
46
 
23
47
  Returns a dictionary with a single "image_base64" key.
48
+
24
49
  Args:
25
50
  path: Path to file inside current work directory or web URL
26
51
  """
@@ -39,3 +64,41 @@ def show_image(path: str) -> Dict[str, str]:
39
64
  image.save(buffer_io, format="PNG")
40
65
  img_bytes = buffer_io.getvalue()
41
66
  return {"image_base64": base64.b64encode(img_bytes).decode("utf-8")}
67
+
68
+
69
+ async def describe_image(
70
+ path: str, description_type: str = "general", custom_prompt: Optional[str] = None
71
+ ) -> str:
72
+ """
73
+ Tool to analyze and describe any image using a vision-capable LLM.
74
+
75
+ Returns a description of the image based on the requested type.
76
+
77
+ Args:
78
+ path (str): Path to the image file.
79
+ description_type (str): Type of description to generate. Options:
80
+ - "general": General description of the image
81
+ - "detailed": Detailed analysis of the image
82
+ - "chess": Analysis of a chess position
83
+ - "text": Extract and describe text from the image
84
+ - "custom": Custom description based on the prompt passed in custom_prompt
85
+ """
86
+ image_base64 = show_image(path)["image_base64"]
87
+ assert (
88
+ description_type in DESCRIBE_PROMPTS or description_type == "custom"
89
+ ), f"Invalid description type: {description_type}"
90
+ prompt = DESCRIBE_PROMPTS.get(description_type, custom_prompt)
91
+ assert prompt and prompt.strip(), "Please provide a non-empty prompt"
92
+ content = [
93
+ {"type": "text", "text": prompt},
94
+ {
95
+ "type": "image_url",
96
+ "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
97
+ },
98
+ ]
99
+ model_name = settings.DESCRIBE_IMAGE_MODEL_NAME
100
+ response = await llm_acall(
101
+ model_name=model_name,
102
+ messages=[ChatMessage(role="user", content=content)],
103
+ )
104
+ return response
@@ -0,0 +1,48 @@
1
+ from pathlib import Path
2
+ from io import BytesIO
3
+
4
+ import httpx
5
+ from openai import AsyncOpenAI
6
+ from academia_mcp.files import get_workspace_dir
7
+
8
+ from academia_mcp.settings import settings
9
+
10
+
11
+ async def speech_to_text(audio_path: str, provider: str = "openai") -> str:
12
+ """
13
+ Tool to convert speech to text using OpenAI's gpt-4o-transcribe model.
14
+
15
+ Returns transcribed text from the audio file.
16
+
17
+ Args:
18
+ audio_path (str): Path to the audio file (absolute or relative to the workspace directory), or an http(s) URL.
19
+ provider (str): Provider to use. Currently only "openai" is supported.
20
+ """
21
+
22
+ AVAILABLE_PROVIDERS = ("openai",)
23
+ assert (
24
+ provider in AVAILABLE_PROVIDERS
25
+ ), f"Invalid provider: {provider}. Available providers: {AVAILABLE_PROVIDERS}"
26
+
27
+ if audio_path.startswith("http"):
28
+ response = httpx.get(audio_path, timeout=10)
29
+ response.raise_for_status()
30
+ ext = audio_path.split(".")[-1]
31
+ audio_file = BytesIO(response.content)
32
+ audio_file.name = f"audio_file.{ext}"
33
+ else:
34
+ full_audio_path = Path(audio_path)
35
+ if not full_audio_path.exists():
36
+ full_audio_path = Path(get_workspace_dir()) / audio_path
37
+ assert full_audio_path.exists(), f"Audio file {audio_path} does not exist"
38
+ audio_file = BytesIO(open(full_audio_path, "rb").read())
39
+ audio_file.name = audio_path.split("/")[-1]
40
+
41
+ assert provider == "openai"
42
+ client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
43
+ result = await client.audio.transcriptions.create(
44
+ model="gpt-4o-transcribe",
45
+ file=audio_file,
46
+ response_format="text",
47
+ )
48
+ return result
@@ -8,7 +8,8 @@ from academia_mcp.utils import get_with_retries, post_with_retries
8
8
  from academia_mcp.settings import settings
9
9
 
10
10
  EXA_CONTENTS_URL = "https://api.exa.ai/contents"
11
- AVAILABLE_PROVIDERS = ("basic", "exa")
11
+ TAVILY_EXTRACT_URL = "https://api.tavily.com/extract"
12
+ AVAILABLE_PROVIDERS = ("basic", "exa", "tavily")
12
13
 
13
14
 
14
15
  def _exa_visit_webpage(url: str) -> str:
@@ -22,17 +23,27 @@ def _exa_visit_webpage(url: str) -> str:
22
23
  return json.dumps(response.json()["results"][0])
23
24
 
24
25
 
25
- def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
26
+ def _tavily_visit_webpage(url: str) -> str:
27
+ key = settings.TAVILY_API_KEY or ""
28
+ assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
29
+ payload = {
30
+ "urls": [url],
31
+ }
32
+ response = post_with_retries(TAVILY_EXTRACT_URL, payload=payload, api_key=key)
33
+ return json.dumps(response.json()["results"][0]["raw_content"])
34
+
35
+
36
+ def visit_webpage(url: str, provider: Optional[str] = "tavily") -> str:
26
37
  """
27
38
  Visit a webpage and return the content.
28
39
 
29
40
  Returns a JSON object serialized to a string. The structure is: {"url": "...", "text": "..."}
30
41
  Use `json.loads` to deserialize the result if you want to get specific fields.
31
- Use "exa" provider in case "basic" fails.
42
+ Try to use both "tavily" and "basic" providers. They might work differently for the same URL.
32
43
 
33
44
  Args:
34
45
  url: The URL of the webpage to visit.
35
- provider: The provider to use. Available providers: "basic" (default) or "exa".
46
+ provider: The provider to use. Available providers: "tavily" (default), "exa", or "basic".
36
47
  """
37
48
  assert (
38
49
  provider in AVAILABLE_PROVIDERS
@@ -40,6 +51,10 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
40
51
 
41
52
  if provider == "exa" and settings.EXA_API_KEY:
42
53
  return _exa_visit_webpage(url)
54
+ elif provider == "tavily" and settings.TAVILY_API_KEY:
55
+ return _tavily_visit_webpage(url)
56
+ else:
57
+ provider = "basic"
43
58
 
44
59
  assert provider == "basic"
45
60
  response = get_with_retries(url)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.9.2
3
+ Version: 1.10.1
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -4,12 +4,12 @@ academia_mcp/files.py,sha256=ynIt0XbU1Z7EPWkv_hVX0pGKsLlmjYv-MVJLOfi6yzs,817
4
4
  academia_mcp/llm.py,sha256=zpGkuJFf58Ofgys_fi28-47_wJ1a7sIs_yZvI1Si6z0,993
5
5
  academia_mcp/pdf.py,sha256=9PlXzHGhb6ay3ldbTdxCcTWvH4TkET3bnb64mgoh9i0,1273
6
6
  academia_mcp/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- academia_mcp/server.py,sha256=k6o-hxkeowHAgKGXUTWhZ6EI4u1RcSIfA-EFyoRmWZ8,4416
8
- academia_mcp/settings.py,sha256=vkhCrglL_OI1W0Me_1vS0WoQwDhpzj_XTfcuOylweYA,907
7
+ academia_mcp/server.py,sha256=B29AeCWYYk3mj8eZP-it0i_SgbMUzzWQBfZ0DO3HvgQ,4706
8
+ academia_mcp/settings.py,sha256=MSQYjmhZ3NDalTzu4z3ey1Aw60TrhkDcPWUTE4-iOaU,995
9
9
  academia_mcp/utils.py,sha256=P9U3RjYzcztE0KxXvJSy5wSBaUg2CM9tpByljYrsrl4,4607
10
10
  academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty,sha256=hGcEPCYBJS4vdhWvN_yEaJC4GvT_yDroI94CfY2Oguk,12268
11
11
  academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex,sha256=Tl1QkHXHRopw9VEfWrD3Layr5JP_0gIzVQjL4KXIWqc,15814
12
- academia_mcp/tools/__init__.py,sha256=ns0n9MC-TarV-nbkFGCkGptI2Jg-dmb4FU_7l-T_ET8,1390
12
+ academia_mcp/tools/__init__.py,sha256=lGUy5C4IymplHOXqOiwDD7CT4Z8aPHJqSxXo2g9qkks,1493
13
13
  academia_mcp/tools/anthology_search.py,sha256=rhFpJZqGLABgr0raDuH0CARBiAJNJtEI4dlMrKNHfDQ,7669
14
14
  academia_mcp/tools/arxiv_download.py,sha256=gBY0_Kz0yGtVkLMwn6GrAyfBjovZVgcSMuyy67p65Cw,10474
15
15
  academia_mcp/tools/arxiv_search.py,sha256=pzM18qrF3QL03A53w003kE7hQi3s3QKtjgw0m7K88UY,8355
@@ -20,12 +20,13 @@ academia_mcp/tools/latex.py,sha256=B1Leqt1FHY6H3DlUgeYse4LMFpf4-K1FQViXl5MKk8A,6
20
20
  academia_mcp/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  academia_mcp/tools/review.py,sha256=Va0lFJJKuk-NvWhKS3UZ-Dnuk7CyuDQ4S1nd70D-ffE,11117
22
22
  academia_mcp/tools/s2.py,sha256=QX7-pbetab3Xt_1tvVPU6o5D_NAe9y6jcTGRBK1vwtY,6200
23
- academia_mcp/tools/show_image.py,sha256=mbzWpgaZrGjSgUqnfiiPoWLbyAjUS27F04mjOUqb_S8,1371
24
- academia_mcp/tools/visit_webpage.py,sha256=uEqZIkMqscZG58Rx2wd6L_OQkGxo5SrzCkoUB55HhC0,2018
23
+ academia_mcp/tools/show_image.py,sha256=jiJlQ53dbZ0T61OBhCT3IKVvBl9NHc6jHgWLfg5BxiE,3856
24
+ academia_mcp/tools/speech_to_text.py,sha256=YZzMqdvunzXkpcadP_mYhm6cs4qH1Y_42SfY-7eX4O4,1601
25
+ academia_mcp/tools/visit_webpage.py,sha256=oKy8CFwTYyIPD73IOcfrUsokING8jpIyosAQ9WraO9E,2645
25
26
  academia_mcp/tools/web_search.py,sha256=kj3BrPdTVfyTjZ_9Jl2n3YUGzcRZk8diQs6cVSVmPrQ,6293
26
- academia_mcp-1.9.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
- academia_mcp-1.9.2.dist-info/METADATA,sha256=c4NsjDhKqpgL-RKN58OXWdEzAj7Ip1RDAvOJovT0wmI,6310
28
- academia_mcp-1.9.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
- academia_mcp-1.9.2.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
30
- academia_mcp-1.9.2.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
31
- academia_mcp-1.9.2.dist-info/RECORD,,
27
+ academia_mcp-1.10.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
28
+ academia_mcp-1.10.1.dist-info/METADATA,sha256=3Vuyr7l05zOUPVWTnWY_2p6cS1XSHt26oZLtkB47tVw,6311
29
+ academia_mcp-1.10.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
30
+ academia_mcp-1.10.1.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
31
+ academia_mcp-1.10.1.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
32
+ academia_mcp-1.10.1.dist-info/RECORD,,