academia-mcp 1.10.6__tar.gz → 1.10.8__tar.gz
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/PKG-INFO +2 -1
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/server.py +2 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/settings.py +3 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/__init__.py +2 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/web_search.py +26 -1
- academia_mcp-1.10.8/academia_mcp/tools/yt_transcript.py +30 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp.egg-info/PKG-INFO +2 -1
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp.egg-info/SOURCES.txt +3 -1
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp.egg-info/requires.txt +1 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/pyproject.toml +2 -1
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_web_search.py +11 -0
- academia_mcp-1.10.8/tests/test_yt_transcript.py +13 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/LICENSE +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/README.md +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/__init__.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/__main__.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/files.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/llm.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/pdf.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/py.typed +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/anthology_search.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/arxiv_download.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/arxiv_search.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/bitflip.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/document_qa.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/hf_datasets_search.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/latex.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/py.typed +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/review.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/s2.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/show_image.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/speech_to_text.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/visit_webpage.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/utils.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp.egg-info/dependency_links.txt +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp.egg-info/entry_points.txt +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp.egg-info/top_level.txt +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/setup.cfg +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_anthology_search.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_arxiv_download.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_arxiv_search.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_bitflip.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_document_qa.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_extract_json.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_hf_dataset_search.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_latex.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_review.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_s2.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_show_image.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_speech_to_text.py +0 -0
- {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_visit_webpage.py +0 -0
--- academia_mcp-1.10.6/PKG-INFO
+++ academia_mcp-1.10.8/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: academia-mcp
-Version: 1.10.6
+Version: 1.10.8
 Summary: MCP server that provides different tools to search for scientific publications
 Author-email: Ilya Gusev <phoenixilya@gmail.com>
 Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -30,6 +30,7 @@ Requires-Dist: datasets>=4.0.0
 Requires-Dist: pymupdf>=1.26.4
 Requires-Dist: pillow>=11.3.0
 Requires-Dist: pydantic-settings>=2.6.0
+Requires-Dist: youtube-transcript-api>=1.2.2
 Dynamic: license-file
 
 # Academia MCP
--- academia_mcp-1.10.6/academia_mcp/server.py
+++ academia_mcp-1.10.8/academia_mcp/server.py
@@ -42,6 +42,7 @@ from academia_mcp.tools.bitflip import (
 from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
 from academia_mcp.tools.show_image import show_image, describe_image
 from academia_mcp.tools.speech_to_text import speech_to_text
+from academia_mcp.tools.yt_transcript import yt_transcript
 
 
 def configure_uvicorn_style_logging(level: int = logging.INFO) -> None:
@@ -92,6 +93,7 @@ def run(
     server.add_tool(get_latex_templates_list)
     server.add_tool(visit_webpage)
     server.add_tool(show_image)
+    server.add_tool(yt_transcript)
 
     if settings.WORKSPACE_DIR:
         server.add_tool(compile_latex)
--- academia_mcp-1.10.6/academia_mcp/settings.py
+++ academia_mcp-1.10.8/academia_mcp/settings.py
@@ -21,6 +21,9 @@ class Settings(BaseSettings):
     DOCUMENT_QA_DOCUMENT_MAX_LENGTH: int = 200000
     DESCRIBE_IMAGE_MODEL_NAME: str = "gpt-4.1"
 
+    WEBSHARE_PROXY_USERNAME: Optional[str] = None
+    WEBSHARE_PROXY_PASSWORD: Optional[str] = None
+
     PORT: int = 5056
     WORKSPACE_DIR: Optional[Path] = None
 
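The two new settings are optional pydantic-settings fields, so in practice they would usually be supplied through the environment before the server starts. A minimal sketch of doing that from Python, assuming the default field-name-to-environment-variable mapping (the credential values are placeholders, not taken from this diff):

import os

# Placeholder credentials; if the variables are unset the fields stay None
# and yt_transcript simply runs without the Webshare proxy.
os.environ["WEBSHARE_PROXY_USERNAME"] = "example-user"
os.environ["WEBSHARE_PROXY_PASSWORD"] = "example-password"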
--- academia_mcp-1.10.6/academia_mcp/tools/__init__.py
+++ academia_mcp-1.10.8/academia_mcp/tools/__init__.py
@@ -16,6 +16,7 @@ from .bitflip import extract_bitflip_info, generate_research_proposals, score_re
 from .review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url
 from .show_image import show_image, describe_image
 from .speech_to_text import speech_to_text
+from .yt_transcript import yt_transcript
 
 __all__ = [
     "arxiv_search",
@@ -45,4 +46,5 @@ __all__ = [
     "show_image",
     "describe_image",
     "speech_to_text",
+    "yt_transcript",
 ]
--- academia_mcp-1.10.6/academia_mcp/tools/web_search.py
+++ academia_mcp-1.10.8/academia_mcp/tools/web_search.py
@@ -1,5 +1,5 @@
 import json
-from typing import Optional, List
+from typing import Optional, List, Tuple
 
 from academia_mcp.utils import post_with_retries, get_with_retries
 from academia_mcp.settings import settings
@@ -12,6 +12,24 @@ BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search"
 EXCLUDE_DOMAINS = ["chatpaper.com"]
 
 
+def _parse_domains(query: str) -> Tuple[str, List[str]]:
+    site_term = "site:"
+    if site_term not in query:
+        return query, []
+    parts = query.split()
+    query_parts = []
+    include_domains = []
+    for part in parts:
+        if not part.startswith(site_term):
+            query_parts.append(part)
+            continue
+        domain = part[len(site_term) :]
+        if domain:
+            include_domains.append(domain)
+    query = " ".join(query_parts)
+    return query, include_domains
+
+
 def web_search(
     query: str,
     limit: Optional[int] = 20,
@@ -40,6 +58,13 @@ def web_search(
         isinstance(domain, str) for domain in include_domains
     ), "Error: include_domains should be a list of strings"
 
+    query, query_include_domains = _parse_domains(query)
+    if query_include_domains:
+        if include_domains:
+            include_domains.extend(query_include_domains)
+        else:
+            include_domains = query_include_domains
+
     is_tavily_available = bool(settings.TAVILY_API_KEY)
     is_exa_available = bool(settings.EXA_API_KEY)
     is_brave_available = bool(settings.BRAVE_API_KEY)
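Together with the helper added above, this means any `site:` terms embedded in the query string are stripped out and merged into `include_domains` before a search provider is chosen. A rough illustration of the expected behavior, inferred from the code in this diff rather than copied from it:

query, domains = _parse_domains("site:wikipedia.org autoregressive models")
# query   -> "autoregressive models"
# domains -> ["wikipedia.org"]

query, domains = _parse_domains("a query without any site filter")
# query   -> "a query without any site filter"
# domains -> []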
--- /dev/null
+++ academia_mcp-1.10.8/academia_mcp/tools/yt_transcript.py
@@ -0,0 +1,30 @@
+from youtube_transcript_api import YouTubeTranscriptApi
+from youtube_transcript_api.proxies import WebshareProxyConfig
+
+from academia_mcp.settings import settings
+
+
+def yt_transcript(video_url: str) -> str:
+    """
+    Tool to fetch the transcript of a YouTube video given its URL.
+
+    Returns a transcript of the video as a single string.
+
+    Args:
+        video_url (str): YouTube video URL.
+    """
+    if "youtu.be" in video_url:
+        video_id = video_url.strip().split("youtu.be/")[1]
+    else:
+        video_id = video_url.strip().split("v=")[-1]
+    video_id = video_id.split("?")[0]
+    proxy_config = None
+    if settings.WEBSHARE_PROXY_USERNAME and settings.WEBSHARE_PROXY_PASSWORD:
+        proxy_config = WebshareProxyConfig(
+            proxy_username=settings.WEBSHARE_PROXY_USERNAME,
+            proxy_password=settings.WEBSHARE_PROXY_PASSWORD,
+        )
+    api = YouTubeTranscriptApi(proxy_config=proxy_config)
+    transcript = api.fetch(video_id)
+    snippets = transcript.snippets
+    return "\n".join([f"{int(entry.start)}: {' '.join(entry.text.split())}" for entry in snippets])
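The new tool returns one line per transcript snippet in the form "<start second>: <text>". A minimal usage sketch, reusing the video URL from the package's own tests (the output shape is inferred from the return expression above):

from academia_mcp.tools import yt_transcript

transcript = yt_transcript("https://www.youtube.com/watch?v=21EYKqUsPfg")
print(transcript.splitlines()[:3])  # first few "<start>: <text>" lines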
--- academia_mcp-1.10.6/academia_mcp.egg-info/PKG-INFO
+++ academia_mcp-1.10.8/academia_mcp.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: academia-mcp
-Version: 1.10.6
+Version: 1.10.8
 Summary: MCP server that provides different tools to search for scientific publications
 Author-email: Ilya Gusev <phoenixilya@gmail.com>
 Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -30,6 +30,7 @@ Requires-Dist: datasets>=4.0.0
 Requires-Dist: pymupdf>=1.26.4
 Requires-Dist: pillow>=11.3.0
 Requires-Dist: pydantic-settings>=2.6.0
+Requires-Dist: youtube-transcript-api>=1.2.2
 Dynamic: license-file
 
 # Academia MCP
--- academia_mcp-1.10.6/academia_mcp.egg-info/SOURCES.txt
+++ academia_mcp-1.10.8/academia_mcp.egg-info/SOURCES.txt
@@ -33,6 +33,7 @@ academia_mcp/tools/show_image.py
 academia_mcp/tools/speech_to_text.py
 academia_mcp/tools/visit_webpage.py
 academia_mcp/tools/web_search.py
+academia_mcp/tools/yt_transcript.py
 tests/test_anthology_search.py
 tests/test_arxiv_download.py
 tests/test_arxiv_search.py
@@ -46,4 +47,5 @@ tests/test_s2.py
 tests/test_show_image.py
 tests/test_speech_to_text.py
 tests/test_visit_webpage.py
-tests/test_web_search.py
+tests/test_web_search.py
+tests/test_yt_transcript.py
--- academia_mcp-1.10.6/pyproject.toml
+++ academia_mcp-1.10.8/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "academia-mcp"
-version = "1.10.6"
+version = "1.10.8"
 description = "MCP server that provides different tools to search for scientific publications"
 readme = "README.md"
 authors = [
@@ -37,6 +37,7 @@ dependencies = [
     "pymupdf>=1.26.4",
     "pillow>=11.3.0",
     "pydantic-settings>=2.6.0",
+    "youtube-transcript-api>=1.2.2",
 ]
 
 [dependency-groups]
--- academia_mcp-1.10.6/tests/test_web_search.py
+++ academia_mcp-1.10.8/tests/test_web_search.py
@@ -42,3 +42,14 @@ def test_web_search_include_domains() -> None:
     assert results
     assert len(results["results"]) > 0
     assert all("wikipedia.org" in result["url"] for result in results["results"])
+
+
+def test_web_search_include_query_domains() -> None:
+    results = web_search(
+        "site:wikipedia.org autoregressive models path-star graphs",
+    )
+    assert results
+    results = json.loads(results)
+    assert results
+    assert len(results["results"]) > 0
+    assert all("wikipedia.org" in result["url"] for result in results["results"])
--- /dev/null
+++ academia_mcp-1.10.8/tests/test_yt_transcript.py
@@ -0,0 +1,13 @@
+from academia_mcp.tools import yt_transcript
+
+
+def test_yt_transcript_base() -> None:
+    result = yt_transcript("https://www.youtube.com/watch?v=21EYKqUsPfg")
+    assert result is not None
+    assert "chatting with richard sutton" in result.lower()
+
+
+def test_yt_transcript_short_link() -> None:
+    result = yt_transcript("https://youtu.be/21EYKqUsPfg?si=iity_X55GIWUQWuT")
+    assert result is not None
+    assert "chatting with richard sutton" in result.lower()