academia-mcp 1.10.6__tar.gz → 1.10.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/PKG-INFO +2 -1
  2. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/server.py +2 -0
  3. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/settings.py +3 -0
  4. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/__init__.py +2 -0
  5. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/web_search.py +26 -1
  6. academia_mcp-1.10.8/academia_mcp/tools/yt_transcript.py +30 -0
  7. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp.egg-info/PKG-INFO +2 -1
  8. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp.egg-info/SOURCES.txt +3 -1
  9. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp.egg-info/requires.txt +1 -0
  10. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/pyproject.toml +2 -1
  11. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_web_search.py +11 -0
  12. academia_mcp-1.10.8/tests/test_yt_transcript.py +13 -0
  13. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/LICENSE +0 -0
  14. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/README.md +0 -0
  15. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/__init__.py +0 -0
  16. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/__main__.py +0 -0
  17. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/files.py +0 -0
  18. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
  19. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
  20. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/llm.py +0 -0
  21. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/pdf.py +0 -0
  22. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/py.typed +0 -0
  23. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/anthology_search.py +0 -0
  24. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/arxiv_download.py +0 -0
  25. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/arxiv_search.py +0 -0
  26. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/bitflip.py +0 -0
  27. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/document_qa.py +0 -0
  28. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/hf_datasets_search.py +0 -0
  29. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/latex.py +0 -0
  30. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/py.typed +0 -0
  31. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/review.py +0 -0
  32. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/s2.py +0 -0
  33. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/show_image.py +0 -0
  34. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/speech_to_text.py +0 -0
  35. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/tools/visit_webpage.py +0 -0
  36. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp/utils.py +0 -0
  37. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp.egg-info/dependency_links.txt +0 -0
  38. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp.egg-info/entry_points.txt +0 -0
  39. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/academia_mcp.egg-info/top_level.txt +0 -0
  40. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/setup.cfg +0 -0
  41. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_anthology_search.py +0 -0
  42. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_arxiv_download.py +0 -0
  43. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_arxiv_search.py +0 -0
  44. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_bitflip.py +0 -0
  45. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_document_qa.py +0 -0
  46. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_extract_json.py +0 -0
  47. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_hf_dataset_search.py +0 -0
  48. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_latex.py +0 -0
  49. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_review.py +0 -0
  50. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_s2.py +0 -0
  51. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_show_image.py +0 -0
  52. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_speech_to_text.py +0 -0
  53. {academia_mcp-1.10.6 → academia_mcp-1.10.8}/tests/test_visit_webpage.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.10.6
3
+ Version: 1.10.8
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -30,6 +30,7 @@ Requires-Dist: datasets>=4.0.0
30
30
  Requires-Dist: pymupdf>=1.26.4
31
31
  Requires-Dist: pillow>=11.3.0
32
32
  Requires-Dist: pydantic-settings>=2.6.0
33
+ Requires-Dist: youtube-transcript-api>=1.2.2
33
34
  Dynamic: license-file
34
35
 
35
36
  # Academia MCP
@@ -42,6 +42,7 @@ from academia_mcp.tools.bitflip import (
42
42
  from academia_mcp.tools.review import review_pdf_paper, download_pdf_paper
43
43
  from academia_mcp.tools.show_image import show_image, describe_image
44
44
  from academia_mcp.tools.speech_to_text import speech_to_text
45
+ from academia_mcp.tools.yt_transcript import yt_transcript
45
46
 
46
47
 
47
48
  def configure_uvicorn_style_logging(level: int = logging.INFO) -> None:
@@ -92,6 +93,7 @@ def run(
92
93
  server.add_tool(get_latex_templates_list)
93
94
  server.add_tool(visit_webpage)
94
95
  server.add_tool(show_image)
96
+ server.add_tool(yt_transcript)
95
97
 
96
98
  if settings.WORKSPACE_DIR:
97
99
  server.add_tool(compile_latex)
@@ -21,6 +21,9 @@ class Settings(BaseSettings):
21
21
  DOCUMENT_QA_DOCUMENT_MAX_LENGTH: int = 200000
22
22
  DESCRIBE_IMAGE_MODEL_NAME: str = "gpt-4.1"
23
23
 
24
+ WEBSHARE_PROXY_USERNAME: Optional[str] = None
25
+ WEBSHARE_PROXY_PASSWORD: Optional[str] = None
26
+
24
27
  PORT: int = 5056
25
28
  WORKSPACE_DIR: Optional[Path] = None
26
29
 
@@ -16,6 +16,7 @@ from .bitflip import extract_bitflip_info, generate_research_proposals, score_re
16
16
  from .review import review_pdf_paper, download_pdf_paper, review_pdf_paper_by_url
17
17
  from .show_image import show_image, describe_image
18
18
  from .speech_to_text import speech_to_text
19
+ from .yt_transcript import yt_transcript
19
20
 
20
21
  __all__ = [
21
22
  "arxiv_search",
@@ -45,4 +46,5 @@ __all__ = [
45
46
  "show_image",
46
47
  "describe_image",
47
48
  "speech_to_text",
49
+ "yt_transcript",
48
50
  ]
@@ -1,5 +1,5 @@
1
1
  import json
2
- from typing import Optional, List
2
+ from typing import Optional, List, Tuple
3
3
 
4
4
  from academia_mcp.utils import post_with_retries, get_with_retries
5
5
  from academia_mcp.settings import settings
@@ -12,6 +12,24 @@ BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search"
12
12
  EXCLUDE_DOMAINS = ["chatpaper.com"]
13
13
 
14
14
 
15
+ def _parse_domains(query: str) -> Tuple[str, List[str]]:
16
+ site_term = "site:"
17
+ if site_term not in query:
18
+ return query, []
19
+ parts = query.split()
20
+ query_parts = []
21
+ include_domains = []
22
+ for part in parts:
23
+ if not part.startswith(site_term):
24
+ query_parts.append(part)
25
+ continue
26
+ domain = part[len(site_term) :]
27
+ if domain:
28
+ include_domains.append(domain)
29
+ query = " ".join(query_parts)
30
+ return query, include_domains
31
+
32
+
15
33
  def web_search(
16
34
  query: str,
17
35
  limit: Optional[int] = 20,
@@ -40,6 +58,13 @@ def web_search(
40
58
  isinstance(domain, str) for domain in include_domains
41
59
  ), "Error: include_domains should be a list of strings"
42
60
 
61
+ query, query_include_domains = _parse_domains(query)
62
+ if query_include_domains:
63
+ if include_domains:
64
+ include_domains.extend(query_include_domains)
65
+ else:
66
+ include_domains = query_include_domains
67
+
43
68
  is_tavily_available = bool(settings.TAVILY_API_KEY)
44
69
  is_exa_available = bool(settings.EXA_API_KEY)
45
70
  is_brave_available = bool(settings.BRAVE_API_KEY)
@@ -0,0 +1,30 @@
1
+ from youtube_transcript_api import YouTubeTranscriptApi
2
+ from youtube_transcript_api.proxies import WebshareProxyConfig
3
+
4
+ from academia_mcp.settings import settings
5
+
6
+
7
+ def yt_transcript(video_url: str) -> str:
8
+ """
9
+ Tool to fetch the transcript of a YouTube video given its URL.
10
+
11
+ Returns a transcript of the video as a single string.
12
+
13
+ Args:
14
+ video_url (str): YouTube video URL.
15
+ """
16
+ if "youtu.be" in video_url:
17
+ video_id = video_url.strip().split("youtu.be/")[1]
18
+ else:
19
+ video_id = video_url.strip().split("v=")[-1]
20
+ video_id = video_id.split("?")[0]
21
+ proxy_config = None
22
+ if settings.WEBSHARE_PROXY_USERNAME and settings.WEBSHARE_PROXY_PASSWORD:
23
+ proxy_config = WebshareProxyConfig(
24
+ proxy_username=settings.WEBSHARE_PROXY_USERNAME,
25
+ proxy_password=settings.WEBSHARE_PROXY_PASSWORD,
26
+ )
27
+ api = YouTubeTranscriptApi(proxy_config=proxy_config)
28
+ transcript = api.fetch(video_id)
29
+ snippets = transcript.snippets
30
+ return "\n".join([f"{int(entry.start)}: {' '.join(entry.text.split())}" for entry in snippets])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.10.6
3
+ Version: 1.10.8
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -30,6 +30,7 @@ Requires-Dist: datasets>=4.0.0
30
30
  Requires-Dist: pymupdf>=1.26.4
31
31
  Requires-Dist: pillow>=11.3.0
32
32
  Requires-Dist: pydantic-settings>=2.6.0
33
+ Requires-Dist: youtube-transcript-api>=1.2.2
33
34
  Dynamic: license-file
34
35
 
35
36
  # Academia MCP
@@ -33,6 +33,7 @@ academia_mcp/tools/show_image.py
33
33
  academia_mcp/tools/speech_to_text.py
34
34
  academia_mcp/tools/visit_webpage.py
35
35
  academia_mcp/tools/web_search.py
36
+ academia_mcp/tools/yt_transcript.py
36
37
  tests/test_anthology_search.py
37
38
  tests/test_arxiv_download.py
38
39
  tests/test_arxiv_search.py
@@ -46,4 +47,5 @@ tests/test_s2.py
46
47
  tests/test_show_image.py
47
48
  tests/test_speech_to_text.py
48
49
  tests/test_visit_webpage.py
49
- tests/test_web_search.py
50
+ tests/test_web_search.py
51
+ tests/test_yt_transcript.py
@@ -18,3 +18,4 @@ datasets>=4.0.0
18
18
  pymupdf>=1.26.4
19
19
  pillow>=11.3.0
20
20
  pydantic-settings>=2.6.0
21
+ youtube-transcript-api>=1.2.2
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "academia-mcp"
7
- version = "1.10.6"
7
+ version = "1.10.8"
8
8
  description = "MCP server that provides different tools to search for scientific publications"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -37,6 +37,7 @@ dependencies = [
37
37
  "pymupdf>=1.26.4",
38
38
  "pillow>=11.3.0",
39
39
  "pydantic-settings>=2.6.0",
40
+ "youtube-transcript-api>=1.2.2",
40
41
  ]
41
42
 
42
43
  [dependency-groups]
@@ -42,3 +42,14 @@ def test_web_search_include_domains() -> None:
42
42
  assert results
43
43
  assert len(results["results"]) > 0
44
44
  assert all("wikipedia.org" in result["url"] for result in results["results"])
45
+
46
+
47
+ def test_web_search_include_query_domains() -> None:
48
+ results = web_search(
49
+ "site:wikipedia.org autoregressive models path-star graphs",
50
+ )
51
+ assert results
52
+ results = json.loads(results)
53
+ assert results
54
+ assert len(results["results"]) > 0
55
+ assert all("wikipedia.org" in result["url"] for result in results["results"])
@@ -0,0 +1,13 @@
1
+ from academia_mcp.tools import yt_transcript
2
+
3
+
4
+ def test_yt_transcript_base() -> None:
5
+ result = yt_transcript("https://www.youtube.com/watch?v=21EYKqUsPfg")
6
+ assert result is not None
7
+ assert "chatting with richard sutton" in result.lower()
8
+
9
+
10
+ def test_yt_transcript_short_link() -> None:
11
+ result = yt_transcript("https://youtu.be/21EYKqUsPfg?si=iity_X55GIWUQWuT")
12
+ assert result is not None
13
+ assert "chatting with richard sutton" in result.lower()
File without changes
File without changes
File without changes