academia-mcp 1.2.1__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/PKG-INFO +51 -3
  2. academia_mcp-1.3.0/README.md +83 -0
  3. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/server.py +17 -13
  4. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/tools/anthology_search.py +1 -8
  5. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/tools/arxiv_download.py +2 -0
  6. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/tools/document_qa.py +4 -1
  7. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/tools/md_to_pdf.py +7 -0
  8. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/tools/visit_webpage.py +19 -8
  9. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/tools/web_search.py +28 -10
  10. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/utils.py +22 -2
  11. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp.egg-info/PKG-INFO +51 -3
  12. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/pyproject.toml +1 -1
  13. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/tests/test_visit_webpage.py +5 -0
  14. academia_mcp-1.2.1/README.md +0 -35
  15. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/LICENSE +0 -0
  16. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/__init__.py +0 -0
  17. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/__main__.py +0 -0
  18. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/files.py +0 -0
  19. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/llm.py +0 -0
  20. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/py.typed +0 -0
  21. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/tools/__init__.py +0 -0
  22. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/tools/arxiv_search.py +0 -0
  23. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/tools/bitflip.py +0 -0
  24. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/tools/hf_datasets_search.py +0 -0
  25. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/tools/py.typed +0 -0
  26. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp/tools/s2_citations.py +0 -0
  27. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp.egg-info/SOURCES.txt +0 -0
  28. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp.egg-info/dependency_links.txt +0 -0
  29. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp.egg-info/entry_points.txt +0 -0
  30. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp.egg-info/requires.txt +0 -0
  31. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/academia_mcp.egg-info/top_level.txt +0 -0
  32. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/setup.cfg +0 -0
  33. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/tests/test_anthology_search.py +0 -0
  34. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/tests/test_arxiv_download.py +0 -0
  35. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/tests/test_arxiv_search.py +0 -0
  36. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/tests/test_bitflip.py +0 -0
  37. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/tests/test_document_qa.py +0 -0
  38. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/tests/test_extract_json.py +0 -0
  39. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/tests/test_hf_dataset_search.py +0 -0
  40. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/tests/test_md_to_pdf.py +0 -0
  41. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/tests/test_s2_citations.py +0 -0
  42. {academia_mcp-1.2.1 → academia_mcp-1.3.0}/tests/test_web_search.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.2.1
3
+ Version: 1.3.0
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -44,16 +44,26 @@ A collection of MCP tools related to the search of scientific papers:
44
44
  - Web search: Exa/Brave/Tavily
45
45
  - Page crawler
46
46
 
47
- Install:
47
+ ## Install
48
+
49
+ - Using pip (end users):
48
50
  ```
49
51
  pip3 install academia-mcp
50
52
  ```
51
53
 
54
+ - For development (uv + Makefile):
55
+ ```
56
+ uv venv .venv
57
+ make install
58
+ ```
59
+
60
+ ## Examples
52
61
  Comprehensive report screencast: https://www.youtube.com/watch?v=4bweqQcN6w8
53
62
 
54
63
  Single paper screencast: https://www.youtube.com/watch?v=IAAPMptJ5k8
55
64
 
56
- Claude Desktop config:
65
+
66
+ ## Claude Desktop config
57
67
  ```
58
68
  {
59
69
  "mcpServers": {
@@ -69,3 +79,41 @@ Claude Desktop config:
69
79
  }
70
80
  }
71
81
  ```
82
+
83
+ ## Running the server (CLI)
84
+
85
+ ```
86
+ uv run -m academia_mcp --transport streamable-http
87
+ ```
88
+
89
+ Notes:
90
+ - Transports supported: `stdio`, `sse`, `streamable-http`.
91
+ - Host/port are used for HTTP transports; for `stdio` they are ignored.
92
+
93
+ ## Makefile targets
94
+
95
+ - `make install`: install the package in editable mode with uv.
96
+ - `make validate`: run black, flake8, and mypy (strict).
97
+ - `make test`: run the test suite with pytest.
98
+ - `make publish`: build and publish using uv.
99
+
100
+ ## Environment variables
101
+
102
+ Set as needed depending on which tools you use:
103
+
104
+ - `TAVILY_API_KEY`: enables Tavily in `web_search`.
105
+ - `EXA_API_KEY`: enables Exa in `web_search` and `visit_webpage`.
106
+ - `BRAVE_API_KEY`: enables Brave in `web_search`.
107
+ - `OPENROUTER_API_KEY`: required for `document_qa`.
108
+ - `BASE_URL`: override OpenRouter base URL for `document_qa` and bitflip tools.
109
+ - `DOCUMENT_QA_MODEL_NAME`: override default model for `document_qa`.
110
+ - `BITFLIP_MODEL_NAME`: override default model for bitflip tools.
111
+ - `WORKSPACE_DIR`: directory for generated files (PDFs, temp artifacts).
112
+
113
+ ## md_to_pdf requirements
114
+
115
+ The `md_to_pdf` tool invokes `pdflatex`. Ensure a LaTeX distribution is installed and `pdflatex` is on PATH. On Debian/Ubuntu:
116
+
117
+ ```
118
+ sudo apt install texlive-latex-base texlive-fonts-recommended texlive-latex-extra texlive-science
119
+ ```
@@ -0,0 +1,83 @@
1
+ # Academia MCP
2
+
3
+ A collection of MCP tools related to the search of scientific papers:
4
+ - ArXiv search and download
5
+ - ACL Anthology search
6
+ - Hugging Face datasets search
7
+ - Semantic Scholar citation graphs
8
+ - Web search: Exa/Brave/Tavily
9
+ - Page crawler
10
+
11
+ ## Install
12
+
13
+ - Using pip (end users):
14
+ ```
15
+ pip3 install academia-mcp
16
+ ```
17
+
18
+ - For development (uv + Makefile):
19
+ ```
20
+ uv venv .venv
21
+ make install
22
+ ```
23
+
24
+ ## Examples
25
+ Comprehensive report screencast: https://www.youtube.com/watch?v=4bweqQcN6w8
26
+
27
+ Single paper screencast: https://www.youtube.com/watch?v=IAAPMptJ5k8
28
+
29
+
30
+ ## Claude Desktop config
31
+ ```
32
+ {
33
+ "mcpServers": {
34
+ "academia": {
35
+ "command": "python3",
36
+ "args": [
37
+ "-m",
38
+ "academia_mcp",
39
+ "--transport",
40
+ "stdio"
41
+ ]
42
+ }
43
+ }
44
+ }
45
+ ```
46
+
47
+ ## Running the server (CLI)
48
+
49
+ ```
50
+ uv run -m academia_mcp --transport streamable-http
51
+ ```
52
+
53
+ Notes:
54
+ - Transports supported: `stdio`, `sse`, `streamable-http`.
55
+ - Host/port are used for HTTP transports; for `stdio` they are ignored.
56
+
57
+ ## Makefile targets
58
+
59
+ - `make install`: install the package in editable mode with uv.
60
+ - `make validate`: run black, flake8, and mypy (strict).
61
+ - `make test`: run the test suite with pytest.
62
+ - `make publish`: build and publish using uv.
63
+
64
+ ## Environment variables
65
+
66
+ Set as needed depending on which tools you use:
67
+
68
+ - `TAVILY_API_KEY`: enables Tavily in `web_search`.
69
+ - `EXA_API_KEY`: enables Exa in `web_search` and `visit_webpage`.
70
+ - `BRAVE_API_KEY`: enables Brave in `web_search`.
71
+ - `OPENROUTER_API_KEY`: required for `document_qa`.
72
+ - `BASE_URL`: override OpenRouter base URL for `document_qa` and bitflip tools.
73
+ - `DOCUMENT_QA_MODEL_NAME`: override default model for `document_qa`.
74
+ - `BITFLIP_MODEL_NAME`: override default model for bitflip tools.
75
+ - `WORKSPACE_DIR`: directory for generated files (PDFs, temp artifacts).
76
+
77
+ ## md_to_pdf requirements
78
+
79
+ The `md_to_pdf` tool invokes `pdflatex`. Ensure a LaTeX distribution is installed and `pdflatex` is on PATH. On Debian/Ubuntu:
80
+
81
+ ```
82
+ sudo apt install texlive-latex-base texlive-fonts-recommended texlive-latex-extra texlive-science
83
+ ```
@@ -33,7 +33,7 @@ def find_free_port() -> int:
33
33
  return port
34
34
  except Exception:
35
35
  continue
36
- return 5000
36
+ raise RuntimeError("No free port in range 5000-6000 found")
37
37
 
38
38
 
39
39
  def run(
@@ -42,6 +42,8 @@ def run(
42
42
  mount_path: str = "/",
43
43
  streamable_http_path: str = "/mcp",
44
44
  transport: Literal["stdio", "sse", "streamable-http"] = "streamable-http",
45
+ disable_web_search_tools: bool = False,
46
+ disable_llm_tools: bool = False,
45
47
  ) -> None:
46
48
  server = FastMCP(
47
49
  "Academia MCP",
@@ -58,19 +60,21 @@ def run(
58
60
  server.add_tool(anthology_search)
59
61
  server.add_tool(md_to_pdf)
60
62
  server.add_tool(visit_webpage)
61
- server.add_tool(extract_bitflip_info)
62
- server.add_tool(generate_research_proposal)
63
- server.add_tool(score_research_proposals)
64
63
 
65
- if os.getenv("TAVILY_API_KEY"):
66
- server.add_tool(tavily_web_search)
67
- if os.getenv("EXA_API_KEY"):
68
- server.add_tool(exa_web_search)
69
- if os.getenv("BRAVE_API_KEY"):
70
- server.add_tool(brave_web_search)
71
- if os.getenv("EXA_API_KEY") or os.getenv("BRAVE_API_KEY") or os.getenv("TAVILY_API_KEY"):
72
- server.add_tool(web_search)
73
- if os.getenv("OPENROUTER_API_KEY"):
64
+ if not disable_web_search_tools:
65
+ if os.getenv("TAVILY_API_KEY"):
66
+ server.add_tool(tavily_web_search)
67
+ if os.getenv("EXA_API_KEY"):
68
+ server.add_tool(exa_web_search)
69
+ if os.getenv("BRAVE_API_KEY"):
70
+ server.add_tool(brave_web_search)
71
+ if os.getenv("EXA_API_KEY") or os.getenv("BRAVE_API_KEY") or os.getenv("TAVILY_API_KEY"):
72
+ server.add_tool(web_search)
73
+
74
+ if not disable_llm_tools and os.getenv("OPENROUTER_API_KEY"):
75
+ server.add_tool(extract_bitflip_info)
76
+ server.add_tool(generate_research_proposal)
77
+ server.add_tool(score_research_proposals)
74
78
  server.add_tool(document_qa)
75
79
 
76
80
  if port is None:
@@ -34,20 +34,13 @@ def _format_authors(authors: List[Any]) -> str:
34
34
  return result
35
35
 
36
36
 
37
- def _format_date(date_str: str) -> str:
38
- try:
39
- return datetime.strptime(date_str, "%Y").strftime("%B %d, %Y")
40
- except ValueError:
41
- return date_str
42
-
43
-
44
37
  def _clean_entry(entry: Any) -> Dict[str, Any]:
45
38
  return {
46
39
  "id": entry.full_id,
47
40
  "title": _format_text_field(entry.title.as_text()),
48
41
  "authors": _format_authors(entry.authors),
49
42
  "abstract": (_format_text_field(entry.abstract.as_text()) if entry.abstract else ""),
50
- "published": _format_date(entry.year),
43
+ "published_year": entry.year,
51
44
  "categories": ", ".join(entry.venue_ids),
52
45
  "comment": entry.note if entry.note else "",
53
46
  "url": entry.pdf.url if entry.pdf else "",
@@ -35,6 +35,8 @@ def parse_pdf_file(pdf_path: Path) -> List[str]:
35
35
  for page_number, page in enumerate(reader.pages, start=1):
36
36
  try:
37
37
  text = page.extract_text()
38
+ if not text:
39
+ continue
38
40
  prefix = f"## Page {page_number}\n\n"
39
41
  pages.append(prefix + text)
40
42
  except Exception:
@@ -6,6 +6,7 @@ from dotenv import load_dotenv
6
6
  from pydantic import BaseModel
7
7
 
8
8
  from academia_mcp.llm import llm_acall
9
+ from academia_mcp.utils import truncate_content
9
10
 
10
11
  load_dotenv()
11
12
 
@@ -62,9 +63,11 @@ async def document_qa(
62
63
  assert question and question.strip(), "Please provide non-empty 'question'"
63
64
  if isinstance(document, dict):
64
65
  document = json.dumps(document)
65
-
66
66
  assert document and document.strip(), "Please provide non-empty 'document'"
67
67
 
68
+ question = truncate_content(question, 10000)
69
+ document = truncate_content(document, 200000)
70
+
68
71
  model_name = os.getenv("DOCUMENT_QA_MODEL_NAME", "deepseek/deepseek-chat-v3-0324")
69
72
  prompt = PROMPT.format(question=question, document=document)
70
73
  content = await llm_acall(model_name=model_name, prompt=prompt)
@@ -367,6 +367,10 @@ def md_to_pdf(markdown_text: str, output_filename: str = "output") -> str:
367
367
  with open(tex_file_path, "w", encoding="utf-8") as f:
368
368
  f.write(latex_code)
369
369
 
370
+ if shutil.which("pdflatex") is None:
371
+ shutil.rmtree(temp_dir, ignore_errors=True)
372
+ return "pdflatex is not installed or not found in PATH."
373
+
370
374
  try:
371
375
  subprocess.run(
372
376
  [
@@ -383,12 +387,14 @@ def md_to_pdf(markdown_text: str, output_filename: str = "output") -> str:
383
387
  )
384
388
 
385
389
  except subprocess.TimeoutExpired:
390
+ shutil.rmtree(temp_dir, ignore_errors=True)
386
391
  return "Compilation timed out after 30 seconds"
387
392
  except subprocess.CalledProcessError as e:
388
393
  error_msg = e.stdout.decode("utf-8")
389
394
  error_lines = [
390
395
  line for line in error_msg.split("\n") if "error" in line.lower() or "!" in line
391
396
  ]
397
+ shutil.rmtree(temp_dir, ignore_errors=True)
392
398
  if error_lines:
393
399
  return "Compilation failed. LaTeX errors:\n" + "\n".join(error_lines)
394
400
  return f"Compilation failed. Full LaTeX output:\n{error_msg}"
@@ -401,4 +407,5 @@ def md_to_pdf(markdown_text: str, output_filename: str = "output") -> str:
401
407
  shutil.rmtree(temp_dir, ignore_errors=True)
402
408
  return f"Compilation successful! PDF file saved as {output_filename}.pdf"
403
409
 
410
+ shutil.rmtree(temp_dir, ignore_errors=True)
404
411
  return "Compilation completed, but PDF file was not created. Check LaTeX code for errors."
@@ -11,6 +11,17 @@ EXA_CONTENTS_URL = "https://api.exa.ai/contents"
11
11
  AVAILABLE_PROVIDERS = ("basic", "exa")
12
12
 
13
13
 
14
def _exa_visit_webpage(url: str) -> str:
    """Request the contents of ``url`` from the Exa contents endpoint.

    Args:
        url: Address of the page to fetch.

    Returns:
        The first entry of the response's ``"results"`` list, serialized
        as a JSON string.

    Raises:
        AssertionError: If the ``EXA_API_KEY`` environment variable is
            unset or empty.
    """
    api_key = os.getenv("EXA_API_KEY", "")
    assert api_key, "Error: EXA_API_KEY is not set and no api_key was provided"

    # A single URL is sent per request; "text": True presumably asks Exa to
    # include the page text in the result -- confirm against the Exa API docs.
    request_body = {"urls": [url], "text": True}
    exa_response = post_with_retries(EXA_CONTENTS_URL, payload=request_body, api_key=api_key)

    first_result = exa_response.json()["results"][0]
    return json.dumps(first_result)
23
+
24
+
14
25
  def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
15
26
  """
16
27
  Visit a webpage and return the content.
@@ -28,17 +39,17 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
28
39
  ), f"Invalid provider: {provider}. Available providers: {AVAILABLE_PROVIDERS}"
29
40
 
30
41
  if provider == "exa":
31
- key = os.getenv("EXA_API_KEY", "")
32
- assert key, "Error: EXA_API_KEY is not set and no api_key was provided"
33
- payload = {
34
- "urls": [url],
35
- "text": True,
36
- }
37
- response = post_with_retries(EXA_CONTENTS_URL, payload=payload, api_key=key)
38
- return json.dumps(response.json()["results"][0])
42
+ return _exa_visit_webpage(url)
39
43
 
40
44
  assert provider == "basic"
41
45
  response = get_with_retries(url)
46
+ content_type = response.headers.get("content-type", "").lower()
47
+ if not content_type or (not content_type.startswith("text/") and "html" not in content_type):
48
+ if os.getenv("EXA_API_KEY"):
49
+ return _exa_visit_webpage(url)
50
+ return json.dumps(
51
+ {"id": url, "error": f"Unsupported content-type: {content_type or 'unknown'}"}
52
+ )
42
53
  markdown_content = markdownify(response.text).strip()
43
54
  markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
44
55
  return json.dumps({"id": url, "text": markdown_content})
@@ -17,7 +17,8 @@ def web_search(
17
17
  provider: Optional[str] = "tavily",
18
18
  ) -> str:
19
19
  """
20
- Search the web using Exa Search or Tavily and return normalized results.
20
+ Search the web using Exa Search, Brave Search or Tavily and return normalized results.
21
+ If the specified provider is not available, the function will try to use the next available provider.
21
22
 
22
23
  Returns a JSON object serialized to a string. The structure is: {"results": [...]}
23
24
  Every item in the "results" has at least the following fields: ("title", "url")
@@ -28,17 +29,34 @@ def web_search(
28
29
  limit: The maximum number of items to return. 20 by default, maximum 25.
29
30
  provider: The provider to use. "exa", "tavily" or "brave". "tavily" by default.
30
31
  """
31
- assert provider in (
32
- "exa",
33
- "tavily",
34
- "brave",
35
- ), "Error: provider must be either 'exa', 'tavily' or 'brave'"
32
+ providers = ("tavily", "brave", "exa")
33
+ assert provider in providers, "Error: provider must be either 'exa', 'tavily' or 'brave'"
34
+
35
+ is_tavily_available = os.getenv("TAVILY_API_KEY") is not None
36
+ is_exa_available = os.getenv("EXA_API_KEY") is not None
37
+ is_brave_available = os.getenv("BRAVE_API_KEY") is not None
38
+ assert is_tavily_available or is_exa_available or is_brave_available
39
+ availability = {
40
+ "tavily": is_tavily_available,
41
+ "brave": is_brave_available,
42
+ "exa": is_exa_available,
43
+ }
44
+
45
+ if not availability[provider]:
46
+ for p in providers:
47
+ if availability[p]:
48
+ provider = p
49
+ break
36
50
 
51
+ result = {}
37
52
  if provider == "exa":
38
- return exa_web_search(query, limit)
39
- if provider == "brave":
40
- return brave_web_search(query, limit)
41
- return tavily_web_search(query, limit)
53
+ result = json.loads(exa_web_search(query, limit))
54
+ elif provider == "brave":
55
+ result = json.loads(brave_web_search(query, limit))
56
+ elif provider == "tavily":
57
+ result = json.loads(tavily_web_search(query, limit))
58
+ result["search_provider"] = provider
59
+ return json.dumps(result, ensure_ascii=False)
42
60
 
43
61
 
44
62
  def tavily_web_search(query: str, limit: Optional[int] = 20) -> str:
@@ -13,10 +13,11 @@ def post_with_retries(
13
13
  api_key: Optional[str] = None,
14
14
  timeout: int = 30,
15
15
  num_retries: int = 3,
16
+ backoff_factor: float = 3.0,
16
17
  ) -> requests.Response:
17
18
  retry_strategy = Retry(
18
19
  total=num_retries,
19
- backoff_factor=3,
20
+ backoff_factor=backoff_factor,
20
21
  status_forcelist=[429, 500, 502, 503, 504],
21
22
  allowed_methods=["POST"],
22
23
  )
@@ -24,6 +25,7 @@ def post_with_retries(
24
25
  session = requests.Session()
25
26
  adapter = requests.adapters.HTTPAdapter(max_retries=retry_strategy)
26
27
  session.mount("https://", adapter)
28
+ session.mount("http://", adapter)
27
29
 
28
30
  headers = {
29
31
  "x-api-key": api_key,
@@ -42,11 +44,12 @@ def get_with_retries(
42
44
  api_key: Optional[str] = None,
43
45
  timeout: int = 30,
44
46
  num_retries: int = 3,
47
+ backoff_factor: float = 3.0,
45
48
  params: Optional[Dict[str, Any]] = None,
46
49
  ) -> requests.Response:
47
50
  retry_strategy = Retry(
48
51
  total=num_retries,
49
- backoff_factor=30,
52
+ backoff_factor=backoff_factor,
50
53
  status_forcelist=[429, 500, 502, 503, 504],
51
54
  allowed_methods=["GET"],
52
55
  )
@@ -54,6 +57,7 @@ def get_with_retries(
54
57
  session = requests.Session()
55
58
  adapter = requests.adapters.HTTPAdapter(max_retries=retry_strategy)
56
59
  session.mount("https://", adapter)
60
+ session.mount("http://", adapter)
57
61
 
58
62
  headers = {}
59
63
  if api_key:
@@ -145,3 +149,19 @@ def extract_json(text: str) -> Any:
145
149
  def encode_prompt(template: str, **kwargs: Any) -> str:
146
150
  template_obj = Template(template)
147
151
  return template_obj.render(**kwargs).strip()
152
+
153
+
154
def truncate_content(
    content: str,
    max_length: int,
) -> str:
    """Shorten ``content`` by keeping its head and tail around a disclaimer.

    Content of at most ``max_length`` characters is returned unchanged.
    Longer content is reduced to its first and last ``max_length // 2``
    characters, joined by a notice that truncation happened.

    Note that the notice itself is not counted against ``max_length``, so
    the returned string is longer than ``max_length`` by the length of the
    notice.

    Args:
        content: Text to truncate.
        max_length: Length threshold, in characters, above which the
            content is truncated.

    Returns:
        The original text, or ``prefix + disclaimer + suffix`` when the
        text exceeds ``max_length``.
    """
    if len(content) <= max_length:
        return content

    disclaimer = (
        f"\n\n..._This content has been truncated to stay below {max_length} characters_...\n\n"
    )
    # Clamp so that a negative max_length keeps nothing instead of producing
    # negative slice bounds.
    half_length = max(max_length // 2, 0)
    prefix = content[:half_length]
    # Index the tail from the front: ``content[-0:]`` would return the whole
    # string when half_length is 0 (max_length of 0 or 1).
    suffix = content[len(content) - half_length :]
    return prefix + disclaimer + suffix
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.2.1
3
+ Version: 1.3.0
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -44,16 +44,26 @@ A collection of MCP tools related to the search of scientific papers:
44
44
  - Web search: Exa/Brave/Tavily
45
45
  - Page crawler
46
46
 
47
- Install:
47
+ ## Install
48
+
49
+ - Using pip (end users):
48
50
  ```
49
51
  pip3 install academia-mcp
50
52
  ```
51
53
 
54
+ - For development (uv + Makefile):
55
+ ```
56
+ uv venv .venv
57
+ make install
58
+ ```
59
+
60
+ ## Examples
52
61
  Comprehensive report screencast: https://www.youtube.com/watch?v=4bweqQcN6w8
53
62
 
54
63
  Single paper screencast: https://www.youtube.com/watch?v=IAAPMptJ5k8
55
64
 
56
- Claude Desktop config:
65
+
66
+ ## Claude Desktop config
57
67
  ```
58
68
  {
59
69
  "mcpServers": {
@@ -69,3 +79,41 @@ Claude Desktop config:
69
79
  }
70
80
  }
71
81
  ```
82
+
83
+ ## Running the server (CLI)
84
+
85
+ ```
86
+ uv run -m academia_mcp --transport streamable-http
87
+ ```
88
+
89
+ Notes:
90
+ - Transports supported: `stdio`, `sse`, `streamable-http`.
91
+ - Host/port are used for HTTP transports; for `stdio` they are ignored.
92
+
93
+ ## Makefile targets
94
+
95
+ - `make install`: install the package in editable mode with uv.
96
+ - `make validate`: run black, flake8, and mypy (strict).
97
+ - `make test`: run the test suite with pytest.
98
+ - `make publish`: build and publish using uv.
99
+
100
+ ## Environment variables
101
+
102
+ Set as needed depending on which tools you use:
103
+
104
+ - `TAVILY_API_KEY`: enables Tavily in `web_search`.
105
+ - `EXA_API_KEY`: enables Exa in `web_search` and `visit_webpage`.
106
+ - `BRAVE_API_KEY`: enables Brave in `web_search`.
107
+ - `OPENROUTER_API_KEY`: required for `document_qa`.
108
+ - `BASE_URL`: override OpenRouter base URL for `document_qa` and bitflip tools.
109
+ - `DOCUMENT_QA_MODEL_NAME`: override default model for `document_qa`.
110
+ - `BITFLIP_MODEL_NAME`: override default model for bitflip tools.
111
+ - `WORKSPACE_DIR`: directory for generated files (PDFs, temp artifacts).
112
+
113
+ ## md_to_pdf requirements
114
+
115
+ The `md_to_pdf` tool invokes `pdflatex`. Ensure a LaTeX distribution is installed and `pdflatex` is on PATH. On Debian/Ubuntu:
116
+
117
+ ```
118
+ sudo apt install texlive-latex-base texlive-fonts-recommended texlive-latex-extra texlive-science
119
+ ```
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "academia-mcp"
7
- version = "1.2.1"
7
+ version = "1.3.0"
8
8
  description = "MCP server that provides different tools to search for scientific publications"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -13,3 +13,8 @@ def test_visit_webpage_exa() -> None:
13
13
  assert content is not None
14
14
  assert "Example Domain" in content
15
15
  assert "illustrative" in content
16
+
17
+
18
def test_visit_webpage_pdf() -> None:
    """Check that visiting an arXiv PDF link returns the paper's text."""
    # NOTE(review): this hits the network, and a PDF response is not HTML,
    # so the result presumably depends on the Exa fallback (EXA_API_KEY)
    # being available -- confirm in CI.
    pdf_url = "https://arxiv.org/pdf/2409.06820"
    page_content = visit_webpage(pdf_url)
    assert "A Benchmark for Role-Playing" in page_content
@@ -1,35 +0,0 @@
1
- # Academia MCP
2
-
3
- A collection of MCP tools related to the search of scientific papers:
4
- - ArXiv search and download
5
- - ACL Anthology search
6
- - Hugging Face datasets search
7
- - Semantic Scholar citation graphs
8
- - Web search: Exa/Brave/Tavily
9
- - Page crawler
10
-
11
- Install:
12
- ```
13
- pip3 install academia-mcp
14
- ```
15
-
16
- Comprehensive report screencast: https://www.youtube.com/watch?v=4bweqQcN6w8
17
-
18
- Single paper screencast: https://www.youtube.com/watch?v=IAAPMptJ5k8
19
-
20
- Claude Desktop config:
21
- ```
22
- {
23
- "mcpServers": {
24
- "academia": {
25
- "command": "python3",
26
- "args": [
27
- "-m",
28
- "academia_mcp",
29
- "--transport",
30
- "stdio"
31
- ]
32
- }
33
- }
34
- }
35
- ```
File without changes
File without changes