academia-mcp 1.10.3__tar.gz → 1.10.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/PKG-INFO +1 -1
  2. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/visit_webpage.py +37 -21
  3. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp.egg-info/PKG-INFO +1 -1
  4. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/pyproject.toml +1 -1
  5. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_visit_webpage.py +6 -0
  6. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_web_search.py +8 -0
  7. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/LICENSE +0 -0
  8. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/README.md +0 -0
  9. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/__init__.py +0 -0
  10. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/__main__.py +0 -0
  11. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/files.py +0 -0
  12. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
  13. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
  14. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/llm.py +0 -0
  15. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/pdf.py +0 -0
  16. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/py.typed +0 -0
  17. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/server.py +0 -0
  18. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/settings.py +0 -0
  19. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/__init__.py +0 -0
  20. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/anthology_search.py +0 -0
  21. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/arxiv_download.py +0 -0
  22. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/arxiv_search.py +0 -0
  23. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/bitflip.py +0 -0
  24. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/document_qa.py +0 -0
  25. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/hf_datasets_search.py +0 -0
  26. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/latex.py +0 -0
  27. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/py.typed +0 -0
  28. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/review.py +0 -0
  29. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/s2.py +0 -0
  30. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/show_image.py +0 -0
  31. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/speech_to_text.py +0 -0
  32. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/web_search.py +0 -0
  33. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/utils.py +0 -0
  34. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp.egg-info/SOURCES.txt +0 -0
  35. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp.egg-info/dependency_links.txt +0 -0
  36. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp.egg-info/entry_points.txt +0 -0
  37. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp.egg-info/requires.txt +0 -0
  38. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp.egg-info/top_level.txt +0 -0
  39. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/setup.cfg +0 -0
  40. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_anthology_search.py +0 -0
  41. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_arxiv_download.py +0 -0
  42. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_arxiv_search.py +0 -0
  43. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_bitflip.py +0 -0
  44. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_document_qa.py +0 -0
  45. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_extract_json.py +0 -0
  46. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_hf_dataset_search.py +0 -0
  47. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_latex.py +0 -0
  48. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_review.py +0 -0
  49. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_s2.py +0 -0
  50. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_show_image.py +0 -0
  51. {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_speech_to_text.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.10.3
3
+ Version: 1.10.5
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -1,6 +1,6 @@
1
1
  import re
2
2
  import json
3
- from typing import Optional
3
+ from typing import Optional, Dict, Any, cast
4
4
 
5
5
  from markdownify import markdownify # type: ignore
6
6
 
@@ -11,9 +11,10 @@ from academia_mcp.utils import sanitize_output
11
11
  EXA_CONTENTS_URL = "https://api.exa.ai/contents"
12
12
  TAVILY_EXTRACT_URL = "https://api.tavily.com/extract"
13
13
  AVAILABLE_PROVIDERS = ("basic", "exa", "tavily")
14
+ ERROR_MESSAGE = "Failed to get content from the page. Try to use another provider."
14
15
 
15
16
 
16
- def _exa_visit_webpage(url: str) -> str:
17
+ def _exa_visit_webpage(url: str) -> Dict[str, Any]:
17
18
  key = settings.EXA_API_KEY or ""
18
19
  assert key, "Error: EXA_API_KEY is not set and no api_key was provided"
19
20
  payload = {
@@ -21,24 +22,48 @@ def _exa_visit_webpage(url: str) -> str:
21
22
  "text": True,
22
23
  }
23
24
  response = post_with_retries(EXA_CONTENTS_URL, payload=payload, api_key=key)
24
- return sanitize_output(json.dumps(response.json()["results"][0]))
25
+ results = response.json()["results"]
26
+ if not results:
27
+ return {"error": ERROR_MESSAGE}
28
+ return cast(Dict[str, Any], results[0])
25
29
 
26
30
 
27
- def _tavily_visit_webpage(url: str) -> str:
31
+ def _tavily_visit_webpage(url: str) -> Dict[str, Any]:
28
32
  key = settings.TAVILY_API_KEY or ""
29
33
  assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
30
34
  payload = {
31
35
  "urls": [url],
32
36
  }
33
37
  response = post_with_retries(TAVILY_EXTRACT_URL, payload=payload, api_key=key)
34
- return sanitize_output(json.dumps(response.json()["results"][0]["raw_content"]))
38
+ results = response.json()["results"]
39
+ if not results:
40
+ return {"error": ERROR_MESSAGE}
41
+ return {"text": results[0]["raw_content"]}
42
+
43
+
44
+ def _basic_visit_webpage(url: str) -> Dict[str, Any]:
45
+ try:
46
+ response = get_with_retries(url)
47
+ content_type = response.headers.get("content-type", "").lower()
48
+ if not content_type or (
49
+ not content_type.startswith("text/") and "html" not in content_type
50
+ ):
51
+ if settings.EXA_API_KEY:
52
+ return _exa_visit_webpage(url)
53
+ return {"error": f"Unsupported content-type: {content_type or 'unknown'}"}
54
+ markdown_content = markdownify(response.text).strip()
55
+ markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
56
+ return {"text": markdown_content}
57
+ except Exception as e:
58
+ return {"error": str(e) + "\n" + ERROR_MESSAGE}
35
59
 
36
60
 
37
61
  def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
38
62
  """
39
63
  Visit a webpage and return the content.
40
64
 
41
- Returns a JSON object serialized to a string. The structure is: {"url": "...", "text": "..."}
65
+ Returns a JSON object serialized to a string. The structure is: {"id": "...", "text": "..."}.
66
+ If there are errors, the structure is: {"id": "...", "error": "..."}.
42
67
  Use `json.loads` to deserialize the result if you want to get specific fields.
43
68
  Try to use both "tavily" and "basic" providers. They might work differently for the same URL.
44
69
 
@@ -51,21 +76,12 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
51
76
  ), f"Invalid provider: {provider}. Available providers: {AVAILABLE_PROVIDERS}"
52
77
 
53
78
  if provider == "exa" and settings.EXA_API_KEY:
54
- return _exa_visit_webpage(url)
79
+ result = _exa_visit_webpage(url)
55
80
  elif provider == "tavily" and settings.TAVILY_API_KEY:
56
- return _tavily_visit_webpage(url)
81
+ result = _tavily_visit_webpage(url)
57
82
  else:
58
- provider = "basic"
83
+ result = _basic_visit_webpage(url)
59
84
 
60
- assert provider == "basic"
61
- response = get_with_retries(url)
62
- content_type = response.headers.get("content-type", "").lower()
63
- if not content_type or (not content_type.startswith("text/") and "html" not in content_type):
64
- if settings.EXA_API_KEY:
65
- return _exa_visit_webpage(url)
66
- return json.dumps(
67
- {"id": url, "error": f"Unsupported content-type: {content_type or 'unknown'}"}
68
- )
69
- markdown_content = markdownify(response.text).strip()
70
- markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
71
- return sanitize_output(json.dumps({"id": url, "text": markdown_content}))
85
+ result["id"] = url
86
+ result["provider"] = provider
87
+ return sanitize_output(json.dumps(result, ensure_ascii=False))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: academia-mcp
3
- Version: 1.10.3
3
+ Version: 1.10.5
4
4
  Summary: MCP server that provides different tools to search for scientific publications
5
5
  Author-email: Ilya Gusev <phoenixilya@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "academia-mcp"
7
- version = "1.10.3"
7
+ version = "1.10.5"
8
8
  description = "MCP server that provides different tools to search for scientific publications"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -30,3 +30,9 @@ def test_visit_webpage_github_issue() -> None:
30
30
  url = "https://github.com/numpy/numpy/issues/10881"
31
31
  content = visit_webpage(url, provider="tavily")
32
32
  assert "on Apr 15, 2018" in str(content)
33
+
34
+
35
+ def test_visit_webpage_exception() -> None:
36
+ url = "https://www.researchgate.net/profile/Peter-Giovannini"
37
+ content = visit_webpage(url)
38
+ assert "error" in str(content)
@@ -22,3 +22,11 @@ def test_web_search_brave() -> None:
22
22
  assert "The Mystery of the Pathological" in result
23
23
  results = json.loads(result)
24
24
  assert results
25
+
26
+
27
+ def test_web_search_bug() -> None:
28
+ results = web_search(
29
+ '"Can Hiccup Supply Enough Fish to Maintain a Dragon\'s Diet?" University of Leicester'
30
+ )
31
+ assert results
32
+ assert len(results.splitlines()) == 1
File without changes
File without changes
File without changes