academia-mcp 1.10.3__tar.gz → 1.10.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/PKG-INFO +1 -1
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/visit_webpage.py +37 -21
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp.egg-info/PKG-INFO +1 -1
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/pyproject.toml +1 -1
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_visit_webpage.py +6 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_web_search.py +8 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/LICENSE +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/README.md +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/__init__.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/__main__.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/files.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/llm.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/pdf.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/py.typed +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/server.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/settings.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/__init__.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/anthology_search.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/arxiv_download.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/arxiv_search.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/bitflip.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/document_qa.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/hf_datasets_search.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/latex.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/py.typed +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/review.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/s2.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/show_image.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/speech_to_text.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/tools/web_search.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp/utils.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp.egg-info/SOURCES.txt +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp.egg-info/dependency_links.txt +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp.egg-info/entry_points.txt +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp.egg-info/requires.txt +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/academia_mcp.egg-info/top_level.txt +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/setup.cfg +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_anthology_search.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_arxiv_download.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_arxiv_search.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_bitflip.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_document_qa.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_extract_json.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_hf_dataset_search.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_latex.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_review.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_s2.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_show_image.py +0 -0
- {academia_mcp-1.10.3 → academia_mcp-1.10.5}/tests/test_speech_to_text.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: academia-mcp
|
3
|
-
Version: 1.10.3
|
3
|
+
Version: 1.10.5
|
4
4
|
Summary: MCP server that provides different tools to search for scientific publications
|
5
5
|
Author-email: Ilya Gusev <phoenixilya@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import re
|
2
2
|
import json
|
3
|
-
from typing import Optional
|
3
|
+
from typing import Optional, Dict, Any, cast
|
4
4
|
|
5
5
|
from markdownify import markdownify # type: ignore
|
6
6
|
|
@@ -11,9 +11,10 @@ from academia_mcp.utils import sanitize_output
|
|
11
11
|
EXA_CONTENTS_URL = "https://api.exa.ai/contents"
|
12
12
|
TAVILY_EXTRACT_URL = "https://api.tavily.com/extract"
|
13
13
|
AVAILABLE_PROVIDERS = ("basic", "exa", "tavily")
|
14
|
+
ERROR_MESSAGE = "Failed to get content from the page. Try to use another provider."
|
14
15
|
|
15
16
|
|
16
|
-
def _exa_visit_webpage(url: str) -> str:
|
17
|
+
def _exa_visit_webpage(url: str) -> Dict[str, Any]:
|
17
18
|
key = settings.EXA_API_KEY or ""
|
18
19
|
assert key, "Error: EXA_API_KEY is not set and no api_key was provided"
|
19
20
|
payload = {
|
@@ -21,24 +22,48 @@ def _exa_visit_webpage(url: str) -> str:
|
|
21
22
|
"text": True,
|
22
23
|
}
|
23
24
|
response = post_with_retries(EXA_CONTENTS_URL, payload=payload, api_key=key)
|
24
|
-
|
25
|
+
results = response.json()["results"]
|
26
|
+
if not results:
|
27
|
+
return {"error": ERROR_MESSAGE}
|
28
|
+
return cast(Dict[str, Any], results[0])
|
25
29
|
|
26
30
|
|
27
|
-
def _tavily_visit_webpage(url: str) -> str:
|
31
|
+
def _tavily_visit_webpage(url: str) -> Dict[str, Any]:
|
28
32
|
key = settings.TAVILY_API_KEY or ""
|
29
33
|
assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
|
30
34
|
payload = {
|
31
35
|
"urls": [url],
|
32
36
|
}
|
33
37
|
response = post_with_retries(TAVILY_EXTRACT_URL, payload=payload, api_key=key)
|
34
|
-
|
38
|
+
results = response.json()["results"]
|
39
|
+
if not results:
|
40
|
+
return {"error": ERROR_MESSAGE}
|
41
|
+
return {"text": results[0]["raw_content"]}
|
42
|
+
|
43
|
+
|
44
|
+
def _basic_visit_webpage(url: str) -> Dict[str, Any]:
|
45
|
+
try:
|
46
|
+
response = get_with_retries(url)
|
47
|
+
content_type = response.headers.get("content-type", "").lower()
|
48
|
+
if not content_type or (
|
49
|
+
not content_type.startswith("text/") and "html" not in content_type
|
50
|
+
):
|
51
|
+
if settings.EXA_API_KEY:
|
52
|
+
return _exa_visit_webpage(url)
|
53
|
+
return {"error": f"Unsupported content-type: {content_type or 'unknown'}"}
|
54
|
+
markdown_content = markdownify(response.text).strip()
|
55
|
+
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|
56
|
+
return {"text": markdown_content}
|
57
|
+
except Exception as e:
|
58
|
+
return {"error": str(e) + "\n" + ERROR_MESSAGE}
|
35
59
|
|
36
60
|
|
37
61
|
def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
|
38
62
|
"""
|
39
63
|
Visit a webpage and return the content.
|
40
64
|
|
41
|
-
Returns a JSON object serialized to a string. The structure is: {"
|
65
|
+
Returns a JSON object serialized to a string. The structure is: {"id": "...", "text": "..."}.
|
66
|
+
If there are errors, the structure is: {"id": "...", "error": "..."}.
|
42
67
|
Use `json.loads` to deserialize the result if you want to get specific fields.
|
43
68
|
Try to use both "tavily" and "basic" providers. They might work differently for the same URL.
|
44
69
|
|
@@ -51,21 +76,12 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
|
|
51
76
|
), f"Invalid provider: {provider}. Available providers: {AVAILABLE_PROVIDERS}"
|
52
77
|
|
53
78
|
if provider == "exa" and settings.EXA_API_KEY:
|
54
|
-
|
79
|
+
result = _exa_visit_webpage(url)
|
55
80
|
elif provider == "tavily" and settings.TAVILY_API_KEY:
|
56
|
-
|
81
|
+
result = _tavily_visit_webpage(url)
|
57
82
|
else:
|
58
|
-
|
83
|
+
result = _basic_visit_webpage(url)
|
59
84
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
if not content_type or (not content_type.startswith("text/") and "html" not in content_type):
|
64
|
-
if settings.EXA_API_KEY:
|
65
|
-
return _exa_visit_webpage(url)
|
66
|
-
return json.dumps(
|
67
|
-
{"id": url, "error": f"Unsupported content-type: {content_type or 'unknown'}"}
|
68
|
-
)
|
69
|
-
markdown_content = markdownify(response.text).strip()
|
70
|
-
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|
71
|
-
return sanitize_output(json.dumps({"id": url, "text": markdown_content}))
|
85
|
+
result["id"] = url
|
86
|
+
result["provider"] = provider
|
87
|
+
return sanitize_output(json.dumps(result, ensure_ascii=False))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: academia-mcp
|
3
|
-
Version: 1.10.3
|
3
|
+
Version: 1.10.5
|
4
4
|
Summary: MCP server that provides different tools to search for scientific publications
|
5
5
|
Author-email: Ilya Gusev <phoenixilya@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
|
@@ -30,3 +30,9 @@ def test_visit_webpage_github_issue() -> None:
|
|
30
30
|
url = "https://github.com/numpy/numpy/issues/10881"
|
31
31
|
content = visit_webpage(url, provider="tavily")
|
32
32
|
assert "on Apr 15, 2018" in str(content)
|
33
|
+
|
34
|
+
|
35
|
+
def test_visit_webpage_exception() -> None:
|
36
|
+
url = "https://www.researchgate.net/profile/Peter-Giovannini"
|
37
|
+
content = visit_webpage(url)
|
38
|
+
assert "error" in str(content)
|
@@ -22,3 +22,11 @@ def test_web_search_brave() -> None:
|
|
22
22
|
assert "The Mystery of the Pathological" in result
|
23
23
|
results = json.loads(result)
|
24
24
|
assert results
|
25
|
+
|
26
|
+
|
27
|
+
def test_web_search_bug() -> None:
|
28
|
+
results = web_search(
|
29
|
+
'"Can Hiccup Supply Enough Fish to Maintain a Dragon\'s Diet?" University of Leicester'
|
30
|
+
)
|
31
|
+
assert results
|
32
|
+
assert len(results.splitlines()) == 1
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|