academia-mcp 1.10.3__py3-none-any.whl → 1.10.5__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- academia_mcp/tools/visit_webpage.py +37 -21
- {academia_mcp-1.10.3.dist-info → academia_mcp-1.10.5.dist-info}/METADATA +1 -1
- {academia_mcp-1.10.3.dist-info → academia_mcp-1.10.5.dist-info}/RECORD +7 -7
- {academia_mcp-1.10.3.dist-info → academia_mcp-1.10.5.dist-info}/WHEEL +0 -0
- {academia_mcp-1.10.3.dist-info → academia_mcp-1.10.5.dist-info}/entry_points.txt +0 -0
- {academia_mcp-1.10.3.dist-info → academia_mcp-1.10.5.dist-info}/licenses/LICENSE +0 -0
- {academia_mcp-1.10.3.dist-info → academia_mcp-1.10.5.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
import re
|
2
2
|
import json
|
3
|
-
from typing import Optional
|
3
|
+
from typing import Optional, Dict, Any, cast
|
4
4
|
|
5
5
|
from markdownify import markdownify # type: ignore
|
6
6
|
|
@@ -11,9 +11,10 @@ from academia_mcp.utils import sanitize_output
|
|
11
11
|
EXA_CONTENTS_URL = "https://api.exa.ai/contents"
|
12
12
|
TAVILY_EXTRACT_URL = "https://api.tavily.com/extract"
|
13
13
|
AVAILABLE_PROVIDERS = ("basic", "exa", "tavily")
|
14
|
+
ERROR_MESSAGE = "Failed to get content from the page. Try to use another provider."
|
14
15
|
|
15
16
|
|
16
|
-
def _exa_visit_webpage(url: str) -> str:
|
17
|
+
def _exa_visit_webpage(url: str) -> Dict[str, Any]:
|
17
18
|
key = settings.EXA_API_KEY or ""
|
18
19
|
assert key, "Error: EXA_API_KEY is not set and no api_key was provided"
|
19
20
|
payload = {
|
@@ -21,24 +22,48 @@ def _exa_visit_webpage(url: str) -> str:
|
|
21
22
|
"text": True,
|
22
23
|
}
|
23
24
|
response = post_with_retries(EXA_CONTENTS_URL, payload=payload, api_key=key)
|
24
|
-
|
25
|
+
results = response.json()["results"]
|
26
|
+
if not results:
|
27
|
+
return {"error": ERROR_MESSAGE}
|
28
|
+
return cast(Dict[str, Any], results[0])
|
25
29
|
|
26
30
|
|
27
|
-
def _tavily_visit_webpage(url: str) -> str:
|
31
|
+
def _tavily_visit_webpage(url: str) -> Dict[str, Any]:
|
28
32
|
key = settings.TAVILY_API_KEY or ""
|
29
33
|
assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
|
30
34
|
payload = {
|
31
35
|
"urls": [url],
|
32
36
|
}
|
33
37
|
response = post_with_retries(TAVILY_EXTRACT_URL, payload=payload, api_key=key)
|
34
|
-
|
38
|
+
results = response.json()["results"]
|
39
|
+
if not results:
|
40
|
+
return {"error": ERROR_MESSAGE}
|
41
|
+
return {"text": results[0]["raw_content"]}
|
42
|
+
|
43
|
+
|
44
|
+
def _basic_visit_webpage(url: str) -> Dict[str, Any]:
|
45
|
+
try:
|
46
|
+
response = get_with_retries(url)
|
47
|
+
content_type = response.headers.get("content-type", "").lower()
|
48
|
+
if not content_type or (
|
49
|
+
not content_type.startswith("text/") and "html" not in content_type
|
50
|
+
):
|
51
|
+
if settings.EXA_API_KEY:
|
52
|
+
return _exa_visit_webpage(url)
|
53
|
+
return {"error": f"Unsupported content-type: {content_type or 'unknown'}"}
|
54
|
+
markdown_content = markdownify(response.text).strip()
|
55
|
+
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|
56
|
+
return {"text": markdown_content}
|
57
|
+
except Exception as e:
|
58
|
+
return {"error": str(e) + "\n" + ERROR_MESSAGE}
|
35
59
|
|
36
60
|
|
37
61
|
def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
|
38
62
|
"""
|
39
63
|
Visit a webpage and return the content.
|
40
64
|
|
41
|
-
Returns a JSON object serialized to a string. The structure is: {"
|
65
|
+
Returns a JSON object serialized to a string. The structure is: {"id": "...", "text": "..."}.
|
66
|
+
If there are errors, the structure is: {"id": "...", "error": "..."}.
|
42
67
|
Use `json.loads` to deserialize the result if you want to get specific fields.
|
43
68
|
Try to use both "tavily" and "basic" providers. They might work differently for the same URL.
|
44
69
|
|
@@ -51,21 +76,12 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
|
|
51
76
|
), f"Invalid provider: {provider}. Available providers: {AVAILABLE_PROVIDERS}"
|
52
77
|
|
53
78
|
if provider == "exa" and settings.EXA_API_KEY:
|
54
|
-
|
79
|
+
result = _exa_visit_webpage(url)
|
55
80
|
elif provider == "tavily" and settings.TAVILY_API_KEY:
|
56
|
-
|
81
|
+
result = _tavily_visit_webpage(url)
|
57
82
|
else:
|
58
|
-
|
83
|
+
result = _basic_visit_webpage(url)
|
59
84
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
if not content_type or (not content_type.startswith("text/") and "html" not in content_type):
|
64
|
-
if settings.EXA_API_KEY:
|
65
|
-
return _exa_visit_webpage(url)
|
66
|
-
return json.dumps(
|
67
|
-
{"id": url, "error": f"Unsupported content-type: {content_type or 'unknown'}"}
|
68
|
-
)
|
69
|
-
markdown_content = markdownify(response.text).strip()
|
70
|
-
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|
71
|
-
return sanitize_output(json.dumps({"id": url, "text": markdown_content}))
|
85
|
+
result["id"] = url
|
86
|
+
result["provider"] = provider
|
87
|
+
return sanitize_output(json.dumps(result, ensure_ascii=False))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: academia-mcp
|
3
|
-
Version: 1.10.3
|
3
|
+
Version: 1.10.5
|
4
4
|
Summary: MCP server that provides different tools to search for scientific publications
|
5
5
|
Author-email: Ilya Gusev <phoenixilya@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
|
@@ -22,11 +22,11 @@ academia_mcp/tools/review.py,sha256=Va0lFJJKuk-NvWhKS3UZ-Dnuk7CyuDQ4S1nd70D-ffE,
|
|
22
22
|
academia_mcp/tools/s2.py,sha256=QX7-pbetab3Xt_1tvVPU6o5D_NAe9y6jcTGRBK1vwtY,6200
|
23
23
|
academia_mcp/tools/show_image.py,sha256=jiJlQ53dbZ0T61OBhCT3IKVvBl9NHc6jHgWLfg5BxiE,3856
|
24
24
|
academia_mcp/tools/speech_to_text.py,sha256=YZzMqdvunzXkpcadP_mYhm6cs4qH1Y_42SfY-7eX4O4,1601
|
25
|
-
academia_mcp/tools/visit_webpage.py,sha256=
|
25
|
+
academia_mcp/tools/visit_webpage.py,sha256=swlFwWRzWc7-AHP2ouRZJScSTA4dHZ32fuJnA2V0lUc,3311
|
26
26
|
academia_mcp/tools/web_search.py,sha256=0gKE3gtLBhdQ6G1eSgYLs1LIuo__PHwsYx5I5mTn254,6408
|
27
|
-
academia_mcp-1.10.
|
28
|
-
academia_mcp-1.10.
|
29
|
-
academia_mcp-1.10.
|
30
|
-
academia_mcp-1.10.
|
31
|
-
academia_mcp-1.10.
|
32
|
-
academia_mcp-1.10.
|
27
|
+
academia_mcp-1.10.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
28
|
+
academia_mcp-1.10.5.dist-info/METADATA,sha256=UeUZu8wGM23bbwue80r60CSinNd2lcCWoIPHXdmr0Sc,6311
|
29
|
+
academia_mcp-1.10.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
30
|
+
academia_mcp-1.10.5.dist-info/entry_points.txt,sha256=gxkiKJ74w2FwJpSECpjA3XtCfI5ZfrM6N8cqnwsq4yY,51
|
31
|
+
academia_mcp-1.10.5.dist-info/top_level.txt,sha256=CzGpRFsRRJRqWEb1e3SUlcfGqRzOxevZGaJWrtGF8W0,13
|
32
|
+
academia_mcp-1.10.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|