academia-mcp 1.10.4__tar.gz → 1.10.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/PKG-INFO +1 -1
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/visit_webpage.py +30 -25
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/web_search.py +32 -5
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp.egg-info/PKG-INFO +1 -1
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/pyproject.toml +1 -1
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_web_search.py +12 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/LICENSE +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/README.md +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/__init__.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/__main__.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/files.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/llm.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/pdf.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/py.typed +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/server.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/settings.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/__init__.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/anthology_search.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/arxiv_download.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/arxiv_search.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/bitflip.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/document_qa.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/hf_datasets_search.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/latex.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/py.typed +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/review.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/s2.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/show_image.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/tools/speech_to_text.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp/utils.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp.egg-info/SOURCES.txt +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp.egg-info/dependency_links.txt +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp.egg-info/entry_points.txt +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp.egg-info/requires.txt +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/academia_mcp.egg-info/top_level.txt +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/setup.cfg +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_anthology_search.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_arxiv_download.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_arxiv_search.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_bitflip.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_document_qa.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_extract_json.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_hf_dataset_search.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_latex.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_review.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_s2.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_show_image.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_speech_to_text.py +0 -0
- {academia_mcp-1.10.4 → academia_mcp-1.10.6}/tests/test_visit_webpage.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: academia-mcp
|
3
|
-
Version: 1.10.4
|
3
|
+
Version: 1.10.6
|
4
4
|
Summary: MCP server that provides different tools to search for scientific publications
|
5
5
|
Author-email: Ilya Gusev <phoenixilya@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import re
|
2
2
|
import json
|
3
|
-
from typing import Optional
|
3
|
+
from typing import Optional, Dict, Any, cast
|
4
4
|
|
5
5
|
from markdownify import markdownify # type: ignore
|
6
6
|
|
@@ -14,7 +14,7 @@ AVAILABLE_PROVIDERS = ("basic", "exa", "tavily")
|
|
14
14
|
ERROR_MESSAGE = "Failed to get content from the page. Try to use another provider."
|
15
15
|
|
16
16
|
|
17
|
-
def _exa_visit_webpage(url: str) -> str:
|
17
|
+
def _exa_visit_webpage(url: str) -> Dict[str, Any]:
|
18
18
|
key = settings.EXA_API_KEY or ""
|
19
19
|
assert key, "Error: EXA_API_KEY is not set and no api_key was provided"
|
20
20
|
payload = {
|
@@ -24,11 +24,11 @@ def _exa_visit_webpage(url: str) -> str:
|
|
24
24
|
response = post_with_retries(EXA_CONTENTS_URL, payload=payload, api_key=key)
|
25
25
|
results = response.json()["results"]
|
26
26
|
if not results:
|
27
|
-
return
|
28
|
-
return
|
27
|
+
return {"error": ERROR_MESSAGE}
|
28
|
+
return cast(Dict[str, Any], results[0])
|
29
29
|
|
30
30
|
|
31
|
-
def _tavily_visit_webpage(url: str) -> str:
|
31
|
+
def _tavily_visit_webpage(url: str) -> Dict[str, Any]:
|
32
32
|
key = settings.TAVILY_API_KEY or ""
|
33
33
|
assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
|
34
34
|
payload = {
|
@@ -37,8 +37,25 @@ def _tavily_visit_webpage(url: str) -> str:
|
|
37
37
|
response = post_with_retries(TAVILY_EXTRACT_URL, payload=payload, api_key=key)
|
38
38
|
results = response.json()["results"]
|
39
39
|
if not results:
|
40
|
-
return
|
41
|
-
return
|
40
|
+
return {"error": ERROR_MESSAGE}
|
41
|
+
return {"text": results[0]["raw_content"]}
|
42
|
+
|
43
|
+
|
44
|
+
def _basic_visit_webpage(url: str) -> Dict[str, Any]:
|
45
|
+
try:
|
46
|
+
response = get_with_retries(url)
|
47
|
+
content_type = response.headers.get("content-type", "").lower()
|
48
|
+
if not content_type or (
|
49
|
+
not content_type.startswith("text/") and "html" not in content_type
|
50
|
+
):
|
51
|
+
if settings.EXA_API_KEY:
|
52
|
+
return _exa_visit_webpage(url)
|
53
|
+
return {"error": f"Unsupported content-type: {content_type or 'unknown'}"}
|
54
|
+
markdown_content = markdownify(response.text).strip()
|
55
|
+
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|
56
|
+
return {"text": markdown_content}
|
57
|
+
except Exception as e:
|
58
|
+
return {"error": str(e) + "\n" + ERROR_MESSAGE}
|
42
59
|
|
43
60
|
|
44
61
|
def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
|
@@ -59,24 +76,12 @@ def visit_webpage(url: str, provider: Optional[str] = "basic") -> str:
|
|
59
76
|
), f"Invalid provider: {provider}. Available providers: {AVAILABLE_PROVIDERS}"
|
60
77
|
|
61
78
|
if provider == "exa" and settings.EXA_API_KEY:
|
62
|
-
|
79
|
+
result = _exa_visit_webpage(url)
|
63
80
|
elif provider == "tavily" and settings.TAVILY_API_KEY:
|
64
|
-
|
81
|
+
result = _tavily_visit_webpage(url)
|
65
82
|
else:
|
66
|
-
|
83
|
+
result = _basic_visit_webpage(url)
|
67
84
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
except Exception as e:
|
72
|
-
return json.dumps({"id": url, "error": str(e)})
|
73
|
-
content_type = response.headers.get("content-type", "").lower()
|
74
|
-
if not content_type or (not content_type.startswith("text/") and "html" not in content_type):
|
75
|
-
if settings.EXA_API_KEY:
|
76
|
-
return _exa_visit_webpage(url)
|
77
|
-
return json.dumps(
|
78
|
-
{"id": url, "error": f"Unsupported content-type: {content_type or 'unknown'}"}
|
79
|
-
)
|
80
|
-
markdown_content = markdownify(response.text).strip()
|
81
|
-
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|
82
|
-
return sanitize_output(json.dumps({"id": url, "text": markdown_content}))
|
85
|
+
result["id"] = url
|
86
|
+
result["provider"] = provider
|
87
|
+
return sanitize_output(json.dumps(result, ensure_ascii=False))
|
@@ -1,5 +1,5 @@
|
|
1
1
|
import json
|
2
|
-
from typing import Optional
|
2
|
+
from typing import Optional, List
|
3
3
|
|
4
4
|
from academia_mcp.utils import post_with_retries, get_with_retries
|
5
5
|
from academia_mcp.settings import settings
|
@@ -16,6 +16,7 @@ def web_search(
|
|
16
16
|
query: str,
|
17
17
|
limit: Optional[int] = 20,
|
18
18
|
provider: Optional[str] = "tavily",
|
19
|
+
include_domains: Optional[List[str]] = None,
|
19
20
|
) -> str:
|
20
21
|
"""
|
21
22
|
Search the web using Exa Search, Brave Search or Tavily and return normalized results.
|
@@ -29,9 +30,15 @@ def web_search(
|
|
29
30
|
query: The search query, required.
|
30
31
|
limit: The maximum number of items to return. 20 by default, maximum 25.
|
31
32
|
provider: The provider to use. "exa", "tavily" or "brave". "tavily" by default.
|
33
|
+
include_domains: Optional list of domains to include in the search. None by default.
|
32
34
|
"""
|
33
35
|
providers = ("tavily", "brave", "exa")
|
34
36
|
assert provider in providers, "Error: provider must be either 'exa', 'tavily' or 'brave'"
|
37
|
+
if include_domains:
|
38
|
+
assert len(include_domains) > 0, "Error: include_domains should be a non-empty list"
|
39
|
+
assert all(
|
40
|
+
isinstance(domain, str) for domain in include_domains
|
41
|
+
), "Error: include_domains should be a list of strings"
|
35
42
|
|
36
43
|
is_tavily_available = bool(settings.TAVILY_API_KEY)
|
37
44
|
is_exa_available = bool(settings.EXA_API_KEY)
|
@@ -51,16 +58,18 @@ def web_search(
|
|
51
58
|
|
52
59
|
result = {}
|
53
60
|
if provider == "exa":
|
54
|
-
result = json.loads(exa_web_search(query, limit))
|
61
|
+
result = json.loads(exa_web_search(query, limit, include_domains=include_domains))
|
55
62
|
elif provider == "brave":
|
56
63
|
result = json.loads(brave_web_search(query, limit))
|
57
64
|
elif provider == "tavily":
|
58
|
-
result = json.loads(tavily_web_search(query, limit))
|
65
|
+
result = json.loads(tavily_web_search(query, limit, include_domains=include_domains))
|
59
66
|
result["search_provider"] = provider
|
60
67
|
return sanitize_output(json.dumps(result, ensure_ascii=False))
|
61
68
|
|
62
69
|
|
63
|
-
def tavily_web_search(query: str, limit: Optional[int] = 20) -> str:
|
70
|
+
def tavily_web_search(
|
71
|
+
query: str, limit: Optional[int] = 20, include_domains: Optional[List[str]] = None
|
72
|
+
) -> str:
|
64
73
|
"""
|
65
74
|
Search the web using Tavily and return results.
|
66
75
|
|
@@ -71,11 +80,17 @@ def tavily_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
71
80
|
Args:
|
72
81
|
query: The search query, required.
|
73
82
|
limit: The maximum number of items to return. 20 by default, maximum 25.
|
83
|
+
include_domains: Optional list of domains to include in the search. None by default.
|
74
84
|
"""
|
75
85
|
assert isinstance(query, str), "Error: Your search query must be a string"
|
76
86
|
assert query.strip(), "Error: Your query should not be empty"
|
77
87
|
assert isinstance(limit, int), "Error: limit should be an integer"
|
78
88
|
assert 0 < limit <= 25, "Error: limit should be between 1 and 25"
|
89
|
+
if include_domains:
|
90
|
+
assert len(include_domains) > 0, "Error: include_domains should be a non-empty list"
|
91
|
+
assert all(
|
92
|
+
isinstance(domain, str) for domain in include_domains
|
93
|
+
), "Error: include_domains should be a list of strings"
|
79
94
|
|
80
95
|
key = settings.TAVILY_API_KEY or ""
|
81
96
|
assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
|
@@ -85,6 +100,8 @@ def tavily_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
85
100
|
"auto_parameters": True,
|
86
101
|
"exclude_domains": EXCLUDE_DOMAINS,
|
87
102
|
}
|
103
|
+
if include_domains:
|
104
|
+
payload["include_domains"] = include_domains
|
88
105
|
response = post_with_retries(TAVILY_SEARCH_URL, payload, key)
|
89
106
|
results = response.json()["results"]
|
90
107
|
for result in results:
|
@@ -96,7 +113,9 @@ def tavily_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
96
113
|
return sanitize_output(json.dumps({"results": results}, ensure_ascii=False))
|
97
114
|
|
98
115
|
|
99
|
-
def exa_web_search(query: str, limit: Optional[int] = 20) -> str:
|
116
|
+
def exa_web_search(
|
117
|
+
query: str, limit: Optional[int] = 20, include_domains: Optional[List[str]] = None
|
118
|
+
) -> str:
|
100
119
|
"""
|
101
120
|
Search the web using Exa and return results.
|
102
121
|
|
@@ -107,11 +126,17 @@ def exa_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
107
126
|
Args:
|
108
127
|
query: The search query, required.
|
109
128
|
limit: The maximum number of items to return. 20 by default, maximum 25.
|
129
|
+
include_domains: Optional list of domains to include in the search. None by default.
|
110
130
|
"""
|
111
131
|
assert isinstance(query, str), "Error: Your search query must be a string"
|
112
132
|
assert query.strip(), "Error: Your query should not be empty"
|
113
133
|
assert isinstance(limit, int), "Error: limit should be an integer"
|
114
134
|
assert 0 < limit <= 25, "Error: limit should be between 1 and 25"
|
135
|
+
if include_domains:
|
136
|
+
assert len(include_domains) > 0, "Error: include_domains should be a non-empty list"
|
137
|
+
assert all(
|
138
|
+
isinstance(domain, str) for domain in include_domains
|
139
|
+
), "Error: include_domains should be a list of strings"
|
115
140
|
|
116
141
|
key = settings.EXA_API_KEY or ""
|
117
142
|
assert key, "Error: EXA_API_KEY is not set and no api_key was provided"
|
@@ -129,6 +154,8 @@ def exa_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
129
154
|
"context": False,
|
130
155
|
},
|
131
156
|
}
|
157
|
+
if include_domains:
|
158
|
+
payload["includeDomains"] = include_domains
|
132
159
|
|
133
160
|
response = post_with_retries(EXA_SEARCH_URL, payload, key)
|
134
161
|
results = response.json()["results"]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: academia-mcp
|
3
|
-
Version: 1.10.4
|
3
|
+
Version: 1.10.6
|
4
4
|
Summary: MCP server that provides different tools to search for scientific publications
|
5
5
|
Author-email: Ilya Gusev <phoenixilya@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
|
@@ -30,3 +30,15 @@ def test_web_search_bug() -> None:
|
|
30
30
|
)
|
31
31
|
assert results
|
32
32
|
assert len(results.splitlines()) == 1
|
33
|
+
|
34
|
+
|
35
|
+
def test_web_search_include_domains() -> None:
|
36
|
+
results = web_search(
|
37
|
+
"autoregressive models path-star graphs",
|
38
|
+
include_domains=["wikipedia.org"],
|
39
|
+
)
|
40
|
+
assert results
|
41
|
+
results = json.loads(results)
|
42
|
+
assert results
|
43
|
+
assert len(results["results"]) > 0
|
44
|
+
assert all("wikipedia.org" in result["url"] for result in results["results"])
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|