academia-mcp 1.10.5__tar.gz → 1.10.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/PKG-INFO +1 -1
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/web_search.py +57 -5
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp.egg-info/PKG-INFO +1 -1
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/pyproject.toml +1 -1
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_web_search.py +23 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/LICENSE +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/README.md +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/__init__.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/__main__.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/files.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.sty +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/latex_templates/agents4science_2025/agents4science_2025.tex +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/llm.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/pdf.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/py.typed +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/server.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/settings.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/__init__.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/anthology_search.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/arxiv_download.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/arxiv_search.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/bitflip.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/document_qa.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/hf_datasets_search.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/latex.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/py.typed +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/review.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/s2.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/show_image.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/speech_to_text.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/tools/visit_webpage.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp/utils.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp.egg-info/SOURCES.txt +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp.egg-info/dependency_links.txt +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp.egg-info/entry_points.txt +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp.egg-info/requires.txt +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/academia_mcp.egg-info/top_level.txt +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/setup.cfg +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_anthology_search.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_arxiv_download.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_arxiv_search.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_bitflip.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_document_qa.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_extract_json.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_hf_dataset_search.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_latex.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_review.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_s2.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_show_image.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_speech_to_text.py +0 -0
- {academia_mcp-1.10.5 → academia_mcp-1.10.7}/tests/test_visit_webpage.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: academia-mcp
|
3
|
-
Version: 1.10.5
|
3
|
+
Version: 1.10.7
|
4
4
|
Summary: MCP server that provides different tools to search for scientific publications
|
5
5
|
Author-email: Ilya Gusev <phoenixilya@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
|
@@ -1,5 +1,5 @@
|
|
1
1
|
import json
|
2
|
-
from typing import Optional
|
2
|
+
from typing import Optional, List, Tuple
|
3
3
|
|
4
4
|
from academia_mcp.utils import post_with_retries, get_with_retries
|
5
5
|
from academia_mcp.settings import settings
|
@@ -12,10 +12,29 @@ BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search"
|
|
12
12
|
EXCLUDE_DOMAINS = ["chatpaper.com"]
|
13
13
|
|
14
14
|
|
15
|
+
def _parse_domains(query: str) -> Tuple[str, List[str]]:
|
16
|
+
site_term = "site:"
|
17
|
+
if site_term not in query:
|
18
|
+
return query, []
|
19
|
+
parts = query.split()
|
20
|
+
query_parts = []
|
21
|
+
include_domains = []
|
22
|
+
for part in parts:
|
23
|
+
if not part.startswith(site_term):
|
24
|
+
query_parts.append(part)
|
25
|
+
continue
|
26
|
+
domain = part[len(site_term) :]
|
27
|
+
if domain:
|
28
|
+
include_domains.append(domain)
|
29
|
+
query = " ".join(query_parts)
|
30
|
+
return query, include_domains
|
31
|
+
|
32
|
+
|
15
33
|
def web_search(
|
16
34
|
query: str,
|
17
35
|
limit: Optional[int] = 20,
|
18
36
|
provider: Optional[str] = "tavily",
|
37
|
+
include_domains: Optional[List[str]] = None,
|
19
38
|
) -> str:
|
20
39
|
"""
|
21
40
|
Search the web using Exa Search, Brave Search or Tavily and return normalized results.
|
@@ -29,9 +48,22 @@ def web_search(
|
|
29
48
|
query: The search query, required.
|
30
49
|
limit: The maximum number of items to return. 20 by default, maximum 25.
|
31
50
|
provider: The provider to use. "exa", "tavily" or "brave". "tavily" by default.
|
51
|
+
include_domains: Optional list of domains to include in the search. None by default.
|
32
52
|
"""
|
33
53
|
providers = ("tavily", "brave", "exa")
|
34
54
|
assert provider in providers, "Error: provider must be either 'exa', 'tavily' or 'brave'"
|
55
|
+
if include_domains:
|
56
|
+
assert len(include_domains) > 0, "Error: include_domains should be a non-empty list"
|
57
|
+
assert all(
|
58
|
+
isinstance(domain, str) for domain in include_domains
|
59
|
+
), "Error: include_domains should be a list of strings"
|
60
|
+
|
61
|
+
query, query_include_domains = _parse_domains(query)
|
62
|
+
if query_include_domains:
|
63
|
+
if include_domains:
|
64
|
+
include_domains.extend(query_include_domains)
|
65
|
+
else:
|
66
|
+
include_domains = query_include_domains
|
35
67
|
|
36
68
|
is_tavily_available = bool(settings.TAVILY_API_KEY)
|
37
69
|
is_exa_available = bool(settings.EXA_API_KEY)
|
@@ -51,16 +83,18 @@ def web_search(
|
|
51
83
|
|
52
84
|
result = {}
|
53
85
|
if provider == "exa":
|
54
|
-
result = json.loads(exa_web_search(query, limit))
|
86
|
+
result = json.loads(exa_web_search(query, limit, include_domains=include_domains))
|
55
87
|
elif provider == "brave":
|
56
88
|
result = json.loads(brave_web_search(query, limit))
|
57
89
|
elif provider == "tavily":
|
58
|
-
result = json.loads(tavily_web_search(query, limit))
|
90
|
+
result = json.loads(tavily_web_search(query, limit, include_domains=include_domains))
|
59
91
|
result["search_provider"] = provider
|
60
92
|
return sanitize_output(json.dumps(result, ensure_ascii=False))
|
61
93
|
|
62
94
|
|
63
|
-
def tavily_web_search(
|
95
|
+
def tavily_web_search(
|
96
|
+
query: str, limit: Optional[int] = 20, include_domains: Optional[List[str]] = None
|
97
|
+
) -> str:
|
64
98
|
"""
|
65
99
|
Search the web using Tavily and return results.
|
66
100
|
|
@@ -71,11 +105,17 @@ def tavily_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
71
105
|
Args:
|
72
106
|
query: The search query, required.
|
73
107
|
limit: The maximum number of items to return. 20 by default, maximum 25.
|
108
|
+
include_domains: Optional list of domains to include in the search. None by default.
|
74
109
|
"""
|
75
110
|
assert isinstance(query, str), "Error: Your search query must be a string"
|
76
111
|
assert query.strip(), "Error: Your query should not be empty"
|
77
112
|
assert isinstance(limit, int), "Error: limit should be an integer"
|
78
113
|
assert 0 < limit <= 25, "Error: limit should be between 1 and 25"
|
114
|
+
if include_domains:
|
115
|
+
assert len(include_domains) > 0, "Error: include_domains should be a non-empty list"
|
116
|
+
assert all(
|
117
|
+
isinstance(domain, str) for domain in include_domains
|
118
|
+
), "Error: include_domains should be a list of strings"
|
79
119
|
|
80
120
|
key = settings.TAVILY_API_KEY or ""
|
81
121
|
assert key, "Error: TAVILY_API_KEY is not set and no api_key was provided"
|
@@ -85,6 +125,8 @@ def tavily_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
85
125
|
"auto_parameters": True,
|
86
126
|
"exclude_domains": EXCLUDE_DOMAINS,
|
87
127
|
}
|
128
|
+
if include_domains:
|
129
|
+
payload["include_domains"] = include_domains
|
88
130
|
response = post_with_retries(TAVILY_SEARCH_URL, payload, key)
|
89
131
|
results = response.json()["results"]
|
90
132
|
for result in results:
|
@@ -96,7 +138,9 @@ def tavily_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
96
138
|
return sanitize_output(json.dumps({"results": results}, ensure_ascii=False))
|
97
139
|
|
98
140
|
|
99
|
-
def exa_web_search(
|
141
|
+
def exa_web_search(
|
142
|
+
query: str, limit: Optional[int] = 20, include_domains: Optional[List[str]] = None
|
143
|
+
) -> str:
|
100
144
|
"""
|
101
145
|
Search the web using Exa and return results.
|
102
146
|
|
@@ -107,11 +151,17 @@ def exa_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
107
151
|
Args:
|
108
152
|
query: The search query, required.
|
109
153
|
limit: The maximum number of items to return. 20 by default, maximum 25.
|
154
|
+
include_domains: Optional list of domains to include in the search. None by default.
|
110
155
|
"""
|
111
156
|
assert isinstance(query, str), "Error: Your search query must be a string"
|
112
157
|
assert query.strip(), "Error: Your query should not be empty"
|
113
158
|
assert isinstance(limit, int), "Error: limit should be an integer"
|
114
159
|
assert 0 < limit <= 25, "Error: limit should be between 1 and 25"
|
160
|
+
if include_domains:
|
161
|
+
assert len(include_domains) > 0, "Error: include_domains should be a non-empty list"
|
162
|
+
assert all(
|
163
|
+
isinstance(domain, str) for domain in include_domains
|
164
|
+
), "Error: include_domains should be a list of strings"
|
115
165
|
|
116
166
|
key = settings.EXA_API_KEY or ""
|
117
167
|
assert key, "Error: EXA_API_KEY is not set and no api_key was provided"
|
@@ -129,6 +179,8 @@ def exa_web_search(query: str, limit: Optional[int] = 20) -> str:
|
|
129
179
|
"context": False,
|
130
180
|
},
|
131
181
|
}
|
182
|
+
if include_domains:
|
183
|
+
payload["includeDomains"] = include_domains
|
132
184
|
|
133
185
|
response = post_with_retries(EXA_SEARCH_URL, payload, key)
|
134
186
|
results = response.json()["results"]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: academia-mcp
|
3
|
-
Version: 1.10.5
|
3
|
+
Version: 1.10.7
|
4
4
|
Summary: MCP server that provides different tools to search for scientific publications
|
5
5
|
Author-email: Ilya Gusev <phoenixilya@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/IlyaGusev/academia_mcp
|
@@ -30,3 +30,26 @@ def test_web_search_bug() -> None:
|
|
30
30
|
)
|
31
31
|
assert results
|
32
32
|
assert len(results.splitlines()) == 1
|
33
|
+
|
34
|
+
|
35
|
+
def test_web_search_include_domains() -> None:
|
36
|
+
results = web_search(
|
37
|
+
"autoregressive models path-star graphs",
|
38
|
+
include_domains=["wikipedia.org"],
|
39
|
+
)
|
40
|
+
assert results
|
41
|
+
results = json.loads(results)
|
42
|
+
assert results
|
43
|
+
assert len(results["results"]) > 0
|
44
|
+
assert all("wikipedia.org" in result["url"] for result in results["results"])
|
45
|
+
|
46
|
+
|
47
|
+
def test_web_search_include_query_domains() -> None:
|
48
|
+
results = web_search(
|
49
|
+
"site:wikipedia.org autoregressive models path-star graphs",
|
50
|
+
)
|
51
|
+
assert results
|
52
|
+
results = json.loads(results)
|
53
|
+
assert results
|
54
|
+
assert len(results["results"]) > 0
|
55
|
+
assert all("wikipedia.org" in result["url"] for result in results["results"])
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|