mcp-server-fetch 0.6.1-py3-none-any.whl → 0.6.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_server_fetch/server.py +10 -8
- {mcp_server_fetch-0.6.1.dist-info → mcp_server_fetch-0.6.2.dist-info}/METADATA +1 -1
- mcp_server_fetch-0.6.2.dist-info/RECORD +8 -0
- mcp_server_fetch-0.6.1.dist-info/RECORD +0 -8
- {mcp_server_fetch-0.6.1.dist-info → mcp_server_fetch-0.6.2.dist-info}/WHEEL +0 -0
- {mcp_server_fetch-0.6.1.dist-info → mcp_server_fetch-0.6.2.dist-info}/entry_points.txt +0 -0
- {mcp_server_fetch-0.6.1.dist-info → mcp_server_fetch-0.6.2.dist-info}/licenses/LICENSE +0 -0
mcp_server_fetch/server.py
CHANGED
@@ -44,7 +44,7 @@ def extract_content_from_html(html: str) -> str:
     return content
 
 
-def get_robots_txt_url(url: AnyUrl | str) -> str:
+def get_robots_txt_url(url: str) -> str:
     """Get the robots.txt URL for a given website URL.
 
     Args:
@@ -54,7 +54,7 @@ def get_robots_txt_url(url: AnyUrl | str) -> str:
         URL of the robots.txt file
     """
     # Parse the URL into components
-    parsed = urlparse(
+    parsed = urlparse(url)
 
     # Reconstruct the base URL with just scheme, netloc, and /robots.txt path
     robots_url = urlunparse((parsed.scheme, parsed.netloc, "/robots.txt", "", "", ""))
@@ -62,7 +62,7 @@ def get_robots_txt_url(url: AnyUrl | str) -> str:
     return robots_url
 
 
-async def check_may_autonomously_fetch_url(url: AnyUrl | str, user_agent: str) -> None:
+async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
     """
     Check if the URL can be fetched by the user agent according to the robots.txt file.
     Raises a McpError if not.
@@ -74,7 +74,9 @@ async def check_may_autonomously_fetch_url(url: AnyUrl | str, user_agent: str) -
     async with AsyncClient() as client:
         try:
             response = await client.get(
-                robot_txt_url,
+                robot_txt_url,
+                follow_redirects=True,
+                headers={"User-Agent": user_agent},
             )
         except HTTPError:
             raise McpError(
@@ -93,7 +95,7 @@ async def check_may_autonomously_fetch_url(url: AnyUrl | str, user_agent: str) -
         line for line in robot_txt.splitlines() if not line.strip().startswith("#")
     )
     robot_parser = Protego.parse(processed_robot_txt)
-    if not robot_parser.can_fetch(url, user_agent):
+    if not robot_parser.can_fetch(str(url), user_agent):
         raise McpError(
             INTERNAL_ERROR,
             f"The sites robots.txt ({robot_txt_url}), specifies that autonomous fetching of this page is not allowed, "
@@ -106,7 +108,7 @@ async def check_may_autonomously_fetch_url(url: AnyUrl | str, user_agent: str) -
 
 
 async def fetch_url(
-    url:
+    url: str, user_agent: str, force_raw: bool = False
 ) -> Tuple[str, str]:
     """
     Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
@@ -116,7 +118,7 @@ async def fetch_url(
     async with AsyncClient() as client:
         try:
             response = await client.get(
-
+                url,
                 follow_redirects=True,
                 headers={"User-Agent": user_agent},
                 timeout=30,
@@ -221,7 +223,7 @@ Although originally you did not have internet access, and were advised to refuse
         except ValueError as e:
             raise McpError(INVALID_PARAMS, str(e))
 
-        url = args.url
+        url = str(args.url)
         if not url:
            raise McpError(INVALID_PARAMS, "URL is required")
 
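The substantive change in this release is in check_may_autonomously_fetch_url: the robots.txt request now follows redirects and sends the configured User-Agent header, and URLs are handled as plain str (converted from the pydantic AnyUrl at the call site). A minimal sketch of the resulting check, assuming the same httpx and protego libraries the server imports (the helper name may_fetch is illustrative, and the comment-stripping and status-code handling of the real handler are omitted):

```python
from urllib.parse import urlparse, urlunparse

from httpx import AsyncClient, HTTPError
from protego import Protego


def get_robots_txt_url(url: str) -> str:
    # Same approach as 0.6.2: keep scheme and host, point the path at /robots.txt.
    parsed = urlparse(url)
    return urlunparse((parsed.scheme, parsed.netloc, "/robots.txt", "", "", ""))


async def may_fetch(url: str, user_agent: str) -> bool:
    """Rough equivalent of the 0.6.2 robots.txt check, returning a bool instead of raising McpError."""
    robots_url = get_robots_txt_url(url)
    async with AsyncClient() as client:
        try:
            # 0.6.2 adds follow_redirects=True and the User-Agent header to this request.
            response = await client.get(
                robots_url,
                follow_redirects=True,
                headers={"User-Agent": user_agent},
            )
        except HTTPError:
            return False
    robot_parser = Protego.parse(response.text)
    return robot_parser.can_fetch(str(url), user_agent)
```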
{mcp_server_fetch-0.6.1.dist-info → mcp_server_fetch-0.6.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mcp-server-fetch
-Version: 0.6.1
+Version: 0.6.2
 Summary: A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs
 Author: Anthropic, PBC.
 Maintainer-email: Jack Adamson <jadamson@anthropic.com>
mcp_server_fetch-0.6.2.dist-info/RECORD
ADDED

@@ -0,0 +1,8 @@
+mcp_server_fetch/__init__.py,sha256=6mqCwMSe8NtUcwXsmZTGjln83bc1vE31CL5yInKZd0s,614
+mcp_server_fetch/__main__.py,sha256=P5j_W1F3QvOrY7x2YIQ0KlY1Y9eO_vS6rrOo1mL1fvk,57
+mcp_server_fetch/server.py,sha256=3SZB1yVHwLq_mrjwtPeY8PYQapkeYqo3QTvWSwZmAOI,9491
+mcp_server_fetch-0.6.2.dist-info/METADATA,sha256=eYx-lmVttmODvBUxGN6-Yc_mRoflqKUUnChgUONd4l4,4645
+mcp_server_fetch-0.6.2.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+mcp_server_fetch-0.6.2.dist-info/entry_points.txt,sha256=tYA4AQfADMVk6YWCfuPe7TjGGmPmk7gLosHt_ewL48c,59
+mcp_server_fetch-0.6.2.dist-info/licenses/LICENSE,sha256=jMfG4zsk7U7o_MzDPszxAlSdBPpMuXN87Ml3Da0QgP8,1059
+mcp_server_fetch-0.6.2.dist-info/RECORD,,
mcp_server_fetch-0.6.1.dist-info/RECORD
REMOVED

@@ -1,8 +0,0 @@
-mcp_server_fetch/__init__.py,sha256=6mqCwMSe8NtUcwXsmZTGjln83bc1vE31CL5yInKZd0s,614
-mcp_server_fetch/__main__.py,sha256=P5j_W1F3QvOrY7x2YIQ0KlY1Y9eO_vS6rrOo1mL1fvk,57
-mcp_server_fetch/server.py,sha256=G_oJaTAt4RWfVcsCT9kKiZ8ObCaZF95SHeSWpz_d-ms,9462
-mcp_server_fetch-0.6.1.dist-info/METADATA,sha256=LvajvVT7StDVQ8jkr-EirJLMlh4N4wnLH9eTU25JP6c,4645
-mcp_server_fetch-0.6.1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
-mcp_server_fetch-0.6.1.dist-info/entry_points.txt,sha256=tYA4AQfADMVk6YWCfuPe7TjGGmPmk7gLosHt_ewL48c,59
-mcp_server_fetch-0.6.1.dist-info/licenses/LICENSE,sha256=jMfG4zsk7U7o_MzDPszxAlSdBPpMuXN87Ml3Da0QgP8,1059
-mcp_server_fetch-0.6.1.dist-info/RECORD,,
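The RECORD diffs only track the new hash and size of server.py (9462 → 9491 bytes) plus the renamed dist-info paths. For reference, each RECORD hash is the URL-safe, unpadded base64 encoding of the file's SHA-256 digest, so an entry can be re-checked locally with a short helper (the path below is a hypothetical location inside an unpacked wheel):

```python
import base64
import hashlib
from pathlib import Path


def record_hash(path: str) -> str:
    """Compute the sha256=<digest> value used in a wheel's RECORD file."""
    digest = hashlib.sha256(Path(path).read_bytes()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()


# Hypothetical check against the 0.6.2 entry for server.py:
# record_hash("mcp_server_fetch/server.py")
# -> "sha256=3SZB1yVHwLq_mrjwtPeY8PYQapkeYqo3QTvWSwZmAOI"
```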
{mcp_server_fetch-0.6.1.dist-info → mcp_server_fetch-0.6.2.dist-info}/WHEEL
File without changes

{mcp_server_fetch-0.6.1.dist-info → mcp_server_fetch-0.6.2.dist-info}/entry_points.txt
File without changes

{mcp_server_fetch-0.6.1.dist-info → mcp_server_fetch-0.6.2.dist-info}/licenses/LICENSE
File without changes