agentskills-http 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentskills-http
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: HTTP-based skill providers for the Agent Skills format (https://agentskills.io)
5
5
  License: MIT
6
6
  Author: Pratik Panda
@@ -13,9 +13,9 @@ Classifier: Programming Language :: Python :: 3.12
13
13
  Classifier: Programming Language :: Python :: 3.13
14
14
  Classifier: Programming Language :: Python :: 3.14
15
15
  Classifier: Topic :: Software Development :: Libraries
16
- Requires-Dist: agentskills-core (>=0.1.0)
17
- Requires-Dist: httpx (>=0.27)
18
- Requires-Dist: pyyaml (>=6.0)
16
+ Requires-Dist: agentskills-core (>=0.1.0,<1.0)
17
+ Requires-Dist: httpx (>=0.27,<1.0)
18
+ Requires-Dist: pyyaml (>=6.0,<7.0)
19
19
  Project-URL: Homepage, https://agentskills.io
20
20
  Project-URL: Repository, https://github.com/pratikxpanda/agentskills-sdk
21
21
  Description-Content-Type: text/markdown
@@ -28,7 +28,7 @@ Description-Content-Type: text/markdown
28
28
 
29
29
  > HTTP static-file skill provider for the [Agent Skills SDK](../../README.md).
30
30
 
31
- Serves [Agent Skills](https://agentskills.io) from any static HTTP file host S3, Azure Blob, CDN, GitHub Pages, Nginx, etc. Expects the same directory-tree layout as the filesystem provider, served over HTTP.
31
+ Serves [Agent Skills](https://agentskills.io) from any static HTTP file host - S3, Azure Blob, CDN, GitHub Pages, Nginx, etc. Expects the same directory-tree layout as the filesystem provider, served over HTTP.
32
32
 
33
33
  ## Installation
34
34
 
@@ -93,7 +93,18 @@ provider = HTTPStaticFileSkillProvider("https://cdn.example.com/skills", client=
93
93
 
94
94
  ## API
95
95
 
96
- ### `HTTPStaticFileSkillProvider(base_url, *, client=None, headers=None)`
96
+ ### `HTTPStaticFileSkillProvider(base_url, *, client=None, headers=None, params=None, require_tls=False, max_response_bytes=10_485_760)`
97
+
98
+ | Parameter | Type | Default | Description |
99
+ | --- | --- | --- | --- |
100
+ | `base_url` | `str` | - | Root URL where the skill tree is hosted |
101
+ | `client` | `AsyncClient \| None` | `None` | Pre-configured httpx client (caller manages lifecycle) |
102
+ | `headers` | `dict \| None` | `None` | Extra headers sent with every request |
103
+ | `params` | `dict \| None` | `None` | Query parameters appended to every request |
104
+ | `require_tls` | `bool` | `False` | Reject `http://` URLs with `ValueError` |
105
+ | `max_response_bytes` | `int` | `10_485_760` | Maximum allowed response size in bytes |
106
+
107
+ > **Note:** `client` and `headers`/`params` are mutually exclusive. Configure headers and params on the client directly when providing your own.
97
108
 
98
109
  | Method | Returns | Description |
99
110
  | --- | --- | --- |
@@ -117,6 +128,26 @@ Supports `async with` for automatic cleanup.
117
128
 
118
129
  All exceptions inherit from `AgentSkillsError`.
119
130
 
131
+ ## Security
132
+
133
+ - **Input validation** - Skill IDs and resource names are validated against a safe-character pattern (`^[a-zA-Z0-9][a-zA-Z0-9._-]*$`) to prevent path-traversal and injection attacks.
134
+ - **TLS warnings** - A `UserWarning` is emitted when `base_url` uses unencrypted HTTP. Set `require_tls=True` to reject HTTP URLs entirely.
135
+ - **Redirect protection** - The internally created HTTP client does not follow redirects, preventing redirect-based SSRF. A caller-supplied `client` keeps its own redirect settings.
136
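+ - **Timeouts** - The internally created HTTP client applies a default 30-second timeout to every request. A caller-supplied `client` keeps its own timeout settings.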
137
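+ - **Response size limits** - Responses exceeding 10 MB (default) are rejected up front when the server advertises an oversized `Content-Length`, and otherwise while the body is being streamed, so an oversized body is never fully buffered. Configure via `max_response_bytes`.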
138
+ - **Error-message sanitization** - Error messages omit URLs and include only status codes and generic descriptions, preventing internal URL leakage.
139
+
140
+ For the full security policy, see [SECURITY.md](../../../SECURITY.md).
141
+
142
+ ## Deployment Considerations
143
+
144
+ - **Rate limiting** - The SDK does not enforce rate limits on MCP tool
145
+ calls or HTTP requests. Deploy behind a reverse proxy or API gateway
146
+ that provides rate limiting in production environments.
147
+ - **Credential management** - Do not store secrets (API keys, SAS
148
+ tokens, Authorization headers) in config files committed to version
149
+ control. Use environment variables or a secret manager instead.
150
+
120
151
  ## License
121
152
 
122
153
  MIT
@@ -6,7 +6,7 @@
6
6
 
7
7
  > HTTP static-file skill provider for the [Agent Skills SDK](../../README.md).
8
8
 
9
- Serves [Agent Skills](https://agentskills.io) from any static HTTP file host S3, Azure Blob, CDN, GitHub Pages, Nginx, etc. Expects the same directory-tree layout as the filesystem provider, served over HTTP.
9
+ Serves [Agent Skills](https://agentskills.io) from any static HTTP file host - S3, Azure Blob, CDN, GitHub Pages, Nginx, etc. Expects the same directory-tree layout as the filesystem provider, served over HTTP.
10
10
 
11
11
  ## Installation
12
12
 
@@ -71,7 +71,18 @@ provider = HTTPStaticFileSkillProvider("https://cdn.example.com/skills", client=
71
71
 
72
72
  ## API
73
73
 
74
- ### `HTTPStaticFileSkillProvider(base_url, *, client=None, headers=None)`
74
+ ### `HTTPStaticFileSkillProvider(base_url, *, client=None, headers=None, params=None, require_tls=False, max_response_bytes=10_485_760)`
75
+
76
+ | Parameter | Type | Default | Description |
77
+ | --- | --- | --- | --- |
78
+ | `base_url` | `str` | - | Root URL where the skill tree is hosted |
79
+ | `client` | `AsyncClient \| None` | `None` | Pre-configured httpx client (caller manages lifecycle) |
80
+ | `headers` | `dict \| None` | `None` | Extra headers sent with every request |
81
+ | `params` | `dict \| None` | `None` | Query parameters appended to every request |
82
+ | `require_tls` | `bool` | `False` | Reject `http://` URLs with `ValueError` |
83
+ | `max_response_bytes` | `int` | `10_485_760` | Maximum allowed response size in bytes |
84
+
85
+ > **Note:** `client` and `headers`/`params` are mutually exclusive. Configure headers and params on the client directly when providing your own.
75
86
 
76
87
  | Method | Returns | Description |
77
88
  | --- | --- | --- |
@@ -95,6 +106,26 @@ Supports `async with` for automatic cleanup.
95
106
 
96
107
  All exceptions inherit from `AgentSkillsError`.
97
108
 
109
+ ## Security
110
+
111
+ - **Input validation** - Skill IDs and resource names are validated against a safe-character pattern (`^[a-zA-Z0-9][a-zA-Z0-9._-]*$`) to prevent path-traversal and injection attacks.
112
+ - **TLS warnings** - A `UserWarning` is emitted when `base_url` uses unencrypted HTTP. Set `require_tls=True` to reject HTTP URLs entirely.
113
+ - **Redirect protection** - The internally created HTTP client does not follow redirects, preventing redirect-based SSRF. A caller-supplied `client` keeps its own redirect settings.
114
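+ - **Timeouts** - The internally created HTTP client applies a default 30-second timeout to every request. A caller-supplied `client` keeps its own timeout settings.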
115
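+ - **Response size limits** - Responses exceeding 10 MB (default) are rejected up front when the server advertises an oversized `Content-Length`, and otherwise while the body is being streamed, so an oversized body is never fully buffered. Configure via `max_response_bytes`.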
116
+ - **Error-message sanitization** - Error messages omit URLs and include only status codes and generic descriptions, preventing internal URL leakage.
117
+
118
+ For the full security policy, see [SECURITY.md](../../../SECURITY.md).
119
+
120
+ ## Deployment Considerations
121
+
122
+ - **Rate limiting** - The SDK does not enforce rate limits on MCP tool
123
+ calls or HTTP requests. Deploy behind a reverse proxy or API gateway
124
+ that provides rate limiting in production environments.
125
+ - **Credential management** - Do not store secrets (API keys, SAS
126
+ tokens, Authorization headers) in config files committed to version
127
+ control. Use environment variables or a secret manager instead.
128
+
98
129
  ## License
99
130
 
100
131
  MIT
@@ -28,8 +28,10 @@ for non-blocking HTTP requests.
28
28
 
29
29
  from __future__ import annotations
30
30
 
31
+ import re
32
+ import warnings
31
33
  from typing import Any
32
- from urllib.parse import quote
34
+ from urllib.parse import quote, urlparse
33
35
 
34
36
  import httpx
35
37
 
@@ -41,6 +43,18 @@ from agentskills_core import (
41
43
  split_frontmatter,
42
44
  )
43
45
 
46
+ # Input validation: identifiers (skill_id, resource name) must be safe
47
+ # URL path segments. Allows alphanumeric, hyphens, dots, underscores.
48
+ # Must start with an alphanumeric character. No path separators or
49
+ # traversal sequences (e.g. ``../``).
50
+ _SAFE_IDENTIFIER_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$")
51
+
52
+ #: Default maximum HTTP response size in bytes (10 MB).
53
+ DEFAULT_MAX_RESPONSE_BYTES: int = 10 * 1024 * 1024
54
+
55
+ #: Default HTTP request timeout in seconds.
56
+ DEFAULT_TIMEOUT_SECONDS: float = 30.0
57
+
44
58
 
45
59
  class HTTPStaticFileSkillProvider(SkillProvider):
46
60
  """Skill provider backed by a static HTTP file host.
@@ -60,10 +74,20 @@ class HTTPStaticFileSkillProvider(SkillProvider):
60
74
  slash is stripped automatically.
61
75
  client: Optional pre-configured :class:`httpx.AsyncClient`.
62
76
  When provided, the caller is responsible for closing it.
77
+ The provider will still enforce *max_response_bytes* but
78
+ will **not** override the client's timeout or redirect
79
+ settings.
63
80
  headers: Optional extra headers sent with every request (e.g.
64
81
  ``Authorization``).
65
82
  params: Optional query parameters appended to every request
66
83
  (e.g. SAS tokens for Azure Blob Storage).
84
+ require_tls: If ``True``, reject ``http://`` base URLs with
85
+ a :class:`ValueError`. Defaults to ``False``, which
86
+ allows HTTP but emits a :class:`UserWarning`.
87
+ max_response_bytes: Maximum allowed response size in bytes.
88
+ Responses exceeding this limit raise
89
+ :class:`~agentskills_core.AgentSkillsError`. Defaults to
90
+ 10 MB.
67
91
 
68
92
  Example::
69
93
 
@@ -83,15 +107,40 @@ class HTTPStaticFileSkillProvider(SkillProvider):
83
107
  client: httpx.AsyncClient | None = None,
84
108
  headers: dict[str, str] | None = None,
85
109
  params: dict[str, str] | None = None,
110
+ require_tls: bool = False,
111
+ max_response_bytes: int = DEFAULT_MAX_RESPONSE_BYTES,
86
112
  ) -> None:
87
113
  if client is not None and (headers is not None or params is not None):
88
114
  raise ValueError(
89
115
  "Cannot specify both 'client' and 'headers'/'params'. "
90
116
  "Configure headers and params on the client directly."
91
117
  )
118
+
119
+ # TLS enforcement
120
+ parsed = urlparse(base_url)
121
+ if parsed.scheme == "http":
122
+ if require_tls:
123
+ raise ValueError(
124
+ "require_tls is enabled but base_url uses plain HTTP. "
125
+ "Use an HTTPS URL or set require_tls=False."
126
+ )
127
+ warnings.warn(
128
+ "base_url uses unencrypted HTTP. "
129
+ "Skill content fetched over HTTP is vulnerable to "
130
+ "man-in-the-middle attacks. Use HTTPS in production.",
131
+ UserWarning,
132
+ stacklevel=2,
133
+ )
134
+
92
135
  self._base_url = base_url.rstrip("/")
136
+ self._max_response_bytes = max_response_bytes
93
137
  self._owns_client = client is None
94
- self._client = client or httpx.AsyncClient(headers=headers, params=params)
138
+ self._client = client or httpx.AsyncClient(
139
+ headers=headers,
140
+ params=params,
141
+ timeout=httpx.Timeout(DEFAULT_TIMEOUT_SECONDS),
142
+ follow_redirects=False,
143
+ )
95
144
 
96
145
  async def aclose(self) -> None:
97
146
  """Close the underlying HTTP client if it is owned by this provider."""
@@ -104,6 +153,24 @@ class HTTPStaticFileSkillProvider(SkillProvider):
104
153
  async def __aexit__(self, *exc: object) -> None:
105
154
  await self.aclose()
106
155
 
156
+ # ------------------------------------------------------------------
157
+ # Input validation
158
+ # ------------------------------------------------------------------
159
+
160
+ @staticmethod
161
+ def _validate_identifier(value: str, label: str) -> None:
162
+ """Raise :class:`ValueError` if *value* is not a safe URL path segment.
163
+
164
+ Prevents path-traversal attacks (e.g. ``../``) and other
165
+ injection via ``skill_id`` or resource ``name``.
166
+ """
167
+ if not _SAFE_IDENTIFIER_RE.match(value):
168
+ raise ValueError(
169
+ f"Invalid {label}: {value!r} — must start with an "
170
+ f"alphanumeric character and contain only alphanumeric "
171
+ f"characters, hyphens, dots, and underscores"
172
+ )
173
+
107
174
  # ------------------------------------------------------------------
108
175
  # Metadata & body
109
176
  # ------------------------------------------------------------------
@@ -203,50 +270,90 @@ class HTTPStaticFileSkillProvider(SkillProvider):
203
270
  # Internal helpers
204
271
  # ------------------------------------------------------------------
205
272
 
273
+ async def _stream_bytes(self, url: str, not_found_error: type[Exception]) -> bytes:
274
+ """Stream a GET request and return the response bytes.
275
+
276
+ Uses ``httpx.AsyncClient.stream`` so that overly large
277
+ responses are detected **during** download rather than after
278
+ the entire body has been buffered into memory.
279
+
280
+ Args:
281
+ url: The URL to fetch.
282
+ not_found_error: Exception type to raise on HTTP 404
283
+ (e.g. :class:`SkillNotFoundError` or
284
+ :class:`ResourceNotFoundError`).
285
+
286
+ Raises:
287
+ not_found_error: On 404.
288
+ AgentSkillsError: On other HTTP/connection errors or if
289
+ the response exceeds *max_response_bytes*.
290
+ """
291
+ try:
292
+ async with self._client.stream("GET", url) as resp:
293
+ if resp.status_code == 404:
294
+ raise not_found_error("Skill content not found")
295
+ try:
296
+ resp.raise_for_status()
297
+ except httpx.HTTPStatusError as exc:
298
+ raise AgentSkillsError(f"HTTP {resp.status_code} error") from exc
299
+
300
+ # Check Content-Length header for an early reject when
301
+ # the server advertises the size up-front.
302
+ cl = resp.headers.get("content-length")
303
+ if cl is not None and int(cl) > self._max_response_bytes:
304
+ raise AgentSkillsError(
305
+ f"Response exceeds maximum size ({self._max_response_bytes} bytes)"
306
+ )
307
+
308
+ # Stream chunks and enforce the byte limit
309
+ # incrementally to avoid buffering the full body.
310
+ chunks: list[bytes] = []
311
+ received = 0
312
+ async for chunk in resp.aiter_bytes():
313
+ received += len(chunk)
314
+ if received > self._max_response_bytes:
315
+ raise AgentSkillsError(
316
+ f"Response exceeds maximum size ({self._max_response_bytes} bytes)"
317
+ )
318
+ chunks.append(chunk)
319
+
320
+ except (SkillNotFoundError, ResourceNotFoundError, AgentSkillsError):
321
+ raise
322
+ except httpx.HTTPError as exc:
323
+ raise AgentSkillsError("HTTP request failed") from exc
324
+
325
+ return b"".join(chunks)
326
+
206
327
  async def _get_text(self, url: str) -> str:
207
328
  """GET a URL and return the response text.
208
329
 
209
330
  Raises:
210
331
  SkillNotFoundError: On 404.
211
- AgentSkillsError: On other HTTP or connection errors.
332
+ AgentSkillsError: On other HTTP or connection errors,
333
+ or if the response exceeds *max_response_bytes*.
212
334
  """
213
- try:
214
- resp = await self._client.get(url)
215
- except httpx.HTTPError as exc:
216
- raise AgentSkillsError(f"HTTP request failed: {url}") from exc
217
- if resp.status_code == 404:
218
- raise SkillNotFoundError(f"Not found: {url}")
219
- try:
220
- resp.raise_for_status()
221
- except httpx.HTTPStatusError as exc:
222
- raise AgentSkillsError(f"HTTP {resp.status_code} error for {url}") from exc
223
- return resp.text
335
+ data = await self._stream_bytes(url, SkillNotFoundError)
336
+ return data.decode("utf-8")
224
337
 
225
338
  async def _get_bytes(self, url: str) -> bytes:
226
339
  """GET a URL and return the response bytes.
227
340
 
228
341
  Raises:
229
342
  ResourceNotFoundError: On 404.
230
- AgentSkillsError: On other HTTP or connection errors.
343
+ AgentSkillsError: On other HTTP or connection errors,
344
+ or if the response exceeds *max_response_bytes*.
231
345
  """
232
- try:
233
- resp = await self._client.get(url)
234
- except httpx.HTTPError as exc:
235
- raise AgentSkillsError(f"HTTP request failed: {url}") from exc
236
- if resp.status_code == 404:
237
- raise ResourceNotFoundError(f"Not found: {url}")
238
- try:
239
- resp.raise_for_status()
240
- except httpx.HTTPStatusError as exc:
241
- raise AgentSkillsError(f"HTTP {resp.status_code} error for {url}") from exc
242
- return resp.content
346
+ return await self._stream_bytes(url, ResourceNotFoundError)
243
347
 
244
348
  async def _get_skill_md(self, skill_id: str) -> str:
245
349
  """Fetch the full text of a skill's ``SKILL.md``."""
246
- url = f"{self._base_url}/{quote(skill_id)}/SKILL.md"
350
+ self._validate_identifier(skill_id, "skill_id")
351
+ url = f"{self._base_url}/{quote(skill_id, safe='')}/SKILL.md"
247
352
  return await self._get_text(url)
248
353
 
249
354
  async def _get_resource(self, skill_id: str, subdir: str, name: str) -> bytes:
250
355
  """Fetch a single resource file from a skill subdirectory."""
251
- url = f"{self._base_url}/{quote(skill_id)}/{subdir}/{quote(name)}"
356
+ self._validate_identifier(skill_id, "skill_id")
357
+ self._validate_identifier(name, "resource name")
358
+ url = f"{self._base_url}/{quote(skill_id, safe='')}/{subdir}/{quote(name, safe='')}"
252
359
  return await self._get_bytes(url)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "agentskills-http"
3
- version = "0.2.0"
3
+ version = "0.2.2"
4
4
  description = "HTTP-based skill providers for the Agent Skills format (https://agentskills.io)"
5
5
  license = "MIT"
6
6
  authors = ["Pratik Panda"]
@@ -18,9 +18,9 @@ classifiers = [
18
18
 
19
19
  [tool.poetry.dependencies]
20
20
  python = ">=3.12,<4.0"
21
- agentskills-core = ">=0.1.0"
22
- httpx = ">=0.27"
23
- pyyaml = ">=6.0"
21
+ agentskills-core = ">=0.1.0,<1.0"
22
+ httpx = ">=0.27,<1.0"
23
+ pyyaml = ">=6.0,<7.0"
24
24
 
25
25
  [build-system]
26
26
  requires = ["poetry-core"]