dais-skills 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.3
2
+ Name: dais-skills
3
+ Version: 0.1.0
4
+ Summary: A Python package for scanning and downloading agent skills from specified GitHub repositories.
5
+ Author: BHznJNs
6
+ Author-email: BHznJNs <bhznjns@outlook.com>
7
+ Requires-Dist: httpx>=0.28.1
8
+ Requires-Dist: python-frontmatter>=1.1.0
9
+ Requires-Dist: binaryornot>=0.6.0
10
+ Requires-Python: >=3.14
11
+ Description-Content-Type: text/markdown
12
+
File without changes
@@ -0,0 +1,22 @@
1
+ [project]
2
+ name = "dais-skills"
3
+ version = "0.1.0"
4
+ description = "A Python package for scanning and downloading agent skills from specified GitHub repositories."
5
+ readme = "README.md"
6
+ authors = [{ name = "BHznJNs", email = "bhznjns@outlook.com" }]
7
+ requires-python = ">=3.14"
8
+ dependencies = [
9
+ "httpx>=0.28.1",
10
+ "python-frontmatter>=1.1.0",
11
+ "binaryornot>=0.6.0",
12
+ ]
13
+
14
+ [dependency-groups]
15
+ dev = [
16
+ "pytest>=8.0.0",
17
+ "pytest-asyncio>=0.25.2",
18
+ ]
19
+
20
+ [build-system]
21
+ requires = ["uv_build>=0.9.28,<0.12.0"]
22
+ build-backend = "uv_build"
@@ -0,0 +1,11 @@
1
+ from .extractor import Skill, SkillResource
2
+ from .downloader import download_skill_zip
3
+ from .scanner import scan_repo, ScannedSkill
4
+
5
+ __all__ = [
6
+ "Skill",
7
+ "SkillResource",
8
+ "download_skill_zip",
9
+ "scan_repo",
10
+ "ScannedSkill",
11
+ ]
@@ -0,0 +1,11 @@
1
+ import httpx
2
+ from .github import DownloaderError, GitHubDownloader
3
+
4
+
5
async def download_skill_zip(repo_url: str, skill_path: str) -> bytes:
    """Download one skill directory from a GitHub repository as a zip archive.

    A short-lived ``httpx.AsyncClient`` with a 10 second timeout is opened for
    the duration of the call and the work is delegated to
    ``GitHubDownloader.download_skill_zip``.

    Args:
        repo_url: URL of the GitHub repository that hosts the skill.
        skill_path: Path of the skill inside the repository.

    Returns:
        The zip archive bytes produced by ``GitHubDownloader``.
    """
    http_client = httpx.AsyncClient(timeout=10.0)
    async with http_client:
        return await GitHubDownloader(http_client).download_skill_zip(
            repo_url, skill_path
        )
9
+
10
+
11
+ __all__ = ["download_skill_zip", "DownloaderError"]
@@ -0,0 +1,82 @@
1
+ import io
2
+ import zipfile
3
+ from pathlib import PurePosixPath
4
+
5
+ import httpx
6
+
7
+ from dais_skills.public.github import (
8
+ GitHubBlob,
9
+ GitHubClient,
10
+ GitHubError,
11
+ GitHubRepo,
12
+ parse_github_repo_url,
13
+ )
14
+
15
+
16
class DownloaderError(Exception):
    """Raised when a skill cannot be downloaded or its path is invalid."""
18
+
19
+
20
def normalize_skill_path(skill_path: str) -> str:
    """Return the repository-relative skill directory for ``skill_path``.

    Surrounding whitespace and leading/trailing slashes are removed. When the
    path names a ``SKILL.md`` file (matched case-insensitively) its parent
    directory is used instead.

    Raises:
        DownloaderError: If the path is empty, or if the resulting directory
            is the repository root (``""`` or ``"."``).
    """
    cleaned = skill_path.strip().strip("/")
    if not cleaned:
        raise DownloaderError("Skill path must not be empty")

    as_path = PurePosixPath(cleaned)
    points_at_skill_md = as_path.name.lower() == "skill.md"
    target = str(as_path.parent) if points_at_skill_md else cleaned

    # The root check applies whether or not the SKILL.md suffix was stripped.
    if target in {"", "."}:
        raise DownloaderError("Skill path must point to a skill directory")
    return target
29
+
30
+
31
class GitHubDownloader:
    """Builds zip archives of skill directories hosted on GitHub."""

    def __init__(self, client: httpx.AsyncClient):
        self._github = GitHubClient(client)

    async def download_skill_zip(self, repo_url: str, skill_path: str) -> bytes:
        """Fetch every file under a skill directory and return them zipped.

        Inside the archive all files live under a single top-level folder
        named after the last component of the skill directory.

        Args:
            repo_url: GitHub repository URL.
            skill_path: Skill directory (or its SKILL.md) inside the repository.

        Returns:
            The bytes of a deflate-compressed zip archive.

        Raises:
            DownloaderError: If the path is invalid, no file lies under the
                skill directory, or a GitHub request fails with ``GitHubError``.
        """
        # NOTE(review): a GitHubError raised by parse_github_repo_url for an
        # unsupported URL is not converted to DownloaderError here.
        repo = parse_github_repo_url(repo_url)
        skill_dir = normalize_skill_path(skill_path)

        try:
            tree_ref, blobs = await self._github.fetch_tree(repo)
        except GitHubError as exc:
            raise DownloaderError(str(exc)) from exc

        selected = filter_skill_blobs(blobs, skill_dir)
        if not selected:
            raise DownloaderError(f"Skill path not found: {skill_dir}")

        skill_root = PurePosixPath(skill_dir)
        archive_root = PurePosixPath(skill_root.name or "skill")
        buffer = io.BytesIO()
        with zipfile.ZipFile(
            buffer, mode="w", compression=zipfile.ZIP_DEFLATED
        ) as archive:
            # Files are fetched one after another, in sorted path order.
            for entry in selected:
                try:
                    payload = await self._github.fetch_blob(
                        repo, tree_ref, entry.path
                    )
                except GitHubError as exc:
                    raise DownloaderError(str(exc)) from exc
                inside_skill = PurePosixPath(entry.path).relative_to(skill_root)
                archive.writestr(str(archive_root / inside_skill), payload)

        return buffer.getvalue()
60
+
61
+
62
def filter_skill_blobs(blobs: list[GitHubBlob], skill_dir: str) -> list[GitHubBlob]:
    """Return the blobs located under ``skill_dir``, sorted by path.

    A blob is kept when its path starts with ``"<skill_dir>/"``; this also
    covers the directory's own ``SKILL.md`` / ``skill.md`` file.
    """
    dir_prefix = skill_dir + "/"
    matching = [blob for blob in blobs if blob.path.startswith(dir_prefix)]
    matching.sort(key=lambda blob: blob.path)
    return matching
72
+
73
+
74
+ __all__ = [
75
+ "DownloaderError",
76
+ "GitHubBlob",
77
+ "GitHubDownloader",
78
+ "GitHubRepo",
79
+ "filter_skill_blobs",
80
+ "normalize_skill_path",
81
+ "parse_github_repo_url",
82
+ ]
@@ -0,0 +1,108 @@
1
+ import zipfile
2
+ import frontmatter
3
+ from dataclasses import dataclass, field
4
+ from pathlib import PurePosixPath
5
+ from typing import Any, cast
6
+ from .resource import SkillResource, create_from_bytes as create_resource_from_bytes
7
+ from .exceptions import InvalidSkillArchiveError
8
+
9
+
10
+ ZipPath = PurePosixPath
11
+
12
+ @dataclass
13
+ class SkillMd:
14
+ name: str
15
+ description: str
16
+ content: str
17
+
18
+ license: str | None = None
19
+ compatibility: str | None = None
20
+ allowed_tools: str | None = None
21
+ metadata: dict[str, Any] = field(default_factory=dict)
22
+
23
class SkillParser:
    """Helpers for locating and parsing the SKILL.md file of a skill archive."""

    # Lower-cased file name; every comparison against it is case-insensitive.
    SKILL_MD_NAME = "skill.md"

    @staticmethod
    def find_skill_root(paths: list[ZipPath]) -> ZipPath | None:
        """Return the directory that holds SKILL.md, or ``None`` if absent.

        According to the [specification](https://agentskills.io/home), the
        skill root is the first-level directory that contains the SKILL.md
        file. If there is no such directory, the root is the archive root,
        which is returned as ``ZipPath(".")``.

        The first matching entry in ``paths`` wins.
        """
        for path in paths:
            # One part: SKILL.md at the archive root. Two parts: SKILL.md
            # inside a first-level directory.
            if (
                len(path.parts) in (1, 2)
                and path.name.lower() == SkillParser.SKILL_MD_NAME
            ):
                return path.parent
        return None

    @staticmethod
    def find_skill_md(paths: list[ZipPath], root: ZipPath) -> ZipPath | None:
        """Return the first SKILL.md path located directly inside ``root``."""
        return next(
            (
                path
                for path in paths
                if path.name.lower() == SkillParser.SKILL_MD_NAME
                and path.parent == root
            ),
            None,
        )

    @staticmethod
    def parse_skill_md(text: str) -> SkillMd:
        """Parse the front matter and body of a SKILL.md document.

        Args:
            text: Full text of the SKILL.md file.

        Returns:
            A ``SkillMd`` built from the front-matter keys and the body.

        Raises:
            KeyError: If the front matter lacks ``name`` or ``description``.
        """

        def resolve_optional_str(value: Any) -> str | None:
            # Front-matter values may be parsed as non-string scalars.
            return None if value is None else str(value)

        post = frontmatter.loads(text)
        # A "metadata:" key with an empty value is parsed as None; fall back
        # to an empty dict so the field always matches its declared type.
        metadata = post.get("metadata") or {}
        return SkillMd(
            name=str(post["name"]),
            description=str(post["description"]),
            content=post.content,
            license=resolve_optional_str(post.get("license")),
            compatibility=resolve_optional_str(post.get("compatibility")),
            allowed_tools=resolve_optional_str(post.get("allowed-tools")),
            metadata=cast(dict[str, Any], metadata),
        )
67
+
68
@dataclass
class Skill(SkillMd):
    """A skill loaded from an archive: SKILL.md fields plus bundled files."""

    resources: list[SkillResource] = field(default_factory=list)

    @classmethod
    def from_zip(cls, zip_file: zipfile.ZipFile) -> Skill:
        """Build a ``Skill`` from an opened zip archive.

        The skill root is located through its SKILL.md file. Every other file
        under that root becomes a resource whose path is relative to the root.

        Args:
            zip_file: Archive opened for reading.

        Returns:
            The parsed skill, including its resources.

        Raises:
            InvalidSkillArchiveError: If no skill root or SKILL.md is found.
        """
        # Map each normalised path back to the exact member name stored in
        # the archive. PurePosixPath drops "./" prefixes and duplicate
        # slashes, so str(path) is not always a valid key for ZipFile.read().
        member_names: dict[ZipPath, str] = {}
        for name in zip_file.namelist():
            member_names.setdefault(ZipPath(name), name)
        paths = list(member_names)

        skill_root = SkillParser.find_skill_root(paths)
        if skill_root is None:
            raise InvalidSkillArchiveError("Skill root not found")

        skill_md = SkillParser.find_skill_md(paths, skill_root)
        if skill_md is None:
            raise InvalidSkillArchiveError("SKILL.md not found")

        raw_skill_md = zip_file.read(member_names[skill_md])
        skill = SkillParser.parse_skill_md(
            raw_skill_md.decode("utf-8-sig", errors="replace")
        )

        root_is_archive = skill_root == ZipPath(".")
        resources: list[SkillResource] = []
        for info in zip_file.infolist():
            if info.is_dir():
                continue
            info_path = ZipPath(info.filename)
            if info_path == skill_md:
                continue  # SKILL.md itself is not a resource
            # When the skill sits in a sub-directory, ignore files outside it.
            if not root_is_archive and skill_root not in {
                info_path,
                *info_path.parents,
            }:
                continue

            relative = info_path.relative_to(skill_root)
            content_bytes = zip_file.read(info.filename)
            resources.append(create_resource_from_bytes(str(relative), content_bytes))

        return cls(**skill.__dict__, resources=resources)
104
+
105
+ __all__ = [
106
+ "SkillResource",
107
+ "Skill",
108
+ ]
@@ -0,0 +1,13 @@
1
class ExtractorException(Exception):
    """Base class for errors defined by the extractor."""


class InvalidSkillArchiveError(ExtractorException):
    """Raised when an archive cannot be interpreted as a skill."""

    def __init__(self, message: str):
        # Keep the text on an attribute as well as in the exception args.
        self.message = message
        super().__init__(message)
8
+
9
+
10
+ __all__ = [
11
+ "ExtractorException",
12
+ "InvalidSkillArchiveError",
13
+ ]
@@ -0,0 +1,36 @@
1
+ from dataclasses import dataclass
2
+ from typing import Literal
3
+ from binaryornot.helpers import is_binary_string
4
+
5
+
6
@dataclass
class BaseResource:
    """Fields shared by every resource file bundled with a skill."""

    # Path string identifying the resource, as supplied by the caller.
    relative: str


@dataclass
class TextResource(BaseResource):
    """A resource whose content was decoded to text."""

    content: str
    # Discriminator that tells text resources apart from binary ones.
    type: Literal["text"] = "text"


@dataclass
class BinaryResource(BaseResource):
    """A resource kept as raw bytes."""

    content: bytes
    # Discriminator that tells binary resources apart from text ones.
    type: Literal["binary"] = "binary"
19
+
20
+ type SkillResource = TextResource | BinaryResource
21
+
22
def create_from_bytes(relative: str, content: bytes) -> SkillResource:
    """Wrap raw file bytes in a text or binary resource.

    Content is classified as binary when ``is_binary_string`` reports it as
    such or when it contains a NUL byte. Anything else is decoded as UTF-8,
    with a leading BOM stripped and undecodable bytes replaced.

    Args:
        relative: Path string stored on the resource.
        content: Raw bytes of the file.
    """
    if is_binary_string(content) or b"\x00" in content:
        return BinaryResource(relative=relative, content=content)

    text = content.decode("utf-8-sig", errors="replace")
    return TextResource(relative=relative, content=text)
28
+
29
+
30
+ __all__ = [
31
+ "BaseResource",
32
+ "TextResource",
33
+ "BinaryResource",
34
+ "SkillResource",
35
+ "create_from_bytes",
36
+ ]
@@ -0,0 +1,15 @@
1
+ from .github import (
2
+ GitHubBlob,
3
+ GitHubClient,
4
+ GitHubError,
5
+ GitHubRepo,
6
+ parse_github_repo_url,
7
+ )
8
+
9
+ __all__ = [
10
+ "GitHubBlob",
11
+ "GitHubClient",
12
+ "GitHubError",
13
+ "GitHubRepo",
14
+ "parse_github_repo_url",
15
+ ]
@@ -0,0 +1,120 @@
1
+ from dataclasses import dataclass
2
+ from urllib.parse import urlparse
3
+
4
+ import httpx
5
+
6
+
7
# Base URL of the GitHub REST API, used for repository tree listings.
API_BASE_URL = "https://api.github.com"
# Base URL from which raw file contents are downloaded.
RAW_BASE_URL = "https://raw.githubusercontent.com"
# Value sent in the User-Agent header of every request.
USER_AGENT = "dais-skills"
10
+
11
+
12
class GitHubError(Exception):
    """Raised for unsupported repository URLs and failed GitHub requests."""
14
+
15
+
16
+ @dataclass(frozen=True)
17
+ class GitHubRepo:
18
+ owner: str
19
+ repo: str
20
+ ref: str | None = None
21
+
22
+ @property
23
+ def owner_repo(self) -> str:
24
+ return f"{self.owner}/{self.repo}"
25
+
26
+
27
+ @dataclass(frozen=True)
28
+ class GitHubBlob:
29
+ path: str
30
+ size: int | None = None
31
+
32
+
33
def parse_github_repo_url(repo_url: str) -> GitHubRepo:
    """Split a github.com repository URL into owner, repository and ref.

    Accepted form: ``http(s)://github.com/<owner>/<repo>[.git]``, optionally
    followed by ``/tree/<ref>``. Everything after ``/tree/`` is taken as the
    ref, so a URL that also carries a sub-directory after the branch name
    yields a ref that includes that sub-directory.

    Args:
        repo_url: URL to parse; surrounding whitespace is ignored.

    Returns:
        The parsed ``GitHubRepo``; ``ref`` is ``None`` without ``/tree/<ref>``.

    Raises:
        GitHubError: If the scheme or host is unsupported, or the owner or
            repository name is missing.
    """
    unsupported = f"Unsupported GitHub repository URL: {repo_url}"

    parsed = urlparse(repo_url.strip())
    if parsed.scheme not in {"http", "https"} or parsed.hostname != "github.com":
        raise GitHubError(unsupported)

    segments = [segment for segment in parsed.path.split("/") if segment]
    if len(segments) < 2:
        raise GitHubError(unsupported)

    owner = segments[0]
    repo = segments[1].removesuffix(".git")
    if not repo:
        # A repository segment of just ".git" would leave an empty name and
        # produce a malformed API path later on.
        raise GitHubError(unsupported)

    ref = None
    if len(segments) >= 4 and segments[2] == "tree":
        ref = "/".join(segments[3:])

    return GitHubRepo(owner=owner, repo=repo, ref=ref)
50
+
51
+
52
class GitHubClient:
    """Thin async wrapper around GitHub's tree and raw endpoints.

    HTTP error statuses are reported as ``GitHubError``.

    NOTE(review): exceptions raised by ``httpx`` before a response exists
    (timeouts, connection failures) are not converted and propagate as-is.
    """

    def __init__(self, client: httpx.AsyncClient):
        # The client is only borrowed; this class never closes it.
        self._client = client

    async def fetch_tree(self, repo: GitHubRepo) -> tuple[str, list[GitHubBlob]]:
        """List every file of the repository at ``repo.ref``.

        When ``repo.ref`` is unset, ``main`` and then ``master`` are tried.

        Returns:
            The ref that was listed successfully, and one ``GitHubBlob`` per
            ``blob`` entry of the recursive tree.

        Raises:
            GitHubError: If every candidate ref fails; the error of the last
                attempt is re-raised.
        """
        refs_to_try = [repo.ref] if repo.ref else ["main", "master"]
        last_error: GitHubError | None = None

        for tree_ref in refs_to_try:
            try:
                tree = await self._get_json(
                    f"/repos/{repo.owner_repo}/git/trees/{tree_ref}?recursive=1"
                )
            except GitHubError as exc:
                last_error = exc
                continue

            # NOTE(review): the GitHub API is documented to flag oversized
            # listings with "truncated"; that flag is not inspected here.
            blobs = [
                GitHubBlob(path=entry["path"], size=entry.get("size"))
                for entry in tree.get("tree", [])
                if entry.get("type") == "blob" and isinstance(entry.get("path"), str)
            ]
            return tree_ref, blobs

        if last_error is not None:
            raise last_error
        raise GitHubError(f"Unable to fetch repository tree for {repo.owner_repo}")

    async def fetch_blob(self, repo: GitHubRepo, ref: str, path: str) -> bytes:
        """Download the raw bytes of ``path`` at ``ref``.

        Args:
            repo: Repository to read from.
            ref: Git ref, used verbatim in the URL.
            path: Repository-relative file path, as listed by ``fetch_tree``.

        Raises:
            GitHubError: If the server answers with an error status.
        """
        from urllib.parse import quote

        # File names may legally contain "#", "?" or "%". Left unescaped they
        # would be read as fragment, query or escape sequence and a different
        # URL would be requested. "/" stays literal as the path separator.
        encoded_path = quote(path, safe="/")
        url = f"{RAW_BASE_URL}/{repo.owner}/{repo.repo}/{ref}/{encoded_path}"
        response = await self._client.get(url, headers={"User-Agent": USER_AGENT})
        try:
            response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            raise GitHubError(
                f"Failed to download file {path}: HTTP {exc.response.status_code}"
            ) from exc
        return response.content

    async def fetch_text(self, repo: GitHubRepo, ref: str, path: str) -> str:
        """Download ``path`` and decode it as UTF-8, replacing invalid bytes."""
        content = await self.fetch_blob(repo, ref, path)
        return content.decode("utf-8", errors="replace")

    async def _get_json(self, path: str) -> dict:
        """GET ``path`` from the GitHub API and return the decoded JSON body.

        Raises:
            GitHubError: If the server answers with an error status.
        """
        response = await self._client.get(
            f"{API_BASE_URL}{path}",
            headers={
                "Accept": "application/vnd.github.v3+json",
                "User-Agent": USER_AGENT,
            },
        )
        try:
            response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            raise GitHubError(
                f"GitHub API request failed: HTTP {exc.response.status_code}"
            ) from exc
        return response.json()
112
+
113
+
114
+ __all__ = [
115
+ "GitHubError",
116
+ "GitHubRepo",
117
+ "GitHubBlob",
118
+ "GitHubClient",
119
+ "parse_github_repo_url",
120
+ ]
@@ -0,0 +1,12 @@
1
+ import httpx
2
+
3
+ from .github import GitHubScanner, ScannedSkill, ScannerError
4
+
5
+
6
async def scan_repo(repo_url: str) -> list[ScannedSkill]:
    """Scan a GitHub repository for skills.

    A short-lived ``httpx.AsyncClient`` with a 10 second timeout is opened for
    the duration of the call and the work is delegated to
    ``GitHubScanner.scan_repo``.

    Args:
        repo_url: URL of the GitHub repository to scan.

    Returns:
        The skills reported by ``GitHubScanner``.
    """
    http_client = httpx.AsyncClient(timeout=10.0)
    async with http_client:
        return await GitHubScanner(http_client).scan_repo(repo_url)
10
+
11
+
12
+ __all__ = ["scan_repo", "ScannedSkill", "ScannerError"]
@@ -0,0 +1,161 @@
1
+ from dataclasses import dataclass
2
+ from pathlib import PurePosixPath
3
+
4
+ import frontmatter
5
+ import httpx
6
+
7
+ from dais_skills.public.github import (
8
+ GitHubBlob,
9
+ GitHubClient,
10
+ GitHubError,
11
+ parse_github_repo_url,
12
+ )
13
+
14
+
15
class ScannerError(Exception):
    """Raised when the repository tree cannot be fetched during a scan."""
17
+
18
+
19
# Directory prefixes searched for skills, in priority order. The empty prefix
# stands for the repository root; every other entry is treated as a container
# directory that may hold skills one or two levels deep.
PRIORITY_PREFIXES = [
    "",
    "skills/",
    "skills/.curated/",
    "skills/.experimental/",
    "skills/.system/",
    ".agents/skills/",
    ".claude/skills/",
    ".cline/skills/",
    ".codebuddy/skills/",
    ".codex/skills/",
    ".commandcode/skills/",
    ".continue/skills/",
    ".github/skills/",
    ".goose/skills/",
    ".iflow/skills/",
    ".junie/skills/",
    ".kilocode/skills/",
    ".kiro/skills/",
    ".mux/skills/",
    ".neovate/skills/",
    ".opencode/skills/",
    ".openhands/skills/",
    ".pi/skills/",
    ".qoder/skills/",
    ".roo/skills/",
    ".trae/skills/",
    ".windsurf/skills/",
    ".zencoder/skills/",
]

# Directory names that are excluded when looking for nested skills.
SKIP_DIRS = {"node_modules", ".git", "dist", "build", "__pycache__"}
51
+
52
+
53
@dataclass(frozen=True)
class ScannedSkill:
    """Immutable summary of one skill discovered in a repository."""

    # Directory that contains the skill's SKILL.md file.
    path: str
    # Values taken from the "name" and "description" front-matter keys.
    name: str
    description: str
58
+
59
+
60
class GitHubScanner:
    """Discovers skills in a GitHub repository by reading SKILL.md files."""

    def __init__(self, client: httpx.AsyncClient):
        self._github = GitHubClient(client)

    async def scan_repo(self, repo_url: str) -> list[ScannedSkill]:
        """Return every skill found in the repository behind ``repo_url``.

        Candidate SKILL.md files are chosen by ``find_skill_md_paths`` and
        downloaded one after another. Files that fail to download, or whose
        front matter is unusable, are skipped.

        Raises:
            ScannerError: If the repository tree cannot be fetched.
        """
        # NOTE(review): a GitHubError raised by parse_github_repo_url for an
        # unsupported URL is not converted to ScannerError here.
        repo = parse_github_repo_url(repo_url)
        try:
            tree_ref, blobs = await self._github.fetch_tree(repo)
        except GitHubError as exc:
            raise ScannerError(str(exc)) from exc

        found: list[ScannedSkill] = []
        for md_path in find_skill_md_paths(blobs):
            try:
                text = await self._github.fetch_text(repo, tree_ref, md_path)
            except GitHubError:
                # Best effort: one unreadable file must not abort the scan.
                continue
            parsed = parse_skill_from_content(text, md_path)
            if parsed is not None:
                found.append(parsed)

        return found
84
+
85
+
86
+ def _is_skill_md(path: str) -> bool:
87
+ return "/" in path and path.rsplit("/", 1)[-1].lower() == "skill.md"
88
+
89
+
90
def find_skill_md_paths(blobs: list[GitHubBlob]) -> list[str]:
    """Select the SKILL.md paths worth downloading from a repository tree.

    Each prefix in ``PRIORITY_PREFIXES`` is examined in order. Below a prefix
    a SKILL.md is accepted when it sits exactly one directory deep. Below a
    non-empty prefix it is also accepted two directories deep, provided
    neither directory is in ``SKIP_DIRS`` and the outer directory has no
    SKILL.md of its own. If no prefix yields a match, every SKILL.md with at
    most five ``/`` separators and no ``SKIP_DIRS`` component is returned.

    Returns:
        Paths without duplicates, in order of first acceptance.
    """
    candidates = [blob.path for blob in blobs if _is_skill_md(blob.path)]
    if not candidates:
        return []

    known_lower = {candidate.lower() for candidate in candidates}
    # Dict keys keep insertion order and silently ignore repeated paths.
    prioritized: dict[str, None] = {}

    for prefix in PRIORITY_PREFIXES:
        for candidate in candidates:
            if not candidate.startswith(prefix):
                continue

            segments = candidate[len(prefix):].split("/")
            depth = len(segments)

            if depth == 2:
                # <prefix><skill>/SKILL.md
                # NOTE(review): SKIP_DIRS is not consulted for this case.
                prioritized.setdefault(candidate)
            elif depth == 3 and prefix and not SKIP_DIRS.intersection(segments[:2]):
                # <prefix><group>/<skill>/SKILL.md: only when <group> is not
                # itself a skill.
                group_skill_md = f"{prefix}{segments[0]}/SKILL.md".lower()
                if group_skill_md not in known_lower:
                    prioritized.setdefault(candidate)

    if prioritized:
        return list(prioritized)

    return [
        candidate
        for candidate in candidates
        if candidate.count("/") <= 5 and SKIP_DIRS.isdisjoint(candidate.split("/"))
    ]
135
+
136
+
137
def parse_skill_from_content(content: str, repo_path: str) -> ScannedSkill | None:
    """Build a ``ScannedSkill`` from the text of a SKILL.md file.

    Args:
        content: Text of the SKILL.md file.
        repo_path: Repository path of that file; its parent directory becomes
            the skill's ``path``.

    Returns:
        The skill summary, or ``None`` when the front matter cannot be parsed
        or does not provide string ``name`` and ``description`` values.
    """
    try:
        parsed = frontmatter.loads(content)
    except Exception:
        # Deliberately broad: any parsing failure just means "not a skill".
        return None

    front = parsed.metadata
    name, description = front.get("name"), front.get("description")
    if not (isinstance(name, str) and isinstance(description, str)):
        return None

    skill_dir = PurePosixPath(repo_path).parent
    return ScannedSkill(
        path=str(skill_dir),
        name=name.strip(),
        description=description.strip(),
    )
153
+
154
+
155
+ __all__ = [
156
+ "ScannerError",
157
+ "ScannedSkill",
158
+ "GitHubScanner",
159
+ "find_skill_md_paths",
160
+ "parse_skill_from_content",
161
+ ]