PyPI - skill-auditor - Versions diffs - 0.4.0__py3-none-any.whl - Mend

skill-auditor 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

skill_auditor/__init__.py +4 -0
skill_auditor/__main__.py +6 -0
skill_auditor/analyzers.py +165 -0
skill_auditor/archives.py +169 -0
skill_auditor/cli.py +873 -0
skill_auditor/config.py +196 -0
skill_auditor/formats.py +178 -0
skill_auditor/paths.py +135 -0
skill_auditor/render_catalog.py +122 -0
skill_auditor/rules/archives.yaml +30 -0
skill_auditor/rules/credentials.yaml +38 -0
skill_auditor/rules/dangerous-shell.yaml +59 -0
skill_auditor/rules/description-mismatch.yaml +12 -0
skill_auditor/rules/dynamic-execution.yaml +50 -0
skill_auditor/rules/exfiltration.yaml +31 -0
skill_auditor/rules/filesystem-boundary.yaml +16 -0
skill_auditor/rules/git-hooks.yaml +15 -0
skill_auditor/rules/language-exfiltration.yaml +18 -0
skill_auditor/rules/logic-bomb.yaml +11 -0
skill_auditor/rules/mcp-config.yaml +23 -0
skill_auditor/rules/obfuscation.yaml +31 -0
skill_auditor/rules/powershell.yaml +34 -0
skill_auditor/rules/prompt-injection.yaml +32 -0
skill_auditor/rules_loader.py +169 -0
skill_auditor-0.4.0.dist-info/METADATA +429 -0
skill_auditor-0.4.0.dist-info/RECORD +30 -0
skill_auditor-0.4.0.dist-info/WHEEL +5 -0
skill_auditor-0.4.0.dist-info/entry_points.txt +2 -0
skill_auditor-0.4.0.dist-info/licenses/LICENSE +21 -0
skill_auditor-0.4.0.dist-info/top_level.txt +1 -0

skill_auditor/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+"""Security scanner for AI Agent skills."""
+# Package version; matches the 0.4.0 wheel and dist-info names of this release.
+__version__ = "0.4.0"

skill_auditor/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from .cli import main
+# Run the CLI only when this module is executed as a script; whatever main()
+# returns is handed to SystemExit.
+if __name__ == "__main__":
+    raise SystemExit(main())

skill_auditor/analyzers.py ADDED Viewed

@@ -0,0 +1,165 @@
+"""Small, bounded cross-line analyzers for language-specific risk patterns."""
+from __future__ import annotations
+import ast
+import re
+from pathlib import Path
+def run_named_check(rule: dict, relative_path: str, text: str) -> list[tuple[int, str]]:
+    """Run the analyzer selected by ``rule["check"]`` against one file's text.
+    Most analyzers only apply to particular file extensions (compared in
+    lower case); ``mcp-config-write`` applies to any file. Returns whatever
+    the analyzer returns, or [] when no analyzer matches the check/extension.
+    """
+    requested = rule.get("check")
+    extension = Path(relative_path).suffix.lower()
+    # (check name, analyzer, allowed extensions); None means "any extension".
+    table = (
+        ("python-exfiltration", _python_exfiltration, {".py"}),
+        ("python-decoded-exec", _python_decoded_exec, {".py"}),
+        ("node-exfiltration", _node_exfiltration, {".js", ".mjs", ".cjs", ".ts"}),
+        ("powershell-download-exec", _powershell_download_exec, {".ps1"}),
+        ("mcp-config-write", _mcp_config_write, None),
+    )
+    for check_name, analyzer, extensions in table:
+        if requested == check_name and (extensions is None or extension in extensions):
+            return analyzer(text)
+    return []
+def _python_exfiltration(text: str) -> list[tuple[int, str]]:
+    """Flag network calls whose arguments carry sensitive-looking data.
+    Pass 1 records every name assigned from an expression accepted by
+    _is_sensitive_python_expression(). Pass 2 reports each call accepted by
+    _is_python_network_sink() when any positional or keyword argument is
+    itself sensitive or mentions a recorded name. Returns (line number,
+    stripped source line) pairs, or [] when ``text`` cannot be parsed.
+    """
+    try:
+        tree = ast.parse(text)
+    except (SyntaxError, ValueError, RecursionError):
+        # Scanned files are untrusted: besides invalid syntax, parsing can fail
+        # with ValueError (NUL bytes on older interpreters) or RecursionError
+        # (extreme nesting). None of these should abort the scan.
+        return []
+    tainted: set[str] = set()
+    for node in ast.walk(tree):
+        if not isinstance(node, (ast.Assign, ast.AnnAssign)):
+            continue
+        value = node.value
+        if value is None or not _is_sensitive_python_expression(value):
+            continue  # bare annotation, or a right-hand side that is not sensitive
+        targets = node.targets if isinstance(node, ast.Assign) else [node.target]
+        for target in targets:
+            tainted.update(_assigned_names(target))
+    output = []
+    for node in ast.walk(tree):
+        if not isinstance(node, ast.Call) or not _is_python_network_sink(node):
+            continue
+        payload_nodes = [*node.args, *(keyword.value for keyword in node.keywords)]
+        if any(_is_sensitive_python_expression(item) or _references_names(item, tainted)
+               for item in payload_nodes):
+            output.append((getattr(node, "lineno", 1), _source_line(text, node)))
+    return output
+def _python_decoded_exec(text: str) -> list[tuple[int, str]]:
+    """Flag eval()/exec() calls whose first argument decodes or fetches code.
+    A call is reported when its rendered first argument mentions base64,
+    b64decode, urlopen, ``requests.`` or ``socket.`` (case-insensitive).
+    Returns (line number, rendered argument) pairs, or [] when ``text``
+    cannot be parsed.
+    """
+    try:
+        tree = ast.parse(text)
+    except (SyntaxError, ValueError, RecursionError):
+        # Untrusted input: NUL bytes (ValueError on older interpreters) and
+        # extreme nesting (RecursionError) must not abort the scan.
+        return []
+    marker = re.compile(r"(base64|b64decode|urlopen|requests\.|socket\.)", re.I)
+    # ast.unparse is not available on every interpreter; fall back to the
+    # argument's original source text so the check is not silently disabled.
+    unparse = getattr(ast, "unparse", None)
+    source_segment = getattr(ast, "get_source_segment", None)
+    output = []
+    for node in ast.walk(tree):
+        if not isinstance(node, ast.Call):
+            continue
+        if _call_name(node.func) not in {"eval", "exec"} or not node.args:
+            continue
+        argument = node.args[0]
+        if unparse is not None:
+            expression = unparse(argument)
+        elif source_segment is not None:
+            expression = source_segment(text, argument) or ""
+        else:
+            expression = ""
+        if marker.search(expression):
+            output.append((getattr(node, "lineno", 1), expression))
+    return output
+def _call_name(node: ast.AST) -> str:
+    """Return the dotted name of a call target, e.g. ``requests.post``.
+    Attribute chains are followed down to their root. When the root is not a
+    plain name (for example another call), only the trailing attribute names
+    are returned. Anything else yields "".
+    """
+    attributes = []
+    current = node
+    while isinstance(current, ast.Attribute):
+        attributes.append(current.attr)
+        current = current.value
+    dotted = current.id if isinstance(current, ast.Name) else ""
+    # attributes were collected outermost-first; rebuild innermost-first.
+    for attribute in reversed(attributes):
+        dotted = f"{dotted}.{attribute}" if dotted else attribute
+    return dotted
+def _node_exfiltration(text: str) -> list[tuple[int, str]]:
+    """Flag JS/TS network sinks that mention a variable holding sensitive data.
+    A variable is tainted when its const/let/var declaration line mentions a
+    file read, process.env, a credential path or a TOKEN/SECRET/PASSWORD-like
+    word. Each line that contains a sink (fetch, http(s).request,
+    axios.post/put/patch, or child_process exec/spawn with curl/wget) and
+    references a tainted variable is returned as (line number, stripped line).
+    """
+    assignment = re.compile(
+        r"\b(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*[^\n]*"
+        r"(?:readFileSync|readFile|process\.env|\.ssh|\.aws|\.env|TOKEN|SECRET|PASSWORD)",
+        re.IGNORECASE,
+    )
+    sink = re.compile(
+        r"(?:fetch\s*\(|https?\.request\s*\(|axios\.(?:post|put|patch)"
+        r"|child_process\.(?:exec|spawn)[^\n]*(?:curl|wget))",
+        re.IGNORECASE,
+    )
+    tainted = {match.group(1) for match in assignment.finditer(text)}
+    if not tainted:
+        return []
+    # JS identifiers may contain "$", which is not a regex word character, so
+    # "\b" cannot delimit names such as "$token". Use explicit identifier
+    # boundaries instead, compiled once rather than per line and per name.
+    alternatives = "|".join(re.escape(name) for name in sorted(tainted))
+    reference = re.compile(rf"(?<![\w$])(?:{alternatives})(?![\w$])")
+    return [
+        (line_number, line.strip())
+        for line_number, line in enumerate(text.splitlines(), start=1)
+        if sink.search(line) and reference.search(line)
+    ]
+def _powershell_download_exec(text: str) -> list[tuple[int, str]]:
+    """Report execution lines in a script that also contains a download.
+    Nothing is reported unless the text contains both a download marker and
+    an execution marker somewhere; when it does, every line matching the
+    execution pattern is returned via _matching_lines().
+    """
+    fetch_pattern = re.compile(r"(?:Invoke-WebRequest|iwr\b|Net\.WebClient|DownloadString)", re.I)
+    run_pattern = re.compile(r"(?:Invoke-Expression|\biex\b|Start-Process|&\s*\$)", re.I)
+    if fetch_pattern.search(text) and run_pattern.search(text):
+        return _matching_lines(text, run_pattern)
+    return []
+def _mcp_config_write(text: str) -> list[tuple[int, str]]:
+    """Report write operations in text that also names an MCP/agent config.
+    Nothing is reported unless the text contains both a config target marker
+    and a write/mutation marker somewhere; when it does, every line matching
+    the mutation pattern is returned via _matching_lines().
+    """
+    config_pattern = re.compile(
+        r"(?:claude_desktop_config\.json|\.cursor[\\/].*(?:mcp|config)"
+        r"|\.codex[\\/].*(?:config|mcp)|mcpServers)",
+        re.I,
+    )
+    write_pattern = re.compile(
+        r"(?:write_text|writeFile|Set-Content|Add-Content|Out-File|>>|json\.dump)",
+        re.I,
+    )
+    if config_pattern.search(text) and write_pattern.search(text):
+        return _matching_lines(text, write_pattern)
+    return []
+def _matching_lines(text: str, pattern: re.Pattern) -> list[tuple[int, str]]:
+    """Return (1-based line number, stripped line) for each line matching ``pattern``."""
+    return [
+        (number, content.strip())
+        for number, content in enumerate(text.splitlines(), start=1)
+        if pattern.search(content)
+    ]
+def _assigned_names(node: ast.AST) -> set[str]:
+    """Collect the identifier of every ast.Name found anywhere inside ``node``."""
+    names: set[str] = set()
+    for child in ast.walk(node):
+        if isinstance(child, ast.Name):
+            names.add(child.id)
+    return names
+def _references_names(node: ast.AST, names: set[str]) -> bool:
+    """Return True when any ast.Name inside ``node`` has an identifier in ``names``."""
+    for child in ast.walk(node):
+        if isinstance(child, ast.Name) and child.id in names:
+            return True
+    return False
+def _is_sensitive_python_expression(node: ast.AST) -> bool:
+    """Return True when the ast.dump() rendering of ``node`` looks sensitive.
+    The rendering is searched case-insensitively for file reads (open,
+    read_text, read_bytes), environment access, credential paths and
+    TOKEN/SECRET/PASSWORD/CREDENTIAL-like words.
+    """
+    marker = re.compile(
+        r"(?:\bopen\b|read_text|read_bytes|os.*(?:environ|getenv)|"
+        r"\.ssh|\.aws|\.env|TOKEN|SECRET|PASSWORD|CREDENTIAL)",
+        re.IGNORECASE,
+    )
+    dumped = ast.dump(node, include_attributes=False)
+    return marker.search(dumped) is not None
+def _is_python_network_sink(node: ast.Call) -> bool:
+    """Return True when ``node`` calls something that can send data out.
+    Recognised sinks: requests.post/put/patch, urllib.request.urlopen/Request,
+    any ``.send``/``.sendall``/``.sendto`` method, and subprocess
+    run/Popen/call whose rendered call mentions curl or wget.
+    """
+    target = _call_name(node.func)
+    http_calls = {
+        "requests.post", "requests.put", "requests.patch",
+        "urllib.request.urlopen", "urllib.request.Request",
+    }
+    if target in http_calls:
+        return True
+    if target.endswith((".send", ".sendall", ".sendto")):
+        return True
+    if target not in {"subprocess.run", "subprocess.Popen", "subprocess.call"}:
+        return False
+    # A subprocess call only counts when it shells out to a download tool.
+    dumped = ast.dump(node, include_attributes=False)
+    return re.search(r"(?:curl|wget)", dumped, re.IGNORECASE) is not None
+def _source_line(text: str, node: ast.AST) -> str:
+    """Return the stripped source line on which ``node`` starts.
+    A missing line number is treated as 1 and values below 1 are clamped to
+    1; "" is returned when the line number lies past the end of ``text``.
+    """
+    line_number = getattr(node, "lineno", 1)
+    if line_number < 1:
+        line_number = 1
+    source_lines = text.splitlines()
+    if line_number > len(source_lines):
+        return ""
+    return source_lines[line_number - 1].strip()

skill_auditor/archives.py ADDED Viewed

@@ -0,0 +1,169 @@
+"""Read-only zip/tar inspection with bounded resource use."""
+from __future__ import annotations
+import io
+import stat
+import tarfile
+import zipfile
+from pathlib import Path, PurePosixPath
+# inspect_archive() rejects archives whose on-disk size exceeds this.
+MAX_ARCHIVE_BYTES = 25_000_000
+# _check_member_count() raises ArchiveError above this many members.
+MAX_MEMBERS = 2_000
+# Running total of declared member sizes; scanning stops with ARCHIVE-004 once exceeded.
+MAX_EXPANDED_BYTES = 100_000_000
+# Members larger than this are not read for text scanning.
+MAX_MEMBER_BYTES = 1_000_000
+# Zip members whose file_size / compress_size exceeds this get an ARCHIVE-004 finding.
+MAX_COMPRESSION_RATIO = 200
+# File-name endings treated as archives by is_archive() and _is_nested_archive().
+ARCHIVE_SUFFIXES = (".zip", ".tar", ".tar.gz", ".tgz", ".tar.bz2", ".tbz2", ".tar.xz")
+# Member suffixes whose content is decoded as UTF-8 text; the empty string
+# admits members that have no extension at all.
+TEXT_SUFFIXES = {
+    ".md", ".txt", ".sh", ".bash", ".zsh", ".fish", ".py", ".js", ".mjs",
+    ".cjs", ".ts", ".ps1", ".bat", ".cmd", ".yaml", ".yml", ".json", ".toml",
+    ".cfg", ".ini", ".env", "",
+}
+class ArchiveError(ValueError):
+    """Raised when an archive is rejected before or during inspection."""
+def is_archive(path: Path) -> bool:
+    """Return True when the file name ends with a suffix in ARCHIVE_SUFFIXES (case-insensitive)."""
+    return path.name.lower().endswith(ARCHIVE_SUFFIXES)
+def inspect_archive(path: Path) -> tuple[list[dict], list[tuple[str, str]], list[dict]]:
+    """Inspect a zip or tar archive without extracting it.
+    Returns ``(findings, texts, diagnostics)`` as produced by _inspect_zip()
+    or _inspect_tar(), chosen by sniffing the file content.
+    Raises ArchiveError when the file cannot be stat'ed, is larger than
+    MAX_ARCHIVE_BYTES, is neither a zip nor a tar file, or turns out to be
+    corrupt or unreadable while being inspected.
+    """
+    try:
+        size = path.stat().st_size
+    except OSError as exc:
+        raise ArchiveError(f"cannot stat archive: {exc}") from exc
+    if size > MAX_ARCHIVE_BYTES:
+        raise ArchiveError(f"archive exceeds {MAX_ARCHIVE_BYTES} bytes")
+    try:
+        if zipfile.is_zipfile(path):
+            return _inspect_zip(path)
+        if tarfile.is_tarfile(path):
+            return _inspect_tar(path)
+    except (zipfile.BadZipFile, tarfile.TarError, EOFError, OSError) as exc:
+        # Archives are untrusted input: report damaged or unreadable ones with
+        # the module's own error type, as is done for stat failures above.
+        # ArchiveError raised by the inspectors is not caught here.
+        raise ArchiveError(f"cannot read archive: {exc}") from exc
+    raise ArchiveError("unsupported or invalid archive")
+def _inspect_zip(path: Path):
+    """Inspect a zip archive's members without extracting them to disk.
+    Returns ``(findings, texts, diagnostics)``: rule findings about member
+    names, links, executables and sizes; ``(name, text)`` pairs for small
+    members with a suffix in TEXT_SUFFIXES; and notes about members that
+    could not be read as UTF-8 text. Raises ArchiveError (through
+    _check_member_count) when the archive has too many members.
+    """
+    import zlib  # local import: only needed to recognise corrupt deflate data
+    findings: list[dict] = []
+    texts: list[tuple[str, str]] = []
+    diagnostics: list[dict] = []
+    total = 0
+    with zipfile.ZipFile(path) as archive:
+        members = archive.infolist()
+        _check_member_count(members)
+        for member in members:
+            name = member.filename.replace("\\", "/")
+            total += member.file_size
+            if total > MAX_EXPANDED_BYTES:
+                findings.append(_archive_finding("ARCHIVE-004", name, "archive expansion limit exceeded"))
+                break
+            findings.extend(_member_name_findings(name))
+            if _is_nested_archive(name):
+                findings.append(_archive_finding(
+                    "ARCHIVE-004", name, "nested archive skipped at the depth limit"
+                ))
+                continue
+            # The upper 16 bits of external_attr hold the Unix st_mode, if any.
+            mode = member.external_attr >> 16
+            if stat.S_ISLNK(mode):
+                findings.append(_archive_finding("ARCHIVE-002", name, "archive contains a symlink"))
+                continue
+            # Directory entries normally carry execute (search) bits and have no
+            # suffix; mask the mode so only path-based hook detection applies.
+            if _looks_hidden_executable(name, 0 if member.is_dir() else mode):
+                findings.append(_archive_finding("ARCHIVE-003", name, "hidden executable or hook member"))
+            compressed = max(member.compress_size, 1)  # avoid division by zero
+            if member.file_size / compressed > MAX_COMPRESSION_RATIO:
+                findings.append(_archive_finding("ARCHIVE-004", name, "suspicious compression ratio"))
+            if member.is_dir() or member.file_size > MAX_MEMBER_BYTES:
+                continue
+            if PurePosixPath(name).suffix.lower() not in TEXT_SUFFIXES:
+                continue
+            try:
+                raw = archive.read(member)
+                texts.append((name, raw.decode("utf-8")))
+            except (OSError, UnicodeError, RuntimeError, zipfile.BadZipFile, zlib.error):
+                # One damaged member (bad CRC, corrupt stream, encrypted, ...)
+                # becomes a diagnostic instead of aborting the whole scan.
+                diagnostics.append({"path": name, "message": "archive member is not readable UTF-8 text"})
+    return findings, texts, diagnostics
+def _inspect_tar(path: Path):
+    """Inspect a (possibly compressed) tar archive without extracting to disk.
+    Returns ``(findings, texts, diagnostics)``: rule findings about member
+    names, links, executables and sizes; ``(name, text)`` pairs for small
+    regular files with a suffix in TEXT_SUFFIXES; and notes about members
+    that could not be read as UTF-8 text. Raises ArchiveError when the
+    archive has more than MAX_MEMBERS members.
+    """
+    findings: list[dict] = []
+    texts: list[tuple[str, str]] = []
+    diagnostics: list[dict] = []
+    total = 0
+    with tarfile.open(path, mode="r:*") as archive:
+        # Iterate lazily rather than calling getmembers(): headers are read one
+        # at a time, so the member and expansion limits stop the work early
+        # instead of after the whole (possibly compressed) stream was walked.
+        for index, member in enumerate(archive):
+            if index >= MAX_MEMBERS:
+                raise ArchiveError(f"archive has more than {MAX_MEMBERS} members")
+            name = member.name.replace("\\", "/")
+            total += member.size
+            if total > MAX_EXPANDED_BYTES:
+                findings.append(_archive_finding("ARCHIVE-004", name, "archive expansion limit exceeded"))
+                break
+            findings.extend(_member_name_findings(name))
+            if _is_nested_archive(name):
+                findings.append(_archive_finding(
+                    "ARCHIVE-004", name, "nested archive skipped at the depth limit"
+                ))
+                continue
+            if member.issym() or member.islnk():
+                findings.append(_archive_finding("ARCHIVE-002", name, "archive contains a link"))
+                continue
+            # Directory entries normally carry execute (search) bits and have no
+            # suffix; mask the mode so only path-based hook detection applies.
+            if _looks_hidden_executable(name, 0 if member.isdir() else member.mode):
+                findings.append(_archive_finding("ARCHIVE-003", name, "hidden executable or hook member"))
+            if not member.isfile() or member.size > MAX_MEMBER_BYTES:
+                continue
+            if PurePosixPath(name).suffix.lower() not in TEXT_SUFFIXES:
+                continue
+            handle = archive.extractfile(member)
+            if handle is None:
+                continue
+            try:
+                with handle:  # release the member reader once it has been read
+                    texts.append((name, handle.read().decode("utf-8")))
+            except (OSError, UnicodeError, EOFError, tarfile.TarError):
+                diagnostics.append({"path": name, "message": "archive member is not readable UTF-8 text"})
+    return findings, texts, diagnostics
+def validate_archive_skill(texts: list[tuple[str, str]]) -> str:
+    """Return the directory prefix of the archive's single SKILL.md.
+    The file name is matched case-insensitively. The result is "" when
+    SKILL.md sits at the archive root, otherwise its parent path followed by
+    "/". Raises ArchiveError unless exactly one SKILL.md is present.
+    """
+    matches = []
+    for member_name, _content in texts:
+        if PurePosixPath(member_name).name.lower() == "skill.md":
+            matches.append(member_name)
+    if len(matches) != 1:
+        raise ArchiveError("archive must contain exactly one SKILL.md")
+    parent = str(PurePosixPath(matches[0]).parent)
+    if parent == ".":
+        return ""
+    return parent + "/"
+def _check_member_count(members) -> None:
+    """Raise ArchiveError when ``members`` holds more than MAX_MEMBERS entries."""
+    if len(members) <= MAX_MEMBERS:
+        return
+    raise ArchiveError(f"archive has more than {MAX_MEMBERS} members")
+def _member_name_findings(name: str) -> list[dict]:
+    """Return an ARCHIVE-001 finding when a member name would leave the extraction root.
+    A name escapes when it is a POSIX absolute path, contains a ``..``
+    component, or looks like a Windows drive-absolute path. Otherwise the
+    result is an empty list.
+    """
+    member_path = PurePosixPath(name)
+    escapes = (
+        member_path.is_absolute()
+        or ".." in member_path.parts
+        or _looks_windows_absolute(name)
+    )
+    if not escapes:
+        return []
+    return [_archive_finding("ARCHIVE-001", name, "archive member escapes its extraction root")]
+def _looks_windows_absolute(name: str) -> bool:
+    """Return True for names shaped like ``X:/...`` or ``X:\\...``."""
+    if len(name) < 3:
+        return False
+    return name[1] == ":" and name[2] in "/\\"
+def _is_nested_archive(name: str) -> bool:
+    """Return True when a member name ends with a suffix in ARCHIVE_SUFFIXES (case-insensitive)."""
+    return name.lower().endswith(ARCHIVE_SUFFIXES)
+def _looks_hidden_executable(name: str, mode: int) -> bool:
+    """Return True when an archive member looks like a git hook or an executable script.
+    A member qualifies when its lower-cased path lies under ``.git/hooks/``,
+    when its file name is a well-known git hook name, or when ``mode`` has
+    any execute bit set and the suffix is .sh, .py, .js, .ps1 or empty.
+    Always returns a real bool, never the raw result of the bit mask.
+    """
+    lower = name.lower()
+    member_path = PurePosixPath(lower)
+    hook_names = {
+        "pre-commit", "post-checkout", "post-merge", "post-rewrite",
+        "pre-push", "commit-msg", "prepare-commit-msg",
+    }
+    # Prefix "/" so a path that starts with ".git/hooks/" matches as well.
+    if "/.git/hooks/" in "/" + lower:
+        return True
+    if member_path.name in hook_names:
+        return True
+    executable = bool(mode & 0o111)
+    return executable and member_path.suffix in {".sh", ".py", ".js", ".ps1", ""}
+def _archive_finding(rule_id: str, name: str, message: str) -> dict:
+    """Build one finding record with the keys ``rule_id``, ``member`` and ``message``."""
+    finding = dict(rule_id=rule_id, member=name, message=message)
+    return finding