skill-auditor 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
"""Security scanner for AI Agent skills."""

# Version string for this release of the package.
__version__ = "0.4.0"
4
+
@@ -0,0 +1,6 @@
1
+ from .cli import main
2
+
3
+
4
# When executed as a script, exit with whatever value ``main()`` returns.
if __name__ == "__main__":
    raise SystemExit(main())
6
+
@@ -0,0 +1,165 @@
1
+ """Small, bounded cross-line analyzers for language-specific risk patterns."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ast
6
+ import re
7
+ from pathlib import Path
8
+
9
+
10
def run_named_check(rule: dict, relative_path: str, text: str) -> list[tuple[int, str]]:
    """Run the analyzer selected by ``rule["check"]`` against *text*.

    Every analyzer except ``mcp-config-write`` is tied to a file type,
    decided by the lower-cased extension of *relative_path*.  An unknown
    check name, or a check paired with the wrong extension, yields ``[]``.

    Returns:
        The ``(line_number, snippet)`` pairs produced by the analyzer.
    """
    kind = rule.get("check")
    extension = Path(relative_path).suffix.lower()

    # The one check that applies regardless of file type.
    if kind == "mcp-config-write":
        return _mcp_config_write(text)

    if extension == ".py":
        if kind == "python-exfiltration":
            return _python_exfiltration(text)
        if kind == "python-decoded-exec":
            return _python_decoded_exec(text)
    elif extension == ".ps1":
        if kind == "powershell-download-exec":
            return _powershell_download_exec(text)
    elif extension in {".js", ".mjs", ".cjs", ".ts"}:
        if kind == "node-exfiltration":
            return _node_exfiltration(text)
    return []
24
+
25
+
26
+ def _python_exfiltration(text: str) -> list[tuple[int, str]]:
27
+ try:
28
+ tree = ast.parse(text)
29
+ except SyntaxError:
30
+ return []
31
+ tainted: set[str] = set()
32
+ for node in ast.walk(tree):
33
+ if isinstance(node, (ast.Assign, ast.AnnAssign)):
34
+ value = node.value
35
+ if value is not None and _is_sensitive_python_expression(value):
36
+ targets = node.targets if isinstance(node, ast.Assign) else [node.target]
37
+ for target in targets:
38
+ tainted.update(_assigned_names(target))
39
+ output = []
40
+ for node in ast.walk(tree):
41
+ if not isinstance(node, ast.Call) or not _is_python_network_sink(node):
42
+ continue
43
+ payload_nodes = [*node.args, *(keyword.value for keyword in node.keywords)]
44
+ if any(_is_sensitive_python_expression(item) or _references_names(item, tainted)
45
+ for item in payload_nodes):
46
+ output.append((getattr(node, "lineno", 1), _source_line(text, node)))
47
+ return output
48
+
49
+
50
+ def _python_decoded_exec(text: str) -> list[tuple[int, str]]:
51
+ try:
52
+ tree = ast.parse(text)
53
+ except SyntaxError:
54
+ return []
55
+ output = []
56
+ for node in ast.walk(tree):
57
+ if not isinstance(node, ast.Call):
58
+ continue
59
+ name = _call_name(node.func)
60
+ if name not in {"eval", "exec"} or not node.args:
61
+ continue
62
+ expression = ast.unparse(node.args[0]) if hasattr(ast, "unparse") else ""
63
+ if re.search(r"(base64|b64decode|urlopen|requests\.|socket\.)", expression, re.I):
64
+ output.append((getattr(node, "lineno", 1), expression))
65
+ return output
66
+
67
+
68
+ def _call_name(node: ast.AST) -> str:
69
+ if isinstance(node, ast.Name):
70
+ return node.id
71
+ if isinstance(node, ast.Attribute):
72
+ parent = _call_name(node.value)
73
+ return f"{parent}.{node.attr}" if parent else node.attr
74
+ return ""
75
+
76
+
77
+ def _node_exfiltration(text: str) -> list[tuple[int, str]]:
78
+ assignment = re.compile(
79
+ r"\b(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*[^\n]*"
80
+ r"(?:readFileSync|readFile|process\.env|\.ssh|\.aws|\.env|TOKEN|SECRET|PASSWORD)",
81
+ re.IGNORECASE,
82
+ )
83
+ sink = re.compile(
84
+ r"(?:fetch\s*\(|https?\.request\s*\(|axios\.(?:post|put|patch)"
85
+ r"|child_process\.(?:exec|spawn)[^\n]*(?:curl|wget))",
86
+ re.IGNORECASE,
87
+ )
88
+ tainted = {match.group(1) for match in assignment.finditer(text)}
89
+ if not tainted:
90
+ return []
91
+ output = []
92
+ for line_number, line in enumerate(text.splitlines(), start=1):
93
+ if sink.search(line) and any(re.search(rf"\b{re.escape(name)}\b", line) for name in tainted):
94
+ output.append((line_number, line.strip()))
95
+ return output
96
+
97
+
98
+ def _powershell_download_exec(text: str) -> list[tuple[int, str]]:
99
+ download = re.compile(r"(?:Invoke-WebRequest|iwr\b|Net\.WebClient|DownloadString)", re.I)
100
+ execute = re.compile(r"(?:Invoke-Expression|\biex\b|Start-Process|&\s*\$)", re.I)
101
+ if not download.search(text) or not execute.search(text):
102
+ return []
103
+ return _matching_lines(text, execute)
104
+
105
+
106
+ def _mcp_config_write(text: str) -> list[tuple[int, str]]:
107
+ target = re.compile(
108
+ r"(?:claude_desktop_config\.json|\.cursor[\\/].*(?:mcp|config)"
109
+ r"|\.codex[\\/].*(?:config|mcp)|mcpServers)",
110
+ re.I,
111
+ )
112
+ mutation = re.compile(
113
+ r"(?:write_text|writeFile|Set-Content|Add-Content|Out-File|>>|json\.dump)",
114
+ re.I,
115
+ )
116
+ if not target.search(text) or not mutation.search(text):
117
+ return []
118
+ return _matching_lines(text, mutation)
119
+
120
+
121
+ def _matching_lines(text: str, pattern: re.Pattern) -> list[tuple[int, str]]:
122
+ output = []
123
+ for line_number, line in enumerate(text.splitlines(), start=1):
124
+ if pattern.search(line):
125
+ output.append((line_number, line.strip()))
126
+ return output
127
+
128
+
129
+ def _assigned_names(node: ast.AST) -> set[str]:
130
+ return {item.id for item in ast.walk(node) if isinstance(item, ast.Name)}
131
+
132
+
133
+ def _references_names(node: ast.AST, names: set[str]) -> bool:
134
+ return any(isinstance(item, ast.Name) and item.id in names for item in ast.walk(node))
135
+
136
+
137
+ def _is_sensitive_python_expression(node: ast.AST) -> bool:
138
+ rendered = ast.dump(node, include_attributes=False)
139
+ return bool(re.search(
140
+ r"(?:\bopen\b|read_text|read_bytes|os.*(?:environ|getenv)|"
141
+ r"\.ssh|\.aws|\.env|TOKEN|SECRET|PASSWORD|CREDENTIAL)",
142
+ rendered,
143
+ re.IGNORECASE,
144
+ ))
145
+
146
+
147
def _is_python_network_sink(node: ast.Call) -> bool:
    """Return True when *node* calls something that can send data out.

    Recognised sinks are the mutating ``requests`` helpers, the
    ``urllib.request`` entry points, any ``.send``/``.sendall``/``.sendto``
    method, and ``subprocess`` launches whose AST dump mentions ``curl``
    or ``wget``.
    """
    http_calls = {
        "requests.post", "requests.put", "requests.patch",
        "urllib.request.urlopen", "urllib.request.Request",
    }
    process_calls = {"subprocess.run", "subprocess.Popen", "subprocess.call"}

    target = _call_name(node.func)
    if target in http_calls or target.endswith((".send", ".sendall", ".sendto")):
        return True
    if target not in process_calls:
        return False
    # A subprocess launch only counts when it shells out to a downloader.
    dumped = ast.dump(node, include_attributes=False)
    return re.search(r"(?:curl|wget)", dumped, re.IGNORECASE) is not None
160
+
161
+
162
+ def _source_line(text: str, node: ast.AST) -> str:
163
+ lines = text.splitlines()
164
+ number = max(1, getattr(node, "lineno", 1))
165
+ return lines[number - 1].strip() if number <= len(lines) else ""
@@ -0,0 +1,169 @@
1
+ """Read-only zip/tar inspection with bounded resource use."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ import stat
7
+ import tarfile
8
+ import zipfile
9
+ from pathlib import Path, PurePosixPath
10
+
11
# Largest on-disk archive size (bytes) that inspect_archive accepts.
MAX_ARCHIVE_BYTES = 25_000_000
# Archives listing more members than this are rejected outright.
MAX_MEMBERS = 2_000
# Cap on the running sum of declared member sizes; scanning stops past it.
MAX_EXPANDED_BYTES = 100_000_000
# Members whose declared size exceeds this are not read as text.
MAX_MEMBER_BYTES = 1_000_000
# Zip members whose size / compressed-size ratio exceeds this are flagged.
MAX_COMPRESSION_RATIO = 200
# File-name endings treated as archives (matched against lower-cased names).
ARCHIVE_SUFFIXES = (".zip", ".tar", ".tar.gz", ".tgz", ".tar.bz2", ".tbz2", ".tar.xz")
# Member extensions whose content is decoded as UTF-8 text; "" covers
# members without any extension.
TEXT_SUFFIXES = {
    ".md", ".txt", ".sh", ".bash", ".zsh", ".fish", ".py", ".js", ".mjs",
    ".cjs", ".ts", ".ps1", ".bat", ".cmd", ".yaml", ".yml", ".json", ".toml",
    ".cfg", ".ini", ".env", "",
}
22
+
23
+
24
class ArchiveError(ValueError):
    """Raised when an archive is rejected or cannot be inspected."""
26
+
27
+
28
def is_archive(path: Path) -> bool:
    """Return True when the file name of *path* ends with a known archive suffix.

    The comparison is case-insensitive and looks only at the final path
    component, not at the file's content.
    """
    filename = path.name.lower()
    return any(filename.endswith(suffix) for suffix in ARCHIVE_SUFFIXES)
31
+
32
+
33
def inspect_archive(path: Path) -> tuple[list[dict], list[tuple[str, str]], list[dict]]:
    """Inspect a zip or tar archive without extracting it to disk.

    Args:
        path: Location of the archive file.

    Returns:
        A ``(findings, texts, diagnostics)`` triple: structural findings,
        ``(member_name, decoded_text)`` pairs for readable text members,
        and diagnostics for members that could not be decoded.

    Raises:
        ArchiveError: The file cannot be stat'ed, is larger than
            ``MAX_ARCHIVE_BYTES``, is not a zip/tar archive, or is too
            corrupt to open or read.
    """
    try:
        size = path.stat().st_size
    except OSError as exc:
        raise ArchiveError(f"cannot stat archive: {exc}") from exc
    if size > MAX_ARCHIVE_BYTES:
        raise ArchiveError(f"archive exceeds {MAX_ARCHIVE_BYTES} bytes")

    try:
        if zipfile.is_zipfile(path):
            return _inspect_zip(path)
        if tarfile.is_tarfile(path):
            return _inspect_tar(path)
    except (zipfile.BadZipFile, tarfile.TarError, EOFError, OSError) as exc:
        # A file can pass the cheap signature probe and still be truncated
        # or corrupt.  Report that as ArchiveError, the same way stat
        # failures are reported, instead of leaking library exceptions.
        raise ArchiveError(f"cannot read archive: {exc}") from exc
    raise ArchiveError("unsupported or invalid archive")
44
+
45
+
46
def _inspect_zip(path: Path) -> tuple[list[dict], list[tuple[str, str]], list[dict]]:
    """Walk a zip archive's members and collect findings and text content.

    For each member, in order: add its declared size to a running total
    (stopping once ``MAX_EXPANDED_BYTES`` is exceeded), check the name for
    path escapes, skip nested archives and symlinks with a finding, flag
    hidden executables/hooks and suspicious compression ratios, and
    finally read small text-like members as UTF-8.

    Returns:
        ``(findings, texts, diagnostics)``.

    Raises:
        ArchiveError: The archive lists more than ``MAX_MEMBERS`` members.
    """
    import zlib  # local import: only needed to name the decompression error

    findings: list[dict] = []
    texts: list[tuple[str, str]] = []
    diagnostics: list[dict] = []
    total = 0
    with zipfile.ZipFile(path) as archive:
        members = archive.infolist()
        _check_member_count(members)
        for member in members:
            name = member.filename.replace("\\", "/")
            total += member.file_size
            if total > MAX_EXPANDED_BYTES:
                findings.append(_archive_finding("ARCHIVE-004", name, "archive expansion limit exceeded"))
                break
            findings.extend(_member_name_findings(name))
            if _is_nested_archive(name):
                findings.append(_archive_finding(
                    "ARCHIVE-004", name, "nested archive skipped at the depth limit"
                ))
                continue
            # The upper 16 bits of external_attr carry the Unix mode bits.
            mode = member.external_attr >> 16
            if stat.S_ISLNK(mode):
                findings.append(_archive_finding("ARCHIVE-002", name, "archive contains a symlink"))
                continue
            if _looks_hidden_executable(name, mode):
                findings.append(_archive_finding("ARCHIVE-003", name, "hidden executable or hook member"))
            compressed = max(member.compress_size, 1)  # avoid dividing by zero
            if member.file_size / compressed > MAX_COMPRESSION_RATIO:
                findings.append(_archive_finding("ARCHIVE-004", name, "suspicious compression ratio"))
            if member.is_dir() or member.file_size > MAX_MEMBER_BYTES:
                continue
            if PurePosixPath(name).suffix.lower() not in TEXT_SUFFIXES:
                continue
            try:
                raw = archive.read(member)
                texts.append((name, raw.decode("utf-8")))
            except (
                OSError,
                UnicodeError,
                RuntimeError,            # encrypted member
                EOFError,                # truncated member data
                NotImplementedError,     # unsupported compression method
                zipfile.BadZipFile,      # bad local header or CRC mismatch
                zlib.error,              # corrupt deflate stream
            ):
                # One unreadable member must not abort the whole scan.
                diagnostics.append({"path": name, "message": "archive member is not readable UTF-8 text"})
    return findings, texts, diagnostics
85
+
86
+
87
def _inspect_tar(path: Path) -> tuple[list[dict], list[tuple[str, str]], list[dict]]:
    """Walk a tar archive's members and collect findings and text content.

    For each member, in order: add its declared size to a running total
    (stopping once ``MAX_EXPANDED_BYTES`` is exceeded), check the name for
    path escapes, skip nested archives and links with a finding, flag
    hidden executables/hooks, and finally read small regular text-like
    members as UTF-8.

    Returns:
        ``(findings, texts, diagnostics)``.

    Raises:
        ArchiveError: The archive lists more than ``MAX_MEMBERS`` members.
    """
    findings: list[dict] = []
    texts: list[tuple[str, str]] = []
    diagnostics: list[dict] = []
    total = 0
    with tarfile.open(path, mode="r:*") as archive:
        # Enumerate lazily and stop one past the limit, so an archive with a
        # huge member count is rejected without first materialising every
        # header, as ``getmembers()`` would.
        members = []
        for member in archive:
            members.append(member)
            if len(members) > MAX_MEMBERS:
                break
        _check_member_count(members)
        for member in members:
            name = member.name.replace("\\", "/")
            total += member.size
            if total > MAX_EXPANDED_BYTES:
                findings.append(_archive_finding("ARCHIVE-004", name, "archive expansion limit exceeded"))
                break
            findings.extend(_member_name_findings(name))
            if _is_nested_archive(name):
                findings.append(_archive_finding(
                    "ARCHIVE-004", name, "nested archive skipped at the depth limit"
                ))
                continue
            if member.issym() or member.islnk():
                findings.append(_archive_finding("ARCHIVE-002", name, "archive contains a link"))
                continue
            if _looks_hidden_executable(name, member.mode):
                findings.append(_archive_finding("ARCHIVE-003", name, "hidden executable or hook member"))
            if not member.isfile() or member.size > MAX_MEMBER_BYTES:
                continue
            if PurePosixPath(name).suffix.lower() not in TEXT_SUFFIXES:
                continue
            try:
                handle = archive.extractfile(member)
                if handle is None:
                    continue
                with handle:  # release the member stream promptly
                    raw = handle.read()
                texts.append((name, raw.decode("utf-8")))
            except (OSError, UnicodeError, EOFError, tarfile.TarError):
                # Truncated or corrupt member data must not abort the scan.
                diagnostics.append({"path": name, "message": "archive member is not readable UTF-8 text"})
    return findings, texts, diagnostics
124
+
125
+
126
def validate_archive_skill(texts: list[tuple[str, str]]) -> str:
    """Locate the single ``SKILL.md`` among the archive's text members.

    Args:
        texts: ``(member_name, content)`` pairs collected from an archive.

    Returns:
        The directory prefix of the skill file, with a trailing slash, or
        ``""`` when the file sits at the archive root.

    Raises:
        ArchiveError: Zero or several members are named ``SKILL.md``
            (compared case-insensitively).
    """
    matches = []
    for member_name, _content in texts:
        if PurePosixPath(member_name).name.lower() == "skill.md":
            matches.append(member_name)
    if len(matches) != 1:
        raise ArchiveError("archive must contain exactly one SKILL.md")

    folder = str(PurePosixPath(matches[0]).parent)
    if folder == ".":
        return ""
    return folder + "/"
132
+
133
+
134
def _check_member_count(members) -> None:
    """Raise ``ArchiveError`` when *members* holds more than ``MAX_MEMBERS`` entries."""
    if len(members) <= MAX_MEMBERS:
        return
    raise ArchiveError(f"archive has more than {MAX_MEMBERS} members")
137
+
138
+
139
def _member_name_findings(name: str) -> list[dict]:
    """Return an ``ARCHIVE-001`` finding when *name* would leave the extraction root.

    A name escapes when it is an absolute POSIX path, contains a ``..``
    component, or looks like a Windows drive-rooted path.  Safe names
    produce an empty list.
    """
    posix = PurePosixPath(name)
    escapes = (
        posix.is_absolute()
        or ".." in posix.parts
        or _looks_windows_absolute(name)
    )
    if not escapes:
        return []
    return [_archive_finding("ARCHIVE-001", name, "archive member escapes its extraction root")]
144
+
145
+
146
+ def _looks_windows_absolute(name: str) -> bool:
147
+ return len(name) >= 3 and name[1] == ":" and name[2] in "/\\"
148
+
149
+
150
def _is_nested_archive(name: str) -> bool:
    """Return True when the member *name* ends with a known archive suffix (case-insensitive)."""
    lowered = name.lower()
    return any(lowered.endswith(suffix) for suffix in ARCHIVE_SUFFIXES)
153
+
154
+
155
+ def _looks_hidden_executable(name: str, mode: int) -> bool:
156
+ lower = name.lower()
157
+ hook_names = {
158
+ "pre-commit", "post-checkout", "post-merge", "post-rewrite",
159
+ "pre-push", "commit-msg", "prepare-commit-msg",
160
+ }
161
+ return (
162
+ "/.git/hooks/" in "/" + lower
163
+ or PurePosixPath(lower).name in hook_names
164
+ or ((mode & 0o111) and PurePosixPath(lower).suffix in {".sh", ".py", ".js", ".ps1", ""})
165
+ )
166
+
167
+
168
+ def _archive_finding(rule_id: str, name: str, message: str) -> dict:
169
+ return {"rule_id": rule_id, "member": name, "message": message}