isnad-scan 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isnad_scan/__init__.py +2 -0
- isnad_scan/ast_analyzer.py +374 -0
- isnad_scan/binary_scanner.py +230 -0
- isnad_scan/cli.py +221 -0
- isnad_scan/cve_checker.py +238 -0
- isnad_scan/js_analyzer.py +154 -0
- isnad_scan/patterns.py +573 -0
- isnad_scan/scanner.py +342 -0
- isnad_scan-0.3.0.dist-info/METADATA +186 -0
- isnad_scan-0.3.0.dist-info/RECORD +12 -0
- isnad_scan-0.3.0.dist-info/WHEEL +4 -0
- isnad_scan-0.3.0.dist-info/entry_points.txt +2 -0
isnad_scan/cli.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""CLI entry point for ISNAD Skill Scanner."""
|
|
2
|
+
import json
|
|
3
|
+
import sys
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
from rich.table import Table
|
|
9
|
+
from rich.panel import Panel
|
|
10
|
+
from rich.text import Text
|
|
11
|
+
|
|
12
|
+
from . import __version__
|
|
13
|
+
from .scanner import scan_skill, ScanResult
|
|
14
|
+
from .patterns import Severity
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
console = Console()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def format_severity(severity: Severity) -> Text:
    """Render a Severity value as colored rich Text (e.g. DANGER in bold red)."""
    style_map = {
        Severity.DANGER: "red bold",
        Severity.WARN: "yellow",
        Severity.INFO: "blue",
    }
    label = severity.value.upper()
    # Unknown severities fall back to plain white.
    return Text(label, style=style_map.get(severity, "white"))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def format_trust_level(level: str) -> Text:
    """Format trust level with color and emoji.

    Unknown levels get no emoji and plain white styling.
    """
    # NOTE: the emoji literals were mojibake (UTF-8 bytes decoded with a
    # legacy codepage); restored to the symbols matching each level name.
    config = {
        "DANGER": ("\U0001F6A8", "red bold"),      # 🚨
        "WARN": ("\u26A0\uFE0F ", "yellow bold"),  # ⚠️ (trailing space for alignment)
        "CAUTION": ("\u26A1", "yellow"),           # ⚡
        "SAFE": ("\u2705", "green bold"),          # ✅
    }
    emoji, style = config.get(level, ("", "white"))
    return Text(f"{emoji} {level}", style=style)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def print_result(result: ScanResult, verbose: bool = False, show_hash: bool = False):
    """Print scan result in human-readable format.

    Args:
        result: Completed scan to render.
        verbose: Also show INFO findings, per-finding context, and the
            warnings list.
        show_hash: Include the content-hash line when the result carries one.
    """
    console.print()

    # Header panel with the overall trust level
    level_text = format_trust_level(result.trust_level)
    console.print(Panel(
        Text.assemble("Trust Level: ", level_text),
        title=f"[bold]ISNAD Scan: {result.path}[/bold]",
        border_style="blue"
    ))

    # Summary stats
    summary = result.summary()
    console.print(f"\nš Files scanned: {summary['files_scanned']}")
    console.print(f"āļø Files skipped: {summary['files_skipped']}")

    if show_hash and result.content_hash:
        console.print(f"š Content hash: {result.content_hash}")

    # Findings counts by severity (INFO count only with --verbose)
    findings = summary['findings']
    if findings['danger'] > 0:
        console.print(f"šØ [red bold]DANGER findings: {findings['danger']}[/red bold]")
    if findings['warn'] > 0:
        console.print(f"ā ļø [yellow]WARN findings: {findings['warn']}[/yellow]")
    if findings['info'] > 0 and verbose:
        console.print(f"ā¹ļø [blue]INFO findings: {findings['info']}[/blue]")

    # Symlinks flagged unsafe by the scanner
    if result.symlinks_found:
        console.print(f"š [red]Unsafe symlinks: {len(result.symlinks_found)}[/red]")

    # Warning count here; the detailed list is printed at the end (verbose only)
    if result.warnings and verbose:
        console.print(f"ā” Warnings: {len(result.warnings)}")

    # Detailed findings grouped by severity, DANGER first
    if result.findings:
        console.print("\n[bold]Findings:[/bold]\n")

        for severity in [Severity.DANGER, Severity.WARN, Severity.INFO]:
            severity_findings = [f for f in result.findings if f.severity == severity]

            if not severity_findings:
                continue

            # INFO detail is opt-in via --verbose
            if severity == Severity.INFO and not verbose:
                continue

            # Deduplicate similar findings: bucket by (pattern_id, file) so
            # repeated hits of the same rule collapse into one entry
            seen_patterns = {}
            for finding in severity_findings:
                key = (finding.pattern_id, finding.file)
                if key not in seen_patterns:
                    seen_patterns[key] = []
                seen_patterns[key].append(finding)

            for (pattern_id, file), file_findings in seen_patterns.items():
                sev_text = format_severity(file_findings[0].severity)

                if len(file_findings) == 1:
                    # Single occurrence: file:line, pattern id, description
                    f = file_findings[0]
                    console.print(Text.assemble(
                        "[", sev_text, "] ",
                        f"{f.file}:{f.line} ā ",
                        (f.pattern_id, "cyan"),
                    ))
                    console.print(f" {f.description}")
                    if f.context and verbose:
                        console.print(f" [dim]{f.context}[/dim]")
                else:
                    # Multiple occurrences: show at most five line numbers,
                    # then an ellipsis and the total count
                    lines = [str(f.line) for f in file_findings[:5]]
                    line_str = ",".join(lines) + ("..." if len(file_findings) > 5 else "")
                    console.print(Text.assemble(
                        "[", sev_text, "] ",
                        f"{file}:{{{line_str}}} ā ",
                        (pattern_id, "cyan"),
                    ))
                    console.print(f" {file_findings[0].description}")

        console.print()

    # Scanner errors (always shown)
    if result.errors:
        console.print("[red bold]Errors:[/red bold]")
        for error in result.errors:
            console.print(f" ā {error}")

    # Warnings (verbose only)
    if result.warnings and verbose:
        console.print("\n[yellow]Warnings:[/yellow]")
        for warning in result.warnings:
            console.print(f" ā” {warning}")

    console.print()
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def print_json(result: ScanResult):
    """Serialize a ScanResult to stdout as pretty-printed JSON."""
    serialized_findings = []
    for finding in result.findings:
        serialized_findings.append({
            "severity": finding.severity.value,
            "pattern_id": finding.pattern_id,
            "description": finding.description,
            "file": finding.file,
            "line": finding.line,
            "match": finding.match,
            "context": finding.context,
        })

    # Key order is preserved by json.dumps, so keep it stable for consumers.
    payload = {
        "version": __version__,
        "path": result.path,
        "trust_level": result.trust_level,
        "content_hash": result.content_hash,
        "summary": result.summary(),
        "findings": serialized_findings,
        "symlinks": result.symlinks_found,
        "errors": result.errors,
        "warnings": result.warnings,
    }
    print(json.dumps(payload, indent=2))
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# Option names and flags below are part of the CLI contract; keep them stable.
@click.command()
@click.argument('path')
@click.option('--json', 'output_json', is_flag=True, help='Output as JSON')
@click.option('--verbose', '-v', is_flag=True, help='Show INFO findings and extra details')
@click.option('--hash', 'show_hash', is_flag=True, help='Show content hash')
@click.option('--version', is_flag=True, help='Show version')
@click.option('--quiet', '-q', is_flag=True, help='Only output trust level')
@click.option('--cve', 'check_cves', is_flag=True, help='Check dependencies against CVE database (requires network)')
def main(path: str, output_json: bool, verbose: bool, show_hash: bool, version: bool, quiet: bool, check_cves: bool):
    """
    Scan an agent skill for security issues.

    PATH can be a local directory or file.

    \b
    Exit codes:
      0 = SAFE - No issues found
      1 = CAUTION - Minor issues, review recommended
      2 = DANGER - Security issues detected
      3 = ERROR - Scanner error

    \b
    Examples:
      isnad-scan ./skills/my-skill/
      isnad-scan ./skills/my-skill/ --json
      isnad-scan ./skills/my-skill/ -v --hash
    """
    # --version short-circuits before any scanning.
    # NOTE(review): PATH is a required click argument, so `isnad-scan --version`
    # with no path fails at argument parsing before reaching this branch —
    # confirm whether click.version_option was intended instead.
    if version:
        print(f"isnad-scan {__version__}")
        sys.exit(0)

    try:
        result = scan_skill(path, check_cves=check_cves)
    except Exception as e:
        # Scanner failure: report in the requested format, then exit 3 (ERROR).
        if output_json:
            print(json.dumps({"error": str(e)}))
        else:
            console.print(f"[red]Error:[/red] {e}")
        sys.exit(3)

    # Output mode: --quiet wins over --json, --json over human-readable.
    if quiet:
        print(result.trust_level)
    elif output_json:
        print_json(result)
    else:
        print_result(result, verbose=verbose, show_hash=show_hash)

    # Process exit code mirrors the trust level (see docstring).
    sys.exit(result.exit_code)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# Allow running the module directly (e.g. `python -m isnad_scan.cli`);
# the installed console script uses the entry_points declaration instead.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""CVE database integration using OSV (Open Source Vulnerabilities).
|
|
2
|
+
|
|
3
|
+
Checks dependencies against known vulnerabilities without requiring API keys.
|
|
4
|
+
Uses the OSV.dev API which covers PyPI, npm, and other ecosystems.
|
|
5
|
+
"""
|
|
6
|
+
import json
|
|
7
|
+
import re
|
|
8
|
+
import urllib.request
|
|
9
|
+
import urllib.error
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import List, Optional, Dict, Tuple
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from .patterns import Finding, Severity
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
OSV_API_URL = "https://api.osv.dev/v1/query"
|
|
18
|
+
OSV_BATCH_URL = "https://api.osv.dev/v1/querybatch"
|
|
19
|
+
|
|
20
|
+
# Cache for OSV responses (in-memory, per-session)
|
|
21
|
+
_osv_cache: Dict[str, List[dict]] = {}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class PackageVersion:
    """A package with optional version constraint."""
    name: str
    version: Optional[str] = None
    ecosystem: str = "PyPI"  # PyPI, npm, Go, etc.

    def to_osv_query(self) -> dict:
        """Build the OSV /v1/query request body for this package.

        The ``version`` key is included only when a concrete version is known.
        """
        package_spec = {
            "name": self.name,
            "ecosystem": self.ecosystem,
        }
        query: dict = {"package": package_spec}
        if self.version:
            query["version"] = self.version
        return query
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def parse_requirements_txt(content: str) -> List[PackageVersion]:
    """Parse requirements.txt content into PyPI PackageVersion entries.

    Handles dotted package names (``zope.interface``), extras
    (``pkg[async]==1.0``), inline comments, and environment markers.
    URL/path requirements and pip options (``-r``, ``-e``, ``--hash``)
    are skipped.

    Args:
        content: Raw text of the requirements file.

    Returns:
        One PackageVersion per parseable requirement line; ``version`` is
        None when no numeric version follows a comparison operator.
    """
    packages: List[PackageVersion] = []

    for raw_line in content.split('\n'):
        line = raw_line.strip()

        # Skip blanks, comments, and pip options
        if not line or line.startswith('#') or line.startswith('-'):
            continue

        # Skip URLs and local/relative paths
        if line.startswith(('http://', 'https://', 'git+', '/', '.')):
            continue

        # Drop environment markers ("; python_version < '3.9'") and
        # trailing inline comments before matching.
        line = line.split(';', 1)[0]
        line = re.split(r'\s+#', line, maxsplit=1)[0].strip()
        if not line:
            continue

        # name [extras] operator version — extras and version are optional.
        # BUGFIX: the previous pattern disallowed '.' in names (truncating
        # "zope.interface" to "zope") and choked on extras brackets.
        match = re.match(
            r'^([A-Za-z0-9][A-Za-z0-9._-]*)\s*'          # package name
            r'(?:\[[^\]]*\]\s*)?'                         # optional extras, ignored
            r'(?:[=<>!~]+\s*([0-9][A-Za-z0-9._-]*))?',    # optional version
            line,
        )
        if match:
            name = match.group(1).lower()
            version = match.group(2)
            packages.append(PackageVersion(name=name, version=version, ecosystem="PyPI"))

    return packages
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def parse_package_json(content: str) -> List[PackageVersion]:
    """Parse package.json dependency sections into npm PackageVersion entries.

    Reads ``dependencies``, ``devDependencies`` and ``peerDependencies``.
    Malformed JSON, a non-object manifest, or non-string version specs
    yield no entry rather than raising.
    """
    packages: List[PackageVersion] = []

    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        return packages

    # BUGFIX: valid JSON that is not an object (e.g. a bare list) used to
    # raise AttributeError on data.get().
    if not isinstance(data, dict):
        return packages

    for dep_key in ('dependencies', 'devDependencies', 'peerDependencies'):
        deps = data.get(dep_key, {})
        if not isinstance(deps, dict):
            continue
        for name, version_spec in deps.items():
            version = None
            # BUGFIX: guard non-string specs; re.search on e.g. an int raised.
            if isinstance(version_spec, str):
                # Extract "1.2.3" from specs like "^1.2.3", "~1.2.3", ">=1.0.0"
                version_match = re.search(r'(\d+\.\d+\.\d+)', version_spec)
                if version_match:
                    version = version_match.group(1)
            packages.append(PackageVersion(name=name, version=version, ecosystem="npm"))

    return packages
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def parse_pyproject_toml(content: str) -> List[PackageVersion]:
    """Parse pyproject.toml dependencies (simple line parsing, not full TOML).

    Picks up entries inside any ``[...dependencies...]`` table, in both
    ``package = ">=1.0"`` key/value form and quoted list-entry form
    (``"package>=1.0",``).

    NOTE(review): PEP 621 ``dependencies = [...]`` arrays inside a plain
    ``[project]`` table are not covered — confirm whether that matters here.
    """
    packages: List[PackageVersion] = []

    in_deps = False
    for raw in content.split('\n'):
        line = raw.strip()

        # Track whether we are inside a dependencies table.
        if line.startswith('[') and 'dependencies' in line.lower():
            in_deps = True
            continue
        elif line.startswith('['):
            in_deps = False
            continue

        if not in_deps or not line or line.startswith('#'):
            continue

        # BUGFIX: quoted list entries with a constraint ("requests>=2.0",)
        # contain '=' and previously fell into the key/value branch, whose
        # regex requires a leading identifier — so they were silently
        # dropped.  Check for the quoted form first.
        if line.startswith(('"', "'")):
            # List format: "package>=1.0",
            match = re.match(r'^["\']?([a-zA-Z0-9_-]+)', line)
            if match:
                name = match.group(1).lower()
                version_match = re.search(r'(\d+\.\d+\.?\d*)', line)
                version = version_match.group(1) if version_match else None
                packages.append(PackageVersion(name=name, version=version, ecosystem="PyPI"))
        elif '=' in line:
            # Key/value format: package = ">=1.0"
            match = re.match(r'^([a-zA-Z0-9_-]+)\s*=\s*["\']?([^"\']+)?', line)
            if match:
                name = match.group(1).lower()
                version_spec = match.group(2) or ''
                version_match = re.search(r'(\d+\.\d+\.?\d*)', version_spec)
                version = version_match.group(1) if version_match else None
                packages.append(PackageVersion(name=name, version=version, ecosystem="PyPI"))
        else:
            # Bare list entry without quotes or constraint
            match = re.match(r'^["\']?([a-zA-Z0-9_-]+)', line)
            if match:
                name = match.group(1).lower()
                version_match = re.search(r'(\d+\.\d+\.?\d*)', line)
                version = version_match.group(1) if version_match else None
                packages.append(PackageVersion(name=name, version=version, ecosystem="PyPI"))

    return packages
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def query_osv(packages: List[PackageVersion], timeout: float = 10.0) -> Dict[str, List[dict]]:
    """Query the OSV batch API for known vulnerabilities in *packages*.

    Args:
        packages: Packages to look up; each is converted via to_osv_query().
        timeout: Socket timeout in seconds for the single batch request.

    Returns:
        Mapping of "<ecosystem>:<name>" -> list of OSV vulnerability dicts.
        Packages with no vulnerabilities are omitted.  Any network or
        decode failure returns {} so the surrounding scan still completes.
    """
    if not packages:
        return {}

    results = {}

    # Use batch API for efficiency: one POST covers all packages.
    queries = [pkg.to_osv_query() for pkg in packages]

    try:
        request_data = json.dumps({"queries": queries}).encode('utf-8')
        req = urllib.request.Request(
            OSV_BATCH_URL,
            data=request_data,
            headers={'Content-Type': 'application/json'},
            method='POST'
        )

        with urllib.request.urlopen(req, timeout=timeout) as response:
            data = json.loads(response.read())
    except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError, json.JSONDecodeError) as e:
        # Network error - return empty but don't fail the scan
        return {}

    # Parse batch response: results[] is positionally aligned with queries[].
    # NOTE(review): the batch endpoint can paginate via next_page_token;
    # additional pages are not fetched here — confirm completeness for very
    # large dependency lists.
    for i, result in enumerate(data.get('results', [])):
        if i < len(packages):
            vulns = result.get('vulns', [])
            if vulns:
                pkg = packages[i]
                key = f"{pkg.ecosystem}:{pkg.name}"
                results[key] = vulns

    return results
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def check_dependencies_for_cves(content: str, filename: str, timeout: float = 10.0) -> List[Finding]:
    """Check a dependency file for known CVEs via the OSV API.

    Args:
        content: Raw text of the dependency file.
        filename: Path used both to select the parser (by basename) and to
            label the resulting findings.
        timeout: Network timeout forwarded to query_osv().

    Returns:
        One Finding per (package, vulnerability) pair; empty for unknown
        file types, empty manifests, or network failure.
    """
    findings = []

    # Select parser by basename
    fname = Path(filename).name.lower()

    if fname == 'requirements.txt' or fname.startswith('requirements'):
        packages = parse_requirements_txt(content)
    elif fname == 'package.json':
        packages = parse_package_json(content)
    elif fname == 'pyproject.toml':
        packages = parse_pyproject_toml(content)
    else:
        # Not a recognized dependency manifest: nothing to check
        return findings

    if not packages:
        return findings

    # Query OSV for vulnerabilities (returns {} on any network error)
    vulns_by_pkg = query_osv(packages, timeout=timeout)

    # Generate findings for vulnerable packages
    for pkg in packages:
        key = f"{pkg.ecosystem}:{pkg.name}"
        vulns = vulns_by_pkg.get(key, [])

        for vuln in vulns:
            vuln_id = vuln.get('id', 'UNKNOWN')
            summary = vuln.get('summary', 'No summary available')
            severity_data = vuln.get('severity', [])

            # Determine severity from CVSS if available; default is WARN.
            # NOTE(review): OSV severity entries usually carry a CVSS
            # *vector string* in 'score' (e.g. "CVSS:3.1/AV:N/..."), so
            # float() will typically raise and fall through to the default —
            # confirm whether a vector parser was intended here.
            severity = Severity.WARN
            cvss_score = None
            for s in severity_data:
                if 'score' in s:
                    try:
                        cvss_score = float(s['score'])
                        if cvss_score >= 9.0:
                            severity = Severity.DANGER
                        elif cvss_score >= 7.0:
                            # High (>=7.0) is treated the same as Critical
                            severity = Severity.DANGER
                        elif cvss_score >= 4.0:
                            severity = Severity.WARN
                    except (ValueError, TypeError):
                        pass

            # Escalate CVE/GHSA entries whose summary suggests code execution
            if vuln_id.startswith('CVE-') or vuln_id.startswith('GHSA-'):
                if any(kw in summary.lower() for kw in ('remote code', 'rce', 'arbitrary code', 'command injection', 'sql injection')):
                    severity = Severity.DANGER

            findings.append(Finding(
                severity=severity,
                pattern_id='known_vulnerability',
                description=f'{vuln_id}: {summary[:150]}',
                file=filename,
                line=1,  # We don't track line numbers in deps
                match=f'{pkg.name}=={pkg.version}' if pkg.version else pkg.name,
                context=f'CVSS: {cvss_score}' if cvss_score else '',
            ))

    return findings
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def check_cves_enabled() -> bool:
    """Probe the OSV API to see whether CVE checking can run (network reachable)."""
    try:
        probe = urllib.request.Request(
            "https://api.osv.dev/v1/",
            method='HEAD'
        )
        with urllib.request.urlopen(probe, timeout=2.0):
            return True
    except Exception:
        # Any failure — DNS, timeout, HTTP error — means CVE checks are off.
        return False
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""JavaScript-specific analysis including minified code handling.
|
|
2
|
+
|
|
3
|
+
Handles:
|
|
4
|
+
- Minified/bundled JS detection and basic unminification
|
|
5
|
+
- JS-specific dangerous patterns
|
|
6
|
+
- Node.js specific risks
|
|
7
|
+
"""
|
|
8
|
+
import re
|
|
9
|
+
from typing import List, Tuple
|
|
10
|
+
|
|
11
|
+
from .patterns import Finding, Severity
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def is_minified_js(content: str) -> bool:
    """Heuristically detect whether JavaScript content is minified."""
    all_lines = content.split('\n')

    # A large payload crammed into a handful of lines is a strong signal.
    if len(all_lines) < 10 and len(content) > 5000:
        return True

    # Very long average line length also indicates minification.
    populated = [line for line in all_lines if line.strip()]
    if populated:
        mean_length = sum(len(line) for line in populated) / len(populated)
        if mean_length > 500:
            return True

    # Minifiers strip nearly all spaces; check the space ratio on larger files.
    if len(content) > 1000:
        if content.count(' ') / len(content) < 0.05:
            return True

    return False
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def basic_unminify(content: str) -> str:
    """
    Lightly reformat minified JS so regex patterns become detectable.
    This is NOT a full beautifier, just enough to expose dangerous patterns.
    """
    # Break statements onto their own lines after ';', '{' and '}' whenever
    # the next character is not already whitespace.
    for boundary in (';', '{', '}'):
        content = re.sub(re.escape(boundary) + r'(?=[^\s])', boundary + '\n', content)

    # Pad comparison/assignment operators so token-oriented patterns match.
    content = re.sub(r'([=<>!]+)', r' \1 ', content)

    return content
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def analyze_javascript(content: str, filename: str) -> List[Finding]:
    """Analyze JavaScript code for dangerous patterns.

    Minified input is flagged with a WARN finding and lightly reformatted via
    basic_unminify() before matching, so reported line numbers refer to the
    reformatted text rather than the original file.

    Args:
        content: JavaScript source text.
        filename: Path used to label findings.

    Returns:
        List of Finding objects (possibly empty).
    """
    findings = []

    # Check if minified
    minified = is_minified_js(content)
    if minified:
        findings.append(Finding(
            severity=Severity.WARN,
            pattern_id='minified_javascript',
            description='Minified JavaScript detected - harder to audit, may hide malicious code',
            file=filename,
            line=1,
            match='<minified>',
            context=f'File size: {len(content)} bytes, few lines',
        ))
        # Unminify for analysis
        content = basic_unminify(content)

    lines = content.split('\n')

    # JS-specific patterns: (regex, id, severity, description)
    js_patterns = [
        # Dynamic code execution
        (r'\beval\s*\(', 'js_eval', Severity.DANGER, 'eval() in JavaScript'),
        (r'new\s+Function\s*\(', 'js_function_constructor', Severity.DANGER, 'Function constructor (eval equivalent)'),
        (r'setTimeout\s*\(\s*["\'][^"\']*["\']', 'js_settimeout_string', Severity.DANGER, 'setTimeout with string (implicit eval)'),
        (r'setInterval\s*\(\s*["\'][^"\']*["\']', 'js_setinterval_string', Severity.DANGER, 'setInterval with string (implicit eval)'),

        # Dangerous Node.js APIs
        (r'child_process', 'js_child_process', Severity.WARN, 'child_process module - command execution'),
        (r'\.exec\s*\(', 'js_exec', Severity.WARN, 'exec() call - may be command execution'),
        (r'\.execSync\s*\(', 'js_exec_sync', Severity.DANGER, 'execSync() - synchronous command execution'),
        (r'require\s*\(\s*["\']child_process["\']', 'js_require_child_process', Severity.DANGER, 'Requiring child_process module'),

        # Prototype pollution
        (r'__proto__', 'js_proto_access', Severity.WARN, '__proto__ access - prototype pollution risk'),
        (r'constructor\s*\[\s*["\']prototype["\']', 'js_constructor_proto', Severity.DANGER, 'Prototype access via constructor'),
        (r'Object\.assign\s*\([^,]+,\s*[^)]+\)', 'js_object_assign', Severity.INFO, 'Object.assign - check for prototype pollution'),

        # DOM XSS sinks
        (r'\.innerHTML\s*=', 'js_innerhtml', Severity.WARN, 'innerHTML assignment - XSS risk'),
        (r'\.outerHTML\s*=', 'js_outerhtml', Severity.WARN, 'outerHTML assignment - XSS risk'),
        (r'document\.write\s*\(', 'js_document_write', Severity.WARN, 'document.write - XSS risk'),
        (r'\.insertAdjacentHTML\s*\(', 'js_insert_html', Severity.WARN, 'insertAdjacentHTML - XSS risk'),

        # Dangerous URL handling
        (r'location\s*=|location\.href\s*=', 'js_location_assign', Severity.WARN, 'Location assignment - open redirect risk'),
        (r'window\.open\s*\(', 'js_window_open', Severity.INFO, 'window.open - popup/redirect'),

        # Fetch/XHR to unknown destinations
        (r'fetch\s*\(\s*[^"\'`]', 'js_fetch_dynamic', Severity.WARN, 'Fetch with dynamic URL'),
        (r'XMLHttpRequest', 'js_xhr', Severity.INFO, 'XMLHttpRequest usage'),

        # Credential access patterns
        (r'localStorage\.getItem\s*\([^)]*(?:token|key|secret|password|credential)', 'js_localstorage_cred', Severity.WARN, 'Reading credentials from localStorage'),
        (r'sessionStorage\.getItem\s*\([^)]*(?:token|key|secret|password|credential)', 'js_sessionstorage_cred', Severity.WARN, 'Reading credentials from sessionStorage'),
        (r'document\.cookie', 'js_cookie_access', Severity.INFO, 'Cookie access'),

        # WebSocket (may exfiltrate data)
        (r'new\s+WebSocket\s*\(', 'js_websocket', Severity.INFO, 'WebSocket connection'),

        # Obfuscation indicators
        (r'\\x[0-9a-fA-F]{2}(\\x[0-9a-fA-F]{2}){5,}', 'js_hex_string', Severity.WARN, 'Hex-encoded string in JS'),
        (r'\\u[0-9a-fA-F]{4}(\\u[0-9a-fA-F]{4}){5,}', 'js_unicode_string', Severity.WARN, 'Unicode-encoded string in JS'),
        (r'atob\s*\(', 'js_atob', Severity.WARN, 'Base64 decoding (atob)'),
        (r'String\.fromCharCode\s*\([^)]{20,}\)', 'js_fromcharcode', Severity.WARN, 'String.fromCharCode - potential obfuscation'),
    ]

    for pattern, pattern_id, severity, description in js_patterns:
        # Annotate minified provenance once per pattern.
        # BUGFIX: the previous version appended the suffix to `description`
        # inside the per-match loop, so a pattern matching N times stacked
        # ' (found in minified code)' N times onto later findings.
        if minified:
            description = description + ' (found in minified code)'

        for match in re.finditer(pattern, content, re.IGNORECASE):
            # Line number within the (possibly unminified) content
            line_num = content[:match.start()].count('\n') + 1

            # Context: the matched line, truncated; fall back to the raw match
            if line_num <= len(lines):
                context = lines[line_num - 1].strip()[:200]
            else:
                context = match.group(0)[:100]

            findings.append(Finding(
                severity=severity,
                pattern_id=pattern_id,
                description=description,
                file=filename,
                line=line_num,
                match=match.group(0)[:100],
                context=context,
            ))

    return findings
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def is_javascript_file(filename: str) -> bool:
    """Check if a file is JavaScript (or TypeScript/JSX) by extension."""
    js_extensions = ('.js', '.mjs', '.cjs', '.jsx', '.ts', '.tsx')
    return filename.endswith(js_extensions)
|